Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import gzip 

2import io 

3import json 

4import os 

5 

6import pytest 

7from sdc_etl_libs.pgp_helpers.local_pgp import LocalPGP 

8from sdc_etl_libs.sdc_data_schema.schema_toolbox import SchemaToolbox 

9from sdc_etl_libs.sdc_dataframe.Dataframe import Dataframe 

10from sdc_etl_libs.sdc_file_helpers.SDCFileFactory import SDCFileFactory 

11 

# CWD
PATH = os.path.dirname(os.path.abspath(__file__))


def _read_text(file_name):
    """Return the full text of a fixture file that lives next to this module.

    The file is opened in text mode with the platform default encoding and is
    closed before the function returns, so no handle is leaked at import time.

    :param file_name: Name of the file, relative to the directory in PATH.
    :return: File content as a string.
    """
    with open(os.path.join(PATH, file_name)) as handle:
        return handle.read()


def _load_schema(file_name):
    """Parse a JSON schema fixture that lives next to this module.

    :param file_name: Name of the JSON file, relative to the directory in PATH.
    :return: The decoded JSON document.
    """
    return json.loads(_read_text(file_name))


# Set up PGP
# The instance is created with __new__, so LocalPGP.__init__ is not run; the
# keys are then supplied explicitly from the fixture key files.
pgp = LocalPGP.__new__(LocalPGP)
pgp.set_public_key(_read_text('key.pub'))
pgp.set_private_key(_read_text('key.priv'))

# NOTE: the test_file_* handles below are intentionally left open at module
# scope because the tests pass them straight to pgp.encrypt().

# CSV Variables
test_schema_csv = _load_schema("test_schema_csv.json")
test_ep_schema_csv = SchemaToolbox.get_endpoint_data_from_schema(test_schema_csv, "main_source", validate_=True)
test_file_csv = open(PATH + '/test.csv')

# JSON Variables
test_data_schema_json = _load_schema("test_schema_json.json")
test_ep_schema_json = SchemaToolbox.get_endpoint_data_from_schema(test_data_schema_json, "main_source", validate_=True)
test_file_json = open(PATH + '/test.json')

# EDI Variables
test_data_schema_edi = _load_schema("test_schema_edi.json")
test_ep_schema_edi = SchemaToolbox.get_endpoint_data_from_schema(test_data_schema_edi, "main_source", validate_=True)
test_file_edi = open(PATH + "/test.DAT", "rb")

# Excel Variables
test_data_schema_excel = _load_schema("test_schema_excel.json")
test_ep_schema_excel = SchemaToolbox.get_endpoint_data_from_schema(
    test_data_schema_excel, "main_source", validate_=True)
test_file_excel = open(PATH + "/test.xlsx", "rb")

# GZIP Variables
test_data_schema_csv_gzip = _load_schema("test_schema_csv_gzip.json")
test_ep_schema_csv_gzip = SchemaToolbox.get_endpoint_data_from_schema(
    test_data_schema_csv_gzip, "main_source", validate_=True)
# The TSV fixture is compressed in memory so the test gets a gzip byte stream
# without needing a pre-compressed file on disk.
test_file_csv_gzip = io.BytesIO(gzip.compress(_read_text("test_gzip.tsv").encode('utf-8')))

46 

47# AVRO Variables 

48 

49# PARQUET Variables 

50 

51 

def test_csv():
    """
    Round-trip the CSV fixture through PGP encryption and decryption, then
    load the result into an sdc dataframe and check that it holds 6 rows.
    """

    encrypted = pgp.encrypt(test_file_csv)
    decrypted = pgp.decrypt(encrypted)

    file_obj = SDCFileFactory.get_file(
        test_schema_csv,
        test_ep_schema_csv,
        "neustar-file-name.csv",
        "fact-funnel-path/",
        decrypted,
    )
    frame = file_obj.get_file_as_dataframe()

    assert frame is not None
    assert isinstance(frame, Dataframe)
    assert frame.df is not None
    assert len(frame.df) == 6

    # TODO: the original kept two disabled follow-up calls here
    # (SDCFileFactory.get_endpoint_file_obj and
    # Dataframe.get_as_file_obj_from_endpoint_schema). They referenced
    # `ep_schema_csv`, which is not defined in this module; re-enable them
    # with `test_ep_schema_csv` once that round trip is meant to be covered.

69 

70 

def test_json():
    """
    Round-trip the JSON fixture through PGP encryption and decryption, then
    load the result into an sdc dataframe and check that it holds 2 rows.
    """

    encrypted = pgp.encrypt(test_file_json)
    decrypted = pgp.decrypt(encrypted)

    file_obj = SDCFileFactory.get_file(
        test_data_schema_json,
        test_ep_schema_json,
        "aligner-orders",
        "Fantasia/",
        decrypted,
    )
    frame = file_obj.get_file_as_dataframe()

    assert frame is not None
    assert isinstance(frame, Dataframe)
    assert frame.df is not None
    assert len(frame.df) == 2

84 

85 

def test_edi():
    """
    Round-trip the EDI fixture through PGP encryption and decryption, then
    load the result into an sdc dataframe and check that it holds 101 rows.
    """

    encrypted = pgp.encrypt(test_file_edi)
    decrypted = pgp.decrypt(encrypted)

    file_obj = SDCFileFactory.get_file(
        test_data_schema_edi,
        test_ep_schema_edi,
        "UPS-EDI-210.DAT",
        "",
        decrypted,
    )
    frame = file_obj.get_file_as_dataframe()

    assert frame is not None
    assert isinstance(frame, Dataframe)
    assert frame.df is not None
    assert len(frame.df) == 101

99 

100 

def test_excel():
    """
    Round-trip the Excel fixture through PGP encryption and decryption, then
    load the result into an sdc dataframe and check its shape is (14, 20).
    """

    encrypted = pgp.encrypt(test_file_excel)
    decrypted = pgp.decrypt(encrypted)

    # NOTE(review): the file name below is the same one used by the EDI test;
    # it looks like a copy-paste leftover - confirm whether the factory cares.
    file_obj = SDCFileFactory.get_file(
        test_data_schema_excel,
        test_ep_schema_excel,
        "UPS-EDI-210.DAT",
        "",
        decrypted,
    )
    frame = file_obj.get_file_as_dataframe()

    assert frame is not None
    assert isinstance(frame, Dataframe)
    assert frame.df is not None
    assert frame.df.shape == (14, 20)

    column_names = list(frame.df.columns.values)
    assert len(column_names) > 0

115 

116 

def test_gzip():
    """
    Round-trip the gzip-compressed CSV fixture through PGP encryption and
    decryption, then load the result into an sdc dataframe and check that it
    holds 5 rows.
    """

    encrypted = pgp.encrypt(test_file_csv_gzip)
    decrypted = pgp.decrypt(encrypted)

    file_obj = SDCFileFactory.get_file(
        test_data_schema_csv_gzip,
        test_ep_schema_csv_gzip,
        "neustar-file-name.csv",
        "fact-funnel-path/",
        decrypted,
    )
    frame = file_obj.get_file_as_dataframe()

    assert frame is not None
    assert isinstance(frame, Dataframe)
    assert frame.df is not None
    assert len(frame.df) == 5

131 

132 

@pytest.mark.skip(reason="Resolve this avro test as it is currently not returning a DF")
def test_avro():
    """
    Placeholder for the avro encrypt/decrypt round-trip test.

    The test is skipped unconditionally and has no body yet.
    """

139 

140 

@pytest.mark.skip(reason="Resolve this parquet test as it is currently not returning a DF")
def test_parquet():
    """
    Placeholder for the parquet encrypt/decrypt round-trip test.

    The test is skipped unconditionally and has no body yet.
    """