Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from sdc_etl_libs.sdc_dataframe.sdc_dataframe_helpers import SDCDataframeHelpers 

2from sdc_etl_libs.sdc_dataframe.Dataframe import Dataframe 

3from sdc_etl_libs.sdc_dataframe.SDCDataframeEnums import SDCDFTypes 

4import pandas as pd 

5import numpy as np 

6 

# Base schema shared by the tests in this module: a single merge-key
# field (ID), three nullable string fields, and one "add_column" field.
schema = {
    "estimated_row_size": "80b",
    "estimated_row_count": 100,
    "fields": [
        # The only field flagged "sf_merge_key"; the merge-key test in this
        # module expects exactly ['ID'] for this schema.
        {"name": "ID", "type": {"type": "string"}, "sf_merge_key": True},
        {"name": "STATUS", "type": {"type": "string"}, "is_nullable": True},
        {"name": "NAME", "type": {"type": "string"}, "is_nullable": True},
        {"name": "LAST_NAME", "type": {"type": "string"}, "is_nullable": True},
        # NOTE(review): "add_column" presumably marks a column supplied by the
        # ETL layer rather than by the input data -- confirm against the
        # sdc_etl_libs schema documentation.
        {"name": "_ETL_FILENAME", "type": {"type": "string"}, "add_column": True},
    ],
}

18 

# Same four data fields as the base schema, but with no field flagged
# "sf_merge_key" (ID is merely nullable here) and no "add_column" field.
schema_without_merge_key = {
    "estimated_row_size": "80b",
    "estimated_row_count": 100,
    "fields": [
        {"name": "ID", "type": {"type": "string"}, "is_nullable": True},
        {"name": "STATUS", "type": {"type": "string"}, "is_nullable": True},
        {"name": "NAME", "type": {"type": "string"}, "is_nullable": True},
        {"name": "LAST_NAME", "type": {"type": "string"}, "is_nullable": True},
    ],
}

29 

# Schema with two merge-key fields, one of which carries a "rename".
# The merge-key test in this module expects ['ID', 'STATUS2'] for it,
# i.e. the renamed name rather than the original "STATUS".
schema_with_rename = {
    "estimated_row_size": "80b",
    "estimated_row_count": 100,
    "fields": [
        {"name": "ID", "type": {"type": "string"}, "sf_merge_key": True},
        {"name": "STATUS", "type": {"type": "string"}, "sf_merge_key": True, "rename": "STATUS2"},
        {"name": "NAME", "type": {"type": "string"}, "is_nullable": True},
    ],
}

39 

# Shipment-tracking schema used by the merge-key concat test.
# Two fields are flagged "sf_merge_key": TRACKING_ID and EVENT_DATE_TS.
schema_tracking = {
    "estimated_row_size": "80b",
    "estimated_row_count": 100,
    "fields": [
        {"name": "SHIP_DATE", "type": {"type": "string"}},
        {"name": "TRACKING_ID", "type": {"type": "string"}, "sf_merge_key": True},
        {"name": "ORDER_NUMBER", "type": {"type": "string"}},
        {"name": "LOCATION_COUNTRY", "type": {"type": "string"}},
        {"name": "STATUS", "type": {"type": "string"}},
        # The only non-string field in this schema.
        {"name": "EVENT_DATE_TS", "type": {"type": "long"}, "sf_merge_key": True},
        {"name": "LOCATION_STATE", "type": {"type": "string"}},
        {"name": "SHIPMENT_UUID", "type": {"type": "string"}},
        {"name": "EVENT_DATE", "type": {"type": "string", "logical_type": "datetime"}},
        {"name": "LOCATION_CITY", "type": {"type": "string"}},
        {"name": "EXPECTED_DELIVERY", "type": {"type": "string"}, "is_nullable": True},
        # NOTE(review): "add_column" presumably marks a column supplied by the
        # ETL layer rather than by the input data -- confirm against the
        # sdc_etl_libs schema documentation.
        {"name": "_ETL_DATA_ITEM_NAME", "type": {"type": "string"}, "add_column": True},
    ],
}

58 

def test_concat_a_couple_of_sdc_dataframes_with_merge_keys():
    """
    Ensure concatenating two SDC Dataframes whose schema has merge key
    fields (TRACKING_ID, EVENT_DATE_TS) works.
    """
    pandas_df1 = pd.DataFrame(data={
        'SHIP_DATE': ["2020-12-07", "2020-12-07"],
        'TRACKING_ID': ["100906849", "100907169"],
        'ORDER_NUMBER': ["", ""],
        'LOCATION_COUNTRY': ['DEU', 'FRA'],
        'STATUS': ['Consignment Delivered', 'Shipment Delivered In'],
        'EVENT_DATE_TS': [1607304977, 1607304978],
        'LOCATION_STATE': ["", ""],
        'SHIPMENT_UUID': ['654dd40b-7a66-4747-bb57-eb1925beeab7', '654dd40b-7a66-4747-bb57-eb1925beeab7'],
        'EVENT_DATE': ["2020-12-07 1:36:17", "2020-12-07 1:36:17"],
        'LOCATION_CITY': ["", ""],
        'EXPECTED_DELIVERY': ["", ""],
    })

    # The first row repeats the (TRACKING_ID, EVENT_DATE_TS) pair
    # ("100906849", 1607304977) from pandas_df1 with a different SHIP_DATE.
    pandas_df2 = pd.DataFrame(data={
        'SHIP_DATE': ["2021-12-07", "2022-12-07"],
        'TRACKING_ID': ["100906849", "200907167"],
        'ORDER_NUMBER': ["", ""],
        'LOCATION_COUNTRY': ['DEU', 'FRA'],
        'STATUS': ['Consignment Delivered', 'Shipment Delivered In'],
        'EVENT_DATE_TS': [1607304977, 1607304978],
        'LOCATION_STATE': ["", ""],
        'SHIPMENT_UUID': ['654dd40b-7a66-4747-bb57-eb1925beeab7', '654dd40b-7a66-4747-bb57-eb1925beeab7'],
        'EVENT_DATE': ["2020-12-07 1:36:17", "2020-12-07 1:36:17"],
        'LOCATION_CITY': ["", ""],
        'EXPECTED_DELIVERY': ["", ""],
    })

    sdc_df_1 = Dataframe(SDCDFTypes.PANDAS, schema_tracking)
    sdc_df_2 = Dataframe(SDCDFTypes.PANDAS, schema_tracking)
    sdc_df_1.process_df(pandas_df1)
    sdc_df_2.process_df(pandas_df2)

    sdc_df_result = SDCDataframeHelpers.concat_sdc_dataframe(schema_tracking, sdc_df_1, sdc_df_2)

    # Four input rows, three expected: for the repeated merge-key pair only
    # the row from the second frame (SHIP_DATE 2021-12-07) is expected.
    assert len(sdc_df_result.df) == 3
    assert sdc_df_result.df[["TRACKING_ID", "SHIP_DATE"]].values.tolist() == [
        ['100907169', '2020-12-07'],
        ['100906849', '2021-12-07'],
        ['200907167', '2022-12-07'],
    ]

100 

def test_concat_a_couple_of_sdc_dataframes_without_merge_keys():
    """
    Ensure concatenating two SDC Dataframes whose schema has no merge key
    fields works: every input row is expected in the result, including the
    rows that share the same ID.
    """
    pandas_df1 = pd.DataFrame(data={
        'ID': ["1", "2"],
        'STATUS': ['INACTIVE', 'ACTIVE'],
        'NAME': ["MILU", "NALA"],
        'LAST_NAME': ["SMIT", "WALDORF"],
    })
    sdc_df_1 = Dataframe(SDCDFTypes.PANDAS, schema_without_merge_key)
    sdc_df_1.process_df(pandas_df1)

    # The second frame deliberately omits the nullable LAST_NAME column.
    pandas_df2 = pd.DataFrame(data={
        'ID': ["1", "3"],
        'STATUS': ['ACTIVE', 'ACTIVE'],
        'NAME': ["MILU", "JAIME"],
    })
    sdc_df_2 = Dataframe(SDCDFTypes.PANDAS, schema_without_merge_key)
    sdc_df_2.process_df(pandas_df2)

    sdc_df_result = SDCDataframeHelpers.concat_sdc_dataframe(schema_without_merge_key, sdc_df_1, sdc_df_2)
    result_df = sdc_df_result.df

    assert len(result_df) == 4
    assert result_df[["ID", "STATUS", "NAME"]].values.tolist() == [
        ['1', 'INACTIVE', 'MILU'],
        ['2', 'ACTIVE', 'NALA'],
        ['1', 'ACTIVE', 'MILU'],
        ['3', 'ACTIVE', 'JAIME'],
    ]

    # Missing values are checked with pd.isna rather than by comparing lists
    # that contain np.nan: nan != nan, so such a list comparison only passes
    # when both sides hold the very same nan object.
    last_names = result_df["LAST_NAME"].tolist()
    assert last_names[:2] == ["SMIT", "WALDORF"]
    assert all(pd.isna(value) for value in last_names[2:])

129 

130 

def test_concat_a_none_sdc_dataframes_with_a_sdc_dataframes():
    """
    Ensure concatenating an SDC Dataframe created with a None type (and never
    given data) with a populated SDC Dataframe works.
    The result should contain the rows of the populated SDC Dataframe.
    """
    # Built with None instead of an SDCDFTypes member; process_df is never called.
    sdc_df_1 = Dataframe(None, schema)

    pandas_df2 = pd.DataFrame(data={'ID': ["1", "2"], 'STATUS': ['ACTIVE', 'ACTIVE']})
    sdc_df_2 = Dataframe(SDCDFTypes.PANDAS, schema)
    sdc_df_2.process_df(pandas_df2)

    sdc_df_result = SDCDataframeHelpers.concat_sdc_dataframe(schema, sdc_df_1, sdc_df_2)

    assert len(sdc_df_result.df) == 2
    assert sdc_df_result.df[["ID", "STATUS"]].values.tolist() == [['1', 'ACTIVE'], ['2', 'ACTIVE']]

145 

146 

def test_get_merge_key_fields_from_schema():
    """
    Ensure get_merge_key_fields returns the expected merge key names:
    ['ID'] for the base schema, an empty list for a schema without merge
    keys, and the renamed name ('STATUS2') for a merge key with a "rename".
    """
    merge_keys = SDCDataframeHelpers.get_merge_key_fields(schema)
    empty_merge_keys = SDCDataframeHelpers.get_merge_key_fields(schema_without_merge_key)
    merge_keys_with_rename = SDCDataframeHelpers.get_merge_key_fields(schema_with_rename)

    assert merge_keys == ['ID']
    assert empty_merge_keys == []
    assert merge_keys_with_rename == ['ID', 'STATUS2']

152 assert empty_merge_keys == [] 

153 assert merge_keys_with_rename == ['ID', 'STATUS2']