Coverage for libs/sdc_etl_libs/tests/dataframe_tests/sdc_dataframe_transformations_test.py : 100%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import sys
2import os
3sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../../")
4from sdc_etl_libs.sdc_dataframe.Dataframe import Dataframe
5from sdc_etl_libs.sdc_dataframe.SDCDataframeEnums import SDCDFTypes
6import pytest
7import pandas as pd
8from enum import Enum
def test_drop_columns():
    """Check Dataframe.drop_columns with one name, several names and a bad argument.

    A single seven-field record is loaded, columns are then removed in two
    steps, and finally a bare string is passed where a list is expected,
    which must raise.
    """
    schema_json = """
    {
        "namespace": "Dataframe",
        "type": "object",
        "name": "I am a test",
        "fields": [
            {"name":"Id1","type":{"type":"string"}},
            {"name":"Id2","type":{"type":"int"}},
            {"name":"Id3","type":{"type":"boolean"}},
            {"name":"Id4","type":{"type":"string"}},
            {"name":"Id5","type":{"type":"float"}},
            {"name":"Id6","type":{"type":"string"}},
            {"name":"Id7","type":{"type":"string", "logical_type":"datetime"}}
        ]
    }
    """
    record = {
        "Id1": "string",
        "Id2": 5,
        "Id3": True,
        "Id4": "string",
        "Id5": 5.5,
        "Id6": "string",
        "Id7": "2019-06-13T15:06:18.337Z",
    }

    frame = Dataframe(SDCDFTypes.PANDAS, schema_json)
    frame.load_data([record])

    # Baseline: every schema field produced a column.
    assert len(frame.df.columns) == 7

    # One column, passed by keyword.
    # NOTE(review): the schema declares "Id1" but columns are addressed as
    # "ID1" - presumably Dataframe upper-cases names on load; confirm.
    frame.drop_columns(column_list_=["ID1"])
    remaining = frame.df.columns.tolist()
    assert "ID1" not in remaining

    # Several columns in one call, passed positionally.
    frame.drop_columns(["ID3", "ID7"])
    remaining = frame.df.columns.tolist()
    for dropped in ("ID3", "ID7"):
        assert dropped not in remaining

    # A plain string instead of a list must be rejected.
    with pytest.raises(Exception):
        frame.drop_columns("ID1")
def test_drop_columns_from_schema():
    """Check that a field flagged ``"drop_column": true`` is absent after loading.

    The record supplies seven keys, but the schema marks Id1 for dropping,
    so the loaded frame is expected to hold six columns and no ID1. No
    explicit drop_columns call is made in this test.
    """
    schema_json = """
    {
        "namespace": "Dataframe",
        "type": "object",
        "name": "I am a test",
        "fields": [
            {"name":"Id1","type":{"type":"string"},"drop_column":true},
            {"name":"Id2","type":{"type":"int"}},
            {"name":"Id3","type":{"type":"boolean"}},
            {"name":"Id4","type":{"type":"string"}},
            {"name":"Id5","type":{"type":"float"}},
            {"name":"Id6","type":{"type":"string"}},
            {"name":"Id7","type":{"type":"string", "logical_type":"datetime"}}
        ]
    }
    """
    record = {
        "Id1": "string",
        "Id2": 5,
        "Id3": True,
        "Id4": "string",
        "Id5": 5.5,
        "Id6": "string",
        "Id7": "2019-06-13T15:06:18.337Z",
    }

    frame = Dataframe(SDCDFTypes.PANDAS, schema_json)
    frame.load_data([record])

    # Seven fields went in; the schema-flagged one should already be gone.
    loaded_columns = frame.df.columns.tolist()
    assert len(frame.df.columns) == 6
    assert "ID1" not in loaded_columns
def test_fill_in_column():
    """Verify Dataframe.fill_in_column on an existing and on a missing column.

    Steps:
      1. Load five rows whose second field is None and confirm that the
         column is entirely null.
      2. Fill the column with "Cat", then overwrite it with "Dog"; after
         each call every row must hold the new value.
      3. Filling ID4, which the schema does not declare, with
         ``create_column_=False`` must raise.
      4. With ``create_column_=True`` the same call succeeds and every row
         of ID4 holds "Dog".
    """
    test_schema = """
    {
        "namespace": "Dataframe",
        "type": "object",
        "name": "I am a test",
        "fields": [
            {"name":"Id1","type":{"type":"string"}},
            {"name":"Id2","type":{"type":"string", "add_column": true }}
        ]
    }
    """

    df = Dataframe(SDCDFTypes.PANDAS, test_schema)

    # Five independent rows with the second field left empty.
    df.load_data([{"Id1": "string", "Id2": None} for _ in range(5)])

    # NOTE(review): the schema declares "Id2" but the column is read back as
    # "ID2" - presumably Dataframe upper-cases names on load; confirm.
    assert df.df["ID2"].isnull().all()

    # Compare element-wise first, then reduce. Comparing the result of
    # Series.all() to a string only passes when the object-dtype reduction
    # hands back an element instead of a bool, and it never checks each row.
    df.fill_in_column(column_name_="ID2", column_value_="Cat", create_column_=False)
    assert (df.df["ID2"] == "Cat").all()

    df.fill_in_column(column_name_="ID2", column_value_="Dog", create_column_=False)
    assert (df.df["ID2"] == "Dog").all()

    # The column does not exist and creation is disabled: the call must fail.
    with pytest.raises(Exception):
        df.fill_in_column(column_name_="ID4", column_value_="Dog",
                          create_column_=False)

    # With creation enabled the column is added and filled on every row.
    df.fill_in_column(column_name_="ID4", column_value_="Dog", create_column_=True)
    assert (df.df["ID4"] == "Dog").all()
135def test_concat_columns_transformation():
136 class TransformationType(Enum):
137 pre = "PRE"
138 post = "POST"
140 test_schema = """
141 {
142 "namespace": "Dataframe",
143 "type": "object",
144 "name": "I am a test",
145 "fields": [
146 {"name":"ID1","type":{"type":"string"}},
147 {"name":"ID2","type":{"type":"string", "add_column": true }},
148 {"name":"ID3",
149 "type":{"type":"string"},
150 "is_pii": true,
151 "transformations": [{"transformation_type":"PRE", "type":"concat_columns", "opts": {"list_columns": ["ID1", "ID2"], "separator":" "}}]
152 },
153 {"name":"ID4",
154 "type":{"type":"string"},
155 "is_pii": true,
156 "transformations": [{"transformation_type":"POST", "type":"concat_columns", "opts": {"list_columns": ["ID1", "ID2"], "separator":" "}}]
157 }
158 ]
159 }
160 """
162 dataframeClass = Dataframe(SDCDFTypes.PANDAS, test_schema)
163 dataframeClass.df = pd.DataFrame([["value1", "value4"], ["value2", "value5"]], columns=['ID1', 'ID2'])
164 dataframeClass.perform_column_transformations(TransformationType.pre)
166 assert len(dataframeClass.df) == 2
167 assert (list(dataframeClass.df.columns.values)) == ['ID1', 'ID2', 'ID3']
168 assert dataframeClass.df.loc[dataframeClass.df['ID1'] == "value1"]["ID3"].item() == "value1 value4"
169 assert dataframeClass.df.loc[dataframeClass.df['ID1'] == "value2"]["ID3"].item() == "value2 value5"
171 dataframeClass.perform_column_transformations(TransformationType.post)
172 assert len(dataframeClass.df) == 2
173 assert (list(dataframeClass.df.columns.values)) == ['ID1', 'ID2', 'ID3', 'ID4']