Coverage for libs/sdc_etl_libs/tests/sdc_file_helpers_tests/sdc_json_file_test.py : 100%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
2import gzip
3import json
4import io
5import logging
6import os
7from sdc_etl_libs.sdc_file_helpers.SDCFileFactory import SDCFileFactory
8from sdc_etl_libs.sdc_data_schema.schema_toolbox import SchemaToolbox
# Shared fixtures: load the JSON test schema that sits next to this test module
# and resolve the endpoint definitions the tests below hand to SDCFileFactory.
_schema_path = os.path.dirname(os.path.abspath(__file__)) + "/test_schema_json_file_1.json"
# A context manager closes the schema file handle deterministically instead of
# leaving it to the garbage collector.
with open(_schema_path) as _schema_file:
    data_schema_1 = json.load(_schema_file)

# Endpoint "main_source" (used by the multi-record and malformed-JSON tests).
ep_schema_1 = SchemaToolbox.get_endpoint_data_from_schema(data_schema_1, "main_source", validate_=True)
# Endpoint "main_source_no_lines" (used by the single-document tests).
ep_schema_2 = SchemaToolbox.get_endpoint_data_from_schema(data_schema_1, "main_source_no_lines", validate_=True)
# Endpoint "main_source_gzipped" (used by the gzip tests).
ep_schema_3 = SchemaToolbox.get_endpoint_data_from_schema(data_schema_1, "main_source_gzipped", validate_=True)
def test_get_file_as_dataframe_load_json_success_multiple_jsons():
    """
    Two newline-separated JSON records read through the "main_source" endpoint
    must load into an SDCDataframe containing exactly two rows.
    """
    raw_records = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": '
        '{"id": "ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly"'
        ', "timestamp": "2020-02-01T00:01:40.223558+00:00"}\n{"manufacturingOrderID": '
        '"fb0e49fd-6f7c-4bdf-9be0-b3fcb8c3dcae", "orthodonticOrderID": '
        '{"id": "31ba26c6-579e-4f0a-8257-2fbcd9489078"}, "timestamp": "2020-02-01T00:01:40.225287+00:00", '
        '"eventType": "ManufacturingOrderReadyForAssembly"}'
    )
    # Text-mode in-memory stream standing in for the source file.
    source_stream = io.StringIO(raw_records)
    json_file = SDCFileFactory.get_file(
        data_schema_1, ep_schema_1, "aligner-orders", "Fantasia/", source_stream
    )

    loaded = json_file.get_file_as_dataframe()

    assert loaded.df is not None
    assert len(loaded.df) == 2
def test_get_file_as_dataframe_load_malformed_json_exception(mocker):
    """
    Ensure a logging error is triggered when malformed JSON is loaded.

    :param mocker: pytest-mock fixture used to replace ``logging.error``.
    """
    # The payload ends with an extra closing brace, so it is not valid JSON.
    json_string = '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": {"id": ' \
                  '"ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly", ' \
                  '"timestamp": "2020-02-01T00:01:40.223558+00:00"}}'
    file_obj = io.StringIO(json_string)
    sdc_file = SDCFileFactory.get_file(data_schema_1, ep_schema_1, "aligner-orders", "Fantasia/", file_obj)

    # Keep the mock returned by the patch so the assertion targets it directly
    # rather than re-reading the patched module attribute.
    mocked_error = mocker.patch('logging.error')
    # The return value is irrelevant here; only the logging side effect is checked.
    sdc_file.get_file_as_dataframe()
    mocked_error.assert_called()
def test_get_file_as_dataframe_load_single_json():
    """
    A single JSON document read through the "main_source_no_lines" endpoint
    must load into an SDCDataframe containing exactly one row.
    """
    single_record = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": {"id": '
        '"ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly", '
        '"timestamp": "2020-02-01T00:01:40.223558+00:00"}'
    )
    source_stream = io.StringIO(single_record)
    json_file = SDCFileFactory.get_file(
        data_schema_1, ep_schema_2, "aligner-orders", "Fantasia/", source_stream
    )

    loaded = json_file.get_file_as_dataframe()

    assert loaded.df is not None
    assert len(loaded.df) == 1
def test_get_file_as_dataframe_load_json_gzipped():
    """
    Two newline-separated JSON records, gzip-compressed and read through the
    "main_source_gzipped" endpoint, must load into an SDCDataframe with two rows.
    """
    raw_records = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": '
        '{"id": "ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly"'
        ', "timestamp": "2020-02-01T00:01:40.223558+00:00"}\n{"manufacturingOrderID": '
        '"fb0e49fd-6f7c-4bdf-9be0-b3fcb8c3dcae", "orthodonticOrderID": '
        '{"id": "31ba26c6-579e-4f0a-8257-2fbcd9489078"}, "timestamp": "2020-02-01T00:01:40.225287+00:00", '
        '"eventType": "ManufacturingOrderReadyForAssembly"}'
    )
    # Encode to UTF-8 bytes, gzip them, and expose the result as a binary stream.
    gzipped_stream = io.BytesIO(gzip.compress(raw_records.encode('utf-8')))
    json_file = SDCFileFactory.get_file(
        data_schema_1, ep_schema_3, "aligner-orders", "Fantasia/", gzipped_stream
    )

    loaded = json_file.get_file_as_dataframe()

    assert loaded.df is not None
    assert len(loaded.df) == 2
def test_get_file_as_object_load_json_gzipped():
    """
    For gzip-compressed, newline-separated JSON read through the
    "main_source_gzipped" endpoint, get_file_as_object() must return an
    io.BytesIO instance.
    """
    raw_records = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": '
        '{"id": "ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly"'
        ', "timestamp": "2020-02-01T00:01:40.223558+00:00"}\n{"manufacturingOrderID": '
        '"fb0e49fd-6f7c-4bdf-9be0-b3fcb8c3dcae", "orthodonticOrderID": '
        '{"id": "31ba26c6-579e-4f0a-8257-2fbcd9489078"}, "timestamp": "2020-02-01T00:01:40.225287+00:00", '
        '"eventType": "ManufacturingOrderReadyForAssembly"}'
    )
    # Encode to UTF-8 bytes, gzip them, and expose the result as a binary stream.
    gzipped_stream = io.BytesIO(gzip.compress(raw_records.encode('utf-8')))
    json_file = SDCFileFactory.get_file(
        data_schema_1, ep_schema_3, "aligner-orders", "Fantasia/", gzipped_stream
    )

    returned_obj = json_file.get_file_as_object()

    assert isinstance(returned_obj, io.BytesIO)
def test_get_file_as_object_load_single_json():
    """
    For a single UTF-8 encoded JSON document read through the
    "main_source_no_lines" endpoint, get_file_as_object() must return an
    io.BytesIO instance.
    """
    single_record = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": {"id": '
        '"ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly", '
        '"timestamp": "2020-02-01T00:01:40.223558+00:00"}'
    )
    byte_stream = io.BytesIO(single_record.encode('utf-8'))
    json_file = SDCFileFactory.get_file(
        data_schema_1, ep_schema_2, "aligner-orders", "Fantasia/", byte_stream
    )

    returned_obj = json_file.get_file_as_object()

    assert isinstance(returned_obj, io.BytesIO)
# Run the gzipped-dataframe test only when this file is executed directly, so
# that importing the module (e.g. during pytest collection) has no side effects.
if __name__ == "__main__":
    test_get_file_as_dataframe_load_json_gzipped()