Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1 

2import gzip 

3import json 

4import io 

5import logging 

6import os 

7from sdc_etl_libs.sdc_file_helpers.SDCFileFactory import SDCFileFactory 

8from sdc_etl_libs.sdc_data_schema.schema_toolbox import SchemaToolbox 

9 

10 

# Shared fixtures for every test in this module.
# The schema JSON lives next to this file; it is read through a context manager
# so the file handle is closed as soon as the schema has been parsed.
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_schema_json_file_1.json")) as schema_file:
    data_schema_1 = json.load(schema_file)

# Endpoint definitions looked up by name in the schema, with validation enabled.
# NOTE(review): judging by the endpoint names and the tests that use them,
# "main_source_no_lines" is presumably the lines=False variant and
# "main_source_gzipped" the gzip-compressed variant -- confirm against the schema file.
ep_schema_1 = SchemaToolbox.get_endpoint_data_from_schema(data_schema_1, "main_source", validate_=True)
ep_schema_2 = SchemaToolbox.get_endpoint_data_from_schema(data_schema_1, "main_source_no_lines", validate_=True)
ep_schema_3 = SchemaToolbox.get_endpoint_data_from_schema(data_schema_1, "main_source_gzipped", validate_=True)

15 

16 

def test_get_file_as_dataframe_load_json_success_multiple_jsons():
    """
    Check that a text payload holding two newline-separated JSON records is
    loaded through the "main_source" endpoint into a dataframe with two rows.
    """
    # Two JSON records separated by a single newline.
    payload = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", '
        '"orthodonticOrderID": {"id": "ab355624-cbcd-4694-937b-de04c8bb07e7"}, '
        '"eventType": "ManufacturingOrderReadyForAssembly", '
        '"timestamp": "2020-02-01T00:01:40.223558+00:00"}\n'
        '{"manufacturingOrderID": "fb0e49fd-6f7c-4bdf-9be0-b3fcb8c3dcae", '
        '"orthodonticOrderID": {"id": "31ba26c6-579e-4f0a-8257-2fbcd9489078"}, '
        '"timestamp": "2020-02-01T00:01:40.225287+00:00", '
        '"eventType": "ManufacturingOrderReadyForAssembly"}'
    )
    text_stream = io.StringIO(payload)
    source_file = SDCFileFactory.get_file(data_schema_1, ep_schema_1, "aligner-orders", "Fantasia/", text_stream)

    frame = source_file.get_file_as_dataframe().df

    assert frame is not None
    assert len(frame) == 2

32 

33 

def test_get_file_as_dataframe_load_malformed_json_exception(mocker):
    """
    Ensure a logging error is triggered when malformed JSON is passed in.

    The payload is a single JSON record followed by an extra closing brace,
    which makes it invalid JSON.

    :param mocker: Fixture used to patch ``logging.error`` for the duration
        of the test (presumably pytest-mock's ``mocker`` -- confirm).
    """
    # Note the trailing "}}": the second brace is what makes the document malformed.
    json_string = '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": {"id": ' \
                  '"ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly", ' \
                  '"timestamp": "2020-02-01T00:01:40.223558+00:00"}}'
    file_obj = io.StringIO(json_string)
    sdc_file = SDCFileFactory.get_file(data_schema_1, ep_schema_1, "aligner-orders", "Fantasia/", file_obj)

    # Patch before loading so any logging.error call made during parsing is recorded.
    mocker.patch('logging.error')
    # The return value is irrelevant here; only the logging side effect is checked.
    sdc_file.get_file_as_dataframe()
    logging.error.assert_called()

47 

48 

def test_get_file_as_dataframe_load_single_json():
    """
    Check that one JSON record read through the "main_source_no_lines"
    endpoint is loaded into a dataframe with exactly one row.
    """
    # A single JSON record with no newline separators.
    payload = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", '
        '"orthodonticOrderID": {"id": "ab355624-cbcd-4694-937b-de04c8bb07e7"}, '
        '"eventType": "ManufacturingOrderReadyForAssembly", '
        '"timestamp": "2020-02-01T00:01:40.223558+00:00"}'
    )
    text_stream = io.StringIO(payload)
    source_file = SDCFileFactory.get_file(data_schema_1, ep_schema_2, "aligner-orders", "Fantasia/", text_stream)

    frame = source_file.get_file_as_dataframe().df

    assert frame is not None
    assert len(frame) == 1

61 

62 

def test_get_file_as_dataframe_load_json_gzipped():
    """
    Check that two newline-separated JSON records, gzip-compressed and read
    through the "main_source_gzipped" endpoint, load into a dataframe with two rows.
    """
    # Two JSON records separated by a single newline.
    payload = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", '
        '"orthodonticOrderID": {"id": "ab355624-cbcd-4694-937b-de04c8bb07e7"}, '
        '"eventType": "ManufacturingOrderReadyForAssembly", '
        '"timestamp": "2020-02-01T00:01:40.223558+00:00"}\n'
        '{"manufacturingOrderID": "fb0e49fd-6f7c-4bdf-9be0-b3fcb8c3dcae", '
        '"orthodonticOrderID": {"id": "31ba26c6-579e-4f0a-8257-2fbcd9489078"}, '
        '"timestamp": "2020-02-01T00:01:40.225287+00:00", '
        '"eventType": "ManufacturingOrderReadyForAssembly"}'
    )
    # Encode to UTF-8 bytes, gzip them, and expose the result as an in-memory binary stream.
    gzipped_stream = io.BytesIO(gzip.compress(payload.encode('utf-8')))
    source_file = SDCFileFactory.get_file(data_schema_1, ep_schema_3, "aligner-orders", "Fantasia/", gzipped_stream)

    frame = source_file.get_file_as_dataframe().df

    assert frame is not None
    assert len(frame) == 2

81 

82 

def test_get_file_as_object_load_json_gzipped():
    """
    Check that gzip-compressed, newline-separated JSON read through the
    "main_source_gzipped" endpoint is returned by get_file_as_object()
    as an io.BytesIO instance.
    """
    # Two JSON records separated by a single newline.
    payload = (
        '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", '
        '"orthodonticOrderID": {"id": "ab355624-cbcd-4694-937b-de04c8bb07e7"}, '
        '"eventType": "ManufacturingOrderReadyForAssembly", '
        '"timestamp": "2020-02-01T00:01:40.223558+00:00"}\n'
        '{"manufacturingOrderID": "fb0e49fd-6f7c-4bdf-9be0-b3fcb8c3dcae", '
        '"orthodonticOrderID": {"id": "31ba26c6-579e-4f0a-8257-2fbcd9489078"}, '
        '"timestamp": "2020-02-01T00:01:40.225287+00:00", '
        '"eventType": "ManufacturingOrderReadyForAssembly"}'
    )
    # Encode to UTF-8 bytes, gzip them, and expose the result as an in-memory binary stream.
    gzipped_stream = io.BytesIO(gzip.compress(payload.encode('utf-8')))
    source_file = SDCFileFactory.get_file(data_schema_1, ep_schema_3, "aligner-orders", "Fantasia/", gzipped_stream)

    returned_object = source_file.get_file_as_object()

    assert isinstance(returned_object, io.BytesIO)

99 

100 

def test_get_file_as_object_load_single_json():
    """
    Ensure a single JSON record read through the "main_source_no_lines"
    endpoint is returned by get_file_as_object() as an io.BytesIO instance.
    """
    json_string = '{"manufacturingOrderID": "85384a48-aa03-41e4-ae44-0ae845890087", "orthodonticOrderID": {"id": ' \
                  '"ab355624-cbcd-4694-937b-de04c8bb07e7"}, "eventType": "ManufacturingOrderReadyForAssembly", ' \
                  '"timestamp": "2020-02-01T00:01:40.223558+00:00"}'
    # The record is handed over as UTF-8 bytes in an in-memory binary stream.
    file_obj = io.BytesIO(json_string.encode('utf-8'))
    sdc_file = SDCFileFactory.get_file(data_schema_1, ep_schema_2, "aligner-orders", "Fantasia/", file_obj)
    json_obj = sdc_file.get_file_as_object()
    # isinstance() already yields a bool, so no "is True" comparison is needed.
    assert isinstance(json_obj, io.BytesIO)

112 

# Run the gzipped-dataframe test only when this file is executed directly as a
# script, so that importing the module (e.g. during test collection) does not
# execute a test as a side effect.
if __name__ == "__main__":
    test_get_file_as_dataframe_load_json_gzipped()