Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import logging 

2 

3from sdc_etl_libs.sdc_file_helpers.SDCAvroFile import SDCAvroFile 

4from sdc_etl_libs.sdc_file_helpers.SDCCSVFile import SDCCSVFile 

5from sdc_etl_libs.sdc_file_helpers.SDCEDIFile import SDCEDIFile 

6from sdc_etl_libs.sdc_file_helpers.SDCExcelFile import SDCExcelFile 

7from sdc_etl_libs.sdc_file_helpers.SDCFile import SDCFile 

8from sdc_etl_libs.sdc_file_helpers.SDCJsonFile import SDCJsonFile 

9from sdc_etl_libs.sdc_file_helpers.SDCParquetFile import SDCParquetFile 

10 

11 

class SDCFileFactory:
    """
    Namespace of static helpers that build SDC file wrappers from raw file
    objects and render them back out according to an endpoint schema.
    """

    @staticmethod
    def get_file(schema_, endpoint_schema_, file_name_, file_path_, file_obj_):
        """
        Create the SDC file wrapper that matches the endpoint's file type.

        The type is read from ``endpoint_schema_["info"]["file_info"]["type"]``
        and matched case-insensitively.

        :param schema_: Json schema of the data.
        :param endpoint_schema_: The endpoint schema of the source.
        :param file_name_: Name of the file.
        :param file_path_: Path you wish to write to.
        :param file_obj_: Raw file object.
        :return: An instance of the matching SDC file class, or None (after
            logging an error) when the file type is not recognised.
        """

        file_type = endpoint_schema_["info"]["file_info"]["type"]
        # Normalize once instead of lower-casing again in every branch.
        normalized_type = file_type.lower()
        # Every SDC file class is constructed with the same positional arguments.
        args = (schema_, endpoint_schema_, file_name_, file_path_, file_obj_)

        if normalized_type == "csv":
            return SDCCSVFile(*args)
        if normalized_type == "parquet":
            return SDCParquetFile(*args)
        if normalized_type == "avro":
            return SDCAvroFile(*args)
        if normalized_type == "json":
            return SDCJsonFile(*args)
        if normalized_type == "file":
            return SDCFile(*args)
        if normalized_type == "excel":
            return SDCExcelFile(*args)
        if normalized_type == "edi":
            return SDCEDIFile(*args)

        # logging.exception() is only meaningful inside an ``except`` block;
        # with no active exception it appends a bogus "NoneType: None"
        # traceback to the message, so a plain error record is logged instead.
        logging.error(f"{file_type} is not a valid file option.")
        return None

    @staticmethod
    def get_endpoint_file_obj(endpoint_schema_, sdcfile_obj_):
        """
        Output an SDC file in accordance with the endpoint's file_info
        parameters.

        The target type is read from
        ``endpoint_schema_["info"]["file_info"]["type"]`` and matched
        case-insensitively, consistent with ``get_file``.

        :param endpoint_schema_: The json schema of the endpoint.
        :param sdcfile_obj_: A SDCFILE object with data loaded in it.
        :return: File like object (BytesIO), or None (after logging an error)
            when the file type is not recognised.
        :raises Exception: If ``sdcfile_obj_`` is not an SDCFile, or if a
            csv/parquet/json output is requested from a file that is not a
            parquet, csv or json SDC file.
        """

        if not isinstance(sdcfile_obj_, SDCFile):
            raise Exception("Must pass in a SDCFILE object type")

        file_type = endpoint_schema_["info"]["file_info"]["type"]
        normalized_type = file_type.lower()

        if normalized_type in ("csv", "parquet", "json"):
            # These outputs are produced by going through a dataframe, which
            # is only done for parquet, csv and json source files.
            if not isinstance(sdcfile_obj_, (SDCParquetFile, SDCCSVFile, SDCJsonFile)):
                raise Exception(f"Unable to produce output file {file_type} from raw file.")
            temp_df = sdcfile_obj_.get_file_as_dataframe()
            return temp_df.get_as_file_obj_from_endpoint_schema(endpoint_schema_)

        if normalized_type in ("file", "edi"):
            return sdcfile_obj_.get_file_as_object()

        # See get_file: there is no active exception here, so log a plain error
        # rather than using logging.exception().
        logging.error(f"{file_type} is not a valid file option.")
        return None

    @staticmethod
    def is_type_sdcfile(data_):
        """
        Checks to see if a data object is of a standard file type that can be
        directly loaded into an SDCDataframe.

        The accepted types are the csv, parquet, avro, json, edi and excel
        SDC file classes; the generic SDCFile class is not in the list.

        :param data_: A data object to check.
        :return: Boolean.
        """

        loadable_types = (
            SDCCSVFile,
            SDCParquetFile,
            SDCAvroFile,
            SDCJsonFile,
            SDCEDIFile,
            SDCExcelFile,
        )
        return isinstance(data_, loadable_types)

92 else: 

93 return False