Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import logging 

2 

3from sdc_etl_libs.sdc_dataframe.Dataframe import Dataframe 

4from sdc_etl_libs.sdc_dataframe.SDCDataframeEnums import SDCDFTypes 

5from sdc_etl_libs.sdc_file_helpers.SDCFile import SDCFile 

6 

7 

class SDCParquetFile(SDCFile):
    """
    SDCFile implementation for Parquet data.

    Provides conversion of the held file object into a Dataframe and back
    into a Parquet file-like object. Construction is delegated entirely to
    SDCFile.__init__.
    """

    # Class-level defaults. Presumably overwritten per instance by
    # SDCFile.__init__ -- TODO confirm against the base class.
    type = None
    file_name = None
    file_path = None
    file_obj = None
    schema = None
    endpoint_schema = None
    df = None
    endpoint_type = None

    def __init__(self, schema_, endpoint_schema_, file_name_, file_path_, file_obj_):
        """
        Forwards every argument, unchanged and in the same order, to
        SDCFile.__init__. The meaning of each parameter is defined by the
        base class.

        :param schema_: Passed through to SDCFile.
        :param endpoint_schema_: Passed through to SDCFile.
        :param file_name_: Passed through to SDCFile.
        :param file_path_: Passed through to SDCFile.
        :param file_obj_: Passed through to SDCFile.
        """
        super().__init__(schema_, endpoint_schema_, file_name_, file_path_, file_obj_)

    def get_file_size(self):
        """
        Placeholder: no size calculation is implemented for Parquet files.

        :return: None.
        """
        return None

    def get_file_as_object(self):
        """
        Writes out the contents of the dataframe into Parquet File like object.

        The dataframe is rebuilt from self.file_obj on every call via
        get_file_as_dataframe().

        :return: Whatever Dataframe.write_to_parquet() returns (described by
            the original author as a file-like buffer object).
        """
        return self.get_file_as_dataframe().write_to_parquet()

    def get_file_as_dataframe(self):
        """
        Converts a parquet file object to a dataframe.

        Read failures are logged (with traceback) and NOT re-raised, so
        callers always receive a Dataframe; after a failure it is returned
        in whatever state the failed read left it.

        :return: An SDCDataframe built with SDCDFTypes.PANDAS and self.schema.
        """
        df = Dataframe(SDCDFTypes.PANDAS, self.schema)
        try:
            # Rewind so the read starts at the beginning of the file object,
            # regardless of any earlier reads.
            self.file_obj.seek(0)
            df.read_from_parquet(self.file_obj)
        except Exception:
            # Best-effort by design: keep returning the dataframe, but record
            # the full traceback rather than only the exception text.
            logging.exception("Failed processing parquet file.")

        return df