Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import pandas as pd 

2from pytz import timezone 

3from sdc_etl_libs.sdc_dataframe.udfs.pandas.PandasUDF import PandasUDF 

4 

5 

class ConvertToDatetimePandasUDF(PandasUDF):
    """Row-level UDF that parses one column value with pandas.to_datetime and optionally converts its timezone."""

    @staticmethod
    def apply_udf(row_, **opts_):
        """
        Applies the to_datetime pandas function to a given column. Currently being used as an alternative to the
        "logical_type" functionality in data schemas, since the coercion does not always work. This UDF may be
        used to replace that functionality altogether in the future.
        param: row_: row of a dataframe
        type: row_: Pandas Series()
        param: opts_: options controlling the conversion
        type: opts_: dict
            - "origin_column_name": Name of origin column (required).
            - "to_datetime_params": Any parameters that can be used in the pandas.to_datetime function. Optional;
                when omitted or None, pandas.to_datetime is called with its defaults. See full list of
                parameters here https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html
            - "origin_timezone": Timezone of source data. Defaults to UTC. Only used when "target_timezone"
                is given.
            - "target_timezone": Timezone of sink data. When omitted, the parsed value is returned as-is and
                no timezone handling is performed.
        return: the parsed datetime; converted to "target_timezone" when that option is set
        raises: KeyError if "origin_column_name" is missing from opts_
        """
        # A missing (or None) "to_datetime_params" means "use the pandas defaults"; previously this raised.
        to_datetime_params = opts_.get("to_datetime_params") or {}
        origin_timezone = opts_.get("origin_timezone", "UTC")
        target_timezone = opts_.get("target_timezone")

        new_datetime = pd.to_datetime(row_[opts_["origin_column_name"]], **to_datetime_params)
        if target_timezone:
            # The parsed value is stamped with the origin timezone (wall-clock time kept, any tzinfo already on
            # the value is overwritten rather than converted) and then shifted to the target timezone.
            new_datetime = new_datetime.replace(tzinfo=timezone(origin_timezone)).astimezone(
                timezone(target_timezone)
            )
        return new_datetime

34