Coverage for libs/sdc_etl_libs/sdc_dataframe/udfs/pandas/ConvertToDatetimePandasUDF.py : 92%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import pandas as pd
2from pytz import timezone
3from sdc_etl_libs.sdc_dataframe.udfs.pandas.PandasUDF import PandasUDF
class ConvertToDatetimePandasUDF(PandasUDF):
    """Row-level UDF that parses one column of a row with pandas.to_datetime."""

    @staticmethod
    def apply_udf(row_, **opts_):
        """
        Applies the to_datetime pandas function to a given column. Currently being used as an alternative to the
        "logical_type" functionality in data schemas, since the coercion does not always work. This UDF may be
        used to replace that functionality altogether in the future.

        param: row_: row of a dataframe
        type: row_: Pandas Series()
        param: opts_: contains "origin_column_name" and "to_datetime_params" (both required), plus the optional
            timezone settings below
        type: opts_: dict
            - "origin_column_name": Name of origin column (looked up in row_)
            - "to_datetime_params": Dict of any parameters that can be used in the pandas.to_datetime function.
                See full list of parameters here
                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html
            - "origin_timezone": Timezone of source data. Defaults to UTC. Only used when "target_timezone"
                is given.
            - "target_timezone": Timezone of sink data. When missing or empty, the parsed value is returned
                as-is with no timezone handling.
        return: the parsed datetime, converted to "target_timezone" when one is given. A None cell value
            yields None.
        raises: KeyError if "origin_column_name" or "to_datetime_params" is missing from opts_
        """
        to_datetime_params = opts_["to_datetime_params"]
        parsed = pd.to_datetime(row_[opts_["origin_column_name"]], **to_datetime_params)

        target_timezone = opts_.get("target_timezone")
        # pd.to_datetime(None) returns None rather than NaT, so a null cell must skip the
        # timezone step; calling .replace() on it would raise AttributeError.
        if not target_timezone or parsed is None:
            return parsed

        origin_timezone = opts_.get("origin_timezone", "UTC")
        # Stamp the parsed value with the origin timezone (overriding any tzinfo it already
        # carries), then convert the resulting instant to the target timezone.
        stamped = parsed.replace(tzinfo=timezone(origin_timezone))
        return stamped.astimezone(timezone(target_timezone))