Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1 

2import json 

3import logging 

4import requests 

5import datetime 

6from dateutil import parser 

7from sdc_etl_libs.api_helpers.API import API 

8from sdc_etl_libs.sdc_dataframe.Dataframe import Dataframe 

9from sdc_etl_libs.sdc_dataframe.SDCDataframeEnums import SDCDFTypes 

10from sdc_etl_libs.sdc_file_helpers.SDCFileHelpers import SDCFileHelpers 

11from sdc_etl_libs.sdc_data_schema.schema_validation import SchemaValidation 

12from sdc_etl_libs.sdc_data_schema.schema_toolbox import SchemaToolbox 

13 

14 

15logging.basicConfig(level=logging.INFO) 

16 

17 

class TimeControl(API):
    """
    Client for the TimeControl API.

    Builds OData-style filter strings, pages through an endpoint with
    $skip/$top, and loads the collected records into an SDCDataframe.
    """

    # Upper bound on the number of pages process_endpoint() will request, so
    # that an endpoint which never returns an empty page cannot loop forever.
    _MAX_PAGES = 1000

    def __init__(self):
        """
        Fetches the TimeControl credentials and builds the request headers.

        NOTE(review): get_credentials() is not defined in this class; it is
        presumably inherited from API - confirm.
        """

        self.credentials = self.get_credentials("aws_secrets", "timecontrol/api")
        self.headers = {'X-API-Key': self.credentials["apikey"]}

    def get_daily_filter(self, datetime_, days_, filter_field_list_):
        """
        Constructs a filter for Time Management calls using the list of supplied
        fields. A start date and end date is set with the fields used with
        "greater than or equal to" start date and "less than" end date. When
        there is more than one field they are combined with 'or'.

        Both dates are truncated to midnight (T00:00:00). An empty field list
        produces an empty string, which process_endpoint() treats as
        "no filter".

        :param datetime_: Datetime object to serve as end date. A string is
            also accepted and is parsed with dateutil.
        :param days_: Number of days to go back from datetime_ to set as start date.
        :param filter_field_list_: List of fields to create complex filter with.
        :return: URL filter as string.
        :raises Exception: If filter_field_list_ is not a list.
        """

        if not isinstance(filter_field_list_, list):
            raise Exception("filter_field_list_ must be a list for Time Management "
                            "get_daily_filter()")

        if isinstance(datetime_, str):
            datetime_ = parser.parse(datetime_)

        startdate = (datetime_ - datetime.timedelta(days_)).strftime("%Y-%m-%dT00:00:00")
        enddate = datetime_.strftime("%Y-%m-%dT00:00:00")

        # One "(field ge start and field lt end)" clause per field, OR-ed together.
        url_filter = " or ".join(
            f"({field} ge DateTime'{startdate}' "
            f"and {field} lt DateTime'{enddate}')"
            for field in filter_field_list_)

        return url_filter

    def get_key_filter(self, start_key_, json_key_path_="Key"):
        """
        Creates a filter based on the primary Key identifier from the endpoint.
        Filter will be greater than or equal to the provided start_key_.

        :param start_key_: Int. Key number to start from.
        :param json_key_path_: The Key path in the JSON returned from the endpoint.
            Default is "Key". Keys can be selected from within the nested JSON by
            using forward slashes to traverse the tree. Example:
                Employee/Timesheets/Key
        :return: URL filter as a string.
        :raises Exception: If start_key_ is not an int.
        """

        if not isinstance(start_key_, int):
            raise Exception("start_key_ must be an int for get_key_filter()")

        return f"{json_key_path_} ge {start_key_}"

    def process_endpoint(self, base_endpoint_url_, filter_=None, limit_=1000):
        """
        Processes a TimeControl API endpoint, requesting pages with
        $skip/$top until an empty page is returned or the page cap
        (_MAX_PAGES) is reached.

        :param base_endpoint_url_: API base endpoint URL.
        :param filter_: Filter as URL string (OData specification). Default = None.
            https://www.odata.org/documentation/odata-version-3-0/url-conventions/
        :param limit_: Int. Number of records to return each page. Default = 1000.
        :return: Data as a list of the records decoded from each page's JSON.
        :raises Exception: If limit_ is not a positive int, if a response has a
            non-200 status, or if a response body cannot be decoded as JSON.
        """

        # Validate up front: a zero or non-int limit would otherwise break the
        # pagination arithmetic and never advance $skip.
        if not isinstance(limit_, int) or limit_ <= 0:
            raise Exception("limit_ must be a positive int for process_endpoint()")

        data = []
        skip = 0
        pages_fetched = 0
        filter_suffix = f"&$filter={filter_}" if filter_ else ""

        while pages_fetched < self._MAX_PAGES:

            requests_url_with_pagination = \
                f"{base_endpoint_url_}?$skip={skip}&$top={limit_}{filter_suffix}"

            logging.info(requests_url_with_pagination)

            r = requests.get(requests_url_with_pagination, headers=self.headers)

            if r.status_code != 200:
                raise Exception(
                    "Failed to get data from TimeControl API. "
                    f"Status: {r.status_code}")

            # Only the decode itself is guarded, so unrelated errors are not
            # misreported as JSON problems.
            try:
                data_json = json.loads(r.content)
            except ValueError as e:
                logging.error(e)
                raise Exception("Unable to load data into JSON format.") from e

            skip += limit_
            pages_fetched += 1

            if not data_json:
                logging.info(f"No results from page {pages_fetched}.")
                break

            data.extend(data_json)

            logging.info(f"Grabbed {len(data_json):,} record(s) from page "
                         f"{pages_fetched}.")
        else:
            # Loop ended because of the cap, not because the data ran out.
            logging.warning(
                f"Stopped after {self._MAX_PAGES:,} pages; results may be "
                f"incomplete.")

        return data

    def get_data(self, data_schema_name_, endpoint_, filter_=None, limit_=1000):
        """
        Grabs data from a TimeControl endpoint and returns as an
        SDCDataframe object if data is available.

        :param data_schema_name_: Name of the schema file (without the .json
            extension) located under the "TimeControl" schema directory.
        :param endpoint_: TimeControl API endpoint, which will be appended to
            base URL.
        :param filter_: Filter as URL string. Default = None.
        :param limit_: Int. Number of records to return per page. Default = 1000.
        :return: SDCDataframe object if data, else, None.
        """

        schema_path = SDCFileHelpers.get_file_path(
            'schema', f"TimeControl/{data_schema_name_}.json")
        # Context manager so the schema file handle is always closed.
        with open(schema_path) as schema_file:
            data_schema = json.load(schema_file)

        validation = SchemaValidation()
        validated_schema = validation.validate_schema(data_schema)
        validated_source_endpoint_schema = \
            SchemaToolbox.get_endpoint_data_from_schema(validated_schema, "main_source")
        self.base_url = validated_source_endpoint_schema["info"]["access"]["base_url"]
        df = Dataframe(SDCDFTypes.PANDAS, validated_schema)

        requests_url = self.base_url + f'/{endpoint_}'

        data = self.process_endpoint(requests_url, filter_, limit_)

        if not data:
            logging.warning("Received no data.")
            return None

        df.load_data(data)
        return df