Coverage for libs/sdc_etl_libs/api_helpers/apis/Verizon/VerizonAPI.py : 83%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Verizon API connector module.
3"""
5import io
6import json
7import logging
8import time
9from datetime import datetime, timedelta
11import requests
12from jinja2 import Environment, FileSystemLoader
13from pytz import timezone, utc
15from retrying import retry
16from sdc_etl_libs.api_helpers import APIUtils
17from sdc_etl_libs.api_helpers.AsyncOAuthAPI import AsyncOAuthAPI
18from sdc_etl_libs.api_helpers.SDCAPIExceptions import (
19 AccessException, APIInternalErrorException, InputArgumentException,
20 RateLimitException, RequestException)
21from sdc_etl_libs.sdc_file_helpers.SDCFileHelpers import SDCFileHelpers
class Verizon(AsyncOAuthAPI):
    """
    Connector/Wrapper class for accessing Verizon campaigns information/metrics.

    Ref.: https://developer.verizonmedia.com/
          https://developer.verizonmedia.com/dsp/api/docs/
          https://developer.yahoo.com/dsp/docs/
    """

    # Report states that mean the report is still being generated on the API side.
    _PENDING_STATUSES = ("Submitted", "Processing")

    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-instance-attributes
    # pylint: disable-msg=too-many-locals
    def __init__(self,
                 authorization_token_=None,
                 refresh_token_=None,
                 access_token_=None,
                 encoded_client_credentials_=None,
                 credential_id_="verizon/api",
                 region_="us-east-2",
                 suffix_="Verizon",
                 schema_: dict = None,
                 endpoint_schema_: dict = None,
                 **kwargs):
        """
        Initiate API wrapper instance.

        :param authorization_token_(str): Verizon DSP API authorization token, only available through manual
            process on YDN(Yahoo Developer Network)
        :param refresh_token_(str): Refresh token used to generate new access tokens
        :param access_token_(str): Access token used to query resources against Verizon DSP API
        :param encoded_client_credentials_(str): encoded token composed by client_id and client_secret.
            Ie. ENCODED(CLIENT_ID:CLIENT_SECRET). When None, the value stored under the
            'encoded_client_credentials' key of self.credentials is used.
        :param credential_id_(str): AWS Secrets key for Verizon API credentials
        :param region_(str): AWS region
        :param suffix_(str): Name used as template folder and as prefix of the default output schema
        :param schema_(dict): Schema object, stored as-is on the instance
        :param endpoint_schema_(dict): Endpoint configuration. The keys read here are
            info.access.{token_url, base_url, endpoint_name} and
            info.opts.{template_name, input_dict, output_schema, tz}
        :param kwargs: 'execution_date'(datetime) is the only key read; defaults to the current UTC time

        :raises InputArgumentException: if endpoint_schema_ is not provided
        """
        # Fail with a clear message instead of a "NoneType is not subscriptable" TypeError further down.
        if endpoint_schema_ is None:
            raise InputArgumentException("Verizon endpoint schema (endpoint_schema_) is required.")

        super().__init__(
            authorization_token_=authorization_token_,
            refresh_token_=refresh_token_,
            access_token_=access_token_,
            credential_id_=credential_id_,
            region_=region_)

        # Explicit argument wins; otherwise fall back to the credentials exposed by the parent class.
        if encoded_client_credentials_ is None:
            self.encoded_client_credentials = self.credentials['encoded_client_credentials']
        else:
            self.encoded_client_credentials = encoded_client_credentials_

        access = endpoint_schema_["info"]["access"]
        opts = endpoint_schema_["info"]["opts"]

        self.token_url = access["token_url"]
        self.base_url = access["base_url"]
        self.suffix = suffix_

        self.schema = schema_
        self.endpoint_schema = endpoint_schema_

        self.endpoint_name = access["endpoint_name"]
        # A falsy template name / output schema falls back to a value derived from the endpoint name.
        self.template_name = opts["template_name"] or self.endpoint_name
        self.input_dict = opts["input_dict"]
        self.output_schema = opts["output_schema"] or str(self.suffix) + "/" + str(self.endpoint_name)
        self.submission_status = None
        self.timezone = opts["tz"]
        self.execution_date = kwargs.get("execution_date", datetime.utcnow())

    def __get_new_refresh_token(self, authorization_code=None):
        """
        Exchange an authorization code for a new refresh token.

        Given a valid 'authorization code' and 'client credentials' obtained from the User resource manager,
        return a new refresh token to be updated on the secret vault.
        Usually a new refresh token invalidates the previous one and the authorization code is a one time
        resource: it is invalidated as soon as it is used to create the refresh token.
        This method is for internal use only, the fresh token should be updated on the secrets vault manually
        or automatically through a callback url interface.

        Ref: An Introduction to OAuth 2: https://www.digitalocean.com/community/tutorials/an-introduction-to-oauth-2

        :param authorization_code(str): code retrieved manually from the User resource manager.

        :return refresh_token(str): new refresh token to be updated on secrets vault to allow the retrieving of
            new access tokens
        :raises AccessException: if the token endpoint answers with a non-2XX status code
        """
        headers_payload = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Authorization': "Basic {}".format(self.encoded_client_credentials)
        }
        data_payload = {'grant_type': 'authorization_code', 'redirect_uri': 'oob', 'code': authorization_code}

        response = requests.post(self.token_url, headers=headers_payload, data=data_payload)
        if not response.ok:
            raise AccessException(
                "Refresh token request failed: {} : {}".format(response.status_code, response.text))

        refresh_token = json.loads(response.text)['refresh_token']
        # Only ask for the vault update once a token was actually obtained.
        logging.warning(
            "Please, update secret vault system to reflect the new refresh token: key = 'verizon_refresh_token'")
        return refresh_token

    @staticmethod
    def __extract_debug_message(response):
        """
        Best-effort extraction of the 'debug_message' field from a token endpoint response.

        :param response(requests.Response): response returned by the token endpoint

        :return message(str): the 'debug_message' value when the body is a JSON object carrying it, the raw
            response text otherwise
        """
        try:
            payload = json.loads(response.text)
        except ValueError:
            return response.text
        if isinstance(payload, dict) and payload.get('debug_message') is not None:
            return payload['debug_message']
        return response.text

    def get_new_access_token(self,
                             client_id=None,
                             client_secret=None,
                             refresh_token=None,
                             access_token_url: str = None):
        """
        Given the instance refresh token, request and store a new access token.

        The four arguments are accepted but not used to build the request: the call always uses
        self.encoded_client_credentials, self.refresh_token and self.token_url.
        NOTE(review): presumably the arguments exist to match the AsyncOAuthAPI signature - confirm.

        :param client_id(str): client identification (unused)
        :param client_secret(str): client secret (unused)
        :param refresh_token(str): refresh token (unused, self.refresh_token is sent)
        :param access_token_url(str): token url (unused, self.token_url is called)

        :return access_token(str): New access token
        :raises AccessException: if the request cannot be sent, a non-2XX status code is returned or the
            response body is not JSON
        """
        # Credential values are deliberately kept out of the logs; only report whether they were supplied.
        logging.debug("Verizon doesn't use client_id/client_secret (provided: %s/%s)", client_id is not None,
                      client_secret is not None)
        logging.debug(
            "Verizon ignores the refresh_token/access_token_url arguments of this method (provided: %s/%s)",
            refresh_token is not None, access_token_url is not None)

        headers_payload = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Authorization': "Basic {}".format(self.encoded_client_credentials)
        }
        data_payload = {
            'grant_type': 'refresh_token',
            'redirect_uri': 'https://www.example.com',
            'refresh_token': self.refresh_token
        }

        try:
            response = requests.post(self.token_url, headers=headers_payload, data=data_payload)
        except requests.exceptions.RequestException as err:
            # No response object exists on this path, so only the transport error can be reported.
            logging.error("Access token reset failed: %s", err)
            raise AccessException(f"Access token reset failed:{err}") from err

        if not response.ok:
            debug_message = Verizon.__extract_debug_message(response)
            logging.info("Response not OK: %d. An status code different than 2XX was returned.",
                         response.status_code)
            logging.error("Access token reset failed: %s", debug_message)
            raise AccessException(f"Access token reset failed:{debug_message}")

        try:
            token_payload = json.loads(response.text)
        except ValueError as err:
            raise AccessException("Access token reset failed:token endpoint returned a non-JSON body") from err

        logging.debug("New access token obtained.")
        logging.info(response.status_code)
        self.access_token = token_payload.get('access_token')
        return self.access_token

    # pylint: disable=E0213
    def __retry_if_access_error(exception):
        """
        Auxiliary predicate used by the @retry decorators below; it is referenced from the class body as a
        plain function, hence no 'self'.

        :param exception(Exception): exception raised by the decorated call

        :return is_access_error(bool): True if we should retry (in this case when it's an AccessException),
            False otherwise
        """
        return isinstance(exception, AccessException)

    @retry(retry_on_exception=__retry_if_access_error, stop_max_attempt_number=2, wait_fixed=2000)
    def request_data(self, data_payload):
        """
        Submit a report request and return an object with the info needed to track its processing.

        :param data_payload(dict): a json like payload following the api rules. Ref.
            https://developer.verizonmedia.com/dsp/api/docs/reporting/payloadspec.html

        :return report_status(ReportSubmissionStatus): ReportSubmissionStatus object wrapping the response
            payload. It is also stored on self.submission_status.
        """
        headers_payload = {
            'Content-Type': 'application/json',
            'X-Auth-Method': 'OAUTH',
            'X-Auth-Token': self.access_token,
        }

        response = requests.request(method="POST", url=self.base_url, headers=headers_payload, data=data_payload)
        # Raises on any error state; an AccessException triggers the single retry configured above.
        self.__handle_response(response)
        self.submission_status = ReportSubmissionStatus(payload=json.loads(response.text))

        return self.submission_status

    @retry(retry_on_exception=__retry_if_access_error, stop_max_attempt_number=2, wait_fixed=2000)
    def check_data_request_status(self, request_id=None):
        """
        Given a valid customer_report_id obtained from a previously submitted data report request, return the
        status of the report process. In case of Success it will inform a url pointing to the digital file
        containing the data to be downloaded.

        Ref.: https://developer.verizonmedia.com/dsp/api/docs/reporting/payloadspec.html#id13

        :param request_id(str): id of the target report submission

        :return report_status(ReportSubmissionStatus): ReportSubmissionStatus object wrapping the status
            response payload. It is also stored on self.submission_status.
        :raises InputArgumentException: if request_id is not provided
        """
        # The id is appended to the base url, so None would otherwise fail with an opaque TypeError.
        if request_id is None:
            raise InputArgumentException("request_id is required to check a Verizon report status.")

        url_ = self.base_url + request_id
        headers = {'Content-Type': 'application/json', 'X-Auth-Method': 'OAUTH', 'X-Auth-Token': self.access_token}
        response = requests.request(method="GET", url=url_, headers=headers, data=None)
        self.__handle_response(response)
        self.submission_status = ReportSubmissionStatus(payload=json.loads(response.text))

        return self.submission_status

    def __get_input_data_payload(self, template_name, input_dict):
        """
        Template logic to produce API query payload.

        The template "<template_name>.j2" is looked up in the template folder resolved for self.suffix.

        :param template_name(str): jinja template name (without the .j2 extension)
        :param input_dict(dict): jinja template input values as dict

        :return query_payload(str): query payload
        """
        template_folder = SDCFileHelpers.get_file_path(type_='template', path_=self.suffix)
        # NOTE(review): autoescape=True HTML-escapes rendered values; confirm this is wanted for these payloads.
        template_env = Environment(loader=FileSystemLoader(searchpath=template_folder), autoescape=True)
        template = template_env.get_template("{}.j2".format(template_name))
        return template.render(input_dict)

    @classmethod
    def __download_report_data_file(cls, response_ojb):
        """
        Decides if and how to download the data once the api return valid url containing valid data.

        :param response_ojb(ReportSubmissionStatus): Value Object(VO) representing the response payload

        :return report_data(str): report data in csv format, or None when the report is still pending or has
            no url
        """
        is_report_pending = response_ojb.status in cls._PENDING_STATUSES
        if is_report_pending or response_ojb.url is None:
            logging.warning("No data returned by report: \n%s", str(response_ojb))
            return None
        return APIUtils.Downloader.download_data_file_to_memory_lazely(response_ojb.url)

    def __poll_verizon_data_report(self, input_data_payload, interval=60):
        """
        Submit the report request and keep checking its status until it is no longer pending.

        :param input_data_payload(str): input query payload. Verizon DSL to query data through its API
        :param interval(int): Polling interval in seconds

        :return report_status(ReportSubmissionStatus): response object with status of the given request
        """
        submission_status = self.request_data(data_payload=input_data_payload)
        # API can return Success but without URL, so only the status drives the loop.
        response_obj = submission_status
        attempt = 0
        while response_obj.status in self._PENDING_STATUSES:
            response_obj = self.check_data_request_status(request_id=submission_status.customer_report_id)
            # Report the latest polled state, not the state of the initial submission.
            logging.info("%s : %s : %s : %s + \n %s", str(datetime.now()), str(attempt), str(response_obj.status),
                         str(response_obj.url), str(response_obj))
            attempt += 1
            if response_obj.status in self._PENDING_STATUSES:
                time.sleep(interval)  # Avoid 422 due more than 5 requests per min
        return response_obj

    @staticmethod
    def __get_columns_from_schema(data_schema_):
        """
        Get a list of column names from a schema object.

        :param data_schema_(dict): schema object with a 'fields' list whose items have a 'name' key

        :return columns(list): list of columns except the last one (the generated _SF_INSERTEDDATETIME
            column, which is not part of the original data). Empty when the schema has no fields.
        """
        names = [field['name'] for field in data_schema_['fields']]
        # Drop the trailing metadata column; slicing is a no-op on an empty list where pop() would raise.
        return names[:-1]

    @staticmethod
    def get_localized_start_end_time(execution_date, time_zone, delta):
        """
        Gets Start Time and End Time from execution date and tz.

        The execution date is stamped as UTC, converted to time_zone and used as end time; the start time
        is the end time minus delta.

        :param execution_date(datetime): Execution Time in utc
        :param time_zone(str): Timezone name understood by pytz
        :param delta(str): Delta to be decreased from execution time. Can be informed by d(days), h(hours),
            m(minutes), s(seconds). Ie. 3d(three days back in time)

        :return start_time_str(str), end_time_str(str): Response start time and end time in ISO format,
            without microseconds.
        """
        granularity_map = APIUtils.parse_mult_value_time_delta(delta)
        look_back = timedelta(
            days=granularity_map["d"],
            hours=granularity_map["h"],
            minutes=granularity_map["m"],
            seconds=granularity_map["s"])

        end_time = execution_date.replace(tzinfo=utc).astimezone(timezone(time_zone))
        start_time = end_time - look_back

        start_time_str = start_time.replace(microsecond=0).isoformat()
        end_time_str = end_time.replace(microsecond=0).isoformat()

        return start_time_str, end_time_str

    def get_spending_data_response(self, template=None, input_dict=None):
        """
        Request the spending report for the execution date and return its rows.

        :param template(str): template name
        :param input_dict(dict): dictionary with template input parameter. It is not modified; the report
            dates are added to a copy.

        :return data(list): List of flattened dictionaries; empty when the report produced no data file
        """
        # Window: 23:59:59 of the execution date going back 23h59m59s, expressed in the endpoint timezone.
        end_of_day = self.execution_date.replace(hour=23, minute=59, second=59, microsecond=0)
        start_time, end_time = Verizon.get_localized_start_end_time(end_of_day, self.timezone, "23h59m59s")

        # Work on a copy so the shared self.input_dict is not mutated between calls.
        template_input = dict(input_dict or {})
        template_input.update({"report_start_date": start_time, "report_end_date": end_time})

        input_data_payload = self.__get_input_data_payload(template, template_input)
        final_response_ojb = self.__poll_verizon_data_report(input_data_payload=input_data_payload, interval=15)

        report_data = Verizon.__download_report_data_file(final_response_ojb)
        if report_data is None:
            # The downloader already logged the report details; there is nothing to parse.
            logging.warning("Verizon report finished without a data file, returning no rows.")
            return []
        # Replace string null by empty string to avoid issues if the column is not string
        # NOTE(review): this also removes "null" when it appears inside a longer value - confirm acceptable.
        report_data = report_data.replace("null", "")

        logging.info("Loading schema %s", self.output_schema)
        schema_path = SDCFileHelpers.get_file_path('schema', "{}.json".format(self.output_schema))
        with open(schema_path) as schema_file:
            data_schema = json.load(schema_file)
        col_names = Verizon.__get_columns_from_schema(data_schema)

        l_dict = SDCFileHelpers.convert_file_to_flattened_dict(
            io.StringIO(report_data), file_type_='csv', delimiter_=',', column_header_list_=col_names)
        # remove original header, before return it
        return l_dict[1:]

    def get_response_data(self):
        """
        Returns the appropriate endpoint response based on the endpoint name.

        :return data(list): List of flattened dictionaries.
        :raises InputArgumentException: if self.endpoint_name is not a supported endpoint
        """
        if self.endpoint_name not in ("advertiser_spending", "spending"):
            raise InputArgumentException("Verizon endpoint not supported.")
        return self.get_spending_data_response(template=self.template_name, input_dict=self.input_dict)

    def __handle_response(self, response):
        """
        Handle Verizon API response. Throw given exception for different response state. Allow process flow
        if no error state is detected, throw exception otherwise.

        :param response(requests.Response): Verizon API response

        :raises AccessException: on status 401, after asking for a new access token
        :raises RateLimitException: on status 200 with propertyName "RateLimit" or "RPM"
        :raises APIInternalErrorException: when the payload carries an "errorMsg"
        :raises RequestException: on any other non-2XX status, after asking for a new access token
        """
        # Parse once and tolerate non-JSON bodies (e.g. gateway error pages) so the status checks still run.
        try:
            payload = json.loads(response.text)
            response_content = json.dumps(payload, indent=2)
        except ValueError:
            payload = None
            response_content = response.text
        # Only a JSON object can carry the propertyName/errorMsg markers.
        fields = payload if isinstance(payload, dict) else {}
        status_code = response.status_code

        # Access exception due expired access token
        if status_code == 401:
            logging.warning("Response payload: \n%s", response_content)
            logging.warning("Trying to refresh access token...")
            self.reset_access_token()
            raise AccessException("Access Exception, refresh tokens and retry or contact account representative.")
        # Rate limit reached(Records per day)
        if status_code == 200 and fields.get("propertyName") == "RateLimit":
            logging.warning("Response payload: \n%s", response_content)
            raise RateLimitException("API Rate limit error: " + str(status_code) + " : " + response.text)
        # Rate limit reached(Requests per min)
        if status_code == 200 and fields.get("propertyName") == "RPM":
            logging.warning("Response payload: \n%s", response_content)
            raise RateLimitException("API Rate limit error(Requests per minute): " + str(status_code) + " : " +
                                     response.text)
        # General errors returned by the API
        if fields.get("errorMsg") is not None:
            logging.warning("Response payload: \n%s", response_content)
            raise APIInternalErrorException("API internal error: " + str(status_code) + " : " + response.text)
        # Catch anything where Response not OK(2XX), if pass 401 checked above as token expired
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as err:
            logging.warning("HTTPError: \n%s", err)
            logging.warning("Response payload: \n%s", response_content)
            logging.warning("Trying to refresh access token...")
            self.reset_access_token()
            raise RequestException("Request error(API/Request specific): " + str(status_code) + " : " +
                                   response.text) from err
# pylint: disable=too-few-public-methods
class ReportSubmissionStatus():
    """
    Value object to wrap Report submission status.

    Each attribute is read from the payload key of the same (camelCase) name and is None when the key is
    absent.

    :param customerReportId(str): report identifier. It is used to download the submitted/generated report
        data file.
    :param status(str): report submission status
    :param url(str): url of the csv file containing the report data
    :param reportFormat(str): file format, currently only CSV is supported by Verizon
    :param requestPayload(str): original query payload sent to request the report data
    :param jobStartDate(str): job execution start datetime
    :param jobEndDate(str): job execution end datetime
    :param numRows(str): number of rows retrieved
    :param errorMsg(str): Verizon internal error codes
    """

    # pylint: disable=too-many-instance-attributes
    def __init__(self, payload=None):
        """
        Build object according to json response payload.

        :param payload(dict): decoded json response; None (the default) is treated as an empty payload
        """
        # The declared default is None, so it must not be dereferenced directly.
        payload = payload or {}
        self.customer_report_id = payload.get("customerReportId")
        self.status = payload.get("status")
        self.url = payload.get("url")
        self.report_format = payload.get("reportFormat")
        self.request_payload = payload.get("requestPayload")
        self.job_start_date = payload.get("jobStartDate")
        self.job_end_date = payload.get("jobEndDate")
        self.num_rows = payload.get("numRows")
        self.error_msg = payload.get("errorMsg")

    def __str__(self):
        """String representation of the object: one 'label: value' line per attribute."""
        return """
        ReportSubmissionStatus(
            customerReportId: {customer_report_id}
            status: {status}
            url: {url}
            reportFormat: {report_format}
            requestPayload: {request_payload}
            jobStartDate: {job_start_date}
            jobEndDate: {job_end_date}
            numRows: {num_rows}
            errorMsg: {error_msg}
        )
        """.format(**self.__dict__)