Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Verizon API connector module. 

3""" 

4 

5import io 

6import json 

7import logging 

8import time 

9from datetime import datetime, timedelta 

10 

11import requests 

12from jinja2 import Environment, FileSystemLoader 

13from pytz import timezone, utc 

14 

15from retrying import retry 

16from sdc_etl_libs.api_helpers import APIUtils 

17from sdc_etl_libs.api_helpers.AsyncOAuthAPI import AsyncOAuthAPI 

18from sdc_etl_libs.api_helpers.SDCAPIExceptions import ( 

19 AccessException, APIInternalErrorException, InputArgumentException, 

20 RateLimitException, RequestException) 

21from sdc_etl_libs.sdc_file_helpers.SDCFileHelpers import SDCFileHelpers 

22 

23 

24class Verizon(AsyncOAuthAPI): 

25 """ 

26 Connector/Wrapper class for accessing Verizon campaigns information/metrics. 

27 Ref.: https://developer.verizonmedia.com/ 

28 https://developer.verizonmedia.com/dsp/api/docs/ 

29 https://developer.yahoo.com/dsp/docs/ 

30 """ 

31 

32 # pylint: disable=too-many-arguments 

33 # pylint: disable=too-many-instance-attributes 

34 # pylint: disable-msg=too-many-locals 

35 def __init__(self, 

36 authorization_token_=None, 

37 refresh_token_=None, 

38 access_token_=None, 

39 encoded_client_credentials_=None, 

40 credential_id_="verizon/api", 

41 region_="us-east-2", 

42 suffix_="Verizon", 

43 schema_: dict = None, 

44 endpoint_schema_: dict = None, 

45 **kwargs): 

46 """ 

47 Initiate API wrapper instance 

48 :param authorization_token_(str): Verizon DSP API authorization token, only available through manual process on 

49 YDN(Yahoo Developer Network) 

50 :param refresh_token_(str): Refresh token used to generate new access tokens 

51 :param access_token_(str): Access token used to query resources against Verizon DSP API 

52 :param encoded_client_credentials_(str): encoded token composed by client_id and client_secret. 

53 Ie. ENCODED(CLIENT_ID:CLIENT_SECRET) 

54 :param credential_id_(str): AWS Secrets key for Verizon API credentials 

55 :param region_(str): AWS region 

56 """ 

57 super().__init__( 

58 authorization_token_=authorization_token_, 

59 refresh_token_=refresh_token_, 

60 access_token_=access_token_, 

61 credential_id_=credential_id_, 

62 region_=region_) 

63 

64 self.encoded_client_credentials = self.credentials[ 

65 'encoded_client_credentials'] if encoded_client_credentials_ is None else encoded_client_credentials_ 

66 self.token_url = endpoint_schema_["info"]["access"]["token_url"] 

67 self.base_url = endpoint_schema_["info"]["access"]["base_url"] 

68 self.suffix = suffix_ 

69 

70 self.schema = schema_ 

71 self.endpoint_schema = endpoint_schema_ 

72 

73 self.endpoint_name = endpoint_schema_["info"]["access"]["endpoint_name"] 

74 self.template_name = endpoint_schema_["info"]["opts"]["template_name"] or self.endpoint_name 

75 self.input_dict = endpoint_schema_["info"]["opts"]["input_dict"] 

76 self.output_schema = endpoint_schema_["info"]["opts"]["output_schema"] or str(self.suffix) + "/" + str(self.endpoint_name) 

77 self.submission_status = None 

78 self.timezone = endpoint_schema_["info"]["opts"]["tz"] 

79 self.execution_date = kwargs.get("execution_date", datetime.utcnow()) 

80 

81 def __get_new_refresh_token(self, authorization_code=None): 

82 """ 

83 Ref: An Introduction to OAuth 2: https://www.digitalocean.com/community/tutorials/an-introduction-to-oauth-2 

84 Given valid 'authorization code' and 'client credentials' obtained from the User resource manager, 

85 return a new refresh token to be updated on the secret vault. 

86 Usually a new refresh token invalidates the previous one and the authorization code is a one time resource 

87 it is invalidated as soon as it is used to create the refresh code. 

88 This function/method is for internal use only, the fresh token should be updated on secrets vault manually or 

89 automatically through a callback url interface. 

90 

91 Ref: An Introduction to OAuth 2: https://www.digitalocean.com/community/tutorials/an-introduction-to-oauth-2 

92 

93 :param authorization_code (str): code retrieved manually from the User resource manager. 

94 

95 :return refresh_token(str): new refresh token to be updated on secrets vault to allow the retrieving of new 

96 access tokens 

97 

98 Returns: 

99 object: t 

100 """ 

101 headers_payload = { 

102 'Content-Type': 'application/x-www-form-urlencoded', 

103 'Authorization': "Basic {}".format(self.encoded_client_credentials) 

104 } 

105 

106 data_payload = {'grant_type': 'authorization_code', 'redirect_uri': 'oob', 'code': authorization_code} 

107 

108 response = requests.post(self.token_url, headers=headers_payload, data=data_payload) 

109 logging.warning( 

110 "Please, update secret vault system to reflect the new refresh token: key = 'verizon_refresh_token'") 

111 return json.loads(response.text)['refresh_token'] 

112 

113 def get_new_access_token(self, 

114 client_id=None, 

115 client_secret=None, 

116 refresh_token=None, 

117 access_token_url: str = None): 

118 """ 

119 Given a pre-defined refresh token, return a new access token. 

120 :param client_id(str): client identification 

121 :param client_secret(str): client secret 

122 

123 :return access_token(str): New access token 

124 """ 

125 logging.debug("Verizon doesn't use client_id(%s)/client_secret(%s)", client_id, client_secret) 

126 logging.debug( 

127 "Verizon only uses this method refresh_token(%s)/access_token_url(%s) for by passing refresh process", 

128 refresh_token, access_token_url) 

129 

130 headers_payload = { 

131 'Content-Type': 'application/x-www-form-urlencoded', 

132 'Authorization': "Basic {}".format(self.encoded_client_credentials) 

133 } 

134 

135 data_payload = { 

136 'grant_type': 'refresh_token', 

137 'redirect_uri': 'https://www.example.com', 

138 'refresh_token': self.refresh_token 

139 } 

140 

141 try: 

142 response = requests.post(self.token_url, headers=headers_payload, data=data_payload) 

143 

144 if not response.ok: 

145 logging.info("Response not OK: %d. An status code different than 2XX was returned.", 

146 response.status_code) 

147 raise AccessException("Response not OK. An status code different than 2XX was returned.") 

148 except BaseException as e: 

149 logging.error("Access token reset failed: %s\n%s", json.loads(response.text)['debug_message'], e) 

150 raise AccessException(f"Access token reset failed:{json.loads(response.text)['debug_message']}") 

151 

152 logging.debug("New access token obtained.") 

153 logging.info(response.status_code) 

154 self.access_token = json.loads(response.text).get('access_token') 

155 return self.access_token 

156 

157 # pylint: disable=E0213 

158 def __retry_if_access_error(exception): 

159 """ 

160 Auxiliar function to check for a given exception. Used by @retry decoration function. 

161 

162 :return is_access_error(bool): True if we should retry (in this case when it's an AccessException), False 

163 otherwise 

164 """ 

165 return isinstance(exception, AccessException) 

166 

167 @retry(retry_on_exception=__retry_if_access_error, stop_max_attempt_number=2, wait_fixed=2000) 

168 def request_data(self, data_payload): 

169 """ 

170 Given a query payload informed as input, a data report request is created and a value object with the info 

171 needed to track the report processing is returned. 

172 

173 :param data_payload(dict): a json like payload following the api rules. Ref. 

174 https://developer.verizonmedia.com/dsp/api/docs/reporting/payloadspec.html 

175 

176 :return report_status(ReportSubmissionStatus): ReportSubmissionStatus object wrapping the response payload. 

177 """ 

178 headers_payload = { 

179 'Content-Type': 'application/json', 

180 'X-Auth-Method': 'OAUTH', 

181 'X-Auth-Token': self.access_token, 

182 } 

183 

184 response = requests.request(method="POST", url=self.base_url, headers=headers_payload, data=data_payload) 

185 self.__handle_response(response) 

186 submission_status = ReportSubmissionStatus(payload=json.loads(response.text)) 

187 self.submission_status = submission_status 

188 

189 return submission_status 

190 

191 @retry(retry_on_exception=__retry_if_access_error, stop_max_attempt_number=2, wait_fixed=2000) 

192 def check_data_request_status(self, request_id=None): 

193 """ 

194 Given a valid customer_report_id obtained from a previously submitted data report request, return the status of 

195 the report process. In case of Success it will inform a url pointing to the digital file containing the data to 

196 be downloaded. 

197 

198 Ref.: https://developer.verizonmedia.com/dsp/api/docs/reporting/payloadspec.html#id13 

199 

200 :param request_id(str): id of the target report submission 

201 

202 :return report_status(ReportSubmissionStatus): ReportSubmissionStatus object wrapping the status response 

203 payload. 

204 """ 

205 

206 url_ = self.base_url + request_id 

207 headers = {'Content-Type': 'application/json', 'X-Auth-Method': 'OAUTH', 'X-Auth-Token': self.access_token} 

208 response = requests.request(method="GET", url=url_, headers=headers, data=None) 

209 self.__handle_response(response) 

210 self.submission_status = ReportSubmissionStatus(payload=json.loads(response.text)) 

211 

212 return self.submission_status 

213 

214 def __get_input_data_payload(self, template_name, input_dict): 

215 """ 

216 Template logic to produce API query payload 

217 :param template_name(str): jinja template name 

218 :param input_dict(dict): jinja template input values as dict 

219 

220 :return query_payload(str): query payload 

221 """ 

222 template_loader = FileSystemLoader(searchpath=SDCFileHelpers.get_file_path(type_='template', path_=self.suffix)) 

223 template_env = Environment(loader=template_loader, autoescape=True) 

224 template = template_env.get_template("{}.j2".format(template_name)) 

225 # Template from jinja2 file 

226 return template.render(input_dict) 

227 

228 @classmethod 

229 def __download_report_data_file(cls, response_ojb): 

230 """ 

231 Decides if and how to download the data once the api return valid url containing valid data. 

232 :param response_ojb(ReportSubmissionStatus): Value Object(VO) representing the response payload 

233 

234 :return report_data(str): report data in csv format 

235 """ 

236 is_report_pending = response_ojb.status in ("Submitted", "Processing") 

237 report_has_output = response_ojb.url is not None 

238 report_data = None 

239 if not is_report_pending and report_has_output: 

240 report_data = APIUtils.Downloader.download_data_file_to_memory_lazely(response_ojb.url) 

241 else: 

242 logging.warning("No data returned by report: \n%s", str(response_ojb)) 

243 

244 return report_data 

245 

246 def __poll_verizon_data_report(self, input_data_payload, interval=60): 

247 """ 

248 Poll function for keeping checking request status 

249 :param input_data_payload(str): input query payload. Verizon DSL to quer data through its API 

250 :param interval(int): Polling interval 

251 

252 :return report_status(ReportSubmissionStatus): response object with status of the given request 

253 """ 

254 submission_status = self.request_data(data_payload=input_data_payload) 

255 is_report_pending = submission_status.status in ("Submitted", "Processing") 

256 # API can return Success but without URL 

257 flag = is_report_pending 

258 i = 0 

259 response_obj = submission_status 

260 while flag: 

261 response_obj = self.check_data_request_status(request_id=submission_status.customer_report_id) 

262 flag = response_obj.status in ("Submitted", "Processing") 

263 logging.info("%s : %s : %s : %s + \n %s", str(datetime.now()), str(i), str(submission_status.status), 

264 str(submission_status.url), str(response_obj)) 

265 i += 1 

266 if flag: 

267 time.sleep(interval) # Avoid 422 due more than 5 requests per min 

268 return response_obj 

269 

270 @staticmethod 

271 def __get_columns_from_schema(data_schema_): 

272 """ 

273 Get a a list od column names from a schema object 

274 :param data_schema_(dict): schema object 

275 

276 :return columns(list): list of columns except the generated _SF_INSERTEDDATETIME column which is not part 

277 of the original data 

278 """ 

279 # Get list of columns from schema file 

280 cols = [] 

281 for field in data_schema_['fields']: 

282 cols.append(field['name']) 

283 # Remove last element. It is a metadata not found on the original data coming from the API 

284 cols.pop() 

285 return cols 

286 

287 @staticmethod 

288 def get_localized_start_end_time(execution_date, time_zone, delta): 

289 """ 

290 Gets Start Time and End Time from execution date and tz. 

291 :param tz(str): Timezone 

292 :param execution_date(datetime): Execution Time in utc 

293 :param delta(str): Delta to be decreased from execution time. Can be informed by d(days), h(hours), m(minutes), 

294 s(seconds). Ie. 3d(three days bak in time) 

295 

296 :return start_time_str(str), end_time_str(str): Response start time and end time. 

297 """ 

298 granularity_map = APIUtils.parse_mult_value_time_delta(delta) 

299 start_time_utc = execution_date 

300 start_time = start_time_utc.replace(tzinfo=utc).astimezone(timezone(time_zone)) 

301 end_time = start_time 

302 start_time = (start_time - timedelta( 

303 days=granularity_map["d"], 

304 hours=granularity_map["h"], 

305 minutes=granularity_map["m"], 

306 seconds=granularity_map["s"])) # execution_date 

307 

308 start_time_str = start_time.replace(microsecond=0).isoformat() 

309 end_time_str = end_time.replace(microsecond=0).isoformat() 

310 

311 return start_time_str, end_time_str 

312 

313 def get_spending_data_response(self, template=None, input_dict=None): 

314 """ 

315 Get a list of column names from a schema object 

316 :param template(str): template name 

317 :param input_dict(dict): dictionary with template input parameter 

318 

319 :return data(list): List of flattened dictionaries 

320 """ 

321 # YESTERDAY dateT00:00:00 to dateT23:59:59 

322 start_time, end_time = Verizon.get_localized_start_end_time( 

323 self.execution_date.replace(hour=23, minute=59, second=59, microsecond=0) - timedelta(days=0), 

324 self.timezone, "23h59m59s") 

325 

326 # Update report_start_date and report_end_date to the values previously calculated 

327 dt_1 = {"report_start_date": start_time} 

328 dt_2 = {"report_end_date": end_time} 

329 input_dict.update(dt_1) 

330 input_dict.update(dt_2) 

331 

332 input_data_payload = self.__get_input_data_payload(template, input_dict) 

333 final_response_ojb = self.__poll_verizon_data_report(input_data_payload=input_data_payload, interval=15) 

334 # Replace string null by empty string to avoid issues if the column is not string 

335 report_data = Verizon.__download_report_data_file(final_response_ojb).replace("null", "") 

336 

337 logging.info("Loading schema %s", self.output_schema) 

338 data_schema = json.loads( 

339 open(SDCFileHelpers.get_file_path('schema', "{}.json".format(self.output_schema))).read()) 

340 col_names = Verizon.__get_columns_from_schema(data_schema) 

341 

342 l_dict = SDCFileHelpers.convert_file_to_flattened_dict( 

343 io.StringIO(report_data), file_type_='csv', delimiter_=',', column_header_list_=col_names) 

344 # remove original header, before return it 

345 return l_dict[1:] 

346 

347 def get_response_data(self): 

348 """ 

349 Returns the appropriate endpoint response based on the endpoint name 

350 :return data(list): List of flattened dictionaries. 

351 """ 

352 if self.endpoint_name in ["advertiser_spending", "spending"]: 

353 response = self.get_spending_data_response(template=self.template_name, input_dict=self.input_dict) 

354 else: 

355 raise InputArgumentException("Verizon endpoint not supported.") 

356 return response 

357 

358 def __handle_response(self, response): 

359 """ 

360 Handle Verizon API response. Throw given exception for different response state. Allow process flow if status 

361 code is 200, throw exception otherwise 

362 :param response(request): Verizon API response 

363 """ 

364 response_content = json.dumps(json.loads(response.text), indent=2) 

365 

366 # Access exception due expired access token 

367 if response.status_code == 401: 

368 logging.warning("Response payload: \n%s", response_content) 

369 logging.warning("Trying to refresh access token...") 

370 self.reset_access_token() 

371 raise AccessException("Access Exception, refresh tokens and retry or contact account representative.") 

372 # Rate limit reached(Records per day) 

373 if response.status_code == 200 and json.loads(response.text).get("propertyName") == "RateLimit": 

374 logging.warning("Response payload: \n%s", response_content) 

375 raise RateLimitException("API Rate limit error: " + str(response.status_code) + " : " + response.text) 

376 # Rate limit reached(Requests per min) 

377 if response.status_code == 200 and json.loads(response.text).get("propertyName") == "RPM": 

378 logging.warning("Response payload: \n%s", response_content) 

379 raise RateLimitException("API Rate limit error(Requests per minute): " + str(response.status_code) + " : " + 

380 response.text) 

381 # General errors returned by the API 

382 if json.loads(response.text).get("errorMsg") is not None: 

383 logging.warning("Response payload: \n%s", response_content) 

384 raise APIInternalErrorException("API Rate limit error: " + str(response.status_code) + " : " + 

385 response.text) 

386 # Catch anything where Response not OK(2XX), if pass 401 checked above as token expired 

387 try: 

388 response.raise_for_status() 

389 except requests.exceptions.HTTPError as err: 

390 logging.warning("HTTPError: \n%s", err) 

391 logging.warning("Response payload: \n%s", response_content) 

392 logging.warning("Trying to refresh access token...") 

393 self.reset_access_token() 

394 raise RequestException("Request error(API/Request specific): " + str(response.status_code) + " : " + 

395 response.text) 

396 

397 

398# pylint: disable=too-few-public-methods 

class ReportSubmissionStatus:
    """
    Value object to wrap Report submission status.

    Each attribute mirrors one key of the API response payload; keys absent from the payload become None.

    :param customerReportId(str): report identifier. It is used to download the submitted/generated report data file.
    :param status(str): report submission status
    :param url(str): url of the csv file containing the report data
    :param reportFormat(str): file format, currently only CSV is supported by Verizon
    :param requestPayload(str): original query payload sent to request the report data
    :param jobStartDate(str): job execution start datetime
    :param jobEndDate(str): job execution end datetime
    :param numRows(str): number of rows retrieved
    :param errorMsg(str): Verizon internal error codes
    """

    # pylint: disable=too-many-instance-attributes
    def __init__(self, payload=None):
        """
        Build object according to json response payload.

        :param payload(dict): decoded json response. None is treated as an empty payload.
        """
        # The default must not crash on .get(); an empty payload yields an all-None object
        payload = {} if payload is None else payload
        self.customer_report_id = payload.get("customerReportId")
        self.status = payload.get("status")
        self.url = payload.get("url")
        self.report_format = payload.get("reportFormat")
        self.request_payload = payload.get("requestPayload")
        self.job_start_date = payload.get("jobStartDate")
        self.job_end_date = payload.get("jobEndDate")
        self.num_rows = payload.get("numRows")
        self.error_msg = payload.get("errorMsg")

    def __str__(self):
        """String representation of the object, one payload field per line."""
        return """
        ReportSubmissionStatus(
            customerReportId: {customer_report_id}
            status: {status}
            url: {url}
            reportFormat: {report_format}
            requestPayload: {request_payload}
            jobStartDate: {job_start_date}
            jobEndDate: {job_end_date}
            numRows: {num_rows}
            errorMsg: {error_msg}
        )
        """.format(**self.__dict__)