
import logging
import os
import re
from datetime import datetime
from distutils.util import strtobool
from zipfile import ZipFile

import pandas as pd

from sdc_etl_libs.aws_helpers.aws_helpers import AWSHelpers
from sdc_etl_libs.database_helpers.DatabaseFactory import DatabaseFactory
from sdc_etl_libs.database_helpers.SnowflakeDatabase import SnowflakeDatabase
from sdc_etl_libs.sdc_data_exchange.SDCDataExchange import SDCDataExchange
from sdc_etl_libs.sdc_data_exchange.SDCDataExchangeEnums import FileResultTypes
from sdc_etl_libs.sdc_data_schema.schema_toolbox import SchemaToolbox
from sdc_etl_libs.sdc_dataframe.Dataframe import Dataframe, SDCDFTypes
from sdc_etl_libs.sdc_file_helpers.SDCFileHelpers import SDCFileHelpers

from . import StandardCyborgUtils as utils
from .StandardCyborgUtils import Fields, Metrics


class StandardCyborgDataPipeline:

    def __init__(self, claimant_exchange_, case_ids_sql_, snowflake_dbhandle_, file_list_sql_, origin_file_list_,
                 destination_file_list_, include_modification_cases_, only_active_cases_):

        self.serra_info = {}
        self.claimant_cases = []
        self.case_ids = []
        self.claimant_exchange = claimant_exchange_
        self.origin_file_list = origin_file_list_
        self.destination_file_list = destination_file_list_
        self.file_list_sql = file_list_sql_
        self.snowflake_dbhandle = snowflake_dbhandle_
        self.case_ids_sql = case_ids_sql_
        self.processing_file_name = "snowflake_SERRA_SCAN_DATA_table"
        self.etl_results_log = {}
        self.records_moved = set()
        # Stay safely below Snowflake's hard limit of 16,384 expressions per IN clause.
        self.database_in_clause_limit = 16000
        self.include_modification_cases = include_modification_cases_
        self.only_active_cases = only_active_cases_
        self.serra_df = None
        self.metrics = {
            Metrics.SERRA_TOTAL: 0,
            Metrics.RETRY_CASES: 0,
            Metrics.TOTAL_DISTINCT_CASES_TO_MOVE: 0,
            Metrics.SERRA_CASES_MOVED: set(),
            Metrics.RETRY_CASES_MOVED: set(),
            Metrics.TOTAL_CASES_MOVED: 0,
            Metrics.TOTAL_CASES_NOT_MOVED: 0,
            Metrics.RETRY_CASES_EXPIRED: 0,
            Metrics.CASES_WITH_OBJ_AND_IMG: 0,
            Metrics.CASES_WITH_OBJ: 0
        }

    def get_claimant_input_data(self, snowflake=True):
        """
        Get claimant data from the configured source (Snowflake by default, otherwise S3).
        """
        if snowflake:
            self.get_claimant_input_data_snowflake()
        else:
            self.get_claimant_input_data_s3()

    def get_claimant_input_data_snowflake(self):
        """
        Get claimant data from Snowflake by querying the table that stores data from the Serra app.
        :return: None.
        """

        def load_cases_data(query_, metric):
            query_string = open(SDCFileHelpers.get_file_path("sql", query_)).read()
            df = pd.read_sql(query_string, self.snowflake_dbhandle.connection)
            self.metrics[metric] = len(df)
            results = df.to_dict(orient='records')
            for row in results:
                if not row['metadata_email']:
                    continue
                self.serra_info[row['metadata_email'].lower()] = {"path": row['path'], "timestamp": row['create_dt']}

        load_cases_data("StandardCyborg/serra_new_data_query.sql", Metrics.SERRA_TOTAL)
        load_cases_data("StandardCyborg/serra_retry_data_query.sql", Metrics.RETRY_CASES)
        self.metrics[Metrics.TOTAL_DISTINCT_CASES_TO_MOVE] = len(self.serra_info)
        self.etl_results_log[self.processing_file_name] = []
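    # Note (sketch, not part of the original pipeline): the rest of the pipeline assumes self.serra_info maps a
    # lower-cased e-mail to the Serra scan path and creation timestamp loaded above, roughly:
    #
    #     self.serra_info["jane.doe@example.com"] = {     # illustrative e-mail only
    #         "path": "serra/scans/ABC123/",               # hypothetical path value
    #         "timestamp": "2021-05-04 10:15:30.000",      # value of create_dt from the query
    #     }
    #
    # copy_case_files() reads "path" to populate Fields.SERRA_PATH and "timestamp" to build destination prefixes.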

    def get_claimant_input_data_s3(self):
        """
        Grabs the files in the claimant-cases S3 path, determines whether they contain case numbers or e-mails,
        and adds the values to the appropriate claimant attribute on the class (self.serra_info or
        self.claimant_cases).
        :return: None.
        """

        files_to_process = self.claimant_exchange.sink_items_to_process[self.claimant_exchange.source.endpoint_tag]

        if len(files_to_process) == 0:
            logging.info(f"{FileResultTypes.success.value}: There were no new claimant case files to process.")

        else:
            for item_no, item_to_process in enumerate(
                    self.claimant_exchange.sink_items_to_process[self.claimant_exchange.source.endpoint_tag]):

                try:
                    source_data, source_record_count = self.claimant_exchange.source.get_data(item_to_process)
                    self.processing_file_name = item_to_process
                    temp_df = source_data.get_file_as_dataframe(start_row_=1)
                    temp_list = temp_df.df['METADATA_EMAIL'].str.lower().str.strip().tolist()
                    self.metrics[Metrics.SERRA_TOTAL] = len(temp_df)

                    # If the @ symbol is in the first line of the list, assume the entire list is e-mails.
                    if "@" in temp_list[0]:
                        self.serra_info = dict(
                            zip(temp_df.df['METADATA_EMAIL'].str.lower(), temp_df.df['CREATEDAT']))
                        self.metrics[Metrics.TOTAL_DISTINCT_CASES_TO_MOVE] = len(temp_df)
                        msg = f"{FileResultTypes.success.value}: Loaded {item_to_process}. {len(self.serra_info.keys())} emails."
                    # If not e-mails, assume the entire list is case numbers (e.g. Caeb71aa67d37b).
                    else:
                        self.claimant_cases = self.claimant_cases + temp_list
                        msg = f"{FileResultTypes.success.value}: Loaded {item_to_process}. {len(self.claimant_cases)} case numbers."

                    logging.info(msg)
                    self.etl_results_log[self.processing_file_name] = []
                    self.etl_results_log[self.processing_file_name].append(msg)
                    break

                except Exception as e:
                    msg = f"{FileResultTypes.error.value}: Sourcing {item_to_process} from " \
                          f"{self.claimant_exchange.source.exchange_type} failed."
                    logging.warning(f"{msg}. {e}")
                    self.etl_results_log[self.processing_file_name].append(msg)

    def get_cases_ids(self):
        """
        Gets the list of TrueVault external keys and case IDs to process by executing a query that returns the
        most recent case ID for each given e-mail or case number. Either self.serra_info or self.claimant_cases
        must be populated before calling this method. Appends the results to self.case_ids.
        :return: None.
        """
        try:
            file_get_case_ids_sql = SDCFileHelpers.get_file_path("sql", self.case_ids_sql)

            if len(self.serra_info.keys()) > 0:
                column_expression = "lower(email.email)"
                emails_str = SnowflakeDatabase.build_large_in_expression(column_expression,
                                                                         list(self.serra_info.keys()),
                                                                         self.database_in_clause_limit)
                keys_query = open(file_get_case_ids_sql).read().format(emails_str,
                                                                       str(self.include_modification_cases).upper())

            else:
                column_expression = "lower(cases.case_number)"
                cases_str = SnowflakeDatabase.build_large_in_expression(column_expression,
                                                                        self.claimant_cases,
                                                                        self.database_in_clause_limit)
                keys_query = open(file_get_case_ids_sql).read().format(cases_str,
                                                                       str(self.include_modification_cases).upper())

            # Run the query and collect the returned case IDs.
            self.snowflake_dbhandle.execute_query(keys_query, return_results_=True)
            for rows in self.snowflake_dbhandle.get_results():
                self.case_ids.append(rows[0])

            msg = f"{FileResultTypes.success.value}: Case IDs Loaded."
            logging.info(msg)
            self.etl_results_log[self.processing_file_name].append(msg)

        except Exception as e:
            msg = f"{FileResultTypes.error.value}: Getting cases_ids for {self.processing_file_name} failed."
            logging.error(f"{msg}. {e}")
            self.etl_results_log[self.processing_file_name].append(msg)
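    # Sketch of the IN-clause chunking this method relies on (an assumption about
    # SnowflakeDatabase.build_large_in_expression, which is internal to sdc_etl_libs): Snowflake caps an IN list
    # at 16,384 expressions, so a value list larger than self.database_in_clause_limit is presumably split into
    # OR-joined IN groups before being substituted into the SQL template, e.g. for a limit of 2:
    #
    #     lower(email.email) IN ('a@x.com', 'b@x.com') OR lower(email.email) IN ('c@x.com')
    #
    # The resulting expression is then injected into the query via str.format() together with the
    # include_modification_cases flag.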

    def get_source_files_location(self):
        """
        Runs the file-list query for self.case_ids and loads the result into self.file_list. self.case_ids must
        be populated before calling this method. Appends status messages to self.etl_results_log.
        :return: None.
        """

        try:
            file_file_list_sql = SDCFileHelpers.get_file_path("sql", self.file_list_sql)
            column_expression = "CC.case_id"
            cases_ids_str = SnowflakeDatabase.build_large_in_expression(column_expression,
                                                                        self.case_ids,
                                                                        self.database_in_clause_limit)

            if strtobool(self.only_active_cases):
                is_active_statement = "AND HAS_ALIGNER_CHECK = TRUE"
            else:
                is_active_statement = ""

            file_list_query = open(file_file_list_sql).read().format(cases_ids_str,
                                                                     str(self.include_modification_cases).upper(),
                                                                     is_active_statement)

            # Run the file-list query and load the result into an SDC dataframe, sorted by case number.
            df = pd.read_sql(file_list_query, self.snowflake_dbhandle.connection)
            self.file_list = Dataframe(SDCDFTypes.PANDAS, self.origin_file_list.data_schema)
            self.file_list.process_df(df)
            self.file_list.df.sort_values(by=[Fields.CASE_NUMBER], inplace=True)

            msg = "Source file paths retrieved"
            logging.info(msg)
            self.etl_results_log[self.processing_file_name].append(msg)

        except Exception as e:
            msg = f"{FileResultTypes.error.value}: Unable to write file list data for the {len(self.case_ids):,} " \
                  f"provided case(s)."
            logging.error(f"{msg}. {e}")
            self.etl_results_log[self.processing_file_name].append(msg)

    def copy_case_files(self):
        """
        Copies the listed files to the desired S3 destination. self.file_list must be set with the file list of
        the cases being processed before calling this method. Appends status messages to self.etl_results_log.
        :return: None.
        """

        def copy_file(element, asset_key: str, position: str):
            prefix_template = "capture/{case_number}/{lab_id}_{timestamp}/{file_type}/"
            file_name_template = "{case_number}_{lab_id}_{position}_{timestamp}{extension}"
            asset_path = asset_key + "_PATH"
            asset_date = asset_key + "_DATE"
            serra_creation_date = self.serra_info[element[Fields.EMAIL]]["timestamp"]

            if not utils.is_a_valid_asset(element, position, serra_creation_date, asset_date, asset_path):
                return None

            asset_service_time = datetime.strptime(element[asset_date].split("+")[0],
                                                   '%Y-%m-%d %H:%M:%S.%f' if "." in element[asset_date]
                                                   else '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%dT%H:%M:%S')
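            # asset_service_time normalizes the asset timestamp (UTC offset stripped) into the ISO-like form used
            # in destination file names. Illustrative conversions (example values only):
            #     "2021-05-04 10:15:30.123456+00:00" -> "2021-05-04T10:15:30"
            #     "2021-05-04 10:15:30+00:00"        -> "2021-05-04T10:15:30"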

            if position == "obj":
                src_bucket = self.origin_file_list.data_schema["endpoints"][1]['info']['access']['bucket']
                src_prefix = '/'.join(map(str, element[asset_path].split('/')[0:-1]))
                file_obj = self.origin_file_list.source.get_file_as_file_object(src_bucket,
                                                                                src_prefix,
                                                                                os.path.basename(element[asset_path]),
                                                                                compression_type_="gzip")
                z = ZipFile(file_obj)
                for filename in z.namelist():
                    if re.match('.*Step0.*', filename, flags=re.IGNORECASE):
                        file_obj = z.open(filename)
                        new_file_name = "_".join((element[Fields.CASE_NUMBER],
                                                  element[Fields.LAB_ID],
                                                  asset_service_time,
                                                  "step0.obj"))
                        prefix = prefix_template.format(
                            case_number=element[Fields.CASE_NUMBER],
                            lab_id=element[Fields.LAB_ID],
                            timestamp=self.serra_info[element[Fields.EMAIL]]["timestamp"],
                            file_type="obj")
                        self.destination_file_list.source.write_file(self.destination_file_list.sinks[0].bucket,
                                                                     prefix,
                                                                     new_file_name,
                                                                     file_obj)
                        return prefix + new_file_name
                return None

            else:
                file_name = file_name_template.format(case_number=element[Fields.CASE_NUMBER],
                                                      lab_id=element[Fields.LAB_ID],
                                                      position=position,
                                                      timestamp=asset_service_time,
                                                      extension=utils.get_image_file_extension(element[asset_path]))

                destination_prefix = prefix_template.format(case_number=element[Fields.CASE_NUMBER],
                                                            lab_id=element[Fields.LAB_ID],
                                                            timestamp=self.serra_info[element[Fields.EMAIL]][
                                                                "timestamp"],
                                                            file_type="images")

                self.origin_file_list.source.copy_object(
                    src_bucket_name_=self.origin_file_list.data_schema["endpoints"][1]['info']['access']['bucket'],
                    src_prefix_='/'.join(map(str, element[asset_path].split('/')[0:-1])),
                    obj_name_=os.path.basename(element[asset_path]),
                    dest_bucket_name_=self.destination_file_list.sinks[0].bucket,
                    dest_prefix_=destination_prefix,
                    dest_obj_name_=file_name)

                return destination_prefix + file_name
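        # For reference (illustrative key shape only): with the templates above, an image asset lands at a key like
        #     capture/<case_number>/<lab_id>_<serra timestamp>/images/<case_number>_<lab_id>_<position>_<asset time><ext>
        # and the Step0 OBJ extracted from the zip lands under .../obj/ with a "_step0.obj" suffix.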

        processed_files = 0
        self.file_list.df = utils.filter_data(self.file_list.df, self.serra_info)
        self.file_list.df = utils.add_columns_to_df(self.file_list.df, Fields.destination_columns.values())
        logging.info("Beginning the movement of files to the Standard Cyborg location")
        self.execution_time = datetime.now().strftime("%Y-%m-%d_%H:%M_")
        self.records_moved = set()
        emails_with_pending_obj = dict()
        self.load_retry_cases_information()
        for index, row in self.file_list.df.iterrows():
            try:
                self.file_list.df.loc[index, Fields.SERRA_PATH] = self.serra_info[row[Fields.EMAIL]]["path"]
                for asset in Fields.assets_required:
                    destination_path = copy_file(element=row, asset_key=asset.identifier, position=asset.position)
                    if destination_path:
                        if asset.position == "obj":
                            if (self.retry_cases['METADATA_EMAIL'] == row[Fields.EMAIL]).any():
                                self.retry_cases.loc[self.retry_cases['METADATA_EMAIL'] == row[Fields.EMAIL],
                                                     'RETRIES'] = Metrics.MOVED_CASE_CODE
                                self.metrics[Metrics.RETRY_CASES_MOVED].add(row[Fields.EMAIL])
                            else:
                                self.metrics[Metrics.SERRA_CASES_MOVED].add(row[Fields.EMAIL])
                            emails_with_pending_obj[row[Fields.EMAIL]] = False

                        self.file_list.df.loc[index, Fields.destination_columns[asset.identifier]] = destination_path
                        self.records_moved.add(index)
                        processed_files += 1
                        logging.info(f"{processed_files} files moved to S3")
                    if not destination_path and asset.position == "obj":
                        if row[Fields.EMAIL] not in emails_with_pending_obj:
                            emails_with_pending_obj[row[Fields.EMAIL]] = True
                        break
            except Exception as e:
                msg = f"{FileResultTypes.error.value}: Copying case files failed."
                logging.error(f"{msg}. {e}")
                self.etl_results_log[self.processing_file_name].append(msg)

        self.emails_for_retry = list(
            email for email in emails_with_pending_obj.keys() if emails_with_pending_obj[email])
        msg = f"{FileResultTypes.success.value}: {processed_files} " \
              f"case files (STL, OBJ and images) moved to the Standard Cyborg bucket"
        logging.info(msg)
        self.etl_results_log[self.processing_file_name].append(msg)

    def load_retry_cases_information(self):
        """
        Loads all the cases that are still eligible for retry (fewer than 15 retries).
        :return: None.
        """
        serra_retry_query = open(
            SDCFileHelpers.get_file_path("sql", "StandardCyborg/retry_data_query.sql")).read()
        self.retry_cases = pd.read_sql(serra_retry_query, self.snowflake_dbhandle.connection)
        if self.retry_cases.empty:
            self.retry_cases = pd.DataFrame(columns=['METADATA_EMAIL', 'RETRIES', 'INSERT_DT'])
        else:
            self.retry_cases.columns = ['METADATA_EMAIL', 'RETRIES', 'INSERT_DT']

    def generate_retries_information(self):
        """
        Updates the retries table with the information from the current pipeline execution.
        New cases for retry are inserted and existing ones are updated.
        :return: None.
        """
        for email in self.emails_for_retry:
            if (self.retry_cases['METADATA_EMAIL'] == email).any():
                retries = self.retry_cases.loc[self.retry_cases['METADATA_EMAIL'] == email, 'RETRIES'].item()
                if retries < 14:
                    self.retry_cases.loc[self.retry_cases['METADATA_EMAIL'] == email, 'RETRIES'] = retries + 1
                if retries == 14:
                    self.metrics[Metrics.RETRY_CASES_EXPIRED] += 1
                    self.retry_cases.loc[
                        self.retry_cases['METADATA_EMAIL'] == email, 'RETRIES'] = Metrics.EXPIRED_CASE_CODE
            else:
                retry_record = dict()
                retry_record['METADATA_EMAIL'] = email
                retry_record['RETRIES'] = 1
                datetime_obj = datetime.now()
                timestamp_str = datetime_obj.strftime("%Y-%m-%d %H:%M:%S.%f")
                retry_record['INSERT_DT'] = timestamp_str
                self.retry_cases = self.retry_cases.append(retry_record, ignore_index=True)

        data_schema = SchemaToolbox.get_data_schema_from_file("StandardCyborg/serra_retry")
        sdc_df = Dataframe(SDCDFTypes.PANDAS, data_schema)
        sdc_df.process_df(self.retry_cases)
        sdc_df.write_dataframe_to_database(self.snowflake_dbhandle, 'RETRY_CASES', 'STANDARDCYBORG', upsert_=True)
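    # Retry lifecycle implied by the two retry methods above (summary, no new behavior): an e-mail whose OBJ could
    # not be copied is inserted with RETRIES = 1; each subsequent failed run increments RETRIES up to 14; on the
    # 15th failure the row is marked with Metrics.EXPIRED_CASE_CODE and counted in RETRY_CASES_EXPIRED; a
    # successful copy in copy_case_files() marks the row with Metrics.MOVED_CASE_CODE instead.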

    def generate_metrics(self):
        """
        Generates the key metrics of the process and appends them to the SERRA_DATA_METRICS table.
        :return: None.
        """
        self.metrics[Metrics.SERRA_CASES_MOVED] = len(self.metrics[Metrics.SERRA_CASES_MOVED])
        self.metrics[Metrics.RETRY_CASES_MOVED] = len(self.metrics[Metrics.RETRY_CASES_MOVED])
        self.metrics[Metrics.TOTAL_CASES_MOVED] = self.metrics[Metrics.SERRA_CASES_MOVED] + \
            self.metrics[Metrics.RETRY_CASES_MOVED]
        self.metrics[Metrics.TOTAL_CASES_NOT_MOVED] = self.metrics[Metrics.TOTAL_DISTINCT_CASES_TO_MOVE] - \
            self.metrics[Metrics.TOTAL_CASES_MOVED]
        records_moved = self.file_list.df[self.file_list.df.index.isin(self.records_moved)]
        self.metrics[Metrics.CASES_WITH_OBJ_AND_IMG] = \
            records_moved[records_moved["Closed_img_path"].str.len() > 0]["CASE_ID"].unique().shape[0]
        self.metrics[Metrics.CASES_WITH_OBJ] = self.metrics[Metrics.TOTAL_CASES_MOVED] - \
            self.metrics[Metrics.CASES_WITH_OBJ_AND_IMG]
        self.metrics['insert_dt'] = datetime.now().strftime("%Y-%m-%d")
        serra_uploaded_counts_df = pd.DataFrame([self.metrics])
        serra_uploaded_counts_df.to_sql('SERRA_DATA_METRICS', self.snowflake_dbhandle.connection, 'STANDARDCYBORG',
                                        if_exists='append', chunksize=16000, index=False)
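    # Metric arithmetic for reference: TOTAL_CASES_MOVED = SERRA_CASES_MOVED + RETRY_CASES_MOVED,
    # TOTAL_CASES_NOT_MOVED = TOTAL_DISTINCT_CASES_TO_MOVE - TOTAL_CASES_MOVED, and
    # CASES_WITH_OBJ = TOTAL_CASES_MOVED - CASES_WITH_OBJ_AND_IMG (cases that moved an OBJ but no images).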

    def generate_confirmation_file(self):
        """
        Generates a CSV confirmation file with the information about the files that were moved.
        self.file_list.df is filtered so that only the files that were moved are taken into account, and the
        file is generated with a specific set of columns.
        :return: None.
        """
        output_prefix = self.destination_file_list.sinks[0].endpoint_schema["info"]["access"]["prefix"]
        output_file_name = self.destination_file_list.sinks[0].endpoint_schema["info"]["file_info"]["opts"]["file_name"]
        records_moved = self.file_list.df[self.file_list.df.index.isin(self.records_moved)]
        records_moved_filtered_columns = records_moved[
            [Fields.CASE_NUMBER,
             Fields.CASE_ID,
             Fields.EMAIL,
             Fields.LAB_ID,
             Fields.LAB,
             Fields.SERRA_PATH,
             Fields.DESTINATION_CLOSED_STRAIGHT,
             Fields.DESTINATION_OPEN_LOWER,
             Fields.DESTINATION_OPEN_UPPER,
             Fields.DESTINATION_OBJ]]
        confirmation_df = Dataframe(SDCDFTypes.PANDAS, self.destination_file_list.data_schema)
        confirmation_df.process_df(records_moved_filtered_columns)
        result = self.destination_file_list.sinks[0].write_data(confirmation_df,
                                                                output_prefix + self.execution_time + output_file_name)
        msg = f"{FileResultTypes.success.value}: File list data written to S3 as {output_file_name} ({result})"
        logging.info(msg)
        self.etl_results_log[self.processing_file_name].append(msg)


def snowflake_preparation():
    db_creds = AWSHelpers.get_secrets("snowflake/service_account/airflow")
    snowflake_user = db_creds["username"]
    snowflake_pwd = db_creds["password"]
    snowflake_account = db_creds["account"]
    snowflake_warehouse = db_creds["warehouse"]
    snowflake_role = 'AIRFLOW_SERVICE_ROLE'
    return {
        "snowflake_warehouse": snowflake_warehouse,
        "snowflake_role": snowflake_role,
        "snowflake_user": snowflake_user,
        "snowflake_pwd": snowflake_pwd,
        "snowflake_account": snowflake_account
    }


def get_cyborg_object(vendor_name_, include_modification_cases_, only_active_cases_, snowflake_args_):
    claimant_exchange = SDCDataExchange(f'{vendor_name_}/claimant-cases', 'main_source', 'SDC_sink_0')
    origin_file_list = SDCDataExchange(f'{vendor_name_}/origin_file_list', 'main_source', 'main_source')
    destination_file_list = SDCDataExchange(f'{vendor_name_}/destination_file_list', 'SDC_sink_0', 'SDC_sink_0')
    case_ids_sql = 'StandardCyborg/get_case_ids_ml_query.sql'
    file_list_sql = 'StandardCyborg/get_file_list_ml_query.sql'
    snowflake_dbhandle = DatabaseFactory.get_database("snowflake", sqlalchemy_=True)
    snowflake_dbhandle.connect(
        warehouse_=snowflake_args_["snowflake_warehouse"],
        database_="MEDICAL_DATA",
        schema_="STANDARD_CYBORG",
        role_=snowflake_args_["snowflake_role"],
        user_=snowflake_args_["snowflake_user"],
        password_=snowflake_args_["snowflake_pwd"],
        account_=snowflake_args_["snowflake_account"],
        airflow_=True)

    return StandardCyborgDataPipeline(
        claimant_exchange_=claimant_exchange,
        case_ids_sql_=case_ids_sql,
        snowflake_dbhandle_=snowflake_dbhandle,
        file_list_sql_=file_list_sql,
        origin_file_list_=origin_file_list,
        destination_file_list_=destination_file_list,
        include_modification_cases_=include_modification_cases_,
        only_active_cases_=only_active_cases_)


if __name__ == "__main__":
    # Execute only if run as a script.
    snowflake_args = snowflake_preparation()

    op_kwargs = {'vendor_name_': "StandardCyborg",
                 'include_modification_cases_': 'True',
                 'only_active_cases_': 'False',
                 'snowflake_args_': snowflake_args}

    cyborg_client = get_cyborg_object(**op_kwargs)

    # 1. Get claimant data.
    cyborg_client.get_claimant_input_data()
    # 2. Get case IDs.
    cyborg_client.get_cases_ids()
    # 3. Retrieve the origin file locations.
    cyborg_client.get_source_files_location()
    # 4. Move the files to the sdc-ml bucket.
    cyborg_client.copy_case_files()
    # 5. Generate retry information.
    cyborg_client.generate_retries_information()
    # 6. Generate metrics.
    cyborg_client.generate_metrics()
    # 7. Generate the CSV confirmation file.
    cyborg_client.generate_confirmation_file()

    cyborg_client.snowflake_dbhandle.connection.close()