Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" Enums that control the data schema framework """ 

2 

3from enum import Enum 

4 

5from sdc_etl_libs.sdc_data_schema import schema_test_enums as SchemaTestEnums 

6 

7############################################################# 

8# Data Schema level enums 

9############################################################# 

10 

11 

class CredentialsEnvironmentVariablesOptsKeys(Enum):
    """Spec of the ``opts`` keys for credentials of type ``environment_variables``."""

    variables = {
        "required": True,
        "data_type": dict,
    }

14 

15 

class CredentialsAWSSecretsOptsKeys(Enum):
    """Spec of the ``opts`` keys for credentials of type ``aws_secrets``."""

    name = {
        "required": True,
        "data_type": str,
    }

18 

19 

class CredentialsVaultOptsKeys(Enum):
    """Spec of the ``opts`` keys for credentials of type ``vault``.

    NOTE: all three members carry an equal value, so under Enum semantics
    ``domain`` and ``secret_path`` are aliases of ``secrets_engine``; use
    ``__members__`` to enumerate every key name.
    """

    secrets_engine = {
        "required": True,
        "data_type": str,
    }
    domain = {
        "required": True,
        "data_type": str,
    }
    secret_path = {
        "required": True,
        "data_type": str,
    }

24 

25 

class CredentialsOptsMapping(Enum):
    """Maps each credentials type name to the enum describing its ``opts`` keys."""

    # Member names match the values allowed for ``CredentialsKeys.type``.
    aws_secrets = CredentialsAWSSecretsOptsKeys
    vault = CredentialsVaultOptsKeys
    environment_variables = CredentialsEnvironmentVariablesOptsKeys

30 

31 

class CredentialsKeys(Enum):
    """Spec of the keys inside a ``credentials`` section: ``type`` and ``opts``."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["aws_secrets", "vault", "environment_variables"]},
    }
    # The opts spec is resolved through ``key_map`` (presumably selected by ``type``).
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": CredentialsOptsMapping,
    }

42 

43 

class LoggerSnowflakeProcessingByItemOptsKeys(Enum):
    """Spec of the ``opts`` keys for Snowflake logger processing type ``by_item``."""

    ignore_force_merge = {
        "required": False,
        "data_type": bool,
        "default_value": False,
    }

46 

47 

class LoggerSnowflakeProcessingByRecordsOptsKeys(Enum):
    """Spec of the ``opts`` keys for Snowflake logger processing type ``by_records``."""

    chunk_size = {
        "required": False,
        "data_type": int,
        "default_value": 15000,
    }
    ignore_force_merge = {
        "required": False,
        "data_type": bool,
        "default_value": False,
    }

51 

52 

class LoggerSnowflakeProcessingOptsMapping(Enum):
    """Maps each Snowflake logger processing type to the enum describing its ``opts``."""

    by_item = LoggerSnowflakeProcessingByItemOptsKeys
    by_records = LoggerSnowflakeProcessingByRecordsOptsKeys

56 

57 

class LoggerSnowflakeProcessingKeys(Enum):
    """Spec of the keys inside the Snowflake logger ``processing`` section."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["by_item", "by_records"]},
    }
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": LoggerSnowflakeProcessingOptsMapping,
    }

68 

69 

class LoggerSnowflakeOptsKeys(Enum):
    """Spec of the ``opts`` keys for a logger of type ``snowflake``.

    NOTE: ``database``, ``table_name`` and ``schema`` share one value, so the
    latter two are Enum aliases of ``database``.
    """

    environment = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["production", "development"]},
    }
    database = {
        "required": False,
        "data_type": str,
    }
    table_name = {
        "required": False,
        "data_type": str,
    }
    schema = {
        "required": False,
        "data_type": str,
    }
    minutes_until_abandoned = {
        "required": False,
        "data_type": int,
        "default_value": 120,
    }
    empty_is_success = {
        "required": False,
        "data_type": bool,
        "default_value": True,
    }
    # Nested section; its keys are described by LoggerSnowflakeProcessingKeys.
    processing = {
        "required": False,
        "data_type": dict,
        "keys": LoggerSnowflakeProcessingKeys,
    }

85 

86 

class LoggerSnowflakeAccessKeys(Enum):
    """Spec of the ``access`` keys for a logger of type ``snowflake``."""

    credentials = {
        "required": True,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

89 

90 

class LoggerOptsMapping(Enum):
    """Maps each logger type name to the enum describing its ``opts`` keys."""

    snowflake = LoggerSnowflakeOptsKeys

93 

94 

class LoggerAccessMapping(Enum):
    """Maps each logger type name to the enum describing its ``access`` keys."""

    snowflake = LoggerSnowflakeAccessKeys

97 

98 

class LoggerKeys(Enum):
    """Spec of the keys inside an endpoint ``logger`` section."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": ["snowflake"],
        },
    }
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": LoggerOptsMapping,
    }
    access = {
        "required": True,
        "data_type": dict,
        "key_map": LoggerAccessMapping,
    }

103 

104 

class HivePartitioningDateKeys(Enum):
    """Spec of the ``opts`` keys for hive partitioning of type ``date``."""

    partitions = {
        "required": False,
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
        "allowed_values": {"type": "list", "criteria": ["year", "month", "day", "hour", "date"]},
    }
    partition_to_process = {
        "required": False,
        "data_type": str,
    }
    time_unit_look_back = {
        "required": False,
        "data_type": int,
    }
    suffix = {
        "required": False,
        "data_type": str,
        "default_value": None,
    }

122 

123 

class JSONFileInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``json``.

    ``required``/``optional`` lists hold strings such as ``"source|s3"``
    (presumably ``<endpoint role>|<endpoint type>`` -- confirm with the validator).
    """

    file_regex = {
        "required": ["source|s3", "source|sftp"],
        "data_type": str,
    }
    file_name = {
        "required": False,
        "data_type": str,
        "optional": ["sink|s3", "sink|sftp"],
        "default_value": None,
    }
    file_name_params = {
        "required": False,
        "optional": ["sink|s3", "sink|sftp"],
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
    }
    lines = {
        "required": False,
        "data_type": bool,
        "default_value": None,
    }
    encoding = {
        "required": False,
        "data_type": str,
        "default_value": "utf-8",
        "allowed_values": {"type": "list", "criteria": ["utf-8"]},
    }
    compression_type = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["gzip", None]},
    }
    format = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["binary", "base64", None]},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }
    custom_pre_processing_functions = {
        "required": False,
        "data_type": list,
        "list_value_opts": {"data_type": "*"},
        "default_value": None,
    }

174 

175 

class CSVFileInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``csv``.

    NOTE: ``file_regex`` and ``delimiter`` carry equal values, so ``delimiter``
    is an Enum alias of ``file_regex``; use ``__members__`` to see both names.
    """

    file_regex = {
        "required": ["source|s3", "source|sftp"],
        "data_type": str,
    }
    delimiter = {
        "required": ["source|s3", "source|sftp"],
        "data_type": str,
    }
    headers = {
        "required": ["source|s3", "source|sftp"],
        "data_type": bool,
    }
    encoding = {
        "required": False,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
        "default_value": "utf-8",
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["utf-8", "latin-1"]},
    }
    file_name = {"required": False, "data_type": str, "optional": ["sink|s3", "source|s3", "sink|sftp"],
                 "default_value": None}
    file_name_params = {
        "required": False,
        "optional": ["sink|s3", "sink|sftp"],
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
    }
    compression_type = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["gzip", None]},
    }
    format = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["binary", "base64", None]},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }
    line_terminator = {
        "required": False,
        "data_type": str,
        "default_value": None,
    }

226 

227 

class EDIFileInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``edi``."""

    file_regex = {
        "required": ["source|s3", "source|sftp"],
        "data_type": str,
    }
    file_name = {
        "required": False,
        "data_type": str,
        "optional": ["sink|s3", "sink|sftp"],
        "default_value": None,
    }
    file_name_params = {
        "required": False,
        "optional": ["sink|s3", "sink|sftp"],
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }
    field_separator = {
        "required": False,
        "data_type": str,
        "default_value": "*",
    }
    segment_separator = {
        "required": False,
        "data_type": str,
        "default_value": "|",
    }
    dataframe_type = {
        "required": False,
        "default_value": "pandas",
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["pandas", "spark"]},
    }
    edi_code = {
        "required": True,
        "data_type": int,
        "allowed_values": {
            "type": "list",
            "criteria": [210, 997],
        },
    }

254 

255 

class FileFileInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``file``."""

    file_regex = {
        "required": ["source|s3", "source|sftp"],
        "data_type": str,
    }
    encoding = {
        "required": False,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
        "default_value": "utf-8",
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["utf-8", "latin-1"]},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }

269 

270 

class ExcelFileInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``excel``."""

    file_regex = {
        "required": ["source|s3", "source|sftp"],
        "data_type": str,
    }
    headers = {
        "required": ["source|s3", "source|sftp"],
        "data_type": bool,
    }
    file_name = {"required": False, "data_type": str, "optional": ["sink|s3", "sink|sftp", "source|api"],
                 "default_value": None}
    format = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["binary", None]},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }
    process_all_sheets = {
        "required": True,
        "data_type": bool,
        "default_value": True,
    }
    sheet_identifiers = {
        "required": False,
        "data_type": list,
        "list_value_opts": {"data_type": "*", "allow_duplicates": False, "allow_empty_list": False},
        "default_value": [0],
    }
    skip_n_first_rows = {
        "required": False,
        "data_type": int,
        "default_value": 0,
    }
    parse_n_rows = {
        "required": False,
        "data_type": int,
        "default_value": None,
    }

303 

304 

class SharepointListInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``sharepoint_list``."""

    headers = {
        "required": ["source|s3", "source|sftp"],
        "data_type": bool,
    }
    file_name = {"required": False, "data_type": str, "optional": ["sink|s3", "sink|sftp", "source|api"],
                 "default_value": None}
    format = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["binary", None]},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }

323 

324 

class ParquetFileInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``parquet``."""

    file_name = {
        "required": False,
        "data_type": str,
        "optional": ["sink|s3", "sink|sftp"],
        "default_value": None,
    }
    file_name_params = {
        "required": False,
        "optional": ["sink|s3", "sink|sftp"],
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
    }
    compression_type = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["gzip", None]},
    }
    format = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["binary", "base64", None]},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }

356 

357 

class AvroFileInfoKeys(Enum):
    """Spec of the ``opts`` keys for file_info type ``avro``."""

    headers = {
        "required": ["source|s3", "source|sftp"],
        "data_type": bool,
    }
    format = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["binary", "base64", None]},
    }
    file_regex = {
        "required": ["source|s3", "source|sftp"],
        "data_type": str,
        "default_value": None,
    }
    delimiter = {
        "required": False,
        "data_type": str,
        "default_value": None,
    }
    file_name = {
        "required": False,
        "data_type": str,
        "optional": ["sink|s3", "sink|sftp"],
        "default_value": None,
    }
    file_name_params = {
        "required": False,
        "optional": ["sink|s3", "sink|sftp"],
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
    }
    compression_type = {
        "required": False,
        "data_type": str,
        "default_value": None,
        "allowed_values": {"type": "list", "criteria": ["gzip", None]},
    }
    pgp_key = {
        "required": False,
        "data_type": str,
        "optional": ["source|s3", "sink|s3", "source|sftp", "sink|sftp"],
    }

392 

393 

class XCOMAirflowEndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``xcom_airflow``."""

    xcom_key = {
        "required": True,
        "data_type": str,
    }

396 

397 

class XCOMAirflowEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``xcom_airflow`` (none are defined)."""

400 

401 

class APIEndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``api``.

    NOTE: ``api_name``, ``base_url`` and ``endpoint_name`` share one value, so
    the latter two are Enum aliases of ``api_name``.
    """

    api_name = {
        "required": ["source|api"],
        "data_type": str,
    }
    base_url = {
        "required": ["source|api"],
        "data_type": str,
    }
    endpoint_name = {
        "required": ["source|api"],
        "data_type": str,
    }
    token_url = {
        "required": False,
        "data_type": str,
    }
    scope = {
        "required": False,
        "data_type": list,
    }
    credentials = {
        "required": False,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

409 

410 

class APIEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``api``.

    NOTE: several members share a value and are therefore Enum aliases
    (``datetime_format`` -> ``tz``, ``template_name`` -> ``output_schema``,
    ``api_call_details`` -> ``input_dict``).
    """

    tz = {
        "required": False,
        "data_type": str,
        "default_value": None,
    }
    datetime_format = {
        "required": False,
        "data_type": str,
        "default_value": None,
    }
    output_schema = {
        "required": False,
        "data_type": str,
    }
    template_name = {
        "required": False,
        "data_type": str,
    }
    input_dict = {
        "required": False,
        "data_type": dict,
    }
    api_call_details = {
        "required": False,
        "data_type": dict,
    }

418 

419 

class SFTPEndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``sftp``.

    NOTE: ``host`` and ``path`` carry equal values, so ``path`` is an Enum
    alias of ``host``.
    """

    host = {
        "required": True,
        "data_type": str,
    }
    path = {
        "required": True,
        "data_type": str,
    }
    port = {
        "required": True,
        "data_type": int,
    }
    credentials = {
        "required": True,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

425 

426 

class SFTPEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``sftp``."""

    days_threshold_to_delete = {
        "required": False,
        "data_type": int,
    }
    process_by_block = {
        "required": False,
        "data_type": bool,
        "default_value": False,
    }
    rows_by_block = {
        "required": False,
        "data_type": int,
    }

431 

432 

class LocalDiskEndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``local_disk``.

    NOTE: both members carry equal values, so ``file_path`` is an Enum alias
    of ``file_name``.
    """

    file_name = {
        "required": True,
        "data_type": str,
    }
    file_path = {
        "required": True,
        "data_type": str,
    }

436 

437 

class LocalDiskEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``local_disk`` (none are defined)."""

440 

441 

class MockEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``mock``."""

    total_records = {
        "required": False,
        "optional": ["source|mock"],
        "data_type": int,
        "default_value": 1000,
    }

444 

445 

class DynamoDBEndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``dynamodb``."""

    table_name = {
        "required": True,
        "data_type": str,
    }
    region = {
        "required": False,
        "data_type": str,
        "default_value": "us-east-2",
    }
    credentials = {
        "required": False,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

450 

451 

class DynamoDBEndpointQueryParams(Enum):
    """Spec of the keys inside the DynamoDB ``query_params`` option.

    NOTE: every ``str``-typed member shares one value (aliases of ``key``),
    and ``segment_size_in_gb`` is an alias of ``limit``.
    """

    key = {
        "required": False,
        "data_type": str,
    }
    index = {
        "required": False,
        "data_type": str,
    }
    table_name = {
        "required": False,
        "data_type": str,
    }
    sort_key = {
        "required": False,
        "data_type": str,
    }
    select = {
        "required": False,
        "data_type": str,
    }
    consistent_read = {
        "required": False,
        "data_type": bool,
    }
    limit = {
        "required": False,
        "data_type": int,
    }
    throttle_coeff = {
        "required": False,
        "data_type": float,
    }
    segment_size_in_gb = {
        "required": False,
        "data_type": int,
    }

462 

463 

class DynamoDBEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``dynamodb``."""

    method = {
        "required": False,
        "optional": ["source|dynamodb"],
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["scan", "query"]},
        "default_value": "scan",
    }
    threads = {"required": False, "optional": ["source|dynamodb", "sink|dynamodb"], "data_type": int,
               "default_value": 10}
    boost_wcu = {
        "required": False,
        "optional": ["sink|dynamodb"],
        "data_type": int,
        "default_value": 1000,
    }
    boost_rcu = {
        "required": False,
        "optional": ["source|dynamodb"],
        "data_type": int,
        "default_value": 1000,
    }
    write_filename_to_db = {
        "required": False,
        "optional": ["sink|dynamodb"],
        "data_type": bool,
    }
    # Nested section; its keys are described by DynamoDBEndpointQueryParams.
    query_params = {"required": False, "optional": ["source|dynamodb"], "data_type": dict,
                    "keys": DynamoDBEndpointQueryParams}

490 

491 

class ODBCEndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``odbc``.

    NOTE: ``database_name`` and ``driver`` are Enum aliases of ``host``, and
    ``dsn`` is an alias of ``port``, because their values compare equal.
    """

    database_type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["odbc", "odbc_netsuite"]},
    }
    host = {
        "required": True,
        "data_type": str,
    }
    database_name = {
        "required": True,
        "data_type": str,
    }
    driver = {
        "required": True,
        "data_type": str,
    }
    port = {
        "required": False,
        "data_type": str,
    }
    dsn = {
        "required": False,
        "data_type": str,
    }
    query_name = {
        "required": ["source|odbc"],
        "data_type": str,
    }
    credentials = {
        "required": True,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

508 

509 

class ODBCEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``odbc``."""

    query_params = {
        "required": False,
        "optional": ["source|odbc"],
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
        "default_value": None,
    }

522 

523 

class SnowflakeEndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``snowflake``.

    NOTE: ``database`` and ``schema`` are Enum aliases of ``account``, and
    ``role`` is an alias of ``warehouse``, because their values compare equal.
    """

    account = {
        "required": True,
        "data_type": str,
    }
    database = {
        "required": True,
        "data_type": str,
    }
    # The regex's negative lookahead rejects names containing _TEMP_ or _DEDUPED.
    table_name = {
        "required": ["sink|snowflake"],
        "data_type": str,
        "allowed_values": {"type": "regex", "criteria": "^(?!.*(_TEMP_|_DEDUPED)).*[A-Za-z0-9-]"},
    }
    schema = {
        "required": True,
        "data_type": str,
    }
    warehouse = {
        "required": False,
        "data_type": str,
    }
    role = {
        "required": False,
        "data_type": str,
    }
    query_name = {
        "required": ["source|snowflake"],
        "data_type": str,
    }
    credentials = {
        "required": True,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

540 

541 

class SnowflakeEndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``snowflake``.

    NOTE: ``dedupe`` and ``use_string_delimiter`` are Enum aliases of
    ``upsert``, and ``bookmark_filenames`` is an alias of
    ``write_filename_to_db``, because their values compare equal.
    """

    upsert = {
        "required": False,
        "optional": ["sink|snowflake"],
        "data_type": bool,
        "default_value": False,
    }
    write_filename_to_db = {
        "required": False,
        "optional": ["sink|snowflake"],
        "data_type": bool,
    }
    dedupe = {
        "required": False,
        "optional": ["sink|snowflake"],
        "data_type": bool,
        "default_value": False,
    }
    bookmark_filenames = {
        "required": False,
        "optional": ["sink|snowflake"],
        "data_type": bool,
    }
    style = {
        "required": False,
        "optional": ["sink|snowflake"],
        "data_type": str,
        "default_value": "snowflake",
        "allowed_values": {"type": "list", "criteria": ["snowflake", "heap"]},
    }
    query_params = {
        "required": False,
        "optional": ["source|snowflake"],
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": False},
        "default_value": None,
    }
    use_string_delimiter = {"required": False, "optional": ["sink|snowflake"], "data_type": bool,
                            "default_value": False}

574 

575 

class S3EndpointAccessKeys(Enum):
    """Spec of the ``access`` keys for endpoint type ``s3``."""

    region = {
        "required": False,
        "data_type": str,
        "default_value": "us-east-2",
    }
    bucket = {
        "required": True,
        "data_type": str,
    }
    prefix = {
        "required": False,
        "data_type": str,
        "default_value": "",
    }
    prefix_filter = {
        "required": False,
        "data_type": str,
        "default_value": None,
    }
    credentials = {
        "required": False,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

582 

583 

class S3EndpointOptsKeys(Enum):
    """Spec of the ``opts`` keys for endpoint type ``s3``.

    NOTE: ``process_by_block`` is an Enum alias of ``give_full_path`` because
    their values compare equal.
    """

    give_full_path = {
        "required": False,
        "data_type": bool,
        "default_value": False,
    }
    process_by_block = {
        "required": False,
        "data_type": bool,
        "default_value": False,
    }
    rows_by_block = {
        "required": False,
        "data_type": int,
    }

588 

589 

class HivePartitioningMapping(Enum):
    """Maps each hive partitioning type name to the enum describing its ``opts`` keys."""

    date = HivePartitioningDateKeys

592 

593 

class FileInfoTypeMapping(Enum):
    """Maps each file_info type name to the enum describing its ``opts`` keys."""

    # Member names match the values allowed for ``FileInfoKeys.type``.
    csv = CSVFileInfoKeys
    json = JSONFileInfoKeys
    parquet = ParquetFileInfoKeys
    avro = AvroFileInfoKeys
    file = FileFileInfoKeys
    excel = ExcelFileInfoKeys
    edi = EDIFileInfoKeys
    sharepoint_list = SharepointListInfoKeys

603 

604 

class EndpointAccessTypeMapping(Enum):
    """Maps each endpoint type name to the enum describing its ``access`` keys."""

    s3 = S3EndpointAccessKeys
    api = APIEndpointAccessKeys
    sftp = SFTPEndpointAccessKeys
    snowflake = SnowflakeEndpointAccessKeys
    dynamodb = DynamoDBEndpointAccessKeys
    odbc = ODBCEndpointAccessKeys
    xcom_airflow = XCOMAirflowEndpointAccessKeys
    local_disk = LocalDiskEndpointAccessKeys
    # Comes from the schema_test_enums module rather than this file.
    test = SchemaTestEnums.TestEndpointAccessKeys

615 

616 

class EndpointOptsTypeMapping(Enum):
    """Maps each endpoint type name to the enum describing its ``opts`` keys."""

    s3 = S3EndpointOptsKeys
    api = APIEndpointOptsKeys
    sftp = SFTPEndpointOptsKeys
    snowflake = SnowflakeEndpointOptsKeys
    dynamodb = DynamoDBEndpointOptsKeys
    odbc = ODBCEndpointOptsKeys
    xcom_airflow = XCOMAirflowEndpointOptsKeys
    local_disk = LocalDiskEndpointOptsKeys
    # Comes from the schema_test_enums module rather than this file.
    test = SchemaTestEnums.TestEndpointOptsKeys
    mock = MockEndpointOptsKeys

628 

629 

class HivePartitioningKeys(Enum):
    """Spec of the keys inside an endpoint ``hive_partitioning`` section."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": ["date"],
        },
    }
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": HivePartitioningMapping,
    }

633 

634 

class FileFiltersModifiedDateOptsTimeDeltaKeys(Enum):
    """Spec of the keys inside the ``timedelta`` option of a ``modified_date`` file filter."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["days", "hours", "minutes"]},
    }
    amount = {
        "required": True,
        "data_type": int,
    }

645 

646 

class FileFiltersModifiedDateOptsKeys(Enum):
    """Spec of the ``opts`` keys for a file filter of type ``modified_date``."""

    comparison = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["=", ">=", "<=", "<", ">"]},
    }
    timedelta = {
        "required": False,
        "data_type": dict,
        "keys": FileFiltersModifiedDateOptsTimeDeltaKeys,
    }
    format = {
        "required": True,
        "data_type": str,
    }

658 

659 

class FileFiltersTypeMapping(Enum):
    """Maps each file filter type name to the enum describing its ``opts`` keys."""

    modified_date = FileFiltersModifiedDateOptsKeys

662 

663 

class FileFiltersKeys(Enum):
    """Spec of the keys inside each entry of an endpoint ``file_filters`` list."""

    tag = {
        "required": True,
        "data_type": str,
    }
    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": ["modified_date"],
        },
    }
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": FileFiltersTypeMapping,
    }

668 

669 

class FileInfoKeys(Enum):
    """Spec of the keys inside an endpoint ``file_info`` section."""

    # Each allowed type has a matching member in FileInfoTypeMapping.
    # "file" was listed twice in the original criteria; the duplicate is dropped.
    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": ["file", "csv", "json", "parquet", "avro", "excel", "edi", "sharepoint_list"],
        },
    }
    # The opts spec is resolved through ``key_map`` (presumably selected by ``type``).
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": FileInfoTypeMapping,
    }

680 

681 

class ValidationTestsNoOpts(Enum):
    """Empty ``opts`` spec for validation tests that define no option keys."""

684 

685 

class ValidationTestsOptsMapping(Enum):
    """Maps each validation test type name to the enum describing its ``opts`` keys.

    NOTE: all three members point at the same class, so ``column_unique`` and
    ``column_check`` are Enum aliases of ``column_not_null``.
    """

    column_not_null = ValidationTestsNoOpts
    column_unique = ValidationTestsNoOpts
    column_check = ValidationTestsNoOpts

690 

691 

class ValidationAccessKeys(Enum):
    """Spec of the keys inside a validation ``access`` section."""

    credentials = {
        "required": True,
        "data_type": dict,
        "keys": CredentialsKeys,
    }

694 

695 

class ValidationTestsKeys(Enum):
    """Spec of the keys inside each entry of a validation ``tests`` list."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["column_not_null", "column_unique", "column_check"]},
    }
    opts = {
        "required": False,
        "data_type": dict,
        "key_map": ValidationTestsOptsMapping,
    }

706 

707 

class ValidationKeys(Enum):
    """Spec of the keys inside an endpoint ``validation`` section."""

    log = {
        "required": False,
        "data_type": bool,
        "default_value": False,
    }
    access = {
        "required": False,
        "optional": ["sink|snowflake"],
        "data_type": dict,
        "keys": ValidationAccessKeys,
    }
    # List of dict entries; each entry's keys are described by ValidationTestsKeys.
    tests = {
        "required": True,
        "data_type": list,
        "list_value_opts": {"data_type": dict, "allow_duplicates": False, "allow_empty_list": False},
        "keys": ValidationTestsKeys,
    }

721 

722 

class EndpointInfoKeys(Enum):
    """Spec of the keys inside an endpoint ``info`` section.

    ``required``/``optional`` lists hold strings such as ``"source|s3"``
    (presumably ``<endpoint role>|<endpoint type>`` -- confirm with the validator).
    """

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": [
                "s3",
                "api",
                "sftp",
                "snowflake",
                "odbc",
                "xcom_airflow",
                "local_disk",
                "dynamodb",
                "mock",
                "test",
                "test2",
            ],
        },
    }
    access = {
        "required": [
            "source|s3",
            "sink|s3",
            "source|sftp",
            "sink|sftp",
            "source|snowflake",
            "sink|snowflake",
            "source|api",
            "sink|api",
        ],
        "data_type": dict,
        "key_map": EndpointAccessTypeMapping,
    }
    opts = {
        "required": [
            "source|s3",
            "sink|s3",
            "source|sftp",
            "sink|sftp",
            "source|snowflake",
            "sink|snowflake",
            "source|api",
            "sink|api",
        ],
        "data_type": dict,
        "key_map": EndpointOptsTypeMapping,
    }
    hive_partitioning = {"required": False, "data_type": dict,
                         "optional": ["sink|s3", "sink|sftp", "source|s3", "source|sftp"],
                         "keys": HivePartitioningKeys}
    file_info = {
        "required": ["source|s3", "sink|s3", "source|sftp", "sink|sftp", "source|local_disk", "sink|local_disk"],
        "optional": ["source|api"],
        "data_type": dict,
        "keys": FileInfoKeys,
    }
    file_filters = {
        "required": False,
        "optional": ["source|s3"],
        "data_type": list,
        "list_value_opts": {"data_type": dict, "allow_duplicates": False, "allow_empty_list": False},
        "keys": FileFiltersKeys,
    }
    logger = {"required": False, "data_type": dict,
              "optional": ["sink|s3", "sink|sftp", "sink|snowflake", "sink|api", "sink|dynamodb"],
              "keys": LoggerKeys}
    validation = {
        "required": False,
        "data_type": dict,
        "keys": ValidationKeys,
    }

777 

778 

class EndpointsKeys(Enum):
    """Spec of the keys inside each entry of the top-level ``endpoints`` list."""

    tag = {
        "required": True,
        "data_type": str,
    }
    info = {
        "required": True,
        "data_type": dict,
        "keys": EndpointInfoKeys,
    }
    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["sink", "source", "test"]},
    }
    description = {
        "required": False,
        "data_type": str,
    }

791 

792 

class FieldTypeKeys(Enum):
    """Spec of the keys inside a field's ``type`` section."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": ["string", "boolean", "float", "double", "int", "long", "datetime", "null"],
        },
    }
    logical_type = {
        "required": False,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": [
                "string",
                "datetime",
                "json",
                "array",
                "name",
                "first_name",
                "last_name",
                "full_address",
                "phone_number",
                "text",
                "url",
                "file_name",
            ],
        },
    }

814 

815 

class FieldConstraintTestOptsNoOpts(Enum):
    """Empty ``opts`` spec for field constraints that define no option keys."""

818 

819 

class FieldConstraintTestOptsUnique(Enum):
    """Spec of the ``opts`` keys for the ``column_unique`` field constraint."""

    case_sensitive = {
        "required": False,
        "data_type": bool,
        "default_value": True,
    }

822 

823 

class FieldConstraintTestTypeMapping(Enum):
    """Maps each field constraint type name to the enum describing its ``opts`` keys."""

    column_not_null = FieldConstraintTestOptsNoOpts
    column_unique = FieldConstraintTestOptsUnique

827 

828 

class FieldTransformationFunctionsConcatColumnsOpts(Enum):
    """Spec of the ``opts`` keys for the ``concat_columns`` transformation."""

    list_columns = {
        "required": True,
        "data_type": list,
    }
    separator = {
        "required": True,
        "data_type": str,
    }

832 

833 

class FieldTransformationConvertToDatetimeOpts(Enum):
    """Spec of the ``opts`` keys for the ``convert_to_datetime`` transformation.

    NOTE: ``target_timezone`` is an Enum alias of ``origin_timezone`` because
    their values compare equal.
    """

    to_datetime_params = {
        "required": False,
        "data_type": dict,
    }
    origin_timezone = {
        "required": False,
        "data_type": str,
    }
    target_timezone = {
        "required": False,
        "data_type": str,
    }

838 

839 

class FieldTransformationFunctionsCollapseColumnsOpts(Enum):
    """Spec of the ``opts`` keys for the ``collapse_columns`` transformation."""

    list_column_prefix = {
        "required": True,
        "data_type": list,
        "list_value_opts": {"data_type": str, "allow_duplicates": False, "allow_empty_list": True},
    }

850 

851 

class FieldTransformationFunctionsHashRowOpts(Enum):
    """Spec of the ``opts`` keys for the ``hash_row`` transformation (none are defined)."""

854 

855 

class FieldTransformationTypeMapping(Enum):
    """Maps each transformation type name to the enum describing its ``opts`` keys."""

    concat_columns = FieldTransformationFunctionsConcatColumnsOpts
    collapse_columns = FieldTransformationFunctionsCollapseColumnsOpts
    hash_row = FieldTransformationFunctionsHashRowOpts
    convert_to_datetime = FieldTransformationConvertToDatetimeOpts

861 

862 

class FieldConstraintsKeys(Enum):
    """Spec of the keys inside each entry of a field's ``constraints`` list."""

    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["column_not_null", "column_unique"]},
    }
    opts = {
        "required": False,
        "data_type": dict,
        "key_map": FieldConstraintTestTypeMapping,
    }

873 

874 

class FieldDataframeTransformationKeys(Enum):
    """Spec of the keys inside each entry of the top-level ``dataframe_transformations`` list."""

    transformation_type = {
        "required": False,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["PRE", "POST"]},
    }
    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": ["collapse_columns"],
        },
    }
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": FieldTransformationTypeMapping,
    }

886 

887 

class FieldTransformationKeys(Enum):
    """Spec of the keys inside each entry of a field's ``transformations`` list."""

    transformation_type = {
        "required": False,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["PRE", "POST"]},
    }
    axis = {
        "required": False,
        "data_type": str,
        "allowed_values": {
            "type": "list",
            "criteria": ["columns", "index"],
        },
    }
    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["concat_columns", "hash_row", "convert_to_datetime"]},
    }
    opts = {
        "required": True,
        "data_type": dict,
        "key_map": FieldTransformationTypeMapping,
    }

907 

908 

class FieldKeys(Enum):
    """Spec of the keys inside each entry of the top-level ``fields`` list.

    NOTE: members with equal values are Enum aliases (for example every
    optional ``bool`` key resolves to ``is_nullable``); use ``__members__``
    to enumerate every key name.
    """

    name = {
        "required": True,
        "data_type": str,
    }
    type = {
        "required": True,
        "data_type": dict,
        "keys": FieldTypeKeys,
    }
    is_nullable = {
        "required": False,
        "data_type": bool,
    }
    sf_merge_key = {
        "required": False,
        "data_type": bool,
    }
    is_pii = {
        "required": False,
        "data_type": bool,
    }
    drop_column = {
        "required": False,
        "data_type": bool,
    }
    default_value = {
        "required": False,
        "data_type": "*",
    }
    add_column = {
        "required": False,
        "data_type": bool,
    }
    rename = {
        "required": False,
        "data_type": str,
    }
    transformations = {
        "required": False,
        "data_type": list,
        "list_value_opts": {"data_type": dict, "allow_duplicates": False, "allow_empty_list": False},
        "keys": FieldTransformationKeys,
    }
    description = {
        "required": False,
        "data_type": str,
    }
    constraints = {
        "required": False,
        "data_type": list,
        "list_value_opts": {"data_type": dict, "allow_duplicates": False, "allow_empty_list": False},
        "keys": FieldConstraintsKeys,
    }

940 

941 

class TopLevelKeys(Enum):
    """Spec of the keys at the top level of a data schema.

    NOTE: ``namespace`` and ``name`` carry equal values, so ``name`` is an
    Enum alias of ``namespace``; plain iteration over the class skips it,
    while ``__members__`` lists both.
    """

    # NOTE: The order of these members is used to create the updated schema returned by validation
    namespace = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "regex", "criteria": "^[A-Za-z0-9-]*$"},
    }
    name = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "regex", "criteria": "^[A-Za-z0-9-]*$"},
    }
    type = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["object"]},
    }
    country_code = {
        "required": True,
        "data_type": str,
        "allowed_values": {"type": "list", "criteria": ["USA"]},
    }
    estimated_row_size = {
        "required": True,
        "data_type": str,
    }
    estimated_row_count = {
        "required": True,
        "data_type": int,
    }
    contains_pii = {
        "required": False,
        "data_type": bool,
        "default_value": False,
    }
    dataframe_transformations = {
        "required": False,
        "data_type": list,
        "list_value_opts": {"data_type": dict, "allow_duplicates": False, "allow_empty_list": False},
        "keys": FieldDataframeTransformationKeys,
    }
    endpoints = {
        "required": True,
        "data_type": list,
        "list_value_opts": {"data_type": dict, "allow_duplicates": False, "allow_empty_list": True},
        "keys": EndpointsKeys,
    }
    fields = {
        "required": True,
        "data_type": list,
        "list_value_opts": {"data_type": dict, "allow_duplicates": True, "allow_empty_list": True},
        "keys": FieldKeys,
    }

981 

982 

#############################################################
# Code level args enums
#############################################################

986 

987 

class PandasCSVMapper(Enum):
    """
    Mapping for:
        Data schema file info key -> pd.read_csv function parameter name.

    Member names are the schema keys; member values are the keyword
    argument names to pass to pandas.
    """
    delimiter = "delimiter"
    headers = "header"
    compression_type = "compression"
    line_terminator = "lineterminator"
    encoding = "encoding"

998 

999 

class PandasJSONMapper(Enum):
    """
    Mapping for:
        Data schema file info key -> pd.read_json function parameter name.

    Member names are the schema keys; member values are the keyword
    argument names to pass to pandas.
    """
    lines = "lines"
    compression_type = "compression"

1007 

1008 

class PandasExcelMapper(Enum):
    """
    Mapping for:
        Data schema file info key -> pd.read_excel function parameter name.

    NOTE(review): only "header" is an actual pd.read_excel keyword. The
    other values repeat the schema key unchanged, so they are presumably
    consumed by project code before pandas is called - confirm.
    """
    headers = "header"
    process_all_sheets = "process_all_sheets"
    sheet_identifiers = "sheet_identifiers"
    skip_n_first_rows = "skip_n_first_rows"
    parse_n_rows = "parse_n_rows"

1019 

1020 

class SDCDFCSVMapper(Enum):
    """
    Mapping for:
        Data schema file info key -> SDCDataframe function parameter name.

    Member names are the schema keys; member values are the parameter
    names (note the trailing underscore).
    """
    delimiter = "delimiter_"
    headers = "headers_"

1028 

1029 

class PandasEDIMapper(Enum):
    """
    Mapping for:
        Data schema file info key -> SDCDataframe function parameter name.

    Every value here equals its member name, i.e. the schema key is passed
    through under the same parameter name.
    """
    field_separator = "field_separator"
    segment_separator = "segment_separator"
    dataframe_type = "dataframe_type"
    edi_code = "edi_code"

1039 

1040 

class SchemaTypeToDatabaseMapper(Enum):
    """
    Mapping for:
        Data schema file field type -> Snowflake database data type.

    NOTE: ``long`` shares the value "INT" with ``int``, so Enum makes it an
    alias of ``int``. Look types up by name (``SchemaTypeToDatabaseMapper[t]``)
    or via ``__members__``; plain iteration skips aliases.
    """
    string = "VARCHAR"
    int = "INT"
    long = "INT"
    float = "FLOAT"
    boolean = "BOOLEAN"
    double = "DOUBLE"

1052 

1053 

class SchemaLogicalTypeToDatabaseMapper(Enum):
    """
    Mapping for:
        Data schema file field logical type -> Snowflake database data type.

    NOTE: Members sharing a value are Enum aliases of the first member with
    that value (``array`` -> ``json``; every "VARCHAR" entry -> ``name``).
    Look logical types up by name (``SchemaLogicalTypeToDatabaseMapper[t]``)
    or via ``__members__``; plain iteration skips aliases.
    """
    json = "VARIANT"
    array = "VARIANT"
    datetime = "DATETIME"
    name = "VARCHAR"
    first_name = "VARCHAR"
    last_name = "VARCHAR"
    full_address = "VARCHAR"
    phone_number = "VARCHAR"
    text = "VARCHAR"
    url = "VARCHAR"
    file_name = "VARCHAR"

1070 

1071 

1072 

class SnowflakeDataTypesToSchemaDataTypesMapper(Enum):
    """
    Mapping for:
        Snowflake data type name -> data schema file field type name.

    NOTE: ``TIMESTAMP_NTZ`` shares the value "datetime" with ``DATE``, so
    Enum makes it an alias of ``DATE``. Look types up by name or via
    ``__members__``; plain iteration skips aliases.
    """
    DATE = "datetime"
    TIMESTAMP_NTZ = "datetime"
    TEXT = "string"
    FIXED = "int"
    REAL = "float"
    VARIANT = "json"

1080 

1081 

class SchemaTypeToDatabaseMaskingMapper(Enum):
    """
    Mapping for:
        Data schema file field type -> Snowflake CREATE VIEW masked value.

    Values are SQL expression fragments. The ``string`` entry carries a
    ``{0}`` placeholder, presumably filled with the column name via
    ``str.format`` - confirm against the view builder.

    NOTE: Members sharing a value are Enum aliases of the first one
    (``long`` -> ``int``, ``double`` -> ``float``, ``json`` -> ``boolean``).
    Look types up by name or via ``__members__``; plain iteration skips
    aliases.
    """
    string = 'sha2("{0}", 512)'
    int = '-9'
    long = '-9'
    float = '-9.99'
    double = '-9.99'
    boolean = 'NULL'
    json = 'NULL'
    datetime = '\'2099-12-31\''

1095 

1096 

class FileEnumMapper(Enum):
    """
    Mapping for:
        Data schema file types to the Enums containing the
        parameter mappings for source and sink functions.

    Each value is a dict with a "source" and a "sink" entry; an entry of
    None means no mapper Enum is defined for that direction.
    """
    csv = {"source": PandasCSVMapper, "sink": SDCDFCSVMapper}
    json = {"source": PandasJSONMapper, "sink": None}
    excel = {"source": PandasExcelMapper, "sink": None}
    edi = {"source": PandasEDIMapper, "sink": PandasEDIMapper}

1106 edi = {"source": PandasEDIMapper, "sink": PandasEDIMapper}