Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" Tools and helper functions to assist in SDC Data Validation processes""" 

2 

3from sdc_etl_libs.sdc_data_validation.data_validation_test_enums import ValidationTests 

4 

5 

class DataValidationToolbox:
    """Static helpers that build validation configurations for SDCDataValidation."""

    @staticmethod
    def convert_data_schema_to_validation_config(data_schema_, endpoint_schema_):
        """
        Pulls out data validation information from a data schema and converts it to the
        validation configuration setup expected by SDCDataValidation.

        Test and constraint types that are not member names of the ValidationTests enum
        are skipped silently.

        :param data_schema_: Full SDC data schema. Its "fields" list is scanned for
            per-column "constraints".
        :param endpoint_schema_: Endpoint section of SDC data schema. The tests listed under
            ["info"]["validation"]["tests"] are read from it.
        :return: List. List of dictionaries that represent a validation test to run (name and
            options). Non-grouped table-level tests come first, then column-specific tests in
            field order, then one entry per grouped test with all of its collected columns.
        :raises KeyError: If an expected schema key is missing, including "opts" on a
            non-grouped field constraint.

        Example output:

            [
                {'column_allowed_values': {'column': 'FIRST_SCAN_ITEM_ID', 'type': 'regex', 'criteria': '.*'}},
                {'column_not_null': {'columns': ['FIRST_SCAN_ITEM_ID', 'BOOK_TO_SHOW_SCORE']}},
                {'column_unique': {'columns': ['FIRST_SCAN_ITEM_ID', 'BOOK_TO_SHOW_SCORE']}}
            ]
        """

        validation_config = []
        group_config = {}
        known_tests = ValidationTests.__members__

        # Parse the "validation" section of the endpoint schema.
        for test in endpoint_schema_["info"]["validation"]["tests"]:
            test_type = test["type"]
            if test_type not in known_tests:
                continue
            if known_tests[test_type].value["group_for_quality_tests"]:
                # A grouped test only gets a placeholder here; its columns are collected from
                # the fields section of the data schema below.
                group_config.setdefault(test_type, {"columns": []})
            else:
                # Copy the options so the returned configuration does not alias the schema.
                validation_config.append({test_type: {**(test.get("opts") or {})}})

        # Go through fields for column-specific validations.
        for field in data_schema_["fields"]:
            constraints = field.get("constraints")
            if not constraints:
                continue
            field_name = field.get("rename") or field.get("name")
            for constraint in constraints:
                # A "constraint" in the data schema field section translates to a column test.
                # The name of the constraint must match a member name in the ValidationTests enum.
                constraint_type = constraint["type"]
                if constraint_type not in known_tests:
                    continue
                is_grouped = known_tests[constraint_type].value["group_for_quality_tests"]
                if not is_grouped:
                    # Non-grouped tests run on their own column and require "opts".
                    validation_config.append(
                        {constraint_type: {"column": field_name, **constraint["opts"]}}
                    )
                elif "opts" in constraint:
                    # A grouped test with its own options cannot share a group entry, so it
                    # becomes a standalone single-column test.
                    validation_config.append(
                        {constraint_type: {"columns": [field_name], **constraint["opts"]}}
                    )
                elif constraint_type in group_config:
                    # Grouped tests without options are merged per test type, but only when the
                    # test was listed in the endpoint's validation section.
                    group_config[constraint_type]["columns"].append(field_name)

        # Add everything from the "group validation" to the main validation configuration.
        validation_config.extend({name: options} for name, options in group_config.items())

        return validation_config

    @staticmethod
    def convert_job_configuration_code_to_validation_config():
        """
        Place-holder for converting job configuration code to validation configuration.

        :raises NotImplementedError: Always; this conversion is not implemented.
        """

        raise NotImplementedError("Currently, SDCDataValidation cannot be setup with via a job configuration.")