Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" Data Validation Test to check for unique values in a database table column """ 

2 

3from sdc_etl_libs.sdc_data_validation.data_validation_test import ValidationTest 

4from sdc_etl_libs.sdc_data_validation.data_validation_quality_test import QualityValidationTest 

5from sdc_etl_libs.sdc_data_validation.data_validation_test_enums import ValidationTests 

6from sdc_etl_libs.sdc_data_validation.data_validation_exceptions import SDCDataValidationError 

7from sdc_etl_libs.sdc_exchange_endpoints.database_endpoints.SDCColumnCleanseStylesEnums import StyleEnums 

8from sdc_etl_libs.database_helpers.SnowflakeDatabase import SnowflakeDatabase 

9 

10 

11class ColumnUnique(ValidationTest, QualityValidationTest): 

12 

13 def __init__(self, columns, case_sensitive=True, **kwargs): 

14 """ 

15 Constructor for data test. 

16 :param columns: List of Strings. Column name that test will be run against. 

17 :param case_sensitive: Boolean. If True, values will be compared by case-sensitive values. If False, 

18 comparison is done in an case-insensitive manner. 

19 :param **kwargs: See ValidationTest base class constructor for information on this parameter. 

20 """ 

21 

22 super().__init__() 

23 

24 self.test_number = kwargs["test_number"] 

25 self.run_against = kwargs["run_against"] 

26 self.sdc_dataframe = kwargs["sdc_dataframe"] 

27 self.database_info = kwargs["database_info"] 

28 self.endpoint_schema = kwargs["endpoint_schema"] 

29 

30 # Set test name and extract additional details from validation_test_enums: 

31 self.validation_test_name = "column_unique" 

32 self.test_type = ValidationTests[self.validation_test_name].value["test_type"] 

33 

34 # Set params for what data type tests will be run against and check for completeness: 

35 self.check_run_against_parameters() 

36 

37 # Set test-specific attributes that came over as unpacked options from parsed data test info: 

38 self.columns = columns 

39 self.case_sensitive = case_sensitive 

40 

41 def run_validation_test(self): 

42 """ 

43 See docstring in ValidationTest base class for information on this function. 

44 """ 

45 

46 self.run_quality_validation_test() 

47 

48 return self.results 

49 

50 def run_against_dataframe(self): 

51 """ 

52 See docstring in ValidationTest base class for information on this function. 

53 """ 

54 

55 raise NotImplementedError("Test 'column_unique' not currently setup to run against a dataframe.") 

56 

57 def run_against_database(self): 

58 """ 

59 See docstring in ValidationTest base class for information on this function. 

60 """ 

61 

62 query = self.generate_sql_statement() 

63 self.database_info["database_client"].execute_query(query, return_results_=True) 

64 for row in self.database_info["database_client"].get_results(): 

65 results = list(zip(self.columns, row)) 

66 

67 return results 

68 

69 def generate_sql_statement(self): 

70 """ 

71 See docstring in ValidationTest base class for information on this function. 

72 """ 

73 

74 table = f'''"{self.database_info['database_name']}"."{self.database_info['schema_name']}"."{self.database_info['table_name']}"''' 

75 

76 if self.database_info["database_type"] == "snowflake": 

77 query = f'SELECT ' 

78 for no, column in enumerate(self.columns): 

79 column = SnowflakeDatabase.clean_column_name(column_name_=column, rules_=StyleEnums[ 

80 self.endpoint_schema["info"]["opts"]["style"].upper()]) 

81 if self.case_sensitive: 

82 query += f'COALESCE(SUM(CASE WHEN "{column}" IN (SELECT "{column}" FROM {table} GROUP BY 1 HAVING COUNT(*) > 1) THEN 1 END), 0)' \ 

83 f' AS "{column}"{", " if no + 1 != len(self.columns) else " "}' 

84 else: 

85 query += f'COALESCE(SUM(CASE WHEN UPPER("{column}") IN (SELECT UPPER("{column}") FROM {table} GROUP BY 1 HAVING COUNT(*) > 1) THEN 1 END), 0)' \ 

86 f' AS "{column}"{", " if no + 1 != len(self.columns) else " "}' 

87 query += f"""FROM {table};""" 

88 

89 else: 

90 raise SDCDataValidationError(f"'{self.validation_test_name}' not setup for database type " 

91 f"'{self.database_info['database_type']}'") 

92 

93 return query