Coverage for libs/sdc_etl_libs/sdc_data_validation/validation_tests/column_unique.py : 55%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""" Data Validation Test to check for unique values in a database table column """
3from sdc_etl_libs.sdc_data_validation.data_validation_test import ValidationTest
4from sdc_etl_libs.sdc_data_validation.data_validation_quality_test import QualityValidationTest
5from sdc_etl_libs.sdc_data_validation.data_validation_test_enums import ValidationTests
6from sdc_etl_libs.sdc_data_validation.data_validation_exceptions import SDCDataValidationError
7from sdc_etl_libs.sdc_exchange_endpoints.database_endpoints.SDCColumnCleanseStylesEnums import StyleEnums
8from sdc_etl_libs.database_helpers.SnowflakeDatabase import SnowflakeDatabase
11class ColumnUnique(ValidationTest, QualityValidationTest):
13 def __init__(self, columns, case_sensitive=True, **kwargs):
14 """
15 Constructor for data test.
16 :param columns: List of Strings. Column name that test will be run against.
17 :param case_sensitive: Boolean. If True, values will be compared by case-sensitive values. If False,
18 comparison is done in an case-insensitive manner.
19 :param **kwargs: See ValidationTest base class constructor for information on this parameter.
20 """
22 super().__init__()
24 self.test_number = kwargs["test_number"]
25 self.run_against = kwargs["run_against"]
26 self.sdc_dataframe = kwargs["sdc_dataframe"]
27 self.database_info = kwargs["database_info"]
28 self.endpoint_schema = kwargs["endpoint_schema"]
30 # Set test name and extract additional details from validation_test_enums:
31 self.validation_test_name = "column_unique"
32 self.test_type = ValidationTests[self.validation_test_name].value["test_type"]
34 # Set params for what data type tests will be run against and check for completeness:
35 self.check_run_against_parameters()
37 # Set test-specific attributes that came over as unpacked options from parsed data test info:
38 self.columns = columns
39 self.case_sensitive = case_sensitive
41 def run_validation_test(self):
42 """
43 See docstring in ValidationTest base class for information on this function.
44 """
46 self.run_quality_validation_test()
48 return self.results
50 def run_against_dataframe(self):
51 """
52 See docstring in ValidationTest base class for information on this function.
53 """
55 raise NotImplementedError("Test 'column_unique' not currently setup to run against a dataframe.")
57 def run_against_database(self):
58 """
59 See docstring in ValidationTest base class for information on this function.
60 """
62 query = self.generate_sql_statement()
63 self.database_info["database_client"].execute_query(query, return_results_=True)
64 for row in self.database_info["database_client"].get_results():
65 results = list(zip(self.columns, row))
67 return results
69 def generate_sql_statement(self):
70 """
71 See docstring in ValidationTest base class for information on this function.
72 """
74 table = f'''"{self.database_info['database_name']}"."{self.database_info['schema_name']}"."{self.database_info['table_name']}"'''
76 if self.database_info["database_type"] == "snowflake":
77 query = f'SELECT '
78 for no, column in enumerate(self.columns):
79 column = SnowflakeDatabase.clean_column_name(column_name_=column, rules_=StyleEnums[
80 self.endpoint_schema["info"]["opts"]["style"].upper()])
81 if self.case_sensitive:
82 query += f'COALESCE(SUM(CASE WHEN "{column}" IN (SELECT "{column}" FROM {table} GROUP BY 1 HAVING COUNT(*) > 1) THEN 1 END), 0)' \
83 f' AS "{column}"{", " if no + 1 != len(self.columns) else " "}'
84 else:
85 query += f'COALESCE(SUM(CASE WHEN UPPER("{column}") IN (SELECT UPPER("{column}") FROM {table} GROUP BY 1 HAVING COUNT(*) > 1) THEN 1 END), 0)' \
86 f' AS "{column}"{", " if no + 1 != len(self.columns) else " "}'
87 query += f"""FROM {table};"""
89 else:
90 raise SDCDataValidationError(f"'{self.validation_test_name}' not setup for database type "
91 f"'{self.database_info['database_type']}'")
93 return query