Coverage for libs/sdc_etl_libs/tests/sdc_data_schema_tests/schema_validation_tests/schema_validation_pii_test.py : 100%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import json
2import os
3import sys
5import pytest
7sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../../../")
8from sdc_etl_libs.sdc_data_schema.schema_exceptions import DataSchemaFailedValidation
9from sdc_etl_libs.sdc_data_schema.schema_validation import (ResultTypeStatuses, SchemaValidation)
def test_validation__key_contains_pii__check_fields_when_true():
    """
    Check the consistency rule between top-level "contains_pii": true and the per-field "is_pii" flag.

    Two schemas are validated with ``validation_type_="full"``:

    1. One field has "is_pii" set to True: validation must finish with zero errors and the CONFLICT entry must not
       appear in ``validation.results``.
    2. No field has "is_pii" set: ``DataSchemaFailedValidation`` must be raised and the CONFLICT entry must appear in
       ``validation.results``.
    """

    # The result entry that signals "contains_pii" is True without any PII field.
    expected_conflict = {
        'status': 'ERROR',
        'reason': 'CONFLICT',
        'section': 'top_level',
        'note': '"contains_pii" is True but no fields have the key "is_pii" set to True.'
    }

    # Case 1: at least one field is flagged as PII -> the schema is consistent.
    schema_with_pii_field = """
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": true,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                },
                "is_pii": true
            }
        ]
    }
    """
    validation = SchemaValidation()
    # The return value is not inspected; only the state recorded on ``validation`` matters here.
    validation.validate_schema(json.loads(schema_with_pii_field), validation_type_="full")

    assert validation.ttl_errors == 0
    assert expected_conflict not in validation.results

    # Case 2: no field is flagged as PII -> the schema contradicts "contains_pii": true.
    schema_without_pii_field = """
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": true,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                }
            }
        ]
    }
    """
    validation = SchemaValidation()
    with pytest.raises(DataSchemaFailedValidation):
        validation.validate_schema(json.loads(schema_without_pii_field), validation_type_="full")

    # The results list is still populated on the validator even though the call raised.
    assert expected_conflict in validation.results
def test_validation__key_contains_pii__check_fields_when_false():
    """
    Check the consistency rule between top-level "contains_pii": false and the per-field "is_pii" flag.

    Two schemas are validated with ``validation_type_="full"``:

    1. No field has "is_pii" set: validation must finish with zero errors and the CONFLICT entry must not appear in
       ``validation.results``.
    2. The "SESSION ID" field has "is_pii" set to True: ``DataSchemaFailedValidation`` must be raised and the CONFLICT
       entry naming that field must appear in ``validation.results``.
    """

    # The result entry that signals a PII field inside a schema declared as PII-free.
    expected_conflict = {
        'status': 'ERROR',
        'reason': 'CONFLICT',
        'section': 'top_level',
        'note': '"contains_pii" is False but the following fields have the key "is_pii" set to True: [\'SESSION ID\'].'
    }

    # Case 1: no field is flagged as PII -> the schema is consistent.
    schema_without_pii_field = """
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": false,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                }
            }
        ]
    }
    """
    validation = SchemaValidation()
    # The return value is not inspected; only the state recorded on ``validation`` matters here.
    validation.validate_schema(json.loads(schema_without_pii_field), validation_type_="full")

    assert validation.ttl_errors == 0
    assert expected_conflict not in validation.results

    # Case 2: "SESSION ID" is flagged as PII -> the schema contradicts "contains_pii": false.
    schema_with_pii_field = """
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": false,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                },
                "is_pii": true
            }
        ]
    }
    """
    validation = SchemaValidation()
    with pytest.raises(DataSchemaFailedValidation):
        validation.validate_schema(json.loads(schema_with_pii_field), validation_type_="full")

    # The results list is still populated on the validator even though the call raised.
    assert expected_conflict in validation.results
def _snowflake_sink_endpoint(tag, table_name):
    """
    Build one Snowflake sink endpoint definition for the table-name tests.

    :param tag: Endpoint tag, e.g. "SDC_sink_0".
    :param table_name: Value for info.access.table_name.
    :return: A freshly built dict, so each schema gets its own copy.
    """
    return {
        "type": "sink",
        "tag": tag,
        "info": {
            "type": "snowflake",
            "access": {
                "account": "sd75523",
                "database": "DATA_ENGINEERING",
                "table_name": table_name,
                "schema": "SEALS",
                "credentials": {
                    "type": "aws_secrets",
                    "opts": {
                        "name": "snowflake/service_account/seal-secrets"
                    }
                }
            },
            "opts": {
                "upsert": False,
                "dedupe": False,
                "style": "snowflake"
            }
        }
    }


def _snowflake_table_name_schema(contains_pii):
    """
    Build a full schema with three Snowflake sinks whose table names end in "_PII", "_NO_PII" and neither.

    :param contains_pii: Used for both the top-level "contains_pii" key and the single field's "is_pii" key, so the
        schema never trips the separate contains_pii / is_pii conflict rule.
    :return: A freshly built schema dict.
    """
    return {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": contains_pii,
        "endpoints": [
            _snowflake_sink_endpoint("SDC_sink_0", "TREVOR_SECRETS_PII"),
            _snowflake_sink_endpoint("SDC_sink_1", "TREVOR_SECRETS_NO_PII"),
            _snowflake_sink_endpoint("SDC_sink_2", "TREVOR_SECRETS"),
        ],
        "fields": [
            {
                "name": "ALL_OF_TREVOR_WNUKS_SECRETS",
                "type": {
                    "type": "string"
                },
                "is_pii": contains_pii
            }
        ]
    }


def test_validation__key_contains_pii__correct_snowflake_table_names():
    """
    Ensure that snowflake sink endpoints have the tables named correctly. If "contains_pii" is set as True, then the
    table name should end with "_PII". If "contains_pii" is set as False, then the table name should end with "_NO_PII".
    A warning is expected if the table name is incorrect according to "contains_pii", or if the table name does not
    end with either "_PII" or "_NO_PII".

    Each schema carries three sinks (SDC_sink_0 / 1 / 2) named "TREVOR_SECRETS_PII", "TREVOR_SECRETS_NO_PII" and
    "TREVOR_SECRETS", so exactly two of the three are expected to produce a warning in either mode.
    """

    # Test for correct snowflake table names when "contains_pii" = True
    validation = SchemaValidation()
    # The return value is not inspected; only the state recorded on ``validation`` matters here.
    validation.validate_schema(_snowflake_table_name_schema(True), validation_type_="full")

    assert validation.ttl_warnings == 2

    # "_PII" suffix is the correct one here, so no warning for SDC_sink_0.
    assert {
        'note': 'Snowflake sink table name "TREVOR_SECRETS_PII" should end with "_PII" when "contains_pii" is True.',
        'status': 'WARNING',
        'reason': 'BAD_VALUE',
        'section': 'top_level:endpoints[SDC_sink_0]:info:access:table_name'
    } not in validation.results

    assert {
        'note': 'Snowflake sink table name "TREVOR_SECRETS_NO_PII" should end with "_PII" when "contains_pii" is True.',
        'status': 'WARNING',
        'reason': 'BAD_VALUE',
        'section': 'top_level:endpoints[SDC_sink_1]:info:access:table_name'
    } in validation.results

    assert {
        'note': 'Snowflake sink table name "TREVOR_SECRETS" should end with "_PII" when "contains_pii" is True.',
        'status': 'WARNING',
        'reason': 'BAD_VALUE',
        'section': 'top_level:endpoints[SDC_sink_2]:info:access:table_name'
    } in validation.results

    # Test for correct snowflake table names when "contains_pii" = False
    validation = SchemaValidation()
    validation.validate_schema(_snowflake_table_name_schema(False), validation_type_="full")

    assert validation.ttl_warnings == 2

    # NOTE(review): the expected notes below quote "NO_PII" without the leading underscore, unlike the docstring's
    # "_NO_PII". They are kept exactly as asserted originally - presumably this mirrors the validator's message text.
    assert {
        'note': 'Snowflake sink table name "TREVOR_SECRETS_PII" should end with "NO_PII" when "contains_pii" is False.',
        'status': 'WARNING',
        'reason': 'BAD_VALUE',
        'section': 'top_level:endpoints[SDC_sink_0]:info:access:table_name'
    } in validation.results

    # "_NO_PII" suffix is the correct one here, so no warning for SDC_sink_1.
    assert {
        'note': 'Snowflake sink table name "TREVOR_SECRETS_NO_PII" should end with "NO_PII" when "contains_pii" is False.',
        'status': 'WARNING',
        'reason': 'BAD_VALUE',
        'section': 'top_level:endpoints[SDC_sink_1]:info:access:table_name'
    } not in validation.results

    assert {
        'note': 'Snowflake sink table name "TREVOR_SECRETS" should end with "NO_PII" when "contains_pii" is False.',
        'status': 'WARNING',
        'reason': 'BAD_VALUE',
        'section': 'top_level:endpoints[SDC_sink_2]:info:access:table_name'
    } in validation.results