Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import json 

2import os 

3import sys 

4 

5import pytest 

6 

7sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../../../") 

8from sdc_etl_libs.sdc_data_schema.schema_exceptions import DataSchemaFailedValidation 

9from sdc_etl_libs.sdc_data_schema.schema_validation import (ResultTypeStatuses, SchemaValidation) 

10 

11 

def test_validation__key_contains_pii__check_fields_when_true():
    """
    Check the top-level "contains_pii": true flag against the per-field "is_pii" flags.

    When at least one field has "is_pii" set to True the schema validates with zero errors. When no field does,
    validation raises DataSchemaFailedValidation and records a top-level CONFLICT error.
    """

    # The exact result entry that reports "contains_pii" being True without any PII field.
    missing_pii_conflict = {
        'status': 'ERROR',
        'reason': 'CONFLICT',
        'section': 'top_level',
        'note': '"contains_pii" is True but no fields have the key "is_pii" set to True.'
    }

    # Case 1: the "SESSION ID" field is flagged as PII, so the top-level flag is consistent.
    schema_with_pii_field = json.loads("""
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": true,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                },
                "is_pii": true
            }
        ]
    }
    """)
    validator = SchemaValidation()
    validator.validate_schema(schema_with_pii_field, validation_type_="full")

    assert validator.ttl_errors == 0
    assert missing_pii_conflict not in validator.results

    # Case 2: same schema, but no field is flagged as PII, which contradicts "contains_pii": true.
    schema_without_pii_field = json.loads("""
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": true,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                }
            }
        ]
    }
    """)
    validator = SchemaValidation()
    with pytest.raises(DataSchemaFailedValidation):
        validator.validate_schema(schema_without_pii_field, validation_type_="full")

    # Checked outside the "raises" block: the results are inspected after the exception has been caught.
    assert missing_pii_conflict in validator.results

96 

97 

def test_validation__key_contains_pii__check_fields_when_false():
    """
    Ensure that if "contains_pii" is set as False then an error is raised if at least one field has "is_pii" set as
    True. If no fields have "is_pii" set as True, then no errors are raised.
    """

    # Result entry recorded when "contains_pii" is False but the "SESSION ID" field is flagged as PII.
    unexpected_pii_conflict = {
        'status': 'ERROR',
        'reason': 'CONFLICT',
        'section': 'top_level',
        'note': '"contains_pii" is False but the following fields have the key "is_pii" set to True: [\'SESSION ID\'].'
    }

    # Test without fields containing pii: consistent with "contains_pii": false, so no errors are expected.
    full_schema_fields = """
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": false,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                }
            }
        ]
    }
    """
    full_schema = json.loads(full_schema_fields)
    validation = SchemaValidation()
    validation.validate_schema(full_schema, validation_type_="full")

    assert validation.ttl_errors == 0
    assert unexpected_pii_conflict not in validation.results

    # Test with a field containing pii: "SESSION ID" has "is_pii": true, contradicting "contains_pii": false.
    full_schema_fields = """
    {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": false,
        "endpoints": [],
        "fields": [
            {
                "name": "CALL ID",
                "type": {
                    "type": "long"
                }
            },
            {
                "name": "SESSION ID",
                "type": {
                    "type": "string"
                },
                "is_pii": true
            }
        ]
    }
    """
    full_schema = json.loads(full_schema_fields)
    validation = SchemaValidation()
    with pytest.raises(DataSchemaFailedValidation):
        validation.validate_schema(full_schema, validation_type_="full")

    # Inspect the results only after the expected exception has been caught.
    assert unexpected_pii_conflict in validation.results

182 

183 

def _snowflake_sink_endpoint(tag: str, table_name: str) -> dict:
    """
    Build one snowflake sink endpoint for the table-name test.

    Every sink used by the test shares the same account, database, schema, credentials and opts; only the tag and the
    table name vary, so those are the only parameters.
    """
    return {
        "type": "sink",
        "tag": tag,
        "info": {
            "type": "snowflake",
            "access": {
                "account": "sd75523",
                "database": "DATA_ENGINEERING",
                "table_name": table_name,
                "schema": "SEALS",
                "credentials": {
                    "type": "aws_secrets",
                    "opts": {
                        "name": "snowflake/service_account/seal-secrets"
                    }
                }
            },
            "opts": {
                "upsert": False,
                "dedupe": False,
                "style": "snowflake"
            }
        }
    }


def _snowflake_pii_schema(contains_pii: bool) -> dict:
    """
    Build a full schema with three snowflake sinks whose table names end in "_PII", "_NO_PII" and neither.

    The single field's "is_pii" flag mirrors contains_pii, as in the original fixtures, so the top-level flag and the
    field flag never disagree and the test can focus on the table-name warnings.
    """
    return {
        "namespace": "Test",
        "name": "test",
        "type": "object",
        "country_code": "USA",
        "estimated_row_size": "10b",
        "estimated_row_count": 3000,
        "contains_pii": contains_pii,
        "endpoints": [
            _snowflake_sink_endpoint("SDC_sink_0", "TREVOR_SECRETS_PII"),
            _snowflake_sink_endpoint("SDC_sink_1", "TREVOR_SECRETS_NO_PII"),
            _snowflake_sink_endpoint("SDC_sink_2", "TREVOR_SECRETS"),
        ],
        "fields": [
            {
                "name": "ALL_OF_TREVOR_WNUKS_SECRETS",
                "type": {
                    "type": "string"
                },
                "is_pii": contains_pii
            }
        ]
    }


def _table_name_warning(tag: str, note: str) -> dict:
    """Build the result entry expected for a badly named snowflake sink table on the endpoint tagged `tag`."""
    return {
        'note': note,
        'status': 'WARNING',
        'reason': 'BAD_VALUE',
        'section': 'top_level:endpoints[{}]:info:access:table_name'.format(tag)
    }


def test_validation__key_contains_pii__correct_snowflake_table_names():
    """
    Ensure that snowflake sink endpoints have the tables named correctly. If "contains_pii" is set as True, then the
    table name should end with "_PII". If "contains_pii" is set as False, then the table name should end with "_NO_PII".
    Raise a warning if table name is incorrect according to "contains_pii", or if table does not end with either "_PII"
    or "_NO_PII".
    """

    # Test for correct snowflake table names when "contains_pii" = True.
    # Only SDC_sink_0 ("..._PII") is named correctly, so exactly the other two sinks should warn.
    validation = SchemaValidation()
    validation.validate_schema(_snowflake_pii_schema(contains_pii=True), validation_type_="full")

    assert validation.ttl_warnings == 2

    assert _table_name_warning(
        'SDC_sink_0',
        'Snowflake sink table name "TREVOR_SECRETS_PII" should end with "_PII" when "contains_pii" is True.'
    ) not in validation.results

    assert _table_name_warning(
        'SDC_sink_1',
        'Snowflake sink table name "TREVOR_SECRETS_NO_PII" should end with "_PII" when "contains_pii" is True.'
    ) in validation.results

    assert _table_name_warning(
        'SDC_sink_2',
        'Snowflake sink table name "TREVOR_SECRETS" should end with "_PII" when "contains_pii" is True.'
    ) in validation.results

    # Test for correct snowflake table names when "contains_pii" = False.
    # Only SDC_sink_1 ("..._NO_PII") is named correctly, so exactly the other two sinks should warn.
    # NOTE(review): the expected notes say "NO_PII" (no leading underscore) although the docstring says "_NO_PII";
    # the text is kept verbatim because it has to match what the validator emits - confirm against the validator.
    validation = SchemaValidation()
    validation.validate_schema(_snowflake_pii_schema(contains_pii=False), validation_type_="full")

    assert validation.ttl_warnings == 2

    assert _table_name_warning(
        'SDC_sink_0',
        'Snowflake sink table name "TREVOR_SECRETS_PII" should end with "NO_PII" when "contains_pii" is False.'
    ) in validation.results

    assert _table_name_warning(
        'SDC_sink_1',
        'Snowflake sink table name "TREVOR_SECRETS_NO_PII" should end with "NO_PII" when "contains_pii" is False.'
    ) not in validation.results

    assert _table_name_warning(
        'SDC_sink_2',
        'Snowflake sink table name "TREVOR_SECRETS" should end with "NO_PII" when "contains_pii" is False.'
    ) in validation.results