Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2SDC Data Exchange SFTP Endpoint module 

3""" 

4import logging 

5import os 

6 

7from sdc_etl_libs.sdc_data_exchange.SDCDataExchangeEndpoint import SDCDataExchangeEndpoint 

8from sdc_etl_libs.sdc_data_exchange_loggers.SDCLoggerFactory import SDCLoggerFactory 

9from sdc_etl_libs.sdc_data_exchange_loggers.SDCSnowflakeLogger import SDCSnowflakeLoggerEnums as LoggerEnums 

10from sdc_etl_libs.sdc_dataframe.Dataframe import Dataframe 

11from sdc_etl_libs.sdc_file_helpers.SDCFile import SDCFile 

12from sdc_etl_libs.sdc_file_helpers.SDCFileFactory import SDCFileFactory 

13from sdc_etl_libs.sdc_file_helpers.SDCFileNameHelpers import SDCFileNameHelpers 

14from sdc_etl_libs.sdc_filetransfer.SFTPFileTransfer import SFTP 

15from sdc_etl_libs.pgp_helpers.local_pgp import LocalPGP 

16 

17 

class SDCSFTPEndpoint(SFTP, SDCDataExchangeEndpoint):
    """
    Data exchange endpoint for SFTP. Combines the SFTP file transfer client
    with the SDCDataExchangeEndpoint interface so that files can be read
    from, written to and deleted from an SFTP path described by an endpoint
    schema.
    """

    def __init__(self):
        """
        SFTP class constructor.
        """

        super().__init__()
        self.exchange_type = "sftp"
        self.endpoint_type = None
        self.endpoint_tag = None
        self.data_schema = None
        self.endpoint_schema = None
        self.file_type = None
        self.file_regex = None
        self.file_name = None
        self.username = None
        self.password = None
        self.rsa_key = None
        self.host = None
        self.port = None
        self.path = None
        self.files = None
        self.logger = None
        self.endpoint_uuid = None
        self.empty_is_success = None
        self.minutes_until_abandoned = None
        self.days_threshold_to_delete = None
        self.pgp = None

    def check_optional_fields(self):
        """
        Check the existence and load any optional field specified for the SFTPEndpoint.
        A missing or empty "opts" section leaves the optional fields as None.
        :return: None.
        """

        # "opts" is itself optional: tolerate its absence instead of raising KeyError.
        opts = self.endpoint_schema["info"].get("opts") or {}
        self.days_threshold_to_delete = opts.get("days_threshold_to_delete")

    def create_exchange_endpoint(self, data_schema_, endpoint_schema_, **kwargs):
        """
        Creates a data exchange endpoint for SFTP. Establishes connection
        to SFTP and fills self.endpoint_items with the file statuses: from the
        logger when one is configured, otherwise from the listing of the
        SFTP path.
        :param data_schema_: Dict. Entire JSON data schema.
        :param endpoint_schema_: Dict. JSON data schema of endpoint.
        :param kwargs: Passed on to SDCFileNameHelpers.get_file_name_from_schema.
        :return: None.
        :raises Exception: If the retrieved credentials contain neither an
            "rsa_key" nor a "password".
        """

        # TODO: This will have to be updated to account for the new schema key layout
        # Endpoint Metadata Attributes
        self.endpoint_schema = endpoint_schema_
        self.data_schema = data_schema_
        self.endpoint_tag = self.endpoint_schema["tag"]
        self.endpoint_type = self.endpoint_schema["type"]

        # Endpoint Attributes
        info = self.endpoint_schema["info"]
        file_opts = info["file_info"]["opts"]
        self.file_type = info["file_info"]["type"]
        self.host = info["access"]["host"]
        self.port = info["access"]["port"]
        self.path = info["access"]["path"]

        # File Info Attributes
        self.file_regex = file_opts["file_regex"]
        self.headers = file_opts.get("headers")
        self.file_name = SDCFileNameHelpers.get_file_name_from_schema(self.endpoint_schema, kwargs=kwargs)
        self.pgp_key = file_opts.get("pgp_key")

        # Optional Fields
        self.check_optional_fields()

        # TODO: Five9ine & Experian uses RSA Keys.
        # Need to update rsa_keys to base64. But time with deployment of new sftp credentials code
        # ALL RSA key should be base64 encoded.

        # TODO: Unit Test connecting to SFTP that only takes a password
        # Get credentials from secrets, and mock up functionality

        # Grab Credentials
        self.__set_credentials(self.grab_credentials())

        if not (self.rsa_key or self.password):
            raise Exception('Retrieved credentials do not contain an "rsa_key" or "password" key-value pair.')

        self.connect(
            host_=self.host,
            username_=self.username,
            password_=self.password,
            rsa_key_=self.rsa_key,
            port_=self.port)

        # Initialize PGP object if encryption/decryption needed
        if self.pgp_key:
            self.pgp = LocalPGP()
            self.pgp.vault_get_keys(self.pgp_key)

        # Logger Attributes / Setup Logger
        logger_info = info.get("logger")
        if logger_info:
            self.logger = SDCLoggerFactory.get_logger(logging_info_=logger_info)
            self.endpoint_uuid = self.logger.generate_endpoint_uuid(self.endpoint_schema, True)
            self.empty_is_success = logger_info["opts"]["empty_is_success"]
            self.minutes_until_abandoned = logger_info["opts"]["minutes_until_abandoned"]

        # Generate Dictionary of Files Statuses
        self.endpoint_items = {"source": [], "sink_successful": [], "sink_failed": [], "sink_processing": []}
        if self.logger:
            self.endpoint_items["sink_successful"] = \
                self.logger.retrieve_successful_runs(self.endpoint_uuid, self.empty_is_success)
            self.endpoint_items["sink_failed"] = \
                self.logger.retrieve_failed_runs(self.endpoint_uuid, True, self.minutes_until_abandoned)
            self.endpoint_items["sink_processing"] = \
                self.logger.retrieve_non_abandoned_runs(self.endpoint_uuid, self.minutes_until_abandoned)
        else:
            # Without a logger, the listing of the SFTP path is the only record of state.
            files = self.get_obj_list(
                path_=self.path, obj_regex_=self.file_regex, give_full_path_=False, include_dirs_=True)
            if self.endpoint_type == "sink":
                self.endpoint_items["sink_successful"] = files
            elif self.endpoint_type == "source":
                self.endpoint_items["source"] = files

    def __set_credentials(self, creds_: dict = None):
        """
        Sets credential values to the following class attributes (if values exist):
            - self.username: SFTP username
            - self.password: SFTP password
            - self.rsa_key: private key
        :param creds_: Dict. Key/value pairs of secrets. Default = None.
        :return: None.
        """

        if creds_:
            self.username = creds_.get("username")
            self.password = creds_.get("password")
            self.rsa_key = creds_.get("rsa_key")

    def get_data(self, file_name_: str):
        """
        Reads a file from the SFTP path and returns it as an SDCFile object
        together with its record count.
        :param file_name_: Name of file to process.
        :return: Tuple of (SDCFile object, record count). The record count is
            None when no logger is configured.
        :raises ValueError: If file_name_ is None.
        """

        if file_name_ is None:
            raise ValueError('"file_name_" cannot be None')

        file_obj = self.get_file_as_file_object(os.path.join(self.path, file_name_))

        # Attempt decryption if key was a specified
        if self.pgp:
            file_obj = self.pgp.decrypt(file_obj)

        # Count the number of records in the file obj (only needed for logging)
        record_count = None
        if self.logger:
            record_count = self.get_file_obj_record_count(file_obj)
            if self.headers:
                # The header line is not a record.
                record_count -= 1
            record_count = max(record_count, 0)
            logging.info(f"{file_name_} contains {record_count:,} record(s).")

        # Convert file obj into SDCFile object
        sdc_file = SDCFileFactory.get_file(
            schema_=self.data_schema,
            endpoint_schema_=self.endpoint_schema,
            file_name_=file_name_,
            file_path_=self.path,
            file_obj_=file_obj)

        return sdc_file, record_count

    def write_data(self, data_, file_name_: str):
        """
        Writes a SDCDataframe or SDCFile object out to a file on SFTP. When a
        PGP key is configured the content is encrypted and the file is written
        with a ".pgp" extension.
        :param data_: Data to be written to SFTP. Can be SDCDataframe object or
            SDCFile object.
        :param file_name_: Name to write file as.
        :return: Log results of writing file to SFTP site as string (size in MB).
        :raises ValueError: If file_name_ is None or no file object could be
            produced from data_.
        """

        if file_name_ is None:
            raise ValueError('"file_name_" cannot be None')

        if self.logger:
            self.logger.write_stats = {"TYPE": self.exchange_type, "STATS": []}

        file_obj = None
        if isinstance(data_, SDCFile):
            file_obj = SDCFileFactory.get_endpoint_file_obj(self.endpoint_schema, data_)
        elif isinstance(data_, Dataframe):
            file_obj = data_.get_as_file_obj_from_endpoint_schema(self.endpoint_schema)

        # Drop a trailing ".pgp" extension so it is never doubled below.
        # str.strip(".pgp") must not be used here: it removes any of the
        # characters ".", "p" and "g" from both ends of the name.
        pgp_ext = ".pgp"
        if file_name_.endswith(pgp_ext):
            file_name_ = file_name_[:-len(pgp_ext)]

        # Attempt encryption if key was a specified (and there is something to encrypt)
        if file_obj and self.pgp:
            file_obj = self.pgp.encrypt(file_obj)
            file_name_ += pgp_ext

        if not file_obj:
            logging.error("File object is empty: %s", file_name_)
            raise ValueError("File object is empty: {}".format(file_name_))

        self.write_file(
            self.path,
            file_name_,
            file_obj,
            return_stats_=False)

        remote_path = os.path.join(self.path, file_name_)
        size_stats = self.get_obj_stats(remote_path)

        result = f"{round(size_stats.st_size / 1000000, 5):,} MB"
        logging.info(result)

        if self.logger:
            # The written file is only read back when its record count is logged.
            written_file_obj = self.get_file_as_file_object(remote_path)
            record_count = self.get_file_obj_record_count(written_file_obj)
            if self.headers:
                # The header line is not a record.
                record_count -= 1
            record_count = max(record_count, 0)

            self.logger.write_stats["STATS"].append({
                "DEST_TYPE": LoggerEnums.SFTPWritesDestTypes.MAIN.value,
                "FILE_SIZE_BYTES": size_stats.st_size,
                "TOTAL_RECORDS": record_count
            })

        return result

    def delete_data(self, delete_criteria):
        """
        Deletes files that matches defined criteria. Each criteria narrows down
        the list produced by the previous one. A failure to delete a single
        file is logged and does not stop the remaining deletions.
        :param delete_criteria: List of criterias that the files must match in order to be deleted
        :type delete_criteria: List(DeleteCriterias)
        :return: None
        :raises Exception: If the files cannot be listed or filtered.
        """

        # NOTE(review): with an empty delete_criteria the full listing stays
        # selected, so every entry in self.path is deleted - confirm this is intended.
        try:
            files_to_delete = self.client.listdir_attr(self.path)
            for criteria in delete_criteria:
                files_to_delete = criteria.get_files_to_delete(files_to_delete)
            logging.info("Number of files to delete from FTP Server: %s", len(files_to_delete))
            for file in files_to_delete:
                try:
                    # Join the path (as get_data/write_data do) so a path without
                    # a trailing separator still resolves to the right file.
                    self.client.remove(os.path.join(self.path, file.filename))
                    logging.info("File %s deleted", file.filename)
                except Exception as e:
                    logging.error("Error deleting file %s: %s", file.filename, str(e), exc_info=True)
        except Exception as e:
            logging.error("Error deleting files from FTP Server: %s", str(e), exc_info=True)
            raise Exception("Error deleting files from FTP Server") from e

262 raise Exception("Error deleting files from FTP Server")