diff --git a/trough/_download.py b/trough/_download.py
index 1da1953..2cefd78 100644
--- a/trough/_download.py
+++ b/trough/_download.py
@@ -1,3 +1,4 @@
+import time
 import numpy as np
 from datetime import datetime, timedelta
 import math
@@ -11,6 +12,7 @@
 import functools
 import logging
 import warnings
+
 try:
     import h5py
     from madrigalWeb import madrigalWeb
@@ -18,10 +20,10 @@
 except ImportError as imp_err:
     warnings.warn(f"Packages required for recreating dataset not installed: {imp_err}")
 
-
 from trough.exceptions import InvalidConfiguration
 from trough._arb import parse_arb_fn
 
+RETRIES = 3
 logger = logging.getLogger(__name__)
 
 
@@ -134,26 +136,35 @@ def __init__(self, download_dir, user_name, user_email, user_affil):
 
     def _get_tec_experiments(self, start_date: datetime, end_date: datetime):
         logger.info(f"getting TEC experiments between {start_date} and {end_date}")
-        experiments = self.server.getExperiments(
-            8000,
-            start_date.year, start_date.month, start_date.day, start_date.hour, start_date.minute, start_date.second,
-            end_date.year, end_date.month, end_date.day, end_date.hour, end_date.minute, end_date.second,
-        )
-        return experiments
-
-    def _download_file(self, tec_file, local_path, retries=3):
-        logger.info(f"downloading TEC file {tec_file} to {local_path}")
-        for retry in range(retries):
+        err = Exception
+        for retry in range(RETRIES):
             try:
-                if pathlib.Path(local_path).exists():
-                    logger.info(f"already exists: {local_path}")
-                else:
+                return self.server.getExperiments(
+                    8000,
+                    start_date.year, start_date.month, start_date.day, start_date.hour, start_date.minute,
+                    start_date.second, end_date.year, end_date.month, end_date.day, end_date.hour, end_date.minute,
+                    end_date.second,
+                )
+            except ValueError as err:
+                logger.warning(f'Failure getting experiments, retrying {retry}')
+                time.sleep(10)
+        raise err
+
+    def _download_file(self, tec_file, local_path):
+        logger.info(f"downloading TEC file {tec_file} to {local_path}")
+        if pathlib.Path(local_path).exists():
+            logger.info(f"already exists: {local_path}")
+        else:
+            err = Exception
+            for retry in range(RETRIES):
+                try:
                     return self.server.downloadFile(
                         tec_file, local_path, self.user_name, self.user_email, self.user_affil, 'hdf5'
                     )
-            except(socket.timeout, TimeoutError):
-                logger.error(f'Failure downloading {tec_file} because it took more than allowed number of seconds')
-                self.server = madrigalWeb.MadrigalData("http://cedar.openmadrigal.org")
+                except ValueError as err:
+                    logger.warning(f'Failure downloading {tec_file}')
+                    time.sleep(10)
+            raise err
 
     def _download_files(self, files):
         local_files = []
@@ -178,8 +189,17 @@ def _get_file_list(self, start_date, end_date):
             if cache_key in self.cache:
                 files = self.cache[cache_key]
             else:
-                experiment_files = self.server.getExperimentFiles(experiment.id)
-                files = [exp.name for exp in experiment_files if exp.kindat == 3500]
+                err = Exception
+                for retry in range(RETRIES):
+                    try:
+                        experiment_files = self.server.getExperimentFiles(experiment.id)
+                        files = [exp.name for exp in experiment_files if exp.kindat == 3500]
+                        break
+                    except ValueError as err:
+                        logger.warning(f'Failure getting experiment {experiment.id}')
+                        time.sleep(10)
+                else:
+                    raise err
             tec_files[cache_key] = files
         return tec_files
 
@@ -189,8 +209,8 @@ def _verify_local_file(local_file):
         try:
             with h5py.File(local_file, 'r') as f:
                 tec = f['Data']['Array Layout']['2D Parameters']['tec'][()]
                 timestamps = f['Data']['Array Layout']['timestamps'][()]
-        except Exception as e:
-            logger.warning(f"bad local file: {local_file}, error: {e}")
+        except Exception as err:
+            logger.warning(f"bad local file: {local_file}, error: {err}")
             return False
         return (timestamps.shape[0] > 10) and (np.sum(np.isfinite(tec)) > 100)
@@ -316,8 +336,8 @@ def _verify_local_file(local_file):
         try:
             with h5py.File(local_file, 'r') as f:
                 lon = f['MODEL_NORTH_GEOGRAPHIC_LONGITUDE'][()]
-        except Exception as e:
-            logger.warning(f"bad local file: {local_file}, error: {e}")
+        except Exception as err:
+            logger.warning(f"bad local file: {local_file}, error: {err}")
             return False
         return lon.shape[0] > 10
 
@@ -329,24 +349,30 @@ def _verify_files(self, local_files, server_files):
         return bad_server_files
 
 
-def _download_ftp_file(server, server_file: str, local_path: str, retries=3):
+def _download_ftp_file(server, server_file: str, local_path: str):
     logger.info(f"downloading file {server_file} to {local_path}")
-    for retry in range(retries):
+    err = Exception
+    for retry in range(RETRIES):
         try:
             with open(local_path, 'wb') as f:
                 server.retrbinary(f'RETR {str(server_file)}', f.write)
             return
-        except(socket.timeout, TimeoutError):
+        except(socket.timeout, TimeoutError) as err:
             logger.error(f'Failure downloading {server_file} because it took more than allowed number of seconds')
+            time.sleep(10)
+    raise err
 
 
-def _download_http_file(http_file: str, local_path: str, retries=3):
+def _download_http_file(http_file: str, local_path: str):
     logger.info(f"downloading file {http_file} to {local_path}")
-    for retry in range(retries):
+    err = Exception
+    for retry in range(RETRIES):
         try:
             with request.urlopen(http_file, timeout=60) as r:
                 with open(local_path, 'wb') as f:
                     f.write(r.read())
             return
-        except(socket.timeout, TimeoutError):
+        except(socket.timeout, TimeoutError) as err:
             logger.error(f'Failure downloading {http_file} because it took more than allowed number of seconds')
+            time.sleep(10)
+    raise err
diff --git a/trough/_trough.py b/trough/_trough.py
index f83f987..a0eb610 100644
--- a/trough/_trough.py
+++ b/trough/_trough.py
@@ -79,7 +79,11 @@ def _get_weighted_kp(times, omni_data, tau=.6, T=10):
     values = 2.1 * np.log(.2 * ap_tau + 1)
     if (times.values[1] - times.values[0]).astype('timedelta64[m]').astype(int) == 60:
         return values
-    ut_initial = np.arange(times.values[0], times.values[-1] + np.timedelta64(1, 'h'), np.timedelta64(1, 'h')).astype('datetime64[s]').astype(int)
+    ut_initial = np.arange(
+        times.values[0],
+        times.values[-1] + np.timedelta64(1, 'h'),
+        np.timedelta64(1, 'h')
+    ).astype('datetime64[s]').astype(int)
     ut_final = times.values.astype('datetime64[s]').astype(int)
     return np.interp(ut_final, ut_initial, values)
 
diff --git a/trough/utils.py b/trough/utils.py
index e43a035..6f5eb34 100644
--- a/trough/utils.py
+++ b/trough/utils.py
@@ -147,7 +147,7 @@ def check(start, end, dt, hemisphere, processed_file):
         logger.info(f"downloaded data already processed {processed_file=}, checking...")
         return False
     except KeyError:
-        logger.info(f"processed file doesn't have the requested data")
+        logger.info("processed file doesn't have the requested data")
     except Exception as e:
         logger.info(f"error reading processed file {processed_file=}: {e}, removing and reprocessing")
         processed_file.unlink()
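
Note on the retry pattern introduced above (not part of the diff itself): a name bound with "except ValueError as err:" is cleared by Python when the except block exits, so the trailing "raise err" statements can fail with UnboundLocalError once all retries are exhausted. Below is a minimal, hypothetical sketch of the same retry-then-re-raise idea that keeps its own reference to the last exception; the helper name retry_call and its parameters are illustrative and not part of trough.

import logging
import time

logger = logging.getLogger(__name__)
RETRIES = 3  # same constant the diff introduces


def retry_call(func, *args, exceptions=(ValueError,), delay=10, **kwargs):
    # Call func(*args, **kwargs), retrying up to RETRIES times on the given exceptions.
    err = None
    for retry in range(RETRIES):
        try:
            return func(*args, **kwargs)
        except exceptions as exc:
            err = exc  # keep a reference; 'exc' itself is unbound when this block exits
            logger.warning(f"attempt {retry + 1}/{RETRIES} failed: {exc}")
            time.sleep(delay)
    raise err  # all retries failed: re-raise the last captured exception

With such a helper, a call like self.server.getExperiments(...) could be wrapped as retry_call(self.server.getExperiments, 8000, ...) instead of repeating the loop in each method; this is a sketch only, not the approach the diff takes.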
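
For reference, the rewrapped ut_initial line in trough/_trough.py builds an hourly time grid; together with the following lines it converts both grids to integer seconds so that hourly weighted-Kp values can be resampled onto the cadence of times with np.interp. A small self-contained illustration of that step, using made-up numbers rather than trough data:

import numpy as np

# target timestamps at 5-minute cadence (illustrative)
times = np.arange(np.datetime64('2021-01-01T00:00'), np.datetime64('2021-01-01T02:05'),
                  np.timedelta64(5, 'm'))
# hourly grid spanning the same interval, built the same way as ut_initial in _get_weighted_kp
hourly = np.arange(times[0], times[-1] + np.timedelta64(1, 'h'), np.timedelta64(1, 'h'))
hourly_values = np.array([2.0, 3.0, 2.5])  # one weighted-Kp value per hour (made up)

# datetime64 -> integer seconds so np.interp sees plain numbers
ut_initial = hourly.astype('datetime64[s]').astype(int)
ut_final = times.astype('datetime64[s]').astype(int)
resampled = np.interp(ut_final, ut_initial, hourly_values)  # one value per 5-minute timestamp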