Skip to content

Commit

Permalink
Merge pull request #90 from podaac/develop
Browse files Browse the repository at this point in the history
Develop in main. closes #89 and closes #62 and closes #62
  • Loading branch information
mike-gangl authored Jun 22, 2022
2 parents 519abea + 368c031 commit 1a5f534
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 64 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

## [1.10.2]
### Fixed
- Fixed an issue where using a default global bounding box prevented download of data that didn't use the horizontal spatial domain [87](https://github.com/podaac/data-subscriber/issues/87)
- Fixed limit option not being respected. [86](https://github.com/podaac/data-subscriber/issues/86)

## [1.10.1]
### Fixed
- Support for SHA-256 and SHA-512 checksums
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "podaac-data-subscriber"
version = "1.10.1"
version = "1.10.2"
description = "PO.DAAC Data Subscriber Command Line Tool"
authors = ["PO.DAAC <[email protected]>"]
readme = "README.md"
Expand Down
19 changes: 10 additions & 9 deletions subscriber/podaac_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import tenacity
from datetime import datetime

__version__ = "1.10.1"
__version__ = "1.10.2"
extensions = [".nc", ".h5", ".zip", ".tar.gz"]
edl = "urs.earthdata.nasa.gov"
cmr = "cmr.earthdata.nasa.gov"
Expand Down Expand Up @@ -129,16 +129,17 @@ def refresh_token(old_token: str, client_id: str):


def validate(args):
bounds = args.bbox.split(',')
if len(bounds) != 4:
raise ValueError(
"Error parsing '--bounds': " + args.bbox + ". Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces ") # noqa E501
for b in bounds:
try:
float(b)
except ValueError:
if args.bbox is not None:
bounds = args.bbox.split(',')
if len(bounds) != 4:
raise ValueError(
"Error parsing '--bounds': " + args.bbox + ". Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces ") # noqa E501
for b in bounds:
try:
float(b)
except ValueError:
raise ValueError(
"Error parsing '--bounds': " + args.bbox + ". Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces ") # noqa E501

if args.startDate:
try:
Expand Down
46 changes: 23 additions & 23 deletions subscriber/podaac_data_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,13 @@
__version__ = pa.__version__

page_size = 2000

edl = pa.edl
cmr = pa.cmr
token_url = pa.token_url


# The lines below are to get the IP address. You can make this static and
# assign a fixed value to the IPAddr variable


def parse_cycles(cycle_input):
# if cycle_input is None:
# return None
Expand Down Expand Up @@ -66,14 +63,14 @@ def create_parser():
help="The ISO date time before which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z") # noqa E501
parser.add_argument("-ed", "--end-date", required=False, dest="endDate",
help="The ISO date time after which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z") # noqa E501

# Adding optional arguments
parser.add_argument("-f", "--force", dest="force", action="store_true", help = "Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches") # noqa E501

# spatiotemporal arguments
parser.add_argument("-b", "--bounds", dest="bbox",
help="The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b=\"-180,-90,180,90\" syntax when calling from the command line. Default: \"-180,-90,180,90\".",
default="-180,-90,180,90") # noqa E501
default=None) # noqa E501

# Arguments for how data are stored locally - much processing is based on
# the underlying directory structure (e.g. year/Day-of-year)
Expand Down Expand Up @@ -101,9 +98,8 @@ def create_parser():
parser.add_argument("-p", "--provider", dest="provider", default='POCLOUD',
help="Specify a provider for collection search. Default is POCLOUD.") # noqa E501

parser.add_argument("--limit", dest="limit", default='2000', type=int,
help="Integer limit for number of granules to download. Useful in testing. Defaults to " + str(
page_size)) # noqa E501
parser.add_argument("--limit", dest="limit", default=None, type=int,
help="Integer limit for number of granules to download. Useful in testing. Defaults to no limit.") # noqa E501

return parser

Expand Down Expand Up @@ -138,8 +134,9 @@ def run(args=None):
process_cmd = args.process_cmd
data_path = args.outputDirectory

if args.limit is not None:
page_size = args.limit
download_limit = None
if args.limit is not None and args.limit > 0:
download_limit = args.limit

if args.offset:
ts_shift = timedelta(hours=int(args.offset))
Expand All @@ -158,9 +155,6 @@ def run(args=None):
logging.info("NOTE: Making new data directory at " + data_path + "(This is the first run.)")
makedirs(data_path, exist_ok=True)

# Change this to whatever extent you need. Format is W Longitude,S Latitude,E Longitude,N Latitude
bounding_extent = args.bbox

if search_cycles is not None:
cmr_cycles = search_cycles
params = [
Expand All @@ -169,7 +163,6 @@ def run(args=None):
('provider', provider),
('ShortName', short_name),
('token', token),
('bounding_box', bounding_extent),
]
for v in cmr_cycles:
params.append(("cycle[]", v))
Expand All @@ -179,20 +172,20 @@ def run(args=None):
else:
temporal_range = pa.get_temporal_range(start_date_time, end_date_time,
datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) # noqa E501
params = {
'page_size': page_size,
'sort_key': "-start_date",
'provider': provider,
'ShortName': short_name,
'temporal': temporal_range,
'token': token,
'bounding_box': bounding_extent,
}
params = [
('page_size', page_size),
('sort_key', "-start_date"),
('provider', provider),
('ShortName', short_name),
('temporal', temporal_range),
]
if args.verbose:
logging.info("Temporal Range: " + temporal_range)

if args.verbose:
logging.info("Provider: " + provider)
if args.bbox is not None:
params.append(('bounding_box', args.bbox))

# If 401 is raised, refresh token and try one more time
try:
Expand Down Expand Up @@ -247,6 +240,8 @@ def run(args=None):
# Make this a non-verbose message
# if args.verbose:
logging.info("Found " + str(len(downloads)) + " total files to download")
if download_limit:
logging.info("Limiting downloads to " + str(args.limit) + " total files")
if args.verbose:
logging.info("Downloading files with extensions: " + str(extensions))

Expand Down Expand Up @@ -277,6 +272,11 @@ def run(args=None):
pa.process_file(process_cmd, output_path, args)
logging.info(str(datetime.now()) + " SUCCESS: " + f)
success_cnt = success_cnt + 1

#if limit is set and we're at or over it, stop downloading
if download_limit and success_cnt >= download_limit:
break

except Exception:
logging.warning(str(datetime.now()) + " FAILURE: " + f, exc_info=True)
failure_cnt = failure_cnt + 1
Expand Down
52 changes: 21 additions & 31 deletions subscriber/podaac_data_subscriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def create_parser():
default=False) # noqa E501
parser.add_argument("-b", "--bounds", dest="bbox",
help="The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b=\"-180,-90,180,90\" syntax when calling from the command line. Default: \"-180,-90,180,90\".",
default="-180,-90,180,90") # noqa E501
default=None) # noqa E501

# Arguments for how data are stored locally - much processing is based on
# the underlying directory structure (e.g. year/Day-of-year)
Expand Down Expand Up @@ -178,46 +178,36 @@ def run(args=None):
else:
logging.warning("No .update__" + short_name + " in the data directory. (Is this the first run?)")

# Change this to whatever extent you need. Format is W Longitude,S Latitude,E Longitude,N Latitude
bounding_extent = args.bbox

# There are several ways to query for CMR updates that occurred during a given timeframe. Read on in the CMR Search documentation:
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#c-with-new-granules (Collections)
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#c-with-revised-granules (Collections)
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#g-production-date (Granules)
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#g-created-at (Granules)
# The `created_at` parameter works for our purposes. It's a granule search parameter that returns the records ingested since the input timestamp.

if defined_time_range:
# if(data_since):
temporal_range = pa.get_temporal_range(start_date_time, end_date_time,
datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) # noqa E501

params = {
'page_size': page_size,
'sort_key': "-start_date",
'provider': provider,
'ShortName': short_name,
'updated_since': data_within_last_timestamp,
'token': token,
'bounding_box': bounding_extent,
}
params = [
('page_size',page_size),
('sort_key', "-start_date"),
('provider', provider),
('ShortName', short_name),
('updated_since', data_within_last_timestamp),
('token', token),
]

if defined_time_range:
params = {
'page_size': page_size,
'sort_key': "-start_date",
'provider': provider,
'updated_since': data_within_last_timestamp,
'ShortName': short_name,
'temporal': temporal_range,
'token': token,
'bounding_box': bounding_extent,
}

params = [
('page_size', page_size),
('sort_key', "-start_date"),
('provider', provider),
('updated_since', data_within_last_timestamp),
('ShortName', short_name),
('temporal', temporal_range),
('token', token),
]
if args.verbose:
logging.info("Temporal Range: " + temporal_range)

if args.bbox is not None:
params.append(('bounding_box', args.bbox))

if args.verbose:
logging.info("Provider: " + provider)
logging.info("Updated Since: " + data_within_last_timestamp)
Expand Down
13 changes: 13 additions & 0 deletions tests/test_downloader_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,19 @@ def create_downloader_args(args):
args2 = parser.parse_args(args)
return args2


# Test the downloader's --limit option on MUR25 data: the search window is
# 2020-01-01 through 2020-01-30, but with --limit 1 exactly one file should
# end up in the output directory.
@pytest.mark.regression
def test_downloader_limit_MUR():
    """Check that ``--limit 1`` leaves exactly one downloaded file on disk."""
    output_dir = './MUR25-JPL-L4-GLOB-v04.2'

    # Start from a clean slate so files from an earlier run are not counted.
    shutil.rmtree(output_dir, ignore_errors=True)

    args2 = create_downloader_args([
        '-c', 'MUR25-JPL-L4-GLOB-v04.2',
        '-d', output_dir,
        '-sd', '2020-01-01T00:00:00Z',
        '-ed', '2020-01-30T00:00:00Z',
        '--limit', '1',
    ])

    try:
        pdd.run(args2)
        # Count only regular files directly inside the output directory.
        downloaded = [
            name for name in os.listdir(output_dir)
            if os.path.isfile(os.path.join(output_dir, name))
        ]
        assert len(downloaded) == 1
    finally:
        # Clean up even when the run or the assertion fails, so a failed test
        # does not leave downloaded data behind.
        shutil.rmtree(output_dir, ignore_errors=True)

#Test the downloader on MUR25 data for start/stop/, yyyy/mmm/dd dir structure,
# and offset. Running it a second time to ensure it downloads the files again-
# the downloader doesn't care about updates.
Expand Down

0 comments on commit 1a5f534

Please sign in to comment.