From 0e4f3928c39e32c93acf1671389fbc9c65121544 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 16 Apr 2024 19:57:05 -0400 Subject: [PATCH] Fix CMR-related type hints (#510) There were a number of type hints in `search.py` and `api.py` related to CMR queries that were incorrect. These were fixed. In addition, there were a number of other static type errors that were masked because of ignored `cmr` imports. Added type stubs for `python_cmr` library to unmask and address these additional type errors. In addition: - Aligned vcrpy usage with VCRTestCase as per https://vcrpy.readthedocs.io/en/latest/usage.html#unittest-integration - Restored use of session for CMR paged queries, which was accidentally removed with the introduction of Search-After functionality. - Wrapped a number of docstrings at 88 characters per ruff configuration Fixes #508 --- .gitignore | 76 ++- CHANGELOG.md | 4 + CONTRIBUTING.md | 9 + ci/environment-mindeps.yaml | 3 + earthaccess/api.py | 32 +- earthaccess/py.typed | 0 earthaccess/search.py | 608 +++++++++++++----- poetry.lock | 17 +- pyproject.toml | 32 +- scripts/lint.sh | 5 +- stubs/cmr/__init__.pyi | 10 + stubs/cmr/queries.pyi | 108 ++++ tests/__init__.py | 0 ...esults.test_collections_less_than_2k.yaml} | 0 ...esults.test_collections_more_than_2k.yaml} | 0 .../TestResults.test_data_links.yaml | 466 ++++++++++++++ ...KM_2000.yaml => TestResults.test_get.yaml} | 0 ...estResults.test_get_all_less_than_2k.yaml} | 0 ...estResults.test_get_all_more_than_2k.yaml} | 0 ... 
TestResults.test_get_more_than_2000.yaml} | 0 tests/unit/test_auth.py | 8 +- tests/unit/test_results.py | 169 ++--- 22 files changed, 1241 insertions(+), 306 deletions(-) create mode 100644 earthaccess/py.typed create mode 100644 stubs/cmr/__init__.pyi create mode 100644 stubs/cmr/queries.pyi create mode 100644 tests/__init__.py rename tests/unit/fixtures/vcr_cassettes/{PODAAC.yaml => TestResults.test_collections_less_than_2k.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{ALL.yaml => TestResults.test_collections_more_than_2k.yaml} (100%) create mode 100644 tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml rename tests/unit/fixtures/vcr_cassettes/{MOD02QKM_2000.yaml => TestResults.test_get.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{TELLUS_GRAC.yaml => TestResults.test_get_all_less_than_2k.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{CYGNSS.yaml => TestResults.test_get_all_more_than_2k.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{MOD02QKM.yaml => TestResults.test_get_more_than_2000.yaml} (100%) diff --git a/.gitignore b/.gitignore index ca8204ba..ab1f58ce 100644 --- a/.gitignore +++ b/.gitignore @@ -20,8 +20,78 @@ docs/tutorials/data tests/integration/data .ruff_cache -# OS X +notebooks/data/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Created by https://www.toptal.com/developers/gitignore/api/macos +# Edit at https://www.toptal.com/developers/gitignore?templates=macos + +### macOS ### +# General .DS_Store +.AppleDouble +.LSOverride -notebooks/data/ -.vscode +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files 
+*.icloud + +# End of https://www.toptal.com/developers/gitignore/api/macos + +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode + +### VisualStudioCode ### +.vscode/ + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode + +# Created by https://www.toptal.com/developers/gitignore/api/direnv +# Edit at https://www.toptal.com/developers/gitignore?templates=direnv + +### direnv ### +.direnv +.envrc + +# End of https://www.toptal.com/developers/gitignore/api/direnv diff --git a/CHANGELOG.md b/CHANGELOG.md index a2002d23..fe0efb32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ * fixed 483 by extracting a common CMR query method for collections and granules using SearchAfter header * Added VCR support for verifying the API call to CMR and the parsing of returned results without relying on CMR availability post development +* Enhancements: + * Corrected and enhanced static type hints for functions and methods that make + CMR queries or handle CMR query results (#508) + ## [v0.9.0] 2024-02-28 * Bug fixes: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 300b5387..a8415a1a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,6 +84,15 @@ Finally, for _development dependencies only_, you must add an entry to make format lint ``` +We attempt to provide comprehensive type annotations within this repository. If +you do not provide fully annotated functions or methods, the `lint` command will +fail. Over time, we plan to increase type-checking strictness in order to +ensure more precise, beneficial type annotations. + +We have included type stubs for the untyped `python-cmr` library, which we +intend to eventually upstream. 
Since `python-cmr` exposes the `cmr` package, +the stubs appear under `stubs/cmr`. + ### Requirements to merge code (Pull Request Process) - you must include test coverage diff --git a/ci/environment-mindeps.yaml b/ci/environment-mindeps.yaml index 75037c51..cb8fd367 100644 --- a/ci/environment-mindeps.yaml +++ b/ci/environment-mindeps.yaml @@ -17,11 +17,14 @@ dependencies: - multimethod=1.8 - python-dateutil=2.8.2 - importlib-resources=6.3.2 + - typing-extensions=4.10.0 # test dependencies - responses - pytest - pytest-cov + - python-magic - mypy + - types-python-dateutil - types-requests - types-setuptools - ruff diff --git a/earthaccess/api.py b/earthaccess/api.py index a7d35fb0..796dbcb5 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -1,13 +1,12 @@ -from typing import Any, Dict, List, Optional, Type, Union - import requests import s3fs from fsspec import AbstractFileSystem +from typing_extensions import Any, Dict, List, Optional, Union import earthaccess from .auth import Auth -from .results import DataGranule +from .results import DataCollection, DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store from .utils import _validation as validate @@ -28,9 +27,7 @@ def _normalize_location(location: Optional[str]) -> Optional[str]: return location -def search_datasets( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataCollection]: +def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: """Search datasets using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -54,6 +51,9 @@ def search_datasets( A list of DataCollection results that can be used to get information about a dataset, e.g. concept_id, doi, etc. + Raises: + RuntimeError: The CMR query failed. 
+ Examples: ```python datasets = earthaccess.search_datasets( @@ -78,9 +78,7 @@ def search_datasets( return query.get_all() -def search_data( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataGranule]: +def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: """Search dataset granules using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -104,6 +102,9 @@ def search_data( a list of DataGranules that can be used to access the granule files by using `download()` or `open()`. + Raises: + RuntimeError: The CMR query failed. + Examples: ```python datasets = earthaccess.search_data( @@ -178,6 +179,9 @@ def download( Returns: List of downloaded files + + Raises: + Exception: A file download failed. """ provider = _normalize_location(provider) if isinstance(granules, DataGranule): @@ -194,7 +198,7 @@ def download( def open( - granules: Union[List[str], List[earthaccess.results.DataGranule]], + granules: Union[List[str], List[DataGranule]], provider: Optional[str] = None, ) -> List[AbstractFileSystem]: """Returns a list of fsspec file-like objects that can be used to access files @@ -216,7 +220,7 @@ def open( def get_s3_credentials( daac: Optional[str] = None, provider: Optional[str] = None, - results: Optional[List[earthaccess.results.DataGranule]] = None, + results: Optional[List[DataGranule]] = None, ) -> Dict[str, Any]: """Returns temporary (1 hour) credentials for direct access to NASA S3 buckets. We can use the daac name, the provider, or a list of results from earthaccess.search_data(). @@ -239,7 +243,7 @@ def get_s3_credentials( return earthaccess.__auth__.get_s3_credentials(daac=daac, provider=provider) -def collection_query() -> Type[CollectionQuery]: +def collection_query() -> CollectionQuery: """Returns a query builder instance for NASA collections (datasets). 
Returns: @@ -252,7 +256,7 @@ def collection_query() -> Type[CollectionQuery]: return query_builder -def granule_query() -> Type[GranuleQuery]: +def granule_query() -> GranuleQuery: """Returns a query builder instance for data granules Returns: @@ -311,7 +315,7 @@ def get_requests_https_session() -> requests.Session: def get_s3fs_session( daac: Optional[str] = None, provider: Optional[str] = None, - results: Optional[earthaccess.results.DataGranule] = None, + results: Optional[DataGranule] = None, ) -> s3fs.S3FileSystem: """Returns a fsspec s3fs file session for direct access when we are in us-west-2. diff --git a/earthaccess/py.typed b/earthaccess/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/earthaccess/search.py b/earthaccess/search.py index 335389fe..fadd7b55 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -1,48 +1,71 @@ import datetime as dt from inspect import getmembers, ismethod -from typing import Any, List, Optional, Tuple, Type, Union -import dateutil.parser as parser # type: ignore +import dateutil.parser as parser import requests +from typing_extensions import ( + Any, + List, + Optional, + Self, + Sequence, + SupportsFloat, + Tuple, + TypeAlias, + Union, + override, +) + from cmr import CollectionQuery, GranuleQuery -# type: ignore from .auth import Auth from .daac import find_provider, find_provider_by_shortname from .results import DataCollection, DataGranule +FloatLike: TypeAlias = Union[str, SupportsFloat] +PointLike: TypeAlias = Tuple[FloatLike, FloatLike] + def get_results( - query: Union[CollectionQuery, GranuleQuery], limit: int = 2000 + session: requests.Session, + query: Union[CollectionQuery, GranuleQuery], + limit: int = 2000, ) -> List[Any]: """ Get all results up to some limit, even if spanning multiple pages. 
???+ Tip - The default page size is 2000, if the supplied value is greater then the Search-After header - will be used to iterate across multiple requests until either the limit has been reached - or there are no more results. + The default page size is 2000; if the supplied value is greater, then the + Search-After header will be used to iterate across multiple requests until + either the limit has been reached or there are no more results. + Parameters: limit: The number of results to return Returns: query results as a list + + Raises: + RuntimeError: The CMR query failed. """ page_size = min(limit, 2000) url = query._build_url() - results: List = [] + results: List[Any] = [] more_results = True headers = dict(query.headers or {}) + while more_results: - response = requests.get(url, headers=headers, params={"page_size": page_size}) - headers["cmr-search-after"] = response.headers.get("cmr-search-after") + response = session.get(url, headers=headers, params={"page_size": page_size}) + + if cmr_search_after := response.headers.get("cmr-search-after"): + headers["cmr-search-after"] = cmr_search_after try: response.raise_for_status() except requests.exceptions.HTTPError as ex: - raise RuntimeError(ex.response.text) + raise RuntimeError(ex.response.text) from ex latest = response.json()["items"] @@ -56,39 +79,46 @@ def get_results( class DataCollections(CollectionQuery): """ ???+ Info - The DataCollection class queries against https://cmr.earthdata.nasa.gov/search/collections.umm_json, + The DataCollection class queries against + https://cmr.earthdata.nasa.gov/search/collections.umm_json, the response has to be in umm_json to use the result classes. """ - _fields = None + _fields: Optional[List[str]] = None _format = "umm_json" def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: - """Builds an instance of DataCollections to query CMR + """Builds an instance of DataCollections to query the CMR.
Parameters: auth: An authenticated `Auth` instance. This is an optional parameter for queries that need authentication, e.g. restricted datasets. """ super().__init__(*args, **kwargs) - self.session = requests.session() - if auth is not None and auth.authenticated: + + self.session = ( # To search, we need the new bearer tokens from NASA Earthdata - self.session = auth.get_session(bearer_token=True) + auth.get_session(bearer_token=True) + if auth is not None and auth.authenticated + else requests.session() + ) self._debug = False self.params["has_granules"] = True self.params["include_granule_counts"] = True + @override def hits(self) -> int: """Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers. Restricted datasets will always return zero results even if there are results. - Returns: - The number of results reported by CMR. + The number of results reported by the CMR. + + Raises: + RuntimeError: The CMR query failed. """ url = self._build_url() @@ -97,60 +127,77 @@ def hits(self) -> int: try: response.raise_for_status() except requests.exceptions.HTTPError as ex: - raise RuntimeError(ex.response.text) + raise RuntimeError(ex.response.text) from ex return int(response.headers["CMR-Hits"]) + @override def get(self, limit: int = 2000) -> List[DataCollection]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, we need to be careful with the request size because all the JSON - elements will be loaded into memory. This is more of an issue with granules than collections as - they can be potentially millions of them. + The default page size is 2000, we need to be careful with the request size + because all the JSON elements will be loaded into memory. This is more of an + issue with granules than collections as they can be potentially millions of + them. 
Parameters: limit: The number of results to return Returns: - query results as a list of `DataCollection` instances. + Query results as a (possibly empty) list of `DataCollection` instances. + + Raises: + RuntimeError: The CMR query failed. """ - return list( + return [ DataCollection(collection, self._fields) - for collection in get_results(self, limit) - ) + for collection in get_results(self.session, self, limit) + ] - def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]: + @override + def concept_id(self, IDs: Sequence[str]) -> Self: """Filter by concept ID. - For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, S12345678-LPDAAC_ECS + + For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, + S12345678-LPDAAC_ECS Collections, granules, tools, services are uniquely identified with this ID. - > - * If providing a collection's concept ID here, it will filter by granules associated with that collection. - * If providing a granule's concept ID here, it will uniquely identify those granules. - * If providing a tool's concept ID here, it will uniquely identify those tools. - * If providing a service's concept ID here, it will uniquely identify those services. + + * If providing a collection's concept ID, it will filter by granules associated + with that collection. + * If providing a granule's concept ID, it will uniquely identify those granules. + * If providing a tool's concept ID, it will uniquely identify those tools. + * If providing a service's concept ID, it will uniquely identify those services. Parameters: IDs: ID(s) to search by. Can be provided as a string or list of strings. + + Returns: + self + + Raises: + ValueError: An ID does not start with a valid prefix. 
""" - super().concept_id(IDs) - return self + return super().concept_id(IDs) - def keyword(self, text: str) -> Type[CollectionQuery]: + @override + def keyword(self, text: str) -> Self: """Case-insensitive and wildcard (*) search through over two dozen fields in a CMR collection record. This allows for searching against fields like summary and science keywords. Parameters: text: text to search for + + Returns: + self """ - super().keyword(text) - return self + return super().keyword(text) - def doi(self, doi: str) -> Type[CollectionQuery]: + def doi(self, doi: str) -> Self: """Search datasets by DOI. ???+ Tip @@ -160,6 +207,12 @@ def doi(self, doi: str) -> Type[CollectionQuery]: Parameters: doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + + Returns: + self + + Raises: + TypeError: `doi` is not of type `str`. """ if not isinstance(doi, str): raise TypeError("doi must be of type str") @@ -167,8 +220,8 @@ def doi(self, doi: str) -> Type[CollectionQuery]: self.params["doi"] = doi return self - def instrument(self, instrument: str) -> Type[CollectionQuery]: - """Searh datasets by instrument + def instrument(self, instrument: str) -> Self: + """Search datasets by instrument. ???+ Tip Not all datasets have an associated instrument. This works @@ -176,6 +229,12 @@ def instrument(self, instrument: str) -> Type[CollectionQuery]: Parameters: instrument (String): instrument of a datasets, e.g. instrument=GEDI + + Returns: + self + + Raises: + TypeError: `instrument` is not of type `str`. """ if not isinstance(instrument, str): raise TypeError("instrument must be of type str") @@ -183,8 +242,8 @@ def instrument(self, instrument: str) -> Type[CollectionQuery]: self.params["instrument"] = instrument return self - def project(self, project: str) -> Type[CollectionQuery]: - """Searh datasets by associated project + def project(self, project: str) -> Self: + """Search datasets by associated project. ???+ Tip Not all datasets have an associated project.
This works @@ -193,6 +252,12 @@ def project(self, project: str) -> Type[CollectionQuery]: Parameters: project (String): associated project of a datasets, e.g. project=EMIT + + Returns: + self + + Raises: + TypeError: `project` is not of type `str`. """ if not isinstance(project, str): raise TypeError("project must be of type str") @@ -200,22 +265,29 @@ def project(self, project: str) -> Type[CollectionQuery]: self.params["project"] = project return self - def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: + @override + def parameters(self, **kwargs: Any) -> Self: """Provide query parameters as keyword arguments. The keyword needs to match the name of the method, and the value should either be the value or a tuple of values. ???+ Example ```python - query = DataCollections.parameters(short_name="AST_L1T", - temporal=("2015-01","2015-02"), - point=(42.5, -101.25)) + query = DataCollections.parameters( + short_name="AST_L1T", + temporal=("2015-01","2015-02"), + point=(42.5, -101.25) + ) ``` + Returns: - Query instance + self + + Raises: + ValueError: The name of a keyword argument is not the name of a method. + TypeError: The value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters. """ - methods = {} - for name, func in getmembers(self, predicate=ismethod): - methods[name] = func + methods = dict(getmembers(self, predicate=ismethod)) for key, val in kwargs.items(): # verify the key matches one of our methods @@ -236,33 +308,48 @@ def print_help(self, method: str = "fields") -> None: print([method for method in dir(self) if method.startswith("_") is False]) help(getattr(self, method)) - def fields(self, fields: Optional[List[str]] = None) -> Type[CollectionQuery]: + def fields(self, fields: Optional[List[str]] = None) -> Self: """Masks the response by only showing the fields included in this list. 
Parameters: - fields (List): list of fields to show, these fields come from the UMM model e.g. Abstract, Title + fields (List): list of fields to show. These fields come from the UMM model + (e.g. Abstract, Title). + + Returns: + self """ self._fields = fields return self - def debug(self, debug: bool = True) -> Type[CollectionQuery]: - """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + def debug(self, debug: bool = True) -> Self: + """If True, prints the actual query to CMR. Note that the pagination happens in + the headers. Parameters: - debug (Boolean): Print CMR query. + debug (Boolean): If `True`, print the CMR query. + + Returns: + self """ - self._debug = True + self._debug = debug return self - def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: - """Only match granules that are hosted in the cloud. This is valid for public collections. + def cloud_hosted(self, cloud_hosted: bool = True) -> Self: + """Only match granules that are hosted in the cloud. This is valid for public + collections. ???+ Tip Cloud hosted collections can be public or restricted. Restricted collections will not be matched using this parameter Parameters: - cloud_hosted: True to require granules only be online + cloud_hosted: If `True`, obtain only cloud-hosted collections. + + Returns: + self + + Raises: + TypeError: `cloud_hosted` is not of type `bool`. """ if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -273,32 +360,43 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: self.params["provider"] = provider return self - def provider(self, provider: str = "") -> Type[CollectionQuery]: + @override + def provider(self, provider: str) -> Self: """Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. 
- E.g., PODAAC is a data center or DAAC; PODAAC is the default provider for on-premises data, - POCLOUD is the PODAAC provider for their data in the cloud. + E.g., PODAAC is a data center or DAAC; PODAAC is the default provider for + on-premises data, POCLOUD is the PODAAC provider for their data in the cloud. Parameters: provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. + + Returns: + self """ self.params["provider"] = provider return self - def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: - """An alias name for `daac()`. + def data_center(self, data_center_name: str) -> Self: + """An alias for the `daac` method. Parameters: data_center_name: DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ return self.daac(data_center_name) - def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: - """Only match collections for a given DAAC, by default the on-prem collections for the DAAC. + def daac(self, daac_short_name: str) -> Self: + """Only match collections for a given DAAC, by default the on-prem collections + for the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -308,20 +406,31 @@ def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: self.params["provider"] = find_provider(daac_short_name, cloud_hosted) return self + @override def temporal( self, date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Type[CollectionQuery]: - """Filter by an open or closed date range. Dates can be provided as datetime objects - or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls - to this method before calling execute(). + ) -> Self: + """Filter by an open or closed date range. 
Dates can be provided as datetime + objects or ISO 8601 formatted strings. Multiple ranges can be provided by + successive calls to this method before calling execute(). Parameters: date_from (String or Datetime object): earliest date of temporal range date_to (String or Datetime object): latest date of temporal range - exclude_boundary (Boolean): whether or not to exclude the date_from/to in the matched range. + exclude_boundary (Boolean): whether or not to exclude the date_from/to in + the matched range. + + Returns: + self + + Raises: + ValueError: `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is after `date_to`. """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -338,8 +447,7 @@ def temporal( print("The provided end date was not recognized") date_to = "" - super().temporal(date_from, date_to, exclude_boundary) - return self + return super().temporal(date_from, date_to, exclude_boundary) class DataGranules(GranuleQuery): @@ -350,22 +458,30 @@ class DataGranules(GranuleQuery): _format = "umm_json" - def __init__(self, auth: Any = None, *args: Any, **kwargs: Any) -> None: + def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: """Base class for Granule and Collection CMR queries.""" super().__init__(*args, **kwargs) - self.session = requests.session() - if auth is not None and auth.authenticated: + + self.session = ( # To search, we need the new bearer tokens from NASA Earthdata - self.session = auth.get_session(bearer_token=True) + auth.get_session(bearer_token=True) + if auth is not None and auth.authenticated + else requests.session() + ) self._debug = False + @override def hits(self) -> int: """Returns the number of hits the current query will return. 
- This is done by making a lightweight query to CMR and inspecting the returned headers. + This is done by making a lightweight query to CMR and inspecting the returned + headers. Returns: - The number of results reported by CMR. + The number of results reported by the CMR. + + Raises: + RuntimeError: The CMR query failed. """ url = self._build_url() @@ -382,40 +498,54 @@ def hits(self) -> int: return int(response.headers["CMR-Hits"]) + @override def get(self, limit: int = 2000) -> List[DataGranule]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, we need to be careful with the request size because all the JSON - elements will be loaded into memory. This is more of an issue with granules than collections as - they can be potentially millions of them. + The default page size is 2000, we need to be careful with the request size + because all the JSON elements will be loaded into memory. This is more of an + issue with granules than collections as they can be potentially millions of + them. Parameters: - limit: The number of results to return + limit: The number of results to return. Returns: - query results as a list of `DataGranules` instances. + Query results as a (possibly empty) list of `DataGranules` instances. + + Raises: + RuntimeError: The CMR query failed. """ - response = get_results(self, limit) + response = get_results(self.session, self, limit) cloud = self._is_cloud_hosted(response[0]) - return list(DataGranule(granule, cloud_hosted=cloud) for granule in response) + return [DataGranule(granule, cloud_hosted=cloud) for granule in response] - def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: - """Provide query parameters as keyword arguments. The keyword needs to match the name - of the method, and the value should either be the value or a tuple of values. 
+ @override + def parameters(self, **kwargs: Any) -> Self: + """Provide query parameters as keyword arguments. The keyword needs to match the + name of the method, and the value should either be the value or a tuple of + values. ???+ Example ```python - query = DataCollections.parameters(short_name="AST_L1T", - temporal=("2015-01","2015-02"), - point=(42.5, -101.25)) + query = DataCollections.parameters( + short_name="AST_L1T", + temporal=("2015-01","2015-02"), + point=(42.5, -101.25) + ) ``` Returns: - Query instance + self + + Raises: + ValueError: The name of a keyword argument is not the name of a method. + TypeError: The value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters. """ methods = {} for name, func in getmembers(self, predicate=ismethod): @@ -434,7 +564,8 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: return self - def provider(self, provider: str = "") -> Type[CollectionQuery]: + @override + def provider(self, provider: str) -> Self: """Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. For example, PODAAC is a data center or DAAC, @@ -443,23 +574,33 @@ def provider(self, provider: str = "") -> Type[CollectionQuery]: Parameters: provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. + + Returns: + self """ self.params["provider"] = provider return self - def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: - """An alias name for `daac()`. + def data_center(self, data_center_name: str) -> Self: + """An alias for the `daac` method. Parameters: data_center_name (String): DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ return self.daac(data_center_name) - def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: - """Only match collections for a given DAAC. Default to on-prem collections for the DAAC. 
+ def daac(self, daac_short_name: str) -> Self: + """Only match collections for a given DAAC. Default to on-prem collections for + the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -469,18 +610,25 @@ def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: self.params["provider"] = find_provider(daac_short_name, cloud_hosted) return self - def orbit_number(self, orbit1: int, orbit2: int) -> Type[GranuleQuery]: + @override + def orbit_number( + self, + orbit1: FloatLike, + orbit2: Optional[FloatLike] = None, + ) -> Self: """Filter by the orbit number the granule was acquired during. Either a single orbit can be targeted or a range of orbits. Parameter: orbit1: orbit to target (lower limit of range when orbit2 is provided) orbit2: upper limit of range + + Returns: + self """ - super().orbit_number(orbit1, orbit2) - return self + return super().orbit_number(orbit1, orbit2) - def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: + def cloud_hosted(self, cloud_hosted: bool = True) -> Self: """Only match granules that are hosted in the cloud. This is valid for public collections and when using the short_name parameter. Concept-Id is unambiguous. @@ -490,7 +638,13 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: Restricted collections will not be matched using this parameter. Parameters: - cloud_hosted: True to require granules only be online + cloud_hosted: If `True`, obtain only granules from cloud-hosted collections. + + Returns: + self + + Raises: + TypeError: `cloud_hosted` is not of type `bool`. 
""" if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -503,7 +657,7 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: self.params["provider"] = provider return self - def granule_name(self, granule_name: str) -> Type[CollectionQuery]: + def granule_name(self, granule_name: str) -> Self: """Find granules matching either granule ur or producer granule id, queries using the readable_granule_name metadata field. @@ -513,6 +667,12 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: Parameters: granule_name: granule name (accepts wildcards) + + Returns: + self + + Raises: + TypeError: if `granule_name` is not of type `str` """ if not isinstance(granule_name, str): raise TypeError("granule_name must be of type string") @@ -521,54 +681,90 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: self.params["options[readable_granule_name][pattern]"] = True return self - def online_only(self, online_only: bool = True) -> Type[GranuleQuery]: + @override + def online_only(self, online_only: bool = True) -> Self: """Only match granules that are listed online and not available for download. - The opposite of this method is downloadable(). + The inverse of this method is `downloadable`. Parameters: - online_only: True to require granules only be online + online_only: If `True`, obtain only granules that are online (not + downloadable) + + Returns: + self + + Raises: + TypeError: `online_only` is not of type `bool`. """ - super().online_only(online_only) - return self + return super().online_only(online_only) - def day_night_flag(self, day_night_flag: str) -> Type[GranuleQuery]: + @override + def day_night_flag(self, day_night_flag: str) -> Self: """Filter by period of the day the granule was collected during. Parameters: day_night_flag: "day", "night", or "unspecified" + + Returns: + self + + Raises: + TypeError: `day_night_flag` is not of type `str`. 
+ ValueError: `day_night_flag` is not one of `"day"`, `"night"`, or + `"unspecified"`. """ - super().day_night_flag(day_night_flag) - return self + return super().day_night_flag(day_night_flag) - def instrument(self, instrument: str = "") -> Type[GranuleQuery]: + @override + def instrument(self, instrument: str) -> Self: """Filter by the instrument associated with the granule. Parameters: instrument: name of the instrument + + Returns: + self + + Raises: + ValueError: `instrument` is not a non-empty string. """ - super().instrument(instrument) - return self + return super().instrument(instrument) - def platform(self, platform: str = "") -> Type[GranuleQuery]: + @override + def platform(self, platform: str) -> Self: """Filter by the satellite platform the granule came from. Parameters: platform: name of the satellite + + Returns: + self + + Raises: + ValueError: `platform` is not a non-empty string. """ - super().platform(platform) - return self + return super().platform(platform) + @override def cloud_cover( - self, min_cover: int = 0, max_cover: int = 100 - ) -> Type[GranuleQuery]: + self, + min_cover: Optional[FloatLike] = 0, + max_cover: Optional[FloatLike] = 100, + ) -> Self: """Filter by the percentage of cloud cover present in the granule. Parameters: min_cover: minimum percentage of cloud cover max_cover: maximum percentage of cloud cover + + Returns: + self + + Raises: + ValueError: `min_cover` or `max_cover` is not convertible to a float, + or `min_cover` is greater than `max_cover`. 
""" - super().cloud_cover(min_cover, max_cover) - return self + return super().cloud_cover(min_cover, max_cover) def _valid_state(self) -> bool: # spatial params must be paired with a collection limiting parameter @@ -593,41 +789,57 @@ def _is_cloud_hosted(self, granule: Any) -> bool: return True return False - def short_name(self, short_name: str = "") -> Type[GranuleQuery]: + @override + def short_name(self, short_name: str) -> Self: """Filter by short name (aka product or collection name). Parameters: short_name: name of a collection Returns: - Query instance + self """ - super().short_name(short_name) - return self + return super().short_name(short_name) - def debug(self, debug: bool = True) -> Type[GranuleQuery]: - """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + def debug(self, debug: bool = True) -> Self: + """If True, prints the actual query to CMR, notice that the pagination happens + in the headers. Parameters: - debug: Print CMR query. + debug: If `True`, print the CMR query. + + Returns: + self """ - self._debug = True + self._debug = debug return self + @override def temporal( self, date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Type[GranuleQuery]: + ) -> Self: """Filter by an open or closed date range. - Dates can be provided as a datetime objects or ISO 8601 formatted strings. Multiple - ranges can be provided by successive calls to this method before calling execute(). + + Dates can be provided as a datetime objects or ISO 8601 formatted strings. + Multiple ranges can be provided by successive calls to this method before + calling execute(). 
Parameters: date_from: earliest date of temporal range date_to: latest date of temporal range exclude_boundary: whether to exclude the date_from/to in the matched range + + Returns: + self + + Raises: + ValueError: `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is after `date_to`. """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -644,96 +856,146 @@ def temporal( print("The provided end date was not recognized") date_to = "" - super().temporal(date_from, date_to, exclude_boundary) - return self + return super().temporal(date_from, date_to, exclude_boundary) - def version(self, version: str = "") -> Type[GranuleQuery]: + @override + def version(self, version: str) -> Self: """Filter by version. Note that CMR defines this as a string. For example, MODIS version 6 products must be searched for with "006". Parameters: version: version string + + Returns: + self """ - super().version(version) - return self + return super().version(version) - def point(self, lon: str, lat: str) -> Type[GranuleQuery]: + @override + def point(self, lon: FloatLike, lat: FloatLike) -> Self: """Filter by granules that include a geographic point. Parameters: - lon (String): longitude of geographic point - lat (String): latitude of geographic point + lon: longitude of geographic point + lat: latitude of geographic point + + Returns: + self + + Raises: + ValueError: `lon` or `lat` cannot be converted to a float. """ - super().point(lon, lat) - return self + return super().point(lon, lat) - def polygon(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: - """Filter by granules that overlap a polygonal area. Must be used in combination with a - collection filtering parameter such as short_name or entry_title. 
+ @override + def polygon(self, coordinates: Sequence[PointLike]) -> Self: + """Filter by granules that overlap a polygonal area. Must be used in combination + with a collection filtering parameter such as short_name or entry_title. Parameters: coordinates: list of (lon, lat) tuples + + Returns: + self + + Raises: + ValueError: `coordinates` is not a sequence of at least 4 coordinate + pairs, any of the coordinates cannot be converted to a float, or the + first and last coordinate pairs are not equal. """ - super().polygon(coordinates) - return self + return super().polygon(coordinates) + @override def bounding_box( self, - lower_left_lon: str, - lower_left_lat: str, - upper_right_lon: str, - upper_right_lat: str, - ) -> Type[GranuleQuery]: - """Filter by granules that overlap a bounding box. Must be used in combination with - a collection filtering parameter such as short_name or entry_title. + lower_left_lon: FloatLike, + lower_left_lat: FloatLike, + upper_right_lon: FloatLike, + upper_right_lat: FloatLike, + ) -> Self: + """Filter by granules that overlap a bounding box. Must be used in combination + with a collection filtering parameter such as short_name or entry_title. Parameters: lower_left_lon: lower left longitude of the box lower_left_lat: lower left latitude of the box upper_right_lon: upper right longitude of the box upper_right_lat: upper right latitude of the box + + Returns: + self + + Raises: + ValueError: A coordinate could not be converted to a float. """ - super().bounding_box( + return super().bounding_box( lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat ) - return self - def line(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: - """Filter by granules that overlap a series of connected points. Must be used in combination - with a collection filtering parameter such as short_name or entry_title. 
+ @override + def line(self, coordinates: Sequence[PointLike]) -> Self: + """Filter by granules that overlap a series of connected points. Must be used + in combination with a collection filtering parameter such as short_name or + entry_title. Parameters: coordinates: a list of (lon, lat) tuples + + Returns: + self + + Raises: + ValueError: `coordinates` is not a sequence of at least 2 coordinate + pairs, or a coordinate could not be converted to a float. """ - super().line(coordinates) - return self + return super().line(coordinates) - def downloadable(self, downloadable: bool = True) -> Type[GranuleQuery]: - """Only match granules that are available for download. The opposite of this - method is online_only(). + @override + def downloadable(self, downloadable: bool = True) -> Self: + """Only match granules that are available for download. The inverse of this + method is `online_only`. Parameters: - downloadable: True to require granules be downloadable + downloadable: If `True`, obtain only granules that are downloadable. + + Returns: + self + + Raises: + TypeError: `downloadable` is not of type `bool`. """ - super().downloadable(downloadable) - return self + return super().downloadable(downloadable) - def doi(self, doi: str) -> Type[GranuleQuery]: - """Search data granules by DOI + def doi(self, doi: str) -> Self: + """Search data granules by DOI. ???+ Tip Not all datasets have an associated DOI, internally if a DOI is found earthaccess will grab the concept_id for the query to CMR. Parameters: - doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + doi: DOI of a dataset, e.g. 10.5067/AQR50-3Q7CS + + Returns: + self + + Raises: + RuntimeError: The CMR query to get the collection for the DOI fails. 
""" + + # TODO consider deferring this query until the search is executed collection = DataCollections().doi(doi).get() + + # TODO consider raising an exception when there are multiple collections, since + # we can't know which one the user wants, and choosing one is arbitrary. if len(collection) > 0: concept_id = collection[0].concept_id() self.params["concept_id"] = concept_id else: + # TODO consider removing this print statement since we don't print such + # a message in other cases where no results are found. Seems arbitrary. print( f"earthaccess couldn't find any associated collections with the DOI: {doi}" ) + return self diff --git a/poetry.lock b/poetry.lock index 5ca33866..f8c4eba9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiobotocore" @@ -3084,8 +3084,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3850,6 +3850,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3857,8 +3858,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3875,6 +3884,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3882,6 +3892,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -5109,4 +5120,4 @@ kerchunk = ["dask", "kerchunk"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "5344a948e7ae73de6bcfd7fa30089469daf6b232e3f0498cc1a47ba860ebb497" +content-hash = "530a3cffb6d044e431ec3671268949e797d3c468c0f653b6fea7c90cdc422b3d" diff --git a/pyproject.toml b/pyproject.toml index 6484f32e..dbe917ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ python-dateutil = ">=2.8.2" kerchunk = { version = ">=0.1.2", optional = true } dask = { version = ">=2022.1.0", optional = true } importlib-resources = ">=6.3.2" +typing_extensions = ">=4.10.0" [tool.poetry.extras] kerchunk = ["kerchunk", "dask"] @@ -67,6 +68,7 @@ pymdown-extensions = ">=9.2" pygments = ">=2.11.1" responses = ">=0.14" ruff = "^0.1.6" +types-python-dateutil = ">=2.8.2" types-requests = ">=0.1" types-setuptools = ">=0.1" ipywidgets = ">=7.7.0" 
@@ -87,22 +89,39 @@ build-backend = "poetry.core.masonry.api" [tool.pytest] filterwarnings = ["error::UserWarning"] - [tool.mypy] -disallow_untyped_defs = false -ignore_missing_imports = true +mypy_path = ["earthaccess", "tests", "stubs"] +disallow_untyped_defs = true +# TODO: incrementally work towards strict mode (currently too many errors) +# strict = true +pretty = true # Show additional context in error messages +enable_error_code = "redundant-self" [[tool.mypy.overrides]] module = [ "tests.*", ] -ignore_errors = true +disallow_untyped_defs = false + +[[tool.mypy.overrides]] +module = [ + "fsspec.*", + "dask.*", + "kerchunk.*", + "pqdm.*", + "s3fs", + "tinynetrc.*", # TODO: generate stubs for tinynetrc and remove this line + "vcr.unittest", # TODO: generate stubs for vcr and remove this line +] +ignore_missing_imports = true +[tool.pyright] +include = ["earthaccess"] +stubPath = "./stubs" [tool.ruff] line-length = 88 -src = ["earthaccess", "tests"] -exclude = ["mypy-stubs", "stubs", "typeshed"] +src = ["earthaccess", "stubs", "tests"] [tool.ruff.lint] extend-select = ["I"] @@ -110,7 +129,6 @@ extend-select = ["I"] [tool.ruff.lint.isort] combine-as-imports = true - [tool.bumpversion] current_version = "0.9.0" commit = false diff --git a/scripts/lint.sh b/scripts/lint.sh index 3a528811..02f9c70a 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash -set -e -set -x +set -ex -mypy earthaccess --disallow-untyped-defs +mypy earthaccess stubs tests ruff check . 
diff --git a/stubs/cmr/__init__.pyi b/stubs/cmr/__init__.pyi new file mode 100644 index 00000000..3ea9733e --- /dev/null +++ b/stubs/cmr/__init__.pyi @@ -0,0 +1,10 @@ +from .queries import ( + CMR_OPS as CMR_OPS, + CMR_SIT as CMR_SIT, + CMR_UAT as CMR_UAT, + CollectionQuery as CollectionQuery, + GranuleQuery as GranuleQuery, + ServiceQuery as ServiceQuery, + ToolQuery as ToolQuery, + VariableQuery as VariableQuery, +) diff --git a/stubs/cmr/queries.pyi b/stubs/cmr/queries.pyi new file mode 100644 index 00000000..3b2fadc3 --- /dev/null +++ b/stubs/cmr/queries.pyi @@ -0,0 +1,108 @@ +import sys +from datetime import datetime +from typing import Any, Optional, SupportsFloat, Union + +if sys.version_info < (3, 9): + from typing import List, MutableMapping, Sequence, Tuple +else: + from builtins import list as List, tuple as Tuple + from collections.abc import MutableMapping, Sequence + +if sys.version_info < (3, 10): + from typing_extensions import TypeAlias +else: + from typing import TypeAlias + +if sys.version_info < (3, 11): + from typing_extensions import Self +else: + from typing import Self + +CMR_OPS: str +CMR_UAT: str +CMR_SIT: str + +FloatLike: TypeAlias = Union[str, SupportsFloat] +PointLike: TypeAlias = Tuple[FloatLike, FloatLike] + +class Query: + params: MutableMapping[str, Any] + options: MutableMapping[str, Any] + concept_id_chars: Sequence[str] + headers: MutableMapping[str, str] + + def __init__(self, route: str, mode: str = ...) -> None: ... + def _build_url(self) -> str: ... + def get(self, limit: int = ...) -> List[Any]: ... + def hits(self) -> int: ... + def get_all(self) -> List[Any]: ... + def parameters(self, **kwargs: Any) -> Self: ... + def format(self, output_format: str = "json") -> Self: ... + def concept_id(self, ids: Sequence[str]) -> Self: ... + def provider(self, provider: str) -> Self: ... + def mode(self, mode: str = ...) -> None: ... + def token(self, token: str) -> Self: ... + def bearer_token(self, bearer_token: str) -> Self: ... 
+ +class GranuleCollectionBaseQuery(Query): + def online_only(self, online_only: bool = True) -> Self: ... + def temporal( + self, + date_from: Optional[Union[str, datetime]], + date_to: Optional[Union[str, datetime]], + exclude_boundary: bool = False, + ) -> Self: ... + def short_name(self, short_name: str) -> Self: ... + def version(self, version: str) -> Self: ... + def point(self, lon: FloatLike, lat: FloatLike) -> Self: ... + def circle(self, lon: FloatLike, lat: FloatLike, dist: FloatLike) -> Self: ... + def polygon(self, coordinates: Sequence[PointLike]) -> Self: ... + def bounding_box( + self, + lower_left_lon: FloatLike, + lower_left_lat: FloatLike, + upper_right_lon: FloatLike, + upper_right_lat: FloatLike, + ) -> Self: ... + def line(self, coordinates: Sequence[PointLike]) -> Self: ... + def downloadable(self, downloadable: bool = True) -> Self: ... + def entry_title(self, entry_title: str) -> Self: ... + +class GranuleQuery(GranuleCollectionBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + def orbit_number( + self, + orbit1: FloatLike, + orbit2: Optional[FloatLike] = ..., + ) -> Self: ... + def day_night_flag(self, day_night_flag: str) -> Self: ... + def cloud_cover( + self, + min_cover: Optional[FloatLike] = ..., + max_cover: Optional[FloatLike] = ..., + ) -> Self: ... + def instrument(self, instrument: str) -> Self: ... + def platform(self, platform: str) -> Self: ... + def sort_key(self, sort_key: str) -> Self: ... + def granule_ur(self, granule_ur: str) -> Self: ... + +class CollectionQuery(GranuleCollectionBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + def archive_center(self, center: str) -> Self: ... + def keyword(self, text: str) -> Self: ... + def native_id(self, native_ids: Sequence[str]) -> Self: ... + def tool_concept_id(self, ids: Sequence[str]) -> Self: ... + def service_concept_id(self, ids: Sequence[str]) -> Self: ... 
+ +class ToolServiceVariableBaseQuery(Query): + def native_id(self, native_ids: Sequence[str]) -> Self: ... + def name(self, name: str) -> Self: ... + +class ToolQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + +class ServiceQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + +class VariableQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/fixtures/vcr_cassettes/PODAAC.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_less_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/PODAAC.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_less_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/ALL.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_more_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/ALL.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_more_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml new file mode 100644 index 00000000..1b9e08ac --- /dev/null +++ b/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml @@ -0,0 +1,466 @@ +interactions: +- request: + body: null + headers: + Accept: + - application/json + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/api/users/tokens + response: + body: + string: 
'[{"access_token":"eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiVXNlciIsInVpZCI6ImRzY2h1Y2siLCJleHAiOjE3MTcyNjM4MTMsImlhdCI6MTcxMjA3OTgxMywiaXNzIjoiRWFydGhkYXRhIExvZ2luIn0.S_tw0-5JNFEv3si07GYVxvQi81QejNAT2Sh2ZIxAwmqr9UqoSmYg2Wp2Jdn3jaWrSVsRgxBXuLD5w7XFeRju2qOtIqovN3XGJ8VnTdvpklr-gTjk_iLq58334Zzbu5ntnqy-QTzPCKvjvqr3GNuIJcp9z7j5rzd3MEUYOFP1xsd8wehGLpBHzT6ZSzCOwdgzE1AufKq9Vd2GqM_5bc3M9cj-gGy2g3m1mP2OB41wiGvPzup79ds4t_gEPkCecm2rplCP4n1hrY6ZQtXshgM6o49J1nkGSJjE0olHcPwEujKE2s1htWZEycI1TCCxrGpx8K1vwEd0lNaekgPUWwdOlA","token_type":"Bearer","expiration_date":"06/01/2024"}]' + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"61d0ce8df0bc684ac04ce623aea3668c" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - 28f6c88b-114d-4319-b6a2-0de0f54c9405 + X-Runtime: + - '0.013338' + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/api/users/dschuck?client_id=ntD0YGC_SM3Bjs-Tnxd7bg + response: + body: + string: '{"uid":"dschuck","first_name":"Charles","last_name":"Daniels","email_address":"chuck@developmentseed.org","registered_date":" + 2 Apr 2024 17:43:33PM","country":"United 
States","study_area":"Other","allow_auth_app_emails":true,"user_type":"Application","affiliation":"Commercial","agreed_to_meris_eula":true,"agreed_to_sentinel_eula":true,"email_verified":true,"user_groups":[],"user_authorized_apps":23,"nams_auid":null}' + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"5d6f0c723c97c730432ca73084995037" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - c8cf2bd3-731d-4863-8967-1906de679cbc + X-Runtime: + - '0.017383' + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/profile + response: + body: + string: '' + headers: + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Type: + - text/html; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + Location: + - https://urs.earthdata.nasa.gov/home + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Set-Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0; path=/; expires=Wed, 10 + Apr 2024 21:58:55 GMT; HttpOnly + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - b6384698-e18b-4a99-b80c-b0ebe6cb80b7 + X-Runtime: + - '0.008282' + X-XSS-Protection: + - 1; mode=block + status: + code: 
302 + message: Found +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0 + method: GET + uri: https://urs.earthdata.nasa.gov/home + response: + body: + string: "\n\n\n\n\n \n + \ \n \n + \ Earthdata Login\n \n \n\n + \ \n \n \n\n \n \n \n \n \n \n\n \n\n \n + \ \n \n\n + \ \n \n + \ \n\n
\n \n + \
\n
\n
\n + \

Earthdata Login

\n Earthdata Login\n
\n \"Three\n \n
\n\n
\n + \ You must be logged in to access this page\n
\n\n\n\n\n\n\n\n\n + \
\n
\n

\n \n + \ \n \n

\n

\n
\n \n \n

\n

\n + \ \n

\n

\n \n

\n \n

\n \n Register\n

\n

\n + \ \n I + don’t remember my username\n
\n + \ I don’t remember my + password\n
\n \n Help\n

\n
\n\n\n
\n
\n

Get + single sign-on access to all your favorite EOSDIS sites

\n Register for a Profile\n
\n
\n \n By clicking the Log In button above, + you are acknowledging that all Earthdata Login applications running in DAACs \n will have + access to my profile information. \n \n
\n
\n

\n \n + \ Protection and maintenance of user profile information is described + in\n NASA's + Web Privacy Policy.\n \n

\n
\n
\n + \

\n \n Protection and maintenance of user profile + information is described in\n NASA's + Web Privacy Policy.\n \n

\n
\n
\n + \ \n US Govt Property. Unauthorized use subject to prosecution. + Use subject to monitoring per\n NPD2810.\n + \ \n
\n
\n \n
\n
\n \n\n \n + \ \n \n + \ \n + \ \n + \ \n\n \n \n + \ \n\n \n \n\n" + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - text/html; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"7af405988c901d45ff1b80e3d54e85fa" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Set-Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0; path=/; expires=Wed, 10 + Apr 2024 21:58:55 GMT; HttpOnly + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - a5f4494f-83d0-40cc-a966-a45d47155163 + X-Runtime: + - '0.012801' + X-XSS-Protection: + - 1; mode=block + content-length: + - '9058' + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205&temporal%5B%5D=2020-01-01T00:00:00Z,2022-01-01T00:00:00Z&page_size=0 + response: + body: + string: '{"hits":147,"took":55,"items":[]}' + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - CMR-Hits, CMR-Request-Id, X-Request-Id, CMR-Scroll-Id, CMR-Search-After, CMR-Timed-Out, + CMR-Shapefile-Original-Point-Count, CMR-Shapefile-Simplified-Point-Count + CMR-Hits: + - '147' + CMR-Request-Id: + - c8f9b01d-a6a3-4809-8a65-e707f30b3d47 + CMR-Took: + - '55' + Connection: + - keep-alive + Content-MD5: + - 376935be7b2a0e96352603908fe0dcd5 + Content-SHA1: + - b02f8a240f36ad8cd6798334ce2455c338e6d55f + Content-Type: + - application/vnd.nasa.cmr.umm_results+json;version=1.6.5; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:56 GMT + Server: + 
- ServerTokens ProductOnly + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Vary: + - Accept-Encoding, User-Agent + Via: + - 1.1 f300b5f0c0ff51593fb31953294424c0.cloudfront.net (CloudFront) + X-Amz-Cf-Id: + - 6UKqbnR1dCiMWGTDKFtLjE_KomjiZXUhp0ICfljulkAzPCeqJgb3cw== + X-Amz-Cf-Pop: + - PHL51-P1 + X-Cache: + - Miss from cloudfront + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Request-Id: + - 6UKqbnR1dCiMWGTDKFtLjE_KomjiZXUhp0ICfljulkAzPCeqJgb3cw== + X-XSS-Protection: + - 1; mode=block + content-length: + - '33' + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205&temporal%5B%5D=2020-01-01T00:00:00Z,2022-01-01T00:00:00Z&page_size=1 + response: + body: + string: '{"hits":147,"took":253,"items":[{"meta":{"concept-type":"granule","concept-id":"G2546526969-POCLOUD","revision-id":2,"native-id":"ssh_grids_v2205_2020010212","collection-concept-id":"C2270392799-POCLOUD","provider-id":"POCLOUD","format":"application/vnd.nasa.cmr.umm+json","revision-date":"2023-01-11T00:16:28.862Z"},"umm":{"TemporalExtent":{"RangeDateTime":{"EndingDateTime":"2020-01-02T00:00:00.000Z","BeginningDateTime":"2020-01-02T00:00:00.000Z"}},"MetadataSpecification":{"URL":"https://cdn.earthdata.nasa.gov/umm/granule/v1.6.5","Name":"UMM-G","Version":"1.6.5"},"GranuleUR":"ssh_grids_v2205_2020010212","ProviderDates":[{"Type":"Insert","Date":"2023-01-11T00:16:13.878Z"},{"Type":"Update","Date":"2023-01-11T00:16:13.878Z"}],"SpatialExtent":{"HorizontalSpatialDomain":{"Geometry":{"BoundingRectangles":[{"WestBoundingCoordinate":0.083,"SouthBoundingCoordinate":-79.917,"EastBoundingCoordinate":180,"NorthBoundingCoordinate":79.917},{"WestBoundingCoordinate":-180,"SouthBoundingCoordinate":-79.917,"EastBoundingCoord
inate":-0.083,"NorthBoundingCoordinate":79.917}]}}},"DataGranule":{"ArchiveAndDistributionInformation":[{"SizeUnit":"MB","Size":9.246453285217285,"Checksum":{"Value":"9002febf17632e5921eba5b8f62237e6","Algorithm":"MD5"},"SizeInBytes":9695609,"Name":"ssh_grids_v2205_2020010212.nc"},{"SizeUnit":"MB","Size":6.008148193359375E-5,"Checksum":{"Value":"b0c271019f89f876b2d3c0a9c46b8f77","Algorithm":"MD5"},"SizeInBytes":63,"Name":"ssh_grids_v2205_2020010212.nc.md5"}],"DayNightFlag":"Unspecified","ProductionDateTime":"2022-10-30T20:57:22.377Z"},"CollectionReference":{"Version":"2205","ShortName":"SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205"},"RelatedUrls":[{"URL":"https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-public/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc.md5","Description":"Download + ssh_grids_v2205_2020010212.nc.md5","Type":"EXTENDED METADATA"},{"URL":"s3://podaac-ops-cumulus-public/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc.md5","Description":"This + link provides direct download access via S3 to the granule","Type":"EXTENDED + METADATA"},{"URL":"https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc","Description":"Download + ssh_grids_v2205_2020010212.nc","Type":"GET DATA"},{"URL":"s3://podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc","Description":"This + link provides direct download access via S3 to the granule","Type":"GET DATA + VIA DIRECT ACCESS"},{"URL":"https://archive.podaac.earthdata.nasa.gov/s3credentials","Description":"api + endpoint to retrieve temporary credentials valid for same-region direct s3 + access","Type":"VIEW RELATED 
INFORMATION"},{"URL":"https://opendap.earthdata.nasa.gov/collections/C2270392799-POCLOUD/granules/ssh_grids_v2205_2020010212","Type":"USE + SERVICE API","Subtype":"OPENDAP DATA","Description":"OPeNDAP request URL"}]}}]}' + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - CMR-Hits, CMR-Request-Id, X-Request-Id, CMR-Scroll-Id, CMR-Search-After, CMR-Timed-Out, + CMR-Shapefile-Original-Point-Count, CMR-Shapefile-Simplified-Point-Count + CMR-Hits: + - '147' + CMR-Request-Id: + - 59762a2f-23b9-45a2-a99b-be84748be0e5 + CMR-Search-After: + - '["pocloud",1577923200000,2546526969]' + CMR-Took: + - '254' + Connection: + - keep-alive + Content-MD5: + - 53cf1a1f972e393a32e3cd15ec36f700 + Content-SHA1: + - 60c47f03299dda570448eaecfbd8b3ed10f7bd5a + Content-Type: + - application/vnd.nasa.cmr.umm_results+json;version=1.6.5; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:56 GMT + Server: + - ServerTokens ProductOnly + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Vary: + - Accept-Encoding, User-Agent + Via: + - 1.1 f300b5f0c0ff51593fb31953294424c0.cloudfront.net (CloudFront) + X-Amz-Cf-Id: + - N9NqPFJ1jWMYiNKZDyuZ71_auV8xRiEL-06uHzn3q2PrTU66FCOKFA== + X-Amz-Cf-Pop: + - PHL51-P1 + X-Cache: + - Miss from cloudfront + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Request-Id: + - N9NqPFJ1jWMYiNKZDyuZ71_auV8xRiEL-06uHzn3q2PrTU66FCOKFA== + X-XSS-Protection: + - 1; mode=block + content-length: + - '3092' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/unit/fixtures/vcr_cassettes/MOD02QKM_2000.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/MOD02QKM_2000.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/TELLUS_GRAC.yaml 
b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_less_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/TELLUS_GRAC.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_less_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/CYGNSS.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_more_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/CYGNSS.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_more_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/MOD02QKM.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get_more_than_2000.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/MOD02QKM.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get_more_than_2000.yaml diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 0c59fc86..b2b0a048 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -11,7 +11,7 @@ class TestCreateAuth(unittest.TestCase): @responses.activate @mock.patch("getpass.getpass") @mock.patch("builtins.input") - def test_auth_gets_proper_credentials(self, user_input, user_password) -> bool: + def test_auth_gets_proper_credentials(self, user_input, user_password): user_input.return_value = "user" user_password.return_value = "password" json_response = [ @@ -53,9 +53,7 @@ def test_auth_gets_proper_credentials(self, user_input, user_password) -> bool: @responses.activate @mock.patch("getpass.getpass") @mock.patch("builtins.input") - def test_auth_can_create_proper_credentials( - self, user_input, user_password - ) -> bool: + def test_auth_can_create_proper_credentials(self, user_input, user_password): user_input.return_value = "user" user_password.return_value = "password" json_response = {"access_token": "EDL-token-1", "expiration_date": "12/15/2021"} @@ -94,7 +92,7 @@ def test_auth_can_create_proper_credentials( @responses.activate 
@mock.patch("getpass.getpass") @mock.patch("builtins.input") - def test_auth_fails_for_wrong_credentials(self, user_input, user_password) -> bool: + def test_auth_fails_for_wrong_credentials(self, user_input, user_password): user_input.return_value = "bad_user" user_password.return_value = "bad_password" json_response = {"error": "wrong credentials"} diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 06f8256d..cc2f8d3a 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -1,26 +1,14 @@ import logging -import unittest import earthaccess -import vcr from earthaccess.search import DataCollections - -my_vcr = vcr.VCR( - record_mode="once", - decode_compressed_response=True, - # Header matching is not set by default, we need that to test the - # search-after functionality is performing correctly. - match_on=["method", "scheme", "host", "port", "path", "query", "headers"], -) +from vcr.unittest import VCRTestCase # type: ignore[import-untyped] logging.basicConfig() -vcr_log = logging.getLogger("vcr") -vcr_log.setLevel(logging.ERROR) - -headers_to_filters = ["authorization", "Set-Cookie", "User-Agent", "Accept-Encoding"] +logging.getLogger("vcr").setLevel(logging.ERROR) -def assert_unique_results(results): +def unique_results(results): """ When we invoke a search request multiple times we want to ensure that we don't get the same results back. This is a one shot test as the results are preserved @@ -30,7 +18,32 @@ def assert_unique_results(results): return len(unique_concept_ids) == len(results) -class TestResults(unittest.TestCase): +class TestResults(VCRTestCase): + def _get_vcr(self, **kwargs): + myvcr = super(TestResults, self)._get_vcr(**kwargs) + myvcr.cassette_library_dir = "tests/unit/fixtures/vcr_cassettes" + myvcr.decode_compressed_response = True + # Header matching is not set by default, we need that to test the + # search-after functionality is performing correctly. 
+ myvcr.match_on = [ + "method", + "scheme", + "host", + "port", + "path", + "query", + "headers", + ] + myvcr.filter_headers = [ + "Accept-Encoding", + "Authorization", + "Cookie", + "Set-Cookie", + "User-Agent", + ] + + return myvcr + def test_data_links(self): granules = earthaccess.search_data( short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205", @@ -58,18 +71,12 @@ def test_get_more_than_2000(self): then we expect multiple invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/MOD02QKM.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = earthaccess.search_data(short_name="MOD02QKM", count=3000) + granules = earthaccess.search_data(short_name="MOD02QKM", count=3000) - self.assertEqual(len(granules), 4000) - - # Assert that we performed one 'hits' search and two 'results' search queries - self.assertEqual(len(cass), 3) - - assert_unique_results(granules) + # Assert that we performed one 'hits' search and two 'results' search queries + self.assertEqual(len(self.cassette), 3) + self.assertEqual(len(granules), 4000) + self.assertTrue(unique_results(granules)) def test_get(self): """ @@ -77,18 +84,12 @@ def test_get(self): to get the maximum no. 
of granules from a single CMR call (2000) in a single request """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/MOD02QKM_2000.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = earthaccess.search_data(short_name="MOD02QKM", count=2000) - - self.assertEqual(len(granules), 2000) + granules = earthaccess.search_data(short_name="MOD02QKM", count=2000) - # Assert that we performed one 'hits' search and one 'results' search queries - self.assertEqual(len(cass), 2) - - assert_unique_results(granules) + # Assert that we performed one 'hits' search and one 'results' search queries + self.assertEqual(len(self.cassette), 2) + self.assertEqual(len(granules), 2000) + self.assertTrue(unique_results(granules)) def test_get_all_less_than_2k(self): """ @@ -96,20 +97,14 @@ def test_get_all_less_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/TELLUS_GRAC.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = earthaccess.search_data( - short_name="TELLUS_GRAC_L3_JPL_RL06_LND_v04", count=2000 - ) - - self.assertEqual(len(granules), 163) - - # Assert that we performed a hits query and one search results query - self.assertEqual(len(cass), 2) + granules = earthaccess.search_data( + short_name="TELLUS_GRAC_L3_JPL_RL06_LND_v04", count=2000 + ) - assert_unique_results(granules) + # Assert that we performed a hits query and one search results query + self.assertEqual(len(self.cassette), 2) + self.assertEqual(len(granules), 163) + self.assertTrue(unique_results(granules)) def test_get_all_more_than_2k(self): """ @@ -117,20 +112,14 @@ def test_get_all_more_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/CYGNSS.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = 
earthaccess.search_data( - short_name="CYGNSS_NOAA_L2_SWSP_25KM_V1.2", count=3000 - ) - - self.assertEqual(len(granules), 2520) - - # Assert that we performed a hits query and two search results queries - self.assertEqual(len(cass), 3) + granules = earthaccess.search_data( + short_name="CYGNSS_NOAA_L2_SWSP_25KM_V1.2", count=3000 + ) - assert_unique_results(granules) + # Assert that we performed a hits query and two search results queries + self.assertEqual(len(self.cassette), 3) + self.assertEqual(len(granules), 2520) + self.assertTrue(unique_results(granules)) def test_collections_less_than_2k(self): """ @@ -138,21 +127,14 @@ def test_collections_less_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/PODAAC.yaml", - filter_headers=headers_to_filters, - ) as cass: - query = DataCollections().daac("PODAAC").cloud_hosted(True) - collections = query.get(20) - - self.assertEqual(len(collections), 20) - - # Assert that we performed a single search results query - self.assertEqual(len(cass), 1) + query = DataCollections().daac("PODAAC").cloud_hosted(True) + collections = query.get(20) - assert_unique_results(collections) - - self.is_using_search_after(cass) + # Assert that we performed a single search results query + self.assertEqual(len(self.cassette), 1) + self.assertEqual(len(collections), 20) + self.assertTrue(unique_results(collections)) + self.assert_is_using_search_after(self.cassette) def test_collections_more_than_2k(self): """ @@ -160,30 +142,21 @@ def test_collections_more_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/ALL.yaml", - filter_headers=headers_to_filters, - ) as cass: - query = DataCollections() - collections = query.get(3000) - - self.assertEqual(len(collections), 4000) - - # Assert that we performed two 
search results queries - self.assertEqual(len(cass), 2) + query = DataCollections() + collections = query.get(3000) - assert_unique_results(collections) + # Assert that we performed two search results queries + self.assertEqual(len(self.cassette), 2) + self.assertEqual(len(collections), 4000) + self.assertTrue(unique_results(collections)) + self.assert_is_using_search_after(self.cassette) - self.is_using_search_after(cass) - - def is_using_search_after(self, cass): - # Verify the page no. was not used + def assert_is_using_search_after(self, cass): first_request = True + for request in cass.requests: + # Verify the page number was not used self.assertTrue("page_num" not in request.uri) # Verify that Search After was used in all requests except first - if first_request: - self.assertFalse("CMR-Search-After" in request.headers) - else: - self.assertTrue("CMR-Search-After" in request.headers) + self.assertEqual(first_request, "CMR-Search-After" not in request.headers) first_request = False