Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interfaces #277

Merged
merged 11 commits into from
Nov 17, 2022
54 changes: 54 additions & 0 deletions erddapy/core/interfaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Interface between URL responses and third-party libraries.

This module takes a URL or the bytes response of a request and converts it to Pandas,
XArray, Iris, etc. objects.
"""

import iris
import pandas as pd
import xarray as xr
from netCDF4 import Dataset
vinisalazar marked this conversation as resolved.
Show resolved Hide resolved

from erddapy.core.netcdf import _nc_dataset, _tempnc
from erddapy.core.url import urlopen


def to_pandas(url: str, requests_kwargs=None, **kw) -> pd.DataFrame:
    """
    Fetch `url` and parse the response into a Pandas DataFrame.

    Parameters
    ----------
    url : str
        Address handed to `urlopen`.
    requests_kwargs : dict, optional
        Keyword arguments forwarded to `urlopen`; `None` means no extra arguments.
    **kw
        Keyword arguments forwarded to `pd.read_csv`.

    Raises
    ------
    ValueError
        If `pd.read_csv` raises any exception; the original error is chained as the cause.
    """
    fetch_options = requests_kwargs if requests_kwargs is not None else {}
    # The download happens outside the try block, so its errors propagate unchanged.
    payload = urlopen(url, **fetch_options)
    try:
        frame = pd.read_csv(payload, **kw)
    except Exception as err:
        raise ValueError(f"Could not read url {url} with Pandas.read_csv.") from err
    return frame


def to_ncCF(url: str, protocol: str = None, **kw) -> Dataset:
    """
    Open `url` as a netCDF4 Dataset via `_nc_dataset`.

    Parameters
    ----------
    url : str
        Address handed to `_nc_dataset`.
    protocol : str, optional
        Protocol the URL was built for; the value "griddap" is rejected.
    **kw
        Keyword arguments forwarded to `_nc_dataset`. An `auth` entry, if present,
        is taken out and passed explicitly as the `auth` argument (default `None`).

    Raises
    ------
    ValueError
        If `protocol` is "griddap".
    """
    if protocol != "griddap":
        credentials = kw.pop("auth", None)
        return _nc_dataset(url, auth=credentials, **kw)
    raise ValueError(
        f"Cannot use .ncCF with griddap protocol. The URL you tried to access is: '{url}'.",
    )


def to_xarray(
    url: str,
    response="opendap",
    requests_kwargs=None,
    **kw,
) -> xr.Dataset:
    """
    Convert a URL to an xarray dataset.

    Parameters
    ----------
    url : str
        Address of the data to open.
    response : str
        With "opendap" the URL is handed straight to `xr.open_dataset`. Any other
        value downloads the data with `_nc_dataset` and opens the result through
        `xr.backends.NetCDF4DataStore`.
    requests_kwargs : dict, optional
        Keyword arguments forwarded to `_nc_dataset` for the download. Unused when
        `response` is "opendap". `None` means no extra arguments.
    **kw
        Keyword arguments forwarded to `xr.open_dataset`. An `auth` entry, if
        present, is removed and passed to `_nc_dataset` instead.

    Notes
    -----
    Download options and `xr.open_dataset` options are kept apart: a single
    keyword set forwarded to both callables would be rejected by whichever one
    does not understand it.
    """
    # `auth` belongs to the download step, never to xarray.
    auth = kw.pop("auth", None)
    if response == "opendap":
        return xr.open_dataset(url, **kw)

    if requests_kwargs is None:
        requests_kwargs = {}
    nc = _nc_dataset(url, auth=auth, **requests_kwargs)
    return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw)


def to_iris(url: str, requests_kwargs=None, **kw):
    """
    Convert a URL to an iris CubeList.

    Parameters
    ----------
    url : str
        Address handed to `urlopen`.
    requests_kwargs : dict, optional
        Keyword arguments forwarded to `urlopen` for the download; `None` means
        no extra arguments.
    **kw
        Keyword arguments forwarded to `iris.load_raw`.

    Returns
    -------
    The cubes returned by `iris.load_raw`.

    Notes
    -----
    Download options and `iris.load_raw` options are kept apart: a single
    keyword set forwarded to both callables would be rejected by whichever one
    does not understand it.
    """
    if requests_kwargs is None:
        requests_kwargs = {}
    data = urlopen(url, **requests_kwargs)
    with _tempnc(data) as tmp:
        cubes = iris.load_raw(tmp, **kw)
        # Access every cube's data while the temporary file is still open;
        # presumably the data is loaded lazily and would be unreadable afterwards.
        _ = [cube.data for cube in cubes]
    return cubes
38 changes: 12 additions & 26 deletions erddapy/erddapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
_griddap_check_variables,
_griddap_get_constraints,
)
from erddapy.core.netcdf import _nc_dataset, _tempnc
from erddapy.core.interfaces import to_iris, to_ncCF, to_pandas, to_xarray
from erddapy.core.url import (
_check_substrings,
_distinct,
Expand Down Expand Up @@ -344,50 +344,36 @@ def to_pandas(self, **kw):
"""
response = kw.pop("response", "csvp")
url = self.get_download_url(response=response, **kw)
data = urlopen(url, auth=self.auth, **self.requests_kwargs)
return pd.read_csv(data, **kw)
return to_pandas(url, **kw)

def to_ncCF(self, **kw):
def to_ncCF(self, protocol: str = None, **kw):
"""Load the data request into a Climate and Forecast compliant netCDF4-python object."""
if self.protocol == "griddap":
return ValueError("Cannot use ncCF with griddap.")
protocol = protocol if protocol else self.protocol
url = self.get_download_url(response="ncCF", **kw)
nc = _nc_dataset(url, auth=self.auth, **self.requests_kwargs)
return nc
return to_ncCF(url, protocol=protocol, **kw)

def to_xarray(self, **kw):
"""Load the data request into a xarray.Dataset.

Accepts any `xr.open_dataset` keyword arguments.
"""
import xarray as xr

if self.response == "opendap":
url = self.get_download_url()
return xr.open_dataset(url, **kw)
response = "opendap"
elif self.protocol == "griddap":
response = "nc"
else:
response = "nc" if self.protocol == "griddap" else "ncCF"
url = self.get_download_url(response=response)
nc = _nc_dataset(url, auth=self.auth, **self.requests_kwargs)
return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw)
response = "ncCF"
url = self.get_download_url(response=response)
return to_xarray(url, response=response, auth=self.auth, **kw)

def to_iris(self, **kw):
"""Load the data request into an iris.CubeList.

Accepts any `iris.load_raw` keyword arguments.
"""
import iris

response = "nc" if self.protocol == "griddap" else "ncCF"
url = self.get_download_url(response=response, **kw)
data = urlopen(url, auth=self.auth, **self.requests_kwargs)
with _tempnc(data) as tmp:
cubes = iris.load_raw(tmp, **kw)
try:
cubes.realise_data()
except ValueError:
_ = [cube.data for cube in cubes]
return cubes
return to_iris(url, **kw)

@functools.lru_cache(maxsize=None)
def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pytest-flake8
pytest-sugar
pytest-vcr
regionmask
scitools-iris>=3
scitools-iris>=3.3.0
setuptools_scm
sphinx
twine
Expand Down
6 changes: 4 additions & 2 deletions tests/test_erddapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
_format_constraints_url,
_quote_string_constraints,
parse_dates,
urlopen,
)
from erddapy.erddapy import ERDDAP

Expand Down Expand Up @@ -103,13 +104,14 @@ def test_erddap_requests_kwargs():
slowwly_url = f"https://flash-the-slow-api.herokuapp.com/delay/{slowwly_milliseconds}/url/{base_url}"

connection = ERDDAP(slowwly_url)
connection.dataset_id = "M01_sbe37_all"
connection.dataset_id = "raw_asset_inventory"
connection.protocol = "tabledap"

connection.requests_kwargs["timeout"] = timeout_seconds

with pytest.raises(httpx.ReadTimeout):
connection.to_xarray()
url = connection.get_download_url()
_ = urlopen(url, **connection.requests_kwargs)


@pytest.mark.web
Expand Down