Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate filenames #3

Merged
merged 7 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/unit-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# CI workflow that runs the repository's pytest suite.
name: Unit test

# Run on pushes to `main` and on pull requests that target `main`.
on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  validate-fnames:
    # NOTE(review): the display name says "deploy-recipes" while the job id is
    # `validate-fnames` and the steps only run pytest -- looks like a copy/paste
    # leftover; confirm before renaming, since status checks may key on it.
    name: deploy-recipes
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install dependencies
        # The recipe's own requirements are installed first, then the extra
        # packages and pytest itself.
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r feedstock/requirements.txt
          python -m pip install aiohttp apache-beam pandas
          python -m pip install pytest
      - name: Test
        run: pytest -v tests/
        env:
          # Placeholder values, not real credentials.
          # NOTE(review): presumably the recipe module reads these variables when
          # it is imported by the tests -- confirm against feedstock/recipe.py.
          EARTHDATA_USERNAME: "foo"
          EARTHDATA_PASSWORD: "bar"
8 changes: 7 additions & 1 deletion feedstock/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ def make_dates(freq="8D"):

def make_modis_url(time: pd.Timestamp, var: str) -> str:
fmt = "%Y%m%d"
end = time + dt.timedelta(days=7)
end = (
# typically the end timestamp is 7 days ahead...
time + dt.timedelta(days=7)
# unless this is dec26 or dec27, then end is dec31
if not time.strftime("%m-%d") in ("12-26", "12-27")
else pd.Timestamp(year=time.year, month=12, day=31)
)
return (
"https://oceandata.sci.gsfc.nasa.gov/ob/getfile/"
f"AQUA_MODIS.{time.strftime(fmt)}_{end.strftime(fmt)}.L3m.8D.{var}.4km.nc"
Expand Down
3 changes: 1 addition & 2 deletions feedstock/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# FIXME: pinned to latest commit on main because concurrency limiting is not released yet
git+https://github.com/pangeo-forge/pangeo-forge-recipes.git@f8dd0387b011eb16aefa178edd4c371ee414a445#egg=pangeo_forge_recipes
git+https://github.com/pangeo-forge/pangeo-forge-recipes.git@target-root-default#egg=pangeo_forge_recipes
3 changes: 3 additions & 0 deletions resources/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The text files stored as `./filenames/*.txt` are copied from the results of queries for the
desired variables, submitted to https://oceandata.sci.gsfc.nasa.gov/api/file_search/, with the
`Display results as text, one file name per line` option selected.
36,784 changes: 36,784 additions & 0 deletions resources/filenames/color.txt

Large diffs are not rendered by default.

54,264 changes: 54,264 additions & 0 deletions resources/filenames/iop.txt

Large diffs are not rendered by default.

3,870 changes: 3,870 additions & 0 deletions resources/filenames/sst.txt

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions tests/test_fnames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import sys
from pathlib import Path

import pytest

# 'feedstock' is not actually an installed package, so make it discoverable here
sys.path.append((Path(__file__).parent.parent / "feedstock").absolute().as_posix())
from recipe import dates, make_modis_url, variables # type: ignore


@pytest.fixture
def expected():
    """Return the sorted list of filenames the recipe is expected to generate.

    The names are read from the ``resources/filenames/*.txt`` listings at the
    repository root, filtered down to 4km files for the variables selected in
    the recipe. One date known to be missing from ``sst`` is dropped for every
    variable.

    Raises:
        AssertionError: if the known-missing date is no longer missing from
            ``sst``, or is absent from ``chlor_a`` / ``bbp_443``.
    """
    # Resolve the listings relative to this test file, not the current working
    # directory, so the test behaves the same wherever pytest is invoked from.
    fnames_dir = Path(__file__).parent.parent / "resources" / "filenames"

    # Load every filename listing; only ``*.txt`` files are listings, so stray
    # files in the directory are ignored.
    fnames = []
    for listing in sorted(fnames_dir.glob("*.txt")):
        fnames += listing.read_text().splitlines()

    # Filter filenames to only 4km data for the selected variables.
    selected = [
        f for f in fnames
        if "4km" in f and any(f".{v}" in f for v in variables)
    ]

    # We've found that the following date is missing from sst *only* (not other variables).
    missing = "20220407"
    # First of all, confirm that this is indeed the case.
    assert not any(missing in f and "sst" in f for f in selected)  # missing in sst
    assert any(missing in f and "chlor_a" in f for f in selected)  # present in chlor_a
    assert any(missing in f and "bbp_443" in f for f in selected)  # present in bbp_443

    # Now drop it from all variables, because we're not currently using it in the recipe.
    return sorted(f for f in selected if missing not in f)


@pytest.fixture
def generated():
    """Return the sorted filenames produced by the recipe's URL logic.

    ``make_modis_url`` yields full URLs, whereas the expected list holds bare
    filenames, so only the part after ``getfile/`` is kept.
    """
    fnames = []
    for d in dates:
        for var in variables:
            url = make_modis_url(d, var)
            # keep only the trailing filename component of the URL
            fnames.append(url.split("getfile/")[-1])
    return sorted(fnames)


@pytest.fixture
def diff(expected: list, generated: list) -> list[dict]:
    """Two-way diff of the fname lists.

    Args:
        expected: filenames that should exist according to the reference listings.
        generated: filenames produced by the recipe logic.

    Returns:
        A list of ``{"exp": ..., "gen": ...}`` records pairing, in sorted order,
        names that are expected but not generated with names that are generated
        but not expected. When one side has more unmatched names than the
        other, the shorter side is padded with ``None``. The list is empty only
        if the two inputs contain exactly the same names.
    """
    from itertools import zip_longest

    # Sort so the reported pairs are deterministic across runs (set order is not).
    expected_but_not_generated = sorted(set(expected) - set(generated))
    generated_but_not_expected = sorted(set(generated) - set(expected))

    # zip_longest, not zip: plain zip stops at the shorter list, so a mismatch
    # present on only one side would give an empty diff and a false pass.
    return [
        {"exp": exp, "gen": gen}
        for exp, gen in zip_longest(expected_but_not_generated, generated_but_not_expected)
    ]


def test_fnames(diff: list[dict]):
    """Assert that the expected and generated filename lists are identical."""

    # Dump every mismatched record first so a failing run shows what differs.
    for record in diff:
        for label in record:
            print(label, record[label])
        print("-")

    # An empty diff is the only acceptable outcome.
    assert not diff
Loading