Skip to content

Commit

Permalink
Merge pull request #3 from pangeo-forge/filename-test
Browse files Browse the repository at this point in the history
Validate filenames
  • Loading branch information
cisaacstern authored Aug 31, 2023
2 parents 7b08cc1 + c739462 commit 52dd65f
Show file tree
Hide file tree
Showing 8 changed files with 95,028 additions and 3 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/unit-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Runs the filename-validation unit tests on pushes and pull requests to main.
name: Unit test

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  validate-fnames:
    # Display name matches what the job does (it runs tests; it does not deploy).
    name: validate-fnames
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        # v2 runs on the deprecated Node 12 runtime; v4 matches checkout@v3.
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r feedstock/requirements.txt
          python -m pip install aiohttp apache-beam pandas
          python -m pip install pytest
      - name: Test
        run: pytest -v tests/
        env:
          # Placeholder credentials supplied to the test run via the environment.
          EARTHDATA_USERNAME: "foo"
          EARTHDATA_PASSWORD: "bar"
8 changes: 7 additions & 1 deletion feedstock/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ def make_dates(freq="8D"):

def make_modis_url(time: pd.Timestamp, var: str) -> str:
fmt = "%Y%m%d"
end = time + dt.timedelta(days=7)
end = (
# typically the end timestamp is 7 days ahead...
time + dt.timedelta(days=7)
# unless this is dec26 or dec27, then end is dec31
if not time.strftime("%m-%d") in ("12-26", "12-27")
else pd.Timestamp(year=time.year, month=12, day=31)
)
return (
"https://oceandata.sci.gsfc.nasa.gov/ob/getfile/"
f"AQUA_MODIS.{time.strftime(fmt)}_{end.strftime(fmt)}.L3m.8D.{var}.4km.nc"
Expand Down
3 changes: 1 addition & 2 deletions feedstock/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# FIXME: pinned to latest commit on main because concurrency limiting is not released yet
git+https://github.com/pangeo-forge/pangeo-forge-recipes.git@f8dd0387b011eb16aefa178edd4c371ee414a445#egg=pangeo_forge_recipes
git+https://github.com/pangeo-forge/pangeo-forge-recipes.git@target-root-default#egg=pangeo_forge_recipes
3 changes: 3 additions & 0 deletions resources/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The text files stored as `./filenames/*.txt` are copied from the results of queries for the
desired variables, submitted to https://oceandata.sci.gsfc.nasa.gov/api/file_search/, with the
`Display results as text, one file name per line` option selected.
36,784 changes: 36,784 additions & 0 deletions resources/filenames/color.txt

Large diffs are not rendered by default.

54,264 changes: 54,264 additions & 0 deletions resources/filenames/iop.txt

Large diffs are not rendered by default.

3,870 changes: 3,870 additions & 0 deletions resources/filenames/sst.txt

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions tests/test_fnames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import sys
from pathlib import Path

import pytest

# 'feedstock' is not actually an installed package, so make it discoverable here
sys.path.append((Path(__file__).parent.parent / "feedstock").absolute().as_posix())
from recipe import dates, make_modis_url, variables # type: ignore


@pytest.fixture
def expected():
    """Return the sorted filenames the recipe is expected to generate.

    Reads every ``*.txt`` listing under ``resources/filenames`` (located
    relative to this test file, so the result does not depend on the
    directory pytest is invoked from), keeps only the 4km entries for the
    variables used by the recipe, and drops the one date known to be missing
    from the sst listing.
    """
    # anchor on this file rather than the current working directory
    fnames_dir = Path(__file__).resolve().parent.parent / "resources" / "filenames"

    # load all filenames text files (sorted for a deterministic read order)
    fnames = []
    for p in sorted(fnames_dir.glob("*.txt")):
        fnames += p.read_text().splitlines()
    # fail loudly if the listings could not be found at all
    assert fnames, f"no filename listings found in {fnames_dir}"

    # filter filenames to only 4km data for the selected variables
    selected = [f for f in fnames if "4km" in f and any(f".{v}" in f for v in variables)]
    # we've found that the following date is missing from sst *only* (not other variables)
    missing = "20220407"
    # first of all, confirm that this is indeed the case
    assert not any(missing in f and "sst" in f for f in selected)  # missing in sst
    assert any(missing in f and "chlor_a" in f for f in selected)  # present in chlor_a
    assert any(missing in f and "bbp_443" in f for f in selected)  # present in bbp_443
    # now drop it from all variables, because we're not currently using it in the recipe
    return sorted(f for f in selected if missing not in f)


@pytest.fixture
def generated():
    """Build the sorted list of filenames produced by the recipe's url logic.

    The `expected` list holds bare filenames rather than full urls, so only
    the part of each url after ``getfile/`` is kept.
    """
    names = []
    for d in dates:
        for var in variables:
            url = make_modis_url(d, var)
            names.append(url.split("getfile/")[-1])
    return sorted(names)


@pytest.fixture
def diff(expected: list, generated: list) -> list[dict]:
    """Two-way diff of the fname lists.

    Returns one ``{"exp": ..., "gen": ...}`` dict per mismatch, pairing the
    sorted names that are expected-but-not-generated with the sorted names
    that are generated-but-not-expected. When one side has fewer entries than
    the other, the shorter side is padded with ``None`` so that a one-sided
    mismatch still shows up in the result. The list is empty only when both
    inputs contain exactly the same names.
    """
    from itertools import zip_longest

    # sort so that the pairing (and the printed report) is deterministic
    expected_but_not_generated = sorted(set(expected) - set(generated))
    generated_but_not_expected = sorted(set(generated) - set(expected))

    # plain zip() would truncate to the shorter side and silently hide
    # mismatches whenever one of the two differences is empty
    return [
        {"exp": exp, "gen": gen}
        for exp, gen in zip_longest(expected_but_not_generated, generated_but_not_expected)
    ]


def test_fnames(diff: list[dict]):
    """Assert that the expected and generated filename lists are identical."""

    # dump every mismatching pair first, so a failure is easy to inspect
    for mismatch in diff:
        for label in mismatch:
            print(label, mismatch[label])
        print("-")
    # an empty diff is the only acceptable outcome
    assert not diff

0 comments on commit 52dd65f

Please sign in to comment.