# .github/workflows/deploy.yaml
#
# Deploys this feedstock's recipes with pangeo-forge/deploy-recipe-action.
# Triggers: pushes to `main`, and pull requests targeting `main` (including
# when a label is added, since recipes are selected by label).
name: Deploy recipes

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]
    types: [opened, reopened, synchronize, labeled]

jobs:
  deploy-recipes:
    name: deploy-recipes
    runs-on: ubuntu-latest
    # NOTE(review): no `matrix` is defined for this job, so `fail-fast`
    # presumably has no effect here -- confirm before removing.
    strategy:
      fail-fast: false

    steps:
      - uses: actions/checkout@v3
      - name: "Authenticate to Google Cloud"
        id: "auth"
        uses: "google-github-actions/auth@v1"
        with:
          credentials_json: "${{ secrets.GCP_DATAFLOW_SERVICE_KEY }}"
      - name: "Set prune config based on github event type"
        # Literal block scalar (`|`) keeps every newline of the script. A folded
        # scalar (`>`) joins equally indented lines with spaces, which would turn
        # the if/else/fi into a single invalid shell line.
        # The expression renders as the shell builtin `true` or `false`.
        run: |
          if ${{ github.event_name == 'pull_request' }}; then
            echo prune=true >> "$GITHUB_ENV"
          else
            echo prune=false >> "$GITHUB_ENV"
          fi
      - name: "Deploy recipes"
        # NOTE(review): this action is referenced by the mutable `main` branch
        # and receives secrets; consider pinning to a tag or commit SHA.
        uses: "pangeo-forge/deploy-recipe-action@main"
        with:
          select_recipe_by_label: "true"
          # JSON config passed to the action; `prune` is the value written to
          # GITHUB_ENV by the previous step (true on pull_request, else false).
          pangeo_forge_runner_config: >
            {
              "Bake": {
                "prune": ${{ env.prune }},
                "bakery_class": "pangeo_forge_runner.bakery.dataflow.DataflowBakery"
              },
              "DataflowBakery": {
                "use_public_ips": false,
                "service_account_email": "pangeo-forge-dataflow@pangeo-forge-4967.iam.gserviceaccount.com",
                "project_id": "pangeo-forge-4967",
                "temp_gcs_location": "gs://pangeo-forge-prod-dataflow/temp"
              },
              "TargetStorage": {
                "fsspec_class": "s3fs.S3FileSystem",
                "fsspec_args": {
                  "key": "${{ secrets.PANGEO_FORGE_OSN_KEY }}",
                  "secret": "${{ secrets.PANGEO_FORGE_OSN_SECRET }}",
                  "client_kwargs": {
                    "endpoint_url": "https://ncsa.osn.xsede.org"
                  },
                  "default_cache_type": "none",
                  "default_fill_cache": false,
                  "use_listings_cache": false
                },
                "root_path": "Pangeo/pangeo-forge/aqua-modis-feedstock/{job_name}"
              },
              "InputCacheStorage": {
                "fsspec_class": "gcsfs.GCSFileSystem",
                "root_path": "gs://pangeo-forge-prod-cache"
              }
            }
        env:
          # Path to the credentials file produced by the `auth` step.
          GOOGLE_APPLICATION_CREDENTIALS: "${{ steps.auth.outputs.credentials_file_path }}"
          EARTHDATA_USERNAME: cisaacstern
          EARTHDATA_PASSWORD: "${{ secrets.EARTHDATA_PASSWORD }}"
def make_dates(freq="8D"):
    """Create the list of dates of available data.

    A date grid is built per year for 2002 through 2023, restarting on
    January 1 of each year and stepping by ``freq`` up to December 27.
    Known gaps are then removed:

    * 2002 starts at 2002-07-04,
    * 2022-04-07 is dropped (missing for ``sst``),
    * 2023 ends at 2023-07-20.

    Parameters
    ----------
    freq : str, optional
        Pandas frequency string for the step between dates within a year.
        Defaults to ``"8D"``.

    Returns
    -------
    list
        The remaining dates as ``pandas.Timestamp`` objects, in
        chronological order.
    """
    # Start with a dict of dates as if every year was complete...
    yrs = {
        yr: pd.date_range(f"{yr}-01-01", f"{yr}-12-27", freq=freq)
        for yr in range(2002, 2024)
    }

    # ...but we need to make some edits due to missing data.
    start, stop = yrs[2002].slice_locs("2002-07-04", "2002-12-27")
    yrs[2002] = yrs[2002][start:stop]

    # Missing for `sst`, but not `bbp_443` + `chlor_a`.
    # `errors="ignore"` keeps this a no-op when `freq` produces a grid that
    # does not contain this date, instead of raising KeyError.
    yrs[2022] = yrs[2022].drop([pd.Timestamp("2022-04-07")], errors="ignore")

    start, stop = yrs[2023].slice_locs("2023-01-01", "2023-07-20")
    yrs[2023] = yrs[2023][start:stop]

    # Now flatten everything to a single list.
    return list(itertools.chain.from_iterable(yrs.values()))


dates = make_dates()
variables = ["CHL.chlor_a", "IOP.bbp_443", "SST.sst"]