diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index db254943..5433604b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,3 +21,17 @@ jobs: run: pip install -r requirements.txt - name: Validate collections run: python scripts/validate_collections.py + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: "pip" + - name: Install dependencies + run: pip install -r requirements.txt + - name: Run pre-commit hooks + run: pre-commit run --all-files diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc new file mode 100644 index 00000000..425004ad --- /dev/null +++ b/.markdownlint-cli2.jsonc @@ -0,0 +1,5 @@ +{ + "config": { + "MD013": false // disable line length checks + } +} \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..4363b456 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-yaml + - id: file-contents-sorter + files: ^requirements.in$ + - id: trailing-whitespace + exclude: ^ingestion-data/.*$ + - repo: https://github.com/DavidAnson/markdownlint-cli2 + rev: v0.10.0 + hooks: + - id: markdownlint-cli2 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.9.1 + hooks: + - id: black + - id: black-jupyter + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.292 + hooks: + - id: ruff diff --git a/README.md b/README.md index 14eba747..b36024f0 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # veda-data +[![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/nasa-impact/veda-data/ci.yaml?style=for-the-badge&label=CI)](https://github.com/NASA-IMPACT/veda-data/actions/workflows/ci.yaml) + This repository houses data used to 
define a VEDA dataset to load into the [VEDA catalog](https://nasa-impact.github.io/veda-docs/services/apis.html). Inclusion in the VEDA catalog is a prerequisite for displaying the dataset in the [VEDA Dashboard](https://www.earthdata.nasa.gov/dashboard/). The data provided here gets processed in the ingestion system [veda-data-airflow](https://github.com/NASA-IMPACT/veda-data-airflow), to which this repository is directly linked (as a Git submodule). - ## Dataset Submission Process The VEDA user docs explain the full [dataset submission process](https://nasa-impact.github.io/veda-docs/contributing/dataset-ingestion/). @@ -12,15 +13,14 @@ The VEDA user docs explain the full [dataset submission process](https://nasa-im Ultimately, submission to the VEDA catalog requires that you [open an issue with the "new dataset" template](https://github.com/NASA-IMPACT/veda-data/issues/new?assignees=&labels=dataset&projects=&template=new-dataset.yaml&title=New+Dataset%3A+%3Cdataset+title%3E). This template will require, at minimum: 1. a description of the dataset -2. the location of the data (in S3, CMR, etc.), and -3. a point of contact for the VEDA team to collaborate with. +2. the location of the data (in S3, CMR, etc.), and +3. a point of contact for the VEDA team to collaborate with. One or more notebooks showing how the data should be processed would be appreciated. - ## Ingestion Data Structure -When submitting STAC records to ingest, a pull request can be made with the data structured as described below. +When submitting STAC records to ingest, a pull request can be made with the data structured as described below. 
### `collections/` @@ -92,13 +92,13 @@ Should follow the following format: "bucket": "", "filename_regex": "", "datetime_range": "", - + ## for cmr discovery "version": "", "temporal": ["", ""], "bounding_box": [""], "include": "", - + ### misc "cogify": "", "upload": "", @@ -123,13 +123,22 @@ python scripts/validate_collections.py ## Development -If you need to add new dependencies, first install the requirements: +We use [pre-commit](https://pre-commit.com/) hooks to keep our notebooks and Python scripts consistently formatted. +To contribute, first install the requirements, then install the **pre-commit** hooks: ```shell -pip install -r requirements.txt +pip install -r requirements.txt # recommend a virtual environment +pre-commit install +``` + +The hooks will run automatically on any changed files when you commit. +To run the hooks on the entire repository (which is what happens in CI): + +```shell +pre-commit run --all-files ``` -Add your dependency to `requirements.in` *without a version specifier* (unless you really need one). 
+If you need to add a Python dependency, add your dependency to `requirements.in`: Then run: ```shell diff --git a/requirements.in b/requirements.in index 71b6894b..0dd68fa0 100644 --- a/requirements.in +++ b/requirements.in @@ -1,2 +1,5 @@ +black[jupyter] pip-tools +pre-commit pystac[validation] +ruff diff --git a/requirements.txt b/requirements.txt index f8c8d163..e6a8b056 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,18 +4,74 @@ # # pip-compile # +appnope==0.1.3 + # via ipython +asttokens==2.4.0 + # via stack-data attrs==23.1.0 # via jsonschema +backcall==0.2.0 + # via ipython +black[jupyter]==23.9.1 + # via -r requirements.in build==1.0.3 # via pip-tools +cfgv==3.4.0 + # via pre-commit click==8.1.7 - # via pip-tools + # via + # black + # pip-tools +decorator==5.1.1 + # via ipython +distlib==0.3.7 + # via virtualenv +executing==2.0.0 + # via stack-data +filelock==3.12.4 + # via virtualenv +identify==2.5.30 + # via pre-commit +ipython==8.16.1 + # via black +jedi==0.19.1 + # via ipython jsonschema==4.17.3 # via pystac +matplotlib-inline==0.1.6 + # via ipython +mypy-extensions==1.0.0 + # via black +nodeenv==1.8.0 + # via pre-commit packaging==23.1 - # via build + # via + # black + # build +parso==0.8.3 + # via jedi +pathspec==0.11.2 + # via black +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython pip-tools==7.3.0 # via -r requirements.in +platformdirs==3.11.0 + # via + # black + # virtualenv +pre-commit==3.4.0 + # via -r requirements.in +prompt-toolkit==3.0.39 + # via ipython +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pygments==2.16.1 + # via ipython pyproject-hooks==1.0.0 # via build pyrsistent==0.19.3 @@ -24,8 +80,26 @@ pystac[validation]==1.8.3 # via -r requirements.in python-dateutil==2.8.2 # via pystac +pyyaml==6.0.1 + # via pre-commit +ruff==0.0.292 + # via -r requirements.in six==1.16.0 - # via python-dateutil + # via + # asttokens + # python-dateutil +stack-data==0.6.3 + # via ipython 
+tokenize-rt==5.2.0 + # via black +traitlets==5.11.2 + # via + # ipython + # matplotlib-inline +virtualenv==20.24.5 + # via pre-commit +wcwidth==0.2.8 + # via prompt-toolkit wheel==0.41.2 # via pip-tools diff --git a/transformation-scripts/cmip6-kerchunk/generate-cmip6-kerchunk-historical.ipynb b/transformation-scripts/cmip6-kerchunk/generate-cmip6-kerchunk-historical.ipynb index 94a5763b..9efd475b 100644 --- a/transformation-scripts/cmip6-kerchunk/generate-cmip6-kerchunk-historical.ipynb +++ b/transformation-scripts/cmip6-kerchunk/generate-cmip6-kerchunk-historical.ipynb @@ -34,8 +34,8 @@ "from kerchunk.hdf import SingleHdf5ToZarr\n", "from typing import Dict\n", "\n", - "# Specify the CMIP model and variable to use. \n", - "# Here we are using near-surface air temperature from the GISS-E2-1-G GCM \n", + "# Specify the CMIP model and variable to use.\n", + "# Here we are using near-surface air temperature from the GISS-E2-1-G GCM\n", "model = \"GISS-E2-1-G\"\n", "variable = \"tas\"\n", "# If this code were re-used for a protected bucket, anon should be False.\n", @@ -126,18 +126,20 @@ "source": [ "so = dict(mode=\"rb\", anon=anon, default_fill_cache=False, default_cache_type=\"first\")\n", "\n", + "\n", "# Use Kerchunk's `SingleHdf5ToZarr` method to create a `Kerchunk` index from a NetCDF file.\n", "def generate_json_reference(u):\n", " with fs_read.open(u, **so) as infile:\n", - " fname = u.split(\"/\")[-1].strip(\".nc\") \n", + " fname = u.split(\"/\")[-1].strip(\".nc\")\n", " h5chunks = SingleHdf5ToZarr(infile, u, inline_threshold=300)\n", " return fname, ujson.dumps(h5chunks.translate()).encode()\n", - " \n", + "\n", + "\n", "def write_json(fname, reference_json, temp_dir):\n", " outf = os.path.join(temp_dir, f\"{fname}.json\")\n", " with open(outf, \"wb\") as f:\n", " f.write(reference_json)\n", - " return outf " + " return outf" ] }, { @@ -297,7 +299,10 @@ "bag = db.from_sequence(all_files, partition_size=1)\n", "result = db.map(generate_json_reference, 
bag)\n", "all_references = result.compute()\n", - "output_files = [write_json(fname, reference_json, temp_dir) for fname, reference_json in all_references]" + "output_files = [\n", + " write_json(fname, reference_json, temp_dir)\n", + " for fname, reference_json in all_references\n", + "]" ] }, { @@ -331,11 +336,11 @@ "%%time\n", "mzz = MultiZarrToZarr(\n", " output_files,\n", - " remote_protocol='s3',\n", - " remote_options={'anon': anon},\n", - " concat_dims=['time'],\n", + " remote_protocol=\"s3\",\n", + " remote_options={\"anon\": anon},\n", + " concat_dims=[\"time\"],\n", " coo_map={\"time\": \"cf:time\"},\n", - " inline_threshold=0\n", + " inline_threshold=0,\n", ")\n", "multi_kerchunk = mzz.translate()" ] @@ -878,9 +883,13 @@ } ], "source": [ - "s3 = boto3.client('s3')\n", - "upload_bucket_name = 'veda-data-store-staging'\n", - "response = s3.upload_file(output_location, upload_bucket_name, f'cmip6-{model}-{variable}-kerchunk/{output_fname}')\n", + "s3 = boto3.client(\"s3\")\n", + "upload_bucket_name = \"veda-data-store-staging\"\n", + "response = s3.upload_file(\n", + " output_location,\n", + " upload_bucket_name,\n", + " f\"cmip6-{model}-{variable}-kerchunk/{output_fname}\",\n", + ")\n", "# None is good.\n", "print(f\"Response uploading {output_fname} to {upload_bucket_name} was {response}.\")" ] diff --git a/transformation-scripts/cmip6-kerchunk/publish-cmip6-kerchunk-stac.ipynb b/transformation-scripts/cmip6-kerchunk/publish-cmip6-kerchunk-stac.ipynb index 730d2b70..cfa68934 100644 --- a/transformation-scripts/cmip6-kerchunk/publish-cmip6-kerchunk-stac.ipynb +++ b/transformation-scripts/cmip6-kerchunk/publish-cmip6-kerchunk-stac.ipynb @@ -63,13 +63,9 @@ } ], "source": [ - "dataset_url = 's3://veda-data-store-staging/cmip6-GISS-E2-1-G-tas-kerchunk/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk.json'\n", + "dataset_url = \"s3://veda-data-store-staging/cmip6-GISS-E2-1-G-tas-kerchunk/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk.json\"\n", "\n", - 
"xr_open_args = {\n", - " \"engine\": \"zarr\",\n", - " \"decode_coords\": \"all\",\n", - " \"consolidated\": False\n", - "}\n", + "xr_open_args = {\"engine\": \"zarr\", \"decode_coords\": \"all\", \"consolidated\": False}\n", "\n", "fs = fsspec.filesystem(\n", " \"reference\",\n", @@ -102,18 +98,23 @@ }, "outputs": [], "source": [ - "spatial_extent_values = [ds.lon[0].values, ds.lat[0].values, ds.lon[-1].values, ds.lat[-1].values]\n", + "spatial_extent_values = [\n", + " ds.lon[0].values,\n", + " ds.lat[0].values,\n", + " ds.lon[-1].values,\n", + " ds.lat[-1].values,\n", + "]\n", "spatial_extent = list(map(int, spatial_extent_values))\n", - "_id = 'combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_TEST'\n", + "_id = \"combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_TEST\"\n", "zarr_asset = pystac.Asset(\n", - " title='zarr',\n", + " title=\"zarr\",\n", " href=dataset_url,\n", - " media_type='application/vnd+zarr',\n", - " roles=['data'],\n", + " media_type=\"application/vnd+zarr\",\n", + " roles=[\"data\"],\n", ")\n", "extent = pystac.Extent(\n", " spatial=pystac.SpatialExtent(bboxes=[spatial_extent]),\n", - " temporal=pystac.TemporalExtent([[None, None]])\n", + " temporal=pystac.TemporalExtent([[None, None]]),\n", ")" ] }, @@ -137,7 +138,11 @@ "providers = [\n", " pystac.Provider(\n", " name=\"VEDA\",\n", - " roles=[pystac.ProviderRole.PRODUCER, pystac.ProviderRole.PROCESSOR, pystac.ProviderRole.HOST],\n", + " roles=[\n", + " pystac.ProviderRole.PRODUCER,\n", + " pystac.ProviderRole.PROCESSOR,\n", + " pystac.ProviderRole.HOST,\n", + " ],\n", " url=\"https://www.earthdata.nasa.gov/dashboard/data-catalog\",\n", " )\n", "]" @@ -161,11 +166,11 @@ "collection = pystac.Collection(\n", " id=_id,\n", " extent=extent,\n", - " assets = {'zarr': zarr_asset},\n", - " description='for zarr testing',\n", + " assets={\"zarr\": zarr_asset},\n", + " description=\"for zarr testing\",\n", " providers=providers,\n", - " 
stac_extensions=['https://stac-extensions.github.io/datacube/v2.0.0/schema.json'],\n", - " license=\"CC0-1.0\"\n", + " stac_extensions=[\"https://stac-extensions.github.io/datacube/v2.0.0/schema.json\"],\n", + " license=\"CC0-1.0\",\n", ")" ] }, @@ -207,7 +212,7 @@ " y_dimension=\"lat\",\n", " # TODO: get this from attributes if possible\n", " reference_system=\"4326\",\n", - " validate=False\n", + " validate=False,\n", ")\n", "# It should validate, yay!\n", "collection.validate()" @@ -234,11 +239,11 @@ "source": [ "# The VEDA STAC ingestor requires a few more fields\n", "dataset = collection.to_dict()\n", - "dataset['data_type'] = 'zarr'\n", - "dataset['collection'] = _id\n", - "dataset['title'] = 'CMIP6 Daily GISS-E2-1-G TAS Kerchunk (DEMO)'\n", - "dataset['dashboard:is_periodic'] = True\n", - "dataset['dashboard:time_density'] = 'day'" + "dataset[\"data_type\"] = \"zarr\"\n", + "dataset[\"collection\"] = _id\n", + "dataset[\"title\"] = \"CMIP6 Daily GISS-E2-1-G TAS Kerchunk (DEMO)\"\n", + "dataset[\"dashboard:is_periodic\"] = True\n", + "dataset[\"dashboard:time_density\"] = \"day\"" ] }, { @@ -279,7 +284,9 @@ " \"content-type\": \"application/json\",\n", " \"accept\": \"application/json\",\n", "}\n", - "response = requests.post((STAC_INGESTOR_API + \"api/ingest/collections\"), json=dataset, headers=headers)\n", + "response = requests.post(\n", + " (STAC_INGESTOR_API + \"api/ingest/collections\"), json=dataset, headers=headers\n", + ")\n", "\n", "print(response.text)" ] diff --git a/transformation-scripts/fwi-stations/FWI_Transformation.ipynb b/transformation-scripts/fwi-stations/FWI_Transformation.ipynb index ed0a7c97..0ca3196e 100644 --- a/transformation-scripts/fwi-stations/FWI_Transformation.ipynb +++ b/transformation-scripts/fwi-stations/FWI_Transformation.ipynb @@ -59,9 +59,9 @@ "outputs": [], "source": [ "r = client.list_objects_v2(\n", - " Bucket = BUCKET,\n", - " Prefix = PREFIX,\n", - " )" + " Bucket=BUCKET,\n", + " Prefix=PREFIX,\n", + ")" ] }, { @@ 
-335,31 +335,41 @@ "\n", " id = file.split(\"/\")[-1].split(\"-\")[0]\n", "\n", - " try: \n", + " try:\n", " df = pd.read_csv(fwi_obj)\n", " df = df.reset_index()\n", - " df.rename(columns={\"index\": \"year\", \"YYYY\": \"month\",\"MM\":\"day\",\"DD\":\"hour\",\"HH\":\"minute\"}, inplace=True)\n", + " df.rename(\n", + " columns={\n", + " \"index\": \"year\",\n", + " \"YYYY\": \"month\",\n", + " \"MM\": \"day\",\n", + " \"DD\": \"hour\",\n", + " \"HH\": \"minute\",\n", + " },\n", + " inplace=True,\n", + " )\n", "\n", - " dt = pd.to_datetime(df[['year', 'month', 'day','hour','minute']])\n", - " df.insert(loc=0, column='t', value=dt)\n", + " dt = pd.to_datetime(df[[\"year\", \"month\", \"day\", \"hour\", \"minute\"]])\n", + " df.insert(loc=0, column=\"t\", value=dt)\n", "\n", - " filter = stations[stations[\"USAF\"]==id].iloc[0]\n", - " \n", - " df[\"lat\"]=filter[\"LAT\"]\n", - " df[\"lon\"]=filter[\"LON\"]\n", - " df[\"stn_id\"]=id\n", - " df[\"stn_name\"]=filter[\"STATION NAME\"]\n", - " \n", - " filename = file.split(\"/\")[-1].replace('.csv', '_transformed.csv')\n", + " filter = stations[stations[\"USAF\"] == id].iloc[0]\n", + "\n", + " df[\"lat\"] = filter[\"LAT\"]\n", + " df[\"lon\"] = filter[\"LON\"]\n", + " df[\"stn_id\"] = id\n", + " df[\"stn_name\"] = filter[\"STATION NAME\"]\n", + "\n", + " filename = file.split(\"/\")[-1].replace(\".csv\", \"_transformed.csv\")\n", " print(f\"Uploading to S3: {filename}\")\n", - " \n", + "\n", " csv_buffer = StringIO()\n", " df.to_csv(csv_buffer, index=False)\n", - " client.put_object(Body=csv_buffer.getvalue(), Bucket=BUCKET, Key=DST_PREFIX+filename)\n", - " \n", + " client.put_object(\n", + " Body=csv_buffer.getvalue(), Bucket=BUCKET, Key=DST_PREFIX + filename\n", + " )\n", + "\n", " except Exception as e:\n", - " print(f\"Exception: {file.split('/')[-1]} {e}\")\n", - " " + " print(f\"Exception: {file.split('/')[-1]} {e}\")" ] }, { diff --git a/transformation-scripts/landsat-lakes-discovery/README.md 
b/transformation-scripts/landsat-lakes-discovery/README.md index 4f91d0da..8a20071f 100644 --- a/transformation-scripts/landsat-lakes-discovery/README.md +++ b/transformation-scripts/landsat-lakes-discovery/README.md @@ -1,20 +1,21 @@ # Subset collections of Landsat 7-9 - ## Y tho? For some stories to be told in the VEDA or EO Dashboard, we want to have nice pre-filtered collections of Landsat scenes that cover a specific AOI, so users can quickly browse through a long time series of nice imagery and visually see the changes the story intends to highlight. We only need RGB and cross-mission physical consistency is not that important, so combined time series of Landsat-7 through 9 are fine. - ## How to select the right path/row - manual method To select the right scenes, we simply filter by two criteria + 1. Low cloud cover (> 5% or so, depending on the local conditions) 2. A subset of WRS2 path/row combinations that cover the AOI -1. Grab the Landsat WRS2 scene outlines from https://www.usgs.gov/media/files/landsat-wrs-2-scene-boundaries-kml-file +To process the data: + +1. Grab the Landsat WRS2 scene outlines from <https://www.usgs.gov/media/files/landsat-wrs-2-scene-boundaries-kml-file> 2. Load it into QGIS 3. Zoom to your AOI / object of interest or get a geometry for it from somewhere 4. Find which features (path/row polygons) combined best cover the AOI @@ -22,4 +23,4 @@ To select the right scenes, we simply filter by two criteria 6. Export the Python code for that query 7. Run it and process the results in a notebook like the ones included here -This is not super elegant and could of course be automated, but it works fine for a small number of AOIs. \ No newline at end of file +This is not super elegant and could of course be automated, but it works fine for a small number of AOIs. 
diff --git a/transformation-scripts/landsat-lakes-discovery/aral-sea.ipynb b/transformation-scripts/landsat-lakes-discovery/aral-sea.ipynb index 0e81e0ad..df65234a 100644 --- a/transformation-scripts/landsat-lakes-discovery/aral-sea.ipynb +++ b/transformation-scripts/landsat-lakes-discovery/aral-sea.ipynb @@ -21,39 +21,40 @@ "from pystac_client import Client\n", "\n", "# Search against the Planetary Computer STAC API\n", - "catalog = Client.open(\n", - " \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n", - ")\n", + "catalog = Client.open(\"https://planetarycomputer.microsoft.com/api/stac/v1\")\n", "\n", "# Define your area of interest\n", "aoi = {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [54.6355092639692, 42.68420851174815],\n", - " [63.69846542906245, 42.68420851174815],\n", - " [63.69846542906245, 47.318581705920565],\n", - " [54.6355092639692, 47.318581705920565],\n", - " [54.6355092639692, 42.68420851174815]\n", - " ]\n", - " ]\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [54.6355092639692, 42.68420851174815],\n", + " [63.69846542906245, 42.68420851174815],\n", + " [63.69846542906245, 47.318581705920565],\n", + " [54.6355092639692, 47.318581705920565],\n", + " [54.6355092639692, 42.68420851174815],\n", + " ]\n", + " ],\n", "}\n", "\n", "# Define your temporal range\n", "daterange = {\"interval\": [\"1982-08-22T00:00:00Z\", \"2023-06-21T23:59:59Z\"]}\n", "\n", "# Define your search with CQL2 syntax\n", - "search = catalog.search(filter_lang=\"cql2-json\", filter={\n", - " \"op\": \"and\",\n", - " \"args\": [\n", - " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", - " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", - " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"160\", \"161\"]]},\n", - " {\"op\": \"in\", 
\"args\": [{\"property\": \"landsat:wrs_row\"}, [\"028\", \"029\"]]},\n", - " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]}\n", - " ]\n", - "})" + "search = catalog.search(\n", + " filter_lang=\"cql2-json\",\n", + " filter={\n", + " \"op\": \"and\",\n", + " \"args\": [\n", + " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", + " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", + " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"160\", \"161\"]]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"028\", \"029\"]]},\n", + " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]},\n", + " ],\n", + " },\n", + ")" ] }, { @@ -83,7 +84,10 @@ } ], "source": [ - "items = [item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\") for item in search.get_items()]\n", + "items = [\n", + " item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\")\n", + " for item in search.get_items()\n", + "]\n", "len(items)" ] }, @@ -136,7 +140,7 @@ " ymin = min(ymin, item.bbox[1])\n", " xmax = max(xmax, item.bbox[2])\n", " ymax = max(ymax, item.bbox[3])\n", - " \n", + "\n", " mindate = min(mindate, item.datetime)\n", " maxdate = max(maxdate, item.datetime)\n", "\n", @@ -146,7 +150,9 @@ "MIN_DATE = mindate\n", "MAX_DATE = maxdate\n", "\n", - "COLLECTION_INTERVAL = [d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)]\n", + "COLLECTION_INTERVAL = [\n", + " d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)\n", + "]\n", "print(COLLECTION_INTERVAL)" ] }, @@ -159,101 +165,78 @@ "source": [ "collection_id = f\"landsat-c2l2-sr-lakes-{LAKE_NAME_SLUG}\"\n", "collection = {\n", - " \"id\": f\"{collection_id}\",\n", - " \"type\": \"Collection\",\n", - " \"title\": f\"Selected Landsat 7 
through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", - " \"extent\": {\n", - " \"spatial\": {\n", - " \"bbox\": BBOX\n", - " },\n", - " \"temporal\": {\n", - " \"interval\": [\n", - " [\n", - " f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\",\n", - " f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"\n", - " ]\n", - " ]\n", - " }\n", - " },\n", - " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", - " \"summaries\": {\n", - " \"datetime\": [\n", - " ],\n", - " \"cog_default\": {\n", - " \"max\": None,\n", - " \"min\": None\n", - " }\n", - " },\n", - " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", - " \"item_assets\": {\n", - " \"red\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Red Band (B4)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B4\",\n", - " \"common_name\": \"red\",\n", - " \"center_wavelength\": 0.65\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\"\n", + " \"id\": f\"{collection_id}\",\n", + " \"type\": \"Collection\",\n", + " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", + " \"extent\": {\n", + " \"spatial\": {\"bbox\": BBOX},\n", + " \"temporal\": {\n", + " \"interval\": [\n", + " [f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\", f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"]\n", + " ]\n", + " },\n", " },\n", - " \"blue\": {\n", - " \"type\": \"image/vnd.stac.geotiff; 
cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Blue Band (B2)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B2\",\n", - " \"common_name\": \"blue\",\n", - " \"center_wavelength\": 0.48\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\"\n", - " },\n", - " \"green\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Green Band (B3)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B3\",\n", - " \"common_name\": \"green\",\n", - " \"center_wavelength\": 0.56\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\"\n", - " },\n", - " \"thumbnail\": {\n", - " \"type\": \"image/jpeg\",\n", - " \"roles\": [\n", - " \"thumbnail\"\n", - " ],\n", - " \"title\": \"Thumbnail image\"\n", + " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", + " \"summaries\": {\"datetime\": [], \"cog_default\": {\"max\": None, \"min\": None}},\n", + " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. 
Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", + " \"item_assets\": {\n", + " \"red\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Red Band (B4)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B4\",\n", + " \"common_name\": \"red\",\n", + " \"center_wavelength\": 0.65,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\",\n", + " },\n", + " \"blue\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Blue Band (B2)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B2\",\n", + " \"common_name\": \"blue\",\n", + " \"center_wavelength\": 0.48,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\",\n", + " },\n", + " \"green\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Green Band (B3)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B3\",\n", + " \"common_name\": \"green\",\n", + " \"center_wavelength\": 0.56,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\",\n", + " },\n", + " \"thumbnail\": {\n", + " \"type\": \"image/jpeg\",\n", + " \"roles\": [\"thumbnail\"],\n", + " \"title\": \"Thumbnail image\",\n", + " },\n", + " \"cog_default\": {\n", + " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", + " \"roles\": [\"data\", \"layer\"],\n", + " \"title\": \"Default COG Layer\",\n", + " \"description\": \"Cloud optimized default layer to display on map\",\n", + " },\n", " },\n", - " \"cog_default\": {\n", - " \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n", - " \"roles\": [\n", - " \"data\",\n", - " \"layer\"\n", - " ],\n", - " \"title\": \"Default COG Layer\",\n", - " \"description\": \"Cloud optimized default layer to display on map\"\n", - " }\n", - " },\n", - " \"stac_version\": \"1.0.0\",\n", - " \"dashboard:is_periodic\": \"false\",\n", - " \"dashboard:time_density\": \"day\"\n", + " \"stac_version\": \"1.0.0\",\n", + " \"dashboard:is_periodic\": \"false\",\n", + " \"dashboard:time_density\": \"day\",\n", "}" ] }, @@ -279,7 +262,7 @@ "\n", "with open(f\"{LAKE_NAME_SLUG}-scenes.json\", \"w\") as fobj:\n", " json.dump(items, fobj, indent=2)\n", - " \n", + "\n", "with open(f\"{LAKE_NAME_SLUG}-collection.json\", \"w\") as fobj:\n", " json.dump(collection, fobj, indent=2)" ] @@ -291,29 +274,30 @@ "metadata": {}, "outputs": [], "source": [ - "item_links = {\"links\": [\n", - " {\n", - " \"rel\": \"collection\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"parent\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"root\",\n", - " \"type\": \"application/json\",\n", - " \"href\": \"https://staging-stac.delta-backend.com/\"\n", - " },\n", - " {\n", - " \"rel\": \"self\",\n", - " \"type\": \"application/geo+json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\"\n", - " }\n", - " ]}\n", - "\n" + "item_links = {\n", + " \"links\": [\n", + " {\n", + " \"rel\": \"collection\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"parent\",\n", + " \"type\": \"application/json\",\n", + " \"href\": 
f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"root\",\n", + " \"type\": \"application/json\",\n", + " \"href\": \"https://staging-stac.delta-backend.com/\",\n", + " },\n", + " {\n", + " \"rel\": \"self\",\n", + " \"type\": \"application/geo+json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\",\n", + " },\n", + " ]\n", + "}" ] }, { @@ -327,26 +311,30 @@ " expected_prefix = \"https://landsateuwest.blob.core.windows.net/landsat-c2/\"\n", "\n", " if not input_string.startswith(expected_prefix):\n", - " raise ValueError(f\"Input string does not match expected pattern - {input_string}.\")\n", + " raise ValueError(\n", + " f\"Input string does not match expected pattern - {input_string}.\"\n", + " )\n", "\n", " replacement_prefix = \"s3://usgs-landsat/collection02/\"\n", " replaced_string = input_string.replace(expected_prefix, replacement_prefix)\n", "\n", " return replaced_string\n", "\n", + "\n", "def remove_keys(dictionary, keys_to_keep):\n", " return {key: value for key, value in dictionary.items() if key in keys_to_keep}\n", "\n", - "bands = ['red', 'green', 'blue']\n", + "\n", + "bands = [\"red\", \"green\", \"blue\"]\n", "\n", "with open(f\"{LAKE_NAME_SLUG}-items.json\", \"w\") as file:\n", " for item in search.items_as_dicts():\n", " item.update(item_links)\n", - " item['collection'] = collection_id\n", - " item['assets'] = remove_keys(item['assets'], bands)\n", + " item[\"collection\"] = collection_id\n", + " item[\"assets\"] = remove_keys(item[\"assets\"], bands)\n", " for i in bands:\n", - " item['assets'][i]['href'] = replace_and_check(item['assets'][i]['href'])\n", - " file.write(json.dumps(item) + \"\\n\") " + " item[\"assets\"][i][\"href\"] = replace_and_check(item[\"assets\"][i][\"href\"])\n", + " file.write(json.dumps(item) + \"\\n\")" ] }, { diff --git 
a/transformation-scripts/landsat-lakes-discovery/lake-balaton.ipynb b/transformation-scripts/landsat-lakes-discovery/lake-balaton.ipynb index 300b88b3..a86da37a 100644 --- a/transformation-scripts/landsat-lakes-discovery/lake-balaton.ipynb +++ b/transformation-scripts/landsat-lakes-discovery/lake-balaton.ipynb @@ -21,39 +21,40 @@ "from pystac_client import Client\n", "\n", "# Search against the Planetary Computer STAC API\n", - "catalog = Client.open(\n", - " \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n", - ")\n", + "catalog = Client.open(\"https://planetarycomputer.microsoft.com/api/stac/v1\")\n", "\n", "# Define your area of interest\n", "aoi = {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [16.64828162951298, 46.416154263750144],\n", - " [18.717592307140762, 46.416154263750144],\n", - " [18.717592307140762, 47.267800037379345],\n", - " [16.64828162951298, 47.267800037379345],\n", - " [16.64828162951298, 46.416154263750144]\n", - " ]\n", - " ]\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [16.64828162951298, 46.416154263750144],\n", + " [18.717592307140762, 46.416154263750144],\n", + " [18.717592307140762, 47.267800037379345],\n", + " [16.64828162951298, 47.267800037379345],\n", + " [16.64828162951298, 46.416154263750144],\n", + " ]\n", + " ],\n", "}\n", "\n", "# Define your temporal range\n", "daterange = {\"interval\": [\"1982-08-22T00:00:00Z\", \"2023-06-21T23:59:59Z\"]}\n", "\n", "# Define your search with CQL2 syntax\n", - "search = catalog.search(filter_lang=\"cql2-json\", filter={\n", - " \"op\": \"and\",\n", - " \"args\": [\n", - " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", - " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", - " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"189\"]]},\n", - " {\"op\": 
\"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"027\"]]},\n", - " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]}\n", - " ]\n", - "})" + "search = catalog.search(\n", + " filter_lang=\"cql2-json\",\n", + " filter={\n", + " \"op\": \"and\",\n", + " \"args\": [\n", + " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", + " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", + " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"189\"]]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"027\"]]},\n", + " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]},\n", + " ],\n", + " },\n", + ")" ] }, { @@ -93,7 +94,10 @@ } ], "source": [ - "items = [item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\") for item in search.get_items()]\n", + "items = [\n", + " item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\")\n", + " for item in search.get_items()\n", + "]\n", "len(items)" ] }, @@ -150,7 +154,7 @@ " ymin = min(ymin, item.bbox[1])\n", " xmax = max(xmax, item.bbox[2])\n", " ymax = max(ymax, item.bbox[3])\n", - " \n", + "\n", " mindate = min(mindate, item.datetime)\n", " maxdate = max(maxdate, item.datetime)\n", "\n", @@ -160,7 +164,9 @@ "MIN_DATE = mindate\n", "MAX_DATE = maxdate\n", "\n", - "COLLECTION_INTERVAL = [d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)]\n", + "COLLECTION_INTERVAL = [\n", + " d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)\n", + "]\n", "print(COLLECTION_INTERVAL)" ] }, @@ -181,17 +187,16 @@ "source": [ "# get s3 links for RGB assets\n", "import requests\n", + "\n", "root_url = \"https://landsatlook.usgs.gov/stac-server/search\"\n", "# find an item with matching id\n", "for i in items:\n", - " landsat_search = 
requests.post(root_url, json={\n", - " \"ids\": [i]\n", - " })\n", + " landsat_search = requests.post(root_url, json={\"ids\": [i]})\n", " if landsat_search.status_code == 200:\n", " print(landsat_search.json()[\"features\"][0][\"assets\"][\"red\"])\n", " break\n", " else:\n", - " print(landsat_search.status_code, landsat_search.text)\n" + " print(landsat_search.status_code, landsat_search.text)" ] }, { @@ -205,101 +210,78 @@ "source": [ "collection_id = f\"landsat-c2l2-sr-lakes-{LAKE_NAME_SLUG}\"\n", "collection = {\n", - " \"id\": f\"landsat-c2l2-sr-lakes-{LAKE_NAME_SLUG}\",\n", - " \"type\": \"Collection\",\n", - " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", - " \"extent\": {\n", - " \"spatial\": {\n", - " \"bbox\": BBOX\n", - " },\n", - " \"temporal\": {\n", - " \"interval\": [\n", - " [\n", - " f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\",\n", - " f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"\n", - " ]\n", - " ]\n", - " }\n", - " },\n", - " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", - " \"summaries\": {\n", - " \"datetime\": [\n", - " ],\n", - " \"cog_default\": {\n", - " \"max\": None,\n", - " \"min\": None\n", - " }\n", - " },\n", - " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. 
Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", - " \"item_assets\": {\n", - " \"red\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Red Band (B4)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B4\",\n", - " \"common_name\": \"red\",\n", - " \"center_wavelength\": 0.65\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\"\n", - " },\n", - " \"blue\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Blue Band (B2)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B2\",\n", - " \"common_name\": \"blue\",\n", - " \"center_wavelength\": 0.48\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\"\n", + " \"id\": f\"landsat-c2l2-sr-lakes-{LAKE_NAME_SLUG}\",\n", + " \"type\": \"Collection\",\n", + " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", + " \"extent\": {\n", + " \"spatial\": {\"bbox\": BBOX},\n", + " \"temporal\": {\n", + " \"interval\": [\n", + " [f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\", f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"]\n", + " ]\n", + " },\n", " },\n", - " \"green\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Green Band (B3)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B3\",\n", - " \"common_name\": \"green\",\n", - " \"center_wavelength\": 0.56\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\"\n", - " },\n", - " \"thumbnail\": {\n", - " \"type\": 
\"image/jpeg\",\n", - " \"roles\": [\n", - " \"thumbnail\"\n", - " ],\n", - " \"title\": \"Thumbnail image\"\n", + " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", + " \"summaries\": {\"datetime\": [], \"cog_default\": {\"max\": None, \"min\": None}},\n", + " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", + " \"item_assets\": {\n", + " \"red\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Red Band (B4)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B4\",\n", + " \"common_name\": \"red\",\n", + " \"center_wavelength\": 0.65,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\",\n", + " },\n", + " \"blue\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Blue Band (B2)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B2\",\n", + " \"common_name\": \"blue\",\n", + " \"center_wavelength\": 0.48,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\",\n", + " },\n", + " \"green\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Green Band (B3)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B3\",\n", + " \"common_name\": \"green\",\n", + " \"center_wavelength\": 0.56,\n", + " }\n", 
+ " ],\n", + " \"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\",\n", + " },\n", + " \"thumbnail\": {\n", + " \"type\": \"image/jpeg\",\n", + " \"roles\": [\"thumbnail\"],\n", + " \"title\": \"Thumbnail image\",\n", + " },\n", + " \"cog_default\": {\n", + " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", + " \"roles\": [\"data\", \"layer\"],\n", + " \"title\": \"Default COG Layer\",\n", + " \"description\": \"Cloud optimized default layer to display on map\",\n", + " },\n", " },\n", - " \"cog_default\": {\n", - " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", - " \"roles\": [\n", - " \"data\",\n", - " \"layer\"\n", - " ],\n", - " \"title\": \"Default COG Layer\",\n", - " \"description\": \"Cloud optimized default layer to display on map\"\n", - " }\n", - " },\n", - " \"stac_version\": \"1.0.0\",\n", - " \"dashboard:is_periodic\": \"false\",\n", - " \"dashboard:time_density\": \"day\"\n", + " \"stac_version\": \"1.0.0\",\n", + " \"dashboard:is_periodic\": \"false\",\n", + " \"dashboard:time_density\": \"day\",\n", "}" ] }, @@ -325,7 +307,7 @@ "\n", "with open(f\"{LAKE_NAME_SLUG}-scenes.json\", \"w\") as fobj:\n", " json.dump(items, fobj, indent=2)\n", - " \n", + "\n", "with open(f\"{LAKE_NAME_SLUG}-collection.json\", \"w\") as fobj:\n", " json.dump(collection, fobj, indent=2)" ] @@ -337,28 +319,30 @@ "metadata": {}, "outputs": [], "source": [ - "item_links = {\"links\": [\n", - " {\n", - " \"rel\": \"collection\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"parent\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"root\",\n", - " \"type\": \"application/json\",\n", - " \"href\": \"https://staging-stac.delta-backend.com/\"\n", - " },\n", - 
" {\n", - " \"rel\": \"self\",\n", - " \"type\": \"application/geo+json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\"\n", - " }\n", - " ]}\n" + "item_links = {\n", + " \"links\": [\n", + " {\n", + " \"rel\": \"collection\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"parent\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"root\",\n", + " \"type\": \"application/json\",\n", + " \"href\": \"https://staging-stac.delta-backend.com/\",\n", + " },\n", + " {\n", + " \"rel\": \"self\",\n", + " \"type\": \"application/geo+json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\",\n", + " },\n", + " ]\n", + "}" ] }, { @@ -372,26 +356,30 @@ " expected_prefix = \"https://landsateuwest.blob.core.windows.net/landsat-c2/\"\n", "\n", " if not input_string.startswith(expected_prefix):\n", - " raise ValueError(f\"Input string does not match expected pattern - {input_string}.\")\n", + " raise ValueError(\n", + " f\"Input string does not match expected pattern - {input_string}.\"\n", + " )\n", "\n", " replacement_prefix = \"s3://usgs-landsat/collection02/\"\n", " replaced_string = input_string.replace(expected_prefix, replacement_prefix)\n", "\n", " return replaced_string\n", "\n", + "\n", "def remove_keys(dictionary, keys_to_keep):\n", " return {key: value for key, value in dictionary.items() if key in keys_to_keep}\n", "\n", - "bands = ['red', 'green', 'blue']\n", + "\n", + "bands = [\"red\", \"green\", \"blue\"]\n", "\n", "with open(f\"{LAKE_NAME_SLUG}-items.json\", \"w\") as file:\n", " for item in search.items_as_dicts():\n", " 
item.update(item_links)\n", - " item['collection'] = collection_id\n", - " item['assets'] = remove_keys(item['assets'], bands)\n", + " item[\"collection\"] = collection_id\n", + " item[\"assets\"] = remove_keys(item[\"assets\"], bands)\n", " for i in bands:\n", - " item['assets'][i]['href'] = replace_and_check(item['assets'][i]['href'])\n", - " file.write(json.dumps(item) + \"\\n\") " + " item[\"assets\"][i][\"href\"] = replace_and_check(item[\"assets\"][i][\"href\"])\n", + " file.write(json.dumps(item) + \"\\n\")" ] }, { @@ -417,7 +405,7 @@ "# DB_USERNAME=username\n", "# DB_PASSWORD=password\n", "# DB_NAME (not used, just a flag to make sure env is loaded)\n", - "!echo $DB_NAME\n" + "!echo $DB_NAME" ] }, { @@ -427,7 +415,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pypgstac load collections {LAKE_NAME_SLUG}-collection.json --dsn postgresql://$DB_USERNAME:$DB_PASSWORD@$DB_HOSTNAME:5432/postgis --method upsert\n" + "!pypgstac load collections {LAKE_NAME_SLUG}-collection.json --dsn postgresql://$DB_USERNAME:$DB_PASSWORD@$DB_HOSTNAME:5432/postgis --method upsert" ] }, { diff --git a/transformation-scripts/landsat-lakes-discovery/lake-biwa.ipynb b/transformation-scripts/landsat-lakes-discovery/lake-biwa.ipynb index 73403e38..5fa41682 100644 --- a/transformation-scripts/landsat-lakes-discovery/lake-biwa.ipynb +++ b/transformation-scripts/landsat-lakes-discovery/lake-biwa.ipynb @@ -19,39 +19,40 @@ "from pystac_client import Client\n", "\n", "# Search against the Planetary Computer STAC API\n", - "catalog = Client.open(\n", - " \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n", - ")\n", + "catalog = Client.open(\"https://planetarycomputer.microsoft.com/api/stac/v1\")\n", "\n", "# Define your area of interest\n", "aoi = {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [135.32243870119703, 34.87343277116993],\n", - " [136.90834461514822, 34.87343277116993],\n", - " [136.90834461514822, 35.652571266913526],\n", - " 
[135.32243870119703, 35.652571266913526],\n", - " [135.32243870119703, 34.87343277116993]\n", - " ]\n", - " ]\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [135.32243870119703, 34.87343277116993],\n", + " [136.90834461514822, 34.87343277116993],\n", + " [136.90834461514822, 35.652571266913526],\n", + " [135.32243870119703, 35.652571266913526],\n", + " [135.32243870119703, 34.87343277116993],\n", + " ]\n", + " ],\n", "}\n", "\n", "# Define your temporal range\n", "daterange = {\"interval\": [\"1982-08-22T00:00:00Z\", \"2023-06-21T23:59:59Z\"]}\n", "\n", "# Define your search with CQL2 syntax\n", - "search = catalog.search(filter_lang=\"cql2-json\", filter={\n", - " \"op\": \"and\",\n", - " \"args\": [\n", - " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", - " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", - " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"109\"]]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"036\"]]},\n", - " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]}\n", - " ]\n", - "})" + "search = catalog.search(\n", + " filter_lang=\"cql2-json\",\n", + " filter={\n", + " \"op\": \"and\",\n", + " \"args\": [\n", + " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", + " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", + " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"109\"]]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"036\"]]},\n", + " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]},\n", + " ],\n", + " },\n", + ")" ] }, { @@ -83,7 +84,10 @@ } ], "source": [ - "items = 
[item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\") for item in search.get_items()]\n", + "items = [\n", + " item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\")\n", + " for item in search.get_items()\n", + "]\n", "len(items)" ] }, @@ -138,7 +142,7 @@ " ymin = min(ymin, item.bbox[1])\n", " xmax = max(xmax, item.bbox[2])\n", " ymax = max(ymax, item.bbox[3])\n", - " \n", + "\n", " mindate = min(mindate, item.datetime)\n", " maxdate = max(maxdate, item.datetime)\n", "\n", @@ -148,7 +152,9 @@ "MIN_DATE = mindate\n", "MAX_DATE = maxdate\n", "\n", - "COLLECTION_INTERVAL = [d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)]\n", + "COLLECTION_INTERVAL = [\n", + " d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)\n", + "]\n", "print(COLLECTION_INTERVAL)" ] }, @@ -161,101 +167,78 @@ "source": [ "collection_id = f\"landsat-c2l2-sr-lakes-{LAKE_NAME_SLUG}\"\n", "collection = {\n", - " \"id\": f\"{collection_id}\",\n", - " \"type\": \"Collection\",\n", - " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", - " \"extent\": {\n", - " \"spatial\": {\n", - " \"bbox\": BBOX\n", - " },\n", - " \"temporal\": {\n", - " \"interval\": [\n", - " [\n", - " f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\",\n", - " f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"\n", - " ]\n", - " ]\n", - " }\n", - " },\n", - " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", - " \"summaries\": {\n", - " \"datetime\": [\n", - " ],\n", - " \"cog_default\": {\n", - " \"max\": None,\n", - " \"min\": None\n", - " }\n", - " },\n", - " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. 
Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", - " \"item_assets\": {\n", - " \"red\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Red Band (B4)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B4\",\n", - " \"common_name\": \"red\",\n", - " \"center_wavelength\": 0.65\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\"\n", + " \"id\": f\"{collection_id}\",\n", + " \"type\": \"Collection\",\n", + " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", + " \"extent\": {\n", + " \"spatial\": {\"bbox\": BBOX},\n", + " \"temporal\": {\n", + " \"interval\": [\n", + " [f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\", f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"]\n", + " ]\n", + " },\n", " },\n", - " \"blue\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Blue Band (B2)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B2\",\n", - " \"common_name\": \"blue\",\n", - " \"center_wavelength\": 0.48\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\"\n", - " },\n", - " \"green\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Green Band (B3)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B3\",\n", - " \"common_name\": \"green\",\n", - " \"center_wavelength\": 0.56\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\"\n", - " },\n", - " \"thumbnail\": {\n", - " \"type\": \"image/jpeg\",\n", - " 
\"roles\": [\n", - " \"thumbnail\"\n", - " ],\n", - " \"title\": \"Thumbnail image\"\n", + " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", + " \"summaries\": {\"datetime\": [], \"cog_default\": {\"max\": None, \"min\": None}},\n", + " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", + " \"item_assets\": {\n", + " \"red\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Red Band (B4)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B4\",\n", + " \"common_name\": \"red\",\n", + " \"center_wavelength\": 0.65,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\",\n", + " },\n", + " \"blue\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Blue Band (B2)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B2\",\n", + " \"common_name\": \"blue\",\n", + " \"center_wavelength\": 0.48,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\",\n", + " },\n", + " \"green\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Green Band (B3)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B3\",\n", + " \"common_name\": \"green\",\n", + " \"center_wavelength\": 0.56,\n", + " }\n", + " ],\n", + " 
\"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\",\n", + " },\n", + " \"thumbnail\": {\n", + " \"type\": \"image/jpeg\",\n", + " \"roles\": [\"thumbnail\"],\n", + " \"title\": \"Thumbnail image\",\n", + " },\n", + " \"cog_default\": {\n", + " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", + " \"roles\": [\"data\", \"layer\"],\n", + " \"title\": \"Default COG Layer\",\n", + " \"description\": \"Cloud optimized default layer to display on map\",\n", + " },\n", " },\n", - " \"cog_default\": {\n", - " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", - " \"roles\": [\n", - " \"data\",\n", - " \"layer\"\n", - " ],\n", - " \"title\": \"Default COG Layer\",\n", - " \"description\": \"Cloud optimized default layer to display on map\"\n", - " }\n", - " },\n", - " \"stac_version\": \"1.0.0\",\n", - " \"dashboard:is_periodic\": \"false\",\n", - " \"dashboard:time_density\": \"day\"\n", + " \"stac_version\": \"1.0.0\",\n", + " \"dashboard:is_periodic\": \"false\",\n", + " \"dashboard:time_density\": \"day\",\n", "}" ] }, @@ -283,7 +266,7 @@ "\n", "with open(f\"{LAKE_NAME_SLUG}-scenes.json\", \"w\") as fobj:\n", " json.dump(items, fobj, indent=2)\n", - " \n", + "\n", "with open(f\"{LAKE_NAME_SLUG}-collection.json\", \"w\") as fobj:\n", " json.dump(collection, fobj, indent=2)" ] @@ -295,28 +278,30 @@ "metadata": {}, "outputs": [], "source": [ - "item_links = {\"links\": [\n", - " {\n", - " \"rel\": \"collection\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"parent\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"root\",\n", - " \"type\": \"application/json\",\n", - " \"href\": \"https://staging-stac.delta-backend.com/\"\n", - " },\n", - " {\n", - " 
\"rel\": \"self\",\n", - " \"type\": \"application/geo+json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\"\n", - " }\n", - " ]}\n" + "item_links = {\n", + " \"links\": [\n", + " {\n", + " \"rel\": \"collection\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"parent\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"root\",\n", + " \"type\": \"application/json\",\n", + " \"href\": \"https://staging-stac.delta-backend.com/\",\n", + " },\n", + " {\n", + " \"rel\": \"self\",\n", + " \"type\": \"application/geo+json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\",\n", + " },\n", + " ]\n", + "}" ] }, { @@ -330,26 +315,30 @@ " expected_prefix = \"https://landsateuwest.blob.core.windows.net/landsat-c2/\"\n", "\n", " if not input_string.startswith(expected_prefix):\n", - " raise ValueError(f\"Input string does not match expected pattern - {input_string}.\")\n", + " raise ValueError(\n", + " f\"Input string does not match expected pattern - {input_string}.\"\n", + " )\n", "\n", " replacement_prefix = \"s3://usgs-landsat/collection02/\"\n", " replaced_string = input_string.replace(expected_prefix, replacement_prefix)\n", "\n", " return replaced_string\n", "\n", + "\n", "def remove_keys(dictionary, keys_to_keep):\n", " return {key: value for key, value in dictionary.items() if key in keys_to_keep}\n", "\n", - "bands = ['red', 'green', 'blue']\n", + "\n", + "bands = [\"red\", \"green\", \"blue\"]\n", "\n", "with open(f\"{LAKE_NAME_SLUG}-items.json\", \"w\") as file:\n", " for item in search.items_as_dicts():\n", " item.update(item_links)\n", - " 
item['collection'] = collection_id\n", - " item['assets'] = remove_keys(item['assets'], bands)\n", + " item[\"collection\"] = collection_id\n", + " item[\"assets\"] = remove_keys(item[\"assets\"], bands)\n", " for i in bands:\n", - " item['assets'][i]['href'] = replace_and_check(item['assets'][i]['href'])\n", - " file.write(json.dumps(item) + \"\\n\") " + " item[\"assets\"][i][\"href\"] = replace_and_check(item[\"assets\"][i][\"href\"])\n", + " file.write(json.dumps(item) + \"\\n\")" ] }, { diff --git a/transformation-scripts/landsat-lakes-discovery/tonle-sap.ipynb b/transformation-scripts/landsat-lakes-discovery/tonle-sap.ipynb index 9320db27..6d132129 100644 --- a/transformation-scripts/landsat-lakes-discovery/tonle-sap.ipynb +++ b/transformation-scripts/landsat-lakes-discovery/tonle-sap.ipynb @@ -21,39 +21,40 @@ "from pystac_client import Client\n", "\n", "# Search against the Planetary Computer STAC API\n", - "catalog = Client.open(\n", - " \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n", - ")\n", + "catalog = Client.open(\"https://planetarycomputer.microsoft.com/api/stac/v1\")\n", "\n", "# Define your area of interest\n", "aoi = {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [100.94980227825175, 11.538853844083988],\n", - " [105.89367732791578, 11.538853844083988],\n", - " [105.89367732791578, 14.437153553167747],\n", - " [100.94980227825175, 14.437153553167747],\n", - " [100.94980227825175, 11.538853844083988]\n", - " ]\n", - " ]\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [100.94980227825175, 11.538853844083988],\n", + " [105.89367732791578, 11.538853844083988],\n", + " [105.89367732791578, 14.437153553167747],\n", + " [100.94980227825175, 14.437153553167747],\n", + " [100.94980227825175, 11.538853844083988],\n", + " ]\n", + " ],\n", "}\n", "\n", "# Define your temporal range\n", "daterange = {\"interval\": [\"1982-08-22T00:00:00Z\", \"2023-06-21T23:59:59Z\"]}\n", "\n", "# Define your 
search with CQL2 syntax\n", - "search = catalog.search(filter_lang=\"cql2-json\", filter={\n", - " \"op\": \"and\",\n", - " \"args\": [\n", - " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", - " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", - " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"126\", \"127\"]]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"051\"]]},\n", - " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]}\n", - " ]\n", - "})" + "search = catalog.search(\n", + " filter_lang=\"cql2-json\",\n", + " filter={\n", + " \"op\": \"and\",\n", + " \"args\": [\n", + " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", + " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", + " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"126\", \"127\"]]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"051\"]]},\n", + " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 5]},\n", + " ],\n", + " },\n", + ")" ] }, { @@ -93,7 +94,10 @@ } ], "source": [ - "items = [item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\") for item in search.get_items()]\n", + "items = [\n", + " item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\")\n", + " for item in search.get_items()\n", + "]\n", "len(items)" ] }, @@ -148,7 +152,7 @@ " ymin = min(ymin, item.bbox[1])\n", " xmax = max(xmax, item.bbox[2])\n", " ymax = max(ymax, item.bbox[3])\n", - " \n", + "\n", " mindate = min(mindate, item.datetime)\n", " maxdate = max(maxdate, item.datetime)\n", "\n", @@ -158,7 +162,9 @@ "MIN_DATE = mindate\n", "MAX_DATE = 
maxdate\n", "\n", - "COLLECTION_INTERVAL = [d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)]\n", + "COLLECTION_INTERVAL = [\n", + " d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)\n", + "]\n", "print(COLLECTION_INTERVAL)" ] }, @@ -171,101 +177,78 @@ "source": [ "collection_id = f\"landsat-c2l2-sr-lakes-{LAKE_NAME_SLUG}\"\n", "collection = {\n", - " \"id\": f\"{collection_id}\",\n", - " \"type\": \"Collection\",\n", - " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", - " \"extent\": {\n", - " \"spatial\": {\n", - " \"bbox\": BBOX\n", - " },\n", - " \"temporal\": {\n", - " \"interval\": [\n", - " [\n", - " f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\",\n", - " f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"\n", - " ]\n", - " ]\n", - " }\n", - " },\n", - " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", - " \"summaries\": {\n", - " \"datetime\": [\n", - " ],\n", - " \"cog_default\": {\n", - " \"max\": None,\n", - " \"min\": None\n", - " }\n", - " },\n", - " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. 
Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", - " \"item_assets\": {\n", - " \"red\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Red Band (B4)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B4\",\n", - " \"common_name\": \"red\",\n", - " \"center_wavelength\": 0.65\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\"\n", + " \"id\": f\"{collection_id}\",\n", + " \"type\": \"Collection\",\n", + " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", + " \"extent\": {\n", + " \"spatial\": {\"bbox\": BBOX},\n", + " \"temporal\": {\n", + " \"interval\": [\n", + " [f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\", f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"]\n", + " ]\n", + " },\n", " },\n", - " \"blue\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Blue Band (B2)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B2\",\n", - " \"common_name\": \"blue\",\n", - " \"center_wavelength\": 0.48\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\"\n", - " },\n", - " \"green\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Green Band (B3)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B3\",\n", - " \"common_name\": \"green\",\n", - " \"center_wavelength\": 0.56\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\"\n", - " },\n", - " \"thumbnail\": {\n", - " \"type\": \"image/jpeg\",\n", - " 
\"roles\": [\n", - " \"thumbnail\"\n", - " ],\n", - " \"title\": \"Thumbnail image\"\n", + " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", + " \"summaries\": {\"datetime\": [], \"cog_default\": {\"max\": None, \"min\": None}},\n", + " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", + " \"item_assets\": {\n", + " \"red\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Red Band (B4)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B4\",\n", + " \"common_name\": \"red\",\n", + " \"center_wavelength\": 0.65,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\",\n", + " },\n", + " \"blue\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Blue Band (B2)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B2\",\n", + " \"common_name\": \"blue\",\n", + " \"center_wavelength\": 0.48,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\",\n", + " },\n", + " \"green\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Green Band (B3)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B3\",\n", + " \"common_name\": \"green\",\n", + " \"center_wavelength\": 0.56,\n", + " }\n", + " ],\n", + " 
\"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\",\n", + " },\n", + " \"thumbnail\": {\n", + " \"type\": \"image/jpeg\",\n", + " \"roles\": [\"thumbnail\"],\n", + " \"title\": \"Thumbnail image\",\n", + " },\n", + " \"cog_default\": {\n", + " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", + " \"roles\": [\"data\", \"layer\"],\n", + " \"title\": \"Default COG Layer\",\n", + " \"description\": \"Cloud optimized default layer to display on map\",\n", + " },\n", " },\n", - " \"cog_default\": {\n", - " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", - " \"roles\": [\n", - " \"data\",\n", - " \"layer\"\n", - " ],\n", - " \"title\": \"Default COG Layer\",\n", - " \"description\": \"Cloud optimized default layer to display on map\"\n", - " }\n", - " },\n", - " \"stac_version\": \"1.0.0\",\n", - " \"dashboard:is_periodic\": \"false\",\n", - " \"dashboard:time_density\": \"day\"\n", + " \"stac_version\": \"1.0.0\",\n", + " \"dashboard:is_periodic\": \"false\",\n", + " \"dashboard:time_density\": \"day\",\n", "}" ] }, @@ -291,7 +274,7 @@ "\n", "with open(f\"{LAKE_NAME_SLUG}-scenes.json\", \"w\") as fobj:\n", " json.dump(items, fobj, indent=2)\n", - " \n", + "\n", "with open(f\"{LAKE_NAME_SLUG}-collection.json\", \"w\") as fobj:\n", " json.dump(collection, fobj, indent=2)" ] @@ -303,28 +286,30 @@ "metadata": {}, "outputs": [], "source": [ - "item_links = {\"links\": [\n", - " {\n", - " \"rel\": \"collection\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"parent\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"root\",\n", - " \"type\": \"application/json\",\n", - " \"href\": \"https://staging-stac.delta-backend.com/\"\n", - " },\n", - " {\n", - " 
\"rel\": \"self\",\n", - " \"type\": \"application/geo+json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\"\n", - " }\n", - " ]}\n" + "item_links = {\n", + " \"links\": [\n", + " {\n", + " \"rel\": \"collection\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"parent\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"root\",\n", + " \"type\": \"application/json\",\n", + " \"href\": \"https://staging-stac.delta-backend.com/\",\n", + " },\n", + " {\n", + " \"rel\": \"self\",\n", + " \"type\": \"application/geo+json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\",\n", + " },\n", + " ]\n", + "}" ] }, { @@ -334,31 +319,34 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "def replace_and_check(input_string):\n", " expected_prefix = \"https://landsateuwest.blob.core.windows.net/landsat-c2/\"\n", "\n", " if not input_string.startswith(expected_prefix):\n", - " raise ValueError(f\"Input string does not match expected pattern - {input_string}.\")\n", + " raise ValueError(\n", + " f\"Input string does not match expected pattern - {input_string}.\"\n", + " )\n", "\n", " replacement_prefix = \"s3://usgs-landsat/collection02/\"\n", " replaced_string = input_string.replace(expected_prefix, replacement_prefix)\n", "\n", " return replaced_string\n", "\n", + "\n", "def remove_keys(dictionary, keys_to_keep):\n", " return {key: value for key, value in dictionary.items() if key in keys_to_keep}\n", "\n", - "bands = ['red', 'green', 'blue']\n", + "\n", + "bands = [\"red\", \"green\", \"blue\"]\n", "\n", "with open(f\"{LAKE_NAME_SLUG}-items.json\", 
\"w\") as file:\n", " for item in search.items_as_dicts():\n", " item.update(item_links)\n", - " item['collection'] = collection_id\n", - " item['assets'] = remove_keys(item['assets'], bands)\n", + " item[\"collection\"] = collection_id\n", + " item[\"assets\"] = remove_keys(item[\"assets\"], bands)\n", " for i in bands:\n", - " item['assets'][i]['href'] = replace_and_check(item['assets'][i]['href'])\n", - " file.write(json.dumps(item) + \"\\n\") " + " item[\"assets\"][i][\"href\"] = replace_and_check(item[\"assets\"][i][\"href\"])\n", + " file.write(json.dumps(item) + \"\\n\")" ] }, { diff --git a/transformation-scripts/landsat-lakes-discovery/vanern.ipynb b/transformation-scripts/landsat-lakes-discovery/vanern.ipynb index 1cb4609a..a974a74b 100644 --- a/transformation-scripts/landsat-lakes-discovery/vanern.ipynb +++ b/transformation-scripts/landsat-lakes-discovery/vanern.ipynb @@ -19,39 +19,40 @@ "from pystac_client import Client\n", "\n", "# Search against the Planetary Computer STAC API\n", - "catalog = Client.open(\n", - " \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n", - ")\n", + "catalog = Client.open(\"https://planetarycomputer.microsoft.com/api/stac/v1\")\n", "\n", "# Define your area of interest\n", "aoi = {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [11.72877026555824, 58.22234804213801],\n", - " [15.209467246270748, 58.22234804213801],\n", - " [15.209467246270748, 59.52443624797786],\n", - " [11.72877026555824, 59.52443624797786],\n", - " [11.72877026555824, 58.22234804213801]\n", - " ]\n", - " ]\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [11.72877026555824, 58.22234804213801],\n", + " [15.209467246270748, 58.22234804213801],\n", + " [15.209467246270748, 59.52443624797786],\n", + " [11.72877026555824, 59.52443624797786],\n", + " [11.72877026555824, 58.22234804213801],\n", + " ]\n", + " ],\n", "}\n", "\n", "# Define your temporal range\n", "daterange = {\"interval\": 
[\"1982-08-22T00:00:00Z\", \"2023-06-21T23:59:59Z\"]}\n", "\n", "# Define your search with CQL2 syntax\n", - "search = catalog.search(filter_lang=\"cql2-json\", filter={\n", - " \"op\": \"and\",\n", - " \"args\": [\n", - " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", - " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", - " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"195\"]]},\n", - " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"019\"]]},\n", - " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 10]}\n", - " ]\n", - "})" + "search = catalog.search(\n", + " filter_lang=\"cql2-json\",\n", + " filter={\n", + " \"op\": \"and\",\n", + " \"args\": [\n", + " {\"op\": \"s_intersects\", \"args\": [{\"property\": \"geometry\"}, aoi]},\n", + " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", + " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"landsat-c2-l2\"]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_path\"}, [\"195\"]]},\n", + " {\"op\": \"in\", \"args\": [{\"property\": \"landsat:wrs_row\"}, [\"019\"]]},\n", + " {\"op\": \"<=\", \"args\": [{\"property\": \"eo:cloud_cover\"}, 10]},\n", + " ],\n", + " },\n", + ")" ] }, { @@ -91,7 +92,10 @@ } ], "source": [ - "items = [item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\") for item in search.get_items()]\n", + "items = [\n", + " item.assets[\"red\"].href.split(\"/\")[-1].strip(\"_B4.TIF\").strip(\"_B3.TIF\")\n", + " for item in search.get_items()\n", + "]\n", "len(items)" ] }, @@ -146,7 +150,7 @@ " ymin = min(ymin, item.bbox[1])\n", " xmax = max(xmax, item.bbox[2])\n", " ymax = max(ymax, item.bbox[3])\n", - " \n", + "\n", " mindate = min(mindate, item.datetime)\n", " maxdate = max(maxdate, item.datetime)\n", "\n", 
@@ -156,7 +160,9 @@ "MIN_DATE = mindate\n", "MAX_DATE = maxdate\n", "\n", - "COLLECTION_INTERVAL = [d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)]\n", + "COLLECTION_INTERVAL = [\n", + " d.strftime(\"%Y-%m-%dT%H:%M:%S\") + \"Z\" for d in (mindate, maxdate)\n", + "]\n", "print(COLLECTION_INTERVAL)" ] }, @@ -169,101 +175,78 @@ "source": [ "collection_id = f\"landsat-c2l2-sr-lakes-{LAKE_NAME_SLUG}\"\n", "collection = {\n", - " \"id\": f\"{collection_id}\",\n", - " \"type\": \"Collection\",\n", - " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", - " \"extent\": {\n", - " \"spatial\": {\n", - " \"bbox\": BBOX\n", - " },\n", - " \"temporal\": {\n", - " \"interval\": [\n", - " [\n", - " f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\",\n", - " f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"\n", - " ]\n", - " ]\n", - " }\n", - " },\n", - " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", - " \"summaries\": {\n", - " \"datetime\": [\n", - " ],\n", - " \"cog_default\": {\n", - " \"max\": None,\n", - " \"min\": None\n", - " }\n", - " },\n", - " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. 
Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", - " \"item_assets\": {\n", - " \"red\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Red Band (B4)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B4\",\n", - " \"common_name\": \"red\",\n", - " \"center_wavelength\": 0.65\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\"\n", + " \"id\": f\"{collection_id}\",\n", + " \"type\": \"Collection\",\n", + " \"title\": f\"Selected Landsat 7 through 9 Surface Reflectance Scenes for {LAKE_NAME}\",\n", + " \"extent\": {\n", + " \"spatial\": {\"bbox\": BBOX},\n", + " \"temporal\": {\n", + " \"interval\": [\n", + " [f\"{MIN_DATE:%Y-%m-%dT%H:%M:%S}Z\", f\"{MAX_DATE:%Y-%m-%dT%H:%M:%S}Z\"]\n", + " ]\n", + " },\n", " },\n", - " \"blue\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Blue Band (B2)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B2\",\n", - " \"common_name\": \"blue\",\n", - " \"center_wavelength\": 0.48\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\"\n", - " },\n", - " \"green\": {\n", - " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", - " \"roles\": [\n", - " \"data\"\n", - " ],\n", - " \"title\": \"Green Band (B3)\",\n", - " \"eo:bands\": [\n", - " {\n", - " \"gsd\": 30,\n", - " \"name\": \"B3\",\n", - " \"common_name\": \"green\",\n", - " \"center_wavelength\": 0.56\n", - " }\n", - " ],\n", - " \"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\"\n", - " },\n", - " \"thumbnail\": {\n", - " \"type\": \"image/jpeg\",\n", - " 
\"roles\": [\n", - " \"thumbnail\"\n", - " ],\n", - " \"title\": \"Thumbnail image\"\n", + " \"license\": \"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/Landsat_Data_Policy.pdf\",\n", + " \"summaries\": {\"datetime\": [], \"cog_default\": {\"max\": None, \"min\": None}},\n", + " \"description\": f\"Selected low-cloud-cover Landsat Collection-2 Level-2 Surface Reflectance scenes covering {LAKE_NAME}. Products from multiple missions and instruments are combined in this single collection for the purpose of visualizing RGB images. Note that the full item records are not interchangeable across instruments for purposes other than visualization and that the assets available are not the same for each instrument.\",\n", + " \"item_assets\": {\n", + " \"red\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Red Band (B4)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B4\",\n", + " \"common_name\": \"red\",\n", + " \"center_wavelength\": 0.65,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Red Band (B4) Surface Reflectance\",\n", + " },\n", + " \"blue\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Blue Band (B2)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B2\",\n", + " \"common_name\": \"blue\",\n", + " \"center_wavelength\": 0.48,\n", + " }\n", + " ],\n", + " \"description\": \"Collection 2 Level-2 Blue Band (B2) Surface Reflectance\",\n", + " },\n", + " \"green\": {\n", + " \"type\": \"image/vnd.stac.geotiff; cloud-optimized=true\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Green Band (B3)\",\n", + " \"eo:bands\": [\n", + " {\n", + " \"gsd\": 30,\n", + " \"name\": \"B3\",\n", + " \"common_name\": \"green\",\n", + " \"center_wavelength\": 0.56,\n", + " }\n", + " ],\n", + " 
\"description\": \"Collection 2 Level-2 Green Band (B3) Surface Reflectance\",\n", + " },\n", + " \"thumbnail\": {\n", + " \"type\": \"image/jpeg\",\n", + " \"roles\": [\"thumbnail\"],\n", + " \"title\": \"Thumbnail image\",\n", + " },\n", + " \"cog_default\": {\n", + " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", + " \"roles\": [\"data\", \"layer\"],\n", + " \"title\": \"Default COG Layer\",\n", + " \"description\": \"Cloud optimized default layer to display on map\",\n", + " },\n", " },\n", - " \"cog_default\": {\n", - " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", - " \"roles\": [\n", - " \"data\",\n", - " \"layer\"\n", - " ],\n", - " \"title\": \"Default COG Layer\",\n", - " \"description\": \"Cloud optimized default layer to display on map\"\n", - " }\n", - " },\n", - " \"stac_version\": \"1.0.0\",\n", - " \"dashboard:is_periodic\": \"false\",\n", - " \"dashboard:time_density\": \"day\"\n", + " \"stac_version\": \"1.0.0\",\n", + " \"dashboard:is_periodic\": \"false\",\n", + " \"dashboard:time_density\": \"day\",\n", "}" ] }, @@ -291,7 +274,7 @@ "\n", "with open(f\"{LAKE_NAME_SLUG}-scenes.json\", \"w\") as fobj:\n", " json.dump(items, fobj, indent=2)\n", - " \n", + "\n", "with open(f\"{LAKE_NAME_SLUG}-collection.json\", \"w\") as fobj:\n", " json.dump(collection, fobj, indent=2)" ] @@ -303,28 +286,30 @@ "metadata": {}, "outputs": [], "source": [ - "item_links = {\"links\": [\n", - " {\n", - " \"rel\": \"collection\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"parent\",\n", - " \"type\": \"application/json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\"\n", - " },\n", - " {\n", - " \"rel\": \"root\",\n", - " \"type\": \"application/json\",\n", - " \"href\": \"https://staging-stac.delta-backend.com/\"\n", - " },\n", - " {\n", - " 
\"rel\": \"self\",\n", - " \"type\": \"application/geo+json\",\n", - " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\"\n", - " }\n", - " ]}\n" + "item_links = {\n", + " \"links\": [\n", + " {\n", + " \"rel\": \"collection\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"parent\",\n", + " \"type\": \"application/json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}\",\n", + " },\n", + " {\n", + " \"rel\": \"root\",\n", + " \"type\": \"application/json\",\n", + " \"href\": \"https://staging-stac.delta-backend.com/\",\n", + " },\n", + " {\n", + " \"rel\": \"self\",\n", + " \"type\": \"application/geo+json\",\n", + " \"href\": f\"https://staging-stac.delta-backend.com/collections/{collection_id}/items/LC08_L2SR_001113_20230125_20230208_02_T2_SR\",\n", + " },\n", + " ]\n", + "}" ] }, { @@ -338,26 +323,30 @@ " expected_prefix = \"https://landsateuwest.blob.core.windows.net/landsat-c2/\"\n", "\n", " if not input_string.startswith(expected_prefix):\n", - " raise ValueError(f\"Input string does not match expected pattern - {input_string}.\")\n", + " raise ValueError(\n", + " f\"Input string does not match expected pattern - {input_string}.\"\n", + " )\n", "\n", " replacement_prefix = \"s3://usgs-landsat/collection02/\"\n", " replaced_string = input_string.replace(expected_prefix, replacement_prefix)\n", "\n", " return replaced_string\n", "\n", + "\n", "def remove_keys(dictionary, keys_to_keep):\n", " return {key: value for key, value in dictionary.items() if key in keys_to_keep}\n", "\n", - "bands = ['red', 'green', 'blue']\n", + "\n", + "bands = [\"red\", \"green\", \"blue\"]\n", "\n", "with open(f\"{LAKE_NAME_SLUG}-items.json\", \"w\") as file:\n", " for item in search.items_as_dicts():\n", " item.update(item_links)\n", - " 
item['collection'] = collection_id\n", - " item['assets'] = remove_keys(item['assets'], bands)\n", + " item[\"collection\"] = collection_id\n", + " item[\"assets\"] = remove_keys(item[\"assets\"], bands)\n", " for i in bands:\n", - " item['assets'][i]['href'] = replace_and_check(item['assets'][i]['href'])\n", - " file.write(json.dumps(item) + \"\\n\") " + " item[\"assets\"][i][\"href\"] = replace_and_check(item[\"assets\"][i][\"href\"])\n", + " file.write(json.dumps(item) + \"\\n\")" ] }, { diff --git a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-organize-cogs.ipynb b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-organize-cogs.ipynb index 0d072474..63d77074 100644 --- a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-organize-cogs.ipynb +++ b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-organize-cogs.ipynb @@ -120,7 +120,21 @@ "outputs": [], "source": [ "EXPERIMENTS = [\"historical\", \"ssp245\"]\n", - "METRICS = [\"FFMC\", \"FWI_P25\", \"FWI_P50\", \"FWI_P75\", \"FWI_P95\", \"DMC\", \"DC\", \"ISI\", \"BUI\", \"FWI\", \"FWI_N15\", \"FWI_N30\", \"FWI_N45\"]" + "METRICS = [\n", + " \"FFMC\",\n", + " \"FWI_P25\",\n", + " \"FWI_P50\",\n", + " \"FWI_P75\",\n", + " \"FWI_P95\",\n", + " \"DMC\",\n", + " \"DC\",\n", + " \"ISI\",\n", + " \"BUI\",\n", + " \"FWI\",\n", + " \"FWI_N15\",\n", + " \"FWI_N30\",\n", + " \"FWI_N45\",\n", + "]" ] }, { @@ -130,9 +144,9 @@ "metadata": {}, "outputs": [], "source": [ - "# COG configuration, deflate compression is slightly smaller than the LZW default for the cog driver \n", + "# COG configuration, deflate compression is slightly smaller than the LZW default for the cog driver\n", "# and predictor=2 further reduces file size\n", - "# https://kokoalberti.com/articles/geotiff-compression-optimization-guide/ \n", + "# https://kokoalberti.com/articles/geotiff-compression-optimization-guide/\n", "# \"Predictors work especially well when there is some spatial correlation in the data, and pixels have 
values which are similar to their neighbours. As the name suggests, the floating point predictor can only be used on floating point data.\"\n", "\n", "driver = \"COG\"\n", @@ -3922,30 +3936,30 @@ "for experiment in EXPERIMENTS:\n", " prefix = f\"Sample/FWI/Yearly/MME/MME50_{experiment}_\"\n", " r = client.list_objects_v2(\n", - " Bucket = STAGING_BUCKET,\n", - " Prefix = prefix,\n", + " Bucket=STAGING_BUCKET,\n", + " Prefix=prefix,\n", " )\n", " if verbose:\n", " print(f\"\\n{r['KeyCount']} objects for {prefix=}\")\n", - " \n", - " # These objects are all of the 13 variable in single yearly netcdf \n", + "\n", + " # These objects are all of the 13 variable in single yearly netcdf\n", " objects = r[\"Contents\"]\n", " for obj in objects:\n", " src_key = obj[\"Key\"]\n", "\n", " if verbose:\n", " print(f\"{src_key=}\")\n", - " \n", + "\n", " # Get object and read open as xarray dataset\n", " with fs.open(f\"{STAGING_BUCKET}/{src_key}\") as fileobj:\n", " with xr.open_dataset(fileobj, engine=\"h5netcdf\") as ds:\n", - "\n", - " # Now loop through the individual variables and generate a COG for each of these \n", + " # Now loop through the individual variables and generate a COG for each of these\n", " for metric in METRICS:\n", - "\n", " # Generate output path\n", - " cog_key = cmip6_file_organization.generate_yearly_fwi_metrics_key(src_key, \"v0\", pub_type=\"cog\", metric=metric)\n", - " \n", + " cog_key = cmip6_file_organization.generate_yearly_fwi_metrics_key(\n", + " src_key, \"v0\", pub_type=\"cog\", metric=metric\n", + " )\n", + "\n", " if verbose or extra_dryrun:\n", " print(f\"{cog_key=}\")\n", " if extra_dryrun:\n", @@ -3955,26 +3969,34 @@ " da = ds[metric]\n", "\n", " # Realign the x dimension to -180 origin for dataset\n", - " da = da.assign_coords(lon=(((da.lon + 180) % 360) - 180)).sortby(\"lon\")\n", - " \n", - " # Reverse the DataArray's y dimension. It appears that the source NetCDF's y dimension is inverted. 
\n", + " da = da.assign_coords(lon=(((da.lon + 180) % 360) - 180)).sortby(\n", + " \"lon\"\n", + " )\n", + "\n", + " # Reverse the DataArray's y dimension. It appears that the source NetCDF's y dimension is inverted.\n", " da = da.reindex(lat=list(reversed(da.lat)))\n", "\n", " # Convert to COG via rioxarray\n", " da.rio.set_spatial_dims(\"lon\", \"lat\")\n", " da.rio.write_crs(\"epsg:4326\", inplace=True)\n", - " \n", + "\n", " # Here we need a memory file to write the output raster to\n", " with MemoryFile() as memfile:\n", - " da.rio.to_raster(memfile.name, driver=\"COG\", compress=compress, predictor=predictor)\n", + " da.rio.to_raster(\n", + " memfile.name,\n", + " driver=\"COG\",\n", + " compress=compress,\n", + " predictor=predictor,\n", + " )\n", "\n", " # Validate and upload COG\n", " if rio_cogeo.cogeo.cog_validate(memfile.name)[0]:\n", - "\n", " if dryrun:\n", - " print(f\"Generation of valid COG {cog_key=} successful. Skipping upload.\")\n", + " print(\n", + " f\"Generation of valid COG {cog_key=} successful. 
Skipping upload.\"\n", + " )\n", " continue\n", - " \n", + "\n", " if verbose:\n", " print(f\"START to upload {cog_key=} to {DST_BUCKET=}\")\n", "\n", @@ -3991,8 +4013,10 @@ " print(f\"WARNING unable to upload {cog_key=}\")\n", " else:\n", " invalid_cogs.append(cog_key)\n", - " \n", - "print(f\"\\nCOMPLETED with {len(error_responses)} errors and {len(invalid_cogs)} invalid cogs\")\n", + "\n", + "print(\n", + " f\"\\nCOMPLETED with {len(error_responses)} errors and {len(invalid_cogs)} invalid cogs\"\n", + ")\n", "print(error_responses)\n", "print(invalid_cogs)" ] diff --git a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-stac.ipynb b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-stac.ipynb index 022e5d74..5d79e774 100644 --- a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-stac.ipynb +++ b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-generate-stac.ipynb @@ -146,24 +146,30 @@ "outputs": [], "source": [ "METRICS = [\n", - " \"FFMC\", \"FWI_P25\", \"FWI_P50\", \"FWI_P75\", \"FWI_P95\", \"DMC\", \n", - " \"DC\", \"ISI\", \"BUI\", \"FWI\", \"FWI_N15\", \"FWI_N30\", \"FWI_N45\"\n", + " \"FFMC\",\n", + " \"FWI_P25\",\n", + " \"FWI_P50\",\n", + " \"FWI_P75\",\n", + " \"FWI_P95\",\n", + " \"DMC\",\n", + " \"DC\",\n", + " \"ISI\",\n", + " \"BUI\",\n", + " \"FWI\",\n", + " \"FWI_N15\",\n", + " \"FWI_N30\",\n", + " \"FWI_N45\",\n", "]\n", "\n", "asset_properties = {\n", - " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", - " \"roles\": [\n", - " \"data\",\n", - " \"layer\"\n", - " ]\n", - " }\n", + " \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n", + " \"roles\": [\"data\", \"layer\"],\n", + "}\n", "item_assets = {}\n", "for metric in METRICS:\n", " asset = asset_properties\n", " asset[\"title\"] = metric\n", - " item_assets[metric] = asset\n", - "\n", - " " + " item_assets[metric] = asset" ] }, { @@ -193,7 +199,7 @@ " \"dashboard:is_periodic\": DASHBOARD__IS_PERIODIC,\n", " 
\"dashboard:time_density\": DASHBOARD__TIME_DENSITY,\n", " \"item_assets\": item_assets,\n", - " \"sci:doi\": \"https://www.nccs.nasa.gov/services/data-collections/land-based-products/nex-gddp-cmip6\"\n", + " \"sci:doi\": \"https://www.nccs.nasa.gov/services/data-collections/land-based-products/nex-gddp-cmip6\",\n", " },\n", " license=LICENSE,\n", " extent=extent,\n", diff --git a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-organize-netcdf.ipynb b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-organize-netcdf.ipynb index c0e86c18..60232f65 100644 --- a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-organize-netcdf.ipynb +++ b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6-organize-netcdf.ipynb @@ -51,6 +51,7 @@ "outputs": [], "source": [ "import boto3\n", + "\n", "# # if running locally, uncomment to set user role for session\n", "# boto3.setup_default_session(profile_name=\"deltawest\")\n", "client = boto3.client(\"s3\")" @@ -256,40 +257,45 @@ "for experiment in EXPERIMENTS:\n", " prefix = f\"Sample/FWI/Yearly/MME/MME50_{experiment}_\"\n", " r = client.list_objects_v2(\n", - " Bucket = STAGING_BUCKET,\n", - " Prefix = prefix,\n", + " Bucket=STAGING_BUCKET,\n", + " Prefix=prefix,\n", " )\n", " if verbose or dryrun:\n", " print(f\"\\n{r['KeyCount']} objects for {prefix=} in {STAGING_BUCKET=}\")\n", " objects = r[\"Contents\"]\n", - " \n", + "\n", " for obj in objects:\n", - " \n", " src_key = obj[\"Key\"]\n", - " nc_key = cmip6_file_organization.generate_yearly_fwi_metrics_key(src_key, \"v0\", pub_type=\"netcdf\") \n", - " \n", - " # Skip existing \n", - " if (client.list_objects_v2(\n", - " Bucket = DST_BUCKET,\n", - " Prefix = nc_key,\n", - " ))[\"KeyCount\"] > 0:\n", + " nc_key = cmip6_file_organization.generate_yearly_fwi_metrics_key(\n", + " src_key, \"v0\", pub_type=\"netcdf\"\n", + " )\n", + "\n", + " # Skip existing\n", + " if (\n", + " client.list_objects_v2(\n", + " Bucket=DST_BUCKET,\n", + " Prefix=nc_key,\n", + " )\n", + " )[\"KeyCount\"] > 0:\n", 
" if verbose or dryrun:\n", " print(f\"CONTINUE {nc_key=} already uploaded to {DST_BUCKET}\")\n", " continue\n", - " \n", + "\n", " if dryrun:\n", " print(f\"WOULD HAVE uploaded {nc_key=} to {DST_BUCKET=}\")\n", " continue\n", - " \n", + "\n", " if verbose:\n", " print(f\"START to upload {nc_key=} to {DST_BUCKET=}\")\n", "\n", " # upload and confirm success\n", - " r = client.copy_object(Bucket=DST_BUCKET, CopySource=f\"{STAGING_BUCKET}/{src_key}\", Key=nc_key)\n", + " r = client.copy_object(\n", + " Bucket=DST_BUCKET, CopySource=f\"{STAGING_BUCKET}/{src_key}\", Key=nc_key\n", + " )\n", " if r[\"ResponseMetadata\"][\"HTTPStatusCode\"] != 200:\n", " print(f\"WARNING unable to upload {nc_key=}\")\n", " error_responses.append(r)\n", - " \n", + "\n", "print(f\"\\nCOMPLETED with {len(error_responses)} errors\")\n", "if error_responses:\n", " print(error_responses)" diff --git a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6_file_organization.py b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6_file_organization.py index b8bedfe3..38496c83 100644 --- a/transformation-scripts/nex-gddp-cmip6-fwi/cmip6_file_organization.py +++ b/transformation-scripts/nex-gddp-cmip6-fwi/cmip6_file_organization.py @@ -1,77 +1,99 @@ -from urllib.parse import urlparse from os.path import basename from typing import Optional - +from urllib.parse import urlparse def generate_yearly_fwi_metrics_key( - src_url: str, - dst_version: str, - ensemble: str = "mme", - pub_type: str = "netcdf", - metric: Optional[str] = None, - verbose: bool = False - ): + src_url: str, + dst_version: str, + ensemble: str = "mme", + pub_type: str = "netcdf", + metric: Optional[str] = None, + verbose: bool = False, +): """_summary_ - Generate the object key prefix to use when publishing a NetCDF from the staging bucket to the destination publicaiton bucket. + Generate the object key prefix to use when publishing a NetCDF from the + staging bucket to the destination publication bucket. 
+ Args: - src_url (str): full s3 url of the file staged for publicaiton, i.e. s3://cmip6-staging/Sample/FWI/Yearly/MME/MME50_ssp245_fwi_metrics_yearly_2100.nc + src_url (str): full s3 url of the file staged for publication, i.e. s3://cmip6-staging/Sample/FWI/Yearly/MME/MME50_ssp245_fwi_metrics_yearly_2100.nc dst_version (str): data version to use for publication bucket key prefix """ # Only dealing with yearly data at this point cadence = "yearly" - + parsed = urlparse(src_url, allow_fragments=False) nc_base = basename(parsed.path) # Assume pattern ensemble-stat_experiment_fwi_metrics_yearly_yyyy # MME50_ssp245_fwi_metrics_yearly_2100 name_parts = nc_base.split("_") - # For now just test that this is probably the filename format we expect, this should be a regex match - assert(len(name_parts)) == 6 + # For now just test that this is probably the filename format we expect, + # this should be a regex match + assert (len(name_parts)) == 6 ensemble_stat = name_parts[0] experiment = name_parts[1] - year = name_parts[-1].replace(".nc","") - + year = name_parts[-1].replace(".nc", "") + if pub_type == "cog": # A metric must be supplied for COG outputs - assert(metric) + assert metric # Add additional information about the FWI metric to the data storage structure cog_base = nc_base.replace(".nc", f"_{metric}.tif") - key = (f"{dst_version}/{pub_type}/fwi/{ensemble}/{ensemble_stat}/{cadence}/{experiment}/{year}/{cog_base}").lower() + key = ( + f"{dst_version}/{pub_type}/fwi/{ensemble}/{ensemble_stat}/{cadence}/{experiment}/{year}/{cog_base}" + ).lower() else: - key = (f"{dst_version}/{pub_type}/fwi/{ensemble}/{ensemble_stat}/{cadence}/{experiment}/{year}/{nc_base}").lower() - + key = ( + f"{dst_version}/{pub_type}/fwi/{ensemble}/{ensemble_stat}/{cadence}/{experiment}/{year}/{nc_base}" + ).lower() + if verbose: print(key) return key if __name__ == "__main__": - + def _test_generate_yearly_fwi_metrics_key(): """Confirm keys are as expected""" - # check projected experiment + # 
check projected experiment src_url = "s3://cmip6-staging/Sample/FWI/Yearly/MME/MME50_ssp245_fwi_metrics_yearly_2100.nc" # netcdf - netcdf_key = generate_yearly_fwi_metrics_key(src_url, "v0",verbose=True) - assert netcdf_key == "v0/netcdf/fwi/mme/mme50/yearly/ssp245/2100/mme50_ssp245_fwi_metrics_yearly_2100.nc" + netcdf_key = generate_yearly_fwi_metrics_key(src_url, "v0", verbose=True) + assert ( + netcdf_key == "v0/netcdf/fwi/mme/mme50/yearly/ssp245/2100/" + "mme50_ssp245_fwi_metrics_yearly_2100.nc" + ) # cog - cog_key = generate_yearly_fwi_metrics_key(src_url, "v0", pub_type="cog", metric="ffmc", verbose=True) - assert cog_key == "v0/cog/fwi/mme/mme50/yearly/ssp245/2100/mme50_ssp245_fwi_metrics_yearly_2100_ffmc.tif" + cog_key = generate_yearly_fwi_metrics_key( + src_url, "v0", pub_type="cog", metric="ffmc", verbose=True + ) + assert ( + cog_key == "v0/cog/fwi/mme/mme50/yearly/ssp245/2100/" + "mme50_ssp245_fwi_metrics_yearly_2100_ffmc.tif" + ) # check historical experiment src_url = "s3://cmip6-staging/Sample/FWI/Yearly/MME/MME50_historical_fwi_metrics_yearly_1950.nc" # netcdf - netcdf_key = generate_yearly_fwi_metrics_key(src_url, "v0",verbose=True) - assert netcdf_key == "v0/netcdf/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950.nc" + netcdf_key = generate_yearly_fwi_metrics_key(src_url, "v0", verbose=True) + assert ( + netcdf_key == "v0/netcdf/fwi/mme/mme50/yearly/historical/1950/" + "mme50_historical_fwi_metrics_yearly_1950.nc" + ) # cog - cog_key = generate_yearly_fwi_metrics_key(src_url, "v0", pub_type="cog", metric="dc", verbose=True) - assert cog_key == "v0/cog/fwi/mme/mme50/yearly/historical/1950/mme50_historical_fwi_metrics_yearly_1950_dc.tif" - - _test_generate_yearly_fwi_metrics_key() \ No newline at end of file + cog_key = generate_yearly_fwi_metrics_key( + src_url, "v0", pub_type="cog", metric="dc", verbose=True + ) + assert ( + cog_key == "v0/cog/fwi/mme/mme50/yearly/historical/1950/" + 
"mme50_historical_fwi_metrics_yearly_1950_dc.tif" + ) + + _test_generate_yearly_fwi_metrics_key() diff --git a/transformation-scripts/soil_moisture_transformation_cog.ipynb b/transformation-scripts/soil_moisture_transformation_cog.ipynb index 82aac84b..1f15bf0a 100644 --- a/transformation-scripts/soil_moisture_transformation_cog.ipynb +++ b/transformation-scripts/soil_moisture_transformation_cog.ipynb @@ -41,12 +41,12 @@ "source": [ "session = boto3.Session()\n", "s3_client = session.client(\"s3\")\n", - "bucket_name = (\"veda-data-store-staging\") # bucket name to store the transformed COGs\n", - "FOLDER_NAME = \"fldas_anomalies_SoilMoi00_10cm_tavg_cog\" # Name of the folder to store the COGs\n", + "bucket_name = \"veda-data-store-staging\" # bucket name to store the transformed COGs\n", + "FOLDER_NAME = (\n", + " \"fldas_anomalies_SoilMoi00_10cm_tavg_cog\" # Name of the folder to store the COGs\n", + ")\n", "\n", - "files_processed = pd.DataFrame(\n", - " columns=[\"file_name\", \"COGs_created\"]\n", - ")" + "files_processed = pd.DataFrame(columns=[\"file_name\", \"COGs_created\"])" ] }, { @@ -76,7 +76,10 @@ "\n", " return keys\n", "\n", - "keys = get_all_s3_keys(\"gesdisc-cumulus-prod-protected\") # fetching all the keys from the prod bucket for netCDFs to be transformed" + "\n", + "keys = get_all_s3_keys(\n", + " \"gesdisc-cumulus-prod-protected\"\n", + ") # fetching all the keys from the prod bucket for netCDFs to be transformed" ] }, { @@ -88,8 +91,7 @@ }, "outputs": [], "source": [ - "\n", - "var = \"SoilMoi00_10cm_tavg\" # Variable name to be transformed" + "var = \"SoilMoi00_10cm_tavg\" # Variable name to be transformed" ] }, { @@ -646,7 +648,7 @@ " Bucket=bucket_name,\n", " Key=f\"{FOLDER_NAME}/{cog_filename}\",\n", " )\n", - "# A dataframe to keep track of files that are transfomed\n", + " # A dataframe to keep track of files that are transfomed\n", " files_processed = files_processed._append(\n", " {\"file_name\": name, \"COGs_created\": 
cog_filename},\n", " ignore_index=True,\n",