Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add reading grib files for stats #33

Open
wants to merge 68 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
3ec8155
added grib_parser
Jul 18, 2024
ae3a747
eccodes_definition in setup_env
Jul 19, 2024
56ab73e
applied changes for reading grib files
andleuth Jul 29, 2024
b3bc25e
Formatting
andleuth Jul 29, 2024
fd39159
update to master branch
andleuth Jul 30, 2024
3250783
dummy change
andleuth Jul 30, 2024
940f1f9
revert dummy change
andleuth Jul 30, 2024
fce5ac0
adapt get_ds()
andleuth Aug 5, 2024
b046756
unittest and change in grib_definition_path
andleuth Aug 7, 2024
dde8b24
old def_path for safety
andleuth Aug 7, 2024
ea0e431
Merge remote-tracking branch 'origin/main' into reading_grib_files
huppd Oct 1, 2024
6546223
improve formatting
huppd Oct 1, 2024
d3a942e
first draft of tests
huppd Oct 1, 2024
f62a22a
add unittest
huppd Oct 1, 2024
35e9821
rm deleted files
huppd Oct 1, 2024
c72eb96
update requirements
huppd Oct 1, 2024
5048f49
update requirements
huppd Oct 1, 2024
e2f426e
Merge branch 'reading_grib_files' of github.com:MeteoSwiss/probtest i…
huppd Oct 2, 2024
f9d04a9
use pinned version for ci and make python version consistent
huppd Oct 2, 2024
145dd49
pin env and set python version
huppd Oct 2, 2024
7dd8c18
add author
huppd Oct 2, 2024
e450ff7
fix gitignore
huppd Oct 2, 2024
cb5d76c
update gitignore
huppd Oct 2, 2024
13ce70c
small improvement
huppd Oct 3, 2024
7be1ac4
change to pytest
huppd Oct 3, 2024
99320d8
cleanup tmp path
huppd Oct 3, 2024
92677de
fix tmp
huppd Oct 3, 2024
658c249
cleanup gitignore
huppd Oct 3, 2024
32a180b
fix utest
huppd Oct 4, 2024
c16e221
fix env
huppd Oct 4, 2024
d2d84d3
improve github actions
huppd Oct 4, 2024
4f7e183
fix env
huppd Oct 4, 2024
74ad301
step by step refactor
huppd Oct 4, 2024
3996217
next step
huppd Oct 4, 2024
3b467e5
next step
huppd Oct 4, 2024
30bbcd3
next step
huppd Oct 4, 2024
bc0be67
last step
huppd Oct 4, 2024
b5c2866
last step
huppd Oct 4, 2024
7c59155
Merge branch 'reading_grib_files' of github.com:MeteoSwiss/probtest i…
huppd Oct 4, 2024
ad6e44e
cleanup gitignore
huppd Oct 4, 2024
8b074d7
minor change
huppd Oct 4, 2024
7e1427b
fix readme
huppd Oct 4, 2024
3f9bd0d
fix definitions version
huppd Oct 4, 2024
8f89c85
make icon consistent
huppd Oct 4, 2024
6039666
fix naming
huppd Oct 4, 2024
7bfcea5
fix naming
huppd Oct 4, 2024
903f8a4
fix eccodes version
huppd Oct 4, 2024
d9a1797
include sample path
huppd Oct 4, 2024
a584fca
use eccodes=2.35.0
huppd Oct 4, 2024
59a0833
fix python version
huppd Oct 7, 2024
21c6d24
might not be needed
huppd Oct 7, 2024
c357b92
Merge remote-tracking branch 'origin/main' into reading_grib_files
huppd Oct 8, 2024
8000bdf
revert hotfix
huppd Oct 8, 2024
f2ab296
Merge remote-tracking branch 'origin/main' into reading_grib_files
huppd Oct 15, 2024
43a3985
update README.md
huppd Oct 16, 2024
2ed2845
refactor
huppd Oct 16, 2024
586d5a1
use closest possible eccodes to 2.25.0
huppd Oct 16, 2024
9b94a3a
refactor
huppd Oct 16, 2024
f51fcd4
not so nice workaround
huppd Oct 16, 2024
e50ec27
workaround
huppd Oct 17, 2024
427a954
update env
huppd Nov 7, 2024
68ee283
rename var
huppd Nov 7, 2024
0a2fc58
fix name
huppd Nov 7, 2024
282a9d3
rm mamba option
huppd Nov 8, 2024
937184e
fix name
huppd Nov 8, 2024
4321bbc
update requirements
huppd Nov 8, 2024
8ca9638
update eccodes, needs test fixing
huppd Nov 22, 2024
a438460
fix environment
huppd Nov 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements/dev-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -255,5 +255,6 @@ dependencies:
- zipp=3.11.0
- zlib=1.2.13
- zstd=1.5.2
- earthkit-data
- pip:
- flake8-pyproject==1.2.2
2 changes: 1 addition & 1 deletion requirements/dev-requirements.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ dependencies:
- sphinx
- sphinx-autobuild
- toml>=0.10.2
- pip
- earthkit-data
- pip:
- flake8-pyproject
1 change: 1 addition & 0 deletions requirements/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,4 @@ dependencies:
- xz=5.2.6
- zlib=1.2.13
- zstd=1.5.2
- earthkit-data
1 change: 1 addition & 0 deletions requirements/requirements.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ dependencies:
- scipy>=1.4.1
- xarray>=0.16.1
- pip
- earthkit-data
- pip:
- pytest-cov
13 changes: 13 additions & 0 deletions setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,16 @@ else
fi
fi
fi


# setting ECCODES_DEFINITION_PATH:
git clone --recursive https://github.com/COSMO-ORG/eccodes-cosmo-resources.git

${CONDA} activate ${DEV_ENV_NAME}
conda_loc=${CONDA_PREFIX}
base_dir=$(pwd)
def_path_default=${conda_loc}/share/eccodes/definitions
def_path_resources=${base_dir}/eccodes-cosmo-resources/definitions

conda env config vars set ECCODES_DEFINITION_PATH=${def_path_default}:${def_path_resources}
stelliom marked this conversation as resolved.
Show resolved Hide resolved
${CONDA} deactivate
1 change: 1 addition & 0 deletions templates/ICON.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"member_type": "{{member_type}}",
"factor": 5,
"file_specification": [{
"GRIB": {"format": "GRIB", "time_dim": "step", "horizontal_dims": ["values"], "var_excl": ["tlon", "tlat", "vlon", "vlat", "ulon", "ulat", "h", "slor", "anor", "isor", "sdor"]},
"latlon": { "format": "netcdf", "time_dim": "time", "horizontal_dims": ["lat:lon"] },
"meteogram": { "format": "netcdf", "time_dim": "time", "horizontal_dims": ["max_nlevs:nstations", "nstations"] },
"dace":{ "format": "netcdf", "time_dim": null, "horizontal_dims": ["d_body"]},
Expand Down
151 changes: 151 additions & 0 deletions util/model_output_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys
from collections.abc import Iterable

import earthkit.data
import numpy as np
import pandas as pd
import xarray
Expand Down Expand Up @@ -61,6 +62,155 @@ def parse_netcdf(file_id, filename, specification):
return var_dfs


def parse_grib(file_id, filename, specification):
logger.debug("parse GRIB file {}".format(filename))
time_dim = specification["time_dim"]
horizontal_dims = specification["horizontal_dims"]
fill_value_key = specification.get("fill_value_key", None)
andleuth marked this conversation as resolved.
Show resolved Hide resolved

ds_grib = earthkit.data.from_source("file", filename)
short_name_excl = specification["var_excl"]

short_name = np.unique(ds_grib.metadata("shortName"))
short_name = short_name[
np.isin(short_name, short_name_excl, invert=True, assume_unique=True)
].tolist()

level_type = np.unique(ds_grib.metadata("typeOfLevel")).tolist()

var_dfs = []
for lev in level_type:
paramId = np.unique(
ds_grib.sel(typeOfLevel=lev, shortName=short_name).metadata("paramId")
).tolist()
for pid in paramId:
ds_temp_list = get_ds(ds_grib, pid, lev)
for ds_temp in ds_temp_list:
v = list(ds_temp.keys())[0]

dim_to_squeeze = [
dim
for dim, size in zip(ds_temp[v].dims, ds_temp[v].shape)
if size == 1 and dim != time_dim
]
ds = ds_temp.squeeze(dim=dim_to_squeeze)

sub_df = dataframe_from_ncfile(
file_id=file_id,
filename=filename,
varname=v,
time_dim=time_dim,
horizontal_dims=horizontal_dims,
xarray_ds=ds,
fill_value_key=fill_value_key,
)
var_dfs.append(sub_df)

return var_dfs


def get_ds(ds_grib, pid, lev):
ds_list = []
try:
huppd marked this conversation as resolved.
Show resolved Hide resolved
ds = ds_grib.sel(paramId=pid, typeOfLevel=lev).to_xarray()
ds_list.append(ds)
except KeyError:
stepType = np.unique(
ds_grib.sel(paramId=pid, typeOfLevel=lev).metadata("stepType")
).tolist()
for steps in stepType:
try:
ds = ds_grib.sel(
paramId=pid, typeOfLevel=lev, stepType=steps
).to_xarray()
ds_list.append(ds)
except KeyError:
num_points = np.unique(
ds_grib.sel(paramId=pid, typeOfLevel=lev, stepType=steps).metadata(
"numberOfPoints"
)
).tolist()
for points in num_points:
try:
ds = ds_grib.sel(
paramId=pid,
typeOfLevel=lev,
stepType=steps,
numberOfPoints=points,
).to_xarray()
ds_list.append(ds)
except KeyError:
units = np.unique(
ds_grib.sel(
paramId=pid,
typeOfLevel=lev,
stepType=steps,
numberOfPoints=points,
).metadata("stepUnits")
).tolist()
for unit in units:
try:
ds = ds_grib.sel(
paramId=pid,
typeOfLevel=lev,
stepType=steps,
numberOfPoints=points,
stepUnits=unit,
).to_xarray()
ds_list.append(ds)
except KeyError:
dataType = np.unique(
ds_grib.sel(
paramId=pid,
typeOfLevel=lev,
stepType=steps,
numberOfPoints=points,
stepUnits=unit,
).metadata("dataType")
).tolist()
for dtype in dataType:
try:
ds = ds_grib.sel(
paramId=pid,
typeOfLevel=lev,
stepType=steps,
numberOfPoints=points,
stepUnits=unit,
dataType=dtype,
).to_xarray()
ds_list.append(ds)
except KeyError:
gridType = np.unique(
ds_grib.sel(
paramId=pid,
typeOfLevel=lev,
stepType=steps,
numberOfPoints=points,
stepUnits=unit,
dataType=dtype,
).metadata("gridType")
).tolist()
for gtype in gridType:
try:
ds = ds_grib.sel(
paramId=pid,
typeOfLevel=lev,
stepType=steps,
numberOfPoints=points,
stepUnits=unit,
dataType=dtype,
gridType=gtype,
).to_xarray()
ds_list.append(ds)
except KeyError:
print(
f"GRIB file of level {lev} and"
"paramId {pid} cannot be read."
)

return ds_list


def __get_variables(data, time_dim, horizontal_dims):
# return a list of variable names from the dataset data that have a time dimension
# and horizontal dimension or in case there is no time dimension just the variables
Expand Down Expand Up @@ -228,4 +378,5 @@ def parse_csv(file_id, filename, specification):
model_output_parser = { # global lookup dict
"netcdf": parse_netcdf,
"csv": parse_csv,
"grib": parse_grib,
}
Loading