Skip to content

Commit

Permalink
Merge pull request #82 from MeteoSwiss/develop
Browse files Browse the repository at this point in the history
v0.5.0.dev0
  • Loading branch information
fpavogt authored Aug 2, 2022
2 parents 72ff42c + dc2fa40 commit 7267f9b
Show file tree
Hide file tree
Showing 13 changed files with 134 additions and 78 deletions.
11 changes: 9 additions & 2 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,22 @@ The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [v0.5.0.dev0]
### Added:
### Fixed:
- [fpavogt, 2022-07-08] Fix #78 and #79.
### Changed:
### Deprecated:
### Removed:
### Security:

## [v0.4.0.dev0]
### Added:
- [fpavogt, 2022-02-27] Add new speed-check Action.
- [fpavogt, 2022-02-25] Add the `prms` keyword to set parameters for each run call in a thread-safe manner.
### Fixed:
- [fpavogt, 2022-03-02] Fix #75.
- [fpavogt, 2022-02-25] Fix #71 and #25.
### Changed:
### Deprecated:
### Removed:
- [fpavogt, 2022-02-25] Remove yaconfigobject dependency in favor of ruamel.yaml.
### Security:
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ sh build_docs.sh
This will create the `.html` pages of the compiled documentation under `./build`. In particular,
this bash script will automatically update the help message from the high-level ampycloud entry
point ``ampycloud_speed_test``, create the demo figure for the main page, compile and ingest all the
docstrings, etc ... . See the ampycloud[release mechanisms](#release-mechansims) for more info about
docstrings, etc ... . See the ampycloud [release mechanisms](#release-mechansims) for more info about
the automated publication of the documentation upon new releases.
Expand Down
18 changes: 12 additions & 6 deletions src/ampycloud/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,18 +236,18 @@ def ceilos(self) -> list:
def max_hits_per_layer(self) -> int:
""" The maximum number of ceilometer hits possible for a given layer, given the chunk data.
Returns:
int: the max number of ceilometer hit for a layer. Divide by len(self.ceilos) to get
the **average** max number of hits per ceilometer per layer (remember: not all
ceilometers may have the same number of timestamps over the chunk time period !).
This is the total number of **unique** timesteps from all ceilometers considered.
Note:
This value assumes that a layer can contain only 1 hit per ceilometer per timestep,
i.e. 2 simultaneous hits from a given ceilometer can **never** belong to the same cloud
layer.
Returns:
int: the max number of ceilometer hit for a layer. Divide by len(self.ceilos) to get
the **average** max number of hits per ceilometer per layer (remember: not all
ceilometers may have the same number of timestamps over the chunk time period !).
"""

# For each ceilometer, count the number of individual time stamps ...
Expand Down Expand Up @@ -634,9 +634,15 @@ def find_layers(self) -> None:
logger.info('Group base alt: %.1f', self.groups.at[ind, 'alt_base'])
logger.info('min_sep value: %.1f', min_sep)

# Handle #78: if the data is comprised of only two distinct altitudes, only look for
# up to 2 Gaussian components. Else, up to 3.
ncomp_max = np.min([len(np.unique(gro_alts[~np.isnan(gro_alts)])), 3])
logger.debug('Setting ncomp_max to: %i', ncomp_max)

# And feed them to a Gaussian Mixture Model to figure out how many components it has ...
ncomp, sub_layers_id, _ = layer.ncomp_from_gmm(
gro_alts, min_sep=min_sep, **self.prms['LAYERING_PRMS']['gmm_kwargs'])
gro_alts, ncomp_max=ncomp_max, min_sep=min_sep,
**self.prms['LAYERING_PRMS']['gmm_kwargs'])

# Add this info to the log
logger.debug(' Cluster %s has %i components according to GMM.',
Expand Down
7 changes: 4 additions & 3 deletions src/ampycloud/icao.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ def significant_cloud(oktas: list) -> list:
* third layer must be BKN or more (i.e. 5 oktas or more)
* no more than 3 layers reported (since ampycloud does not deal with CB/TCU)
**Source**: Sec. 4.5.4.3 e) & footnote #14 in Table A3-1, Meteorological Service for
International Air Navigation, Annex 3 to the Convention on International Civil Aviation, ICAO,
20th edition, July 2018.
Reference:
Sec. 4.5.4.3 e) & footnote #14 in Table A3-1, Meteorological Service for
International Air Navigation, Annex 3 to the Convention on International Civil Aviation,
ICAO, 20th edition, July 2018.
"""

Expand Down
49 changes: 26 additions & 23 deletions src/ampycloud/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
def scores2nrl(abics: np.ndarray) -> np.ndarray:
""" Converts AIC or BIC scores into probabilities = normalized relative likelihood.
Args:
abics (ndarray): scores.
Returns:
ndarray: probabilities of the different models.
Specifically, this function computes:
.. math::
Expand All @@ -40,11 +46,6 @@ def scores2nrl(abics: np.ndarray) -> np.ndarray:
The smaller the BIC/AIC scores, the better, but the higher the probabilities = normalized
relative likelihood, the better !
Args:
abics (ndarray): scores.
Returns:
ndarray: probabilities of the different models.
"""

out = np.exp(-0.5*(abics-np.min(abics)))
Expand All @@ -58,6 +59,20 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta',
min_prob: float = 1., delta_mul_gain: float = 1.) -> int:
""" Identify which Gaussian Mixture Model is most appropriate given AIC or BIC scores.
Args:
abics (ndarray): the AICs or BICs scores, ordered from simplest to most complex model.
mode (str, optional): one of ['delta', 'prob']. Defaults to 'delta'.
min_prob (float, optional): minimum model probability computed from the scores' relative
likelihood, below which the other models will be considered. Set it to 1 to select
the model with the lowest score, irrespective of its probability. Defaults to 1.
This has no effect unless mode='prob'.
delta_mul_gain (float, optional): a smaller score will only be considered "valid"
if it is smaller than delta_mul_gain*current_best_score. Defaults to 1.
This has no effect unless mode='delta'.
Returns:
int: index of the "most appropriate" model.
Model selection can be based on:
1. the normalized relative likelihood values (see `scores2nrl()`) of the AIC or/and BIC
Expand Down Expand Up @@ -87,20 +102,6 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta',
The default arguments of this function lead to selecting the number of components with the
smallest score.
Args:
abics (ndarray): the AICs or BICs scores, ordered from simplest to most complex model.
mode (str, optional): one of ['delta', 'prob']. Defaults to 'delta'.
min_prob (float, optional): minimum model probability computed from the scores' relative
likelihood, below which the other models will be considered. Set it to 1 to select
the model with the lowest score, irrespective of its probability. Defaults to 1.
This has no effect unless mode='prob'.
delta_mul_gain (float, optional): a smaller score will only be considered "valid"
if it is smaller than delta_mul_gain*current_best_score. Defaults to 1.
This has no effect unless mode='delta'.
Returns:
int: index of the "most appropriate" model.
"""

# How many models do I need to compare ?
Expand Down Expand Up @@ -139,6 +140,7 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta',

@log_func_call(logger)
def ncomp_from_gmm(vals: np.ndarray,
ncomp_max: int = 3,
min_sep: Union[int, float] = 0,
scores: str = 'BIC',
rescale_0_to_x: float = None,
Expand All @@ -147,11 +149,10 @@ def ncomp_from_gmm(vals: np.ndarray,
""" Runs a Gaussian Mixture Model on 1-D data, to determine if it contains 1, 2, or 3
components.
The default values lead to selecting the number of components with the smallest BIC values.
Args:
vals (ndarray): the data to process. If ndarray is 1-D, it will be reshaped to 2-D via
.reshape(-1, 1).
ncomp_max (int, optional): maximum number of Gaussian components to assess. Defaults to 3.
min_sep (int|float, optional): minimum separation, in data unit,
required between the mean location of two Gaussian components to consider them distinct.
Defaults to 0. This is used in complement to any parameters fed to best_gmm(), that will
Expand All @@ -171,6 +172,8 @@ def ncomp_from_gmm(vals: np.ndarray,
int, ndarray, ndarray: number of (likely) components, array of component ids to which
each hit most likely belongs, array of AIC/BIC scores.
The default values lead to selecting the number of components with the smallest BIC values.
Note:
This function was inspired from the "1-D Gaussian Mixture Model" example from astroML:
`<https://www.astroml.org/book_figures/chapter4/fig_GMM_1D.html>`_
Expand Down Expand Up @@ -204,8 +207,8 @@ def ncomp_from_gmm(vals: np.ndarray,
if rescale_0_to_x is not None:
vals = minmax_scale(vals) * rescale_0_to_x

# I will only look for at most 3 layers.
ncomp = np.array([1, 2, 3])
# List all the number of components I should try
ncomp = np.linspace(1, ncomp_max, ncomp_max, dtype=int)

# Prepare to store the different model fits
models = {}
Expand Down
6 changes: 3 additions & 3 deletions src/ampycloud/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
def log_func_call(logger: logging.Logger) -> Callable:
""" Intended as a decorator to log function calls.
The first part of the message containing the function name is at the 'INFO' level.
The second part of the message containing the argument values is at the 'DEBUG' level.
Args:
logger (logging.Logger): a logger to feed info to.
The first part of the message containing the function name is at the 'INFO' level.
The second part of the message containing the argument values is at the 'DEBUG' level.
Note:
Adapted from the similar dvas function, which itself was adapted from
`this post <https://stackoverflow.com/questions/218616>`__ on SO,
Expand Down
9 changes: 4 additions & 5 deletions src/ampycloud/plots/secondary.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,17 @@ def scaling_fcts(show: bool = True,
save_stem: str = None, save_fmts: Union[list, str] = None) -> None:
""" Plots the different scaling functions.
This is a small utility routine to rapidly see the different altitude scaling options used by
ampycloud.
For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS.
Args:
show (bool, optional): show the plot, or not. Defaults to True.
save_stem (str, optional): if set, will save the plot with this stem (which can include a
path as well). Defaults to None.
save_fmts (list|str, optional): a list of file formats to export the plot to. Defaults to
None = ['png'].
This is a small utility routine to rapidly see the different altitude scaling options used by
ampycloud.
For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS.
Example:
::
Expand Down
12 changes: 6 additions & 6 deletions src/ampycloud/plots/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def valid_styles() -> list:
def set_mplstyle(func: Callable) -> Callable:
""" Intended to be used as a decorator around plotting functions, to set the plotting style.
Returns:
Callable: the decorator.
By defaults, the ``base`` ampycloud style will be enabled. Motivated users can tweak it further
by setting the ``MPL_STYLE`` entry of :py:data:`ampycloud.dynamic.AMPYCLOUD_PRMS` to:
Expand All @@ -66,9 +69,6 @@ def set_mplstyle(func: Callable) -> Callable:
- ``metsymb`` (only if the ``MPL_STYLE`` entry of
:py:data:`ampycloud.dynamic.AMPYCLOUD_PRMS` was set to ``'metsymb'``)
Returns:
Callable: the decorator.
Todo:
See https://github.com/MeteoSwiss/ampycloud/issues/18
Expand Down Expand Up @@ -157,9 +157,6 @@ def get_scaling_kwargs(data: np.ndarray, mode: str, kwargs: dict) -> tuple:
""" Utility function to extract the **actual, deterministic** parameters required to scale the
data, given a set of user-defined parameters.
This is a utility function to aid in the drawing of secondary axis that require to derive the
"reverse scaling function".
Args:
data (pd.Series): the data that was originally scaled by the user.
mode (str): the name of the scaling used by the user. Must be any mode supported by
Expand All @@ -171,6 +168,9 @@ def get_scaling_kwargs(data: np.ndarray, mode: str, kwargs: dict) -> tuple:
tuple: (scale_kwargs, descale_kwargs), the two dict with parameters for the forward/backward
scaling.
This is a utility function to aid in the drawing of secondary axis that require to derive the
"reverse scaling function".
"""

# Let's create a storage dict, and fill it with what I got from the user.
Expand Down
14 changes: 7 additions & 7 deletions src/ampycloud/scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ def minrange2minmax(vals: np.ndarray, min_range: Union[int, float] = 0) -> tuple
Returns:
tuple: the min and max values of the data range of at least min_range in size.
Essentially, if max(vals)-min(vals) >= min_range, this function returns
``[min(vals), max(vals)]``. Else, it returns ``[val_mid-min_range/2, val_mid+min_range/2]``,
with ```val_mid=(max(vals)+min(vals))/2``.
Expand All @@ -126,12 +125,6 @@ def step_scale(vals: np.ndarray,
steps: list, scales: list, mode: str = 'do') -> np.ndarray:
""" Scales values step-wise, with different constants between specific steps.
Values are divided by scales[i] between steps[i-1:i].
Anything outside the range of steps is divided by scales[0] or scale[-1].
Note that this function ensures that each step is properly offset to ensure that the
scaled data is continuous (no gaps and no overlapping steps) !
Args:
vals (ndarray): values to scale.
steps (list, optional): the step **edges**. E.g. [8000, 14000].
Expand All @@ -141,6 +134,13 @@ def step_scale(vals: np.ndarray,
Returns:
ndarray: (un-)step-scaled values
Values are divided by scales[i] between steps[i-1:i].
Anything outside the range of steps is divided by scales[0] or scale[-1].
Note that this function ensures that each step is properly offset to ensure that the
scaled data is continuous (no gaps and no overlapping steps) !
"""

# Some sanity checks
Expand Down
2 changes: 1 addition & 1 deletion src/ampycloud/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
"""

#:str: the one-and-only place where the ampycloud version is set.
VERSION = '0.4.0.dev0'
VERSION = '0.5.0.dev0'
Loading

0 comments on commit 7267f9b

Please sign in to comment.