Merge pull request #80 from MeteoSwiss/develop_vof

Fix #78 and #79
MeteoSwiss · Aug 2, 2022 · dc2fa40 · dc2fa40
2 parents 91f95e1 + 69964ae
commit dc2fa40
Show file tree

Hide file tree

Showing 13 changed files with 134 additions and 78 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -4,15 +4,22 @@ The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## [v0.5.0.dev0]
+### Added:
+### Fixed:
+ - [fpavogt, 2022-07-08] Fix #78 and #79.
+### Changed:
+### Deprecated:
+### Removed:
+### Security:
+
 ## [v0.4.0.dev0]
 ### Added:
  - [fpavogt, 2022-02-27] Add new speed-check Action.
  - [fpavogt, 2022-02-25] Add the `prms` keyword to set parameters for each run call in a thread-safe manner.
 ### Fixed:
  - [fpavogt, 2022-03-02] Fix #75.
  - [fpavogt, 2022-02-25] Fix #71 and #25.
-### Changed:
-### Deprecated:
 ### Removed:
  - [fpavogt, 2022-02-25] Remove yaconfigobject dependency in favor of ruamel.yaml.
 ### Security:

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -233,7 +233,7 @@ sh build_docs.sh
 This will create the `.html` pages of the compiled documentation under `./build`. In particular,
 this bash script will automatically update the help message from the high-level ampycloud entry
 point ``ampycloud_speed_test``, create the demo figure for the main page, compile and ingest all the
-docstrings, etc ... . See the ampycloud[release mechanisms](#release-mechansims) for more info about
+docstrings, etc ... . See the ampycloud [release mechanisms](#release-mechansims) for more info about
 the automated publication of the documentation upon new releases.
 
 

diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py
@@ -236,18 +236,18 @@ def ceilos(self) -> list:
     def max_hits_per_layer(self) -> int:
         """ The maximum number of ceilometer hits possible for a given layer, given the chunk data.
 
+        Returns:
+            int: the max number of ceilometer hit for a layer. Divide by len(self.ceilos) to get
+            the **average** max number of hits per ceilometer per layer (remember: not all
+            ceilometers may have the same number of timestamps over the chunk time period !).
+
         This is the total number of **unique** timesteps from all ceilometers considered.
 
         Note:
             This value assumes that a layer can contain only 1 hit per ceilometer per timestep,
             i.e. 2 simultaneous hits from a given ceilometer can **never** belong to the same cloud
             layer.
 
-        Returns:
-            int: the max number of ceilometer hit for a layer. Divide by len(self.ceilos) to get
-            the **average** max number of hits per ceilometer per layer (remember: not all
-            ceilometers may have the same number of timestamps over the chunk time period !).
-
         """
 
         # For each ceilometer, count the number of individual time stamps ...
@@ -634,9 +634,15 @@ def find_layers(self) -> None:
             logger.info('Group base alt: %.1f', self.groups.at[ind, 'alt_base'])
             logger.info('min_sep value: %.1f', min_sep)
 
+            # Handle #78: if the data is comprised of only two distinct altitudes, only look for
+            # up to 2 Gaussian components. Else, up to 3.
+            ncomp_max = np.min([len(np.unique(gro_alts[~np.isnan(gro_alts)])), 3])
+            logger.debug('Setting ncomp_max to: %i', ncomp_max)
+
             # And feed them to a Gaussian Mixture Model to figure out how many components it has ...
             ncomp, sub_layers_id, _ = layer.ncomp_from_gmm(
-                gro_alts, min_sep=min_sep, **self.prms['LAYERING_PRMS']['gmm_kwargs'])
+                gro_alts, ncomp_max=ncomp_max, min_sep=min_sep,
+                **self.prms['LAYERING_PRMS']['gmm_kwargs'])
 
             # Add this info to the log
             logger.debug(' Cluster %s  has %i components according to GMM.',

diff --git a/src/ampycloud/icao.py b/src/ampycloud/icao.py
@@ -36,9 +36,10 @@ def significant_cloud(oktas: list) -> list:
         * third layer must be BKN or more (i.e. 5 oktas or more)
         * no more than 3 layers reported (since ampycloud does not deal with CB/TCU)
 
-    **Source**: Sec. 4.5.4.3 e) & footnote #14 in Table A3-1, Meteorological Service for
-    International Air Navigation, Annex 3 to the Convention on International Civil Aviation, ICAO,
-    20th edition, July 2018.
+    Reference:
+        Sec. 4.5.4.3 e) & footnote #14 in Table A3-1, Meteorological Service for
+        International Air Navigation, Annex 3 to the Convention on International Civil Aviation,
+        ICAO, 20th edition, July 2018.
 
     """
 

diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py
@@ -30,6 +30,12 @@
 def scores2nrl(abics: np.ndarray) -> np.ndarray:
     """ Converts AIC or BIC scores into probabilities = normalized relative likelihood.
 
+    Args:
+        abics (ndarray): scores.
+
+    Returns:
+        ndarray: probabilities of the different models.
+
     Specifically, this function computes:
 
     .. math::
@@ -40,11 +46,6 @@ def scores2nrl(abics: np.ndarray) -> np.ndarray:
         The smaller the BIC/AIC scores, the better, but the higher the probabilities = normalized
         relative likelihood, the better !
 
-    Args:
-        abics (ndarray): scores.
-
-    Returns:
-        ndarray: probabilities of the different models.
     """
 
     out = np.exp(-0.5*(abics-np.min(abics)))
@@ -58,6 +59,20 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta',
              min_prob: float = 1., delta_mul_gain: float = 1.) -> int:
     """ Identify which Gaussian Mixture Model is most appropriate given AIC or BIC scores.
 
+    Args:
+        abics (ndarray): the AICs or BICs scores, ordered from simplest to most complex model.
+        mode (str, optional): one of ['delta', 'prob']. Defaults to 'delta'.
+        min_prob (float, optional): minimum model probability computed from the scores's relative
+            likelihood, below which the other models will be considered. Set it to 1 to select
+            the model with the lowest score, irrespective of its probability. Defaults to 1.
+            This has no effect unless mode='prob'.
+        delta_mul_gain (float, optional): a smaller score will only be considered "valid"
+            if it is smaller than delta_mul_gain*current_best_score. Defaults to 1.
+            This has no effect unless mode='delta'.
+
+    Returns:
+        int: index of the "most appropriate" model.
+
     Model selection can be based on:
 
         1. the normalized relative likelihood values (see `scores2nrl()`) of the AIC or/and BIC
@@ -87,20 +102,6 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta',
         The default arguments of this function lead to selecting the number of components with the
         smallest score.
 
-    Args:
-        abics (ndarray): the AICs or BICs scores, ordered from simplest to most complex model.
-        mode (str, optional): one of ['delta', 'prob']. Defaults to 'delta'.
-        min_prob (float, optional): minimum model probability computed from the scores's relative
-            likelihood, below which the other models will be considered. Set it to 1 to select
-            the model with the lowest score, irrespective of its probability. Defaults to 1.
-            This has no effect unless mode='prob'.
-        delta_mul_gain (float, optional): a smaller score will only be considered "valid"
-            if it is smaller than delta_mul_gain*current_best_score. Defaults to 1.
-            This has no effect unless mode='delta'.
-
-    Returns:
-        int: index of the "most appropriate" model.
-
     """
 
     # How many models do I need to compare ?
@@ -139,6 +140,7 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta',
 
 @log_func_call(logger)
 def ncomp_from_gmm(vals: np.ndarray,
+                   ncomp_max: int = 3,
                    min_sep: Union[int, float] = 0,
                    scores: str = 'BIC',
                    rescale_0_to_x: float = None,
@@ -147,11 +149,10 @@ def ncomp_from_gmm(vals: np.ndarray,
     """ Runs a Gaussian Mixture Model on 1-D data, to determine if it contains 1, 2, or 3
     components.
 
-    The default values lead to selecting the number of components with the smallest BIC values.
-
     Args:
         vals (ndarray): the data to process. If ndarray is 1-D, it will be reshaped to 2-D via
             .reshape(-1, 1).
+        ncomp_max (int, optional): maximum number of Gaussian components to assess. Defaults to 3.
         min_sep (int|float, optional): minimum separation, in data unit,
             required between the mean location of two Gaussian components to consider them distinct.
             Defaults to 0. This is used in complement to any parameters fed to best_gmm(), that will
@@ -171,6 +172,8 @@ def ncomp_from_gmm(vals: np.ndarray,
         int, ndarray, ndarray: number of (likely) components, array of component ids to which
         each hit most likely belongs, array of AIC/BIC scores.
 
+    The default values lead to selecting the number of components with the smallest BIC values.
+
     Note:
         This function was inspired from the "1-D Gaussian Mixture Model" example from astroML:
         `<https://www.astroml.org/book_figures/chapter4/fig_GMM_1D.html>`_
@@ -204,8 +207,8 @@ def ncomp_from_gmm(vals: np.ndarray,
     if rescale_0_to_x is not None:
         vals = minmax_scale(vals) * rescale_0_to_x
 
-    # I will only look for at most 3 layers.
-    ncomp = np.array([1, 2, 3])
+    # List all the number of components I should try
+    ncomp = np.linspace(1, ncomp_max, ncomp_max, dtype=int)
 
     # Prepare to store the different model fits
     models = {}

diff --git a/src/ampycloud/logger.py b/src/ampycloud/logger.py
@@ -18,12 +18,12 @@
 def log_func_call(logger: logging.Logger) -> Callable:
     """ Intended as a decorator to log function calls.
 
-    The first part of the message containing the function name is at the 'INFO' level.
-    The second part of the message containing the argument values is at the 'DEBUG' level.
-
     Args:
         logger (logging.Logger): a logger to feed info to.
 
+    The first part of the message containing the function name is at the 'INFO' level.
+    The second part of the message containing the argument values is at the 'DEBUG' level.
+
     Note:
         Adapted from the similar dvas function, which itself was adapted from
         `this post <https://stackoverflow.com/questions/218616>`__ on SO,

diff --git a/src/ampycloud/plots/secondary.py b/src/ampycloud/plots/secondary.py
@@ -32,18 +32,17 @@ def scaling_fcts(show: bool = True,
                  save_stem: str = None, save_fmts: Union[list, str] = None) -> None:
     """ Plots the different scaling functions.
 
-    This is a small utility routine to rapidly see the different altitude scaling options used by
-    ampycloud.
-
-    For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS.
-
     Args:
        show (bool, optional): show the plot, or not. Defaults to True.
        save_stem (str, optional): if set, will save the plot with this stem (which can include a
            path as well). Defaults to None.
        save_fmts (list|str, optional): a list of file formats to export the plot to. Defaults to
            None = ['png'].
 
+    This is a small utility routine to rapidly see the different altitude scaling options used by
+    ampycloud.
+
+    For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS.
 
     Example:
     ::

diff --git a/src/ampycloud/plots/tools.py b/src/ampycloud/plots/tools.py
@@ -43,6 +43,9 @@ def valid_styles() -> list:
 def set_mplstyle(func: Callable) -> Callable:
     """ Intended to be used as a decorator around plotting functions, to set the plotting style.
 
+    Returns:
+        Callable: the decorator.
+
     By defaults, the ``base`` ampycloud style will be enabled. Motivated users can tweak it further
     by setting the ``MPL_STYLE`` entry of :py:data:`ampycloud.dynamic.AMPYCLOUD_PRMS` to:
 
@@ -66,9 +69,6 @@ def set_mplstyle(func: Callable) -> Callable:
            - ``metsymb`` (only if the ``MPL_STYLE`` entry of
              :py:data:`ampycloud.dynamic.AMPYCLOUD_PRMS` was set to ``'metsymb'``)
 
-    Returns:
-        Callable: the decorator.
-
     Todo:
         See https://github.com/MeteoSwiss/ampycloud/issues/18
 
@@ -157,9 +157,6 @@ def get_scaling_kwargs(data: np.ndarray, mode: str, kwargs: dict) -> tuple:
     """ Utility function to extract the **actual, deterministic** parameters required to scale the
     data, given a set of user-defined parameters.
 
-    This is a utility function to aid in the drawing of secondary axis that require to derive the
-    "reverse scaling function".
-
     Args:
         data (pd.Series): the data that was originally scaled by the user.
         mode (str): the name of the scaling used by the user. Must be any mode supported by
@@ -171,6 +168,9 @@ def get_scaling_kwargs(data: np.ndarray, mode: str, kwargs: dict) -> tuple:
         tuple: (scale_kwargs, descale_kwargs), the two dict with parameters for the forward/backward
         scaling.
 
+    This is a utility function to aid in the drawing of secondary axis that require to derive the
+    "reverse scaling function".
+
     """
 
     # Let's create a storage dict, and fill it with what I got from the user.

diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py
@@ -103,7 +103,6 @@ def minrange2minmax(vals: np.ndarray, min_range: Union[int, float] = 0) -> tuple
     Returns:
         tuple: the min and max values of the data range of at least min_range in size.
 
-
     Essentially, if max(vals)-min(vals) >= min_range, this function returns
     ``[min(vals), max(vals)]``. Else, it returns ``[val_mid-min_range/2, val_mid+min_range/2]``,
     with ```val_mid=(max(vals)+min(vals))/2``.
@@ -126,12 +125,6 @@ def step_scale(vals: np.ndarray,
                steps: list, scales: list, mode: str = 'do') -> np.ndarray:
     """ Scales values step-wise, with different constants bewteen specific steps.
 
-    Values are divided by scales[i] between steps[i-1:i].
-    Anything outside the range of steps is divided by scales[0] or scale[-1].
-
-    Note that this function ensures that each step is properly offseted to ensure that the
-    scaled data is continuous (no gaps and no overlapping steps) !
-
     Args:
         vals (ndarray): values to scale.
         steps (list, optional): the step **edges**. E.g. [8000, 14000].
@@ -141,6 +134,13 @@ def step_scale(vals: np.ndarray,
 
     Returns:
         ndarray: (un-)step-scaled values
+
+    Values are divided by scales[i] between steps[i-1:i].
+    Anything outside the range of steps is divided by scales[0] or scale[-1].
+
+    Note that this function ensures that each step is properly offseted to ensure that the
+    scaled data is continuous (no gaps and no overlapping steps) !
+
     """
 
     # Some sanity checks

diff --git a/src/ampycloud/version.py b/src/ampycloud/version.py
@@ -9,4 +9,4 @@
 """
 
 #:str: the one-and-only place where the ampycloud version is set.
-VERSION = '0.4.0.dev0'
+VERSION = '0.5.0.dev0'