diff --git a/CHANGELOG b/CHANGELOG index 1019282..206fbbe 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,15 @@ The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v0.5.0.dev0] +### Added: +### Fixed: + - [fpavogt, 2022-07-08] Fix #78 and #79. +### Changed: +### Deprecated: +### Removed: +### Security: + ## [v0.4.0.dev0] ### Added: - [fpavogt, 2022-02-27] Add new speed-check Action. @@ -11,8 +20,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Fixed: - [fpavogt, 2022-03-02] Fix #75. - [fpavogt, 2022-02-25] Fix #71 and #25. -### Changed: -### Deprecated: ### Removed: - [fpavogt, 2022-02-25] Remove yaconfigobject dependency in favor of ruamel.yaml. ### Security: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 787d8dd..68df694 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -233,7 +233,7 @@ sh build_docs.sh This will create the `.html` pages of the compiled documentation under `./build`. In particular, this bash script will automatically update the help message from the high-level ampycloud entry point ``ampycloud_speed_test``, create the demo figure for the main page, compile and ingest all the -docstrings, etc ... . See the ampycloud[release mechanisms](#release-mechansims) for more info about +docstrings, etc ... . See the ampycloud [release mechanisms](#release-mechansims) for more info about the automated publication of the documentation upon new releases. diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 50336f4..8adc307 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -236,6 +236,11 @@ def ceilos(self) -> list: def max_hits_per_layer(self) -> int: """ The maximum number of ceilometer hits possible for a given layer, given the chunk data. + Returns: + int: the max number of ceilometer hit for a layer. 
Divide by len(self.ceilos) to get + the **average** max number of hits per ceilometer per layer (remember: not all + ceilometers may have the same number of timestamps over the chunk time period !). + This is the total number of **unique** timesteps from all ceilometers considered. Note: @@ -243,11 +248,6 @@ def max_hits_per_layer(self) -> int: i.e. 2 simultaneous hits from a given ceilometer can **never** belong to the same cloud layer. - Returns: - int: the max number of ceilometer hit for a layer. Divide by len(self.ceilos) to get - the **average** max number of hits per ceilometer per layer (remember: not all - ceilometers may have the same number of timestamps over the chunk time period !). - """ # For each ceilometer, count the number of individual time stamps ... @@ -634,9 +634,15 @@ def find_layers(self) -> None: logger.info('Group base alt: %.1f', self.groups.at[ind, 'alt_base']) logger.info('min_sep value: %.1f', min_sep) + # Handle #78: if the data is comprised of only two distinct altitudes, only look for + # up to 2 Gaussian components. Else, up to 3. + ncomp_max = np.min([len(np.unique(gro_alts[~np.isnan(gro_alts)])), 3]) + logger.debug('Setting ncomp_max to: %i', ncomp_max) + # And feed them to a Gaussian Mixture Model to figure out how many components it has ... ncomp, sub_layers_id, _ = layer.ncomp_from_gmm( - gro_alts, min_sep=min_sep, **self.prms['LAYERING_PRMS']['gmm_kwargs']) + gro_alts, ncomp_max=ncomp_max, min_sep=min_sep, + **self.prms['LAYERING_PRMS']['gmm_kwargs']) # Add this info to the log logger.debug(' Cluster %s has %i components according to GMM.', diff --git a/src/ampycloud/icao.py b/src/ampycloud/icao.py index 9e1e10e..c13b9db 100644 --- a/src/ampycloud/icao.py +++ b/src/ampycloud/icao.py @@ -36,9 +36,10 @@ def significant_cloud(oktas: list) -> list: * third layer must be BKN or more (i.e. 5 oktas or more) * no more than 3 layers reported (since ampycloud does not deal with CB/TCU) - **Source**: Sec. 
4.5.4.3 e) & footnote #14 in Table A3-1, Meteorological Service for - International Air Navigation, Annex 3 to the Convention on International Civil Aviation, ICAO, - 20th edition, July 2018. + Reference: + Sec. 4.5.4.3 e) & footnote #14 in Table A3-1, Meteorological Service for + International Air Navigation, Annex 3 to the Convention on International Civil Aviation, + ICAO, 20th edition, July 2018. """ diff --git a/src/ampycloud/layer.py b/src/ampycloud/layer.py index d16e682..719b5d3 100644 --- a/src/ampycloud/layer.py +++ b/src/ampycloud/layer.py @@ -30,6 +30,12 @@ def scores2nrl(abics: np.ndarray) -> np.ndarray: """ Converts AIC or BIC scores into probabilities = normalized relative likelihood. + Args: + abics (ndarray): scores. + + Returns: + ndarray: probabilities of the different models. + Specifically, this function computes: .. math:: @@ -40,11 +46,6 @@ def scores2nrl(abics: np.ndarray) -> np.ndarray: The smaller the BIC/AIC scores, the better, but the higher the probabilities = normalized relative likelihood, the better ! - Args: - abics (ndarray): scores. - - Returns: - ndarray: probabilities of the different models. """ out = np.exp(-0.5*(abics-np.min(abics))) @@ -58,6 +59,20 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta', min_prob: float = 1., delta_mul_gain: float = 1.) -> int: """ Identify which Gaussian Mixture Model is most appropriate given AIC or BIC scores. + Args: + abics (ndarray): the AICs or BICs scores, ordered from simplest to most complex model. + mode (str, optional): one of ['delta', 'prob']. Defaults to 'delta'. + min_prob (float, optional): minimum model probability computed from the scores's relative + likelihood, below which the other models will be considered. Set it to 1 to select + the model with the lowest score, irrespective of its probability. Defaults to 1. + This has no effect unless mode='prob'. 
+ delta_mul_gain (float, optional): a smaller score will only be considered "valid" + if it is smaller than delta_mul_gain*current_best_score. Defaults to 1. + This has no effect unless mode='delta'. + + Returns: + int: index of the "most appropriate" model. + Model selection can be based on: 1. the normalized relative likelihood values (see `scores2nrl()`) of the AIC or/and BIC @@ -87,20 +102,6 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta', The default arguments of this function lead to selecting the number of components with the smallest score. - Args: - abics (ndarray): the AICs or BICs scores, ordered from simplest to most complex model. - mode (str, optional): one of ['delta', 'prob']. Defaults to 'delta'. - min_prob (float, optional): minimum model probability computed from the scores's relative - likelihood, below which the other models will be considered. Set it to 1 to select - the model with the lowest score, irrespective of its probability. Defaults to 1. - This has no effect unless mode='prob'. - delta_mul_gain (float, optional): a smaller score will only be considered "valid" - if it is smaller than delta_mul_gain*current_best_score. Defaults to 1. - This has no effect unless mode='delta'. - - Returns: - int: index of the "most appropriate" model. - """ # How many models do I need to compare ? @@ -139,6 +140,7 @@ def best_gmm(abics: np.ndarray, mode: str = 'delta', @log_func_call(logger) def ncomp_from_gmm(vals: np.ndarray, + ncomp_max: int = 3, min_sep: Union[int, float] = 0, scores: str = 'BIC', rescale_0_to_x: float = None, @@ -147,11 +149,10 @@ def ncomp_from_gmm(vals: np.ndarray, """ Runs a Gaussian Mixture Model on 1-D data, to determine if it contains 1, 2, or 3 components. - The default values lead to selecting the number of components with the smallest BIC values. - Args: vals (ndarray): the data to process. If ndarray is 1-D, it will be reshaped to 2-D via .reshape(-1, 1). 
+ ncomp_max (int, optional): maximum number of Gaussian components to assess. Defaults to 3. min_sep (int|float, optional): minimum separation, in data unit, required between the mean location of two Gaussian components to consider them distinct. Defaults to 0. This is used in complement to any parameters fed to best_gmm(), that will @@ -171,6 +172,8 @@ def ncomp_from_gmm(vals: np.ndarray, int, ndarray, ndarray: number of (likely) components, array of component ids to which each hit most likely belongs, array of AIC/BIC scores. + The default values lead to selecting the number of components with the smallest BIC values. + Note: This function was inspired from the "1-D Gaussian Mixture Model" example from astroML: ``_ @@ -204,8 +207,8 @@ def ncomp_from_gmm(vals: np.ndarray, if rescale_0_to_x is not None: vals = minmax_scale(vals) * rescale_0_to_x - # I will only look for at most 3 layers. - ncomp = np.array([1, 2, 3]) + # List all the number of components I should try + ncomp = np.linspace(1, ncomp_max, ncomp_max, dtype=int) # Prepare to store the different model fits models = {} diff --git a/src/ampycloud/logger.py b/src/ampycloud/logger.py index bbf0589..fc93ec6 100644 --- a/src/ampycloud/logger.py +++ b/src/ampycloud/logger.py @@ -18,12 +18,12 @@ def log_func_call(logger: logging.Logger) -> Callable: """ Intended as a decorator to log function calls. - The first part of the message containing the function name is at the 'INFO' level. - The second part of the message containing the argument values is at the 'DEBUG' level. - Args: logger (logging.Logger): a logger to feed info to. + The first part of the message containing the function name is at the 'INFO' level. + The second part of the message containing the argument values is at the 'DEBUG' level. 
+ Note: Adapted from the similar dvas function, which itself was adapted from `this post `__ on SO, diff --git a/src/ampycloud/plots/secondary.py b/src/ampycloud/plots/secondary.py index 7153397..53a839c 100644 --- a/src/ampycloud/plots/secondary.py +++ b/src/ampycloud/plots/secondary.py @@ -32,11 +32,6 @@ def scaling_fcts(show: bool = True, save_stem: str = None, save_fmts: Union[list, str] = None) -> None: """ Plots the different scaling functions. - This is a small utility routine to rapidly see the different altitude scaling options used by - ampycloud. - - For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS. - Args: show (bool, optional): show the plot, or not. Defaults to True. save_stem (str, optional): if set, will save the plot with this stem (which can include a @@ -44,6 +39,10 @@ def scaling_fcts(show: bool = True, save_fmts (list|str, optional): a list of file formats to export the plot to. Defaults to None = ['png']. + This is a small utility routine to rapidly see the different altitude scaling options used by + ampycloud. + + For the "step" scaling plot, the parameters are taken straight from dynamic.GROUPING_PRMS. Example: :: diff --git a/src/ampycloud/plots/tools.py b/src/ampycloud/plots/tools.py index d79e7f8..9497778 100644 --- a/src/ampycloud/plots/tools.py +++ b/src/ampycloud/plots/tools.py @@ -43,6 +43,9 @@ def valid_styles() -> list: def set_mplstyle(func: Callable) -> Callable: """ Intended to be used as a decorator around plotting functions, to set the plotting style. + Returns: + Callable: the decorator. + By defaults, the ``base`` ampycloud style will be enabled. Motivated users can tweak it further by setting the ``MPL_STYLE`` entry of :py:data:`ampycloud.dynamic.AMPYCLOUD_PRMS` to: @@ -66,9 +69,6 @@ def set_mplstyle(func: Callable) -> Callable: - ``metsymb`` (only if the ``MPL_STYLE`` entry of :py:data:`ampycloud.dynamic.AMPYCLOUD_PRMS` was set to ``'metsymb'``) - Returns: - Callable: the decorator. 
- Todo: See https://github.com/MeteoSwiss/ampycloud/issues/18 @@ -157,9 +157,6 @@ def get_scaling_kwargs(data: np.ndarray, mode: str, kwargs: dict) -> tuple: """ Utility function to extract the **actual, deterministic** parameters required to scale the data, given a set of user-defined parameters. - This is a utility function to aid in the drawing of secondary axis that require to derive the - "reverse scaling function". - Args: data (pd.Series): the data that was originally scaled by the user. mode (str): the name of the scaling used by the user. Must be any mode supported by @@ -171,6 +168,9 @@ def get_scaling_kwargs(data: np.ndarray, mode: str, kwargs: dict) -> tuple: tuple: (scale_kwargs, descale_kwargs), the two dict with parameters for the forward/backward scaling. + This is a utility function to aid in the drawing of secondary axis that require to derive the + "reverse scaling function". + """ # Let's create a storage dict, and fill it with what I got from the user. diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index 456afaf..acc8dbc 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -103,7 +103,6 @@ def minrange2minmax(vals: np.ndarray, min_range: Union[int, float] = 0) -> tuple Returns: tuple: the min and max values of the data range of at least min_range in size. - Essentially, if max(vals)-min(vals) >= min_range, this function returns ``[min(vals), max(vals)]``. Else, it returns ``[val_mid-min_range/2, val_mid+min_range/2]``, with ```val_mid=(max(vals)+min(vals))/2``. @@ -126,12 +125,6 @@ def step_scale(vals: np.ndarray, steps: list, scales: list, mode: str = 'do') -> np.ndarray: """ Scales values step-wise, with different constants bewteen specific steps. - Values are divided by scales[i] between steps[i-1:i]. - Anything outside the range of steps is divided by scales[0] or scale[-1]. 
- - Note that this function ensures that each step is properly offseted to ensure that the - scaled data is continuous (no gaps and no overlapping steps) ! - Args: vals (ndarray): values to scale. steps (list, optional): the step **edges**. E.g. [8000, 14000]. @@ -141,6 +134,13 @@ def step_scale(vals: np.ndarray, Returns: ndarray: (un-)step-scaled values + + Values are divided by scales[i] between steps[i-1:i]. + Anything outside the range of steps is divided by scales[0] or scale[-1]. + + Note that this function ensures that each step is properly offseted to ensure that the + scaled data is continuous (no gaps and no overlapping steps) ! + """ # Some sanity checks diff --git a/src/ampycloud/version.py b/src/ampycloud/version.py index d8dcf7b..4b4f137 100644 --- a/src/ampycloud/version.py +++ b/src/ampycloud/version.py @@ -9,4 +9,4 @@ """ #:str: the one-and-only place where the ampycloud version is set. -VERSION = '0.4.0.dev0' +VERSION = '0.5.0.dev0' diff --git a/src/ampycloud/wmo.py b/src/ampycloud/wmo.py index 300495d..c79d574 100644 --- a/src/ampycloud/wmo.py +++ b/src/ampycloud/wmo.py @@ -27,6 +27,16 @@ def perc2okta(val: Union[int, float, np.ndarray], lim8: Union[int, float] = 100) -> np.ndarray: """ Converts a sky coverage percentage into oktas. + Args: + val (int|float|ndarray): the sky coverage percentage to convert, in percent. + lim0 (int|float, optional): the upper limit for the 0 okta bin, in percent. + Defaults to 0. + lim8 (int|float, optional): the lower limit for the 8 oktas bin, in percent. + Defaults to 100. + + Returns: + ndarray of int: the okta value(s). + One okta corresponds to 1/8 of the sky covered by clouds. The cases of 0 and 8 oktas are special, in that these indicate that the sky is covered at *exactly* 0%, respectively 100%. This implies that the 1 okta and 7 okta bins are larger than others. 
@@ -41,15 +51,11 @@ def perc2okta(val: Union[int, float, np.ndarray], - 7 oktas == 6.5*100/8 < val < lim8 - 8 oktas == lim8 <= val <= 100 - Args: - val (int|float|ndarray): the sky coverage percentage to convert, in percent. - lim0 (int|float, optional): the upper limit for the 0 okta bin, in percent. - Defaults to 0. - lim8 (int|float, optional): the lower limit for the 8 oktas bin, in percent. - Defaults to 100. - - Returns: - ndarray of int: the okta value(s). + Reference: + Boers, R., de Haij, M. J., Wauben, W. M. F., Baltink, H. K., van Ulft, L. H., + Savenije, M., and Long, C. N. (2010), Optimized fractional cloudiness determination + from five ground-based remote sensing techniques, J. Geophys. Res., 115, D24116, + `doi:10.1029/2010JD014661 `_. """ # A basic sanity check @@ -83,6 +89,12 @@ def perc2okta(val: Union[int, float, np.ndarray], def okta2code(val: int) -> str: """ Convert an okta value to a METAR code. + Args: + int: okta value between 0 and 9 (included). + + Returns: + str: METAR code + Conversion is as follows: - 0 okta => NCD @@ -92,11 +104,6 @@ def okta2code(val: int) -> str: - 8 oktas => OVC - 9 oktas => None - Args: - int: okta value between 0 and 9 (included). - - Returns: - str: METAR code """ # Some sanity checks @@ -169,17 +176,25 @@ def alt2code(val: Union[int, float]) -> str: """ Function that converts a given altitude in hundreds of ft (3 digit number), e.g. 5000 ft -> 050, 500 ft -> 005. - Below 10'000 ft, the value is floored to the nearest 100 ft. Above 10'000 ft, the value is - floored to the nearest 1000 ft. - - Reference: *Aerodrome Reports and Forecasts, A Users' Handbook to the Codes*, - WMO-No.782, 2020 edition. https://library.wmo.int/?lvl=notice_display&id=716 - Args: val (int, float): the altitude to convert, in feet. Returns: str: the corresponding METAR code chunk + + Below 10'000 ft, the value is floored to the nearest 100 ft. Above 10'000 ft, the value is + floored to the nearest 1000 ft. 
+ + Reference: + *Aerodrome Reports and Forecasts, A Users' Handbook to the Codes*, WMO-No.782, 2020 edition. + ``_ + + Warning: + Currently, this function does **not** allow to implement EASA's rule AMC1 MET.TR.205(e)(3) + (i.e. setting a resolution of 50 ft up to 300 ft for aerodromes with established + low-visibility approach and landing procedures). + ``_ + """ if np.isnan(val): diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index db86c2d..2bc411e 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -9,6 +9,7 @@ """ # Import from Python +import warnings import numpy as np import pandas as pd from pytest import raises, warns @@ -238,3 +239,27 @@ def test_layering_singleval(): # Check that the GMM was never executed assert np.all(chunk.groups.loc[:, 'ncomp'] == -1) + +def test_layering_dualeval(): + """ Test the layering step when there are two single altitude values. See #78 for details. """ + + data1 = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*120, np.ones(30)*1]) + data2 = np.array([np.ones(30), np.arange(0, 30, 1), np.ones(30)*150, np.ones(30)*2]) + + mock_data = pd.DataFrame(np.concatenate([data1, data2], axis=1).T, + columns=['ceilo', 'dt', 'alt', 'type']) + + # Set the proper column types + for (col, tpe) in hardcoded.REQ_DATA_COLS.items(): + mock_data.loc[:, col] = mock_data.loc[:, col].astype(tpe) + + # Instantiate a CeiloChunk entity ... + chunk = CeiloChunk(mock_data) + + # Do the dance ... 
chunk.find_slices() + chunk.find_groups() + # The layering should be solid enough to not complain if there are only two values in the data + with warnings.catch_warnings(): + warnings.simplefilter("error") + chunk.find_layers() diff --git a/test/ampycloud/test_layer.py b/test/ampycloud/test_layer.py index ef653b6..72afa3f 100644 --- a/test/ampycloud/test_layer.py +++ b/test/ampycloud/test_layer.py @@ -110,7 +110,7 @@ def test_unstable_layers(): best_ncomp, _, _ = ncomp_from_gmm(data['alt'].to_numpy(), scores='BIC', rescale_0_to_x=100, min_sep=0, - random_seed=45, + random_seed=39, delta_mul_gain=0.95, mode='delta') # With this specific seed, I should be finding 3 layers assert best_ncomp == 3