Merge pull request #672 from ICB-DCM/develop

Release 0.2.7
ICB-DCM · Aug 3, 2021 · e706798 · e706798
2 parents 72488fb + 41c4582
commit e706798
Show file tree

Hide file tree

Showing 45 changed files with 1,715 additions and 609 deletions.
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8]
+        python-version: [3.9]
 
     steps:
     - name: Check out repository

diff --git a/.github/workflows/install_deps.sh b/.github/workflows/install_deps.sh
@@ -37,7 +37,7 @@ for par in "$@"; do
     pysb)
       # bionetgen
       wget -q -O bionetgen.tar \
-        https://github.com/RuleWorld/bionetgen/releases/download/BioNetGen-2.5.2/BioNetGen-2.5.2-linux.tgz
+        https://github.com/RuleWorld/bionetgen/releases/download/BioNetGen-2.6.0/BioNetGen-2.6.0-linux.tgz
       tar -xf bionetgen.tar
     ;;
 

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,35 @@ Release notes
 ..........
 
 
+0.2.7 (2021-07-30)
+------------------
+
+* Finite Differences:
+    * Adaptive finite differences (#671)
+    * Add helper function for checking gradients of objectives (#690)
+    * Small bug fixes (#711, #714)
+
+* Storage:
+    * Store representation of the objective (#669)
+    * Minor fixes in HDF5 history (#679)
+    * HDF5 reader for ensemble predictions (#681)
+    * Update storage demo jupyter notebook (#699)
+    * Option to trim trace to be monotonically decreasing (#705)
+
+* General:
+    * Improved tests and bug fixes of validation intervals (#676, #685)
+    * Add input file validation via PEtab linter for PEtab import (#678)
+    * Remove default values from docstring (#680)
+    * Minor fixes/improvements of ensembles (#687, #688)
+    * Fix sorting of optimization values including `NaN` values (#691)
+    * Specify axis limits for plotting (#693)
+    * Minor fixes in visualization (#696)
+    * Add installation option `all_optimizers` (#695)
+    * Improve installation documentation (#689)
+    * Update `pysb` and `BNG` version on GitHub Actions (#697)
+    * Bug fix in steady state guesses (#715)
+
+
 0.2.6 (2021-05-17)
 ------------------
 

diff --git a/doc/example/store.ipynb b/doc/example/store.ipynb
diff --git a/doc/install.rst b/doc/install.rst
@@ -61,17 +61,23 @@ If you want to upgrade from an existing previous version, replace
 ``install`` by ``install --upgrade`` in the above commands.
 
 
-Install optional packages
--------------------------
+Install optional packages and external dependencies
+---------------------------------------------------
 
-* This package includes multiple comfort methods simplyfing its use for
+* pyPESTO includes multiple convenience methods to simplify
   parameter estimation for models generated using the toolbox
-  `amici <https://www.github.com/icb-dcm/amici>`_.
+  `AMICI <https://github.com/AMICI-dev/AMICI>`_.
   To use AMICI, install it via pip::
 
     pip3 install amici
 
+  or, in case of problems, follow the full instructions from the
+  `AMICI documentation <https://amici.readthedocs.io/en/latest/python_installation.html>`_.
+
 * This package inherently supports optimization using the dlib toolbox.
   To use it, install dlib via::
 
    pip3 install dlib
+
+* All external dependencies can be installed through
+  `this shell script <https://github.com/ICB-DCM/pyPESTO/blob/main/.github/workflows/install_deps.sh>`_.
diff --git a/pypesto/__init__.py b/pypesto/__init__.py
@@ -22,6 +22,7 @@
     ObjectiveBase,
     OptimizerHistory,
     FD,
+    FDDelta,
 )
 from .problem import Problem
 from .result import (

diff --git a/pypesto/ensemble/__init__.py b/pypesto/ensemble/__init__.py
@@ -14,6 +14,7 @@
     read_from_df,
     read_from_csv,
     write_ensemble_prediction_to_h5,
+    read_ensemble_prediction_from_h5
 )
 from .dimension_reduction import (
     get_umap_representation_parameters,

diff --git a/pypesto/ensemble/ensemble.py b/pypesto/ensemble/ensemble.py
@@ -2,7 +2,7 @@
 from functools import partial
 import numpy as np
 import pandas as pd
-from typing import Sequence, Tuple, Callable, Dict, List
+from typing import Sequence, Tuple, Callable, Dict, List, Optional
 
 from .. import Result
 from ..engine import (
@@ -23,7 +23,7 @@
                         NVECTORS, VECTOR_TAGS, PREDICTIONS, MODE_FUN,
                         EnsembleType, ENSEMBLE_TYPE, MEAN, MEDIAN,
                         STANDARD_DEVIATION, SUMMARY, LOWER_BOUND,
-                        UPPER_BOUND, get_percentile_label)
+                        UPPER_BOUND, get_percentile_label, HISTORY)
 
 logger = logging.getLogger(__name__)
 
@@ -35,12 +35,13 @@ class EnsemblePrediction:
     It can be attached to an ensemble-type object
     """
 
-    def __init__(self,
-                 predictor: Callable[[Sequence], PredictionResult],
-                 prediction_id: str = None,
-                 prediction_results: Sequence[PredictionResult] = None,
-                 lower_bound: Sequence[np.ndarray] = None,
-                 upper_bound: Sequence[np.ndarray] = None):
+    def __init__(
+            self,
+            predictor: Optional[Callable[[Sequence], PredictionResult]] = None,
+            prediction_id: str = None,
+            prediction_results: Sequence[PredictionResult] = None,
+            lower_bound: Sequence[np.ndarray] = None,
+            upper_bound: Sequence[np.ndarray] = None):
         """
         Constructor.
 
@@ -62,6 +63,8 @@ def __init__(self,
             array of potential upper bounds for the parameters
         """
         self.predictor = predictor
+        if predictor is None:
+            logger.info("This `EnsemblePrediction` has no predictor.")
         self.prediction_id = prediction_id
         self.prediction_results = prediction_results
         if prediction_results is None:
@@ -395,6 +398,8 @@ def from_optimization_endpoints(
         """
         x_vectors = []
         vector_tags = []
+        x_names = [result.problem.x_names[i]
+                   for i in result.problem.x_free_indices]
 
         for start in result.optimize_result.list:
             # add the parameters from the next start as long as we
@@ -422,10 +427,10 @@ def from_optimization_endpoints(
 
         x_vectors = np.stack(x_vectors, axis=1)
         return Ensemble(x_vectors=x_vectors,
-                        x_names=result.problem.x_names,
+                        x_names=x_names,
                         vector_tags=vector_tags,
-                        lower_bound=result.problem.lb_full,
-                        upper_bound=result.problem.ub_full,
+                        lower_bound=result.problem.lb,
+                        upper_bound=result.problem.ub,
                         **kwargs)
 
     @staticmethod
@@ -471,22 +476,23 @@ def from_optimization_history(
                                                         **kwargs)
         x_vectors = []
         vector_tags = []
-        x_names = result.problem.x_names
-        lb = result.problem.lb_full
-        ub = result.problem.ub_full
+        x_names = [result.problem.x_names[i]
+                   for i in result.problem.x_free_indices]
+        lb = result.problem.lb
+        ub = result.problem.ub
 
         # calculate the number of starts whose final nllh is below cutoff
         n_starts = sum(start['fval'] <= cutoff
                        for start in result.optimize_result.list)
 
         fval_trace = [
             np.array(
-                result.optimize_result.list[i_ms]['history'].get_fval_trace()
+                result.optimize_result.list[i_ms][HISTORY].get_fval_trace()
             )
             for i_ms in range(n_starts)
         ]
         x_trace = [
-            result.optimize_result.list[i_ms]['history'].get_x_trace()
+            result.optimize_result.list[i_ms][HISTORY].get_x_trace()
             for i_ms in range(n_starts)
         ]
 
@@ -657,6 +663,8 @@ def predict(
             predictor=predictor,
             prediction_id=prediction_id,
             prediction_results=prediction_results,
+            lower_bound=self.lower_bound,
+            upper_bound=self.upper_bound
         )
 
     def compute_summary(self,
@@ -786,7 +794,7 @@ def entries_per_start(fval_traces: List['np.ndarray'],
 
     # if all possible indices can be included, return
     if (n_per_start < max_per_start).all() and sum(n_per_start) < max_size:
-        return ens_ind
+        return n_per_start
 
     # trim down starts that exceed the limit:
     n_per_start = [min(n, max_per_start) for n in n_per_start]

diff --git a/pypesto/ensemble/utils.py b/pypesto/ensemble/utils.py
@@ -2,14 +2,15 @@
 import numpy as np
 import pandas as pd
 import os
-from typing import Callable, Union
-from pathlib import Path
+from typing import Callable, Union, Sequence
 
 from .constants import (EnsembleType, OUTPUT, UPPER_BOUND, LOWER_BOUND,
                         PREDICTION_RESULTS, PREDICTION_ID, SUMMARY,
-                        OPTIMIZE, SAMPLE)
+                        OPTIMIZE, SAMPLE, X_NAMES, TIMEPOINTS, OUTPUT_IDS)
+from ..predict import PredictionConditionResult, PredictionResult
+from pathlib import Path
 from .ensemble import (Ensemble, EnsemblePrediction)
-from ..store import read_result
+from ..store import read_result, get_or_create_group, write_array
 
 
 def read_from_csv(path: str,
@@ -155,48 +156,57 @@ def write_ensemble_prediction_to_h5(ensemble_prediction: EnsemblePrediction,
         base = Path(base_path)
 
     # open file
-    f = h5py.File(output_file, 'w')
-
-    # write prediction ID if available
-    if ensemble_prediction.prediction_id is not None:
-        f.create_dataset(os.path.join(base, PREDICTION_ID),
-                         data=ensemble_prediction.prediction_id)
-
-    # write the single prediction results
-    for i_result, result in enumerate(ensemble_prediction.prediction_results):
-        tmp_base_path = os.path.join(base, f'{PREDICTION_RESULTS}_{i_result}')
-        result.write_to_h5(output_file, base_path=tmp_base_path)
-
-    # write lower bounds per condition, if available
-    if ensemble_prediction.lower_bound is not None:
-        f.create_group(os.path.join(base, f'{LOWER_BOUND}s'))
-        for i_cond, lower_bounds in enumerate(ensemble_prediction.lower_bound):
-            condition_id = \
-                ensemble_prediction.prediction_results[0].condition_ids[i_cond]
-            f.create_group(os.path.join(base, condition_id))
-            f.create_dataset(os.path.join(base, condition_id, LOWER_BOUND),
-                             data=lower_bounds)
-
-    # write upper bounds per condition, if available
-    if ensemble_prediction.upper_bound is not None:
-        f.create_group(os.path.join(base, f'{UPPER_BOUND}s'))
-        for i_cond, upper_bounds in enumerate(ensemble_prediction.upper_bound):
-            condition_id = \
-                ensemble_prediction.prediction_results[0].condition_ids[i_cond]
-            f.create_group(os.path.join(base, condition_id))
-            f.create_dataset(os.path.join(base, condition_id, UPPER_BOUND),
-                             data=upper_bounds)
-
-    # write summary statistics to h5 file
-    for i_key in ensemble_prediction.prediction_summary.keys():
-        i_summary = ensemble_prediction.prediction_summary[i_key]
-        if i_summary is not None:
-            tmp_base_path = os.path.join(base, f'{SUMMARY}_{i_key}')
+    with h5py.File(output_file, 'a') as f:
+        # write prediction ID if available
+        if ensemble_prediction.prediction_id is not None:
+            f.create_dataset(os.path.join(base, PREDICTION_ID),
+                             data=ensemble_prediction.prediction_id)
+
+        # write lower bounds per condition, if available
+        if ensemble_prediction.lower_bound is not None:
+            if isinstance(ensemble_prediction.lower_bound[0], np.ndarray):
+                lb_grp = get_or_create_group(f, LOWER_BOUND)
+                for i_cond, lower_bounds in \
+                        enumerate(ensemble_prediction.lower_bound):
+                    condition_id = (
+                        ensemble_prediction
+                        .prediction_results[0]
+                        .condition_ids[i_cond]
+                    )
+                    write_array(lb_grp, condition_id, lower_bounds)
+            elif isinstance(ensemble_prediction.lower_bound[0], float):
+                f.create_dataset(LOWER_BOUND,
+                                 data=ensemble_prediction.lower_bound)
+
+        # write upper bounds per condition, if available
+        if ensemble_prediction.upper_bound is not None:
+            if isinstance(ensemble_prediction.upper_bound[0], np.ndarray):
+                ub_grp = get_or_create_group(f, UPPER_BOUND)
+                for i_cond, upper_bounds in \
+                        enumerate(ensemble_prediction.upper_bound):
+                    condition_id = \
+                        ensemble_prediction.prediction_results[
+                            0].condition_ids[i_cond]
+                    write_array(ub_grp, condition_id, upper_bounds)
+            elif isinstance(ensemble_prediction.upper_bound[0], float):
+                f.create_dataset(UPPER_BOUND,
+                                 data=ensemble_prediction.upper_bound)
+
+        # write summary statistics to h5 file
+        for summary_id, summary in \
+                ensemble_prediction.prediction_summary.items():
+            if summary is None:
+                continue
+            tmp_base_path = os.path.join(base, f'{SUMMARY}_{summary_id}')
             f.create_group(tmp_base_path)
-            i_summary.write_to_h5(output_file, base_path=tmp_base_path)
+            summary.write_to_h5(output_file, base_path=tmp_base_path)
 
-    # close file
-    f.close()
+        # write the single prediction results
+        for i_result, result in \
+                enumerate(ensemble_prediction.prediction_results):
+            tmp_base_path = os.path.join(base,
+                                         f'{PREDICTION_RESULTS}_{i_result}')
+            result.write_to_h5(output_file, base_path=tmp_base_path)
 
 
 def get_prediction_dataset(ens: Union[Ensemble, EnsemblePrediction],
@@ -230,3 +240,57 @@ def get_prediction_dataset(ens: Union[Ensemble, EnsemblePrediction],
                         'an EnsemblePrediction object as input. Stopping.')
 
     return dataset
+
+
+def read_ensemble_prediction_from_h5(
+        predictor: Union[Callable[[Sequence], PredictionResult], None],
+        input_file: str) -> EnsemblePrediction:
+    """
+    Read an ensemble prediction from an HDF5 file.
+
+    Parameters
+    ----------
+    predictor:
+        Predictor to attach to the returned object, may be None.
+    input_file:
+        Path of the HDF5 file to read from.
+
+    Returns
+    -------
+    The `EnsemblePrediction` assembled from the file content. Prediction
+    id and bounds are None if the file does not contain them.
+    """
+    # optional entries: stay None when they are absent from the file
+    prediction_id = None
+    bounds = {LOWER_BOUND: None, UPPER_BOUND: None}
+    pred_res_list = []
+
+    with h5py.File(input_file, 'r') as f:
+        for key in f.keys():
+            if key == PREDICTION_ID:
+                raw_id = f[key][()]
+                # only bytes need decoding, a str is taken as it is
+                prediction_id = (raw_id.decode()
+                                 if isinstance(raw_id, bytes) else raw_id)
+                continue
+
+            if key in {LOWER_BOUND, UPPER_BOUND}:
+                if isinstance(f[key], h5py.Dataset):
+                    # bounds stored as one flat dataset
+                    bounds[key] = f[key][:]
+                else:
+                    # bounds stored as a group with one array per condition
+                    bounds[key] = np.array([f[f'{key}/{cond}'][()]
+                                            for cond in f[key].keys()])
+                continue
+
+            # NOTE(review): every remaining top-level key is read as a
+            # prediction result. The writer in this module also stores
+            # groups with the `SUMMARY` prefix -- confirm whether those
+            # are meant to end up in `prediction_results`.
+            x_names = decode_array(f[f'{key}/{X_NAMES}'][()])
+            condition_ids = np.array(decode_array(
+                f[f'{key}/condition_ids'][()]
+            ))
+            pred_cond_res_list = []
+            # conditions are stored under their running index
+            for i_cond, _ in enumerate(condition_ids):
+                output = f[f'{key}/{i_cond}/{OUTPUT}'][:]
+                output_ids = decode_array(
+                    f[f'{key}/{i_cond}/{OUTPUT_IDS}'][:])
+                timepoints = f[f'{key}/{i_cond}/{TIMEPOINTS}'][:]
+                pred_cond_res_list.append(PredictionConditionResult(
+                    timepoints=timepoints,
+                    output_ids=output_ids,
+                    output=output,
+                    x_names=x_names
+                ))
+            pred_res_list.append(PredictionResult(
+                conditions=pred_cond_res_list,
+                condition_ids=condition_ids
+            ))
+
+    return EnsemblePrediction(predictor=predictor,
+                              prediction_id=prediction_id,
+                              prediction_results=pred_res_list,
+                              lower_bound=bounds[LOWER_BOUND],
+                              upper_bound=bounds[UPPER_BOUND],
+                              )
+
+
+def decode_array(array: np.ndarray) -> np.ndarray:
+    """
+    Decode an array of byte strings to an array of strings.
+
+    Parameters
+    ----------
+    array:
+        One-dimensional array whose entries are bytes (or already str).
+
+    Returns
+    -------
+    A new object array holding the decoded strings. The input array is
+    left unmodified.
+    """
+    # Build a new object array: writing str back into a fixed-width bytes
+    # array would silently convert the entries to bytes again.
+    return np.array(
+        [entry.decode() if isinstance(entry, bytes) else entry
+         for entry in array],
+        dtype=object,
+    )
diff --git a/pypesto/objective/__init__.py b/pypesto/objective/__init__.py
@@ -6,7 +6,7 @@
 from .base import ObjectiveBase
 from .function import Objective
 from .aggregated import AggregatedObjective
-from .finite_difference import FD
+from .finite_difference import FD, FDDelta
 from .amici_calculator import AmiciCalculator
 from .amici import AmiciObjective, AmiciObjectBuilder
 from .priors import NegLogPriors, NegLogParameterPriors