diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 94a514f6b..7d10fd310 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,7 +1,7 @@
 # see https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
 
 # default owners = active maintainers
-* @Doresic @PaulJonasJost @m-philipps
+* @Doresic @PaulJonasJost @vwiela
 
 # Examples
 /doc/example/censored_data.ipynb @Doresic
@@ -28,29 +28,28 @@
 /pypesto/hierarchical/semiquantitative/ @Doresic
 /pypesto/history/ @PaulJonasJost
 /pypesto/objective/ @PaulJonasJost
+/pypesto/objective/julia @vwiela
 /pypesto/objective/amici/ @dweindl @FFroehlich
 /pypesto/objective/jax/ @FFroehlich
 /pypesto/objective/aesara/ @FFroehlich
 /pypesto/optimize/ @PaulJonasJost
 /pypesto/petab/ @dweindl @FFroehlich
 /pypesto/predict/ @dilpath
-/pypesto/problem/ @PaulJonasJost @m-philipps
-/pypesto/profile/ @PaulJonasJost
+/pypesto/problem/ @PaulJonasJost @vwiela
+/pypesto/profile/ @PaulJonasJost @Doresic
 /pypesto/result/ @PaulJonasJost
-/pypesto/sample/ @dilpath @vwiela
+/pypesto/sample/ @dilpath @arrjon
 /pypesto/select/ @dilpath
 /pypesto/startpoint/ @PaulJonasJost
 /pypesto/store/ @PaulJonasJost
-/pypesto/visualize/ @stephanmg @m-philipps
 
 # Tests
-/test/base/ @PaulJonasJost @m-philipps
+/test/base/ @PaulJonasJost @vwiela
 /test/doc/ @PaulJonasJost
-/test/hierarchical/ @dweindl @doresic
-/test/julia/ @PaulJonasJost
+/test/hierarchical/ @dweindl @Doresic
+/test/julia/ @PaulJonasJost @vwiela
 /test/optimize/ @PaulJonasJost
 /test/petab/ @dweindl @FFroehlich
-/test/profile/ @PaulJonasJost
-/test/sample/ @dilpath
+/test/profile/ @PaulJonasJost @Doresic
+/test/sample/ @dilpath @arrjon
 /test/select/ @dilpath
-/test/visualize/ @stephanmg @m-philipps
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e9847bc68..4db56ab55 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,6 +8,7 @@ on:
     - develop
   pull_request:
   workflow_dispatch:
+  merge_group:
   schedule:
     # run Monday and Thursday at 03:42 UTC
     - cron: '42 3 * * MON,THU'
@@ -28,15 +29,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -54,7 +55,7 @@ jobs:
         CXX: clang++
 
     - name: Coverage
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
         file: ./coverage.xml
@@ -67,15 +68,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -90,7 +91,7 @@ jobs:
       run: ulimit -n 65536 65536 && tox -e base
 
     - name: Coverage
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
         file: ./coverage.xml
@@ -103,15 +104,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~\AppData\Local\pip\Cache
@@ -136,15 +137,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -162,7 +163,7 @@ jobs:
         CXX: clang++
 
     - name: Coverage
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
         file: ./coverage.xml
@@ -180,15 +181,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache tox and cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -196,19 +197,16 @@ jobs:
         key: "${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-ci-${{ github.job }}"
 
     - name: Install julia
-      uses: julia-actions/setup-julia@v1
+      uses: julia-actions/setup-julia@v2
       with:
-        version: 1.9
-
-    - name: Cache Julia
-      uses: julia-actions/cache@v1
+        version: 1.11
 
     - name: Install dependencies
       run: .github/workflows/install_deps.sh
 
     - name: Install PEtabJL dependencies
       run: >
-        julia -e 'using Pkg; Pkg.add(Pkg.PackageSpec(;name="PEtab", version="2.5.0"));
+        julia -e 'using Pkg; Pkg.add("PEtab");
         Pkg.add("OrdinaryDiffEq"); Pkg.add("Sundials")'
 
     - name: Run tests
@@ -216,7 +214,7 @@ jobs:
       run: tox -e julia
 
     - name: Coverage
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
         file: ./coverage.xml
@@ -229,15 +227,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -252,7 +250,7 @@ jobs:
       run: tox -e optimize
 
     - name: Coverage
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
         file: ./coverage.xml
@@ -265,15 +263,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -288,7 +286,7 @@ jobs:
       run: tox -e hierarchical
 
     - name: Coverage
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
         file: ./coverage.xml
@@ -301,15 +299,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -324,7 +322,7 @@ jobs:
       run: tox -e select
 
     - name: Coverage
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
         file: ./coverage.xml
@@ -337,15 +335,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -369,15 +367,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -402,15 +400,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
@@ -432,15 +430,15 @@ jobs:
 
     steps:
     - name: Check out repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Prepare python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
 
     - name: Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: |
           ~/.cache/pip
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index e96fb50cc..c56d4b29e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,35 @@ Release notes
 ..........
 
 
+0.5.4 (2024-10-19)
+-------------------
+
+- **Breaking Changes**
+    - Remove Aesara support (#1453, #1455)
+- General
+    - CI improvements (#1436, #1437, #1438, #1439, #1440, #1443, #1473, #1484, #1486, #1490, #1485)
+    - Update references/documentation (#1404, #1456, #1474, #1479, #1483, #1470, #1498)
+- Profile
+    - Improve Profiling Code (#1447)
+- Visualize
+    - Allow log and/or linear scale for visualization (#1435)
+    - More informative error message for start indices (#1472)
+- Optimization
+    - SacessOptimizer: Fix acceptance threshold for objective improvement (#1457)
+    - SacessOptimizer: expose more hyperparameters + minor fixes (#1459, #1476)
+    - SacessOptimizer, ESSOptimizer: Bound-normalize parameters for proximity check (#1462)
+    - ESSOptimizer: Fix bug in recombination and go-beyond (#1477, #1480)
+- Objective
+    - FD-objective correctly working with fixed parameters (#1446)
+    - PEtab importer rework (#1442, #1502)
+    - Use cloudpickle for serializing NegLogParameterPriors (#1467)
+    - Update PEtab.jl integration to match version 3.X (#1489)
+- Sampling
+    - Bayes Factor Tutorial (#1444)
+- Ensemble
+    - Added HPD calculation to ensemble (#1431)
+
+
 0.5.3 (2024-08-01)
 -------------------
 
diff --git a/INSTALL.rst b/INSTALL.rst
index eb796aab4..e5b75dd29 100644
--- a/INSTALL.rst
+++ b/INSTALL.rst
@@ -5,7 +5,7 @@ Install and upgrade
 Requirements
 ------------
 
-This package requires Python 3.8 or later (see :ref:`Python support`).
+This package requires Python 3.10 or later (see :ref:`Python support`).
 It is continuously tested on Linux, and most parts should also work on other
 operating systems (MacOS, Windows).
 
diff --git a/README.md b/README.md
index c75c53c08..fc81f21a4 100644
--- a/README.md
+++ b/README.md
@@ -13,27 +13,43 @@ parameter estimation.
 
 ## Feature overview
 
+![](https://raw.githubusercontent.com/ICB-DCM/pyPESTO/main/doc/gfx/concept_pypesto.png)
+*Feature overview of pyPESTO. Figure taken from the [Bioinformatics publication](https://doi.org/10.1093/bioinformatics/btad711).*
+
 pyPESTO features include:
 
-* Multi-start local optimization
-* Profile computation
-* Result visualization
-* Interface to [AMICI](https://github.com/AMICI-dev/AMICI/) for efficient
-  simulation and sensitivity analysis of ordinary differential equation (ODE)
-  models
-  ([example](https://github.com/ICB-DCM/pyPESTO/blob/main/doc/example/amici.ipynb))
-* Parameter estimation pipeline for systems biology problems specified in
-  [SBML](http://sbml.org/) and [PEtab](https://github.com/PEtab-dev/PEtab)
-  ([example](https://github.com/ICB-DCM/pyPESTO/blob/master/doc/example/petab_import.ipynb))
-* Parameter estimation with relative (scaled and offset) data as described in
-  [Schmiester et al. (2020)](https://doi.org/10.1093/bioinformatics/btz581).
-  ([example](https://github.com/ICB-DCM/pyPESTO/blob/master/doc/example/relative_data.ipynb))
-* Parameter estimation with ordinal data as described in
-  [Schmiester et al. (2020)](https://doi.org/10.1007/s00285-020-01522-w) and
-  [Schmiester et al. (2021)](https://doi.org/10.1093/bioinformatics/btab512).
-  ([example](https://github.com/ICB-DCM/pyPESTO/blob/master/doc/example/ordinal_data.ipynb))
-* Parameter estimation with censored data. ([example](https://github.com/ICB-DCM/pyPESTO/blob/master/doc/example/censored_data.ipynb))
-* Parameter estimation with nonlinear-monotone data. ([example](https://github.com/ICB-DCM/pyPESTO/blob/master/doc/example/semiquantitative_data.ipynb))
+* Parameter estimation interfacing **multiple optimization algorithms** including
+  multi-start local and global optimization. ([example](https://pypesto.readthedocs.io/en/latest/example/getting_started.html),
+  [overview of optimizers](https://pypesto.readthedocs.io/en/latest/api/pypesto.optimize.html))
+* Interface to **multiple simulators** including
+  * [AMICI](https://github.com/AMICI-dev/AMICI/) for efficient simulation and
+    sensitivity analysis of ordinary differential equation (ODE) models. ([example](https://pypesto.readthedocs.io/en/latest/example/amici.html))
+  * [RoadRunner](https://libroadrunner.org/) for simulation of SBML models. ([example](https://pypesto.readthedocs.io/en/latest/example/roadrunner.html))
+  * [JAX](https://jax.readthedocs.io/en/latest/quickstart.html) and
+    [Julia](https://julialang.org) for automatic differentiation.
+* **Uncertainty quantification** using various methods:
+  * **Profile likelihoods**.
+  * **Sampling** using Markov chain Monte Carlo (MCMC), parallel tempering, and
+    interfacing other samplers including [emcee](https://emcee.readthedocs.io/en/stable/),
+    [pymc](https://www.pymc.io/welcome.html) and
+    [dynesty](https://dynesty.readthedocs.io/en/stable/).
+    ([example](https://pypesto.readthedocs.io/en/latest/example/sampler_study.html))
+  * **Variational inference**.
+* **Complete** parameter estimation **pipeline** for systems biology problems specified in
+  [SBML](http://sbml.org/) and [PEtab](https://github.com/PEtab-dev/PEtab).
+  ([example](https://pypesto.readthedocs.io/en/latest/example/petab_import.html))
+* Parameter estimation pipelines for **different modes of data**:
+  * **Relative (scaled and offset) data** as described in
+    [Schmiester et al. (2020)](https://doi.org/10.1093/bioinformatics/btz581).
+    ([example](https://pypesto.readthedocs.io/en/latest/example/relative_data.html))
+  * **Ordinal data** as described in
+    [Schmiester et al. (2020)](https://doi.org/10.1007/s00285-020-01522-w) and
+    [Schmiester et al. (2021)](https://doi.org/10.1093/bioinformatics/btab512).
+    ([example](https://pypesto.readthedocs.io/en/latest/example/ordinal_data.html))
+  * **Censored data**. ([example](https://pypesto.readthedocs.io/en/latest/example/censored_data.html))
+  * **Semiquantitative data** as described in [Doresic et al. (2024)](https://doi.org/10.1093/bioinformatics/btae210). ([example](https://pypesto.readthedocs.io/en/latest/example/semiquantitative_data.html))
+* **Model selection**. ([example](https://pypesto.readthedocs.io/en/latest/example/model_selection.html))
+* Various **visualization methods** to analyze parameter estimation results.
 
 ## Quick install
 
@@ -75,10 +91,10 @@ When using pyPESTO in your project, please cite
   Bioinformatics, 2023, btad711, [doi:10.1093/bioinformatics/btad711](https://doi.org/10.1093/bioinformatics/btad711)
 
 When presenting work that employs pyPESTO, feel free to use one of the icons in
-[doc/logo/](https://github.com/ICB-DCM/pyPESTO/tree/main/doc/logo):
+[doc/logo/](doc/logo):
 
 <p align="center">
-  <img src="https://raw.githubusercontent.com/ICB-DCM/pyPESTO/master/doc/logo/logo.png" height="75" alt="AMICI Logo">
+  <img src="https://raw.githubusercontent.com/ICB-DCM/pyPESTO/main/doc/logo/logo.png" height="75" alt="pyPESTO Logo">
 </p>
 
 There is a list of [publications using pyPESTO](https://pypesto.readthedocs.io/en/latest/references.html).
diff --git a/doc/api.rst b/doc/api.rst
index f0ff709b6..d728e3bb5 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -13,7 +13,6 @@ API reference
    pypesto.history
    pypesto.logging
    pypesto.objective
-   pypesto.objective.aesara
    pypesto.objective.jax
    pypesto.objective.julia
    pypesto.objective.roadrunner
diff --git a/doc/authors.rst b/doc/authors.rst
index afd83dd8e..7025f54f3 100644
--- a/doc/authors.rst
+++ b/doc/authors.rst
@@ -9,7 +9,7 @@ This package was mainly developed by:
 - Paul Jonas Jost
 - Jakob Vanhoefer
 
-with major contributions by (status 2023):
+with major contributions by (status 2024):
 
 - Daniel Weindl
 - Dilan Pathirana
@@ -23,3 +23,5 @@ with major contributions by (status 2023):
 - Stephan Grein
 - Erika Dudkin
 - Domagoj Doresic
+- Jonas Arruda
+- Maren Philipps
diff --git a/doc/conf.py b/doc/conf.py
index 27cd4cb7c..6e9e09c5f 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -165,7 +165,7 @@
 
 # Add notebooks prolog to Google Colab and nbviewer
 nbsphinx_prolog = r"""
-{% set docname = 'github/icb-dcm/pypesto/blob/main/doc/' + env.doc2path(env.docname, base=None) %}
+{% set docname = 'github/icb-dcm/pypesto/blob/main/doc/' + env.doc2path(env.docname, base=None)|string %}
 .. raw:: html
 
     <div class="note">
diff --git a/doc/example.rst b/doc/example.rst
index bd65951ed..7cbbbdb38 100644
--- a/doc/example.rst
+++ b/doc/example.rst
@@ -50,6 +50,7 @@ Algorithms and features
    example/store.ipynb
    example/history_usage.ipynb
    example/model_selection.ipynb
+   example/model_evidence_and_bayes_factors.ipynb
    example/julia.ipynb
    example/relative_data.ipynb
    example/ordinal_data.ipynb
diff --git a/doc/example/amici.ipynb b/doc/example/amici.ipynb
index 3b0bcb96e..703fe4e95 100644
--- a/doc/example/amici.ipynb
+++ b/doc/example/amici.ipynb
@@ -4,6 +4,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -28,6 +31,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -52,6 +58,7 @@
     "import pypesto.store as store\n",
     "import pypesto.visualize as visualize\n",
     "import pypesto.visualize.model_fit as model_fit\n",
+    "import pypesto\n",
     "\n",
     "mpl.rcParams[\"figure.dpi\"] = 100\n",
     "mpl.rcParams[\"font.size\"] = 18\n",
@@ -71,6 +78,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -83,6 +93,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -98,6 +111,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -113,6 +129,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -125,6 +144,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -140,6 +162,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -153,6 +178,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -170,6 +198,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -190,6 +221,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -205,6 +239,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -221,6 +258,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -236,6 +276,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -258,6 +301,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -273,6 +319,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -286,6 +335,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -299,6 +351,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -316,6 +371,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -331,6 +389,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -429,6 +490,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -470,6 +534,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -499,6 +566,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -515,6 +585,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -532,6 +605,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -554,6 +630,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -567,6 +646,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -586,6 +668,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -598,6 +683,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -610,6 +698,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -617,13 +708,13 @@
    "source": [
     "#### Background on PEtab\n",
     "\n",
-    "<img src=\"https://github.com/PEtab-dev/PEtab/blob/main/doc/gfx/petab_files.png?raw=true\" width=\"80%\" alt=\"pyPESTO logo\"/>\n",
+    "<img src=\"https://github.com/PEtab-dev/PEtab/blob/main/doc/v1/gfx/petab_files.png?raw=true\" width=\"80%\" alt=\"PEtab files\"/>\n",
     "\n",
     "pyPESTO supports the [PEtab](https://github.com/PEtab-dev/PEtab) standard. PEtab is a data format for specifying parameter estimation problems in systems biology.\n",
     "\n",
     "A PEtab problem consist of an [SBML](https://sbml.org) file, defining the model topology and a set of `.tsv` files, defining experimental conditions, observables, measurements and parameters (and their optimization bounds, scale, priors...). All files that make up a PEtab problem can be structured in a `.yaml` file. The `pypesto.Objective` coming from a PEtab problem corresponds to the negative-log-likelihood/negative-log-posterior distribution of the parameters.\n",
     "\n",
-    "For more details on PEtab, the interested reader is referred to [PEtab's format definition](https://petab.readthedocs.io/en/latest/documentation_data_format.html), for examples the reader is referred to the [PEtab benchmark collection](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab). The Model from _[Böhm et al. JProteomRes 2014](https://pubs.acs.org/doi/abs/10.1021/pr5006923)_ is part of the benchmark collection and will be used as the running example throughout this notebook.\n"
+    "For more details on PEtab, the interested reader is referred to [PEtab's format definition](https://petab.readthedocs.io/en/latest/documentation_data_format.html); for examples, the reader is referred to the [PEtab benchmark collection](https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab). For demonstration purposes, a simple conversion-reaction model will be used as the running example throughout this notebook.\n"
    ]
   },
   {
@@ -631,6 +722,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -638,6 +732,7 @@
    "outputs": [],
    "source": [
     "%%capture\n",
+    "\n",
     "petab_yaml = f\"./{model_name}/{model_name}.yaml\"\n",
     "\n",
     "petab_problem = petab.Problem.from_yaml(petab_yaml)\n",
@@ -650,6 +745,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -665,6 +763,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -680,6 +781,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -695,6 +799,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -709,6 +816,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -722,6 +832,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -729,7 +842,7 @@
    "outputs": [],
    "source": [
     "# call the objective function\n",
-    "print(f\"Objective value: {problem.objective(benchmark_parameters)}\")\n",
+    "print(f\"Objective value: {problem.objective(petab_problem.x_nominal_free_scaled)}\")\n",
     "# change things in the model\n",
     "problem.objective.amici_model.requireSensitivitiesForAllParameters()\n",
     "# change solver settings\n",
@@ -746,6 +859,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -758,6 +874,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -772,6 +891,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -789,6 +911,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -806,14 +931,17 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
    },
    "source": [
-    "### Startpoint method\n",
+    "### History options\n",
     "\n",
-    "The startpoint method describes how you want to choose your startpoints, in case you do a multistart optimization. The default here is `uniform` meaning that each startpoint is a uniform sample from the allowed parameter space. The other two notable options are either `latin_hypercube` or a self defined function."
+    "In some cases, it is good to trace what the optimizer did in each step, i.e., the history. There is a multitude of options on what to report here, but the most important one is `trace_record`, which turns history recording on or off."
    ]
   },
   {
@@ -821,48 +949,44 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
    },
    "outputs": [],
    "source": [
-    "startpoint_method = pypesto.startpoint.uniform"
+    "# save optimizer trace\n",
+    "history_options = pypesto.HistoryOptions(trace_record=True)"
    ]
   },
   {
    "cell_type": "markdown",
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   },
+   "metadata": {},
    "source": [
-    "### History options\n",
+    "### Startpoint method\n",
     "\n",
-    "In some cases, it is good to trace what the optimizer did in each step, i.e., the history. There is a multitude of options on what to report here, but the most important one is `trace_record` which turns the history function on and off."
+    "The startpoint method describes how you want to choose your startpoints, in case you do a multistart optimization. The default here is `uniform`, meaning that each startpoint is a uniform sample from the allowed parameter space. The other two notable options are `latin_hypercube` and a self-defined function. The startpoint method is an inherent attribute of the problem and can be set there."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "# save optimizer trace\n",
-    "history_options = pypesto.HistoryOptions(trace_record=True)"
+    "problem.startpoint_method = pypesto.startpoint.uniform"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -878,6 +1002,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -892,6 +1019,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -907,6 +1037,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -922,6 +1055,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -933,7 +1069,6 @@
     "    problem=problem,\n",
     "    optimizer=optimizer,\n",
     "    n_starts=n_starts,\n",
-    "    startpoint_method=startpoint_method,\n",
     "    engine=engine,\n",
     "    options=opt_options,\n",
     ")"
@@ -943,6 +1078,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -956,6 +1094,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -969,6 +1110,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -983,6 +1127,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -995,6 +1142,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1010,6 +1160,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1025,6 +1178,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1042,6 +1198,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1055,6 +1214,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1069,6 +1231,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1084,6 +1249,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1097,6 +1265,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1111,7 +1282,6 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1125,6 +1295,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1140,6 +1313,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1155,6 +1331,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1168,18 +1347,24 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
    },
    "source": [
-    "We definitely need to look further into it, and thus we turn to uncertainty quantification in the next section."
+    "However, these visualizations are only an indicator for possible uncertainties. In the next section we turn to proper uncertainty quantification."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1196,6 +1381,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1213,6 +1401,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1234,6 +1425,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1247,6 +1441,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1261,6 +1458,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1276,6 +1476,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1287,7 +1490,7 @@
     "result = sample.sample(\n",
     "    problem=problem,\n",
     "    sampler=sampler,\n",
-    "    n_samples=5000,\n",
+    "    n_samples=1000,\n",
     "    result=result,\n",
     ")"
    ]
@@ -1296,6 +1499,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1309,6 +1515,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1324,6 +1533,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1337,6 +1549,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1351,6 +1566,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1364,6 +1582,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1389,6 +1610,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1402,6 +1626,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1419,6 +1646,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1432,6 +1662,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1445,6 +1678,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1457,6 +1693,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -1470,6 +1709,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -1497,7 +1739,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/censored_data.ipynb b/doc/example/censored_data.ipynb
index cfe783848..c5ac649db 100644
--- a/doc/example/censored_data.ipynb
+++ b/doc/example/censored_data.ipynb
@@ -148,16 +148,6 @@
     "As there are no censored data specific inner options, we will pass none to the constructor."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = importer.create_model(verbose=False)\n",
-    "objective = importer.create_objective(model=model)"
-   ]
-  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -172,7 +162,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "problem = importer.create_problem(objective)\n",
+    "problem = importer.create_problem()\n",
     "\n",
     "engine = pypesto.engine.MultiProcessEngine(n_procs=3)\n",
     "\n",
@@ -247,7 +237,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "dev_venv",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -261,7 +251,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.2"
+   "version": "3.12.3"
   },
   "vscode": {
    "interpreter": {
@@ -270,5 +260,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/doc/example/conversion_reaction.ipynb b/doc/example/conversion_reaction.ipynb
index e31217c77..c909388de 100644
--- a/doc/example/conversion_reaction.ipynb
+++ b/doc/example/conversion_reaction.ipynb
@@ -144,6 +144,7 @@
    },
    "outputs": [],
    "source": [
+    "%%time\n",
     "# create objective function from amici model\n",
     "# pesto.AmiciObjective is derived from pesto.Objective,\n",
     "# the general pesto objective function class\n",
@@ -157,7 +158,7 @@
     "\n",
     "# do the optimization\n",
     "result = optimize.minimize(\n",
-    "    problem=problem, optimizer=optimizer, n_starts=100, filename=None\n",
+    "    problem=problem, optimizer=optimizer, n_starts=10, filename=None\n",
     ")"
    ]
   },
@@ -221,7 +222,7 @@
     "    problem=problem,\n",
     "    result=result,\n",
     "    optimizer=optimizer,\n",
-    "    profile_index=np.array([1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0]),\n",
+    "    profile_index=np.array([0,1]),\n",
     "    result_index=0,\n",
     "    profile_options=profile_options,\n",
     "    filename=None,\n",
@@ -270,7 +271,7 @@
     ")\n",
     "\n",
     "result = sample.sample(\n",
-    "    problem, n_samples=10000, sampler=sampler, result=result, filename=None\n",
+    "    problem, n_samples=1000, sampler=sampler, result=result, filename=None\n",
     ")"
    ]
   },
@@ -425,7 +426,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/custom_objective_function.ipynb b/doc/example/custom_objective_function.ipynb
index 35c985884..f1276c5a4 100644
--- a/doc/example/custom_objective_function.ipynb
+++ b/doc/example/custom_objective_function.ipynb
@@ -4,6 +4,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -16,6 +19,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -30,6 +36,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -48,6 +57,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -63,6 +75,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -89,6 +104,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -101,6 +119,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -113,6 +134,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -127,6 +151,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -145,6 +172,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -159,6 +189,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -181,6 +214,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -194,6 +230,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -216,6 +255,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -229,6 +271,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -260,6 +305,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -281,6 +329,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -301,6 +352,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -320,6 +374,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -333,6 +390,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -349,6 +409,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -362,6 +425,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -383,6 +449,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -396,12 +465,16 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
    },
    "outputs": [],
    "source": [
+    "%%time\n",
     "# run optimization of problem 1\n",
     "result1 = optimize.minimize(\n",
     "    problem=problem1, optimizer=optimizer, n_starts=n_starts, engine=engine\n",
@@ -424,6 +497,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -437,6 +513,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -461,6 +540,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -477,6 +559,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -490,6 +575,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -507,6 +595,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -520,6 +611,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -537,6 +631,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -550,6 +647,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -572,6 +672,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -584,6 +687,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -599,6 +705,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -636,12 +745,16 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
    },
    "outputs": [],
    "source": [
+    "%%time\n",
     "# compute profiles\n",
     "profile_options = profile.ProfileOptions(whole_path=True)\n",
     "\n",
@@ -649,7 +762,7 @@
     "    problem=problem1,\n",
     "    result=result1,\n",
     "    optimizer=optimizer,\n",
-    "    profile_index=np.array([0, 1, 3, 5]),\n",
+    "    profile_index=np.array([0, 3]),\n",
     "    result_index=0,\n",
     "    profile_options=profile_options,\n",
     "    filename=None,\n",
@@ -660,7 +773,7 @@
     "    problem=problem1,\n",
     "    result=result1,\n",
     "    optimizer=optimizer,\n",
-    "    profile_index=np.array([0, 1, 3, 5]),\n",
+    "    profile_index=np.array([0, 3]),\n",
     "    result_index=-1,\n",
     "    profile_options=profile_options,\n",
     "    filename=None,\n",
@@ -669,7 +782,7 @@
     "    problem=problem4,\n",
     "    result=result4,\n",
     "    optimizer=optimizer,\n",
-    "    profile_index=np.array([0, 1, 3, 5]),\n",
+    "    profile_index=np.array([0, 3]),\n",
     "    result_index=0,\n",
     "    profile_options=profile_options,\n",
     "    filename=None,\n",
@@ -681,6 +794,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -690,7 +806,7 @@
     "# specify the parameters, for which profiles should be computed\n",
     "visualize.profiles(\n",
     "    result1,\n",
-    "    profile_indices=[0, 1, 3, 5],\n",
+    "    profile_indices=[0, 3],\n",
     "    reference=[ref, ref2],\n",
     "    profile_list_ids=[0, 1],\n",
     ");"
@@ -701,6 +817,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -709,7 +828,7 @@
    "source": [
     "visualize.profiles(\n",
     "    result4,\n",
-    "    profile_indices=[0, 1, 3, 5],\n",
+    "    profile_indices=[0, 3],\n",
     "    reference=[ref4],\n",
     ");"
    ]
@@ -718,6 +837,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -730,6 +852,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -742,6 +867,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -755,6 +883,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -774,6 +905,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -787,6 +921,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -795,7 +932,7 @@
    "source": [
     "axes = visualize.profiles(\n",
     "    result1,\n",
-    "    profile_indices=[0, 1, 3, 5],\n",
+    "    profile_indices=[0, 3],\n",
     "    profile_list_ids=[0, 2],\n",
     "    ratio_min=0.01,\n",
     "    colors=[(1, 0, 0, 1), (0, 0, 1, 1)],\n",
@@ -811,6 +948,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -825,23 +965,23 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 4
 }
diff --git a/doc/example/fixed_parameters.ipynb b/doc/example/fixed_parameters.ipynb
index 6f8838d9a..6074a50b7 100644
--- a/doc/example/fixed_parameters.ipynb
+++ b/doc/example/fixed_parameters.ipynb
@@ -146,7 +146,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.10.2 64-bit",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -160,7 +160,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.2"
+   "version": "3.12.3"
   },
   "vscode": {
    "interpreter": {
diff --git a/doc/example/getting_started.ipynb b/doc/example/getting_started.ipynb
index 2f98e6d69..bf6a80e32 100644
--- a/doc/example/getting_started.ipynb
+++ b/doc/example/getting_started.ipynb
@@ -33,6 +33,7 @@
     "import amici\n",
     "import matplotlib as mpl\n",
     "import numpy as np\n",
+    "import scipy as sp\n",
     "\n",
     "import pypesto.optimize as optimize\n",
     "import pypesto.petab\n",
@@ -54,11 +55,11 @@
    "source": [
     "## 1. Objective Definition\n",
     "\n",
-    "PyPESTO allows the definition of a custom objectives, as well as offers support for objectives defined in the [PEtab](https://github.com/PEtab-dev/PEtab) format.\n",
+    "pyPESTO allows the definition of custom objectives and offers support for objectives defined in the [PEtab](https://github.com/PEtab-dev/PEtab) format.\n",
     "\n",
     "### Custom Objective Definition\n",
     "\n",
-    "You can define an objective via a python function. Also providing an analytical gradient (and potentially also a Hessian) improves the performance of Gradient/Hessian-based optimizers. When accessing parameter uncertainties via profile-likelihoods/sampling, pyPESTO interprets the objective function as the negative-log-likelihood/negative-log-posterior."
+    "You can define an objective via a Python function. Also providing an analytical gradient (and potentially also a Hessian) improves the performance of Gradient/Hessian-based optimizers. When accessing parameter uncertainties via profile-likelihoods/sampling, pyPESTO interprets the objective function as the negative-log-likelihood/negative-log-posterior. A more in-depth construction of a custom objective function can be found in [a dedicated example notebook](./custom_objective_function.ipynb)."
    ]
   },
   {
@@ -195,7 +196,7 @@
     "\n",
     "#### Background on PEtab\n",
     "\n",
-    "<img src=\"https://github.com/PEtab-dev/PEtab/blob/main/doc/gfx/petab_files.png?raw=true\" width=\"80%\" alt=\"pyPESTO logo\"/>\n",
+    "<img src=\"https://github.com/PEtab-dev/PEtab/blob/main/doc/v1/gfx/petab_files.png?raw=true\" width=\"80%\" alt=\"PEtab files\"/>\n",
     "\n",
     "PyPESTO supports the [PEtab](https://github.com/PEtab-dev/PEtab) standard. PEtab is a data format for specifying parameter estimation problems in systems biology. \n",
     "\n",
@@ -231,7 +232,7 @@
    "source": [
     "%%capture\n",
     "# directory of the PEtab problem\n",
-    "petab_yaml = \"./boehm_JProteomeRes2014/boehm_JProteomeRes2014.yaml\"\n",
+    "petab_yaml = \"./conversion_reaction/conversion_reaction.yaml\"\n",
     "\n",
     "importer = pypesto.petab.PetabImporter.from_yaml(petab_yaml)\n",
     "problem = importer.create_problem(verbose=False)"
@@ -329,7 +330,7 @@
     "    * Gradient-free\n",
     "* [FIDES](https://github.com/fides-dev/fides/) (`optimize.FidesOptimizer()`)\n",
     "    * Interior Trust Region optimizer \n",
-    "* [Particle Swarm](https://github.com/tisimst/pyswarm) (`optimize.PyswarmOptimizer()`)\n",
+    "* [Particle Swarm](https://github.com/ljvmiranda921/pyswarms) (`optimize.PyswarmsOptimizer()`)\n",
     "    * Particle swarm algorithm\n",
     "    * Gradient-free\n",
     "* [CMA-ES](https://pypi.org/project/cma-es/) (`optimize.CmaOptimizer()`)\n",
@@ -351,7 +352,7 @@
     "optimizer_scipy_powell = optimize.ScipyOptimizer(method=\"Powell\")\n",
     "\n",
     "optimizer_fides = optimize.FidesOptimizer(verbose=logging.ERROR)\n",
-    "optimizer_pyswarm = optimize.PyswarmOptimizer()"
+    "optimizer_pyswarm = optimize.PyswarmsOptimizer(par_popsize=10)"
    ]
   },
   {
@@ -384,14 +385,12 @@
     "# Due to run time we already use parallelization.\n",
     "# This will be introduced in more detail later.\n",
     "engine = pypesto.engine.MultiProcessEngine()\n",
-    "history_options = pypesto.HistoryOptions(trace_record=True)\n",
     "\n",
     "# Scipy: L-BFGS-B\n",
     "result_lbfgsb = optimize.minimize(\n",
     "    problem=problem,\n",
     "    optimizer=optimizer_scipy_lbfgsb,\n",
     "    engine=engine,\n",
-    "    history_options=history_options,\n",
     "    n_starts=n_starts,\n",
     ")\n",
     "\n",
@@ -400,7 +399,6 @@
     "    problem=problem,\n",
     "    optimizer=optimizer_scipy_powell,\n",
     "    engine=engine,\n",
-    "    history_options=history_options,\n",
     "    n_starts=n_starts,\n",
     ")\n",
     "\n",
@@ -409,7 +407,6 @@
     "    problem=problem,\n",
     "    optimizer=optimizer_fides,\n",
     "    engine=engine,\n",
-    "    history_options=history_options,\n",
     "    n_starts=n_starts,\n",
     ")\n",
     "\n",
@@ -419,8 +416,7 @@
     "    problem=problem,\n",
     "    optimizer=optimizer_pyswarm,\n",
     "    engine=engine,\n",
-    "    history_options=history_options,\n",
-    "    n_starts=n_starts,\n",
+    "    n_starts=1,  # Global optimizers are usually run once. The number of particles (par_popsize) is usually the parameter that is adapted.\n",
     ")"
    ]
   },
@@ -435,9 +431,7 @@
     "### Optimizer Convergence\n",
     "\n",
     "\n",
-    "A common visualization of optimizer convergence are waterfall plots. Waterfall plots show the (ordered) results of the individual optimization runs. As we see below, Dlib and pyswarm, which are not gradient-based, are not able to find the global optimum. \n",
-    "\n",
-    "Furthermore, we hope to obtain clearly visible plateaus, as they indicate optimizer convergence to local minima. "
+    "A common visualization of optimizer convergence is the waterfall plot. Waterfall plots show the (ordered) results of the individual optimization runs. In general, we hope to obtain clearly visible plateaus, as they indicate optimizer convergence to local minima."
    ]
   },
   {
@@ -453,10 +447,9 @@
     "optimizer_results = [\n",
     "    result_lbfgsb,\n",
     "    result_powell,\n",
-    "    result_fides,\n",
-    "    result_pyswarm,\n",
+    "    result_fides\n",
     "]\n",
-    "optimizer_names = [\"Scipy: L-BFGS-B\", \"Scipy: Powell\", \"Fides\", \"pyswarm\"]\n",
+    "optimizer_names = [\"Scipy: L-BFGS-B\", \"Scipy: Powell\", \"Fides\"]\n",
     "\n",
     "pypesto.visualize.waterfall(optimizer_results, legends=optimizer_names);"
    ]
@@ -612,7 +605,7 @@
     "    problem=problem,\n",
     "    result=result,\n",
     "    optimizer=optimizer_scipy_lbfgsb,\n",
-    "    profile_index=[0, 1, 2],\n",
+    "    profile_index=[0, 1],\n",
     ")"
    ]
   },
@@ -910,6 +903,9 @@
    "cell_type": "markdown",
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%% md\n"
     }
@@ -941,7 +937,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.2"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/history_usage.ipynb b/doc/example/history_usage.ipynb
index ae882fd77..21d661af3 100644
--- a/doc/example/history_usage.ipynb
+++ b/doc/example/history_usage.ipynb
@@ -376,12 +376,15 @@
   },
   {
    "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
    "source": [
     "We compare the function value trace of the loaded results with the original results to ensure consistency."
-   ],
-   "metadata": {
-    "collapsed": false
-   }
+   ]
   },
   {
    "cell_type": "code",
@@ -471,7 +474,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.2"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/model_evidence_and_bayes_factors.ipynb b/doc/example/model_evidence_and_bayes_factors.ipynb
new file mode 100644
index 000000000..0cc7d188a
--- /dev/null
+++ b/doc/example/model_evidence_and_bayes_factors.ipynb
@@ -0,0 +1,979 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "2db98323fb940c7f",
+   "metadata": {},
+   "source": [
+    "# Bayes Factor Tutorial\n",
+    "\n",
+    "Bayes factors are a key concept in Bayesian model comparison, allowing us to compare the relative likelihood of different models given the data. They are computed using the marginal likelihoods (or evidence) of the models. This tutorial will cover various methods for computing marginal likelihoods.\n",
+    "\n",
+    "You can find an introduction and an extensive review in [Llorente et al. (2023)](https://doi.org/10.1137/20M1310849)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5c56f766bcf7ab48",
+   "metadata": {},
+   "source": [
+    "\n",
+    "## Marginal Likelihood\n",
+    "\n",
+    "The marginal likelihood (or evidence) of a model $\\mathcal{M}$ given data $\\mathcal{D}$ is defined as:\n",
+    "\n",
+    "$$\n",
+    "P(\\mathcal{D} \\mid \\mathcal{M}) = \\int P(\\mathcal{D} \\mid \\theta, \\mathcal{M}) P(\\theta \\mid \\mathcal{M}) \\, d\\theta\n",
+    "$$\n",
+    "\n",
+    "where $\\theta$ are the parameters of the model. This integral averages the likelihood over the prior distribution of the parameters, providing a measure of how well the model explains the data, considering all possible parameter values."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6337b6a3",
+   "metadata": {},
+   "source": [
+    "## Bayes Factor\n",
+    "\n",
+    "The Bayes factor comparing two models $\\mathcal{M}_1$ and $\\mathcal{M}_2$ given data $\\mathcal{D}$ is the ratio of their marginal likelihoods:\n",
+    "\n",
+    "$$\n",
+    "\\operatorname{BF}_{12} = \\frac{P(\\mathcal{D} \\mid \\mathcal{M}_1)}{P(\\mathcal{D} \\mid \\mathcal{M}_2)}\n",
+    "$$\n",
+    "\n",
+    "A $\\operatorname{BF}_{12} > 1$ indicates that the data favors model $\\mathcal{M}_1$ over model $\\mathcal{M}_2$, while $\\operatorname{BF}_{12} < 1$ indicates the opposite.\n",
+    "\n",
+    "Jeffreys (1961) suggested interpreting Bayes factors in half-units on the log10 scale (this was further simplified in Kass and Raftery (1995)):\n",
+    "\n",
+    "- Not worth more than a bare mention: $0 < \\log_{10} \\operatorname{BF}_{12} \\leq 0.5$\n",
+    "- Substantial: $0.5 < \\log_{10}\\operatorname{BF}_{12} \\leq 1$\n",
+    "- Strong: $1 < \\log_{10}\\operatorname{BF}_{12} \\leq 2$\n",
+    "- Decisive: $2 < \\log_{10}\\operatorname{BF}_{12}$"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a6b7640cff0280de",
+   "metadata": {},
+   "source": [
+    "## Example\n",
+    "\n",
+    "To illustrate different methods to compute marginal likelihoods, we introduce two toy models, for which we can compute the marginal likelihoods analytically:\n",
+    "\n",
+    "1. **Mixture of Two Gaussians (True Data Generator)**: Composed of two Gaussian distributions, $\\mathcal{N}(\\mu_1, \\sigma_1^2)$ and $\\mathcal{N}(\\mu_2, \\sigma_2^2)$, with mixing coefficient $\\pi=0.7$.\n",
+    "\n",
+    "2. **Single Gaussian (Alternative Model)**: A single Gaussian distribution, $\\mathcal{N}(\\mu, \\sigma^2)$.\n",
+    "\n",
+    "We sample synthetic data from the first model and create pypesto problems for both models with the same data. \n",
+    "The free parameters are the means of both models. \n",
+    "For this example, we assume that the standard deviation is known and fixed to the true value.\n",
+    "As priors, we assume normal distributions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6eb930b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from functools import partial\n",
+    "from typing import Union\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from scipy import stats\n",
+    "from scipy.special import logsumexp\n",
+    "\n",
+    "from pypesto import sample, optimize, visualize, variational\n",
+    "from pypesto.objective import (\n",
+    "    AggregatedObjective,\n",
+    "    NegLogParameterPriors,\n",
+    "    Objective,\n",
+    ")\n",
+    "from pypesto.problem import Problem\n",
+    "\n",
+    "# For testing purposes. Remove if not running the exact example.\n",
+    "np.random.seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ad47e3f67a7896d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# model hyperparameters\n",
+    "N = 10\n",
+    "N2_1 = 3\n",
+    "N2_2 = N - N2_1\n",
+    "sigma2 = 2.\n",
+    "true_params = np.array([-2., 2.])\n",
+    "\n",
+    "rng = np.random.default_rng(seed=0)\n",
+    "# Alternative Model\n",
+    "Y1 = rng.normal(loc=0., scale=1., size=N)\n",
+    "\n",
+    "# True Model\n",
+    "Y2_1 = rng.normal(loc=true_params[0], scale=sigma2, size=N2_1)\n",
+    "Y2_2 = rng.normal(loc=true_params[1], scale=sigma2, size=N2_2)\n",
+    "Y2 = np.concatenate([Y2_1, Y2_2])\n",
+    "mixture_data, sigma = Y2, sigma2\n",
+    "n_obs = len(mixture_data)\n",
+    "\n",
+    "# plot the alternative model distribution as a normal distribution\n",
+    "plt.figure()\n",
+    "x = np.linspace(-10, 10, 100)\n",
+    "plt.plot(x, stats.norm.pdf(x, loc=0., scale=1.), label='Alternative Model', color='red')\n",
+    "plt.plot(x, stats.norm.pdf(x, loc=true_params[0], scale=sigma2), label='True Model Y2_1', color='blue')\n",
+    "plt.plot(x, stats.norm.pdf(x, loc=true_params[1], scale=sigma2), label='True Model Y2_2', color='green')\n",
+    "\n",
+    "\n",
+    "# Plot the data of the alternative and true model as dots on the x-axis for each model\n",
+    "plt.scatter(Y1, np.zeros_like(Y1), label='Y1 samples', color='red')\n",
+    "plt.scatter(Y2_1, np.full(len(Y2_1), 0.05), label='Y2_1 samples', color='blue')\n",
+    "plt.scatter(Y2_2, np.full(len(Y2_2), 0.1), label='Y2_2 samples', color='green')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2143410833d86594",
+   "metadata": {
+    "tags": [
+     "hide-input"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# evidence\n",
+    "def log_evidence_alt(data: np.ndarray, std: float):\n",
+    "    n = int(data.size)\n",
+    "    y_sum = np.sum(data)\n",
+    "    y_sq_sum = np.sum(data**2)\n",
+    "\n",
+    "    term1 = 1 / (np.sqrt(2 * np.pi) * std)\n",
+    "    log_term2 = -0.5 * np.log(n + 1)\n",
+    "    inside_exp = -0.5 / (std**2) * (y_sq_sum - (y_sum**2) / (n + 1))\n",
+    "    return n * np.log(term1) + log_term2 + inside_exp\n",
+    "\n",
+    "\n",
+    "def log_evidence_true(data: np.ndarray, std: float):\n",
+    "    y1 = data[:N2_1]\n",
+    "    y2 = data[N2_1:]\n",
+    "    n = N2_1 + N2_2\n",
+    "\n",
+    "    y_mean_1 = np.mean(y1)\n",
+    "    y_mean_2 = np.mean(y2)\n",
+    "    y_sq_sum = np.sum(y1**2) + np.sum(y2**2)\n",
+    "\n",
+    "    term1 = (1 / (np.sqrt(2 * np.pi) * std)) ** n\n",
+    "    term2 = 1 / (np.sqrt(N2_1 + 1) * np.sqrt(N2_2 + 1))\n",
+    "\n",
+    "    inside_exp = (\n",
+    "        -1\n",
+    "        / (2 * std**2)\n",
+    "        * (\n",
+    "            y_sq_sum\n",
+    "            + 8\n",
+    "            - (N2_1 * y_mean_1 - 2) ** 2 / (N2_1 + 1)\n",
+    "            - (N2_2 * y_mean_2 + 2) ** 2 / (N2_2 + 1)\n",
+    "        )\n",
+    "    )\n",
+    "    return np.log(term1) + np.log(term2) + inside_exp\n",
+    "\n",
+    "\n",
+    "true_log_evidence_alt = log_evidence_alt(mixture_data, sigma)\n",
+    "true_log_evidence_true = log_evidence_true(mixture_data, sigma)\n",
+    "\n",
+    "print(\"True log evidence, true model:\", true_log_evidence_true)\n",
+    "print(\"True log evidence, alternative model:\", true_log_evidence_alt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33659ff5eba21345",
+   "metadata": {
+    "tags": [
+     "hide-input"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# define likelihood for each model, and build the objective functions for the pyPESTO problem\n",
+    "def neg_log_likelihood(params: Union[np.ndarray, list], data: np.ndarray):\n",
+    "    # normal distribution\n",
+    "    mu, std = params\n",
+    "    n = int(data.size)\n",
+    "    return (\n",
+    "        0.5 * n * np.log(2 * np.pi)\n",
+    "        + n * np.log(std)\n",
+    "        + np.sum((data - mu) ** 2) / (2 * std**2)\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "def neg_log_likelihood_grad(params: Union[np.ndarray, list], data: np.ndarray):\n",
+    "    mu, std = params\n",
+    "    n = int(data.size)\n",
+    "    grad_mu = -np.sum(data - mu) / (std**2)\n",
+    "    grad_std = n / std - np.sum((data - mu) ** 2) / (std**3)\n",
+    "    return np.array([grad_mu, grad_std])\n",
+    "\n",
+    "\n",
+    "def neg_log_likelihood_hess(params: Union[np.ndarray, list], data: np.ndarray):\n",
+    "    mu, std = params\n",
+    "    n = int(data.size)\n",
+    "    hess_mu_mu = n / (std**2)\n",
+    "    hess_mu_std = 2 * np.sum(data - mu) / (std**3)\n",
+    "    hess_std_std = -n / (std**2) + 3 * np.sum((data - mu) ** 2) / (std**4)\n",
+    "    return np.array([[hess_mu_mu, hess_mu_std], [hess_mu_std, hess_std_std]])\n",
+    "\n",
+    "\n",
+    "def neg_log_likelihood_m2(\n",
+    "    params: Union[np.ndarray, list], data: np.ndarray, n_mix: int\n",
+    "):\n",
+    "    # normal distribution\n",
+    "    y1 = data[:n_mix]\n",
+    "    y2 = data[n_mix:]\n",
+    "    m1, m2, std = params\n",
+    "\n",
+    "    neg_log_likelihood([m1, std], y1)\n",
+    "    term1 = neg_log_likelihood([m1, std], y1)\n",
+    "    term2 = neg_log_likelihood([m2, std], y2)\n",
+    "    return term1 + term2\n",
+    "\n",
+    "\n",
+    "def neg_log_likelihood_m2_grad(params: np.ndarray, data: np.ndarray, n_mix: int):\n",
+    "    m1, m2, std = params\n",
+    "    y1 = data[:n_mix]\n",
+    "    y2 = data[n_mix:]\n",
+    "\n",
+    "    grad_m1, grad_std1 = neg_log_likelihood_grad([m1, std], y1)\n",
+    "    grad_m2, grad_std2 = neg_log_likelihood_grad([m2, std], y2)\n",
+    "    return np.array([grad_m1, grad_m2, grad_std1 + grad_std2])\n",
+    "\n",
+    "\n",
+    "def neg_log_likelihood_m2_hess(params: np.ndarray, data: np.ndarray, n_mix: int):\n",
+    "    m1, m2, std = params\n",
+    "    y1 = data[:n_mix]\n",
+    "    y2 = data[n_mix:]\n",
+    "\n",
+    "    [[hess_m1_m1, hess_m1_std], [_, hess_std_std1]] = neg_log_likelihood_hess(\n",
+    "        [m1, std], y1\n",
+    "    )\n",
+    "    [[hess_m2_m2, hess_m2_std], [_, hess_std_std2]] = neg_log_likelihood_hess(\n",
+    "        [m2, std], y2\n",
+    "    )\n",
+    "    hess_m1_m2 = 0\n",
+    "\n",
+    "    return np.array(\n",
+    "        [\n",
+    "            [hess_m1_m1, hess_m1_m2, hess_m1_std],\n",
+    "            [hess_m1_m2, hess_m2_m2, hess_m2_std],\n",
+    "            [hess_m1_std, hess_m2_std, hess_std_std1 + hess_std_std2],\n",
+    "        ]\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "nllh_true = Objective(\n",
+    "    fun=partial(neg_log_likelihood_m2, data=mixture_data, n_mix=N2_1),\n",
+    "    grad=partial(neg_log_likelihood_m2_grad, data=mixture_data, n_mix=N2_1),\n",
+    "    hess=partial(neg_log_likelihood_m2_hess, data=mixture_data, n_mix=N2_1),\n",
+    ")\n",
+    "nllh_alt = Objective(\n",
+    "    fun=partial(neg_log_likelihood, data=mixture_data),\n",
+    "    grad=partial(neg_log_likelihood_grad, data=mixture_data),\n",
+    "    hess=partial(neg_log_likelihood_hess, data=mixture_data),\n",
+    ")\n",
+    "\n",
+    "\n",
+    "def log_normal_density(x: float, mu: float, std: float):\n",
+    "    return (\n",
+    "        -1 / 2 * np.log(2 * np.pi)\n",
+    "        - 1 / 2 * np.log(std**2)\n",
+    "        - (x - mu) ** 2 / (2 * std**2)\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "def log_normal_density_grad(x: float, mu: float, std: float):\n",
+    "    return -(x - mu) / (std**2)\n",
+    "\n",
+    "\n",
+    "def log_normal_density_hess(x: float, mu: float, std: float):\n",
+    "    return -1 / (std**2)\n",
+    "\n",
+    "\n",
+    "prior_true = NegLogParameterPriors(\n",
+    "    [\n",
+    "        {\n",
+    "            \"index\": 0,\n",
+    "            \"density_fun\": partial(log_normal_density, mu=true_params[0], std=sigma2),\n",
+    "            \"density_dx\": partial(\n",
+    "                log_normal_density_grad, mu=true_params[0], std=sigma2\n",
+    "            ),\n",
+    "            \"density_ddx\": partial(\n",
+    "                log_normal_density_hess, mu=true_params[0], std=sigma2\n",
+    "            ),\n",
+    "        },\n",
+    "        {\n",
+    "            \"index\": 1,\n",
+    "            \"density_fun\": partial(log_normal_density, mu=true_params[1], std=sigma2),\n",
+    "            \"density_dx\": partial(\n",
+    "                log_normal_density_grad, mu=true_params[1], std=sigma2\n",
+    "            ),\n",
+    "            \"density_ddx\": partial(\n",
+    "                log_normal_density_hess, mu=true_params[1], std=sigma2\n",
+    "            ),\n",
+    "        },\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "prior_alt = NegLogParameterPriors(\n",
+    "    [\n",
+    "        {\n",
+    "            \"index\": 0,\n",
+    "            \"density_fun\": partial(log_normal_density, mu=0., std=1.),\n",
+    "            \"density_dx\": partial(log_normal_density_grad, mu=0., std=1.),\n",
+    "            \"density_ddx\": partial(\n",
+    "                log_normal_density_hess, mu=0., std=1.\n",
+    "            ),\n",
+    "        }\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "\n",
+    "mixture_problem_true = Problem(\n",
+    "    objective=AggregatedObjective(objectives=[nllh_true, prior_true]),\n",
+    "    lb=[-10, -10, 0],\n",
+    "    ub=[10, 10, 10],\n",
+    "    x_names=[\"mu1\", \"mu2\", \"sigma\"],\n",
+    "    x_scales=[\"lin\", \"lin\", \"lin\"],\n",
+    "    x_fixed_indices=[2],\n",
+    "    x_fixed_vals=[sigma],\n",
+    "    x_priors_defs=prior_true,\n",
+    ")\n",
+    "\n",
+    "mixture_problem_alt = Problem(\n",
+    "    objective=AggregatedObjective(objectives=[nllh_alt, prior_alt]),\n",
+    "    lb=[-10, 0],\n",
+    "    ub=[10, 10],\n",
+    "    x_names=[\"mu\", \"sigma\"],\n",
+    "    x_scales=[\"lin\", \"lin\"],\n",
+    "    x_fixed_indices=[1],\n",
+    "    x_fixed_vals=[sigma],\n",
+    "    x_priors_defs=prior_alt,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf9af2fa37f3a0cf",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# to make the code more readable, we define a dictionary with all models\n",
+    "# from here on, we use the pyPESTO problem objects, so the code can be reused for any other problem\n",
+    "models = {\n",
+    "    'mixture_model1': {\n",
+    "        'name': 'True-Model',\n",
+    "        'true_log_evidence': true_log_evidence_true,\n",
+    "        'prior_mean': np.array([-2, 2]),\n",
+    "        'prior_std': np.array([2, 2]),\n",
+    "        'prior_cov': np.diag([4, 4]),\n",
+    "        'true_params': true_params,\n",
+    "        'problem': mixture_problem_true,\n",
+    "    },\n",
+    "    'mixture_model2': {\n",
+    "        'name': 'Alternative-Model',\n",
+    "        'true_log_evidence': true_log_evidence_alt,\n",
+    "        'prior_mean': np.array([0]),\n",
+    "        'prior_std': np.array([1]),\n",
+    "        'prior_cov': np.diag([1]),\n",
+    "        'problem': mixture_problem_alt,\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "for m in models.values():         \n",
+    "    # neg_log_likelihood is called with full vector, parameters might be still in log space\n",
+    "    m['neg_log_likelihood'] = lambda x: m['problem'].objective._objectives[0](\n",
+    "        m['problem'].get_full_vector(x=x, x_fixed_vals=m['problem'].x_fixed_vals)\n",
+    "    )    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e273503367e8bf4d",
+   "metadata": {},
+   "source": [
+    "## Methods for Computing Marginal Likelihoods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95ec6b53c9133332",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# run optimization for each model\n",
+    "for m in models.values(): \n",
+    "    m['results'] = optimize.minimize(problem=m['problem'],  n_starts=100)\n",
+    "    \n",
+    "    if 'true_params' in m.keys():\n",
+    "        visualize.parameters(\n",
+    "            results=m['results'], reference={'x': m[\"true_params\"], 'fval': m['problem'].objective(m[\"true_params\"])})\n",
+    "    else:\n",
+    "        visualize.parameters(m['results'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ffd895262133fe00",
+   "metadata": {},
+   "source": [
+    "### 1. Bayesian Information Criterion (BIC)\n",
+    "\n",
+    "The BIC is a simple and widely-used approximation to the marginal likelihood. It is computed as:\n",
+    "\n",
+    "$$\n",
+    "\\operatorname{BIC} = k \\ln(n) - 2 \\ln(\\hat{L})\n",
+    "$$\n",
+    "\n",
+    "where $k$ is the number of parameters, $n$ is the number of data points, and $\\hat{L}$ is the maximized value of the likelihood function. $-\\frac12 \\operatorname{BIC}$ approximates the log marginal likelihood under the assumption that the prior is non-informative and the sample size is large.\n",
+    "\n",
+    "\n",
+    "BIC is easy to compute and converges to the marginal likelihood, but it may not capture the full complexity of model selection, especially for complex models or when significant prior information is available, as the prior is completely ignored."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b40d72091d00e9f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for m in models.values(): \n",
+    "    m['BIC'] = len(m['problem'].x_free_indices) * np.log(n_obs) + 2 * m['neg_log_likelihood'](m['results'].optimize_result.x[0])\n",
+    "    print(m['name'], 'BIC marginal likelihood approximation:', -1/2*m['BIC'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "67cb4a7bb781d42",
+   "metadata": {},
+   "source": [
+    "### 2. Laplace Approximation\n",
+    "\n",
+    "The Laplace approximation estimates the marginal likelihood by approximating the posterior distribution as a Gaussian centered at the maximum a posteriori (MAP) estimate $\\hat{\\theta}$ using the Hessian of the posterior distribution. The marginal likelihood is then approximated as:\n",
+    "\n",
+    "$$\n",
+    "P(\\mathcal{D} \\mid \\mathcal{M}) \\approx (2\\pi)^{k/2} \\left| \\Sigma \\right|^{1/2} P(\\mathcal{D} \\mid \\hat{\\theta}, \\mathcal{M}) P(\\hat{\\theta} \\mid \\mathcal{M})\n",
+    "$$\n",
+    "\n",
+    "where $\\Sigma$ is the covariance matrix of the posterior distribution (unnormalized, so likelihood $\\times$ prior).\n",
+    "\n",
+    "\n",
+    "The Laplace approximation is accurate if the posterior is unimodal and roughly Gaussian."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "548513d76b8887dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values(): \n",
+    "    laplace_evidences = []\n",
+    "    for x in m['results'].optimize_result.x:\n",
+    "        log_evidence = sample.evidence.laplace_approximation_log_evidence(m['problem'], x)\n",
+    "        laplace_evidences.append(log_evidence)\n",
+    "                \n",
+    "        m['laplace_evidences'] = np.array(laplace_evidences)\n",
+    "    print(m['name'], f'laplace approximation: {m[\"laplace_evidences\"][0]}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5ac29500e0e678b",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": [
+    "### 3. Sampling-Based Methods\n",
+    "\n",
+    "Sampling-based methods, such as Markov Chain Monte Carlo (MCMC) or nested sampling, do not make assumptions about the shape of the posterior and can provide more accurate estimates of the marginal likelihood. However, they can be computationally very intensive."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "212297d07ef90600",
+   "metadata": {},
+   "source": [
+    "\n",
+    "#### Arithmetic Mean Estimator\n",
+    "\n",
+    "The arithmetic mean estimator evaluates the likelihood function at samples $\\theta_i$ drawn from the prior to approximate the marginal likelihood:\n",
+    "\n",
+    "$$\n",
+    "P(\\mathcal{D} \\mid \\mathcal{M}) \\approx \\frac{1}{N} \\sum_{i=1}^N P(\\mathcal{D} \\mid \\theta_i, \\mathcal{M})\n",
+    "$$\n",
+    "\n",
+    "The arithmetic mean estimator requires a large number of samples and is very inefficient. It approximates the marginal likelihood from below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec2f000c836abad6",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values():    \n",
+    "    prior_sample = np.random.multivariate_normal(mean=m['prior_mean'], \n",
+    "                                                 cov=m['prior_cov'],\n",
+    "                                                 size=1000)\n",
+    "    log_likelihoods = np.array([-m['neg_log_likelihood'](x) for x in prior_sample])\n",
+    "    m['arithmetic_log_evidence'] = logsumexp(log_likelihoods) - np.log(log_likelihoods.size)\n",
+    "    \n",
+    "    print(m['name'], f'arithmetic mean: {m[\"arithmetic_log_evidence\"]}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "77ec3e1ec016d0d1",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": [
+    "#### Harmonic Mean\n",
+    "\n",
+    "The harmonic mean estimator uses posterior samples to estimate the marginal likelihood:\n",
+    "\n",
+    "$$\n",
+    "P(\\mathcal{D} \\mid \\mathcal{M}) \\approx \\left( \\frac{1}{N} \\sum_{i=1}^N \\frac{1}{P(\\mathcal{D} \\mid \\theta_i, \\mathcal{M})} \\right)^{-1}\n",
+    "$$\n",
+    "\n",
+    "where $\\theta_i$ are samples from the posterior distribution.\n",
+    "\n",
+    "The harmonic mean estimator approximates the evidence from above since it tends to ignore low likelihood regions, such as those comprising the prior, leading to overestimates of the marginal likelihoods, even when asymptotically unbiased.\n",
+    "Moreover, the estimator can have a high variance due to evaluating the likelihood at low probability regions and inverting it.\n",
+    "Hence, it can be very unstable and even fail catastrophically. A more stable version, the stabilized harmonic mean, also uses samples from the prior (see [Newton and Raftery (1994)](https://doi.org/10.1111/j.2517-6161.1994.tb01956.x)). However, there are more efficient methods available.\n",
+    "\n",
+    "A reliable sampling method is bridge sampling (see [\"A Tutorial on Bridge Sampling\" by Gronau et al. (2017)](https://api.semanticscholar.org/CorpusID:5447695) for a nice introduction). It uses samples from a proposal and the posterior to estimate the marginal likelihood. The proposal distribution should be chosen to have a high overlap with the posterior (we construct it from half of the posterior samples by fitting a Gaussian distribution with the same mean and covariance). This method is more stable than the harmonic mean estimator. However, its accuracy may depend on the choice of the proposal distribution.\n",
+    "\n",
+    "A different approach, the learnt harmonic mean estimator, was proposed by [McEwen et al. (2021)](https://api.semanticscholar.org/CorpusID:244709474). The estimator solves the large variance problem by interpreting the harmonic mean estimator as importance sampling and introducing a new target distribution, which is learned from the posterior samples. The method can be applied just using samples from the posterior and is implemented in the software package accompanying the paper.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba4cc742f71fad4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values(): \n",
+    "    results = sample.sample(\n",
+    "        problem=m['problem'], \n",
+    "        n_samples=1000, \n",
+    "        result=m['results'],\n",
+    "    )\n",
+    "    # compute harmonic mean\n",
+    "    m['harmonic_log_evidence'] = sample.evidence.harmonic_mean_log_evidence(results)\n",
+    "    print(m['name'], f'harmonic mean: {m[\"harmonic_log_evidence\"]}')    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7272997b60de2e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values(): \n",
+    "    results = sample.sample(\n",
+    "        problem=m['problem'], \n",
+    "        n_samples=800, \n",
+    "        result=m['results'],\n",
+    "    )\n",
+    "    # compute stabilized harmonic mean\n",
+    "    prior_samples = np.random.multivariate_normal(mean=m['prior_mean'], \n",
+    "                                                 cov=m['prior_cov'],\n",
+    "                                                 size=200)\n",
+    "    m['harmonic_stabilized_log_evidence'] = sample.evidence.harmonic_mean_log_evidence(\n",
+    "        result=results,\n",
+    "        prior_samples=prior_samples,\n",
+    "        neg_log_likelihood_fun=m['neg_log_likelihood']\n",
+    "    )\n",
+    "    print(m['name'], f'stabilized harmonic mean: {m[\"harmonic_stabilized_log_evidence\"]}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce38f1a4975cd72a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values(): \n",
+    "    results = sample.sample(\n",
+    "        problem=m['problem'], \n",
+    "        n_samples=1000, \n",
+    "        result=m['results'],\n",
+    "    )\n",
+    "    m['bridge_log_evidence'] = sample.evidence.bridge_sampling_log_evidence(results)\n",
+    "    print(m['name'], f'bridge sampling: {m[\"bridge_log_evidence\"]}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "443bf17c8ae27a15",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": [
+    "#### Nested Sampling\n",
+    "\n",
+    "Nested sampling is specifically designed for estimating marginal likelihoods. The static nested sampler is optimized for evidence computation and provides accurate estimates but may give less accurate posterior samples unless dynamic nested sampling is used. \n",
+    "\n",
+    "Dynamic nested sampling can improve the accuracy of posterior samples. The package [dynesty](https://dynesty.readthedocs.io/en/stable/) offers a lot of hyperparameters to tune accuracy and efficiency of computing samples from the posterior vs. estimating the marginal likelihood."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c0236f455dfc64d5",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values():\n",
+    "    # define prior transformation needed for nested sampling\n",
+    "    def prior_transform(u):\n",
+    "        \"\"\"Transform prior sample from unit cube to normal prior.\"\"\"\n",
+    "        t = stats.norm.ppf(u)  # convert to standard normal\n",
+    "        c_sqrt = np.linalg.cholesky(m['prior_cov'])  # Cholesky decomposition\n",
+    "        u_new = np.dot(c_sqrt, t)  # correlate with appropriate covariance\n",
+    "        u_new += m['prior_mean']  # add mean\n",
+    "        return u_new\n",
+    "\n",
+    "    # initialize nested sampler\n",
+    "    nested_sampler = sample.DynestySampler(\n",
+    "        #sampler_args={'nlive': 250},\n",
+    "        run_args={'maxcall': 1000},\n",
+    "        dynamic=False,  # static nested sampler is optimized for evidence computation\n",
+    "        prior_transform=prior_transform\n",
+    "    )\n",
+    "    \n",
+    "    # run nested sampling\n",
+    "    result_dynesty_sample = sample.sample(\n",
+    "        problem=m['problem'],\n",
+    "        n_samples=None,\n",
+    "        sampler=nested_sampler\n",
+    "    )\n",
+    "\n",
+    "    # extract log evidence\n",
+    "    m[f'nested_log_evidence'] = nested_sampler.sampler.results.logz[-1]\n",
+    "    print(m['name'], f'nested sampling: {m[\"nested_log_evidence\"]}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dcb16e2efcf4bf0d",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": [
+    "#### Thermodynamic Integration and Steppingstone Sampling\n",
+    "\n",
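+    "These methods are based on the power posterior $p_t(\\theta) = \\frac{1}{Z_t} P(\\mathcal{D} \\mid \\theta, \\mathcal{M})^t P(\\theta \\mid \\mathcal{M})$, in which the likelihood is raised to a power (inverse temperature) $t \\in [0, 1]$ and $Z_t$ is the corresponding normalizing constant. Thermodynamic integration expresses the log marginal likelihood as an integral over $t$:\n",
+    "\n",
+    "$$\n",
+    "\\log P(\\mathcal{D} \\mid \\mathcal{M}) = \\int_0^1 \\mathbb{E}_{\\theta \\sim p_t} \\left[ \\log P(\\mathcal{D} \\mid \\theta, \\mathcal{M}) \\right] \\, dt\n",
+    "$$\n",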
+    "\n",
+    "Parallel tempering is a sampling algorithm that improves accuracy for multimodal posteriors by sampling from different temperatures simultaneously and exchanging samples between parallel chains. It can be used to sample from all power posteriors simultaneously allowing for thermodynamic integration and steppingstone sampling [(Annis et al., 2019)](https://doi.org/10.1016/j.jmp.2019.01.005). These methods can be seen as path sampling methods, hence related to bridge sampling.\n",
+    "\n",
+    "These methods can be more accurate for complex posteriors but are computationally intensive. Thermodynamic integration (TI) relies on numerical integration over the temperature $t$, while steppingstone sampling approximates the integral with a sum over a finite number of temperatures using an importance sampling estimator. Accuracy can be improved by using more temperatures. \n",
+    "Errors in the estimator might come from the MCMC sampler in both cases and from numerical integration when applying TI. Steppingstone sampling can be a biased estimator for a small number of temperatures [(Annis et al., 2019)](https://doi.org/10.1016/j.jmp.2019.01.005).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13059e00c982d98d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values():\n",
+    "    # initialize parallel tempering sampler\n",
+    "    ti_sampler = sample.ParallelTemperingSampler(  # not adaptive, since we want fixed temperatures\n",
+    "        internal_sampler=sample.AdaptiveMetropolisSampler(),\n",
+    "        n_chains=10\n",
+    "    )\n",
+    "\n",
+    "    # run mcmc with parallel tempering\n",
+    "    result_ti = sample.sample(\n",
+    "                            problem=m['problem'], \n",
+    "                            n_samples=1000, \n",
+    "                            sampler=ti_sampler,\n",
+    "                            result=m['results']\n",
+    "                        )    \n",
+    "    # compute log evidence via thermodynamic integration\n",
+    "    m['thermodynamic_log_evidence'] = sample.evidence.parallel_tempering_log_evidence(result_ti, use_all_chains=False)\n",
+    "    print(m['name'], f'thermodynamic integration: {m[\"thermodynamic_log_evidence\"]}')\n",
+    "    \n",
+    "    # compute log evidence via steppingstone sampling\n",
+    "    m['steppingstone_log_evidence'] = sample.evidence.parallel_tempering_log_evidence(result_ti, method='steppingstone', use_all_chains=False)\n",
+    "    print(m['name'], f'steppingstone sampling: {m[\"steppingstone_log_evidence\"]}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "90fd0f80a9d94b7d",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": [
+    "#### Variational Inference\n",
+    "\n",
+    "Variational inference approximates the posterior with a simpler distribution and can be faster than sampling methods for large problems. The marginal likelihood can be estimated using similar approaches as before, but the accuracy is limited by the choice of the variational family.\n",
+    "\n",
+    "Variational inference is optimized by maximizing the Evidence Lower Bound (ELBO), which bounds the log evidence from below and thus provides an additional check for the estimator."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c616b8a566478d0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "for m in models.values():\n",
+    "    \n",
+    "    # one could define callbacks to check convergence during optimization\n",
+    "    # import pymc as pm\n",
+    "    # cb = [\n",
+    "    #     pm.callbacks.CheckParametersConvergence(\n",
+    "    #         tolerance=1e-3, diff='absolute'),\n",
+    "    #     pm.callbacks.CheckParametersConvergence(\n",
+    "    #         tolerance=1e-3, diff='relative'),\n",
+    "    # ]\n",
+    "\n",
+    "    pypesto_variational_result = variational.variational_fit(\n",
+    "        problem=m['problem'],\n",
+    "        method='advi',\n",
+    "        n_iterations=10000,\n",
+    "        n_samples=None,\n",
+    "        result=m['results'],\n",
+    "        #callbacks=cb,\n",
+    "    )\n",
+    "    \n",
+    "    # hist holds the negative ELBO (the optimization criterion); the ELBO is a lower bound on the log evidence\n",
+    "    vi_lower_bound = np.max(-pypesto_variational_result.variational_result.data.hist)\n",
+    "            \n",
+    "    # compute harmonic mean from posterior samples\n",
+    "    approx_sample = pypesto_variational_result.variational_result.sample(1000)['trace_x'][0]\n",
+    "    neg_log_likelihoods = np.array([m['neg_log_likelihood'](ps) for ps in approx_sample])\n",
+    "    m['vi_harmonic_log_evidences'] = -logsumexp(neg_log_likelihoods) + np.log(\n",
+    "            neg_log_likelihoods.size\n",
+    "        )\n",
+    "    print(m['name'], f'harmonic mean with variational inference: {m[\"vi_harmonic_log_evidences\"]}')\n",
+    "    print(\"Evidence lower bound:\", vi_lower_bound)\n",
+    "    \n",
+    "    # evidence cannot be smaller than the lower bound\n",
+    "    m[\"vi_harmonic_log_evidences\"] = max(m[\"vi_harmonic_log_evidences\"], vi_lower_bound)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5e6c53b1a6414210",
+   "metadata": {},
+   "source": [
+    "## Comparison"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fbb5a071645523d4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labels = [\n",
+    "        '-1/2 BIC',\n",
+    "        'Arithmetic Mean',\n",
+    "        'Laplace', \n",
+    "        'Harmonic Mean', \n",
+    "        'Stabilized\\nHarmonic Mean',\n",
+    "        'Bridge Sampling',\n",
+    "        'Nested Sampling',\n",
+    "        'Thermodynamic\\nIntegration',\n",
+    "        'Steppingstone\\nSampling',\n",
+    "        'Variational Inference\\nHarmonic Mean'\n",
+    "]\n",
+    "\n",
+    "bayes_factors = [\n",
+    "    -1/2*models['mixture_model1']['BIC']+1/2*models['mixture_model2']['BIC'],\n",
+    "    models['mixture_model1']['arithmetic_log_evidence']-models['mixture_model2']['arithmetic_log_evidence'],\n",
+    "    models['mixture_model1']['laplace_evidences'][0]-models['mixture_model2']['laplace_evidences'][0],\n",
+    "    models['mixture_model1']['harmonic_log_evidence']-models['mixture_model2']['harmonic_log_evidence'],\n",
+    "    models['mixture_model1']['harmonic_stabilized_log_evidence']-models['mixture_model2']['harmonic_stabilized_log_evidence'],\n",
+    "    models['mixture_model1']['bridge_log_evidence']-models['mixture_model2']['bridge_log_evidence'],\n",
+    "    models['mixture_model1']['nested_log_evidence']-models['mixture_model2']['nested_log_evidence'],\n",
+    "    models['mixture_model1']['thermodynamic_log_evidence']-models['mixture_model2']['thermodynamic_log_evidence'],\n",
+    "    models['mixture_model1']['steppingstone_log_evidence']-models['mixture_model2']['steppingstone_log_evidence'],\n",
+    "    models['mixture_model1']['vi_harmonic_log_evidences']-models['mixture_model2']['vi_harmonic_log_evidences']\n",
+    "]\n",
+    "\n",
+    "true_bf = models['mixture_model1']['true_log_evidence'] - models['mixture_model2']['true_log_evidence']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "30fea0ed78548d6b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(2, 1, tight_layout=True, sharex=True, figsize=(6, 6))\n",
+    "colors = ['blue', 'orange']\n",
+    "\n",
+    "for i, m in enumerate(models.values()):\n",
+    "    m['log_evidences'] = np.array([\n",
+    "        -1/2*m['BIC'],\n",
+    "        m['arithmetic_log_evidence'],\n",
+    "        m['laplace_evidences'][0],\n",
+    "        m['harmonic_log_evidence'],\n",
+    "        m['harmonic_stabilized_log_evidence'],\n",
+    "        m['bridge_log_evidence'],\n",
+    "        m['nested_log_evidence'],\n",
+    "        m['thermodynamic_log_evidence'],\n",
+    "        m['steppingstone_log_evidence'],\n",
+    "        m['vi_harmonic_log_evidences']\n",
+    "    ])\n",
+    "    ax[0].scatter(x=np.arange(m['log_evidences'].size), y=m['log_evidences'], color=colors[i], label=m['name'])    \n",
+    "    ax[0].axhline(m['true_log_evidence'], color=colors[i], alpha=0.75, label=f'True evidence of {m[\"name\"]}')\n",
+    "    \n",
+    "    m['error'] = (np.exp(m['log_evidences']) - np.exp(m['true_log_evidence']))**2\n",
+    "mean_error = np.sum(np.array([m['error'] for m in models.values()]), axis=0)\n",
+    "ax[1].scatter(x=np.arange(len(labels)), y=mean_error)  \n",
+    "\n",
+    "ax[1].set_xlabel('Estimator')\n",
+    "ax[0].set_title(f'Comparison of different evidence estimators')\n",
+    "ax[0].set_ylabel('Ln Evidence')    \n",
+    "ax[1].set_ylabel('Squared Error of Evidence\\nsum of both models') \n",
+    "ax[1].set_yscale('log')\n",
+    "ax[1].set_xticks(ticks=np.arange(len(labels)), labels=labels, rotation=60)\n",
+    "fig.legend(ncols=1, loc='center right', bbox_to_anchor=(1.5, 0.7))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d6590690b5c7a30",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "fig, ax = plt.subplots(1, 1, tight_layout=True, figsize=(6, 5))\n",
+    "ax.axhline(true_bf, linestyle='-', color='r', label='True Bayes Factor')\n",
+    "plt.scatter(x=np.arange(len(bayes_factors)), y=bayes_factors, label='Estimates')\n",
+    "\n",
+    "# add decision thresholds\n",
+    "c = lambda x: np.log(np.power(10, x))  # usually defined in log10, convert to ln\n",
+    "ax.axhline(c(0), color='red', linestyle='--', label='\"Not worth more than a bare mention\"')\n",
+    "ax.axhline(c(0.5), color='orange', linestyle='--', label='\"Substantial\"')\n",
+    "ax.axhline(c(1), color='yellow', linestyle='--', label='\"Strong\"')\n",
+    "ax.axhline(c(2),  color='green', linestyle='--', label='\"Decisive\"')\n",
+    "\n",
+    "ax.set_ylabel('ln Bayes Factor')\n",
+    "ax.set_xlabel('Estimator')\n",
+    "ax.set_title(f'Bayes Factor of {models[\"mixture_model1\"][\"name\"]} vs. {models[\"mixture_model2\"][\"name\"]}')\n",
+    "plt.xticks(ticks=np.arange(len(bayes_factors)), labels=labels, rotation=60)\n",
+    "fig.legend(ncols=1, loc='center right', bbox_to_anchor=(1.5, 0.7))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6cbfd915823d6989",
+   "metadata": {},
+   "source": [
+    "We recommend using either bridge sampling, nested sampling or one of the methods using power posteriors depending on the computational resources available. \n",
+    "\n",
+    "Bayes factors and marginal likelihoods are powerful tools for Bayesian model comparison. While there are various methods to compute marginal likelihoods, each has its strengths and weaknesses. Choosing the appropriate method depends on the specific context, the complexity of the models, and the computational resources available."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/doc/example/model_selection.ipynb b/doc/example/model_selection.ipynb
index 72ae22a8d..f31338dc7 100644
--- a/doc/example/model_selection.ipynb
+++ b/doc/example/model_selection.ipynb
@@ -158,9 +158,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import logging\n",
@@ -188,9 +186,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Reduce notebook runtime\n",
@@ -263,9 +259,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "pvs.plot_calibrated_models_digraph(\n",
@@ -485,9 +479,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "petab_select_problem.model_space.reset_exclusions()\n",
@@ -555,7 +547,7 @@
  "metadata": {
   "celltoolbar": "Raw Cell Format",
   "kernelspec": {
-   "display_name": "Python 3.10.2 64-bit",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -569,7 +561,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.2"
+   "version": "3.12.3"
   },
   "vscode": {
    "interpreter": {
@@ -578,5 +570,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/doc/example/ordinal_data.ipynb b/doc/example/ordinal_data.ipynb
index 2488cadf5..17dbbaa2a 100644
--- a/doc/example/ordinal_data.ipynb
+++ b/doc/example/ordinal_data.ipynb
@@ -542,7 +542,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "objective = importer.create_objective(verbose=False)"
+    "factory = importer.create_objective_creator()\n",
+    "objective = factory.create_objective(verbose=False)"
    ]
   },
   {
@@ -559,7 +560,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "objective = importer.create_objective(\n",
+    "objective = factory.create_objective(\n",
     "    inner_options={\n",
     "        \"method\": \"reduced\",\n",
     "        \"reparameterized\": True,\n",
@@ -916,7 +917,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "dev_venv",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -930,7 +931,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.12.3"
   },
   "vscode": {
    "interpreter": {
@@ -939,5 +940,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/doc/example/petab_import.ipynb b/doc/example/petab_import.ipynb
index 4b3b5bbdc..e516fa008 100644
--- a/doc/example/petab_import.ipynb
+++ b/doc/example/petab_import.ipynb
@@ -105,7 +105,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The model must be imported to pyPESTO and AMICI. Therefore, we create a `pypesto.PetabImporter` from the problem, and create an AMICI model."
+    "In order to import the model into pyPESTO, we additionally need a simulator. We can specify the simulator through the `simulator_type` argument. Supported simulators include, e.g., `amici` and `roadrunner`. We will use AMICI as our example simulator. Therefore, we create a `pypesto.PetabImporter` from the problem. The importer itself creates a `pypesto.petab.Factory`, which is used to create the AMICI objective and model."
    ]
   },
   {
@@ -116,9 +116,10 @@
    },
    "outputs": [],
    "source": [
-    "importer = pypesto.petab.PetabImporter(petab_problem)\n",
+    "importer = pypesto.petab.PetabImporter(petab_problem, simulator_type=\"amici\")\n",
+    "factory = importer.create_objective_creator()\n",
     "\n",
-    "model = importer.create_model(verbose=False)\n",
+    "model = factory.create_model(verbose=False)\n",
     "\n",
     "# some model properties\n",
     "print(\"Model parameters:\", list(model.getParameterIds()), \"\\n\")\n",
@@ -154,7 +155,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "importer = pypesto.petab.PetabImporter.from_yaml(yaml_config)\n",
+    "importer = pypesto.petab.PetabImporter.from_yaml(yaml_config, simulator_type=\"amici\")\n",
     "problem = importer.create_problem()  # creating the problem from the importer. The objective can be found at problem.objective"
    ]
   },
@@ -178,7 +179,8 @@
     "converter_config = libsbml.SBMLLocalParameterConverter().getDefaultProperties()\n",
     "petab_problem.sbml_document.convert(converter_config)\n",
     "\n",
-    "obj = importer.create_objective()\n",
+    "factory = importer.create_objective_creator()\n",
+    "obj = factory.create_objective()\n",
     "\n",
     "# for some models, hyperparameters need to be adjusted\n",
     "# obj.amici_solver.setMaxSteps(10000)\n",
@@ -325,7 +327,10 @@
   {
    "cell_type": "markdown",
    "metadata": {
-    "collapsed": false
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
    },
    "source": [
     "### Dealing with function evaluations at the initial point\n",
@@ -337,7 +342,10 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
    },
    "outputs": [],
    "source": [
@@ -436,7 +444,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.12.3"
   },
   "toc": {
    "base_numbering": 1,
diff --git a/doc/example/prior_definition.ipynb b/doc/example/prior_definition.ipynb
index 9ab018a14..60edde7f2 100644
--- a/doc/example/prior_definition.ipynb
+++ b/doc/example/prior_definition.ipynb
@@ -206,7 +206,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.10.2 64-bit",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -220,7 +220,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.2"
+   "version": "3.12.3"
   },
   "vscode": {
    "interpreter": {
diff --git a/doc/example/relative_data.ipynb b/doc/example/relative_data.ipynb
index 62ba09c05..677b7ff75 100644
--- a/doc/example/relative_data.ipynb
+++ b/doc/example/relative_data.ipynb
@@ -467,7 +467,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.2"
+   "version": "3.12.3"
   },
   "toc": {
    "base_numbering": 1,
diff --git a/doc/example/roadrunner.ipynb b/doc/example/roadrunner.ipynb
index f666efb6e..a553feceb 100644
--- a/doc/example/roadrunner.ipynb
+++ b/doc/example/roadrunner.ipynb
@@ -49,10 +49,9 @@
     "import petab\n",
     "import pypesto.objective\n",
     "import pypesto.optimize as optimize\n",
-    "import pypesto.objective.roadrunner as pypesto_rr\n",
-    "import pypesto.sample as sample\n",
+    "import pypesto.petab\n",
     "import pypesto.visualize as visualize\n",
-    "import pypesto.profile as profile\n",
+    "import pypesto.objective.roadrunner as pypesto_rr\n",
     "from IPython.display import Markdown, display\n",
     "from pprint import pprint\n",
     "\n",
@@ -63,10 +62,7 @@
     "\n",
     "\n",
     "# name of the model that will also be the name of the python module\n",
-    "model_name = \"boehm_JProteomeRes2014\"\n",
-    "\n",
-    "# output directory\n",
-    "model_output_dir = \"tmp/\" + model_name"
+    "model_name = \"conversion_reaction\""
    ]
   },
   {
@@ -80,7 +76,7 @@
    "source": [
     "## Creating pyPESTO problem from PEtab\n",
     "\n",
-    "The [PEtab file format](https://petab.readthedocs.io/en/latest/documentation_data_format.html) stores all the necessary information to define a parameter estimation problem. This includes the model, the experimental data, the parameters to estimate, and the experimental conditions. Using the `pypesto_rr.PetabImporterRR` class, we can create a pyPESTO problem directly from a PEtab problem."
+    "The [PEtab file format](https://petab.readthedocs.io/en/latest/documentation_data_format.html) stores all the necessary information to define a parameter estimation problem. This includes the model, the experimental data, the parameters to estimate, and the experimental conditions. Using the `pypesto.petab.PetabImporter` class, we can create a pyPESTO problem directly from a PEtab problem."
    ]
   },
   {
@@ -97,7 +93,7 @@
     "petab_yaml = f\"./{model_name}/{model_name}.yaml\"\n",
     "\n",
     "petab_problem = petab.Problem.from_yaml(petab_yaml)\n",
-    "importer = pypesto_rr.PetabImporterRR(petab_problem)\n",
+    "importer = pypesto.petab.PetabImporter(petab_problem, simulator_type=\"roadrunner\")\n",
     "problem = importer.create_problem()"
    ]
   },
@@ -224,6 +220,7 @@
    },
    "outputs": [],
    "source": [
+    "%%time\n",
     "result = optimize.minimize(\n",
     "    problem=problem,\n",
     "    optimizer=optimizer,\n",
@@ -329,7 +326,7 @@
     "# no support for sensitivities\n",
     "try:\n",
     "    ret = problem.objective(\n",
-    "        petab_problem.get_x_nominal(fixed=False,scaled=True),\n",
+    "        petab_problem.x_nominal_free_scaled,\n",
     "        mode=\"mode_fun\",\n",
     "        return_dict=True,\n",
     "        sensi_orders=(1,),\n",
@@ -354,7 +351,7 @@
     "# support through finite differences\n",
     "try:\n",
     "    ret = objective_fd(\n",
-    "        petab_problem.get_x_nominal(fixed=False,scaled=True),\n",
+    "        petab_problem.x_nominal_scaled,\n",
     "        mode=\"mode_fun\",\n",
     "        return_dict=True,\n",
     "        sensi_orders=(1,),\n",
@@ -381,7 +378,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.2"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/sampler_study.ipynb b/doc/example/sampler_study.ipynb
index b466b028d..f5e0ccf95 100644
--- a/doc/example/sampler_study.ipynb
+++ b/doc/example/sampler_study.ipynb
@@ -301,7 +301,7 @@
    "source": [
     "sampler = sample.MetropolisSampler({\"std\": 0.5})\n",
     "result = sample.sample(\n",
-    "    problem, 1e4, sampler, x0=np.array([0.5]), filename=None\n",
+    "    problem, 1e3, sampler, x0=np.array([0.5]), filename=None\n",
     ")"
    ]
   },
@@ -331,7 +331,7 @@
    "source": [
     "sampler = sample.MetropolisSampler({\"std\": 1})\n",
     "result = sample.sample(\n",
-    "    problem, 1e4, sampler, x0=np.array([0.5]), filename=None\n",
+    "    problem, 1e3, sampler, x0=np.array([0.5]), filename=None\n",
     ")"
    ]
   },
@@ -620,8 +620,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The internal `dynesty` sampler can be saved and restored, for post-sampling analysis. For example, pyPESTO stores resampled MCMC-like samples from the `dynesty` sampler by default. The following code shows how to save and load the internal dynesty sampler, to facilitate post-sampling analysis of both the resampled and original chains. N.B.: when working across different computers, you might prefer to work with the raw sample results via `pypesto.sample.dynesty.save_raw_results` and `load_raw_results`.",
-    "\n",
+    "The internal `dynesty` sampler can be saved and restored, for post-sampling analysis. For example, pyPESTO stores resampled MCMC-like samples from the `dynesty` sampler by default. The following code shows how to save and load the internal dynesty sampler, to facilitate post-sampling analysis of both the resampled and original chains. N.B.: when working across different computers, you might prefer to work with the raw sample results via `pypesto.sample.dynesty.save_raw_results` and `load_raw_results`.\n",
     "First, we save the internal sampler."
    ]
   },
@@ -772,7 +771,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/sampling_diagnostics.ipynb b/doc/example/sampling_diagnostics.ipynb
index 21827fa74..70db98959 100644
--- a/doc/example/sampling_diagnostics.ipynb
+++ b/doc/example/sampling_diagnostics.ipynb
@@ -680,7 +680,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.2"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/semiquantitative_data.ipynb b/doc/example/semiquantitative_data.ipynb
index ee5717e54..bd277ca3f 100644
--- a/doc/example/semiquantitative_data.ipynb
+++ b/doc/example/semiquantitative_data.ipynb
@@ -193,7 +193,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "objective = importer.create_objective(verbose=False)"
+    "factory = importer.create_objective_creator()\n",
+    "objective = factory.create_objective(verbose=False)"
    ]
   },
   {
@@ -210,7 +211,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "objective = importer.create_objective(\n",
+    "objective = factory.create_objective(\n",
     "    inner_options={\n",
     "        \"spline_ratio\": 1 / 2,\n",
     "        \"min_diff_factor\": 1 / 2,\n",
@@ -456,7 +457,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "dev_venv",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -470,7 +471,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.2"
+   "version": "3.12.3"
   },
   "vscode": {
    "interpreter": {
@@ -479,5 +480,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/doc/example/store.ipynb b/doc/example/store.ipynb
index 58f9902af..18df12962 100644
--- a/doc/example/store.ipynb
+++ b/doc/example/store.ipynb
@@ -25,6 +25,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -51,6 +54,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -99,6 +105,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -108,7 +117,7 @@
    "source": [
     "%%capture\n",
     "# directory of the PEtab problem\n",
-    "petab_yaml = \"./boehm_JProteomeRes2014/boehm_JProteomeRes2014.yaml\"\n",
+    "petab_yaml = \"./conversion_reaction/conversion_reaction.yaml\"\n",
     "\n",
     "importer = pypesto.petab.PetabImporter.from_yaml(petab_yaml)\n",
     "problem = importer.create_problem(verbose=False)"
@@ -144,6 +153,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -159,7 +171,7 @@
     ")\n",
     "\n",
     "# set number of starts\n",
-    "n_starts = 15  # usually a larger number >=100 is used\n",
+    "n_starts = 10  # usually a larger number >=100 is used\n",
     "\n",
     "# Optimization\n",
     "result = pypesto.optimize.minimize(\n",
@@ -172,6 +184,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -198,6 +213,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -212,7 +230,7 @@
     "    problem=problem,\n",
     "    result=result,\n",
     "    optimizer=optimizer,\n",
-    "    profile_index=np.array([1, 1, 1, 0, 0, 0, 0, 0, 1]),\n",
+    "    profile_index=np.array([0, 1]),\n",
     ")"
    ]
   },
@@ -232,6 +250,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -246,7 +267,7 @@
     "result = sample.sample(\n",
     "    problem=problem,\n",
     "    sampler=sampler,\n",
-    "    n_samples=5000,  # rather low\n",
+    "    n_samples=1000,  # rather low\n",
     "    result=result,\n",
     "    filename=None,\n",
     ")"
@@ -270,6 +291,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -308,6 +332,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -337,6 +364,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -354,6 +384,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -406,6 +439,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -423,6 +459,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -451,6 +490,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -467,6 +509,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -494,6 +539,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -510,6 +558,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -581,6 +632,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -588,6 +642,7 @@
    },
    "outputs": [],
    "source": [
+    "%%time\n",
     "# record the history\n",
     "history_options = pypesto.HistoryOptions(trace_record=True)\n",
     "\n",
@@ -617,6 +672,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -660,12 +718,16 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
    },
    "outputs": [],
    "source": [
+    "%%time\n",
     "# create temporary file\n",
     "with tempfile.NamedTemporaryFile(suffix=\"_{id}.csv\") as fn_csv:\n",
     "    # record the history and store to CSV\n",
@@ -699,6 +761,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -747,12 +812,16 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
    },
    "outputs": [],
    "source": [
+    "%%time\n",
     "# create temporary file\n",
     "f_hdf5 = tempfile.NamedTemporaryFile(suffix=\".hdf5\", delete=False)\n",
     "fn_hdf5 = f_hdf5.name\n",
@@ -777,6 +846,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     }
@@ -808,6 +880,9 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
     "pycharm": {
      "name": "#%%\n"
     },
@@ -827,14 +902,17 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
    "outputs": [],
    "source": [
     "# close the temporary file\n",
     "f_hdf5.close()"
-   ],
-   "metadata": {
-    "collapsed": false
-   }
+   ]
   }
  ],
  "metadata": {
@@ -853,7 +931,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/example/synthetic_data.ipynb b/doc/example/synthetic_data.ipynb
index 75041530c..e5a37cf4c 100644
--- a/doc/example/synthetic_data.ipynb
+++ b/doc/example/synthetic_data.ipynb
@@ -150,7 +150,7 @@
    "outputs": [],
    "source": [
     "pypesto_result_original = pypesto.optimize.minimize(\n",
-    "    pypesto_problem_original, n_starts=20\n",
+    "    pypesto_problem_original, n_starts=10  # For demonstration purposes only n=10 starts are used. Usually n >= 100\n",
     ")"
    ]
   },
@@ -270,7 +270,7 @@
     ")\n",
     "pypesto_problem_synthetic = pypesto_importer_synthetic.create_problem()\n",
     "pypesto_result_synthetic = pypesto.optimize.minimize(\n",
-    "    pypesto_problem_synthetic, n_starts=20\n",
+    "    pypesto_problem_synthetic, n_starts=10\n",
     ")"
    ]
   },
@@ -324,7 +324,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/doc/gfx/concept_pypesto.png b/doc/gfx/concept_pypesto.png
new file mode 100644
index 000000000..c202a5b05
Binary files /dev/null and b/doc/gfx/concept_pypesto.png differ
diff --git a/doc/using_pypesto.bib b/doc/using_pypesto.bib
index ebaa06055..c4c85924d 100644
--- a/doc/using_pypesto.bib
+++ b/doc/using_pypesto.bib
@@ -11,7 +11,6 @@ @Article{FalcoCoh2023
   timestamp    = {2023-07-20},
   doi          = {10.1098/rsif.2023.0184},
   publisher    = {The Royal Society},
-  url          = {https://doi.org/10.1098/rsif.2023.0184},
 }
 
 @Article{LakrisenkoSta2023,
@@ -27,7 +26,6 @@ @Article{LakrisenkoSta2023
   creationdate = {2023-01-26T11:19:52},
   doi          = {10.1371/journal.pcbi.1010783},
   publisher    = {Public Library of Science},
-  url          = {https://doi.org/10.1371/journal.pcbi.1010783},
 }
 
 @Article{SchmiesterSch2021,
@@ -44,7 +42,6 @@ @Article{SchmiesterSch2021
   doi          = {10.1371/journal.pcbi.1008646},
   publisher    = {Public Library of Science},
   timestamp    = {2021-01-30},
-  url          = {https://doi.org/10.1371/journal.pcbi.1008646},
 }
 
 @Article{MishraWan2023,
@@ -59,7 +56,6 @@ @Article{MishraWan2023
   creationdate = {2023-01-26T11:31:17},
   doi          = {https://doi.org/10.1016/j.ymben.2022.11.003},
   keywords     = {Lipid metabolism, Kinetic model, Free fatty acid, Fatty alcohol},
-  url          = {https://www.sciencedirect.com/science/article/pii/S1096717622001380},
 }
 
 @Article{FroehlichSor2022,
@@ -75,7 +71,6 @@ @Article{FroehlichSor2022
   creationdate = {2023-01-26T11:31:44},
   doi          = {10.1371/journal.pcbi.1010322},
   publisher    = {Public Library of Science},
-  url          = {https://doi.org/10.1371/journal.pcbi.1010322},
 }
 
 @Article{FroehlichGer2022,
@@ -91,7 +86,6 @@ @Article{FroehlichGer2022
   modificationdate = {2024-05-13T09:29:21},
   publisher        = {Cold Spring Harbor Laboratory},
   ranking          = {rank1},
-  url              = {https://www.biorxiv.org/content/early/2022/02/18/2022.02.17.480899},
 }
 
 @Article{GerosaChi2020,
@@ -109,7 +103,6 @@ @Article{GerosaChi2020
   creationdate = {2023-01-26T11:32:57},
   doi          = {10.1016/j.cels.2020.10.002},
   publisher    = {Elsevier},
-  url          = {https://doi.org/10.1016/j.cels.2020.10.002},
 }
 
 @Article{SchmiesterWei2021,
@@ -126,7 +119,6 @@ @Article{SchmiesterWei2021
   creationdate = {2023-01-26T11:33:16},
   doi          = {10.1093/bioinformatics/btab512},
   eprint       = {https://academic.oup.com/bioinformatics/article-pdf/37/23/4493/41641709/btab512.pdf},
-  url          = {https://doi.org/10.1093/bioinformatics/btab512},
 }
 
 @Article{SchmiesterWei2020,
@@ -143,7 +135,6 @@ @Article{SchmiesterWei2020
   doi          = {10.1007/s00285-020-01522-w},
   refid        = {Schmiester2020},
   timestamp    = {2021-01-30},
-  url          = {https://doi.org/10.1007/s00285-020-01522-w},
 }
 
 @InProceedings{DuttaShi2021,
@@ -162,7 +153,6 @@ @InProceedings{DuttaShi2021
   keywords     = {Machine Learning, Flaky tests, Extreme Value Theory},
   location     = {Athens, Greece},
   numpages     = {12},
-  url          = {https://doi.org/10.1145/3468264.3468615},
 }
 
 @Article{ContentoCas2021,
@@ -176,7 +166,6 @@ @Article{ContentoCas2021
   elocation-id = {2021.10.01.21263052},
   eprint       = {https://www.medrxiv.org/content/early/2021/10/01/2021.10.01.21263052.full.pdf},
   publisher    = {Cold Spring Harbor Laboratory Press},
-  url          = {https://www.medrxiv.org/content/early/2021/10/01/2021.10.01.21263052},
 }
 
 @Article{AlbadryHoe2022,
@@ -192,7 +181,6 @@ @Article{AlbadryHoe2022
   creationdate = {2023-01-26T11:34:50},
   doi          = {10.1038/s41598-022-26483-6},
   refid        = {Albadry2022},
-  url          = {https://doi.org/10.1038/s41598-022-26483-6},
 }
 
 @Article{FischerHolzhausenRoe2023,
@@ -206,7 +194,6 @@ @Article{FischerHolzhausenRoe2023
   elocation-id = {2023.01.17.523407},
   eprint       = {https://www.biorxiv.org/content/early/2023/01/19/2023.01.17.523407.full.pdf},
   publisher    = {Cold Spring Harbor Laboratory},
-  url          = {https://www.biorxiv.org/content/early/2023/01/19/2023.01.17.523407},
 }
 
 @Article{KissVen2024,
@@ -222,7 +209,6 @@ @Article{KissVen2024
   doi              = {10.1093/nar/gkae123},
   eprint           = {https://academic.oup.com/nar/advance-article-pdf/doi/10.1093/nar/gkae123/56756494/gkae123.pdf},
   modificationdate = {2024-02-28T18:27:01},
-  url              = {https://doi.org/10.1093/nar/gkae123},
 }
 
 @Article{DoresicGre2024,
@@ -237,7 +223,6 @@ @Article{DoresicGre2024
   eprint           = {https://www.biorxiv.org/content/early/2024/01/30/2024.01.26.577371.full.pdf},
   modificationdate = {2024-04-20T13:06:42},
   publisher        = {Cold Spring Harbor Laboratory},
-  url              = {https://www.biorxiv.org/content/early/2024/01/30/2024.01.26.577371},
 }
 
 @Article{ArrudaSch2023,
@@ -252,7 +237,6 @@ @Article{ArrudaSch2023
   eprint           = {https://www.biorxiv.org/content/early/2023/08/23/2023.08.22.554273.full.pdf},
   modificationdate = {2024-04-22T12:56:00},
   publisher        = {Cold Spring Harbor Laboratory},
-  url              = {https://www.biorxiv.org/content/early/2023/08/23/2023.08.22.554273},
 }
 
 @Article{MerktAli2024,
@@ -283,4 +267,76 @@ @Article{FalcoCoh2024a
   publisher        = {Elsevier BV},
 }
 
+@Article{HoepflAlb2024,
+  author           = {Höpfl, Sebastian and Albadry, Mohamed and Dahmen, Uta and Herrmann, Karl-Heinz and Kindler, Eva Marie and König, Matthias and Reichenbach, Jürgen Rainer and Tautenhahn, Hans-Michael and Wei, Weiwei and Zhao, Wan-Ting and Radde, Nicole Erika},
+  journal          = {Bioinformatics},
+  title            = {{Bayesian modelling of time series data (BayModTS) - a FAIR workflow to process sparse and highly variable data}},
+  year             = {2024},
+  issn             = {1367-4811},
+  month            = {05},
+  pages            = {btae312},
+  abstract         = {{Systems biology aims to better understand living systems through mathematical modelling of experimental and clinical data. A pervasive challenge in quantitative dynamical modelling is the integration of time series measurements, which often have high variability and low sampling resolution. Approaches are required to utilise such information while consistently handling uncertainties. We present BayModTS (Bayesian Modelling of Time Series data), a new FAIR (Findable, Accessible, Interoperable and Reusable) workflow for processing and analysing sparse and highly variable time series data. BayModTS consistently transfers uncertainties from data to model predictions, including process knowledge via parameterised models. Further, credible differences in the dynamics of different conditions can be identified by filtering noise. To demonstrate the power and versatility of BayModTS, we applied it to three hepatic datasets gathered from three different species and with different measurement techniques: (i) blood perfusion measurements by magnetic resonance imaging in rat livers after portal vein ligation, (ii) pharmacokinetic time series of different drugs in normal and steatotic mice, and (iii) CT-based volumetric assessment of human liver remnants after clinical liver resection. The BayModTS codebase is available on GitHub at https://github.com/Systems-Theory-in-Systems-Biology/BayModTS. The repository contains a Python script for the executable BayModTS workflow and a widely applicable SBML (Systems Biology Markup Language) model for retarded transient functions. In addition, all examples from the paper are included in the repository. Data and code of the application examples are stored on DaRUS https://doi.org/10.18419/darus-3876. The raw MRI ROI voxel data were uploaded to DaRUS https://doi.org/10.18419/darus-3878. The steatosis metabolite data are published on FairdomHub 10.15490/fairdomhub.1.study.1070.1.}},
+  creationdate     = {2024-05-16T07:58:55},
+  doi              = {10.1093/bioinformatics/btae312},
+  eprint           = {https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btae312/57572667/btae312.pdf},
+  modificationdate = {2024-05-16T07:58:55},
+}
+
+@Misc{LakrisenkoPat2024,
+  author           = {Polina Lakrisenko and Dilan Pathirana and Daniel Weindl and Jan Hasenauer},
+  title            = {Exploration of methods for computing sensitivities in ODE models at dynamic and steady states},
+  year             = {2024},
+  archiveprefix    = {arXiv},
+  creationdate     = {2024-05-30T09:47:51},
+  eprint           = {2405.16524},
+  modificationdate = {2024-05-30T09:47:51},
+  primaryclass     = {q-bio.QM},
+}
+
+@Article{PhilippsKoe2024,
+  author           = {Maren Philipps and Antonia Körner and Jakob Vanhoefer and Dilan Pathirana and Jan Hasenauer},
+  title            = {Non-Negative Universal Differential Equations With Applications in Systems Biology},
+  year             = {2024},
+  journal          = {IFAC-PapersOnLine},
+  volume           = {58},
+  number           = {23},
+  pages            = {25--30},
+  issn             = {2405-8963},
+  doi              = {10.1016/j.ifacol.2024.10.005},
+  url              = {https://www.sciencedirect.com/science/article/pii/S2405896324017518},
+  abstract         = {Universal differential equations (UDEs) leverage the respective advantages of mechanistic models and artificial neural networks and combine them into one dynamic model. However, these hybrid models can suffer from unrealistic solutions, such as negative values for biochemical quantities. We present non-negative UDE (nUDEs), a constrained UDE variant that guarantees non-negative values. Furthermore, we explore regularisation techniques to improve generalisation and interpretability of UDEs.}
+}
+
+@Article{SchmiesterBra2024,
+  author           = {Schmiester, Leonard and Brasó-Maristany, Fara and González-Farré, Blanca and Pascual, Tomás and Gavilá, Joaquín and Tekpli, Xavier and Geisler, Jürgen and Kristensen, Vessela N. and Frigessi, Arnoldo and Prat, Aleix and Köhn-Luque, Alvaro},
+  journal          = {Clinical Cancer Research},
+  title            = {{Computational Model Predicts Patient Outcomes in Luminal B Breast Cancer Treated with Endocrine Therapy and CDK4/6 Inhibition}},
+  year             = {2024},
+  issn             = {1078-0432},
+  month            = {07},
+  pages            = {OF1-OF9},
+  abstract         = {{Development of a computational biomarker to predict, prior to treatment, the response to CDK4/6 inhibition (CDK4/6i) in combination with endocrine therapy in patients with breast cancer. A mechanistic mathematical model that accounts for protein signaling and drug mechanisms of action was developed and trained on extensive, publicly available data from breast cancer cell lines. The model was built to provide a patient-specific response score based on the expression of six genes (CCND1, CCNE1, ESR1, RB1, MYC, and CDKN1A). The model was validated in five independent cohorts of 148 patients in total with early-stage or advanced breast cancer treated with endocrine therapy and CDK4/6i. Response was measured either by evaluating Ki67 levels and PAM50 risk of relapse (ROR) after neoadjuvant treatment or by evaluating progression-free survival (PFS). The model showed significant association with patient’s outcomes in all five cohorts. The model predicted high Ki67 [area under the curve; AUC (95\% confidence interval, CI) of 0.80 (0.64–0.92), 0.81 (0.60–1.00) and 0.80 (0.65–0.93)] and high PAM50 ROR [AUC of 0.78 (0.64–0.89)]. This observation was not obtained in patients treated with chemotherapy. In the other cohorts, patient stratification based on the model prediction was significantly associated with PFS [hazard ratio (HR) = 2.92 (95\% CI, 1.08–7.86), P = 0.034 and HR = 2.16 (1.02–4.55), P = 0.043]. A mathematical modeling approach accurately predicts patient outcome following CDK4/6i plus endocrine therapy that marks a step toward more personalized treatments in patients with Luminal B breast cancer.}},
+  creationdate     = {2024-08-01T09:44:04},
+  doi              = {10.1158/1078-0432.CCR-24-0244},
+  eprint           = {https://aacrjournals.org/clincancerres/article-pdf/doi/10.1158/1078-0432.CCR-24-0244/3478451/ccr-24-0244.pdf},
+  modificationdate = {2024-08-01T09:44:04},
+  url              = {https://doi.org/10.1158/1078-0432.CCR-24-0244},
+}
+
+@InProceedings{JacksonCha2023,
+  author           = {Jackson, Clayton and Chardon, Matthieu and Wang, Y. Curtis and Rudi, Johann and Tresch, Matthew and Heckman, Charles J. and Quinn, Roger D.},
+  booktitle        = {Biomimetic and Biohybrid Systems},
+  title            = {Multimodal Parameter Inference for a Canonical Motor Microcircuit Controlling Rat Hindlimb Motion},
+  year             = {2023},
+  address          = {Cham},
+  editor           = {Meder, Fabian and Hunt, Alexander and Margheri, Laura and Mura, Anna and Mazzolai, Barbara},
+  pages            = {38--51},
+  publisher        = {Springer Nature Switzerland},
+  abstract         = {This work explored synaptic strengths in a computational neuroscience model of a controller for the hip joint of a rat which consists of Ia interneurons, Renshaw cells, and the associated motor neurons. This circuit has been referred to as the Canonical Motor Microcircuit (CMM). It is thought that the CMM acts to modulate motor neuron activity at the output stage. We first created a biomechanical model of a rat hindlimb consisting of a pelvis, femur, shin, foot, and flexor-extensor muscle pairs modeled with a Hill muscle model. We then modeled the CMM using non-spiking leaky-integrator neural models connected with conductance-based synapses. To tune the parameters in the network, we implemented an automated approach for parameter search using the Markov chain Monte Carlo (MCMC) method to solve a parameter estimation problem in a Bayesian inference framework. As opposed to traditional optimization techniques, the MCMC method identifies probability densities over the multidimensional space of parameters. This allows us to see a range of likely parameters that produce model outcomes consistent with animal data, determine if the distribution of likely parameters is uni- or multi-modal, as well as evaluate the significance and sensitivity of each parameter. This approach will allow for further analysis of the circuit, specifically, the function and significance of Ia feedback and Renshaw cells.},
+  creationdate     = {2024-09-06T15:49:21},
+  doi              = {10.1007/978-3-031-39504-8_3},
+  isbn             = {978-3-031-39504-8},
+  modificationdate = {2024-09-06T15:49:47},
+}
+
 @Comment{jabref-meta: databaseType:bibtex;}
diff --git a/pypesto/C.py b/pypesto/C.py
index a3c36af31..5a0e7438a 100644
--- a/pypesto/C.py
+++ b/pypesto/C.py
@@ -84,6 +84,10 @@ class EnsembleType(Enum):
 X0 = "x0"
 ID = "id"
 
+AMICI = "amici"
+ROADRUNNER = "roadrunner"
+PETAB = "petab"
+
 
 ###############################################################################
 # HIERARCHICAL SCALING + OFFSET
@@ -265,6 +269,9 @@ class InnerParameterType(str, Enum):
     "exponential_decay"  # temperature schedule for parallel tempering
 )
 BETA_DECAY = "beta_decay"  # temperature schedule for parallel tempering
+TRAPEZOID = "trapezoid"  # method to compute log evidence
+SIMPSON = "simpson"  # method to compute log evidence
+STEPPINGSTONE = "steppingstone"  # method to compute log evidence
 
 ###############################################################################
 # PREDICT
diff --git a/pypesto/ensemble/ensemble.py b/pypesto/ensemble/ensemble.py
index ecd3d7b85..5446e2103 100644
--- a/pypesto/ensemble/ensemble.py
+++ b/pypesto/ensemble/ensemble.py
@@ -555,6 +555,7 @@ def __init__(
     def from_sample(
         result: Result,
         remove_burn_in: bool = True,
+        ci_level: float = None,
         chain_slice: slice = None,
         x_names: Sequence[str] = None,
         lower_bound: np.ndarray = None,
@@ -571,6 +572,10 @@ def from_sample(
         remove_burn_in:
             Exclude parameter vectors from the ensemble if they are in the
             "burn-in".
+        ci_level:
+            A form of relative cutoff. Exclude parameter vectors, for which the
+            (non-normalized) posterior value is not within the `ci_level` best
+            values.
         chain_slice:
             Subset the chain with a slice. Any "burn-in" removal occurs first.
         x_names:
@@ -594,14 +599,23 @@ def from_sample(
             lower_bound = result.problem.lb
         if upper_bound is None:
             upper_bound = result.problem.ub
+        burn_in = 0
         if remove_burn_in:
             if result.sample_result.burn_in is None:
                 geweke_test(result)
             burn_in = result.sample_result.burn_in
             x_vectors = x_vectors[burn_in:]
+
+        # added cutoff
+        if ci_level is not None:
+            x_vectors = calculate_hpd(
+                result=result, burn_in=burn_in, ci_level=ci_level
+            )
+
         if chain_slice is not None:
             x_vectors = x_vectors[chain_slice]
         x_vectors = x_vectors.T
+
         return Ensemble(
             x_vectors=x_vectors,
             x_names=x_names,
@@ -1253,3 +1267,77 @@ def calculate_cutoff(
 
     range = chi2.ppf(q=percentile / 100, df=df)
     return fval_opt + range
+
+
+def calculate_hpd(
+    result: Result,
+    burn_in: int = 0,
+    ci_level: float = 0.95,
+):
+    """
+    Calculate Highest Posterior Density (HPD) samples.
+
+    The HPD is calculated for a user-defined credibility level (`ci_level`).
+    It contains all post-burn-in parameter vectors of the chain at index 0
+    whose negative log-posterior does not exceed the value found at position
+    `ci_level * chain_length` of the sorted negative log-posterior values.
+
+    Parameters
+    ----------
+    result:
+        The sampling result from which to create the ensemble.
+    burn_in:
+        Burn in index that is cut off before HPD is calculated.
+    ci_level:
+        Credibility level of the resulting HPD. 0.95 corresponds to the 95% CI.
+        Only values between 0 and 1 (both inclusive) are allowed.
+
+    Returns
+    -------
+    The HPD parameter vectors, one row per retained sample.
+
+    Raises
+    ------
+    ValueError
+        If `ci_level` is outside [0, 1] or no samples remain after burn-in.
+    """
+    if not 0 <= ci_level <= 1:
+        raise ValueError(
+            f"ci_level={ci_level} is not valid. Choose 0<=ci_level<=1."
+        )
+    # get names of chain parameters
+    param_names = result.problem.get_reduced_vector(result.problem.x_names)
+
+    # Get converged parameter samples as numpy arrays
+    chain = np.asarray(result.sample_result.trace_x[0, burn_in:, :])
+    neglogpost = result.sample_result.trace_neglogpost[0, burn_in:]
+
+    # Get converged chain length
+    chain_length = len(neglogpost)
+    if chain_length == 0:
+        raise ValueError(
+            f"No samples left after removing burn_in={burn_in} samples."
+        )
+
+    # create df first, as we need to match neglogpost to the according
+    # parameter values
+    pd_params = pd.DataFrame(chain, columns=param_names)
+    pd_fval = pd.DataFrame(neglogpost, columns=["neglogPosterior"])
+    params_df = pd.concat([pd_params, pd_fval], axis=1, ignore_index=False)
+
+    # sort neglogpost values of the chain without burn-in; the threshold is
+    # the value at position `ci_level * chain_length`. The index is clamped
+    # to the last element, as `ci_level=1` would otherwise be out of bounds.
+    neglogpost_sort = np.sort(neglogpost)
+    threshold_index = min(int(chain_length * ci_level), chain_length - 1)
+    neglogpost_lower_bound = neglogpost_sort[threshold_index]
+
+    # cut posterior to hpd: keep samples at least as probable as the threshold
+    hpd_params_df = params_df[
+        params_df["neglogPosterior"] <= neglogpost_lower_bound
+    ]
+
+    # convert df to ensemble vector
+    hpd_params_df_vals_only = hpd_params_df.drop(columns=["neglogPosterior"])
+    hpd_ensemble_vector = hpd_params_df_vals_only.to_numpy()
+    return hpd_ensemble_vector
diff --git a/pypesto/hierarchical/base_parameter.py b/pypesto/hierarchical/base_parameter.py
index e58cc7d8c..6692f8728 100644
--- a/pypesto/hierarchical/base_parameter.py
+++ b/pypesto/hierarchical/base_parameter.py
@@ -68,6 +68,23 @@ def __init__(
 
         if scale not in {LIN, LOG, LOG10}:
             raise ValueError(f"Scale not recognized: {scale}.")
+
+        if (
+            scale in [LOG, LOG10]
+            and inner_parameter_type == InnerParameterType.SIGMA
+        ):
+            raise ValueError(
+                f"Inner parameter type `{inner_parameter_type}` "
+                f"cannot be log-scaled."
+            )
+
+        if scale in [LOG, LOG10] and lb <= 0:
+            raise ValueError(
+                f"Lower bound of inner parameter `{inner_parameter_id}` "
+                f"cannot be non-positive for log-scaled parameters. "
+                f"Provide a positive lower bound."
+            )
+
         self.scale = scale
 
         if inner_parameter_type not in (
diff --git a/pypesto/hierarchical/base_problem.py b/pypesto/hierarchical/base_problem.py
index 8adb5c721..7482e95c9 100644
--- a/pypesto/hierarchical/base_problem.py
+++ b/pypesto/hierarchical/base_problem.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pandas as pd
 
+from ..C import LIN, LOG, LOG10
 from .base_parameter import InnerParameter
 
 try:
@@ -82,6 +83,10 @@ def get_interpretable_x_ids(self) -> list[str]:
         """
         return list(self.xs.keys())
 
+    def get_interpretable_x_scales(self) -> list[str]:
+        """Return the scale of every interpretable inner parameter."""
+        return [inner_par.scale for inner_par in self.xs.values()]
+
     def get_xs_for_type(
         self, inner_parameter_type: str
     ) -> list[InnerParameter]:
@@ -119,7 +124,9 @@ def get_for_id(self, inner_parameter_id: str) -> InnerParameter:
         try:
             return self.xs[inner_parameter_id]
         except KeyError:
-            raise KeyError(f"Cannot find parameter with id {id}.") from None
+            raise KeyError(
+                f"Cannot find parameter with id {inner_parameter_id}."
+            ) from None
 
     def is_empty(self) -> bool:
         """Check for emptiness.
@@ -222,15 +229,37 @@ def scale_value_dict(
 
 def scale_value(val: float | np.array, scale: str) -> float | np.array:
     """Scale a single value."""
-    if scale == "lin":
+    if scale == LIN:
         return val
-    if scale == "log":
+    if scale == LOG:
         return np.log(val)
-    if scale == "log10":
+    if scale == LOG10:
         return np.log10(val)
     raise ValueError(f"Scale {scale} not recognized.")
 
 
+def scale_back_value_dict(
+    dct: dict[str, float], problem: InnerProblem
+) -> dict[str, float]:
+    """Scale back all values of a dictionary keyed by inner parameter ID."""
+    # the scale of each entry is looked up via its inner parameter
+    return {
+        par_id: scale_back_value(value, problem.get_for_id(par_id).scale)
+        for par_id, value in dct.items()
+    }
+
+
+def scale_back_value(val: float | np.array, scale: str) -> float | np.array:
+    """Transform a single value from the given scale back to linear."""
+    if scale == LOG10:
+        return 10**val
+    if scale == LOG:
+        return np.exp(val)
+    if scale == LIN:
+        return val
+    raise ValueError(f"Scale {scale} not recognized.")
+
+
 def ix_matrices_from_arrays(
     ixs: dict[str, list[tuple[int, int, int]]], edatas: list[np.array]
 ) -> dict[str, list[np.array]]:
diff --git a/pypesto/hierarchical/inner_calculator_collector.py b/pypesto/hierarchical/inner_calculator_collector.py
index 8713ca33c..e89f4ecd1 100644
--- a/pypesto/hierarchical/inner_calculator_collector.py
+++ b/pypesto/hierarchical/inner_calculator_collector.py
@@ -286,6 +286,14 @@ def get_interpretable_inner_par_bounds(
             ub.extend(ub_i)
         return np.asarray(lb), np.asarray(ub)
 
+    def get_interpretable_inner_par_scales(self) -> list[str]:
+        """Collect interpretable inner parameter scales over all calculators."""
+        scales = []
+        for calculator in self.inner_calculators:
+            inner_problem = calculator.inner_problem
+            scales.extend(inner_problem.get_interpretable_x_scales())
+        return scales
+
     def __call__(
         self,
         x_dct: dict,
diff --git a/pypesto/hierarchical/ordinal/problem.py b/pypesto/hierarchical/ordinal/problem.py
index e7967d3bf..85d46312a 100644
--- a/pypesto/hierarchical/ordinal/problem.py
+++ b/pypesto/hierarchical/ordinal/problem.py
@@ -193,6 +193,13 @@ def get_interpretable_x_ids(self) -> list[str]:
         """
         return []
 
+    def get_interpretable_x_scales(self) -> list[str]:
+        """Get scales of interpretable inner parameters.
+
+        The ordinal problem has none, so the returned list is always empty.
+        """
+        return []
+
     def get_groups_for_xs(self, inner_parameter_type: str) -> list[int]:
         """Get unique list of ``OptimalScalingParameter.group`` values."""
         groups = [x.group for x in self.get_xs_for_type(inner_parameter_type)]
diff --git a/pypesto/hierarchical/petab.py b/pypesto/hierarchical/petab.py
index 278f01116..c1b85b3be 100644
--- a/pypesto/hierarchical/petab.py
+++ b/pypesto/hierarchical/petab.py
@@ -1,5 +1,6 @@
 """Helper methods for hierarchical optimization with PEtab."""
 
+import warnings
 from typing import Literal
 
 import pandas as pd
@@ -94,14 +95,35 @@ def validate_hierarchical_petab_problem(petab_problem: petab.Problem) -> None:
         and not (
             inner_parameter_table[petab.PARAMETER_SCALE].isna()
             | (inner_parameter_table[petab.PARAMETER_SCALE] == petab.LIN)
+            | (
+                inner_parameter_table[PARAMETER_TYPE]
+                != InnerParameterType.SIGMA
+            )
         ).all()
     ):
         sub_df = inner_parameter_table.loc[
             :, [PARAMETER_TYPE, petab.PARAMETER_SCALE]
         ]
         raise NotImplementedError(
-            "Only parameterScale=lin supported for parameters of the inner "
-            f"subproblem.\n{sub_df}"
+            "LOG and LOG10 parameter scale of inner parameters is not supported "
+            "for sigma parameters. Inner parameter table:\n"
+            f"{sub_df}"
+        )
+    elif (
+        petab.PARAMETER_SCALE in inner_parameter_table
+        and not (
+            inner_parameter_table[petab.PARAMETER_SCALE].isna()
+            | (inner_parameter_table[petab.PARAMETER_SCALE] == petab.LIN)
+        ).all()
+    ):
+        sub_df = inner_parameter_table.loc[
+            :, [PARAMETER_TYPE, petab.PARAMETER_SCALE]
+        ]
+        warnings.warn(
+            f"LOG and LOG10 parameter scale of inner parameters is used only "
+            f"for their visualization, and does not affect their optimization. "
+            f"Inner parameter table:\n{sub_df}",
+            stacklevel=1,
         )
 
     inner_parameter_df = validate_measurement_formulae(
diff --git a/pypesto/hierarchical/relative/solver.py b/pypesto/hierarchical/relative/solver.py
index 00b7c93ed..930fdeba2 100644
--- a/pypesto/hierarchical/relative/solver.py
+++ b/pypesto/hierarchical/relative/solver.py
@@ -11,7 +11,11 @@
 from ...optimize import minimize
 from ...problem import Problem
 from ..base_parameter import InnerParameter
-from ..base_problem import InnerProblem, scale_value_dict
+from ..base_problem import (
+    InnerProblem,
+    scale_back_value_dict,
+    scale_value_dict,
+)
 from ..base_solver import InnerSolver
 from .util import (
     apply_offset,
@@ -62,6 +66,8 @@ def calculate_obj_function(
         relevant_data = copy.deepcopy(problem.data)
         sim = copy.deepcopy(sim)
         sigma = copy.deepcopy(sigma)
+        inner_parameters = copy.deepcopy(inner_parameters)
+        inner_parameters = scale_back_value_dict(inner_parameters, problem)
 
         for x in problem.get_xs_for_type(InnerParameterType.OFFSET):
             apply_offset(
@@ -140,6 +146,8 @@ def calculate_gradients(
         relevant_data = copy.deepcopy(problem.data)
         sim = copy.deepcopy(sim)
         sigma = copy.deepcopy(sigma)
+        inner_parameters = copy.deepcopy(inner_parameters)
+        inner_parameters = scale_back_value_dict(inner_parameters, problem)
 
         # restructure sensitivities to have parameter index as second index
         ssim = [
diff --git a/pypesto/hierarchical/semiquantitative/problem.py b/pypesto/hierarchical/semiquantitative/problem.py
index 3b4705278..27a54ffba 100644
--- a/pypesto/hierarchical/semiquantitative/problem.py
+++ b/pypesto/hierarchical/semiquantitative/problem.py
@@ -39,6 +39,7 @@
         NOISE_PARAMETERS,
         OBSERVABLE_ID,
         PARAMETER_ID,
+        PARAMETER_SCALE,
         UPPER_BOUND,
     )
 except ImportError:
@@ -149,6 +150,18 @@ def get_interpretable_x_ids(self) -> list[str]:
             if x.inner_parameter_type == InnerParameterType.SIGMA
         ]
 
+    def get_interpretable_x_scales(self) -> list[str]:
+        """Return the scales of the interpretable inner parameters.
+
+        Only the noise (sigma) parameters of the semiquantitative problem
+        are interpretable, hence all other inner parameters are skipped.
+        """
+        sigma_scales = []
+        for inner_par in self.xs.values():
+            if inner_par.inner_parameter_type == InnerParameterType.SIGMA:
+                sigma_scales.append(inner_par.scale)
+        return sigma_scales
+
     def get_semiquant_observable_ids(self) -> list[str]:
         """Get the IDs of semiquantitative observables."""
         return list(
@@ -420,7 +433,7 @@ def noise_inner_parameters_from_parameter_df(
             SplineInnerParameter(
                 inner_parameter_id=row[PARAMETER_ID],
                 inner_parameter_type=InnerParameterType.SIGMA,
-                scale=LIN,
+                scale=row[PARAMETER_SCALE],
                 lb=row[LOWER_BOUND],
                 ub=row[UPPER_BOUND],
                 observable_id=observable_id,
diff --git a/pypesto/objective/__init__.py b/pypesto/objective/__init__.py
index ad4b3ba30..48da2a97d 100644
--- a/pypesto/objective/__init__.py
+++ b/pypesto/objective/__init__.py
@@ -8,8 +8,10 @@
 from .base import ObjectiveBase
 from .finite_difference import FD, FDDelta
 from .function import Objective
+from .petab import PetabSimulatorObjective
 from .priors import (
     NegLogParameterPriors,
     NegLogPriors,
     get_parameter_prior_dict,
 )
+from .roadrunner import RoadRunnerObjective
diff --git a/pypesto/objective/aesara/__init__.py b/pypesto/objective/aesara/__init__.py
deleted file mode 100644
index 6f2fd79d1..000000000
--- a/pypesto/objective/aesara/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""
-Aesara objective
-================
-"""
-
-from .base import AesaraObjective
diff --git a/pypesto/objective/aesara/base.py b/pypesto/objective/aesara/base.py
deleted file mode 100644
index ce7ad8c22..000000000
--- a/pypesto/objective/aesara/base.py
+++ /dev/null
@@ -1,282 +0,0 @@
-"""
-Aesara models interface.
-
-Adds an interface for the construction of loss functions
-incorporating aesara models. This permits computation of derivatives using a
-combination of objective based methods and aesara based backpropagation.
-"""
-
-import copy
-from collections.abc import Sequence
-from typing import Optional
-
-import numpy as np
-
-from ...C import FVAL, GRAD, HESS, MODE_FUN, RDATAS, ModeType
-from ..base import ObjectiveBase, ResultDict
-
-try:
-    import aesara
-    import aesara.tensor as aet
-    from aesara.tensor import Op
-    from aesara.tensor.var import TensorVariable
-except ImportError:
-    raise ImportError(
-        "Using an aeasara objective requires an installation of "
-        "the python package aesara. Please install aesara via "
-        "`pip install aesara`."
-    ) from None
-
-
-class AesaraObjective(ObjectiveBase):
-    """
-    Wrapper around an ObjectiveBase.
-
-    Computes the gradient at each evaluation, caching it for later calls.
-    Caching is only enabled after the first time the gradient is asked for
-    and disabled whenever the cached gradient is not used, in order not to
-    increase computation time for derivative-free samplers.
-
-    Parameters
-    ----------
-    objective:
-        The `pypesto.ObjectiveBase` to wrap.
-    aet_x:
-        Tensor variables that define the variables of `aet_fun`
-    aet_fun:
-        Aesara function that maps `aet_x` to the variables of `objective`
-    coeff:
-        Multiplicative coefficient for objective
-    """
-
-    def __init__(
-        self,
-        objective: ObjectiveBase,
-        aet_x: TensorVariable,
-        aet_fun: TensorVariable,
-        coeff: Optional[float] = 1.0,
-        x_names: Sequence[str] = None,
-    ):
-        if not isinstance(objective, ObjectiveBase):
-            raise TypeError("objective must be an ObjectiveBase instance")
-        if not objective.check_mode(MODE_FUN):
-            raise NotImplementedError(
-                f"objective must support mode={MODE_FUN}"
-            )
-        super().__init__(x_names)
-        self.base_objective = objective
-
-        self.aet_x = aet_x
-        self.aet_fun = aet_fun
-        self._coeff = coeff
-
-        self.obj_op = AesaraObjectiveOp(self, self._coeff)
-
-        # compiled function
-        if objective.has_fun:
-            self.afun = aesara.function([aet_x], self.obj_op(aet_fun))
-
-        # compiled gradient
-        if objective.has_grad:
-            self.agrad = aesara.function(
-                [aet_x], aesara.grad(self.obj_op(aet_fun), [aet_x])
-            )
-
-        # compiled hessian
-        if objective.has_hess:
-            self.ahess = aesara.function(
-                [aet_x], aesara.gradient.hessian(self.obj_op(aet_fun), [aet_x])
-            )
-
-        # compiled input mapping
-        self.infun = aesara.function([aet_x], aet_fun)
-
-        # temporary storage for evaluation results of objective
-        self.cached_base_ret: ResultDict = {}
-
-    def check_mode(self, mode: ModeType) -> bool:
-        """See `ObjectiveBase` documentation."""
-        return mode == MODE_FUN
-
-    def check_sensi_orders(self, sensi_orders, mode: ModeType) -> bool:
-        """See `ObjectiveBase` documentation."""
-        if not self.check_mode(mode):
-            return False
-        else:
-            return self.base_objective.check_sensi_orders(sensi_orders, mode)
-
-    def call_unprocessed(
-        self,
-        x: np.ndarray,
-        sensi_orders: tuple[int, ...],
-        mode: ModeType,
-        return_dict: bool,
-        **kwargs,
-    ) -> ResultDict:
-        """
-        See `ObjectiveBase` for more documentation.
-
-        Main method to overwrite from the base class. It handles and
-        delegates the actual objective evaluation.
-        """
-        # hess computation in aesara requires grad
-        if 2 in sensi_orders and 1 not in sensi_orders:
-            sensi_orders = (1, *sensi_orders)
-
-        # this computes all the results from the inner objective, rendering
-        # them accessible to aesara compiled functions
-
-        set_return_dict, return_dict = (
-            "return_dict" in kwargs,
-            kwargs.pop("return_dict", False),
-        )
-        self.cached_base_ret = self.base_objective(
-            self.infun(x), sensi_orders, mode, return_dict=True, **kwargs
-        )
-        if set_return_dict:
-            kwargs["return_dict"] = return_dict
-        ret = {}
-        if RDATAS in self.cached_base_ret:
-            ret[RDATAS] = self.cached_base_ret[RDATAS]
-        if 0 in sensi_orders:
-            ret[FVAL] = float(self.afun(x))
-        if 1 in sensi_orders:
-            ret[GRAD] = self.agrad(x)[0]
-        if 2 in sensi_orders:
-            ret[HESS] = self.ahess(x)[0]
-
-        return ret
-
-    def __deepcopy__(self, memodict=None):
-        other = AesaraObjective(
-            copy.deepcopy(self.base_objective),
-            self.aet_x,
-            self.aet_fun,
-            self._coeff,
-        )
-
-        return other
-
-
-class AesaraObjectiveOp(Op):
-    """
-    Aesara wrapper around a (non-normalized) log-probability function.
-
-    Parameters
-    ----------
-    obj:
-        Base aesara objective
-    coeff:
-        Multiplicative coefficient for the objective function value
-    """
-
-    itypes = [aet.dvector]  # expects a vector of parameter values when called
-    otypes = [aet.dscalar]  # outputs a single scalar value (the log prob)
-
-    def __init__(self, obj: AesaraObjective, coeff: Optional[float] = 1.0):
-        self._objective: AesaraObjective = obj
-        self._coeff: float = coeff
-
-        # initialize the sensitivity Op
-        if obj.has_grad:
-            self._log_prob_grad = AesaraObjectiveGradOp(obj, coeff)
-        else:
-            self._log_prob_grad = None
-
-    def perform(self, node, inputs, outputs, params=None):  # noqa
-        # note that we use precomputed values from the outer
-        # AesaraObjective.call_unprocessed here, which means we can
-        # ignore inputs here
-        log_prob = self._coeff * self._objective.cached_base_ret[FVAL]
-        outputs[0][0] = np.array(log_prob)
-
-    def grad(self, inputs, g):
-        """
-        Calculate the hessian.
-
-        Actually returns the vector-hessian product - g[0] is a vector of
-        parameter values.
-        """
-        if self._log_prob_grad is None:
-            return super().grad(inputs, g)
-        (theta,) = inputs
-        log_prob_grad = self._log_prob_grad(theta)
-        return [g[0] * log_prob_grad]
-
-
-class AesaraObjectiveGradOp(Op):
-    """
-    Aesara wrapper around a (non-normalized) log-probability gradient function.
-
-    This Op will be called with a vector of values and also return a vector of
-    values - the gradients in each dimension.
-
-    Parameters
-    ----------
-    obj:
-        Base aesara objective
-    coeff:
-        Multiplicative coefficient for the objective function value
-    """
-
-    itypes = [aet.dvector]  # expects a vector of parameter values when called
-    otypes = [aet.dvector]  # outputs a vector (the log prob grad)
-
-    def __init__(self, obj: AesaraObjective, coeff: Optional[float] = 1.0):
-        self._objective: AesaraObjective = obj
-        self._coeff: float = coeff
-
-        if obj.has_hess:
-            self._log_prob_hess = AesaraObjectiveHessOp(obj, coeff)
-        else:
-            self._log_prob_hess = None
-
-    def perform(self, node, inputs, outputs, params=None):  # noqa
-        # note that we use precomputed values from the outer
-        # AesaraObjective.call_unprocessed here, which means we can
-        # ignore inputs here
-        log_prob_grad = self._coeff * self._objective.cached_base_ret[GRAD]
-        outputs[0][0] = log_prob_grad
-
-    def grad(self, inputs, g):
-        """
-        Calculate the hessian.
-
-        Actually returns the vector-hessian product - g[0] is a vector of
-        parameter values.
-        """
-        if self._log_prob_hess is None:
-            return super().grad(inputs, g)
-        (theta,) = inputs
-        log_prob_hess = self._log_prob_hess(theta)
-        return [g[0].dot(log_prob_hess)]
-
-
-class AesaraObjectiveHessOp(Op):
-    """
-    Aesara wrapper around a (non-normalized) log-probability Hessian function.
-
-    This Op will be called with a vector of values and also return a matrix of
-    values - the Hessian in each dimension.
-
-    Parameters
-    ----------
-    obj:
-        Base aesara objective
-    coeff:
-        Multiplicative coefficient for the objective function value
-    """
-
-    itypes = [aet.dvector]
-    otypes = [aet.dmatrix]
-
-    def __init__(self, obj: AesaraObjective, coeff: Optional[float] = 1.0):
-        self._objective: AesaraObjective = obj
-        self._coeff: float = coeff
-
-    def perform(self, node, inputs, outputs, params=None):  # noqa
-        # note that we use precomputed values from the outer
-        # AesaraObjective.call_unprocessed here, which means we can
-        # ignore inputs here
-        log_prob_hess = self._coeff * self._objective.cached_base_ret[HESS]
-        outputs[0][0] = log_prob_hess
diff --git a/pypesto/objective/finite_difference.py b/pypesto/objective/finite_difference.py
index 4bf7a505f..fe1a1fdac 100644
--- a/pypesto/objective/finite_difference.py
+++ b/pypesto/objective/finite_difference.py
@@ -327,6 +327,7 @@ def __init__(
         self.delta_grad: FDDelta = to_delta(delta_grad)
         self.delta_res: FDDelta = to_delta(delta_res)
         self.method: str = method
+        self.pre_post_processor = obj.pre_post_processor
 
         if method not in FD.METHODS:
             raise ValueError(
diff --git a/pypesto/objective/julia/petabJl.py b/pypesto/objective/julia/petabJl.py
index 84de76999..f9ddefb22 100644
--- a/pypesto/objective/julia/petabJl.py
+++ b/pypesto/objective/julia/petabJl.py
@@ -62,10 +62,10 @@ def __init__(
         self.petab_jl_problem = petab_jl_problem
 
         # get functions
-        fun = self.petab_jl_problem.compute_cost
-        grad = self.petab_jl_problem.compute_gradient
-        hess = self.petab_jl_problem.compute_hessian
-        x_names = np.asarray(self.petab_jl_problem.θ_names)
+        fun = self.petab_jl_problem.nllh
+        grad = self.petab_jl_problem.grad
+        hess = self.petab_jl_problem.hess
+        x_names = np.asarray(self.petab_jl_problem.xnames)
 
         # call the super super super constructor
         super(JuliaObjective, self).__init__(
@@ -105,10 +105,10 @@ def __setstate__(self, state):
         self.petab_jl_problem = petab_jl_problem
 
         # get functions
-        fun = self.petab_jl_problem.compute_cost
-        grad = self.petab_jl_problem.compute_gradient
-        hess = self.petab_jl_problem.compute_hessian
-        x_names = np.asarray(self.petab_jl_problem.θ_names)
+        fun = self.petab_jl_problem.nllh
+        grad = self.petab_jl_problem.grad
+        hess = self.petab_jl_problem.hess
+        x_names = np.asarray(self.petab_jl_problem.xnames)
 
         # call the super super constructor
         super(JuliaObjective, self).__init__(fun, grad, hess, x_names)
diff --git a/pypesto/objective/julia/petab_jl_importer.py b/pypesto/objective/julia/petab_jl_importer.py
index 3d76900b5..b5b0b444e 100644
--- a/pypesto/objective/julia/petab_jl_importer.py
+++ b/pypesto/objective/julia/petab_jl_importer.py
@@ -309,7 +309,7 @@ def _write_julia_file(
         f"found at {link_to_options}\n"
         f"petabProblem = PEtabODEProblem(\n\t"
         f"petabModel,\n\t"
-        f"ode_solver=ODESolver({odeSolvOpt_str}),\n\t"
+        f"odesolver=ODESolver({odeSolvOpt_str}),\n\t"
         f"gradient_method=:{options['gradient_method']},\n\t"
         f"hessian_method=:{options['hessian_method']},\n\t"
         f"sparse_jacobian={options['sparse_jacobian']},\n\t"
diff --git a/pypesto/objective/petab.py b/pypesto/objective/petab.py
new file mode 100644
index 000000000..e40513f59
--- /dev/null
+++ b/pypesto/objective/petab.py
@@ -0,0 +1,102 @@
+"""Objective function for PEtab models using the PEtab simulator."""
+from __future__ import annotations
+
+try:
+    import petab.v1 as petab
+    from petab.v1.simulate import Simulator as PetabSimulator
+except ImportError:
+    petab = None
+from collections import OrderedDict
+from collections.abc import Sequence
+
+import numpy as np
+
+from ..C import FVAL, MODE_FUN, MODE_RES, RES, ModeType
+from .base import ObjectiveBase, ResultDict
+
+
+class PetabSimulatorObjective(ObjectiveBase):
+    """Objective function for PEtab models using the PEtab simulator."""
+
+    def __init__(
+        self,
+        simulator: PetabSimulator,
+        x_names: Sequence[str] | None = None,
+    ):
+        """Initialize the PEtab simulator objective function.
+
+        Parameters
+        ----------
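+        simulator:
+            The PEtab simulator. The PEtab problem is read from its
+            ``petab_problem`` attribute and, when ``x_names`` is None,
+            supplies the parameter IDs via ``get_x_ids()``.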
+        x_names:
+            Names of optimization parameters.
+        """
+        if petab is None:
+            raise ImportError(
+                "The `petab` package is required for this objective function."
+            )
+        self.simulator = simulator
+        self.petab_problem = self.simulator.petab_problem
+        if x_names is None:
+            x_names = list(self.petab_problem.get_x_ids())
+        super().__init__(x_names=x_names)
+
+    def replace_parameters(self, x: np.ndarray):
+        """Replace the parameters in the PEtab problem with the given values.
+
+        Parameters
+        ----------
+        x:
+            Parameter vector for optimization.
+        """
+        x_dict = OrderedDict(zip(self._x_names, x))
+        x_unscaled = self.petab_problem.unscale_parameters(x_dict)
+        par_df = self.petab_problem.parameter_df
+        par_df["nominalValue"] = par_df.index.map(x_unscaled)
+        self.simulator.set_parameters(x_unscaled)
+
+    def call_unprocessed(
+        self,
+        x: np.ndarray,
+        sensi_orders: tuple[int, ...],
+        mode: ModeType,
+        return_dict: bool,
+        **kwargs,
+    ) -> ResultDict:
+        """See :meth:`ObjectiveBase.call_unprocessed`."""
+
+        self.replace_parameters(x)
+        sim_df = self.simulator.simulate(noise=False, as_measurement=False)
+        result = {}
+        result["simulations"] = sim_df
+        if mode == MODE_FUN:
+            result[FVAL] = -petab.calculate_llh(
+                measurement_dfs=self.petab_problem.measurement_df,
+                simulation_dfs=sim_df,
+                observable_dfs=self.petab_problem.observable_df,
+                parameter_dfs=self.petab_problem.parameter_df,
+            )
+        elif mode == MODE_RES:
+            result[RES] = petab.calculate_residuals(
+                measurement_dfs=self.petab_problem.measurement_df,
+                simulation_dfs=sim_df,
+                observable_dfs=self.petab_problem.observable_df,
+                parameter_dfs=self.petab_problem.parameter_df,
+            )
+        return result
+
+    def check_sensi_orders(
+        self,
+        sensi_orders: tuple[int, ...],
+        mode: ModeType,
+    ) -> bool:
+        """See :class:`ObjectiveBase` documentation."""
+        if not sensi_orders:
+            return True
+        sensi_order = max(sensi_orders)
+        max_sensi_order = 0
+
+        return sensi_order <= max_sensi_order
diff --git a/pypesto/objective/priors.py b/pypesto/objective/priors.py
index e460c1572..4ffcdaf6a 100644
--- a/pypesto/objective/priors.py
+++ b/pypesto/objective/priors.py
@@ -3,6 +3,7 @@
 from collections.abc import Sequence
 from typing import Callable, Union
 
+import cloudpickle
 import numpy as np
 
 from .. import C
@@ -67,6 +68,14 @@ def __init__(
         self.prior_list = prior_list
         super().__init__(x_names)
 
+    def __getstate__(self):
+        """Get state using cloudpickle."""
+        return cloudpickle.dumps(self.__dict__)
+
+    def __setstate__(self, state):
+        """Set state using cloudpickle."""
+        self.__dict__.update(cloudpickle.loads(state))
+
     def call_unprocessed(
         self,
         x: np.ndarray,
diff --git a/pypesto/objective/roadrunner/__init__.py b/pypesto/objective/roadrunner/__init__.py
index 3b6e800e2..b87ab9dc0 100644
--- a/pypesto/objective/roadrunner/__init__.py
+++ b/pypesto/objective/roadrunner/__init__.py
@@ -2,6 +2,12 @@
 RoadRunner objective
 ====================
 """
+__all__ = [
+    "PetabImporterRR",
+    "RoadRunnerCalculator",
+    "ExpData",
+    "SolverOptions",
+]
 
 from .petab_importer_roadrunner import PetabImporterRR
 from .road_runner import RoadRunnerObjective
diff --git a/pypesto/objective/roadrunner/petab_importer_roadrunner.py b/pypesto/objective/roadrunner/petab_importer_roadrunner.py
index 0b674b755..d41c31250 100644
--- a/pypesto/objective/roadrunner/petab_importer_roadrunner.py
+++ b/pypesto/objective/roadrunner/petab_importer_roadrunner.py
@@ -9,24 +9,25 @@
 import logging
 import numbers
 import re
+import warnings
 from collections.abc import Iterable
 from pathlib import Path
 from typing import Any
 
-import libsbml
-import petab.v1 as petab
-import roadrunner
-from petab.v1.C import (
-    OBSERVABLE_FORMULA,
-    PREEQUILIBRATION_CONDITION_ID,
-    SIMULATION_CONDITION_ID,
-)
-from petab.v1.models.sbml_model import SbmlModel
-from petab.v1.parameter_mapping import ParMappingDictQuadruple
+try:
+    import petab.v1 as petab
+    from petab.v1.C import (
+        OBSERVABLE_FORMULA,
+        PREEQUILIBRATION_CONDITION_ID,
+        SIMULATION_CONDITION_ID,
+    )
+    from petab.v1.models.sbml_model import SbmlModel
+    from petab.v1.parameter_mapping import ParMappingDictQuadruple
+except ImportError:
+    petab = None
 
 import pypesto.C
 
-from ...petab.importer import PetabStartpoints
 from ...problem import Problem
 from ...startpoint import StartpointMethod
 from ..aggregated import AggregatedObjective
@@ -35,6 +36,13 @@
 from .roadrunner_calculator import RoadRunnerCalculator
 from .utils import ExpData
 
+try:
+    import libsbml
+    import roadrunner
+except ImportError:
+    roadrunner = None
+    libsbml = None
+
 logger = logging.getLogger(__name__)
 
 
@@ -62,6 +70,14 @@ def __init__(
         validate_petab:
             Flag indicating if the PEtab problem shall be validated.
         """
+        warnings.warn(
+            "The RoadRunner importer is deprecated and will be removed in "
+            "future versions. Please use the generic PetabImporter instead "
+            "with `simulator_type='roadrunner'`. Everything else will stay "
+            "same.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self.petab_problem = petab_problem
         if validate_petab:
             if petab.lint_problem(petab_problem):
@@ -288,6 +304,7 @@ def create_objective(
             petab_problem=self.petab_problem,
             calculator=calculator,
             x_names=x_names,
+            x_ids=x_names,
         )
 
     def create_prior(self) -> NegLogParameterPriors | None:
@@ -344,6 +361,8 @@ def create_startpoint_method(self, **kwargs) -> StartpointMethod:
             Additional keyword arguments passed on to
             :meth:`pypesto.startpoint.FunctionStartpoints.__init__`.
         """
+        from ...petab.util import PetabStartpoints
+
         return PetabStartpoints(petab_problem=self.petab_problem, **kwargs)
 
     def create_problem(
diff --git a/pypesto/objective/roadrunner/road_runner.py b/pypesto/objective/roadrunner/road_runner.py
index c2f88de80..ce2ce1820 100644
--- a/pypesto/objective/roadrunner/road_runner.py
+++ b/pypesto/objective/roadrunner/road_runner.py
@@ -2,20 +2,28 @@
 
 Currently does not support sensitivities.
 """
+from __future__ import annotations
+
 from collections import OrderedDict
 from collections.abc import Sequence
-from typing import Optional, Union
 
 import numpy as np
-import roadrunner
-from petab.v1 import Problem as PetabProblem
-from petab.v1.parameter_mapping import ParMappingDictQuadruple
 
 from ...C import MODE_FUN, MODE_RES, ROADRUNNER_INSTANCE, X_NAMES, ModeType
 from ..base import ObjectiveBase
 from .roadrunner_calculator import RoadRunnerCalculator
 from .utils import ExpData, SolverOptions
 
+try:
+    from petab.v1 import Problem as PetabProblem
+    from petab.v1.parameter_mapping import ParMappingDictQuadruple
+except ImportError:
+    petab = None
+try:
+    import roadrunner
+except ImportError:
+    roadrunner = None
+
 
 class RoadRunnerObjective(ObjectiveBase):
     """Objective function for RoadRunner models.
@@ -26,12 +34,13 @@ class RoadRunnerObjective(ObjectiveBase):
     def __init__(
         self,
         rr: roadrunner.RoadRunner,
-        edatas: Union[Sequence[ExpData], ExpData],
+        edatas: Sequence[ExpData] | ExpData,
         parameter_mapping: list[ParMappingDictQuadruple],
         petab_problem: PetabProblem,
-        calculator: Optional[RoadRunnerCalculator] = None,
-        x_names: Optional[Sequence[str]] = None,
-        solver_options: Optional[SolverOptions] = None,
+        calculator: RoadRunnerCalculator | None = None,
+        x_ids: Sequence[str] | None = None,
+        x_names: Sequence[str] | None = None,
+        solver_options: SolverOptions | None = None,
     ):
         """Initialize the RoadRunner objective function.
 
@@ -52,6 +61,8 @@ def __init__(
             Might be removed later.
         calculator:
             The calculator to use. If None, a new instance is created.
+        x_ids:
+            IDs of RoadRunner parameters. Includes fixed parameters as well.
         x_names:
             Names of optimization parameters.
         """
@@ -68,6 +79,11 @@ def __init__(
         if solver_options is None:
             solver_options = SolverOptions()
         self.solver_options = solver_options
+        if x_ids is None:
+            x_ids = list(rr.model.getGlobalParameterIds())
+        self.x_ids = x_ids
+        if x_names is None:
+            x_names = x_ids
         super().__init__(x_names=x_names)
 
     def get_config(self) -> dict:
@@ -87,7 +103,7 @@ def __call__(
         mode: ModeType = MODE_FUN,
         return_dict: bool = False,
         **kwargs,
-    ) -> Union[float, np.ndarray, dict]:
+    ) -> float | np.ndarray | dict:
         """See :class:`ObjectiveBase` documentation."""
         return super().__call__(x, sensi_orders, mode, return_dict, **kwargs)
 
@@ -97,8 +113,8 @@ def call_unprocessed(
         sensi_orders: tuple[int, ...],
         mode: ModeType,
         return_dict: bool,
-        edatas: Optional[Sequence[ExpData]] = None,
-        parameter_mapping: Optional[list[ParMappingDictQuadruple]] = None,
+        edatas: Sequence[ExpData] | None = None,
+        parameter_mapping: list[ParMappingDictQuadruple] | None = None,
     ) -> dict:
         """
         Call objective function without pre- or post-processing and formatting.
@@ -114,7 +130,7 @@ def call_unprocessed(
         if parameter_mapping is None:
             parameter_mapping = self.parameter_mapping
         # convert x to dictionary
-        x = OrderedDict(zip(self.x_names, x))
+        x = OrderedDict(zip(self.x_ids, x))
         ret = self.calculator(
             x_dct=x,
             mode=mode,
diff --git a/pypesto/objective/roadrunner/roadrunner_calculator.py b/pypesto/objective/roadrunner/roadrunner_calculator.py
index f6c1cf652..3df7b10ef 100644
--- a/pypesto/objective/roadrunner/roadrunner_calculator.py
+++ b/pypesto/objective/roadrunner/roadrunner_calculator.py
@@ -2,14 +2,12 @@
 
 Handles all RoadRunner.simulate calls, calculates likelihoods and residuals.
 """
+from __future__ import annotations
+
 import numbers
 from collections.abc import Sequence
-from typing import Optional
 
 import numpy as np
-import petab.v1 as petab
-import roadrunner
-from petab.v1.parameter_mapping import ParMappingDictQuadruple
 
 from ...C import (
     FVAL,
@@ -28,6 +26,16 @@
     unscale_parameters,
 )
 
+try:
+    import petab.v1 as petab
+    from petab.v1.parameter_mapping import ParMappingDictQuadruple
+except ImportError:
+    petab = None
+try:
+    import roadrunner
+except ImportError:
+    roadrunner = None
+
 LLH_TYPES = {
     "lin_normal": lambda measurement, simulation, sigma: -0.5
     * (
@@ -69,7 +77,7 @@ def __call__(
         x_ids: Sequence[str],
         parameter_mapping: list[ParMappingDictQuadruple],
         petab_problem: petab.Problem,
-        solver_options: Optional[SolverOptions],
+        solver_options: SolverOptions | None = None,
     ):
         """Perform the RoadRunner call and obtain objective function values.
 
@@ -240,10 +248,10 @@ def simulate_per_condition(
     def fill_in_parameters(
         self,
         problem_parameters: dict,
-        roadrunner_instance: Optional[roadrunner.RoadRunner] = None,
-        parameter_mapping: Optional[ParMappingDictQuadruple] = None,
+        roadrunner_instance: roadrunner.RoadRunner | None = None,
+        parameter_mapping: ParMappingDictQuadruple | None = None,
         preeq: bool = False,
-        filling_mode: Optional[str] = None,
+        filling_mode: str | None = None,
     ) -> dict:
         """Fill in parameters into the roadrunner instance.
 
diff --git a/pypesto/objective/roadrunner/utils.py b/pypesto/objective/roadrunner/utils.py
index e29291ef3..8ae7e3527 100644
--- a/pypesto/objective/roadrunner/utils.py
+++ b/pypesto/objective/roadrunner/utils.py
@@ -11,20 +11,28 @@
 
 import numpy as np
 import pandas as pd
-import petab.v1 as petab
-import roadrunner
-from petab.v1.C import (
-    LIN,
-    MEASUREMENT,
-    NOISE_DISTRIBUTION,
-    NOISE_FORMULA,
-    NORMAL,
-    OBSERVABLE_ID,
-    OBSERVABLE_TRANSFORMATION,
-    SIMULATION,
-    SIMULATION_CONDITION_ID,
-    TIME,
-)
+
+try:
+    import petab.v1 as petab
+    from petab.v1.C import (
+        LIN,
+        MEASUREMENT,
+        NOISE_DISTRIBUTION,
+        NOISE_FORMULA,
+        NORMAL,
+        OBSERVABLE_ID,
+        OBSERVABLE_TRANSFORMATION,
+        SIMULATION,
+        SIMULATION_CONDITION_ID,
+        TIME,
+    )
+except ImportError:
+    petab = None
+
+try:
+    import roadrunner
+except ImportError:
+    roadrunner = None
 
 
 class ExpData:
diff --git a/pypesto/optimize/__init__.py b/pypesto/optimize/__init__.py
index 968fe9e55..9c2679562 100644
--- a/pypesto/optimize/__init__.py
+++ b/pypesto/optimize/__init__.py
@@ -10,6 +10,7 @@
     ESSOptimizer,
     SacessFidesFactory,
     SacessOptimizer,
+    SacessOptions,
     get_default_ess_options,
 )
 from .load import (
diff --git a/pypesto/optimize/ess/__init__.py b/pypesto/optimize/ess/__init__.py
index fef613895..c5f2d0df4 100644
--- a/pypesto/optimize/ess/__init__.py
+++ b/pypesto/optimize/ess/__init__.py
@@ -10,5 +10,6 @@
 from .sacess import (
     SacessFidesFactory,
     SacessOptimizer,
+    SacessOptions,
     get_default_ess_options,
 )
diff --git a/pypesto/optimize/ess/ess.py b/pypesto/optimize/ess/ess.py
index 479c67c61..ca8ffa2f6 100644
--- a/pypesto/optimize/ess/ess.py
+++ b/pypesto/optimize/ess/ess.py
@@ -443,11 +443,15 @@ def _combine(self, i, j) -> np.array:
             raise ValueError("i == j")
         x = self.refset.x
 
-        d = x[j] - x[i]
-        alpha = np.sign(j - i)
+        d = (x[j] - x[i]) / 2
+        # i < j implies f(x_i) < f(x_j)
+        alpha = 1 if i < j else -1
+        # beta is a relative rank-based distance between the two parents
+        #  0 <= beta <= 1
         beta = (np.abs(j - i) - 1) / (self.refset.dim - 2)
+        # new hyper-rectangle, biased towards the better parent
         c1 = x[i] - d * (1 + alpha * beta)
-        c2 = x[i] - d * (1 - alpha * beta)
+        c2 = x[i] + d * (1 - alpha * beta)
 
         # this will not always yield admissible points -> clip to bounds
         ub, lb = self.evaluator.problem.ub, self.evaluator.problem.lb
@@ -556,11 +560,11 @@ def _do_local_search(
     def _maybe_update_global_best(self, x, fx):
         """Update the global best value if the provided value is better."""
         if fx < self.fx_best:
-            self.x_best = x[:]
+            self.x_best[:] = x
             self.fx_best = fx
             self.x_best_has_changed = True
             self.history.update(
-                self.x_best,
+                self.x_best.copy(),
                 (0,),
                 pypesto.C.MODE_FUN,
                 {pypesto.C.FVAL: self.fx_best},
@@ -579,9 +583,9 @@ def _go_beyond(self, x_best_children, fx_best_children):
                 continue
 
             # offspring is better than parent
-            x_parent = self.refset.x[i]
+            x_parent = self.refset.x[i].copy()
             fx_parent = self.refset.fx[i]
-            x_child = x_best_children[i]
+            x_child = x_best_children[i].copy()
             fx_child = fx_best_children[i]
             improvement = 1
             # Multiplier used in determining the hyper-rectangle from which to
diff --git a/pypesto/optimize/ess/refset.py b/pypesto/optimize/ess/refset.py
index 5c75d54a2..0e3cff403 100644
--- a/pypesto/optimize/ess/refset.py
+++ b/pypesto/optimize/ess/refset.py
@@ -67,6 +67,14 @@ def __init__(
         self.n_stuck = np.zeros(shape=[dim])
         self.attributes: dict[Any, np.array] = {}
 
+    def __repr__(self):
+        fx = (
+            f", fx=[{np.min(self.fx)} ... {np.max(self.fx)}]"
+            if self.fx is not None and len(self.fx) >= 2
+            else ""
+        )
+        return f"RefSet(dim={self.dim}{fx})"
+
     def sort(self):
         """Sort RefSet by quality."""
         order = np.argsort(self.fx)
@@ -80,7 +88,7 @@ def initialize_random(
         self,
         n_diverse: int,
     ):
-        """Create initial reference set from random parameters.
+        """Create an initial reference set from random parameters.
 
         Sample ``n_diverse`` random points, populate half of the RefSet using
         the best solutions and fill the rest with random points.
@@ -90,7 +98,7 @@ def initialize_random(
         self.initialize_from_array(x_diverse=x_diverse, fx_diverse=fx_diverse)
 
     def initialize_from_array(self, x_diverse: np.array, fx_diverse: np.array):
-        """Create initial reference set using the provided points.
+        """Create an initial reference set using the provided points.
 
         Populate half of the RefSet using the best given solutions and fill the
         rest with a random selection from the remaining points.
@@ -131,14 +139,29 @@ def prune_too_close(self):
 
         Assumes RefSet is sorted.
         """
+        # Compare [PenasGon2007]
+        #  Note that the main text states that the distance between the two
+        #  points is normalized to the bounds of the search space. However,
+        #  Algorithm 1, line 9 normalizes to x_j instead. The accompanying
+        #  code does normalize to max(abs(x_i), abs(x_j)).
+        # Normalizing to the bounds of the search space seems more reasonable.
+        #  Otherwise, for a parameter with bounds [lb, ub],
+        #  where (ub-lb)/ub < proximity_threshold, we would never find an
+        #  admissible point.
         x = self.x
+        ub, lb = self.evaluator.problem.ub, self.evaluator.problem.lb
+
+        def normalize(x):
+            """Normalize parameter vector to the bounds of the search space."""
+            return (x - lb) / (ub - lb)
+
         for i in range(self.dim):
             for j in range(i + 1, self.dim):
                 # check proximity
                 # zero-division may occur here
                 with np.errstate(divide="ignore", invalid="ignore"):
                     while (
-                        np.max(np.abs((x[i] - x[j]) / x[j]))
+                        np.max(np.abs(normalize(x[i]) - normalize(x[j])))
                         <= self.proximity_threshold
                     ):
                         # too close. replace x_j.
@@ -174,7 +197,8 @@ def resize(self, new_dim: int):
         If the dimension does not change, do nothing.
         If size is decreased, drop entries from the end (i.e., the worst
         values, assuming it is sorted). If size is increased, the new
-        entries are filled with randomly and the refset is sorted.
+        entries are filled with randomly sampled parameters and the refset is
+        sorted.
 
         NOTE: Any attributes are just truncated or filled with zeros.
         """
diff --git a/pypesto/optimize/ess/sacess.py b/pypesto/optimize/ess/sacess.py
index 0dee7482a..c54ef0ad6 100644
--- a/pypesto/optimize/ess/sacess.py
+++ b/pypesto/optimize/ess/sacess.py
@@ -1,4 +1,5 @@
 """Self-adaptive cooperative enhanced scatter search (SACESS)."""
+from __future__ import annotations
 
 import itertools
 import logging
@@ -11,7 +12,7 @@
 from multiprocessing import get_context
 from multiprocessing.managers import SyncManager
 from pathlib import Path
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable
 from uuid import uuid1
 from warnings import warn
 
@@ -31,6 +32,7 @@
     "SacessOptimizer",
     "get_default_ess_options",
     "SacessFidesFactory",
+    "SacessOptions",
 ]
 
 logger = logging.getLogger(__name__)
@@ -62,13 +64,14 @@ class SacessOptimizer:
 
     def __init__(
         self,
-        num_workers: Optional[int] = None,
-        ess_init_args: Optional[list[dict[str, Any]]] = None,
+        num_workers: int | None = None,
+        ess_init_args: list[dict[str, Any]] | None = None,
         max_walltime_s: float = np.inf,
         sacess_loglevel: int = logging.INFO,
         ess_loglevel: int = logging.WARNING,
-        tmpdir: Union[Path, str] = None,
+        tmpdir: Path | str | None = None,
         mp_start_method: str = "spawn",
+        options: SacessOptions | None = None,
     ):
         """Construct.
 
@@ -110,6 +113,8 @@ def __init__(
         mp_start_method:
             The start method for the multiprocessing context.
             See :mod:`multiprocessing` for details.
+        options:
+            Further optimizer hyperparameters.
         """
         if (num_workers is None and ess_init_args is None) or (
             num_workers is not None and ess_init_args is not None
@@ -138,10 +143,11 @@ def __init__(
                 self._tmpdir = Path(f"SacessOptimizerTemp-{str(uuid1())[:8]}")
         self._tmpdir = Path(self._tmpdir).absolute()
         self._tmpdir.mkdir(parents=True, exist_ok=True)
-        self.histories: Optional[
-            list["pypesto.history.memory.MemoryHistory"]
-        ] = None
+        self.histories: list[
+            pypesto.history.memory.MemoryHistory
+        ] | None = None
         self.mp_ctx = get_context(mp_start_method)
+        self.options = options or SacessOptions()
 
     def minimize(
         self,
@@ -189,7 +195,7 @@ def minimize(
         start_time = time.time()
         logger.debug(
             f"Running {self.__class__.__name__} with {self.num_workers} "
-            f"workers: {self.ess_init_args}"
+            f"workers: {self.ess_init_args} and {self.options}."
         )
         ess_init_args = self.ess_init_args or get_default_ess_options(
             num_workers=self.num_workers, dim=problem.dim
@@ -212,6 +218,7 @@ def minimize(
                 shmem_manager=shmem_manager,
                 ess_options=ess_init_args,
                 dim=problem.dim,
+                options=self.options,
             )
             # create workers
             workers = [
@@ -225,6 +232,7 @@ def minimize(
                     tmp_result_file=SacessWorker.get_temp_result_filename(
                         worker_idx, self._tmpdir
                     ),
+                    options=self.options,
                 )
                 for worker_idx, ess_kwargs in enumerate(ess_init_args)
             ]
@@ -344,12 +352,13 @@ class SacessManager:
         more promising the respective worker is considered)
     _worker_comms: Number of communications received from the individual
         workers
-    _rejections: Number of rejected solutions received from workers since last
-        adaptation of ``_rejection_threshold``.
+    _rejections: Number of rejected solutions received from workers since the
+        last adaptation of ``_rejection_threshold``.
     _rejection_threshold: Threshold for relative objective improvements that
         incoming solutions have to pass to be accepted
     _lock: Lock for accessing shared state.
     _logger: A logger instance
+    _options: Further optimizer hyperparameters.
     """
 
     def __init__(
@@ -357,14 +366,22 @@ def __init__(
         shmem_manager: SyncManager,
         ess_options: list[dict[str, Any]],
         dim: int,
+        options: SacessOptions | None = None,
     ):
+        self._options = options or SacessOptions()
         self._num_workers = len(ess_options)
         self._ess_options = [shmem_manager.dict(o) for o in ess_options]
         self._best_known_fx = shmem_manager.Value("d", np.inf)
         self._best_known_x = shmem_manager.Array("d", [np.nan] * dim)
         self._rejections = shmem_manager.Value("i", 0)
-        # initial value from [PenasGon2017]_ p.9
-        self._rejection_threshold = shmem_manager.Value("d", 0.1)
+        # The initial value for the acceptance/rejection threshold in
+        # [PenasGon2017]_ p.9 is 0.1.
+        # However, their implementation uses 0.1 *percent*. I assume this is a
+        # mistake in the paper.
+        self._rejection_threshold = shmem_manager.Value(
+            "d", self._options.manager_initial_rejection_threshold
+        )
+
         # scores of the workers, ordered by worker-index
         # initial score is the worker index
         self._worker_scores = shmem_manager.Array(
@@ -425,7 +442,7 @@ def submit_solution(
                     np.isfinite(fx)
                     and not np.isfinite(self._best_known_fx.value)
                 )
-                # avoid division by 0. just accept any improvement if best
+                # avoid division by 0. just accept any improvement if the best
                 # known value is 0.
                 or (self._best_known_fx.value == 0 and fx < 0)
                 or (
@@ -467,10 +484,13 @@ def submit_solution(
                     f"(threshold: {self._rejection_threshold.value}) "
                     f"(total rejections: {self._rejections.value})."
                 )
-                # adapt acceptance threshold if too many solutions have been
-                #  rejected
-                if self._rejections.value > self._num_workers:
-                    self._rejection_threshold.value /= 2
+                # Adapt the acceptance threshold if too many solutions have
+                #  been rejected: halve it, but never go below the minimum.
+                if self._rejections.value >= self._num_workers:
+                    self._rejection_threshold.value = max(
+                        self._rejection_threshold.value / 2,
+                        self._options.manager_minimum_rejection_threshold,
+                    )
                     self._logger.debug(
                         "Lowered acceptance threshold to "
                         f"{self._rejection_threshold.value}."
@@ -496,9 +516,6 @@ class SacessWorker:
         to the manager.
     _ess_kwargs: ESSOptimizer options for this worker (may get updated during
         the self-adaptive step).
-    _acceptance_threshold: Minimum relative improvement of the objective
-        compared to the best known value to be eligible for submission to the
-        Manager.
     _n_sent_solutions: Number of solutions sent to the Manager.
     _max_walltime_s: Walltime limit.
     _logger: A Logger instance.
@@ -516,6 +533,7 @@ def __init__(
         loglevel: int = logging.INFO,
         ess_loglevel: int = logging.WARNING,
         tmp_result_file: str = None,
+        options: SacessOptions | None = None,
     ):
         self._manager = manager
         self._worker_idx = worker_idx
@@ -523,7 +541,6 @@ def __init__(
         self._n_received_solutions = 0
         self._neval = 0
         self._ess_kwargs = ess_kwargs
-        self._acceptance_threshold = 0.005
         self._n_sent_solutions = 0
         self._max_walltime_s = max_walltime_s
         self._start_time = None
@@ -532,6 +549,7 @@ def __init__(
         self._logger = None
         self._tmp_result_file = tmp_result_file
         self._refset = None
+        self._options = options or SacessOptions()
 
     def run(
         self,
@@ -545,7 +563,8 @@ def run(
         self._manager._logger = self._logger
 
         self._logger.debug(
-            f"#{self._worker_idx} starting " f"({self._ess_kwargs})."
+            f"#{self._worker_idx} starting "
+            f"({self._ess_kwargs}, {self._options})."
         )
 
         evaluator = create_function_evaluator(
@@ -606,6 +625,7 @@ def run(
             exit_flag=ess.exit_flag,
         )
         self._manager._result_queue.put(worker_result)
+        self._logger.debug(f"Final configuration: {self._ess_kwargs}")
         ess._report_final()
 
     def _setup_ess(self, startpoint_method: StartpointMethod) -> ESSOptimizer:
@@ -653,24 +673,35 @@ def _cooperate(self):
             self.replace_solution(self._refset, x=recv_x, fx=recv_fx)
 
     def _maybe_adapt(self, problem: Problem):
-        """Perform adaptation step.
+        """Perform the adaptation step if needed.
 
         Update ESS settings if conditions are met.
         """
         # Update ESS settings if we received way more solutions than we sent
-        # Magic numbers from [PenasGon2017]_ algorithm 5
+        #  Note: [PenasGon2017]_ Algorithm 5 uses AND in the following
+        #  condition, but the accompanying implementation uses OR.
         if (
-            self._n_received_solutions > 10 * self._n_sent_solutions + 20
-            and self._neval > problem.dim * 5000
+            self._n_received_solutions
+            > self._options.adaptation_sent_coeff * self._n_sent_solutions
+            + self._options.adaptation_sent_offset
+            or self._neval > problem.dim * self._options.adaptation_min_evals
         ):
             self._ess_kwargs = self._manager.reconfigure_worker(
                 self._worker_idx
             )
+            self._refset.sort()
             self._refset.resize(self._ess_kwargs["dim_refset"])
             self._logger.debug(
                 f"Updated settings on worker {self._worker_idx} to "
                 f"{self._ess_kwargs}"
             )
+        else:
+            self._logger.debug(
+                f"Worker {self._worker_idx} not adapting. "
+                f"Received: {self._n_received_solutions} <= {self._options.adaptation_sent_coeff * self._n_sent_solutions + self._options.adaptation_sent_offset}, "
+                f"Sent: {self._n_sent_solutions}, "
+                f"neval: {self._neval} <= {problem.dim * self._options.adaptation_min_evals}."
+            )
 
     def maybe_update_best(self, x: np.array, fx: float):
         """Maybe update the best known solution and send it to the manager."""
@@ -681,17 +712,17 @@ def maybe_update_best(self, x: np.array, fx: float):
             f"Worker {self._worker_idx} maybe sending solution {fx}. "
             f"best known: {self._best_known_fx}, "
             f"rel change: {rel_change:.4g}, "
-            f"threshold: {self._acceptance_threshold}"
+            f"threshold: {self._options.worker_acceptance_threshold}"
         )
 
-        # solution improves best value by at least a factor of ...
+        # solution improves the best value by at least a factor of ...
         if (
             (np.isfinite(fx) and not np.isfinite(self._best_known_fx))
             or (self._best_known_fx == 0 and fx < 0)
             or (
                 fx < self._best_known_fx
                 and abs((self._best_known_fx - fx) / fx)
-                > self._acceptance_threshold
+                > self._options.worker_acceptance_threshold
             )
         ):
             self._logger.debug(
@@ -726,7 +757,7 @@ def replace_solution(refset: RefSet, x: np.array, fx: float):
                 refset.attributes["cooperative_solution"]
             )
         ).size == 0:
-            # the attribute exists, but no member is marked as cooperative
+            # the attribute exists, but no member is marked as the cooperative
             # solution. this may happen if we shrink the refset.
             cooperative_solution_idx = np.argmax(refset.fx)
 
@@ -755,9 +786,7 @@ def _keep_going(self):
         return True
 
     @staticmethod
-    def get_temp_result_filename(
-        worker_idx: int, tmpdir: Union[str, Path]
-    ) -> str:
+    def get_temp_result_filename(worker_idx: int, tmpdir: str | Path) -> str:
         return str(Path(tmpdir, f"sacess-{worker_idx:02d}_tmp.h5").absolute())
 
 
@@ -774,7 +803,7 @@ def _run_worker(
     # different random seeds per process
     np.random.seed((os.getpid() * int(time.time() * 1000)) % 2**32)
 
-    # Forward log messages to logging process
+    # Forward log messages to the logging process
     h = logging.handlers.QueueHandler(log_process_queue)
     worker._logger = logging.getLogger(multiprocessing.current_process().name)
     worker._logger.addHandler(h)
@@ -785,11 +814,9 @@ def _run_worker(
 def get_default_ess_options(
     num_workers: int,
     dim: int,
-    local_optimizer: Union[
-        bool,
-        "pypesto.optimize.Optimizer",
-        Callable[..., "pypesto.optimize.Optimizer"],
-    ] = True,
+    local_optimizer: bool
+    | pypesto.optimize.Optimizer
+    | Callable[..., pypesto.optimize.Optimizer] = True,
 ) -> list[dict]:
     """Get default ESS settings for (SA)CESS.
 
@@ -821,13 +848,6 @@ def dim_refset(x):
         return max(min_dimrefset, ceil((1 + sqrt(4 * dim * x)) / 2))
 
     settings = [
-        # settings for first worker
-        {
-            "dim_refset": dim_refset(10),
-            "balance": 0.5,
-            "local_n2": 10,
-        },
-        # for the remaining workers, cycle through these settings
         # 1
         {
             "dim_refset": dim_refset(1),
@@ -979,10 +999,7 @@ def dim_refset(x):
         elif local_optimizer is not False:
             cur_settings["local_optimizer"] = local_optimizer
 
-    return [
-        settings[0],
-        *(itertools.islice(itertools.cycle(settings[1:]), num_workers - 1)),
-    ]
+    return list(itertools.islice(itertools.cycle(settings), num_workers))
 
 
 class SacessFidesFactory:
@@ -1005,8 +1022,8 @@ class SacessFidesFactory:
 
     def __init__(
         self,
-        fides_options: Optional[dict[str, Any]] = None,
-        fides_kwargs: Optional[dict[str, Any]] = None,
+        fides_options: dict[str, Any] | None = None,
+        fides_kwargs: dict[str, Any] | None = None,
     ):
         if fides_options is None:
             fides_options = {}
@@ -1026,7 +1043,7 @@ def __init__(
 
     def __call__(
         self, max_walltime_s: int, max_eval: int
-    ) -> "pypesto.optimize.FidesOptimizer":
+    ) -> pypesto.optimize.FidesOptimizer:
         """Create a :class:`FidesOptimizer` instance."""
 
         from fides.constants import Options as FidesOptions
@@ -1073,5 +1090,72 @@ class SacessWorkerResult:
     fx: float
     n_eval: int
     n_iter: int
-    history: "pypesto.history.memory.MemoryHistory"
+    history: pypesto.history.memory.MemoryHistory
     exit_flag: ESSExitFlag
+
+
+@dataclass
+class SacessOptions:
+    """Container for :class:`SacessOptimizer` hyperparameters.
+
+    Parameters
+    ----------
+    manager_initial_rejection_threshold, manager_minimum_rejection_threshold:
+        Initial and minimum threshold for relative objective improvements that
+        incoming solutions have to pass to be accepted. If the number of
+        rejected solutions reaches the number of workers, the threshold is
+        halved until it reaches ``manager_minimum_rejection_threshold``.
+
+    worker_acceptance_threshold:
+        Minimum relative improvement of the objective compared to the best
+        known value to be eligible for submission to the Manager.
+
+    adaptation_min_evals, adaptation_sent_offset, adaptation_sent_coeff:
+        Hyperparameters that control when the workers will adapt their settings
+        based on the performance of the other workers.
+
+        The adaptation step is performed if at least one of the following
+        conditions is met:
+
+        * The number of function evaluations since the last solution was sent
+          to the manager is greater than ``adaptation_min_evals`` times the
+          number of optimization parameters.
+
+        * The number of solutions received by the worker since the last
+          solution it sent to the manager is greater than
+          ``adaptation_sent_coeff * n_sent_solutions + adaptation_sent_offset``,
+          where ``n_sent_solutions`` is the number of solutions sent to the
+          manager by the given worker.
+
+    """
+
+    manager_initial_rejection_threshold: float = 0.001
+    manager_minimum_rejection_threshold: float = 0.001
+
+    # Default value from original SaCeSS implementation
+    worker_acceptance_threshold: float = 0.0001
+
+    # Magic numbers for adaptation, taken from [PenasGon2017]_ algorithm 5
+    adaptation_min_evals: int = 5000
+    adaptation_sent_offset: int = 20
+    adaptation_sent_coeff: int = 10
+
+    def __post_init__(self):
+        if self.adaptation_min_evals < 0:
+            raise ValueError("adaptation_min_evals must be non-negative.")
+        if self.adaptation_sent_offset < 0:
+            raise ValueError("adaptation_sent_offset must be non-negative.")
+        if self.adaptation_sent_coeff < 0:
+            raise ValueError("adaptation_sent_coeff must be non-negative.")
+        if self.manager_initial_rejection_threshold < 0:
+            raise ValueError(
+                "manager_initial_rejection_threshold must be non-negative."
+            )
+        if self.manager_minimum_rejection_threshold < 0:
+            raise ValueError(
+                "manager_minimum_rejection_threshold must be non-negative."
+            )
+        if self.worker_acceptance_threshold < 0:
+            raise ValueError(
+                "worker_acceptance_threshold must be non-negative."
+            )
diff --git a/pypesto/petab/importer.py b/pypesto/petab/importer.py
index 6160247dd..5a44b9698 100644
--- a/pypesto/petab/importer.py
+++ b/pypesto/petab/importer.py
@@ -4,69 +4,57 @@
 
 import logging
 import os
-import shutil
-import sys
 import tempfile
 import warnings
-from collections.abc import Iterable, Sequence
-from dataclasses import dataclass
-from functools import partial
+from collections.abc import Callable, Iterable, Sequence
 from importlib.metadata import version
 from typing import (
     Any,
-    Callable,
 )
 
-import numpy as np
 import pandas as pd
 import petab.v1 as petab
-from petab.v1.C import (
-    ESTIMATE,
-    NOISE_PARAMETERS,
-    OBSERVABLE_ID,
-    PREEQUILIBRATION_CONDITION_ID,
-    SIMULATION_CONDITION_ID,
-)
-from petab.v1.models import MODEL_TYPE_SBML
+
+try:
+    import roadrunner
+except ImportError:
+    roadrunner = None
 
 from ..C import (
+    AMICI,
     CENSORED,
-    CENSORING_TYPES,
-    CONDITION_SEP,
-    MEASUREMENT_TYPE,
-    MODE_FUN,
-    MODE_RES,
     ORDINAL,
     ORDINAL_OPTIONS,
-    PARAMETER_TYPE,
-    RELATIVE,
+    PETAB,
+    ROADRUNNER,
     SEMIQUANTITATIVE,
     SPLINE_APPROXIMATION_OPTIONS,
-    InnerParameterType,
 )
 from ..hierarchical.inner_calculator_collector import InnerCalculatorCollector
-from ..objective import AggregatedObjective, AmiciObjective
-from ..objective.amici import AmiciObjectBuilder
+from ..objective import AggregatedObjective, AmiciObjective, ObjectiveBase
 from ..objective.priors import NegLogParameterPriors, get_parameter_prior_dict
 from ..predict import AmiciPredictor
 from ..problem import HierarchicalProblem, Problem
 from ..result import PredictionResult
-from ..startpoint import CheckedStartpoints, StartpointMethod
+from ..startpoint import StartpointMethod
+from .objective_creator import (
+    AmiciObjectiveCreator,
+    ObjectiveCreator,
+    PetabSimulatorObjectiveCreator,
+    RoadRunnerObjectiveCreator,
+)
+from .util import PetabStartpoints, get_petab_non_quantitative_data_types
 
 try:
     import amici
-    import amici.petab
-    import amici.petab.conditions
-    import amici.petab.parameter_mapping
     import amici.petab.simulations
-    from amici.petab.import_helpers import check_model
 except ImportError:
     amici = None
 
 logger = logging.getLogger(__name__)
 
 
-class PetabImporter(AmiciObjectBuilder):
+class PetabImporter:
     """
     Importer for PEtab files.
 
@@ -83,12 +71,15 @@ class PetabImporter(AmiciObjectBuilder):
     def __init__(
         self,
         petab_problem: petab.Problem,
-        output_folder: str = None,
-        model_name: str = None,
+        output_folder: str | None = None,
+        model_name: str | None = None,
         validate_petab: bool = True,
         validate_petab_hierarchical: bool = True,
         hierarchical: bool = False,
-        inner_options: dict = None,
+        inner_options: dict | None = None,
+        simulator_type: str = AMICI,
+        simulator: petab.Simulator | None = None,
+        rr: roadrunner.RoadRunner | None = None,
     ):
         """Initialize importer.
 
@@ -115,6 +106,13 @@ def __init__(
         inner_options:
             Options for the inner problems and solvers.
             If not provided, default options will be used.
+        simulator_type:
+            The type of simulator to use; determines which kind of objective
+            is created. Allowed types are 'amici', 'petab', and 'roadrunner'.
+        simulator:
+            Required if ``simulator_type == 'petab'``, otherwise not used.
+        rr:
+            RoadRunner instance, stored as ``self.roadrunner_instance``.
         """
         self.petab_problem = petab_problem
         self._hierarchical = hierarchical
@@ -171,11 +169,21 @@ def __init__(
             model_name = _find_model_name(self.output_folder)
         self.model_name = model_name
 
+        self.simulator_type = simulator_type
+        self.simulator = simulator
+        if simulator_type == PETAB and simulator is None:
+            raise ValueError(
+                "A petab simulator object must be provided if the simulator "
+                "type is 'petab'."
+            )
+        self.roadrunner_instance = rr
+
     @staticmethod
     def from_yaml(
         yaml_config: dict | str,
         output_folder: str = None,
         model_name: str = None,
+        simulator_type: str = AMICI,
     ) -> PetabImporter:
         """Simplified constructor using a petab yaml file."""
         petab_problem = petab.Problem.from_yaml(yaml_config)
@@ -184,6 +192,7 @@ def from_yaml(
             petab_problem=petab_problem,
             output_folder=output_folder,
             model_name=model_name,
+            simulator_type=simulator_type,
         )
 
     def validate_inner_options(self):
@@ -217,438 +226,11 @@ def check_gradients(
         -------
         match: Whether gradients match FDs (True) or not (False)
         """
-        par = np.asarray(self.petab_problem.x_nominal_scaled)
-        problem = self.create_problem()
-        objective = problem.objective
-        free_indices = par[problem.x_free_indices]
-        dfs = []
-        modes = []
-
-        if mode is None:
-            modes = [MODE_FUN, MODE_RES]
-        else:
-            modes = [mode]
-
-        if multi_eps is None:
-            multi_eps = np.array([10 ** (-i) for i in range(3, 9)])
-
-        for mode in modes:
-            try:
-                dfs.append(
-                    objective.check_grad_multi_eps(
-                        free_indices,
-                        *args,
-                        **kwargs,
-                        mode=mode,
-                        multi_eps=multi_eps,
-                    )
-                )
-            except (RuntimeError, ValueError):
-                # Might happen in case PEtab problem not well defined or
-                # fails for specified tolerances in forward sensitivities
-                return False
-
-        return all(
-            any(
-                [
-                    np.all(
-                        (mode_df.rel_err.values < rtol)
-                        | (mode_df.abs_err.values < atol)
-                    ),
-                ]
-            )
-            for mode_df in dfs
-        )
-
-    def create_model(
-        self,
-        force_compile: bool = False,
-        verbose: bool = True,
-        **kwargs,
-    ) -> amici.Model:
-        """
-        Import amici model.
-
-        Parameters
-        ----------
-        force_compile:
-            If False, the model is compiled only if the output folder does not
-            exist yet. If True, the output folder is deleted and the model
-            (re-)compiled in either case.
-
-            .. warning::
-                If `force_compile`, then an existing folder of that name will
-                be deleted.
-        verbose:
-            Passed to AMICI's model compilation. If True, the compilation
-            progress is printed.
-        kwargs:
-            Extra arguments passed to amici.SbmlImporter.sbml2amici
-        """
-        # courtesy check whether target is folder
-        if os.path.exists(self.output_folder) and not os.path.isdir(
-            self.output_folder
-        ):
-            raise AssertionError(
-                f"Refusing to remove {self.output_folder} for model "
-                f"compilation: Not a folder."
-            )
-
-        # add module to path
-        if self.output_folder not in sys.path:
-            sys.path.insert(0, self.output_folder)
-
-        # compile
-        if self._must_compile(force_compile):
-            logger.info(
-                f"Compiling amici model to folder " f"{self.output_folder}."
-            )
-            if self.petab_problem.model.type_id == MODEL_TYPE_SBML:
-                self.compile_model(
-                    validate=self.validate_petab,
-                    verbose=verbose,
-                    **kwargs,
-                )
-            else:
-                self.compile_model(verbose=verbose, **kwargs)
-        else:
-            logger.debug(
-                f"Using existing amici model in folder "
-                f"{self.output_folder}."
-            )
-
-        return self._create_model()
-
-    def _create_model(self) -> amici.Model:
-        """Load model module and return the model, no checks/compilation."""
-        # load moduĺe
-        module = amici.import_model_module(
-            module_name=self.model_name, module_path=self.output_folder
-        )
-        model = module.getModel()
-        check_model(
-            amici_model=model,
-            petab_problem=self.petab_problem,
-        )
-
-        return model
-
-    def _must_compile(self, force_compile: bool):
-        """Check whether the model needs to be compiled first."""
-        # asked by user
-        if force_compile:
-            return True
-
-        # folder does not exist
-        if not os.path.exists(self.output_folder) or not os.listdir(
-            self.output_folder
-        ):
-            return True
-
-        # try to import (in particular checks version)
-        try:
-            # importing will already raise an exception if version wrong
-            amici.import_model_module(self.model_name, self.output_folder)
-        except ModuleNotFoundError:
-            return True
-        except amici.AmiciVersionError as e:
-            logger.info(
-                "amici model will be re-imported due to version "
-                f"mismatch: {e}"
-            )
-            return True
-
-        # no need to (re-)compile
-        return False
-
-    def compile_model(self, **kwargs):
-        """
-        Compile the model.
-
-        If the output folder exists already, it is first deleted.
-
-        Parameters
-        ----------
-        kwargs:
-            Extra arguments passed to :meth:`amici.sbml_import.SbmlImporter.sbml2amici`
-            or :func:`amici.pysb_import.pysb2amici`.
-        """
-        # delete output directory
-        if os.path.exists(self.output_folder):
-            shutil.rmtree(self.output_folder)
-
-        amici.petab.import_petab_problem(
-            petab_problem=self.petab_problem,
-            model_name=self.model_name,
-            model_output_dir=self.output_folder,
-            **kwargs,
-        )
-
-    def create_solver(
-        self,
-        model: amici.Model = None,
-        verbose: bool = True,
-    ) -> amici.Solver:
-        """Return model solver."""
-        # create model
-        if model is None:
-            model = self.create_model(verbose=verbose)
-
-        solver = model.getSolver()
-        return solver
-
-    def create_edatas(
-        self,
-        model: amici.Model = None,
-        simulation_conditions=None,
-        verbose: bool = True,
-    ) -> list[amici.ExpData]:
-        """Create list of :class:`amici.amici.ExpData` objects."""
-        # create model
-        if model is None:
-            model = self.create_model(verbose=verbose)
-
-        return amici.petab.conditions.create_edatas(
-            amici_model=model,
-            petab_problem=self.petab_problem,
-            simulation_conditions=simulation_conditions,
-        )
-
-    def create_objective(
-        self,
-        model: amici.Model = None,
-        solver: amici.Solver = None,
-        edatas: Sequence[amici.ExpData] = None,
-        force_compile: bool = False,
-        verbose: bool = True,
-        **kwargs,
-    ) -> AmiciObjective:
-        """Create a :class:`pypesto.objective.AmiciObjective`.
-
-        Parameters
-        ----------
-        model:
-            The AMICI model.
-        solver:
-            The AMICI solver.
-        edatas:
-            The experimental data in AMICI format.
-        force_compile:
-            Whether to force-compile the model if not passed.
-        verbose:
-            Passed to AMICI's model compilation. If True, the compilation
-            progress is printed.
-        **kwargs:
-            Additional arguments passed on to the objective. In case of ordinal
-            or semiquantitative measurements, ``inner_options`` can optionally
-            be passed here. If none are given, ``inner_options`` given to the
-            importer constructor (or inner defaults) will be chosen.
-
-        Returns
-        -------
-        A :class:`pypesto.objective.AmiciObjective` for the model and the data.
-        """
-        # get simulation conditions
-        simulation_conditions = petab.get_simulation_conditions(
-            self.petab_problem.measurement_df
-        )
-
-        # create model
-        if model is None:
-            model = self.create_model(
-                force_compile=force_compile, verbose=verbose
-            )
-        # create solver
-        if solver is None:
-            solver = self.create_solver(model)
-        # create conditions and edatas from measurement data
-        if edatas is None:
-            edatas = self.create_edatas(
-                model=model, simulation_conditions=simulation_conditions
-            )
-
-        parameter_mapping = (
-            amici.petab.parameter_mapping.create_parameter_mapping(
-                petab_problem=self.petab_problem,
-                simulation_conditions=simulation_conditions,
-                scaled_parameters=True,
-                amici_model=model,
-                fill_fixed_parameters=False,
-            )
-        )
-
-        par_ids = self.petab_problem.x_ids
-
-        # fill in dummy parameters (this is needed since some objective
-        #  initialization e.g. checks for preeq parameters)
-        problem_parameters = dict(
-            zip(self.petab_problem.x_ids, self.petab_problem.x_nominal_scaled)
-        )
-        amici.petab.conditions.fill_in_parameters(
-            edatas=edatas,
-            problem_parameters=problem_parameters,
-            scaled_parameters=True,
-            parameter_mapping=parameter_mapping,
-            amici_model=model,
-        )
-
-        calculator = None
-        amici_reporting = None
-
-        if (
-            self._non_quantitative_data_types is not None
-            and self._hierarchical
-        ):
-            inner_options = kwargs.pop("inner_options", None)
-            inner_options = (
-                inner_options
-                if inner_options is not None
-                else self.inner_options
-            )
-            calculator = InnerCalculatorCollector(
-                self._non_quantitative_data_types,
-                self.petab_problem,
-                model,
-                edatas,
-                inner_options,
-            )
-            amici_reporting = amici.RDataReporting.full
-
-            # FIXME: currently not supported with hierarchical
-            if "guess_steadystate" in kwargs and kwargs["guess_steadystate"]:
-                warnings.warn(
-                    "`guess_steadystate` not supported with hierarchical "
-                    "optimization. Disabling `guess_steadystate`.",
-                    stacklevel=1,
-                )
-            kwargs["guess_steadystate"] = False
-            inner_parameter_ids = calculator.get_inner_par_ids()
-            par_ids = [x for x in par_ids if x not in inner_parameter_ids]
-
-        max_sensi_order = kwargs.get("max_sensi_order", None)
-
-        if (
-            self._non_quantitative_data_types is not None
-            and any(
-                data_type in self._non_quantitative_data_types
-                for data_type in [ORDINAL, CENSORED, SEMIQUANTITATIVE]
-            )
-            and max_sensi_order is not None
-            and max_sensi_order > 1
-        ):
-            raise ValueError(
-                "Ordinal, censored and semiquantitative data cannot be "
-                "used with second order sensitivities. Use a up to first order "
-                "method or disable ordinal, censored and semiquantitative "
-            )
-
-        # create objective
-        obj = AmiciObjective(
-            amici_model=model,
-            amici_solver=solver,
-            edatas=edatas,
-            x_ids=par_ids,
-            x_names=par_ids,
-            parameter_mapping=parameter_mapping,
-            amici_object_builder=self,
-            calculator=calculator,
-            amici_reporting=amici_reporting,
-            **kwargs,
-        )
-
-        return obj
-
-    def create_predictor(
-        self,
-        objective: AmiciObjective = None,
-        amici_output_fields: Sequence[str] = None,
-        post_processor: Callable | None = None,
-        post_processor_sensi: Callable | None = None,
-        post_processor_time: Callable | None = None,
-        max_chunk_size: int | None = None,
-        output_ids: Sequence[str] = None,
-        condition_ids: Sequence[str] = None,
-    ) -> AmiciPredictor:
-        """Create a :class:`pypesto.predict.AmiciPredictor`.
-
-        The `AmiciPredictor` facilitates generation of predictions from
-        parameter vectors.
-
-        Parameters
-        ----------
-        objective:
-            An objective object, which will be used to get model simulations
-        amici_output_fields:
-            keys that exist in the return data object from AMICI, which should
-            be available for the post-processors
-        post_processor:
-            A callable function which applies postprocessing to the simulation
-            results. Default are the observables of the AMICI model.
-            This method takes a list of ndarrays (as returned in the field
-            ['y'] of amici ReturnData objects) as input.
-        post_processor_sensi:
-            A callable function which applies postprocessing to the
-            sensitivities of the simulation results. Default are the
-            observable sensitivities of the AMICI model.
-            This method takes two lists of ndarrays (as returned in the
-            fields ['y'] and ['sy'] of amici ReturnData objects) as input.
-        post_processor_time:
-            A callable function which applies postprocessing to the timepoints
-            of the simulations. Default are the timepoints of the amici model.
-            This method takes a list of ndarrays (as returned in the field
-            ['t'] of amici ReturnData objects) as input.
-        max_chunk_size:
-            In some cases, we don't want to compute all predictions at once
-            when calling the prediction function, as this might not fit into
-            the memory for large datasets and models.
-            Here, the user can specify a maximum number of conditions, which
-            should be simulated at a time.
-            Default is 0 meaning that all conditions will be simulated.
-            Other values are only applicable, if an output file is specified.
-        output_ids:
-            IDs of outputs, if post-processing is used
-        condition_ids:
-            IDs of conditions, if post-processing is used
-
-        Returns
-        -------
-        A :class:`pypesto.predict.AmiciPredictor` for the model, using
-        the outputs of the AMICI model and the timepoints from the PEtab data.
-        """
-        # if the user didn't pass an objective function, we create it first
-        if objective is None:
-            objective = self.create_objective()
-
-        # create a identifiers of preequilibration and simulation condition ids
-        # which can then be stored in the prediction result
-        edata_conditions = objective.amici_object_builder.petab_problem.get_simulation_conditions_from_measurement_df()
-        if PREEQUILIBRATION_CONDITION_ID not in list(edata_conditions.columns):
-            preeq_dummy = [""] * edata_conditions.shape[0]
-            edata_conditions[PREEQUILIBRATION_CONDITION_ID] = preeq_dummy
-        edata_conditions.drop_duplicates(inplace=True)
-
-        if condition_ids is None:
-            condition_ids = [
-                edata_conditions.loc[id, PREEQUILIBRATION_CONDITION_ID]
-                + CONDITION_SEP
-                + edata_conditions.loc[id, SIMULATION_CONDITION_ID]
-                for id in edata_conditions.index
-            ]
-
-        # wrap around AmiciPredictor
-        predictor = AmiciPredictor(
-            amici_objective=objective,
-            amici_output_fields=amici_output_fields,
-            post_processor=post_processor,
-            post_processor_sensi=post_processor_sensi,
-            post_processor_time=post_processor_time,
-            max_chunk_size=max_chunk_size,
-            output_ids=output_ids,
-            condition_ids=condition_ids,
+        raise NotImplementedError(
+            "This function has been removed. "
+            "Please use `objective.check_gradients_match_finite_differences`."
         )
 
-        return predictor
-
     def create_prior(self) -> NegLogParameterPriors | None:
         """
         Create a prior from the parameter table.
@@ -707,9 +289,46 @@ def create_startpoint_method(self, **kwargs) -> StartpointMethod:
         """
         return PetabStartpoints(petab_problem=self.petab_problem, **kwargs)
 
+    def create_objective_creator(
+        self,
+        simulator_type: str = AMICI,
+        simulator: petab.Simulator | None = None,
+    ) -> ObjectiveCreator:
+        """Choose :class:`ObjectiveCreator` depending on the simulator type.
+
+        Parameters
+        ----------
+        simulator_type:
+            The type of simulator to use. Depending on this different kinds
+            of objectives will be created. Allowed types are 'amici', 'petab',
+            and 'roadrunner'; any other value raises a ``ValueError``.
+        simulator:
+            In case of a ``simulator_type == 'petab'``, the simulator object
+            has to be provided. Otherwise the argument is not used.
+        """
+        if simulator_type == AMICI:
+            return AmiciObjectiveCreator(
+                petab_problem=self.petab_problem,
+                output_folder=self.output_folder,
+                model_name=self.model_name,
+                hierarchical=self._hierarchical,
+                inner_options=self.inner_options,
+                non_quantitative_data_types=self._non_quantitative_data_types,
+                validate_petab=self.validate_petab,
+            )
+        if simulator_type == PETAB:
+            return PetabSimulatorObjectiveCreator(
+                petab_problem=self.petab_problem, simulator=simulator
+            )
+        if simulator_type == ROADRUNNER:
+            return RoadRunnerObjectiveCreator(
+                petab_problem=self.petab_problem, rr=self.roadrunner_instance
+            )
+        raise ValueError(f"Unknown simulator type: {simulator_type!r}.")
+
     def create_problem(
         self,
-        objective: AmiciObjective = None,
+        objective: ObjectiveBase = None,
         x_guesses: Iterable[float] | None = None,
         problem_kwargs: dict[str, Any] = None,
         startpoint_kwargs: dict[str, Any] = None,
@@ -739,7 +358,11 @@ def create_problem(
         A :class:`pypesto.problem.Problem` for the objective.
         """
         if objective is None:
-            objective = self.create_objective(**kwargs)
+            self.objective_constructor = self.create_objective_creator(
+                kwargs.pop("simulator_type", self.simulator_type),
+                kwargs.pop("simulator", self.simulator),
+            )
+            objective = self.objective_constructor.create_objective(**kwargs)
 
         x_fixed_indices = self.petab_problem.x_fixed_indices
         x_fixed_vals = self.petab_problem.x_nominal_fixed_scaled
@@ -809,39 +432,99 @@ def create_problem(
 
         return problem
 
+    def create_model(
+        self,
+        force_compile: bool = False,
+        verbose: bool = True,
+        **kwargs,
+    ) -> amici.Model:
+        """Deprecated, see :meth:`AmiciObjectiveCreator.create_model`.
+
+        ``simulator_type`` and ``simulator`` are taken from ``kwargs`` if
+        given, else from ``self``; the rest is forwarded unchanged.
+        """
+        message = "This function has been moved to `AmiciObjectiveCreator`."
+        warnings.warn(message, DeprecationWarning, stacklevel=2)
+        sim_type = kwargs.pop("simulator_type", self.simulator_type)
+        sim = kwargs.pop("simulator", self.simulator)
+        creator = self.create_objective_creator(sim_type, sim)
+        return creator.create_model(
+            force_compile=force_compile,
+            verbose=verbose,
+            **kwargs,
+        )
+
+    def create_objective(
+        self,
+        model: amici.Model = None,
+        solver: amici.Solver = None,
+        edatas: Sequence[amici.ExpData] = None,
+        force_compile: bool = False,
+        verbose: bool = True,
+        **kwargs,
+    ) -> ObjectiveBase:
+        """Deprecated, see :meth:`AmiciObjectiveCreator.create_objective`.
+
+        ``simulator_type`` and ``simulator`` are taken from ``kwargs`` if
+        given, else from ``self``; the rest is forwarded unchanged.
+        """
+        message = "This function has been moved to `AmiciObjectiveCreator`."
+        warnings.warn(message, DeprecationWarning, stacklevel=2)
+        sim_type = kwargs.pop("simulator_type", self.simulator_type)
+        sim = kwargs.pop("simulator", self.simulator)
+        creator = self.create_objective_creator(sim_type, sim)
+        return creator.create_objective(
+            model=model,
+            solver=solver,
+            edatas=edatas,
+            force_compile=force_compile,
+            verbose=verbose,
+            **kwargs,
+        )
+
+    def create_predictor(
+        self,
+        objective: AmiciObjective = None,
+        amici_output_fields: Sequence[str] = None,
+        post_processor: Callable | None = None,
+        post_processor_sensi: Callable | None = None,
+        post_processor_time: Callable | None = None,
+        max_chunk_size: int | None = None,
+        output_ids: Sequence[str] = None,
+        condition_ids: Sequence[str] = None,
+    ) -> AmiciPredictor:
+        """Deprecated, see :meth:`AmiciObjectiveCreator.create_predictor`.
+
+        Raises a ``ValueError`` if the simulator type is not AMICI.
+        """
+        if self.simulator_type != AMICI:
+            raise ValueError(
+                "Predictor can only be created for amici models and is "
+                "supposed to be created from the AmiciObjectiveCreator."
+            )
+        message = "This function has been moved to `AmiciObjectiveCreator`."
+        warnings.warn(message, DeprecationWarning, stacklevel=2)
+        creator = self.create_objective_creator()
+        return creator.create_predictor(
+            objective=objective,
+            amici_output_fields=amici_output_fields,
+            post_processor=post_processor,
+            post_processor_sensi=post_processor_sensi,
+            post_processor_time=post_processor_time,
+            max_chunk_size=max_chunk_size,
+            output_ids=output_ids,
+            condition_ids=condition_ids,
+        )
+
     def rdatas_to_measurement_df(
         self,
         rdatas: Sequence[amici.ReturnData],
         model: amici.Model = None,
         verbose: bool = True,
     ) -> pd.DataFrame:
-        """
-        Create a measurement dataframe in the petab format.
-
-        Parameters
-        ----------
-        rdatas:
-            A list of rdatas as produced by
-            ``pypesto.AmiciObjective.__call__(x, return_dict=True)['rdatas']``.
-        model:
-            The amici model.
-        verbose:
-            Passed to AMICI's model compilation. If True, the compilation
-            progress is printed.
-
-        Returns
-        -------
-        A dataframe built from the rdatas in the format as in
-        ``self.petab_problem.measurement_df``.
-        """
-        # create model
-        if model is None:
-            model = self.create_model(verbose=verbose)
-
-        measurement_df = self.petab_problem.measurement_df
-
-        return amici.petab.simulations.rdatas_to_measurement_df(
-            rdatas, model, measurement_df
+        """See :meth:`AmiciObjectiveCreator.rdatas_to_measurement_df`."""
+        raise NotImplementedError(
+            "This function has been moved to `AmiciObjectiveCreator`."
         )
 
     def rdatas_to_simulation_df(
@@ -855,8 +538,8 @@ def rdatas_to_simulation_df(
         Except a petab simulation dataframe is created, i.e. the measurement
         column label is adjusted.
         """
-        return self.rdatas_to_measurement_df(rdatas, model).rename(
-            columns={petab.MEASUREMENT: petab.SIMULATION}
+        raise NotImplementedError(
+            "This function has been moved to `AmiciObjectiveCreator`."
         )
 
     def prediction_to_petab_measurement_df(
@@ -882,24 +565,9 @@ def prediction_to_petab_measurement_df(
         A dataframe built from the rdatas in the format as in
         ``self.petab_problem.measurement_df``.
         """
-
-        # create rdata-like dicts from the prediction result
-        @dataclass
-        class FakeRData:
-            ts: np.ndarray
-            y: np.ndarray
-
-        rdatas = [
-            FakeRData(ts=condition.timepoints, y=condition.output)
-            for condition in prediction.conditions
-        ]
-
-        # add an AMICI model, if possible
-        model = None
-        if predictor is not None:
-            model = predictor.amici_objective.amici_model
-
-        return self.rdatas_to_measurement_df(rdatas, model)
+        raise NotImplementedError(
+            "This function has been moved to `AmiciObjectiveCreator`."
+        )
 
     def prediction_to_petab_simulation_df(
         self,
@@ -912,9 +580,9 @@ def prediction_to_petab_simulation_df(
         Except a PEtab simulation dataframe is created, i.e. the measurement
         column label is adjusted.
         """
-        return self.prediction_to_petab_measurement_df(
-            prediction, predictor
-        ).rename(columns={petab.MEASUREMENT: petab.SIMULATION})
+        raise NotImplementedError(
+            "This function has been moved to `AmiciObjectiveCreator`."
+        )
 
 
 def _find_output_folder_name(
@@ -962,148 +630,3 @@ def _find_output_folder_name(
 def _find_model_name(output_folder: str) -> str:
     """Just re-use the last part of the output folder."""
     return os.path.split(os.path.normpath(output_folder))[-1]
-
-
-def get_petab_non_quantitative_data_types(
-    petab_problem: petab.Problem,
-) -> set[str]:
-    """
-    Get the data types from the PEtab problem.
-
-    Parameters
-    ----------
-    petab_problem:
-        The PEtab problem.
-
-    Returns
-    -------
-    data_types:
-        A list of the data types.
-    """
-    non_quantitative_data_types = set()
-    caught_observables = set()
-    # For ordinal, censored and semiquantitative data, search
-    # for the corresponding data types in the measurement table
-    meas_df = petab_problem.measurement_df
-    if MEASUREMENT_TYPE in meas_df.columns:
-        petab_data_types = meas_df[MEASUREMENT_TYPE].unique()
-        for data_type in [ORDINAL, SEMIQUANTITATIVE] + CENSORING_TYPES:
-            if data_type in petab_data_types:
-                non_quantitative_data_types.add(
-                    CENSORED if data_type in CENSORING_TYPES else data_type
-                )
-                caught_observables.update(
-                    set(
-                        meas_df[meas_df[MEASUREMENT_TYPE] == data_type][
-                            OBSERVABLE_ID
-                        ]
-                    )
-                )
-
-    # For relative data, search for parameters to estimate with
-    # a scaling/offset/sigma parameter type
-    if PARAMETER_TYPE in petab_problem.parameter_df.columns:
-        # get the df with non-nan parameter types
-        par_df = petab_problem.parameter_df[
-            petab_problem.parameter_df[PARAMETER_TYPE].notna()
-        ]
-        for par_id, row in par_df.iterrows():
-            if not row[ESTIMATE]:
-                continue
-            if row[PARAMETER_TYPE] in [
-                InnerParameterType.SCALING,
-                InnerParameterType.OFFSET,
-            ]:
-                non_quantitative_data_types.add(RELATIVE)
-
-            # For sigma parameters, we need to check if they belong
-            # to an observable with a non-quantitative data type
-            elif row[PARAMETER_TYPE] == InnerParameterType.SIGMA:
-                corresponding_observables = set(
-                    meas_df[meas_df[NOISE_PARAMETERS] == par_id][OBSERVABLE_ID]
-                )
-                if not (corresponding_observables & caught_observables):
-                    non_quantitative_data_types.add(RELATIVE)
-
-    # TODO this can be made much shorter if the relative measurements
-    # are also specified in the measurement table, but that would require
-    # changing the PEtab format of a lot of benchmark models.
-
-    if len(non_quantitative_data_types) == 0:
-        return None
-    return non_quantitative_data_types
-
-
-class PetabStartpoints(CheckedStartpoints):
-    """Startpoint method for PEtab problems.
-
-    Samples optimization startpoints from the distributions defined in the
-    provided PEtab problem. The PEtab-problem is copied.
-    """
-
-    def __init__(self, petab_problem: petab.Problem, **kwargs):
-        super().__init__(**kwargs)
-        self._parameter_df = petab_problem.parameter_df.copy()
-        self._priors: list[tuple] | None = None
-        self._free_ids: list[str] | None = None
-
-    def _setup(
-        self,
-        pypesto_problem: Problem,
-    ):
-        """Update priors if necessary.
-
-        Check if ``problem.x_free_indices`` changed since last call, and if so,
-        get the corresponding priors from PEtab.
-        """
-        current_free_ids = np.asarray(pypesto_problem.x_names)[
-            pypesto_problem.x_free_indices
-        ]
-
-        if (
-            self._priors is not None
-            and len(current_free_ids) == len(self._free_ids)
-            and np.all(current_free_ids == self._free_ids)
-        ):
-            # no need to update
-            return
-
-        # update priors
-        self._free_ids = current_free_ids
-        id_to_prior = dict(
-            zip(
-                self._parameter_df.index[self._parameter_df[ESTIMATE] == 1],
-                petab.parameters.get_priors_from_df(
-                    self._parameter_df, mode=petab.INITIALIZATION
-                ),
-            )
-        )
-
-        self._priors = list(map(id_to_prior.__getitem__, current_free_ids))
-
-    def __call__(
-        self,
-        n_starts: int,
-        problem: Problem,
-    ) -> np.ndarray:
-        """Call the startpoint method."""
-        # Update the list of priors if needed
-        self._setup(pypesto_problem=problem)
-
-        return super().__call__(n_starts, problem)
-
-    def sample(
-        self,
-        n_starts: int,
-        lb: np.ndarray,
-        ub: np.ndarray,
-    ) -> np.ndarray:
-        """Actual startpoint sampling.
-
-        Must only be called through `self.__call__` to ensure that the list of priors
-        matches the currently free parameters in the :class:`pypesto.Problem`.
-        """
-        sampler = partial(petab.sample_from_prior, n_starts=n_starts)
-        startpoints = list(map(sampler, self._priors))
-
-        return np.array(startpoints).T
diff --git a/pypesto/petab/objective_creator.py b/pypesto/petab/objective_creator.py
new file mode 100644
index 000000000..72f98cf03
--- /dev/null
+++ b/pypesto/petab/objective_creator.py
@@ -0,0 +1,853 @@
+"""Contains the ObjectiveCreator class."""
+
+from __future__ import annotations
+
+import logging
+import numbers
+import os
+import re
+import shutil
+import sys
+import warnings
+from abc import ABC, abstractmethod
+from collections.abc import Iterable, Sequence
+from dataclasses import dataclass
+from typing import (
+    Any,
+    Callable,
+)
+
+import numpy as np
+import pandas as pd
+import petab.v1 as petab
+from petab.v1.C import (
+    OBSERVABLE_FORMULA,
+    PREEQUILIBRATION_CONDITION_ID,
+    SIMULATION_CONDITION_ID,
+)
+from petab.v1.models import MODEL_TYPE_SBML
+from petab.v1.models.sbml_model import SbmlModel
+from petab.v1.parameter_mapping import ParMappingDictQuadruple
+from petab.v1.simulate import Simulator
+
+from ..C import CENSORED, CONDITION_SEP, LIN, ORDINAL, SEMIQUANTITATIVE
+from ..hierarchical.inner_calculator_collector import InnerCalculatorCollector
+from ..objective import AmiciObjective, ObjectiveBase, PetabSimulatorObjective
+from ..objective.amici import AmiciObjectBuilder
+from ..objective.roadrunner import (
+    ExpData,
+    RoadRunnerCalculator,
+    RoadRunnerObjective,
+)
+from ..predict import AmiciPredictor
+from ..result import PredictionResult
+
+try:
+    import amici
+    import amici.petab
+    import amici.petab.conditions
+    import amici.petab.parameter_mapping
+    import amici.petab.simulations
+    from amici.petab.import_helpers import check_model
+except ImportError:
+    amici = None
+try:
+    import libsbml
+    import roadrunner
+except ImportError:
+    roadrunner = None
+    libsbml = None
+
+logger = logging.getLogger(__name__)
+
+
+class ObjectiveCreator(ABC):
+    """Abstract base class of all creators of objective functions."""
+
+    @abstractmethod
+    def create_objective(self, **kwargs) -> ObjectiveBase:
+        """Create and return the objective; implemented by subclasses."""
+        pass
+
+
+class AmiciObjectiveCreator(ObjectiveCreator, AmiciObjectBuilder):
+    """ObjectiveCreator for creating an amici objective function."""
+
+    def __init__(
+        self,
+        petab_problem: petab.Problem,
+        hierarchical: bool = False,
+        non_quantitative_data_types: Iterable[str] | None = None,
+        inner_options: dict[str, Any] | None = None,
+        output_folder: str | None = None,
+        model_name: str | None = None,
+        validate_petab: bool = True,
+    ) -> None:
+        """Initialize the creator; all arguments are stored as attributes.
+
+        Parameters
+        ----------
+        petab_problem:
+            The PEtab problem.
+        hierarchical:
+            Whether to use hierarchical optimization.
+        non_quantitative_data_types:
+            The non-quantitative data types to consider.
+        inner_options:
+            Options for the inner optimization.
+        output_folder:
+            The output folder for the compiled model.
+        model_name:
+            The name of the model; used as the module name when the
+            compiled model is imported.
+        validate_petab:
+            Whether to check the PEtab problem for errors.
+        """
+        self.petab_problem = petab_problem
+        self._hierarchical = hierarchical
+        self._non_quantitative_data_types = non_quantitative_data_types
+        self.inner_options = inner_options
+        self.output_folder = output_folder
+        self.model_name = model_name
+        self.validate_petab = validate_petab
+
+    def create_model(
+        self,
+        force_compile: bool = False,
+        verbose: bool = True,
+        **kwargs,
+    ) -> amici.Model:
+        """
+        Compile the amici model if necessary, then import and return it.
+
+        Parameters
+        ----------
+        force_compile:
+            If False, the model is compiled only if needed, e.g. if the output
+            folder does not exist yet. If True, the output folder is deleted
+            and the model (re-)compiled in either case.
+
+            .. warning::
+                If `force_compile`, then an existing folder of that name will
+                be deleted.
+        verbose:
+            Passed to AMICI's model compilation. If True, the compilation
+            progress is printed.
+        kwargs:
+            Extra arguments passed on to :meth:`compile_model`.
+        """
+        # courtesy check whether target is folder
+        if os.path.exists(self.output_folder) and not os.path.isdir(
+            self.output_folder
+        ):
+            raise AssertionError(
+                f"Refusing to remove {self.output_folder} for model "
+                f"compilation: Not a folder."
+            )
+
+        # make the compiled model module importable
+        if self.output_folder not in sys.path:
+            sys.path.insert(0, self.output_folder)
+
+        # compile; ``validate`` is only passed on for SBML models
+        if self._must_compile(force_compile):
+            logger.info(
+                f"Compiling amici model to folder " f"{self.output_folder}."
+            )
+            if self.petab_problem.model.type_id == MODEL_TYPE_SBML:
+                self.compile_model(
+                    validate=self.validate_petab,
+                    verbose=verbose,
+                    **kwargs,
+                )
+            else:
+                self.compile_model(verbose=verbose, **kwargs)
+        else:
+            logger.debug(
+                f"Using existing amici model in folder "
+                f"{self.output_folder}."
+            )
+
+        return self._create_model()
+
+    def _create_model(self) -> amici.Model:
+        """Load model module and return the model, no checks/compilation."""
+        # load module
+        module = amici.import_model_module(
+            module_name=self.model_name, module_path=self.output_folder
+        )
+        model = module.getModel()
+        check_model(
+            amici_model=model,
+            petab_problem=self.petab_problem,
+        )
+
+        return model
+
+    def _must_compile(self, force_compile: bool) -> bool:
+        """Check whether the model needs to be compiled first."""
+        # asked by user
+        if force_compile:
+            return True
+
+        # folder does not exist or is empty
+        if not os.path.exists(self.output_folder) or not os.listdir(
+            self.output_folder
+        ):
+            return True
+
+        # try to import (in particular checks version)
+        try:
+            # importing will already raise an exception if version wrong
+            amici.import_model_module(self.model_name, self.output_folder)
+        except ModuleNotFoundError:
+            return True
+        except amici.AmiciVersionError as e:
+            logger.info(
+                "amici model will be re-imported due to version "
+                f"mismatch: {e}"
+            )
+            return True
+
+        # no need to (re-)compile
+        return False
+
+    def compile_model(self, **kwargs) -> None:
+        """
+        Compile the model via ``amici.petab.import_petab_problem``.
+
+        If the output folder exists already, it is first deleted.
+
+        Parameters
+        ----------
+        kwargs:
+            Extra arguments passed to :meth:`amici.sbml_import.SbmlImporter.sbml2amici`
+            or :func:`amici.pysb_import.pysb2amici`.
+        """
+        # delete output directory
+        if os.path.exists(self.output_folder):
+            shutil.rmtree(self.output_folder)
+
+        amici.petab.import_petab_problem(
+            petab_problem=self.petab_problem,
+            model_name=self.model_name,
+            model_output_dir=self.output_folder,
+            **kwargs,
+        )
+
+    def create_solver(
+        self,
+        model: amici.Model = None,
+        verbose: bool = True,
+    ) -> amici.Solver:
+        """Return the solver of ``model``.
+
+        If no model is passed, it is created via :meth:`create_model`.
+        """
+        if model is None:
+            model = self.create_model(verbose=verbose)
+        return model.getSolver()
+
+    def create_edatas(
+        self,
+        model: amici.Model = None,
+        simulation_conditions=None,
+        verbose: bool = True,
+    ) -> list[amici.ExpData]:
+        """Create list of :class:`amici.amici.ExpData` objects."""
+        # fall back to the model provided by :meth:`create_model`
+        amici_model = model
+        if amici_model is None:
+            amici_model = self.create_model(verbose=verbose)
+        return amici.petab.conditions.create_edatas(
+            amici_model=amici_model,
+            petab_problem=self.petab_problem,
+            simulation_conditions=simulation_conditions,
+        )
+
+    def create_objective(
+        self,
+        model: amici.Model = None,
+        solver: amici.Solver = None,
+        edatas: Sequence[amici.ExpData] = None,
+        force_compile: bool = False,
+        verbose: bool = True,
+        **kwargs,
+    ) -> AmiciObjective:
+        """Create a :class:`pypesto.objective.AmiciObjective`.
+
+        Parameters
+        ----------
+        model:
+            The AMICI model.
+        solver:
+            The AMICI solver.
+        edatas:
+            The experimental data in AMICI format.
+        force_compile:
+            Whether to force-compile the model if not passed.
+        verbose:
+            Passed to AMICI's model compilation. If True, the compilation
+            progress is printed.
+        **kwargs:
+            Additional arguments passed on to the objective. In case of ordinal
+            or semiquantitative measurements, ``inner_options`` can optionally
+            be passed here. If none are given, ``inner_options`` given to the
+            importer constructor (or inner defaults) will be chosen.
+
+        Returns
+        -------
+        A :class:`pypesto.objective.AmiciObjective` for the model and the data.
+        """
+        simulation_conditions = petab.get_simulation_conditions(
+            self.petab_problem.measurement_df
+        )
+        if model is None:
+            model = self.create_model(
+                force_compile=force_compile, verbose=verbose
+            )
+        if solver is None:
+            solver = self.create_solver(model)
+        # create conditions and edatas from measurement data
+        if edatas is None:
+            edatas = self.create_edatas(
+                model=model, simulation_conditions=simulation_conditions
+            )
+        parameter_mapping = (
+            amici.petab.parameter_mapping.create_parameter_mapping(
+                petab_problem=self.petab_problem,
+                simulation_conditions=simulation_conditions,
+                scaled_parameters=True,
+                amici_model=model,
+                fill_fixed_parameters=False,
+            )
+        )
+        par_ids = self.petab_problem.x_ids
+
+        # fill in dummy parameters (this is needed since some objective
+        #  initialization e.g. checks for preeq parameters)
+        problem_parameters = dict(
+            zip(self.petab_problem.x_ids, self.petab_problem.x_nominal_scaled)
+        )
+        amici.petab.conditions.fill_in_parameters(
+            edatas=edatas,
+            problem_parameters=problem_parameters,
+            scaled_parameters=True,
+            parameter_mapping=parameter_mapping,
+            amici_model=model,
+        )
+
+        calculator = None
+        amici_reporting = None
+
+        if (
+            self._non_quantitative_data_types is not None
+            and self._hierarchical
+        ):
+            inner_options = kwargs.pop("inner_options", None)
+            inner_options = (
+                inner_options
+                if inner_options is not None
+                else self.inner_options
+            )
+            calculator = InnerCalculatorCollector(
+                self._non_quantitative_data_types,
+                self.petab_problem,
+                model,
+                edatas,
+                inner_options,
+            )
+            amici_reporting = amici.RDataReporting.full
+
+            # FIXME: currently not supported with hierarchical
+            if "guess_steadystate" in kwargs and kwargs["guess_steadystate"]:
+                warnings.warn(
+                    "`guess_steadystate` not supported with hierarchical "
+                    "optimization. Disabling `guess_steadystate`.",
+                    stacklevel=1,
+                )
+            kwargs["guess_steadystate"] = False
+            inner_parameter_ids = calculator.get_inner_par_ids()
+            par_ids = [x for x in par_ids if x not in inner_parameter_ids]
+
+        max_sensi_order = kwargs.get("max_sensi_order", None)
+
+        if (
+            self._non_quantitative_data_types is not None
+            and any(
+                data_type in self._non_quantitative_data_types
+                for data_type in [ORDINAL, CENSORED, SEMIQUANTITATIVE]
+            )
+            and max_sensi_order is not None
+            and max_sensi_order > 1
+        ):
+            raise ValueError(
+                "Ordinal, censored and semiquantitative data cannot be used "
+                "with second order sensitivities. Use an up to first order "
+                "method or disable ordinal, censored and semiquantitative data."
+            )
+
+        # create objective
+        obj = AmiciObjective(
+            amici_model=model,
+            amici_solver=solver,
+            edatas=edatas,
+            x_ids=par_ids,
+            x_names=par_ids,
+            parameter_mapping=parameter_mapping,
+            amici_object_builder=self,
+            calculator=calculator,
+            amici_reporting=amici_reporting,
+            **kwargs,
+        )
+
+        return obj
+
+    def create_predictor(
+        self,
+        objective: AmiciObjective = None,
+        amici_output_fields: Sequence[str] = None,
+        post_processor: Callable | None = None,
+        post_processor_sensi: Callable | None = None,
+        post_processor_time: Callable | None = None,
+        max_chunk_size: int | None = None,
+        output_ids: Sequence[str] = None,
+        condition_ids: Sequence[str] = None,
+    ) -> AmiciPredictor:
+        """Create a :class:`pypesto.predict.AmiciPredictor`.
+
+        The `AmiciPredictor` facilitates generation of predictions from
+        parameter vectors.
+
+        Parameters
+        ----------
+        objective:
+            An objective object, which will be used to get model simulations
+        amici_output_fields:
+            keys that exist in the return data object from AMICI, which should
+            be available for the post-processors
+        post_processor:
+            A callable function which applies postprocessing to the simulation
+            results. Default are the observables of the AMICI model.
+            This method takes a list of ndarrays (as returned in the field
+            ['y'] of amici ReturnData objects) as input.
+        post_processor_sensi:
+            A callable function which applies postprocessing to the
+            sensitivities of the simulation results. Default are the
+            observable sensitivities of the AMICI model.
+            This method takes two lists of ndarrays (as returned in the
+            fields ['y'] and ['sy'] of amici ReturnData objects) as input.
+        post_processor_time:
+            A callable function which applies postprocessing to the timepoints
+            of the simulations. Default are the timepoints of the amici model.
+            This method takes a list of ndarrays (as returned in the field
+            ['t'] of amici ReturnData objects) as input.
+        max_chunk_size:
+            In some cases, we don't want to compute all predictions at once
+            when calling the prediction function, as this might not fit into
+            the memory for large datasets and models.
+            Here, the user can specify a maximum number of conditions, which
+            should be simulated at a time.
+            Default is ``None``, which is forwarded to the predictor unchanged.
+            Other values are only applicable, if an output file is specified.
+        output_ids:
+            IDs of outputs, if post-processing is used
+        condition_ids:
+            IDs of conditions, if post-processing is used
+
+        Returns
+        -------
+        A :class:`pypesto.predict.AmiciPredictor` for the model, using
+        the outputs of the AMICI model and the timepoints from the PEtab data.
+        """
+        # if the user didn't pass an objective function, we create it first
+        if objective is None:
+            objective = self.create_objective()
+
+        # create identifiers from the preequilibration and simulation
+        # condition ids, which can then be stored in the prediction result
+        edata_conditions = objective.amici_object_builder.petab_problem.get_simulation_conditions_from_measurement_df()
+        if PREEQUILIBRATION_CONDITION_ID not in list(edata_conditions.columns):
+            preeq_dummy = [""] * edata_conditions.shape[0]
+            edata_conditions[PREEQUILIBRATION_CONDITION_ID] = preeq_dummy
+        edata_conditions.drop_duplicates(inplace=True)
+
+        if condition_ids is None:
+            condition_ids = [
+                edata_conditions.loc[id, PREEQUILIBRATION_CONDITION_ID]
+                + CONDITION_SEP
+                + edata_conditions.loc[id, SIMULATION_CONDITION_ID]
+                for id in edata_conditions.index
+            ]
+
+        # wrap around AmiciPredictor
+        predictor = AmiciPredictor(
+            amici_objective=objective,
+            amici_output_fields=amici_output_fields,
+            post_processor=post_processor,
+            post_processor_sensi=post_processor_sensi,
+            post_processor_time=post_processor_time,
+            max_chunk_size=max_chunk_size,
+            output_ids=output_ids,
+            condition_ids=condition_ids,
+        )
+
+        return predictor
+
+    def rdatas_to_measurement_df(
+        self,
+        rdatas: Sequence[amici.ReturnData],
+        model: amici.Model = None,
+        verbose: bool = True,
+    ) -> pd.DataFrame:
+        """
+        Create a measurement dataframe in the petab format.
+
+        Parameters
+        ----------
+        rdatas:
+            A list of rdatas as produced by
+            ``pypesto.AmiciObjective.__call__(x, return_dict=True)['rdatas']``.
+        model:
+            The amici model.
+        verbose:
+            Passed to AMICI's model compilation. If True, the compilation
+            progress is printed.
+
+        Returns
+        -------
+        A dataframe built from the rdatas in the format as in
+        ``self.petab_problem.measurement_df``.
+        """
+        # create model
+        if model is None:
+            model = self.create_model(verbose=verbose)
+
+        measurement_df = self.petab_problem.measurement_df
+
+        return amici.petab.simulations.rdatas_to_measurement_df(
+            rdatas, model, measurement_df
+        )
+
+    def rdatas_to_simulation_df(
+        self,
+        rdatas: Sequence[amici.ReturnData],
+        model: amici.Model = None,
+    ) -> pd.DataFrame:
+        """
+        See :meth:`rdatas_to_measurement_df`.
+
+        Except a petab simulation dataframe is created, i.e. the measurement
+        column label is adjusted.
+        """
+        return self.rdatas_to_measurement_df(rdatas, model).rename(
+            columns={petab.MEASUREMENT: petab.SIMULATION}
+        )
+
+    def prediction_to_petab_measurement_df(
+        self,
+        prediction: PredictionResult,
+        predictor: AmiciPredictor = None,
+    ) -> pd.DataFrame:
+        """
+        Cast prediction into a dataframe.
+
+        If a PEtab problem is simulated without post-processing, then the
+        result can be cast into a PEtab measurement or simulation dataframe
+
+        Parameters
+        ----------
+        prediction:
+            A prediction result as produced by an :class:`pypesto.predict.AmiciPredictor`.
+        predictor:
+            The :class:`pypesto.predict.AmiciPredictor` instance.
+
+        Returns
+        -------
+        A dataframe built from the rdatas in the format as in
+        ``self.petab_problem.measurement_df``.
+        """
+
+        # create rdata-like dicts from the prediction result
+        @dataclass
+        class FakeRData:
+            ts: np.ndarray
+            y: np.ndarray
+
+        rdatas = [
+            FakeRData(ts=condition.timepoints, y=condition.output)
+            for condition in prediction.conditions
+        ]
+
+        # add an AMICI model, if possible
+        model = None
+        if predictor is not None:
+            model = predictor.amici_objective.amici_model
+
+        return self.rdatas_to_measurement_df(rdatas, model)
+
+    def prediction_to_petab_simulation_df(
+        self,
+        prediction: PredictionResult,
+        predictor: AmiciPredictor = None,
+    ) -> pd.DataFrame:
+        """
+        See :meth:`prediction_to_petab_measurement_df`.
+
+        Except a PEtab simulation dataframe is created, i.e. the measurement
+        column label is adjusted.
+        """
+        return self.prediction_to_petab_measurement_df(
+            prediction, predictor
+        ).rename(columns={petab.MEASUREMENT: petab.SIMULATION})
+
+
+class PetabSimulatorObjectiveCreator(ObjectiveCreator):
+    """ObjectiveCreator for creating an objective based on a PEtabSimulator."""
+
+    def __init__(
+        self,
+        petab_problem: petab.Problem,
+        simulator: Simulator,
+    ):
+        self.petab_problem = petab_problem
+        self.simulator = simulator
+
+    def create_objective(self, **kwargs):
+        """Create a PEtabSimulatorObjective."""
+        return PetabSimulatorObjective(self.simulator)
+
+
+class RoadRunnerObjectiveCreator(ObjectiveCreator):
+    """ObjectiveCreator for creating an objective for a RoadRunner model."""
+
+    def __init__(
+        self,
+        petab_problem: petab.Problem,
+        rr: roadrunner.RoadRunner | None = None,
+    ):
+        self.petab_problem = petab_problem
+        if rr is None:
+            if roadrunner is None:
+                raise ImportError(
+                    "The `roadrunner` package is required for this objective "
+                    "function."
+                )
+            rr = roadrunner.RoadRunner()
+        self.rr = rr
+
+    def _check_noise_formulae(
+        self,
+        edatas: list[ExpData] | None = None,
+        parameter_mapping: list[ParMappingDictQuadruple] | None = None,
+    ):
+        """Check if the noise formulae are valid.
+
+        Currently, only static values or singular parameters are supported.
+        Complex formulae are not supported.
+        """
+        # check that parameter mapping is available
+        if parameter_mapping is None:
+            parameter_mapping = self.create_parameter_mapping()
+        # check that edatas are available
+        if edatas is None:
+            edatas = self.create_edatas()
+        # save formulae that need to be changed
+        to_change = []
+        # check that noise formulae are valid
+        for i_edata, (edata, par_map) in enumerate(
+            zip(edatas, parameter_mapping)
+        ):
+            for j_formula, noise_formula in enumerate(edata.noise_formulae):
+                # constant values are allowed
+                if isinstance(noise_formula, numbers.Number):
+                    continue
+                # single parameters are allowed
+                if noise_formula in par_map[1].keys():
+                    continue
+                # extract the observable name via regex pattern
+                pattern = r"noiseParameter1_(.*?)($|\s)"
+                observable_name = re.search(pattern, noise_formula).group(1)
+                to_change.append((i_edata, j_formula, observable_name))
+        # change formulae
+        formulae_changed = []
+        for i_edata, j_formula, obs_name in to_change:
+            # assign new parameter, formula in RR and parameter into mapping
+            original_formula = edatas[i_edata].noise_formulae[j_formula]
+            edatas[i_edata].noise_formulae[
+                j_formula
+            ] = f"noiseFormula_{obs_name}"
+            # different conditions will have the same noise formula
+            if (obs_name, original_formula) not in formulae_changed:
+                self.rr.addParameter(f"noiseFormula_{obs_name}", 0.0, False)
+                self.rr.addAssignmentRule(
+                    f"noiseFormula_{obs_name}",
+                    original_formula,
+                    forceRegenerate=False,
+                )
+                self.rr.regenerateModel()
+                formulae_changed.append((obs_name, original_formula))
+
+    def _write_observables_to_model(self):
+        """Write observables of petab problem to the model."""
+        # add all observables as species
+        for obs_id in self.petab_problem.observable_df.index:
+            self.rr.addParameter(obs_id, 0.0, False)
+        # extract all parameters from observable formulas
+        parameters = petab.get_output_parameters(
+            self.petab_problem.observable_df,
+            self.petab_problem.model,
+            noise=True,
+            observables=True,
+        )
+        # add all parameters to the model
+        for param_id in parameters:
+            self.rr.addParameter(param_id, 0.0, False)
+        formulae = self.petab_problem.observable_df[
+            OBSERVABLE_FORMULA
+        ].to_dict()
+
+        # add all observable formulas as assignment rules
+        for obs_id, formula in formulae.items():
+            self.rr.addAssignmentRule(obs_id, formula, forceRegenerate=False)
+
+        # regenerate model to apply changes
+        self.rr.regenerateModel()
+
+    def create_edatas(self) -> list[ExpData]:
+        """Create a List of :class:`ExpData` objects from the PEtab problem."""
+        # Create Dataframes per condition
+        return ExpData.from_petab_problem(self.petab_problem)
+
+    def fill_model(self):
+        """Fill the RoadRunner model inplace from the PEtab problem.
+
+        Raises
+        ------
+        ValueError:
+            If the PEtab model is not an SBML model or contains no SBML.
+        """
+        if not isinstance(self.petab_problem.model, SbmlModel):
+            raise ValueError(
+                "The model is not an SBML model. Using "
+                "RoadRunner as simulator requires an SBML model."
+            )  # TODO: add Pysb support
+        if self.petab_problem.model.sbml_document:
+            sbml_document = self.petab_problem.model.sbml_document
+        elif self.petab_problem.model.sbml_model:
+            sbml_document = (
+                self.petab_problem.model.sbml_model.getSBMLDocument()
+            )
+        else:
+            raise ValueError("No SBML model found.")
+        sbml_writer = libsbml.SBMLWriter()
+        sbml_string = sbml_writer.writeSBMLToString(sbml_document)
+        self.rr.load(sbml_string)
+        self._write_observables_to_model()
+
+    def create_parameter_mapping(self):
+        """Create a parameter mapping from the PEtab problem."""
+        simulation_conditions = (
+            self.petab_problem.get_simulation_conditions_from_measurement_df()
+        )
+        mapping = petab.get_optimization_to_simulation_parameter_mapping(
+            condition_df=self.petab_problem.condition_df,
+            measurement_df=self.petab_problem.measurement_df,
+            parameter_df=self.petab_problem.parameter_df,
+            observable_df=self.petab_problem.observable_df,
+            model=self.petab_problem.model,
+        )
+        # check whether any species in the condition table are assigned
+        species = self.rr.model.getFloatingSpeciesIds()
+        # overrides in parameter table are handled already
+        overrides = [
+            specie
+            for specie in species
+            if specie in self.petab_problem.condition_df.columns
+        ]
+        if not overrides:
+            return mapping
+        for (_, condition), mapping_per_condition in zip(
+            simulation_conditions.iterrows(), mapping
+        ):
+            for override in overrides:
+                preeq_id = condition.get(PREEQUILIBRATION_CONDITION_ID)
+                sim_id = condition.get(SIMULATION_CONDITION_ID)
+                if preeq_id:
+                    parameter_id_or_value = (
+                        self.petab_problem.condition_df.loc[preeq_id, override]
+                    )
+                    mapping_per_condition[0][override] = parameter_id_or_value
+                    if isinstance(parameter_id_or_value, str):
+                        mapping_per_condition[2][
+                            override
+                        ] = self.petab_problem.parameter_df.loc[
+                            parameter_id_or_value, petab.PARAMETER_SCALE
+                        ]
+                    elif isinstance(parameter_id_or_value, numbers.Number):
+                        mapping_per_condition[2][override] = LIN
+                    else:
+                        raise ValueError(
+                            "The parameter value in the condition table "
+                            "is not a number or a parameter ID."
+                        )
+                if sim_id:
+                    parameter_id_or_value = (
+                        self.petab_problem.condition_df.loc[sim_id, override]
+                    )
+                    mapping_per_condition[1][override] = parameter_id_or_value
+                    if isinstance(parameter_id_or_value, str):
+                        mapping_per_condition[3][
+                            override
+                        ] = self.petab_problem.parameter_df.loc[
+                            parameter_id_or_value, petab.PARAMETER_SCALE
+                        ]
+                    elif isinstance(parameter_id_or_value, numbers.Number):
+                        mapping_per_condition[3][override] = LIN
+                    else:
+                        raise ValueError(
+                            "The parameter value in the condition table "
+                            "is not a number or a parameter ID."
+                        )
+        return mapping
+
+    def create_objective(
+        self,
+        rr: roadrunner.RoadRunner | None = None,
+        edatas: list[ExpData] | None = None,
+    ) -> RoadRunnerObjective:
+        """Create a :class:`pypesto.objective.RoadRunnerObjective`.
+
+        Parameters
+        ----------
+        rr:
+            RoadRunner instance. If None, ``self.rr`` is filled and used.
+        edatas:
+            List of ExpData objects. If None, created from the PEtab problem.
+        """
+        roadrunner_instance = rr
+        if roadrunner_instance is None:
+            roadrunner_instance = self.rr
+            self.fill_model()
+        if edatas is None:
+            edatas = self.create_edatas()
+
+        parameter_mapping = self.create_parameter_mapping()
+
+        # get x_names
+        x_names = self.petab_problem.get_x_ids()
+
+        calculator = RoadRunnerCalculator()
+
+        # run the check for noise formulae
+        self._check_noise_formulae(edatas, parameter_mapping)
+
+        return RoadRunnerObjective(
+            rr=roadrunner_instance,
+            edatas=edatas,
+            parameter_mapping=parameter_mapping,
+            petab_problem=self.petab_problem,
+            calculator=calculator,
+            x_names=x_names,
+            x_ids=x_names,
+        )
diff --git a/pypesto/petab/util.py b/pypesto/petab/util.py
new file mode 100644
index 000000000..81b3590c5
--- /dev/null
+++ b/pypesto/petab/util.py
@@ -0,0 +1,171 @@
+from functools import partial
+
+import numpy as np
+
+try:
+    import petab.v1 as petab
+    from petab.v1.C import (
+        ESTIMATE,
+        NOISE_PARAMETERS,
+        OBSERVABLE_ID,
+    )
+except ImportError:
+    petab = None
+
+from ..C import (
+    CENSORED,
+    CENSORING_TYPES,
+    MEASUREMENT_TYPE,
+    ORDINAL,
+    PARAMETER_TYPE,
+    RELATIVE,
+    SEMIQUANTITATIVE,
+    InnerParameterType,
+)
+from ..problem import Problem
+from ..startpoint import CheckedStartpoints
+
+
+def get_petab_non_quantitative_data_types(
+    petab_problem: "petab.Problem",
+) -> "set[str] | None":
+    """
+    Get the non-quantitative data types from the PEtab problem.
+
+    Parameters
+    ----------
+    petab_problem:
+        The PEtab problem.
+
+    Returns
+    -------
+    data_types:
+        A set of the non-quantitative data types, or None if there are none.
+    """
+    non_quantitative_data_types = set()
+    caught_observables = set()
+    # For ordinal, censored and semiquantitative data, search
+    # for the corresponding data types in the measurement table
+    meas_df = petab_problem.measurement_df
+    if MEASUREMENT_TYPE in meas_df.columns:
+        petab_data_types = meas_df[MEASUREMENT_TYPE].unique()
+        for data_type in [ORDINAL, SEMIQUANTITATIVE] + CENSORING_TYPES:
+            if data_type in petab_data_types:
+                non_quantitative_data_types.add(
+                    CENSORED if data_type in CENSORING_TYPES else data_type
+                )
+                caught_observables.update(
+                    set(
+                        meas_df[meas_df[MEASUREMENT_TYPE] == data_type][
+                            OBSERVABLE_ID
+                        ]
+                    )
+                )
+
+    # For relative data, search for parameters to estimate with
+    # a scaling/offset/sigma parameter type
+    if PARAMETER_TYPE in petab_problem.parameter_df.columns:
+        # get the df with non-nan parameter types
+        par_df = petab_problem.parameter_df[
+            petab_problem.parameter_df[PARAMETER_TYPE].notna()
+        ]
+        for par_id, row in par_df.iterrows():
+            if not row[ESTIMATE]:
+                continue
+            if row[PARAMETER_TYPE] in [
+                InnerParameterType.SCALING,
+                InnerParameterType.OFFSET,
+            ]:
+                non_quantitative_data_types.add(RELATIVE)
+
+            # For sigma parameters, we need to check if they belong
+            # to an observable with a non-quantitative data type
+            elif row[PARAMETER_TYPE] == InnerParameterType.SIGMA:
+                corresponding_observables = set(
+                    meas_df[meas_df[NOISE_PARAMETERS] == par_id][OBSERVABLE_ID]
+                )
+                if not (corresponding_observables & caught_observables):
+                    non_quantitative_data_types.add(RELATIVE)
+
+    # TODO this can be made much shorter if the relative measurements
+    # are also specified in the measurement table, but that would require
+    # changing the PEtab format of a lot of benchmark models.
+
+    if len(non_quantitative_data_types) == 0:
+        return None
+    return non_quantitative_data_types
+
+
+class PetabStartpoints(CheckedStartpoints):
+    """Startpoint method for PEtab problems.
+
+    Samples optimization startpoints from the distributions defined in the
+    provided PEtab problem. The parameter table of the problem is copied.
+    """
+
+    def __init__(self, petab_problem: "petab.Problem", **kwargs):
+        super().__init__(**kwargs)
+        self._parameter_df = petab_problem.parameter_df.copy()
+        self._priors: list[tuple] | None = None
+        self._free_ids: list[str] | None = None
+
+    def _setup(
+        self,
+        pypesto_problem: Problem,
+    ):
+        """Update priors if necessary.
+
+        Check if ``problem.x_free_indices`` changed since last call, and if so,
+        get the corresponding priors from PEtab.
+        """
+        current_free_ids = np.asarray(pypesto_problem.x_names)[
+            pypesto_problem.x_free_indices
+        ]
+
+        if (
+            self._priors is not None
+            and len(current_free_ids) == len(self._free_ids)
+            and np.all(current_free_ids == self._free_ids)
+        ):
+            # no need to update
+            return
+
+        # update priors
+        self._free_ids = current_free_ids
+        id_to_prior = dict(
+            zip(
+                self._parameter_df.index[self._parameter_df[ESTIMATE] == 1],
+                petab.parameters.get_priors_from_df(
+                    self._parameter_df, mode=petab.INITIALIZATION
+                ),
+            )
+        )
+
+        self._priors = list(map(id_to_prior.__getitem__, current_free_ids))
+
+    def __call__(
+        self,
+        n_starts: int,
+        problem: Problem,
+    ) -> np.ndarray:
+        """Call the startpoint method."""
+        # Update the list of priors if needed
+        self._setup(pypesto_problem=problem)
+
+        return super().__call__(n_starts, problem)
+
+    def sample(
+        self,
+        n_starts: int,
+        lb: np.ndarray,
+        ub: np.ndarray,
+    ) -> np.ndarray:
+        """Actual startpoint sampling.
+
+        Must only be called through `self.__call__` to ensure that the list of priors
+        matches the currently free parameters in the :class:`pypesto.Problem`.
+        """
+        sampler = partial(petab.sample_from_prior, n_starts=n_starts)
+        startpoints = list(map(sampler, self._priors))
+
+        return np.array(startpoints).T
diff --git a/pypesto/problem/hierarchical.py b/pypesto/problem/hierarchical.py
index 07442d015..b6ed8f4f4 100644
--- a/pypesto/problem/hierarchical.py
+++ b/pypesto/problem/hierarchical.py
@@ -35,6 +35,11 @@ class HierarchicalProblem(Problem):
         Only relevant if hierarchical is True. Contains the bounds of easily
         interpretable inner parameters only, e.g. noise parameters, scaling
         factors, offsets.
+    inner_scales:
+        The scales for the inner optimization parameters. Only relevant if
+        hierarchical is True. Contains the scales of easily interpretable inner
+        parameters only, e.g. noise parameters, scaling factors, offsets. Can
+        be pypesto.C.{LIN,LOG,LOG10}. Used only for visualization purposes.
     semiquant_observable_ids:
         The ids of semiquantitative observables. Only relevant if hierarchical
         is True. If not None, the optimization result's `spline_knots` will be
@@ -77,6 +82,10 @@ def __init__(
         self.inner_lb = np.array(inner_lb)
         self.inner_ub = np.array(inner_ub)
 
+        self.inner_scales = (
+            self.objective.calculator.get_interpretable_inner_par_scales()
+        )
+
         self.semiquant_observable_ids = (
             self.objective.calculator.semiquant_observable_ids
         )
diff --git a/pypesto/profile/options.py b/pypesto/profile/options.py
index f2c9dc42a..bcc3b805e 100644
--- a/pypesto/profile/options.py
+++ b/pypesto/profile/options.py
@@ -32,9 +32,9 @@ class ProfileOptions(dict):
     reg_order:
         Maximum degree of regression polynomial used in regression based
         adaptive profile points proposal.
-    magic_factor_obj_value:
-        There is this magic factor in the old profiling code which slows down
-        profiling at small ratios (must be >= 0 and < 1).
+    adaptive_target_scaling_factor:
+        The scaling factor of the next_obj_target in next guess generation.
+        Larger values result in larger next_guess step size (must be > 1).
     whole_path:
         Whether to profile the whole bounds or only till we get below the
         ratio.
@@ -44,13 +44,13 @@ def __init__(
         self,
         default_step_size: float = 0.01,
         min_step_size: float = 0.001,
-        max_step_size: float = 1.0,
+        max_step_size: float = 0.1,
         step_size_factor: float = 1.25,
         delta_ratio_max: float = 0.1,
         ratio_min: float = 0.145,
         reg_points: int = 10,
         reg_order: int = 4,
-        magic_factor_obj_value: float = 0.5,
+        adaptive_target_scaling_factor: float = 1.5,
         whole_path: bool = False,
     ):
         super().__init__()
@@ -63,7 +63,7 @@ def __init__(
         self.delta_ratio_max = delta_ratio_max
         self.reg_points = reg_points
         self.reg_order = reg_order
-        self.magic_factor_obj_value = magic_factor_obj_value
+        self.adaptive_target_scaling_factor = adaptive_target_scaling_factor
         self.whole_path = whole_path
 
         self.validate()
@@ -112,5 +112,5 @@ def validate(self):
         if self.default_step_size < self.min_step_size:
             raise ValueError("default_step_size must be >= min_step_size.")
 
-        if self.magic_factor_obj_value < 0 or self.magic_factor_obj_value >= 1:
-            raise ValueError("magic_factor_obj_value must be >= 0 and < 1.")
+        if self.adaptive_target_scaling_factor < 1:
+            raise ValueError("adaptive_target_scaling_factor must be > 1.")
diff --git a/pypesto/profile/profile.py b/pypesto/profile/profile.py
index e4e124964..2df0b4f99 100644
--- a/pypesto/profile/profile.py
+++ b/pypesto/profile/profile.py
@@ -24,7 +24,7 @@ def parameter_profile(
     profile_index: Iterable[int] = None,
     profile_list: int = None,
     result_index: int = 0,
-    next_guess_method: Union[Callable, str] = "adaptive_step_regression",
+    next_guess_method: Union[Callable, str] = "adaptive_step_order_1",
     profile_options: ProfileOptions = None,
     progress_bar: bool = None,
     filename: Union[str, Callable, None] = None,
@@ -93,7 +93,9 @@ def parameter_profile(
     profile_options = ProfileOptions.create_instance(profile_options)
     profile_options.validate()
 
-    # create a function handle that will be called later to get the next point
+    # Create a function handle that will be called later to get the next point.
+    # This function will be used to generate the initial points of optimization
+    # steps in profiling in `walk_along_profile.py`
     if isinstance(next_guess_method, str):
 
         def create_next_guess(
@@ -104,6 +106,8 @@ def create_next_guess(
             current_profile_,
             problem_,
             global_opt_,
+            min_step_increase_factor_,
+            max_step_reduce_factor_,
         ):
             return next_guess(
                 x,
@@ -114,6 +118,8 @@ def create_next_guess(
                 current_profile_,
                 problem_,
                 global_opt_,
+                min_step_increase_factor_,
+                max_step_reduce_factor_,
             )
 
     elif callable(next_guess_method):
diff --git a/pypesto/profile/profile_next_guess.py b/pypesto/profile/profile_next_guess.py
index fd523e062..dd05b6b8b 100644
--- a/pypesto/profile/profile_next_guess.py
+++ b/pypesto/profile/profile_next_guess.py
@@ -1,3 +1,4 @@
+import logging
 from typing import Callable, Literal
 
 import numpy as np
@@ -6,6 +7,8 @@
 from ..result import ProfilerResult
 from .options import ProfileOptions
 
+logger = logging.getLogger(__name__)
+
 __all__ = ["next_guess", "fixed_step", "adaptive_step"]
 
 
@@ -23,6 +26,8 @@ def next_guess(
     current_profile: ProfilerResult,
     problem: Problem,
     global_opt: float,
+    min_step_increase_factor: float = 1.0,
+    max_step_reduce_factor: float = 1.0,
 ) -> np.ndarray:
     """
     Create the next initial guess for the optimizer.
@@ -53,17 +58,22 @@ def next_guess(
         The problem to be solved.
     global_opt:
         Log-posterior value of the global optimum.
+    min_step_increase_factor:
+        Factor to increase the minimal step size bound. Used only in
+        :func:`adaptive_step`.
+    max_step_reduce_factor:
+        Factor to reduce the maximal step size bound. Used only in
+        :func:`adaptive_step`.
 
     Returns
     -------
     The next initial guess as base for the next profile point.
     """
     if update_type == "fixed_step":
-        return fixed_step(
+        next_initial_guess = fixed_step(
             x, par_index, par_direction, profile_options, problem
         )
-
-    if update_type == "adaptive_step_order_0":
+    elif update_type == "adaptive_step_order_0":
         order = 0
     elif update_type == "adaptive_step_order_1":
         order = 1
@@ -73,18 +83,28 @@ def next_guess(
         raise ValueError(
             f"Unsupported `update_type` {update_type} for `next_guess`."
         )
+    if update_type != "fixed_step":
+        next_initial_guess = adaptive_step(
+            x,
+            par_index,
+            par_direction,
+            profile_options,
+            current_profile,
+            problem,
+            global_opt,
+            order,
+            min_step_increase_factor,
+            max_step_reduce_factor,
+        )
 
-    return adaptive_step(
-        x,
-        par_index,
-        par_direction,
-        profile_options,
-        current_profile,
-        problem,
-        global_opt,
-        order,
+    logger.info(
+        f"Next guess for {problem.x_names[par_index]} in direction "
+        f"{par_direction} is {next_initial_guess[par_index]:.4f}. Step size: "
+        f"{next_initial_guess[par_index] - x[par_index]:.4f}."
     )
 
+    return next_initial_guess
+
 
 def fixed_step(
     x: np.ndarray,
@@ -138,6 +158,8 @@ def adaptive_step(
     problem: Problem,
     global_opt: float,
     order: int = 1,
+    min_step_increase_factor: float = 1.0,
+    max_step_reduce_factor: float = 1.0,
 ) -> np.ndarray:
     """Group of more complex methods for point proposal.
 
@@ -168,6 +190,10 @@ def adaptive_step(
         * ``1``: the last two points are used to extrapolate all parameters
         * ``np.nan``: indicates that a more complex regression should be used
           as determined by :attr:`pypesto.profile.ProfileOptions.reg_order`.
+    min_step_increase_factor:
+        Factor to increase the minimal step size bound.
+    max_step_reduce_factor:
+        Factor to reduce the maximal step size bound.
 
 
     Returns
@@ -177,9 +203,9 @@ def adaptive_step(
 
     # restrict step proposal to minimum and maximum step size
     def clip_to_minmax(step_size_proposal):
-        return np.clip(
-            step_size_proposal, options.min_step_size, options.max_step_size
-        )
+        min_step_size = options.min_step_size * min_step_increase_factor
+        max_step_size = options.max_step_size * max_step_reduce_factor
+        return np.clip(step_size_proposal, min_step_size, max_step_size)
 
     # restrict step proposal to bounds
     def clip_to_bounds(step_proposal):
@@ -193,6 +219,7 @@ def clip_to_bounds(step_proposal):
         delta_x_dir,
         reg_par,
         delta_obj_value,
+        last_delta_fval,
     ) = handle_profile_history(
         x,
         par_index,
@@ -206,15 +233,18 @@ def clip_to_bounds(step_proposal):
 
     # check whether we must make a minimum step anyway, since we're close to
     # the next bound
-    min_delta_x = x[par_index] + par_direction * options.min_step_size
+    min_delta_x = (
+        x[par_index]
+        + par_direction * options.min_step_size * min_step_increase_factor
+    )
 
     if par_direction == -1 and (min_delta_x < problem.lb_full[par_index]):
-        step_length = problem.lb_full[par_index] - x[par_index]
-        return x + step_length * delta_x_dir
+        step_length = abs(problem.lb_full[par_index] - x[par_index])
+        return clip_to_bounds(x + step_length * delta_x_dir)
 
     if par_direction == 1 and (min_delta_x > problem.ub_full[par_index]):
-        step_length = problem.ub_full[par_index] - x[par_index]
-        return x + step_length * delta_x_dir
+        step_length = abs(problem.ub_full[par_index] - x[par_index])
+        return clip_to_bounds(x + step_length * delta_x_dir)
 
     # parameter extrapolation function
     n_profile_points = len(current_profile.fval_path)
@@ -241,28 +271,58 @@ def par_extrapol(step_length):
                             x[par_index] + step_length * par_direction
                         )
                     )
+            # Define a trust region for the step size in all directions
+            # to avoid overshooting
+            x_step = np.clip(
+                x_step, x - options.max_step_size, x + options.max_step_size
+            )
+
             return clip_to_bounds(x_step)
 
     else:
         # if not, we do simple extrapolation
         def par_extrapol(step_length):
-            x_step = x + step_length * delta_x_dir
-            return clip_to_bounds(x_step)
+            # Define a trust region for the step size in all directions
+            # to avoid overshooting
+            step_in_x = np.clip(
+                step_length * delta_x_dir,
+                -options.max_step_size,
+                options.max_step_size,
+            )
+            x_stepped = x + step_in_x
+            return clip_to_bounds(x_stepped)
 
     # compute proposal
     next_x = par_extrapol(step_size_guess)
 
     # next start point has to be searched
     # compute the next objective value which we aim for
-    next_obj_target = (
+    high_next_obj_target = (
         -np.log(1.0 - options.delta_ratio_max)
-        + options.magic_factor_obj_value * delta_obj_value
+        + options.adaptive_target_scaling_factor * abs(last_delta_fval)
+        + current_profile.fval_path[-1]
+    )
+    low_next_obj_target = (
+        +np.log(1.0 - options.delta_ratio_max)
+        - options.adaptive_target_scaling_factor * abs(last_delta_fval)
         + current_profile.fval_path[-1]
     )
 
+    # Clip both by 0.5 * delta_obj_value to avoid overshooting
+    if delta_obj_value != 0:
+        high_next_obj_target = min(
+            high_next_obj_target,
+            current_profile.fval_path[-1] + 0.5 * delta_obj_value,
+        )
+        low_next_obj_target = max(
+            low_next_obj_target,
+            current_profile.fval_path[-1] - 0.5 * delta_obj_value,
+        )
+
     # compute objective at the guessed point
     problem.fix_parameters(par_index, next_x[par_index])
     next_obj = problem.objective(problem.get_reduced_vector(next_x))
+    current_obj = current_profile.fval_path[-1]
 
     # iterate until good step size is found
     return do_line_search(
@@ -270,12 +330,16 @@ def par_extrapol(step_length):
         step_size_guess,
         par_extrapol,
         next_obj,
-        next_obj_target,
+        current_obj,
+        high_next_obj_target,
+        low_next_obj_target,
         clip_to_minmax,
         clip_to_bounds,
         par_index,
         problem,
         options,
+        min_step_increase_factor,
+        max_step_reduce_factor,
     )
 
 
@@ -304,6 +368,8 @@ def handle_profile_history(
         The regression polynomial for profile extrapolation.
     delta_obj_value:
         The difference of the objective function value between the last point and `global_opt`.
+    last_delta_fval:
+        The difference of the objective function value between the last two points.
     """
     n_profile_points = len(current_profile.fval_path)
 
@@ -313,32 +379,53 @@ def handle_profile_history(
     reg_par = None
 
     # Is this the first step along this profile? If so, try a simple step
-    if n_profile_points == 1:
+    # Do the same if the last two points are too close to avoid division by small numbers
+    if n_profile_points == 1 or np.isclose(
+        current_profile.x_path[par_index, -1],
+        current_profile.x_path[par_index, -2],
+    ):
         # try to use the default step size
         step_size_guess = options.default_step_size
         delta_obj_value = 0.0
+        last_delta_fval = 0.0
 
     else:
         # try to reuse the previous step size
-        step_size_guess = np.abs(
+        last_delta_x_par_index = np.abs(
             current_profile.x_path[par_index, -1]
             - current_profile.x_path[par_index, -2]
         )
+        # Bound the step size by default values
+        step_size_guess = min(
+            last_delta_x_par_index, options.default_step_size
+        )
+        # Step size cannot be smaller than the minimum step size
+        step_size_guess = max(step_size_guess, options.min_step_size)
+
         delta_obj_value = current_profile.fval_path[-1] - global_opt
+        last_delta_fval = (
+            current_profile.fval_path[-1] - current_profile.fval_path[-2]
+        )
 
         if order == 1 or (np.isnan(order) and n_profile_points < 3):
             # set the update direction (extrapolate with order 1)
             last_delta_x = (
                 current_profile.x_path[:, -1] - current_profile.x_path[:, -2]
             )
-            delta_x_dir = last_delta_x / step_size_guess
+            delta_x_dir = last_delta_x / last_delta_x_par_index
         elif np.isnan(order):
             # compute the regression polynomial for parameter extrapolation
             reg_par = get_reg_polynomial(
                 par_index, current_profile, problem, options
             )
 
-    return step_size_guess, delta_x_dir, reg_par, delta_obj_value
+    return (
+        step_size_guess,
+        delta_x_dir,
+        reg_par,
+        delta_obj_value,
+        last_delta_fval,
+    )
 
 
 def get_reg_polynomial(
@@ -395,12 +482,16 @@ def do_line_search(
     step_size_guess: float,
     par_extrapol: Callable,
     next_obj: float,
-    next_obj_target: float,
+    current_obj: float,
+    high_next_obj_target: float,
+    low_next_obj_target: float,
     clip_to_minmax: Callable,
     clip_to_bounds: Callable,
     par_index: int,
     problem: Problem,
     options: ProfileOptions,
+    min_step_increase_factor: float,
+    max_step_reduce_factor: float,
 ) -> np.ndarray:
     """Perform the line search.
 
@@ -429,14 +520,29 @@ def do_line_search(
         The parameter estimation problem.
     options:
         Profile likelihood options.
+    min_step_increase_factor:
+        Factor to increase the minimal step size bound.
+    max_step_reduce_factor:
+        Factor to reduce the maximal step size bound.
 
     Returns
     -------
     Parameter vector that is expected to yield the objective function value
     closest to `next_obj_target`.
     """
-    # Was the initial step too big or too small?
-    direction = "decrease" if next_obj_target < next_obj else "increase"
+    decreasing_to_low_target = False
+    decreasing_to_high_target = False
+
+    # Determine the direction of the step
+    if next_obj > low_next_obj_target and next_obj < high_next_obj_target:
+        direction = "increase"
+    elif next_obj <= low_next_obj_target:
+        direction = "decrease"
+        decreasing_to_low_target = True
+    elif next_obj >= high_next_obj_target:
+        direction = "decrease"
+        decreasing_to_high_target = True
+
     if direction == "increase":
         adapt_factor = options.step_size_factor
     else:
@@ -452,12 +558,14 @@ def do_line_search(
         # Check if we hit the bounds
         if (
             direction == "decrease"
-            and step_size_guess == options.min_step_size
+            and step_size_guess
+            == options.min_step_size * min_step_increase_factor
         ):
             return next_x
         if (
             direction == "increase"
-            and step_size_guess == options.max_step_size
+            and step_size_guess
+            == options.max_step_size * max_step_reduce_factor
         ):
             return next_x
 
@@ -467,11 +575,22 @@ def do_line_search(
         next_obj = problem.objective(problem.get_reduced_vector(next_x))
 
         # check for root crossing and compute correct step size in case
-        if (direction == "decrease" and next_obj_target >= next_obj) or (
-            direction == "increase" and next_obj_target <= next_obj
+        if (direction == "increase" and next_obj > high_next_obj_target) or (
+            direction == "decrease"
+            and next_obj < high_next_obj_target
+            and decreasing_to_high_target
+        ):
+            return next_x_interpolate(
+                next_obj, last_obj, next_x, last_x, high_next_obj_target
+            )
+
+        if (direction == "increase" and next_obj < low_next_obj_target) or (
+            direction == "decrease"
+            and next_obj > low_next_obj_target
+            and decreasing_to_low_target
         ):
             return next_x_interpolate(
-                next_obj, last_obj, next_x, last_x, next_obj_target
+                next_obj, last_obj, next_x, last_x, low_next_obj_target
             )
 
 
diff --git a/pypesto/profile/util.py b/pypesto/profile/util.py
index 6a87403f8..3ea7a0d00 100644
--- a/pypesto/profile/util.py
+++ b/pypesto/profile/util.py
@@ -189,6 +189,7 @@ def fill_profile_list(
         gradnorm_path=np.array([gradnorm]),
         exitflag_path=np.array([optimizer_result["exitflag"]]),
         time_path=np.array([0.0]),
+        color_path=np.array([[1, 0, 0, 1]]),
         time_total=0.0,
         n_fval=0,
         n_grad=0,
diff --git a/pypesto/profile/walk_along_profile.py b/pypesto/profile/walk_along_profile.py
index c4f610001..0478c0dc0 100644
--- a/pypesto/profile/walk_along_profile.py
+++ b/pypesto/profile/walk_along_profile.py
@@ -63,6 +63,7 @@ def walk_along_profile(
     while True:
         # get current position on the profile path
         x_now = current_profile.x_path[:, -1]
+        color_now = current_profile.color_path[-1]
 
         # check if the next profile point needs to be computed
         # ... check bounds
@@ -78,26 +79,164 @@ def walk_along_profile(
         ):
             break
 
-        # compute the new start point for optimization
-        x_next = create_next_guess(
-            x_now,
-            i_par,
-            par_direction,
-            options,
-            current_profile,
-            problem,
-            global_opt,
-        )
+        optimization_successful = False
+        max_step_reduce_factor = 1.0
+
+        while not optimization_successful:
+            # Check max_step_size is not reduced below min_step_size
+            if (
+                options.max_step_size * max_step_reduce_factor
+                < options.min_step_size
+            ):
+                logger.warning(
+                    "Max step size reduced below min step size. "
+                    "Setting a lower min step size can help avoid this issue."
+                )
+                break
+
+            # compute the new start point for optimization
+            x_next = create_next_guess(
+                x_now,
+                i_par,
+                par_direction,
+                options,
+                current_profile,
+                problem,
+                global_opt,
+                1.0,
+                max_step_reduce_factor,
+            )
+
+            # fix current profiling parameter to current value and set start point
+            problem.fix_parameters(i_par, x_next[i_par])
+            startpoint = x_next[problem.x_free_indices]
+
+            if startpoint.size > 0:
+                optimizer_result = optimizer.minimize(
+                    problem=problem,
+                    x0=startpoint,
+                    id=str(0),
+                    optimize_options=OptimizeOptions(
+                        allow_failed_starts=False
+                    ),
+                )
+
+                if np.isfinite(optimizer_result.fval):
+                    optimization_successful = True
+                    if max_step_reduce_factor == 1.0:
+                        # The color of the point is set to black if no changes were made
+                        color_next = np.array([0, 0, 0, 1])
+                    else:
+                        # The color of the point is set to red if the max_step_size was reduced
+                        color_next = np.array([1, 0, 0, 1])
+                else:
+                    max_step_reduce_factor *= 0.5
+                    logger.warning(
+                        f"Optimization at {problem.x_names[i_par]}={x_next[i_par]} failed. "
+                        f"Reducing max_step_size to {options.max_step_size * max_step_reduce_factor}."
+                    )
+            else:
+                # if too many parameters are fixed, there is nothing to do ...
+                fval = problem.objective(np.array([]))
+                optimizer_result = OptimizerResult(
+                    id="0",
+                    x=np.array([]),
+                    fval=fval,
+                    n_fval=0,
+                    n_grad=0,
+                    n_res=0,
+                    n_hess=0,
+                    n_sres=0,
+                    x0=np.array([]),
+                    fval0=fval,
+                    time=0,
+                )
+                optimizer_result.update_to_full(problem=problem)
+                optimization_successful = True
+                color_next = np.concatenate((color_now[:3], [0.3]))
+
+        if not optimization_successful:
+            # Cannot optimize successfully by reducing max_step_size
+            # Let's try to optimize by increasing min_step_size
+            logger.warning(
+                f"Failing to optimize at {problem.x_names[i_par]}={x_next[i_par]} after reducing max_step_size. "
+                f"Trying to increase min_step_size."
+            )
+            min_step_increase_factor = 1.25
+        while not optimization_successful:
+            # Check min_step_size is not increased above max_step_size
+            if (
+                options.min_step_size * min_step_increase_factor
+                > options.max_step_size
+            ):
+                logger.warning(
+                    "Min step size increased above max step size. "
+                    "Setting a higher max step size can help avoid this issue."
+                )
+                break
 
-        # fix current profiling parameter to current value and set start point
-        problem.fix_parameters(i_par, x_next[i_par])
-        startpoint = x_next[problem.x_free_indices]
+            # compute the new start point for optimization
+            x_next = create_next_guess(
+                x_now,
+                i_par,
+                par_direction,
+                options,
+                current_profile,
+                problem,
+                global_opt,
+                min_step_increase_factor,
+                1.0,
+            )
+
+            # fix current profiling parameter to current value and set start point
+            problem.fix_parameters(i_par, x_next[i_par])
+            startpoint = x_next[problem.x_free_indices]
+
+            optimizer_result = optimizer.minimize(
+                problem=problem,
+                x0=startpoint,
+                id=str(0),
+                optimize_options=OptimizeOptions(allow_failed_starts=False),
+            )
+
+            if np.isfinite(optimizer_result.fval):
+                optimization_successful = True
+                # The color of the point is set to blue if the min_step_size was increased
+                color_next = np.array([0, 0, 1, 1])
+            else:
+                min_step_increase_factor *= 1.25
+                logger.warning(
+                    f"Optimization at {problem.x_names[i_par]}={x_next[i_par]} failed. "
+                    f"Increasing min_step_size to {options.min_step_size * min_step_increase_factor}."
+                )
+
+        if not optimization_successful:
+            # Cannot optimize successfully by reducing max_step_size or increasing min_step_size
+            # sample a new starting point for another attempt for max_tries times
+            logger.warning(
+                f"Failing to optimize at {problem.x_names[i_par]}={x_next[i_par]} after reducing max_step_size and increasing min_step_size. "
+                f"Trying to sample {max_tries} new starting points."
+            )
+
+            x_next = create_next_guess(
+                x_now,
+                i_par,
+                par_direction,
+                options,
+                current_profile,
+                problem,
+                global_opt,
+                1.0,
+                1.0,
+            )
+
+            problem.fix_parameters(i_par, x_next[i_par])
 
-        # run optimization
-        if startpoint.size > 0:
-            # number of optimization attempts for the given value of i_par in case
-            #  no finite solution is found
             for i_optimize_attempt in range(max_tries):
+                startpoint = problem.startpoint_method(
+                    n_starts=1, problem=problem
+                )[0]
+
                 optimizer_result = optimizer.minimize(
                     problem=problem,
                     x0=startpoint,
@@ -107,40 +246,22 @@ def walk_along_profile(
                     ),
                 )
                 if np.isfinite(optimizer_result.fval):
+                    # The color of the point is set to green if the parameter was resampled
+                    color_next = np.array([0, 1, 0, 1])
                     break
 
                 logger.warning(
                     f"Optimization at {problem.x_names[i_par]}={x_next[i_par]} failed."
                 )
-                # sample a new starting point for another attempt
-                #  might be preferable to stay close to the previous point, at least initially,
-                #  but for now, we just sample from anywhere within the parameter bounds
-                # alternatively, run multi-start optimization
-                startpoint = problem.startpoint_method(
-                    n_starts=1, problem=problem
-                )[0]
             else:
                 raise RuntimeError(
                     f"Computing profile point failed. Could not find a finite solution after {max_tries} attempts."
                 )
-        else:
-            # if too many parameters are fixed, there is nothing to do ...
-            fval = problem.objective(np.array([]))
-            optimizer_result = OptimizerResult(
-                id="0",
-                x=np.array([]),
-                fval=fval,
-                n_fval=0,
-                n_grad=0,
-                n_res=0,
-                n_hess=0,
-                n_sres=0,
-                x0=np.array([]),
-                fval0=fval,
-                time=0,
-            )
-            optimizer_result.update_to_full(problem=problem)
 
+        logger.info(
+            f"Optimization successful for {problem.x_names[i_par]}={x_next[i_par]:.4f}. "
+            f"Start fval {problem.objective(x_next[problem.x_free_indices]):.6f}, end fval {optimizer_result.fval:.6f}."
+        )
         if optimizer_result[GRAD] is not None:
             gradnorm = np.linalg.norm(
                 optimizer_result[GRAD][problem.x_free_indices]
@@ -154,6 +275,7 @@ def walk_along_profile(
             ratio=np.exp(global_opt - optimizer_result.fval),
             gradnorm=gradnorm,
             time=optimizer_result.time,
+            color=color_next,
             exitflag=optimizer_result.exitflag,
             n_fval=optimizer_result.n_fval,
             n_grad=optimizer_result.n_grad,
diff --git a/pypesto/result/profile.py b/pypesto/result/profile.py
index af14e5f1a..5f8ce8405 100644
--- a/pypesto/result/profile.py
+++ b/pypesto/result/profile.py
@@ -38,6 +38,13 @@ class ProfilerResult(dict):
         Number of gradient evaluations.
     n_hess:
         Number of Hessian evaluations.
+    color_path:
+        The color of the profile path. Signifies types of steps made.
+        Red indicates a step for which max_step_size was reduced, blue
+        indicates a step for which min_step_size was increased, and green
+        indicates a step for which the profiler had to resample the parameter
+        vector due to optimization failure of the previous two. Black
+        indicates a step for which none of the above was necessary.
     message:
         Textual comment on the profile result.
 
@@ -55,6 +62,7 @@ def __init__(
         gradnorm_path: np.ndarray = None,
         exitflag_path: np.ndarray = None,
         time_path: np.ndarray = None,
+        color_path: np.ndarray = None,
         time_total: float = 0.0,
         n_fval: int = 0,
         n_grad: int = 0,
@@ -86,6 +94,13 @@ def __init__(
         else:
             self.time_path = time_path.copy()
 
+        if color_path is None:
+            self.color_path = np.full(
+                (x_path.shape[1], 4), np.array([1, 0, 0, 0.3])
+            )
+        else:
+            self.color_path = color_path.copy()
+
         if (
             not self.x_path.shape[1]
             == len(self.fval_path)
@@ -122,6 +137,7 @@ def append_profile_point(
         ratio: float,
         gradnorm: float = np.nan,
         time: float = np.nan,
+        color: np.ndarray = np.nan,
         exitflag: float = np.nan,
         n_fval: int = 0,
         n_grad: int = 0,
@@ -143,6 +159,8 @@ def append_profile_point(
             The gradient norm at `x`.
         time:
             The computation time to find `x`.
+        color:
+            The color of the profile path. Signifies types of steps made.
         exitflag:
             The exitflag of the optimizer (useful if an optimization was
             performed to find `x`).
@@ -159,6 +177,7 @@ def append_profile_point(
         self.gradnorm_path = np.hstack((self.gradnorm_path, gradnorm))
         self.exitflag_path = np.hstack((self.exitflag_path, exitflag))
         self.time_path = np.hstack((self.time_path, time))
+        self.color_path = np.vstack((self.color_path, color))
 
         # increment the time and f_eval counters
         self.time_total += time
@@ -180,6 +199,7 @@ def flip_profile(self) -> None:
         self.gradnorm_path = np.flip(self.gradnorm_path)
         self.exitflag_path = np.flip(self.exitflag_path)
         self.time_path = np.flip(self.time_path)
+        self.color_path = np.flip(self.color_path, axis=0)
 
 
 class ProfileResult:
diff --git a/pypesto/sample/__init__.py b/pypesto/sample/__init__.py
index cb5fe4058..96529a319 100644
--- a/pypesto/sample/__init__.py
+++ b/pypesto/sample/__init__.py
@@ -11,6 +11,12 @@
 from .diagnostics import auto_correlation, effective_sample_size, geweke_test
 from .dynesty import DynestySampler
 from .emcee import EmceeSampler
+from .evidence import (
+    bridge_sampling_log_evidence,
+    harmonic_mean_log_evidence,
+    laplace_approximation_log_evidence,
+    parallel_tempering_log_evidence,
+)
 from .metropolis import MetropolisSampler
 from .parallel_tempering import ParallelTemperingSampler
 from .sample import sample
diff --git a/pypesto/sample/adaptive_parallel_tempering.py b/pypesto/sample/adaptive_parallel_tempering.py
index 1738c6ce7..a2966b46d 100644
--- a/pypesto/sample/adaptive_parallel_tempering.py
+++ b/pypesto/sample/adaptive_parallel_tempering.py
@@ -5,7 +5,6 @@
 import numpy as np
 
 from ..C import EXPONENTIAL_DECAY
-from ..result import Result
 from .parallel_tempering import ParallelTemperingSampler
 
 
@@ -65,20 +64,3 @@ def adjust_betas(self, i_sample: int, swapped: Sequence[bool]):
 
         # fill in
         self.betas = betas
-
-    def compute_log_evidence(
-        self, result: Result, method: str = "trapezoid"
-    ) -> float:
-        """Perform thermodynamic integration to estimate the log evidence.
-
-        Parameters
-        ----------
-        result:
-            Result object containing the samples.
-        method:
-            Integration method, either 'trapezoid' or 'simpson' (uses scipy for integration).
-        """
-        raise NotImplementedError(
-            "Thermodynamic integration is not implemented for adaptive parallel tempering, "
-            "since the temperature schedule is adapted during the sampling process."
-        )
diff --git a/pypesto/sample/diagnostics.py b/pypesto/sample/diagnostics.py
index c4e1fe109..223acf8fc 100644
--- a/pypesto/sample/diagnostics.py
+++ b/pypesto/sample/diagnostics.py
@@ -33,6 +33,14 @@ def geweke_test(
         Iteration where the first and the last fraction of the chain
         do not differ significantly regarding Geweke test -> Burn-In
     """
+    if chain_number == 0:
+        # test if burn-in is already calculated
+        if result.sample_result.burn_in is not None:
+            logger.info(
+                f"Burn-in index ({result.sample_result.burn_in}) already calculated. Skipping Geweke test."
+            )
+            return result.sample_result.burn_in
+
     # Get parameter samples as numpy arrays
     chain = np.asarray(result.sample_result.trace_x[chain_number])
 
diff --git a/pypesto/sample/dynesty.py b/pypesto/sample/dynesty.py
index 89a14ba24..c70e92019 100644
--- a/pypesto/sample/dynesty.py
+++ b/pypesto/sample/dynesty.py
@@ -75,6 +75,7 @@ def __init__(
         run_args: dict = None,
         dynamic: bool = True,
         objective_type: str = OBJECTIVE_NEGLOGPOST,
+        prior_transform: callable = None,
     ):
         """
         Initialize sampler.
@@ -95,6 +96,9 @@ def __init__(
             `pypesto.C.OBJECTIVE_NEGLOGPOST`. If
             `pypesto.C.OBJECTIVE_NEGLOGPOST`, then `x_priors` have to
             be defined in the problem.
+        prior_transform:
+            A function converting a sample from the unit cube to the actual prior. If not provided, the default
+            `prior_transform` function is used, which assumes uniform priors.
         """
         if importlib.util.find_spec("dynesty") is None:
             raise SamplerImportError("dynesty")
@@ -118,17 +122,41 @@ def __init__(
             )
         self.objective_type = objective_type
 
+        if prior_transform is None:
+            # if priors are uniform, we can use the default prior transform (assuming that bounds are set correctly)
+            logger.warning(
+                "Assuming 'prior_transform' is correctly specified. If 'x_priors' is not uniform, 'prior_transform'"
+                " has to be adjusted accordingly."
+            )
+            self.prior_transform = self.prior_transform_from_uniform
+        else:
+            self.prior_transform = prior_transform
+
         # set in initialize
         self.problem: Problem | None = None
         self.sampler: (
             dynesty.DynamicNestedSampler | dynesty.NestedSampler | None
         ) = None
 
-    def prior_transform(self, prior_sample: np.ndarray) -> np.ndarray:
-        """Transform prior sample from unit cube to pyPESTO prior.
+    def loglikelihood(self, x):
+        """Log-probability density function."""
+        # check if parameter lies within bounds
+        if any(x < self.problem.lb) or any(x > self.problem.ub):
+            return -np.inf
+        # invert sign
+        if self.objective_type == OBJECTIVE_NEGLOGPOST:
+            # problem.objective returns negative log-posterior
+            # compute log-likelihood by subtracting log-prior
+            return -1.0 * (
+                self.problem.objective(x) - self.problem.x_priors(x)
+            )
+        # problem.objective returns negative log-likelihood
+        return -1.0 * self.problem.objective(x)
 
-        TODO support priors that are not uniform.
-             raise warning in `self.initialize` for now.
+    def prior_transform_from_uniform(
+        self, prior_sample: np.ndarray
+    ) -> np.ndarray:
+        """Transform prior sample from unit cube to pyPESTO prior.
 
         Parameters
         ----------
@@ -144,21 +172,6 @@ def prior_transform(self, prior_sample: np.ndarray) -> np.ndarray:
             + self.problem.lb
         )
 
-    def loglikelihood(self, x):
-        """Log-probability density function."""
-        # check if parameter lies within bounds
-        if any(x < self.problem.lb) or any(x > self.problem.ub):
-            return -np.inf
-        # invert sign
-        if self.objective_type == OBJECTIVE_NEGLOGPOST:
-            # problem.objective returns negative log-posterior
-            # compute log-likelihood by subtracting log-prior
-            return -1.0 * (
-                self.problem.objective(x) - self.problem.x_priors(x)
-            )
-        # problem.objective returns negative log-likelihood
-        return -1.0 * self.problem.objective(x)
-
     def initialize(
         self,
         problem: Problem,
@@ -187,12 +200,6 @@ def initialize(
                     f"'x_priors' defined in the problem will be ignored."
                 )
 
-        # if priors are uniform, we can use the default prior transform (assuming that bounds are set correctly)
-        logger.warning(
-            "Assuming 'prior_transform' is correctly specified. If 'x_priors' is not uniform, 'prior_transform'"
-            " has to be adjusted accordingly."
-        )
-
         # initialize sampler
         self.sampler = sampler_class(
             loglikelihood=self.loglikelihood,
diff --git a/pypesto/sample/evidence.py b/pypesto/sample/evidence.py
new file mode 100644
index 000000000..07f1f7c9f
--- /dev/null
+++ b/pypesto/sample/evidence.py
@@ -0,0 +1,466 @@
+"""Various methods for estimating the log evidence of a model."""
+
+
+import logging
+from typing import Optional, Union
+
+import numpy as np
+from scipy import stats
+from scipy.integrate import simpson, trapezoid
+from scipy.optimize import minimize_scalar
+from scipy.special import logsumexp
+
+from ..C import SIMPSON, STEPPINGSTONE, TRAPEZOID
+from ..objective import (
+    AggregatedObjective,
+    NegLogParameterPriors,
+    NegLogPriors,
+)
+from ..problem import Problem
+from ..result import Result
+from .diagnostics import geweke_test
+
+logger = logging.getLogger(__name__)
+
+
+def laplace_approximation_log_evidence(
+    problem: Problem, x: np.ndarray
+) -> float:
+    """
+    Compute the log evidence using the Laplace approximation.
+
+    The objective in your `problem` must be a negative log posterior, and support Hessian computation.
+
+    Parameters
+    ----------
+    problem:
+        The problem to compute the log evidence for.
+    x:
+        The maximum a posteriori estimate at which to compute the log evidence.
+
+    Returns
+    -------
+    log_evidence: float
+    """
+    hessian = problem.objective(
+        problem.get_reduced_vector(x), sensi_orders=(2,)
+    )
+    _, log_det = np.linalg.slogdet(hessian)
+    log_prop_posterior = -problem.objective(problem.get_reduced_vector(x))
+    log_evidence = (
+        0.5 * np.log(2 * np.pi) * len(problem.x_free_indices)
+        - 0.5 * log_det
+        + log_prop_posterior
+    )
+    return log_evidence
+
+
+def harmonic_mean_log_evidence(
+    result: Result,
+    prior_samples: Optional[np.ndarray] = None,
+    neg_log_likelihood_fun: Optional[callable] = None,
+) -> float:
+    """
+    Compute the log evidence using the harmonic mean estimator.
+
+    Stabilized harmonic mean estimator is used if prior samples are provided.
+    Newton and Raftery (1994): https://doi.org/10.1111/j.2517-6161.1994.tb01956.x
+
+    Parameters
+    ----------
+    result:
+    prior_samples:
+        Samples from the prior distribution. If samples from the prior are provided,
+        the stabilized harmonic mean is computed (recommended). Then, the likelihood function must be provided as well.
+    neg_log_likelihood_fun: callable
+        Function to evaluate the negative log likelihood. Necessary if prior_samples is not `None`.
+
+    Returns
+    -------
+    log_evidence
+    """
+    if result.sample_result is None:
+        raise ValueError("No samples available. Run sampling first.")
+
+    # compute negative log likelihood from traces
+    burn_in = geweke_test(result)
+    trace_neglogpost = result.sample_result.trace_neglogpost[0, burn_in:]
+    trace_neglogprior = result.sample_result.trace_neglogprior[0, burn_in:]
+    neg_log_likelihoods_posterior = trace_neglogpost - trace_neglogprior
+
+    if prior_samples is None:
+        # compute harmonic mean from samples
+        return -logsumexp(neg_log_likelihoods_posterior) + np.log(
+            neg_log_likelihoods_posterior.size
+        )
+
+    # compute stabilized harmonic mean
+    if prior_samples is not None and neg_log_likelihood_fun is None:
+        raise ValueError(
+            "you need to provide a likelihood function to evaluate prior samples"
+        )
+
+    # compute delta (ratio of prior to posterior samples)
+    n_samples_prior = len(prior_samples)
+    n_samples_posterior = len(trace_neglogpost)
+    delta = n_samples_prior / (n_samples_prior + n_samples_posterior)
+    neg_log_likelihoods_prior = np.array(
+        [neg_log_likelihood_fun(x) for x in prior_samples]
+    )
+    log_likelihoods_stack = -np.concatenate(
+        [neg_log_likelihoods_prior, neg_log_likelihoods_posterior]
+    )
+
+    def _log_evidence_objective(log_p: float):
+        # Helper function to compute the log evidence with stabilized harmonic mean
+        log_w_i = logsumexp(
+            np.stack(
+                (
+                    log_p * np.ones_like(log_likelihoods_stack),
+                    log_likelihoods_stack,
+                ),
+                axis=1,
+            ),
+            b=np.array([delta, 1 - delta]),
+            axis=1,
+        )
+        res, sign = logsumexp(
+            [
+                log_p,
+                logsumexp(log_likelihoods_stack - log_w_i)
+                - logsumexp(-log_w_i),
+            ],
+            b=[1, -1],
+            return_sign=True,
+        )
+        return sign * res
+
+    sol = minimize_scalar(_log_evidence_objective)
+    return sol.x
+
+
+def parallel_tempering_log_evidence(
+    result: Result,
+    method: str = "trapezoid",
+    use_all_chains: bool = True,
+) -> Union[float, None]:
+    """Perform thermodynamic integration or steppingstone sampling to estimate the log evidence.
+
+    Thermodynamic integration is performed by integrating the mean log likelihood over the temperatures.
+    Errors might come from the samples themselves or the numerical integration.
+    Steppingstone sampling is a form of importance sampling that uses the maximum likelihood of each temperature.
+    It does not require an integration, but can be biased for a small number of temperatures.
+    See (Annis et al., 2019), https://doi.org/10.1016/j.jmp.2019.01.005, for more details.
+
+    This should be used with a beta decay temperature schedule and not with the adaptive version of
+    parallel tempering sampling as the temperature schedule is not optimal for thermodynamic integration.
+
+    Parameters
+    ----------
+    result:
+        Result object containing the samples.
+    method:
+        Integration method, either 'trapezoid' or 'simpson' to perform thermodynamic integration
+        (uses scipy for integration) or 'steppingstone' to perform steppingstone sampling.
+    use_all_chains:
+        If True, calculate burn-in for each chain and use the maximal burn-in for all chains for the integration.
+        This will fail if not all chains have converged yet.
+        Otherwise, use only the converged chains for the integration (might increase the integration error).
+    """
+    # compute the burn-in of every chain (one chain per entry of `betas`)
+    burn_ins = np.zeros(len(result.sample_result.betas), dtype=int)
+    for i_chain in range(len(result.sample_result.betas)):
+        burn_ins[i_chain] = geweke_test(result, chain_number=i_chain)
+    max_burn_in = int(np.max(burn_ins))
+
+    if max_burn_in >= result.sample_result.trace_x.shape[1]:
+        logger.warning(
+            f"At least {np.sum(burn_ins >= result.sample_result.trace_x.shape[1])} chains seem to not have "
+            f"converged yet. You may want to use a larger number of samples."
+        )
+        if use_all_chains:
+            raise ValueError(
+                "Not all chains have converged yet. You may want to use a larger number of samples, "
+                "or try ´use_all_chains=False´, which might increase the integration error."
+            )
+
+    if use_all_chains:
+        # estimate mean of log likelihood for each beta
+        trace_loglike = (
+            result.sample_result.trace_neglogprior[::-1, max_burn_in:]
+            - result.sample_result.trace_neglogpost[::-1, max_burn_in:]
+        )
+        mean_loglike_per_beta = np.mean(trace_loglike, axis=1)
+        temps = result.sample_result.betas[::-1]
+    else:
+        # estimate mean of log likelihood for each beta if chain has converged
+        mean_loglike_per_beta = []
+        trace_loglike = []
+        temps = []
+        for i_chain in reversed(range(len(result.sample_result.betas))):
+            if burn_ins[i_chain] < result.sample_result.trace_x.shape[1]:
+                # save temperature-chain as it is converged
+                temps.append(result.sample_result.betas[i_chain])
+                # calculate mean log likelihood for each beta
+                trace_loglike_i = (
+                    result.sample_result.trace_neglogprior[
+                        i_chain, burn_ins[i_chain] :
+                    ]
+                    - result.sample_result.trace_neglogpost[
+                        i_chain, burn_ins[i_chain] :
+                    ]
+                )
+                trace_loglike.append(trace_loglike_i)
+                mean_loglike_per_beta.append(np.mean(trace_loglike_i))
+
+    if method == TRAPEZOID:
+        log_evidence = trapezoid(
+            # integrate from low to high temperature
+            y=mean_loglike_per_beta,
+            x=temps,
+        )
+    elif method == SIMPSON:
+        log_evidence = simpson(
+            # integrate from low to high temperature
+            y=mean_loglike_per_beta,
+            x=temps,
+        )
+    elif method == STEPPINGSTONE:
+        log_evidence = steppingstone(temps=temps, trace_loglike=trace_loglike)
+    else:
+        raise ValueError(
+            f"Unknown method {method}. Choose 'trapezoid', 'simpson' for thermodynamic integration or "
+            "'steppingstone' for steppingstone sampling."
+        )
+
+    return log_evidence
+
+
+def steppingstone(temps: np.ndarray, trace_loglike: np.ndarray) -> float:
+    """Perform steppingstone sampling to estimate the log evidence.
+
+    Implementation based on Annis et al. (2019): https://doi.org/10.1016/j.jmp.2019.01.005.
+
+    Parameters
+    ----------
+    temps:
+        Temperature values.
+    trace_loglike:
+        Log likelihood values for each temperature.
+    """
+    # trace_loglike[k] holds the log-likelihood samples collected at temps[k];
+    # each pair of adjacent temperatures contributes one log-ratio estimate
+    ss_log_evidences = np.zeros(len(temps) - 1)
+    for t_i in range(1, len(temps)):
+        # we use the maximum likelihood times the temperature difference to stabilize the logsumexp
+        # original formulation uses only the maximum likelihood, this is equivalent
+        ss_log_evidences[t_i - 1] = logsumexp(
+            trace_loglike[t_i - 1] * (temps[t_i] - temps[t_i - 1])
+        ) - np.log(trace_loglike[t_i - 1].size)
+    log_evidence = np.sum(ss_log_evidences)
+    return log_evidence
+
+
+def bridge_sampling_log_evidence(
+    result: Result,
+    n_posterior_samples_init: Optional[int] = None,
+    initial_guess_log_evidence: Optional[float] = None,
+    max_iter: int = 1000,
+    tol: float = 1e-6,
+) -> float:
+    """
+    Compute the log evidence using bridge sampling.
+
+    Based on "A Tutorial on Bridge Sampling" by Gronau et al. (2017): https://doi.org/10.1016/j.jmp.2017.09.005.
+    Using the optimal bridge function by Meng and Wong (1996) which minimises the relative mean-squared error.
+    Proposal function is calibrated using posterior samples, which are not used for the final bridge estimate
+    (as this may result in an underestimation of the marginal likelihood, see Overstall and Forster (2010)).
+
+    Parameters
+    ----------
+    result:
+        The pyPESTO result object with filled sample result.
+    n_posterior_samples_init:
+        Number of samples used to calibrate the proposal function. By default, half of the posterior samples are used.
+    initial_guess_log_evidence:
+        Initial guess for the log evidence. By default, the Laplace approximation is used to compute the initial guess.
+    max_iter:
+        Maximum number of iterations. Default is 1000.
+    tol:
+        Tolerance for convergence. Default is 1e-6.
+
+
+    Returns
+    -------
+    log_evidence
+    """
+    if result.sample_result is None:
+        raise ValueError("No samples available. Run sampling first.")
+    if not isinstance(result.problem.objective, AggregatedObjective):
+        raise ValueError("Objective must be an AggregatedObjective.")
+
+    # use Laplace approximation to get initial guess for p(y)
+    if initial_guess_log_evidence is None:
+        initial_guess_log_evidence = laplace_approximation_log_evidence(
+            problem=result.problem, x=result.optimize_result.x[0]
+        )
+    # extract posterior samples
+    burn_in = geweke_test(result)
+    posterior_samples = result.sample_result.trace_x[0, burn_in:]
+
+    # build proposal function from posterior samples
+    if n_posterior_samples_init is None:
+        n_posterior_samples_init = int(posterior_samples.shape[0] * 0.5)
+    # randomly select samples for calibration
+    calibration_index = np.random.choice(
+        np.arange(posterior_samples.shape[0]),
+        n_posterior_samples_init,
+        replace=False,
+    )
+    samples_calibration = posterior_samples[calibration_index]
+    # remove calibration samples from posterior samples
+    posterior_samples = posterior_samples[
+        [
+            j
+            for j in range(posterior_samples.shape[0])
+            if j not in calibration_index
+        ]
+    ]
+    # generate proposal samples and define proposal function
+    n_proposal_samples = posterior_samples.shape[0]
+    posterior_mean = np.mean(samples_calibration, axis=0)
+    posterior_cov = np.cov(samples_calibration.T)
+    # if covariance matrix is not positive definite (numerically), use diagonal covariance matrix only
+    try:
+        # proposal density function
+        log_proposal_fun = stats.multivariate_normal(
+            mean=posterior_mean, cov=posterior_cov
+        ).logpdf
+    except np.linalg.LinAlgError:
+        posterior_cov = np.diag(np.diag(posterior_cov))
+        log_proposal_fun = stats.multivariate_normal(
+            mean=posterior_mean, cov=posterior_cov
+        ).logpdf
+
+    # generate proposal samples
+    if posterior_cov.size == 1:
+        # univariate case
+        proposal_samples = np.random.normal(
+            loc=posterior_mean,
+            scale=np.sqrt(posterior_cov),
+            size=n_proposal_samples,
+        )
+        proposal_samples = proposal_samples.reshape(-1, 1)
+    else:
+        # multivariate case
+        proposal_samples = np.random.multivariate_normal(
+            mean=posterior_mean, cov=posterior_cov, size=n_proposal_samples
+        )
+
+    # Compute the weights for the bridge sampling estimate
+    log_s1 = np.log(
+        posterior_samples.shape[0]
+        / (posterior_samples.shape[0] + n_proposal_samples)
+    )
+    log_s2 = np.log(
+        n_proposal_samples / (posterior_samples.shape[0] + n_proposal_samples)
+    )
+
+    # Start with the initial guess for p(y)
+    log_p_y = initial_guess_log_evidence
+
+    # Compute the log-likelihood, log-prior, and log-proposal for the posterior and proposal samples
+    # assumes that the objective function is the negative log-likelihood + negative log-prior
+
+    # get indices of the likelihood (i.e. non-prior) parts of the objective function
+    likelihood_fun_indices = []
+    for i, obj in enumerate(result.problem.objective._objectives):
+        if not isinstance(obj, NegLogParameterPriors) and not isinstance(
+            obj, NegLogPriors
+        ):
+            likelihood_fun_indices.append(i)
+
+    def log_likelihood_fun(x_array):
+        return np.array(
+            [
+                np.sum(
+                    [
+                        -obj(
+                            result.problem.get_full_vector(
+                                x=x, x_fixed_vals=result.problem.x_fixed_vals
+                            )
+                        )
+                        for obj_i, obj in enumerate(
+                            result.problem.objective._objectives
+                        )
+                        if obj_i in likelihood_fun_indices
+                    ]
+                )
+                for x in x_array
+            ]
+        )
+
+    def log_prior_fun(x_array):
+        return np.array(
+            [
+                np.sum(
+                    [
+                        -obj(
+                            result.problem.get_full_vector(
+                                x=x, x_fixed_vals=result.problem.x_fixed_vals
+                            )
+                        )
+                        for obj_i, obj in enumerate(
+                            result.problem.objective._objectives
+                        )
+                        if obj_i not in likelihood_fun_indices
+                    ]
+                )
+                for x in x_array
+            ]
+        )
+
+    log_likelihood_posterior = log_likelihood_fun(posterior_samples)
+    log_prior_posterior = log_prior_fun(posterior_samples)
+    log_proposal_posterior = log_proposal_fun(posterior_samples)
+
+    log_likelihood_proposal = log_likelihood_fun(proposal_samples)
+    log_prior_proposal = log_prior_fun(proposal_samples)
+    log_proposal_proposal = log_proposal_fun(proposal_samples)
+
+    log_h_posterior_1 = log_s1 + log_likelihood_posterior + log_prior_posterior
+    log_h_proposal_1 = log_s1 + log_likelihood_proposal + log_prior_proposal
+    for i in range(max_iter):
+        # Compute h(θ) element-wise (one value per posterior sample)
+        log_h_posterior_2 = log_s2 + log_p_y + log_proposal_posterior
+        log_h_posterior = np.logaddexp(log_h_posterior_1, log_h_posterior_2)
+
+        # Compute h(θ) element-wise (one value per proposal sample)
+        log_h_proposal_2 = log_s2 + log_p_y + log_proposal_proposal
+        log_h_proposal = np.logaddexp(log_h_proposal_1, log_h_proposal_2)
+
+        # Numerator and denominator of the bridge estimate; each term is divided by h(θ)
+        temp = log_likelihood_proposal + log_prior_proposal - log_h_proposal
+        log_numerator = logsumexp(temp) - np.log(
+            temp.size
+        )  # compute mean in log space
+        temp = log_proposal_posterior - log_h_posterior
+        log_denominator = logsumexp(temp) - np.log(
+            temp.size
+        )  # compute mean in log space
+
+        # Update p(y)
+        log_p_y_new = log_numerator - log_denominator
+
+        # Check for convergence
+        if abs(log_p_y_new - log_p_y) < tol:
+            break
+
+        log_p_y = log_p_y_new
+
+        if i == max_iter - 1:
+            logger.warning(
+                "Bridge sampling did not converge in the given number of iterations."
+            )
+
+    return log_p_y
diff --git a/pypesto/sample/parallel_tempering.py b/pypesto/sample/parallel_tempering.py
index 306774c46..6e81777f1 100644
--- a/pypesto/sample/parallel_tempering.py
+++ b/pypesto/sample/parallel_tempering.py
@@ -7,9 +7,8 @@
 
 from ..C import BETA_DECAY, EXPONENTIAL_DECAY
 from ..problem import Problem
-from ..result import McmcPtResult, Result
+from ..result import McmcPtResult
 from ..util import tqdm
-from .diagnostics import geweke_test
 from .sampler import InternalSampler, Sampler
 
 logger = logging.getLogger(__name__)
@@ -178,96 +177,6 @@ def swap_samples(self) -> Sequence[bool]:
     def adjust_betas(self, i_sample: int, swapped: Sequence[bool]):
         """Adjust temperature values. Default: Do nothing."""
 
-    def compute_log_evidence(
-        self,
-        result: Result,
-        method: str = "trapezoid",
-        use_all_chains: bool = True,
-    ) -> Union[float, None]:
-        """Perform thermodynamic integration to estimate the log evidence.
-
-        Parameters
-        ----------
-        result:
-            Result object containing the samples.
-        method:
-            Integration method, either 'trapezoid' or 'simpson' (uses scipy for integration).
-        use_all_chains:
-            If True, calculate burn-in for each chain and use the maximal burn-in for all chains for the integration.
-            This will fail if not all chains have converged yet.
-            Otherwise, use only the converged chains for the integration (might increase the integration error).
-        """
-        from scipy.integrate import simpson, trapezoid
-
-        if self.options["beta_init"] == EXPONENTIAL_DECAY:
-            logger.warning(
-                "The temperature schedule is not optimal for thermodynamic integration. "
-                f"Carefully check the results. Consider using beta_init='{BETA_DECAY}' for better results."
-            )
-
-        # compute burn in for all chains but the last one (prior only)
-        burn_ins = np.zeros(len(self.betas), dtype=int)
-        for i_chain in range(len(self.betas)):
-            burn_ins[i_chain] = geweke_test(result, chain_number=i_chain)
-        max_burn_in = int(np.max(burn_ins))
-
-        if max_burn_in >= result.sample_result.trace_x.shape[1]:
-            logger.warning(
-                f"At least {np.sum(burn_ins >= result.sample_result.trace_x.shape[1])} chains seem to not have "
-                f"converged yet. You may want to use a larger number of samples."
-            )
-            if use_all_chains:
-                raise ValueError(
-                    "Not all chains have converged yet. You may want to use a larger number of samples, "
-                    "or try ´use_all_chains=False´, which might increase the integration error."
-                )
-
-        if use_all_chains:
-            # estimate mean of log likelihood for each beta
-            trace_loglike = (
-                result.sample_result.trace_neglogprior[::-1, max_burn_in:]
-                - result.sample_result.trace_neglogpost[::-1, max_burn_in:]
-            )
-            mean_loglike_per_beta = np.mean(trace_loglike, axis=1)
-            temps = self.betas[::-1]
-        else:
-            # estimate mean of log likelihood for each beta if chain has converged
-            mean_loglike_per_beta = []
-            temps = []
-            for i_chain in reversed(range(len(self.betas))):
-                if burn_ins[i_chain] < result.sample_result.trace_x.shape[1]:
-                    # save temperature-chain as it is converged
-                    temps.append(self.betas[i_chain])
-                    # calculate mean log likelihood for each beta
-                    trace_loglike_i = (
-                        result.sample_result.trace_neglogprior[
-                            i_chain, burn_ins[i_chain] :
-                        ]
-                        - result.sample_result.trace_neglogpost[
-                            i_chain, burn_ins[i_chain] :
-                        ]
-                    )
-                    mean_loglike_per_beta.append(np.mean(trace_loglike_i))
-
-        if method == "trapezoid":
-            log_evidence = trapezoid(
-                # integrate from low to high temperature
-                y=mean_loglike_per_beta,
-                x=temps,
-            )
-        elif method == "simpson":
-            log_evidence = simpson(
-                # integrate from low to high temperature
-                y=mean_loglike_per_beta,
-                x=temps,
-            )
-        else:
-            raise ValueError(
-                f"Unknown method {method}. Choose 'trapezoid' or 'simpson'."
-            )
-
-        return log_evidence
-
 
 def beta_decay_betas(n_chains: int, alpha: float) -> np.ndarray:
     """Initialize betas to the (j-1)th quantile of a Beta(alpha, 1) distribution.
diff --git a/pypesto/sample/util.py b/pypesto/sample/util.py
index 30e322659..2824aaeee 100644
--- a/pypesto/sample/util.py
+++ b/pypesto/sample/util.py
@@ -25,6 +25,8 @@ def calculate_ci_mcmc_sample(
         The pyPESTO result object with filled sample result.
     ci_level:
         Lower tail probability, defaults to 95% interval.
+    exclude_burn_in:
+        Whether to exclude the burn-in samples.
 
     Returns
     -------
diff --git a/pypesto/select/misc.py b/pypesto/select/misc.py
index f5af4bf2b..99fb5fae5 100644
--- a/pypesto/select/misc.py
+++ b/pypesto/select/misc.py
@@ -62,10 +62,11 @@ def model_to_pypesto_problem(
         hierarchical=hierarchical,
     )
     if objective is None:
-        amici_model = importer.create_model(
+        factory = importer.create_objective_creator()
+        amici_model = factory.create_model(
             non_estimated_parameters_as_constants=False,
         )
-        objective = importer.create_objective(
+        objective = factory.create_objective(
             model=amici_model,
         )
     pypesto_problem = importer.create_problem(
diff --git a/pypesto/version.py b/pypesto/version.py
index 43a1e95ba..6b27eeebf 100644
--- a/pypesto/version.py
+++ b/pypesto/version.py
@@ -1 +1 @@
-__version__ = "0.5.3"
+__version__ = "0.5.4"
diff --git a/pypesto/visualize/clust_color.py b/pypesto/visualize/clust_color.py
index bf0cf02c9..bbd39c41c 100644
--- a/pypesto/visualize/clust_color.py
+++ b/pypesto/visualize/clust_color.py
@@ -142,7 +142,11 @@ def assign_colors(
             colors = colors[0]
         return np.array([colors] * n_vals)
 
-    if colors.shape[1] == 4 and n_vals == colors.shape[0]:
+    if (
+        len(colors.shape) > 1
+        and colors.shape[1] == 4
+        and n_vals == colors.shape[0]
+    ):
         return colors
 
     if colors.shape[0] == 4:
@@ -195,13 +199,10 @@ def assign_colors_for_list(
         real_indices = np.arange(int(colors.shape[0] / 2))
         return colors[real_indices]
 
-    # if the user specified color lies does not match the number of results
-    if len(colors) != num_entries:
-        raise (
-            "Incorrect color input. Colors must be specified either as "
-            "list of [r, g, b, alpha] with length equal to function "
-            "values Number of function (here: " + str(num_entries) + "), "
-            "or as one single [r, g, b, alpha] color."
-        )
-
-    return colors
+    # Pass the colors through assign_colors to check correct format of RGBA
+    return assign_colors(
+        vals=np.array(list(range(num_entries))),
+        colors=colors,
+        balance_alpha=False,
+        highlight_global=False,
+    )
diff --git a/pypesto/visualize/misc.py b/pypesto/visualize/misc.py
index 5d7c1b491..fd74c0fad 100644
--- a/pypesto/visualize/misc.py
+++ b/pypesto/visualize/misc.py
@@ -349,7 +349,7 @@ def process_start_indices(
             raise ValueError(
                 f"Permissible values for start_indices are {ALL}, "
                 f"{ALL_CLUSTERED}, {FIRST_CLUSTER}, an integer or a "
-                f"list of indices."
+                f"list of indices. Got {start_indices}."
             )
     # if it is an integer n, select the first n starts
     if isinstance(start_indices, Number):
diff --git a/pypesto/visualize/observable_mapping.py b/pypesto/visualize/observable_mapping.py
index 5c8ceba41..ec378fd1c 100644
--- a/pypesto/visualize/observable_mapping.py
+++ b/pypesto/visualize/observable_mapping.py
@@ -28,6 +28,7 @@
     from amici.petab.conditions import fill_in_parameters
 
     from ..hierarchical import InnerCalculatorCollector
+    from ..hierarchical.base_problem import scale_back_value_dict
     from ..hierarchical.relative.calculator import RelativeAmiciCalculator
     from ..hierarchical.relative.problem import RelativeInnerProblem
     from ..hierarchical.semiquantitative.calculator import SemiquantCalculator
@@ -301,6 +302,18 @@ def plot_linear_observable_mappings_from_pypesto_result(
         )
     )
 
+    # Remove inner parameters not belonging to the relative inner problem.
+    inner_parameter_values = {
+        key: value
+        for key, value in inner_parameter_values.items()
+        if key in inner_problem.get_x_ids()
+    }
+
+    # Scale the inner parameters back to linear scale.
+    inner_parameter_values = scale_back_value_dict(
+        inner_parameter_values, inner_problem
+    )
+
     ######################################
     # Plot the linear observable mappings.
     ######################################
diff --git a/pypesto/visualize/parameters.py b/pypesto/visualize/parameters.py
index c50f4fdeb..61269e3fc 100644
--- a/pypesto/visualize/parameters.py
+++ b/pypesto/visualize/parameters.py
@@ -11,7 +11,13 @@
 
 from pypesto.util import delete_nan_inf
 
-from ..C import INNER_PARAMETERS, RGBA, WATERFALL_MAX_VALUE
+from ..C import (
+    INNER_PARAMETERS,
+    LOG10,
+    RGBA,
+    WATERFALL_MAX_VALUE,
+    InnerParameterType,
+)
 from ..result import Result
 from .clust_color import assign_colors
 from .misc import (
@@ -21,6 +27,13 @@
 )
 from .reference_points import ReferencePoint, create_references
 
+try:
+    from ..hierarchical.base_problem import scale_value
+    from ..hierarchical.relative import RelativeInnerProblem
+    from ..hierarchical.semiquantitative import SemiquantProblem
+except ImportError:
+    pass
+
 logger = logging.getLogger(__name__)
 
 
@@ -38,6 +51,7 @@ def parameters(
     start_indices: Optional[Union[int, Iterable[int]]] = None,
     scale_to_interval: Optional[tuple[float, float]] = None,
     plot_inner_parameters: bool = True,
+    log10_scale_hier_sigma: bool = True,
 ) -> matplotlib.axes.Axes:
     """
     Plot parameter values.
@@ -77,6 +91,9 @@ def parameters(
         ``None`` to use bounds as determined by ``lb, ub``.
     plot_inner_parameters:
         Flag indicating whether to plot inner parameters (default: True).
+    log10_scale_hier_sigma:
+        Flag indicating whether to scale inner parameters of type
+        ``InnerParameterType.SIGMA`` to log10 (default: True).
 
     Returns
     -------
@@ -108,13 +125,14 @@ def scale_parameters(x):
 
     for j, result in enumerate(results):
         # handle results and bounds
-        (lb, ub, x_labels, fvals, xs) = handle_inputs(
+        (lb, ub, x_labels, fvals, xs, x_axis_label) = handle_inputs(
             result=result,
             lb=lb,
             ub=ub,
             parameter_indices=parameter_indices,
             start_indices=start_indices,
             plot_inner_parameters=plot_inner_parameters,
+            log10_scale_hier_sigma=log10_scale_hier_sigma,
         )
 
         # parse fvals and parameters
@@ -136,6 +154,7 @@ def scale_parameters(x):
             lb=lb,
             ub=ub,
             x_labels=x_labels,
+            x_axis_label=x_axis_label,
             ax=ax,
             size=size,
             colors=colors[j],
@@ -240,6 +259,7 @@ def parameters_lowlevel(
     lb: Optional[Union[np.ndarray, list[float]]] = None,
     ub: Optional[Union[np.ndarray, list[float]]] = None,
     x_labels: Optional[Iterable[str]] = None,
+    x_axis_label: str = "Parameter value",
     ax: Optional[matplotlib.axes.Axes] = None,
     size: Optional[tuple[float, float]] = None,
     colors: Optional[Sequence[Union[np.ndarray, list[float]]]] = None,
@@ -327,7 +347,7 @@ def parameters_lowlevel(
         ub = np.array(ub, dtype="float64")
         ax.plot(ub.flatten(), parameters_ind, "k--", marker="+")
 
-    ax.set_xlabel("Parameter value")
+    ax.set_xlabel(x_axis_label)
     ax.set_ylabel("Parameter")
     ax.set_title("Estimated parameters")
     if legend_text is not None:
@@ -343,6 +363,7 @@ def handle_inputs(
     ub: Optional[Union[np.ndarray, list[float]]] = None,
     start_indices: Optional[Union[int, Iterable[int]]] = None,
     plot_inner_parameters: bool = False,
+    log10_scale_hier_sigma: bool = True,
 ) -> tuple[np.ndarray, np.ndarray, list[str], np.ndarray, list[np.ndarray]]:
     """
     Compute the correct bounds for the parameter indices to be plotted.
@@ -363,6 +384,9 @@ def handle_inputs(
         int specifying up to which start index should be plotted
     plot_inner_parameters:
         Flag indicating whether inner parameters should be plotted.
+    log10_scale_hier_sigma:
+        Flag indicating whether to scale inner parameters of type
+        ``InnerParameterType.SIGMA`` to log10 (default: True).
 
     Returns
     -------
@@ -374,13 +398,21 @@ def handle_inputs(
         objective function values which are needed for plotting later
     xs:
         parameter values which will be plotted later
+    x_axis_label:
+        label for the x-axis
     """
     # retrieve results
     fvals = result.optimize_result.fval
     xs = result.optimize_result.x
 
     # retrieve inner parameters in case of hierarchical optimization
-    inner_xs, inner_xs_names, inner_lb, inner_ub = _handle_inner_inputs(result)
+    (
+        inner_xs,
+        inner_xs_names,
+        inner_xs_scales,
+        inner_lb,
+        inner_ub,
+    ) = _handle_inner_inputs(result, log10_scale_hier_sigma)
 
     # parse indices which should be plotted
     if start_indices is not None:
@@ -404,8 +436,8 @@ def handle_inputs(
     if ub is None:
         ub = result.problem.ub_full
 
-    # get labels
-    x_labels = result.problem.x_names
+    # get labels as x_names and scales
+    x_labels = list(zip(result.problem.x_names, result.problem.x_scales))
 
     # handle fixed and free indices
     if len(parameter_indices) < result.problem.dim_full:
@@ -423,20 +455,30 @@ def handle_inputs(
     if inner_xs is not None and plot_inner_parameters:
         lb = np.concatenate([lb, inner_lb])
         ub = np.concatenate([ub, inner_ub])
-        x_labels = x_labels + inner_xs_names
+        inner_xs_labels = list(zip(inner_xs_names, inner_xs_scales))
+        x_labels = x_labels + inner_xs_labels
         xs_out = [
             np.concatenate([x, inner_x]) if x is not None else None
             for x, inner_x in zip(xs_out, inner_xs_out)
         ]
 
-    return lb, ub, x_labels, fvals_out, xs_out
+    # If all the scales are the same, put it in the x_axis_label
+    if len({x_scale for _, x_scale in x_labels}) == 1:
+        x_axis_label = "Parameter value (" + x_labels[0][1] + ")"
+        x_labels = [x_name for x_name, _ in x_labels]
+    else:
+        x_axis_label = "Parameter value"
+        x_labels = [f"{x_name} ({x_scale})" for x_name, x_scale in x_labels]
+
+    return lb, ub, x_labels, fvals_out, xs_out, x_axis_label
 
 
 def _handle_inner_inputs(
     result: Result,
+    log10_scale_hier_sigma: bool = True,
 ) -> Union[
-    tuple[None, None, None, None],
-    tuple[list[np.ndarray], list[str], np.ndarray, np.ndarray],
+    tuple[None, None, None, None, None],
+    tuple[list[np.ndarray], list[str], list[str], np.ndarray, np.ndarray],
 ]:
     """Handle inner parameters from hierarchical optimization, if available.
 
@@ -444,6 +486,9 @@ def _handle_inner_inputs(
     ----------
     result:
         Optimization result obtained by 'optimize.py'.
+    log10_scale_hier_sigma:
+        Flag indicating whether to scale inner parameters of type
+        ``InnerParameterType.SIGMA`` to log10 (default: True).
 
     Returns
     -------
@@ -451,6 +496,8 @@ def _handle_inner_inputs(
         Inner parameter values which will be appended to xs.
     inner_xs_names:
         Inner parameter names.
+    inner_xs_scales:
+        Inner parameter scales.
     inner_lb:
         Inner parameter lower bounds.
     inner_ub:
@@ -460,6 +507,7 @@ def _handle_inner_inputs(
         res.get(INNER_PARAMETERS, None) for res in result.optimize_result.list
     ]
     inner_xs_names = None
+    inner_xs_scales = None
     inner_lb = None
     inner_ub = None
 
@@ -473,19 +521,54 @@ def _handle_inner_inputs(
         inner_xs = [
             (
                 np.full(len(inner_xs_names), np.nan)
-                if inner_xs_idx is None
-                else np.asarray(inner_xs_idx)
+                if inner_xs_for_start is None
+                else np.asarray(inner_xs_for_start)
             )
-            for inner_xs_idx in inner_xs
+            for inner_xs_for_start in inner_xs
         ]
         # set bounds for inner parameters
         inner_lb = result.problem.inner_lb
         inner_ub = result.problem.inner_ub
 
+        # Scale inner parameter bounds according to their parameter scales
+        inner_xs_scales = result.problem.inner_scales
+
+        if log10_scale_hier_sigma:
+            inner_problems_with_sigma = [
+                inner_calculator.inner_problem
+                for inner_calculator in result.problem.objective.calculator.inner_calculators
+                if isinstance(
+                    inner_calculator.inner_problem, RelativeInnerProblem
+                )
+                or isinstance(inner_calculator.inner_problem, SemiquantProblem)
+            ]
+            for inner_problem in inner_problems_with_sigma:
+                for inner_x_idx, inner_x_name in enumerate(inner_xs_names):
+                    if (inner_x_name in inner_problem.get_x_ids()) and (
+                        inner_problem.get_for_id(
+                            inner_x_name
+                        ).inner_parameter_type
+                        == InnerParameterType.SIGMA
+                    ):
+                        # Log10-scale values of all starts; bounds follow below
+                        for inner_x_for_start in inner_xs:
+                            inner_x_for_start[inner_x_idx] = scale_value(
+                                inner_x_for_start[inner_x_idx], LOG10
+                            )
+                        inner_xs_scales[inner_x_idx] = LOG10
+
+        for inner_x_idx, inner_scale in enumerate(inner_xs_scales):
+            inner_lb[inner_x_idx] = scale_value(
+                inner_lb[inner_x_idx], inner_scale
+            )
+            inner_ub[inner_x_idx] = scale_value(
+                inner_ub[inner_x_idx], inner_scale
+            )
+
     if inner_xs_names is None:
         inner_xs = None
 
-    return inner_xs, inner_xs_names, inner_lb, inner_ub
+    return inner_xs, inner_xs_names, inner_xs_scales, inner_lb, inner_ub
 
 
 def parameters_correlation_matrix(
diff --git a/pypesto/visualize/profiles.py b/pypesto/visualize/profiles.py
index f4ecb6443..bf333a165 100644
--- a/pypesto/visualize/profiles.py
+++ b/pypesto/visualize/profiles.py
@@ -24,6 +24,8 @@ def profiles(
     profile_list_ids: Union[int, Sequence[int]] = 0,
     ratio_min: float = 0.0,
     show_bounds: bool = False,
+    plot_objective_values: bool = False,
+    quality_colors: bool = False,
 ) -> plt.Axes:
     """
     Plot classical 1D profile plot.
@@ -45,7 +47,9 @@ def profiles(
         List of reference points for optimization results, containing at
         least a function value fval.
     colors:
-        List of colors, or single color.
+        List of colors, or single color. If multiple colors are passed, their
+        number needs to correspond to either the number of results or the
+        number of profile_list_ids. Cannot be provided if quality_colors is set to True.
     legends:
         Labels for line plots, one label per result object.
     x_labels:
@@ -56,12 +60,30 @@ def profiles(
         Minimum ratio below which to cut off.
     show_bounds:
         Whether to show, and extend the plot to, the lower and upper bounds.
+    plot_objective_values:
+        Whether to plot the objective function values instead of the likelihood
+        ratio values.
+    quality_colors:
+        If set to True, the profiles are colored according to types of steps the
+        profiler took. This gives additional information about the profile quality.
+        Red indicates a step for which min_step_size was reduced, blue indicates a step for which
+        max_step_size was increased, and green indicates a step for which the profiler
+        had to resample the parameter vector due to optimization failure of the previous two.
+        Black indicates a step for which none of the above was necessary. This option is only
+        available if there is only one result and one profile_list_id (one profile per plot).
 
     Returns
     -------
     ax:
         The plot axes.
     """
+
+    if colors is not None and quality_colors:
+        raise ValueError(
+            "Cannot visualize the profiles with `quality_colors` of profiler_result.color_path "
+            " and `colors` provided at the same time. Please provide only one of them."
+        )
+
     # parse input
     results, profile_list_ids, colors, legends = process_result_list_profiles(
         results, profile_list_ids, colors, legends
@@ -75,11 +97,12 @@ def profiles(
     # loop over results
     for i_result, result in enumerate(results):
         for i_profile_list, profile_list_id in enumerate(profile_list_ids):
-            fvals = handle_inputs(
+            fvals, color_paths = handle_inputs(
                 result,
                 profile_indices=profile_indices,
                 profile_list=profile_list_id,
                 ratio_min=ratio_min,
+                plot_objective_values=plot_objective_values,
             )
 
             # add x_labels for parameters
@@ -98,17 +121,30 @@ def profiles(
                 # multiple results per axes object
                 color_ind = i_result
 
+            # If quality_colors is set to True, we use the colors provided
+            # by profiler_result.color_path. This will be done only if there is
+            # only one result and one profile_list_id (basically one profile per plot).
+            if (
+                len(results) == 1
+                and len(profile_list_ids) == 1
+                and quality_colors
+            ):
+                color = color_paths
+            else:
+                color = colors[color_ind]
+
             # call lowlevel routine
             ax = profiles_lowlevel(
                 fvals=fvals,
                 ax=ax,
                 size=size,
-                color=colors[color_ind],
+                color=color,
                 legend_text=legends[color_ind],
                 x_labels=x_labels,
                 show_bounds=show_bounds,
                 lb_full=result.problem.lb_full,
                 ub_full=result.problem.ub_full,
+                plot_objective_values=plot_objective_values,
             )
 
     # parse and apply plotting options
@@ -132,6 +168,7 @@ def profiles_lowlevel(
     show_bounds: bool = False,
     lb_full: Sequence[float] = None,
     ub_full: Sequence[float] = None,
+    plot_objective_values: bool = False,
 ) -> list[plt.Axes]:
     """
     Lowlevel routine for profile plotting.
@@ -147,8 +184,9 @@ def profiles_lowlevel(
     size:
         Figure size (width, height) in inches. Is only applied when no ax
         object is specified.
-    color: RGBA, optional
-        Color for profiles in plot.
+    color: RGBA, list[np.ndarray[RGBA]], optional
+        Color for profiles in plot. In case of quality_colors=True, this is a list of
+        np.ndarray[RGBA] for each profile -- one color per profile point for each profile.
     legend_text:
         Label for line plots.
     show_bounds:
@@ -157,6 +195,9 @@ def profiles_lowlevel(
         Lower bound.
     ub_full:
         Upper bound.
+    plot_objective_values:
+        Whether to plot the objective function values instead of the likelihood
+        ratio values.
 
     Returns
     -------
@@ -215,6 +256,12 @@ def profiles_lowlevel(
         # if we have empty profiles and more axes than profiles: skip
         if n_plots != n_fvals and fval is None:
             continue
+        # If we use colors from profiler_result.color_path,
+        # we need to take the color path of each profile
+        if isinstance(color, list) and isinstance(color[i_plot], np.ndarray):
+            color_i = color[i_plot]
+        else:
+            color_i = color
 
         # handle legend
         if i_plot == 0:
@@ -235,7 +282,7 @@ def profiles_lowlevel(
                 fval,
                 ax[counter],
                 size=size,
-                color=color,
+                color=color_i,
                 legend_text=tmp_legend,
                 show_bounds=show_bounds,
                 lb=lb,
@@ -249,13 +296,10 @@ def profiles_lowlevel(
             ax[counter].set_xlabel(x_labels[counter])
 
         if counter % columns == 0:
-            ax[counter].set_ylabel("Log-posterior ratio")
-        else:
-            # fix pyPESTO/pyPESTO/pypesto/visualize/profiles.py:228:
-            # UserWarning: FixedFormatter should only be used
-            # together with FixedLocator. Fix from matplotlib #18848.
-            ax[counter].set_yticks(ax[counter].get_yticks())
-            ax[counter].set_yticklabels(["" for _ in ax[counter].get_yticks()])
+            if plot_objective_values:
+                ax[counter].set_ylabel("Objective function value")
+            else:
+                ax[counter].set_ylabel("Log-posterior ratio")
 
         # increase counter and cleanup legend
         counter += 1
@@ -302,9 +346,17 @@ def profile_lowlevel(
     """
     # parse input
     fvals = np.asarray(fvals)
-
     # get colors
-    color = assign_colors([1.0], color)
+    if (
+        color is None
+        or isinstance(color, list)
+        or isinstance(color, tuple)
+        or (isinstance(color, np.ndarray) and not len(color.shape) == 2)
+    ):
+        color = assign_colors([1.0], color)
+        single_color = True
+    else:
+        single_color = False
 
     # axes
     if ax is None:
@@ -317,7 +369,37 @@ def profile_lowlevel(
     # plot
     if fvals.size != 0:
         ax.xaxis.set_major_locator(MaxNLocator(integer=True))
-        ax.plot(fvals[0, :], fvals[1, :], color=color[0], label=legend_text)
+        xs = fvals[0, :]
+        ratios = fvals[1, :]
+
+        # If we use colors from profiler_result.color_path,
+        # we need to make a mapping from profile points to their colors
+        if not single_color:
+            # Create a mapping from (x, ratio) to color
+            point_to_color = dict(zip(zip(xs, ratios), color))
+        else:
+            point_to_color = None
+
+        # Plot each profile point individually to allow for different colors
+        for i in range(1, len(xs)):
+            point_color = (
+                color
+                if single_color
+                else tuple(point_to_color[(xs[i], ratios[i])])
+            )
+            ax.plot(
+                [xs[i - 1], xs[i]],
+                [ratios[i - 1], ratios[i]],
+                color=color if single_color else (0, 0, 0, 1),
+                linestyle="-",
+            )
+            if not single_color and point_color != (0, 0, 0, 1):
+                ax.plot(xs[i], ratios[i], color=point_color, marker="o")
+            else:
+                ax.plot(xs[i], ratios[i], color=point_color, marker=".")
+
+        # Plot legend text
+        ax.plot([], [], color=color[0], label=legend_text)
 
     if legend_text is not None:
         ax.legend()
@@ -366,6 +448,7 @@ def handle_inputs(
     profile_indices: Sequence[int],
     profile_list: int,
     ratio_min: float,
+    plot_objective_values: bool,
 ) -> list[np.array]:
     """
     Retrieve the values of the profiles to be plotted.
@@ -381,6 +464,8 @@ def handle_inputs(
     ratio_min:
         Exclude values where profile likelihood ratio is smaller than
         ratio_min.
+    plot_objective_values:
+        Whether to plot objective function values instead of likelihood ratios.
 
     Returns
     -------
@@ -388,6 +473,7 @@ def handle_inputs(
     """
     # extract ratio values from result
     fvals = []
+    colors = []
     for i_par in range(0, len(result.profile_result.list[profile_list])):
         if (
             i_par in profile_indices
@@ -399,18 +485,31 @@ def handle_inputs(
             ratios = result.profile_result.list[profile_list][
                 i_par
             ].ratio_path[:]
+            colors_for_par = result.profile_result.list[profile_list][
+                i_par
+            ].color_path
 
             # constrain
             indices = np.where(ratios > ratio_min)
             xs = xs[indices]
             ratios = ratios[indices]
-
-            fvals_for_par = np.array([xs, ratios])
+            colors_for_par = colors_for_par[indices]
+
+            if plot_objective_values:
+                obj_vals = result.profile_result.list[profile_list][
+                    i_par
+                ].fval_path
+                obj_vals = obj_vals[indices]
+                fvals_for_par = np.array([xs, obj_vals])
+            else:
+                fvals_for_par = np.array([xs, ratios])
         else:
             fvals_for_par = None
+            colors_for_par = None
         fvals.append(fvals_for_par)
+        colors.append(colors_for_par)
 
-    return fvals
+    return fvals, colors
 
 
 def process_result_list_profiles(
diff --git a/setup.cfg b/setup.cfg
index f0f618828..4fc1a4e1b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -74,7 +74,6 @@ all =
     %(all_optimizers)s
     %(mpi)s
     %(pymc)s
-    %(aesara)s
     %(jax)s
     %(julia)s
     %(emcee)s
@@ -121,8 +120,6 @@ pymc =
     arviz >= 0.12.1
     aesara >= 2.8.6
     pymc >= 4.2.1
-aesara =
-    aesara >= 2.0.5
 jax =
     jax >= 0.4.1
     jaxlib >= 0.4.1
@@ -153,7 +150,6 @@ doc =
     %(fides)s
     %(amici)s
     %(petab)s
-    %(aesara)s
     %(jax)s
     %(roadrunner)s
 example =
@@ -164,6 +160,7 @@ example =
     %(nlopt)s
     %(pyswarm)s
     notebook >= 6.1.4
+    ipywidgets >= 8.1.5
     benchmark_models_petab @ git+https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab.git@master#subdirectory=src/python
 select =
     # Remove when vis is moved to PEtab Select version
diff --git a/test/base/test_engine.py b/test/base/test_engine.py
index f86fc000f..6db8e79c3 100644
--- a/test/base/test_engine.py
+++ b/test/base/test_engine.py
@@ -64,8 +64,7 @@ def _test_petab(engine):
             "Boehm_JProteomeRes2014.yaml",
         )
     )
-    objective = petab_importer.create_objective()
-    problem = petab_importer.create_problem(objective)
+    problem = petab_importer.create_problem()
     optimizer = pypesto.optimize.ScipyOptimizer(options={"maxiter": 10})
     result = pypesto.optimize.minimize(
         problem=problem,
@@ -86,7 +85,8 @@ def test_deepcopy_objective():
             "Boehm_JProteomeRes2014.yaml",
         )
     )
-    objective = petab_importer.create_objective()
+    factory = petab_importer.create_objective_creator()
+    objective = factory.create_objective()
 
     objective.amici_solver.setSensitivityMethod(
         amici.SensitivityMethod_adjoint
@@ -123,7 +123,8 @@ def test_pickle_objective():
             "Boehm_JProteomeRes2014.yaml",
         )
     )
-    objective = petab_importer.create_objective()
+    factory = petab_importer.create_objective_creator()
+    objective = factory.create_objective()
 
     objective.amici_solver.setSensitivityMethod(
         amici.SensitivityMethod_adjoint
diff --git a/test/base/test_ensemble.py b/test/base/test_ensemble.py
index 12089af53..9a2383448 100644
--- a/test/base/test_ensemble.py
+++ b/test/base/test_ensemble.py
@@ -6,6 +6,7 @@
 
 import pypesto
 import pypesto.optimize as optimize
+import pypesto.sample as sample
 from pypesto.C import AMICI_STATUS, AMICI_T, AMICI_Y, MEAN, WEIGHTED_SIGMA
 from pypesto.engine import MultiProcessEngine
 from pypesto.ensemble import (
@@ -224,3 +225,49 @@ def post_processor(amici_outputs, output_type, output_ids):
         progress_bar=False,
     )
     return ensemble_prediction
+
+
+def test_hpd_calculation():
+    """Test the calculation of Highest Posterior Density (HPD)."""
+    problem = create_petab_problem()
+
+    sampler = sample.AdaptiveMetropolisSampler(
+        options={"show_progress": False}
+    )
+
+    result = optimize.minimize(
+        problem=problem,
+        n_starts=3,
+        progress_bar=False,
+    )
+
+    result = sample.sample(
+        problem=problem,
+        sampler=sampler,
+        n_samples=100,
+        result=result,
+    )
+
+    # Overwrite burn-in and the neg-log-posterior trace with known values
+    burn_in = 1
+    result.sample_result.burn_in = burn_in
+    result.sample_result.trace_neglogpost[0][1:] = np.random.permutation(
+        np.arange(len(result.sample_result.trace_neglogpost[0][1:]))
+    )
+
+    hpd_ensemble = Ensemble.from_sample(
+        result=result, remove_burn_in=True, ci_level=0.95
+    )
+
+    expected_length = (
+        int((result.sample_result.trace_x[0][burn_in:].shape[0]) * 0.95) + 1
+    )
+    # Check that the HPD parameters have the expected shape
+    assert hpd_ensemble.x_vectors.shape == (problem.dim, expected_length)
+    x_indices = np.where(result.sample_result.trace_neglogpost[0][1:] <= 95)[0]
+    assert np.all(
+        [
+            np.any(np.all(x[:, None] == hpd_ensemble.x_vectors, axis=0))
+            for x in result.sample_result.trace_x[0][burn_in:][x_indices]
+        ]
+    )
diff --git a/test/base/test_objective.py b/test/base/test_objective.py
index b5d3ee930..703cb69f7 100644
--- a/test/base/test_objective.py
+++ b/test/base/test_objective.py
@@ -2,7 +2,6 @@
 
 import copy
 import numbers
-import sys
 from functools import partial
 
 import numpy as np
@@ -13,11 +12,6 @@
 
 from ..util import CRProblem, poly_for_sensi, rosen_for_sensi
 
-pytest_skip_aesara = pytest.mark.skipif(
-    sys.version_info >= (3, 12),
-    reason="Skipped Aesara tests on Python 3.12 or higher",
-)
-
 
 @pytest.fixture(params=[True, False])
 def integrated(request):
@@ -184,44 +178,6 @@ def rel_err(eps_):
     )
 
 
-@pytest_skip_aesara
-def test_aesara(max_sensi_order, integrated):
-    """Test function composition and gradient computation via aesara"""
-    import aesara.tensor as aet
-
-    from pypesto.objective.aesara import AesaraObjective
-
-    prob = rosen_for_sensi(max_sensi_order, integrated, [0, 1])
-
-    # create aesara specific symbolic tensor variables
-    x = aet.specify_shape(aet.vector("x"), (2,))
-
-    # apply inverse transform such that we evaluate at prob['x']
-    x_ref = np.arcsinh(prob["x"])
-
-    # compose rosenbrock function with sinh transformation
-    obj = AesaraObjective(prob["obj"], x, aet.sinh(x))
-
-    # check function values and derivatives, also after copy
-    for _obj in (obj, copy.deepcopy(obj)):
-        # function value
-        assert _obj(x_ref) == prob["fval"]
-
-        # gradient
-        if max_sensi_order > 0:
-            assert np.allclose(
-                _obj(x_ref, sensi_orders=(1,)), prob["grad"] * np.cosh(x_ref)
-            )
-
-        # hessian
-        if max_sensi_order > 1:
-            assert np.allclose(
-                prob["hess"] * (np.diag(np.power(np.cosh(x_ref), 2)))
-                + np.diag(prob["grad"] * np.sinh(x_ref)),
-                _obj(x_ref, sensi_orders=(2,)),
-            )
-
-
 @pytest.mark.parametrize("enable_x64", [True, False])
 @pytest.mark.parametrize("fix_parameters", [True, False])
 def test_jax(max_sensi_order, integrated, enable_x64, fix_parameters):
@@ -342,12 +298,20 @@ def fd_delta(request):
     return request.param
 
 
-def test_fds(fd_method, fd_delta):
+# parametrize the test over fixed and unfixed parameters
+@pytest.mark.parametrize("fixed", [True, False])
+def test_fds(fd_method, fd_delta, fixed):
     """Test finite differences."""
     problem = CRProblem()
 
-    # reference objective
-    obj = problem.get_objective()
+    if fixed:
+        fixed_problem = problem.get_problem()
+        fixed_problem.fix_parameters([1], problem.p_true[1])
+        obj = fixed_problem.objective
+        p = problem.p_true[0]
+    else:
+        obj = problem.get_objective()
+        p = problem.p_true
 
     # FDs for everything
     obj_fd = pypesto.FD(
@@ -394,7 +358,6 @@ def test_fds(fd_method, fd_delta):
         delta_grad=fd_delta,
         delta_res=fd_delta,
     )
-    p = problem.p_true
 
     # check that function values coincide (call delegated)
     for attr in ["fval", "res"]:
diff --git a/test/base/test_roadrunner.py b/test/base/test_roadrunner.py
index 42dd4ec68..50a26dd28 100644
--- a/test/base/test_roadrunner.py
+++ b/test/base/test_roadrunner.py
@@ -10,7 +10,8 @@
 import pytest
 
 import pypesto
-import pypesto.objective.roadrunner as objective_rr
+import pypesto.petab
+from pypesto.objective.roadrunner import simulation_to_measurement_df
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -60,19 +61,21 @@ def _execute_case_rr(case, model_type, version):
     # import petab problem
     yaml_file = case_dir / petabtests.problem_yaml_name(case)
 
-    importer = objective_rr.PetabImporterRR.from_yaml(yaml_file)
+    importer = pypesto.petab.PetabImporter.from_yaml(
+        yaml_file, simulator_type="roadrunner"
+    )
     petab_problem = importer.petab_problem
-    obj = importer.create_objective()
+    obj = importer.create_problem().objective
 
     # the scaled parameters
-    problem_parameters = importer.petab_problem.x_nominal_scaled
+    problem_parameters = importer.petab_problem.x_nominal_free_scaled
 
     # simulate
     ret = obj(problem_parameters, sensi_orders=(0,), return_dict=True)
 
     # extract results
     llh = -ret["fval"]
-    simulation_df = objective_rr.simulation_to_measurement_df(
+    simulation_df = simulation_to_measurement_df(
         ret["simulation_results"], petab_problem.measurement_df
     )
 
@@ -117,7 +120,9 @@ def test_deepcopy():
         os.path.join(models.MODELS_DIR, model_name, model_name + ".yaml")
     )
     petab_problem.model_name = model_name
-    importer = objective_rr.PetabImporterRR(petab_problem)
+    importer = pypesto.petab.PetabImporter(
+        petab_problem, simulator_type="roadrunner"
+    )
     problem_parameters = petab_problem.x_nominal_free_scaled
 
     problem = importer.create_problem()
@@ -147,7 +152,9 @@ def test_multiprocessing():
         os.path.join(models.MODELS_DIR, model_name, model_name + ".yaml")
     )
     petab_problem.model_name = model_name
-    importer = objective_rr.PetabImporterRR(petab_problem)
+    importer = pypesto.petab.PetabImporter(
+        petab_problem, simulator_type="roadrunner"
+    )
 
     problem = importer.create_problem()
     # start 30 times from the same point
diff --git a/test/hierarchical/test_censored.py b/test/hierarchical/test_censored.py
index 2538e050f..d204a81b2 100644
--- a/test/hierarchical/test_censored.py
+++ b/test/hierarchical/test_censored.py
@@ -39,10 +39,7 @@ def test_optimization():
     )
 
     importer = pypesto.petab.PetabImporter(petab_problem, hierarchical=True)
-    importer.create_model()
-
-    objective = importer.create_objective()
-    problem = importer.create_problem(objective)
+    problem = importer.create_problem()
 
     result = pypesto.optimize.minimize(
         problem=problem, n_starts=1, optimizer=optimizer
@@ -63,8 +60,7 @@ def test_ordinal_calculator_and_objective():
     petab_problem = petab.Problem.from_yaml(example_censored_yaml)
 
     importer = pypesto.petab.PetabImporter(petab_problem, hierarchical=True)
-    objective = importer.create_objective()
-    problem = importer.create_problem(objective)
+    problem = importer.create_problem()
 
     def calculate(problem, x_dct):
         return problem.objective.calculator(
@@ -88,7 +84,7 @@ def calculate(problem, x_dct):
         problem.objective,
     )
     finite_differences_results = finite_differences(
-        petab_problem.x_nominal_scaled,
+        petab_problem.x_nominal_free_scaled,
         (
             0,
             1,
@@ -100,7 +96,7 @@ def calculate(problem, x_dct):
     # with finite differences.
     assert np.allclose(
         finite_differences_results[1],
-        calculator_result["grad"],
+        calculator_result["grad"][petab_problem.x_free_indices],
     )
 
 
diff --git a/test/hierarchical/test_hierarchical.py b/test/hierarchical/test_hierarchical.py
index bb3645df0..293467289 100644
--- a/test/hierarchical/test_hierarchical.py
+++ b/test/hierarchical/test_hierarchical.py
@@ -11,7 +11,7 @@
 import pypesto
 from pypesto.C import (
     INNER_PARAMETER_BOUNDS,
-    LOG10,
+    LIN,
     LOWER_BOUND,
     MODE_FUN,
     UPPER_BOUND,
@@ -58,8 +58,7 @@ def test_hierarchical_optimization_pipeline():
     problems = {}
     for flag in flags:
         importer = PetabImporter(petab_problems[flag], hierarchical=flag)
-        objective = importer.create_objective()
-        problem = importer.create_problem(objective)
+        problem = importer.create_problem()
         problem.objective.amici_solver.setSensitivityMethod(
             amici.SensitivityMethod_adjoint
         )
@@ -348,7 +347,7 @@ def inner_problem_exp(add_scaling: bool = True, add_offset: bool = True):
         InnerParameter(
             inner_parameter_id=inner_parameter_id,
             inner_parameter_type=inner_parameter_type,
-            scale=LOG10,
+            scale=LIN,
             lb=INNER_PARAMETER_BOUNDS[inner_parameter_type][LOWER_BOUND],
             ub=INNER_PARAMETER_BOUNDS[inner_parameter_type][UPPER_BOUND],
             ixs=mask,
diff --git a/test/hierarchical/test_ordinal.py b/test/hierarchical/test_ordinal.py
index 9a316cb30..6df4e2efc 100644
--- a/test/hierarchical/test_ordinal.py
+++ b/test/hierarchical/test_ordinal.py
@@ -102,9 +102,10 @@ def _create_problem(
 ) -> pypesto.Problem:
     """Creates the ordinal pyPESTO problem with given options."""
     importer = pypesto.petab.PetabImporter(petab_problem, hierarchical=True)
-    importer.create_model()
+    factory = importer.create_objective_creator()
+    factory.create_model()
 
-    objective = importer.create_objective(
+    objective = factory.create_objective(
         inner_options=option,
     )
     problem = importer.create_problem(objective)
@@ -127,7 +128,8 @@ def test_ordinal_calculator_and_objective():
         importer = pypesto.petab.PetabImporter(
             petab_problem, hierarchical=True
         )
-        objective = importer.create_objective(
+        factory = importer.create_objective_creator()
+        objective = factory.create_objective(
             inner_options=options,
         )
         problem = importer.create_problem(objective)
@@ -175,7 +177,7 @@ def inner_calculate(problem, x_dct):
         problem.objective,
     )
     finite_differences_results = finite_differences(
-        petab_problem.x_nominal_scaled,
+        petab_problem.x_nominal_free_scaled,
         (
             0,
             1,
@@ -209,7 +211,7 @@ def inner_calculate(problem, x_dct):
     # with finite differences.
     assert np.allclose(
         finite_differences_results[1],
-        calculator_results[STANDARD]["grad"],
+        calculator_results[STANDARD]["grad"][petab_problem.x_free_indices],
     )
 
     # Since the nominal parameters are close to true ones,
diff --git a/test/hierarchical/test_spline.py b/test/hierarchical/test_spline.py
index dec410df2..32d99c54a 100644
--- a/test/hierarchical/test_spline.py
+++ b/test/hierarchical/test_spline.py
@@ -95,9 +95,10 @@ def _create_problem(
         petab_problem,
         hierarchical=True,
     )
-    importer.create_model()
+    factory = importer.create_objective_creator()
+    factory.create_model()
 
-    objective = importer.create_objective(
+    objective = factory.create_objective(
         inner_options=option,
     )
     problem = importer.create_problem(objective)
@@ -125,7 +126,8 @@ def test_spline_calculator_and_objective():
             petab_problem,
             hierarchical=True,
         )
-        objective = importer.create_objective(
+        factory = importer.create_objective_creator()
+        objective = factory.create_objective(
             inner_options=option,
         )
         problem = importer.create_problem(objective)
@@ -172,7 +174,7 @@ def inner_calculate(problem, x_dct):
 
     finite_differences = pypesto.objective.FD(problem.objective)
     FD_results = finite_differences(
-        x=petab_problem.x_nominal_scaled,
+        x=petab_problem.x_nominal_free_scaled,
         sensi_orders=(0, 1),
         mode=MODE_FUN,
     )
@@ -210,7 +212,9 @@ def inner_calculate(problem, x_dct):
     # The gradient should be close to the one calculated using
     # finite differences.
     assert np.allclose(
-        calculator_results["minimal_diff_on"]["grad"],
+        calculator_results["minimal_diff_on"]["grad"][
+            petab_problem.x_free_indices
+        ],
         FD_results[1],
         atol=atol,
     )
@@ -474,8 +478,7 @@ def test_save_and_load_spline_knots():
         petab_problem,
         hierarchical=True,
     )
-    objective = importer.create_objective()
-    problem = importer.create_problem(objective)
+    problem = importer.create_problem()
 
     optimizer = pypesto.optimize.ScipyOptimizer(
         method="L-BFGS-B",
diff --git a/test/julia/test_pyjulia.py b/test/julia/test_pyjulia.py
index dc0abcdef..8325a44e8 100644
--- a/test/julia/test_pyjulia.py
+++ b/test/julia/test_pyjulia.py
@@ -59,7 +59,11 @@ def test_pyjulia_pipeline():
 
     # check with analytical value
     p_opt = obj.get("p_opt")
-    assert np.allclose(result.optimize_result[0].x, p_opt)  # noqa: S101
+    assert np.allclose(  # noqa: S101
+        result.optimize_result[0].x,
+        p_opt,
+        atol=1e-6,
+    )
 
 
 def test_petabJL_interface():
diff --git a/test/optimize/test_optimize.py b/test/optimize/test_optimize.py
index 38ee80612..48ebdea55 100644
--- a/test/optimize/test_optimize.py
+++ b/test/optimize/test_optimize.py
@@ -20,11 +20,16 @@
 import pypesto.optimize as optimize
 from pypesto.optimize.ess import (
     ESSOptimizer,
+    FunctionEvaluatorMP,
+    RefSet,
     SacessFidesFactory,
     SacessOptimizer,
+    SacessOptions,
     get_default_ess_options,
 )
-from pypesto.optimize.util import assign_ids
+from pypesto.optimize.util import (
+    assign_ids,
+)
 from pypesto.store import read_result
 
 from ..base.test_x_fixed import create_problem
@@ -488,6 +493,11 @@ def test_ess(problem, local_optimizer, ess_type, request):
             sacess_loglevel=logging.DEBUG,
             ess_loglevel=logging.WARNING,
             ess_init_args=ess_init_args,
+            options=SacessOptions(
+                adaptation_min_evals=500,
+                adaptation_sent_offset=10,
+                adaptation_sent_coeff=5,
+            ),
         )
     else:
         raise ValueError(f"Unsupported ESS type {ess_type}.")
@@ -520,7 +530,21 @@ def test_ess_multiprocess(problem, request):
 
     from fides.constants import Options as FidesOptions
 
-    from pypesto.optimize.ess import ESSOptimizer, FunctionEvaluatorMP, RefSet
+    # augment objective with parameter prior to check it's copyable
+    #  https://github.com/ICB-DCM/pyPESTO/issues/1465
+    #  https://github.com/ICB-DCM/pyPESTO/pull/1467
+    problem.objective = pypesto.objective.AggregatedObjective(
+        [
+            problem.objective,
+            pypesto.objective.NegLogParameterPriors(
+                [
+                    pypesto.objective.get_parameter_prior_dict(
+                        0, "uniform", [0, 1], "lin"
+                    )
+                ]
+            ),
+        ]
+    )
 
     ess = ESSOptimizer(
         max_iter=20,
@@ -545,6 +569,14 @@ def test_ess_multiprocess(problem, request):
     print("ESS result: ", res.summary())
 
 
+def test_ess_refset_repr():
+    """Check RefSet's repr both without and with x/fx values set."""
+    empty_refset = RefSet(10, None)
+    assert repr(empty_refset) == "RefSet(dim=10)"
+    filled_refset = RefSet(10, None, x=np.zeros(10), fx=np.arange(10))
+    assert repr(filled_refset) == "RefSet(dim=10, fx=[0 ... 9])"
+
+
 def test_scipy_integrated_grad():
     integrated = True
     obj = rosen_for_sensi(max_sensi_order=2, integrated=integrated)["obj"]
diff --git a/test/petab/test_amici_objective.py b/test/petab/test_amici_objective.py
index 00c399f6e..274962fc1 100644
--- a/test/petab/test_amici_objective.py
+++ b/test/petab/test_amici_objective.py
@@ -57,7 +57,7 @@ def test_error_leastsquares_with_ssigma():
     )
     petab_problem.model_name = model_name
     importer = pypesto.petab.PetabImporter(petab_problem)
-    obj = importer.create_objective()
+    obj = importer.create_objective_creator().create_objective()
     problem = importer.create_problem(
         obj, startpoint_kwargs={"check_fval": True, "check_grad": True}
     )
diff --git a/test/petab/test_amici_predictor.py b/test/petab/test_amici_predictor.py
index b610e2b97..2d23620a4 100644
--- a/test/petab/test_amici_predictor.py
+++ b/test/petab/test_amici_predictor.py
@@ -365,7 +365,8 @@ def test_petab_prediction():
     petab_problem.model_name = f"{model_name}_petab"
     importer = pypesto.petab.PetabImporter(petab_problem)
     # create prediction via PEtab
-    predictor = importer.create_predictor()
+    factory = importer.create_objective_creator()
+    predictor = factory.create_predictor()
 
     # ===== run test for prediction ===========================================
     p = predictor(
@@ -373,8 +374,8 @@ def test_petab_prediction():
     )
     check_outputs(p, out=(0, 1), n_cond=1, n_timepoints=10, n_obs=1, n_par=2)
     # check outputs for simulation and measurement dataframes
-    importer.prediction_to_petab_measurement_df(p, predictor)
-    importer.prediction_to_petab_simulation_df(p, predictor)
+    factory.prediction_to_petab_measurement_df(p, predictor)
+    factory.prediction_to_petab_simulation_df(p, predictor)
 
     # ===== run test for ensemble prediction ==================================
     # read a set of ensemble vectors from the csv
diff --git a/test/petab/test_petabSimulator.py b/test/petab/test_petabSimulator.py
new file mode 100644
index 000000000..65db1aa83
--- /dev/null
+++ b/test/petab/test_petabSimulator.py
@@ -0,0 +1,108 @@
+"""Run PEtab tests for PetabSimulatorObjective."""
+
+import logging
+
+import basico.petab
+import petab.v1 as petab
+import petabtests
+import pytest
+
+from pypesto.objective.petab import PetabSimulatorObjective
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.parametrize(
+    "case, model_type, version",
+    [
+        (case_id, "sbml", "v1.0.0")
+        for case_id in petabtests.get_cases(format_="sbml", version="v1.0.0")
+    ],
+)
+def test_petab_case(case, model_type, version):
+    """Run one PEtab case; skip it when it needs unimplemented features."""
+    try:
+        _execute_case(case, model_type, version)
+    except Exception as e:
+        # Anything other than missing functionality is a genuine failure.
+        if not isinstance(e, NotImplementedError) and (
+            "Timepoint-specific parameter overrides" not in str(e)
+        ):
+            raise e
+        logger.info(
+            f"Case {case} expectedly failed. Required functionality is "
+            f"not implemented: {e}"
+        )
+        pytest.skip(str(e))
+
+
+def _execute_case(case, model_type, version):
+    """Simulate one PEtab test case with basico and compare to the solution.
+
+    Raises an AssertionError if the llh or the simulations do not match.
+    """
+    case = petabtests.test_id_str(case)
+    logger.info(f"Case {case}")
+    if case in ["0006", "0009", "0010", "0017", "0018", "0019"]:
+        pytest.skip("Basico does not support these functionalities.")
+
+    # case folder
+    case_dir = petabtests.get_case_dir(case, model_type, version)
+
+    # load solution
+    solution = petabtests.load_solution(
+        case, format=model_type, version=version
+    )
+    gt_llh = solution[petabtests.LLH]
+    gt_simulation_dfs = solution[petabtests.SIMULATION_DFS]
+    tol_llh = solution[petabtests.TOL_LLH]
+    tol_simulations = solution[petabtests.TOL_SIMULATIONS]
+
+    # import petab problem and create objective function
+    yaml_file = case_dir / petabtests.problem_yaml_name(case)
+    petab_problem = petab.Problem.from_yaml(yaml_file)
+    simulator = basico.petab.PetabSimulator(petab_problem)
+    obj = PetabSimulatorObjective(simulator)
+
+    # simulate at the scaled nominal parameters
+    problem_parameters = petab_problem.x_nominal_scaled
+    ret = obj(problem_parameters, sensi_orders=(0,), return_dict=True)
+
+    # extract results
+    llh = -ret["fval"]
+    simulation_df = ret["simulations"]
+
+    # check the table layout via the measurement checker, then rename back
+    simulation_df = simulation_df.rename(
+        columns={petab.SIMULATION: petab.MEASUREMENT}
+    )
+    petab.check_measurement_df(simulation_df, petab_problem.observable_df)
+    simulation_df = simulation_df.rename(
+        columns={petab.MEASUREMENT: petab.SIMULATION}
+    )
+    simulation_df[petab.TIME] = simulation_df[petab.TIME].astype(int)
+
+    # check if matches
+    llhs_match = petabtests.evaluate_llh(llh, gt_llh, tol_llh)
+    simulations_match = petabtests.evaluate_simulations(
+        [simulation_df], gt_simulation_dfs, tol_simulations
+    )
+
+    # log each comparison at ERROR level only if that comparison failed
+    logger.log(
+        logging.INFO if llhs_match else logging.ERROR,
+        f"LLH: simulated: {llh}, expected: {gt_llh}, match = {llhs_match}",
+    )
+    logger.log(
+        logging.INFO if simulations_match else logging.ERROR,
+        f"Simulations: match = {simulations_match}",
+    )
+
+    if not all([llhs_match, simulations_match]):
+        logger.error(f"Case {version}/{model_type}/{case} failed.")
+        raise AssertionError(
+            f"Case {case}: Test results do not match expectations"
+        )
+
+    logger.info(f"Case {version}/{model_type}/{case} passed.")
diff --git a/test/petab/test_petab_import.py b/test/petab/test_petab_import.py
index 5b9f94a2e..aa4eb0067 100644
--- a/test/petab/test_petab_import.py
+++ b/test/petab/test_petab_import.py
@@ -51,7 +51,9 @@ def test_1_compile(self):
             self.petab_importers.append(importer)
 
             # check model
-            model = importer.create_model(force_compile=False)
+            model = importer.create_objective_creator().create_model(
+                force_compile=False
+            )
 
             # observable ids
             model_obs_ids = list(model.getObservableIds())
@@ -62,12 +64,13 @@ def test_1_compile(self):
 
     def test_2_simulate(self):
         for petab_importer in self.petab_importers:
-            obj = petab_importer.create_objective()
-            edatas = petab_importer.create_edatas()
+            factory = petab_importer.create_objective_creator()
+            obj = factory.create_objective()
+            edatas = factory.create_edatas()
             self.obj_edatas.append((obj, edatas))
 
             # run function
-            x_nominal = petab_importer.petab_problem.x_nominal_scaled
+            x_nominal = factory.petab_problem.x_nominal_scaled
             ret = obj(x_nominal)
 
             self.assertTrue(np.isfinite(ret))
@@ -114,12 +117,12 @@ def test_4_optimize(self):
     def test_check_gradients(self):
         """Test objective FD-gradient check function."""
         # Check gradients of simple model (should always be a true positive)
-        model_name = "Bachmann_MSB2011"
-        petab_problem = pypesto.petab.PetabImporter.from_yaml(
+        model_name = "Boehm_JProteomeRes2014"
+        importer = pypesto.petab.PetabImporter.from_yaml(
             os.path.join(models.MODELS_DIR, model_name, model_name + ".yaml")
         )
 
-        objective = petab_problem.create_objective()
+        objective = importer.create_problem().objective
         objective.amici_solver.setSensitivityMethod(
             amici.SensitivityMethod_forward
         )
@@ -127,7 +130,9 @@ def test_check_gradients(self):
         objective.amici_solver.setRelativeTolerance(1e-12)
 
         self.assertFalse(
-            petab_problem.check_gradients(multi_eps=[1e-3, 1e-4, 1e-5])
+            objective.check_gradients_match_finite_differences(
+                multi_eps=[1e-3, 1e-4, 1e-5]
+            )
         )
 
 
@@ -167,16 +172,16 @@ def test_max_sensi_order():
     """Test that the AMICI objective created via PEtab exposes derivatives
     correctly."""
     model_name = "Boehm_JProteomeRes2014"
-    problem = pypesto.petab.PetabImporter.from_yaml(
+    importer = pypesto.petab.PetabImporter.from_yaml(
         os.path.join(models.MODELS_DIR, model_name, model_name + ".yaml")
     )
 
     # define test parameter
-    par = problem.petab_problem.x_nominal_scaled
+    par = importer.petab_problem.x_nominal_scaled
     npar = len(par)
 
     # auto-computed max_sensi_order and fim_for_hess
-    objective = problem.create_objective()
+    objective = importer.create_objective_creator().create_objective()
     hess = objective(par, sensi_orders=(2,))
     assert hess.shape == (npar, npar)
     assert (hess != 0).any()
@@ -190,18 +195,24 @@ def test_max_sensi_order():
     )
 
     # fix max_sensi_order to 1
-    objective = problem.create_objective(max_sensi_order=1)
+    objective = importer.create_objective_creator().create_objective(
+        max_sensi_order=1
+    )
     objective(par, sensi_orders=(1,))
     with pytest.raises(ValueError):
         objective(par, sensi_orders=(2,))
 
     # do not use FIM
-    objective = problem.create_objective(fim_for_hess=False)
+    objective = importer.create_objective_creator().create_objective(
+        fim_for_hess=False
+    )
     with pytest.raises(ValueError):
         objective(par, sensi_orders=(2,))
 
     # only allow computing function values
-    objective = problem.create_objective(max_sensi_order=0)
+    objective = importer.create_objective_creator().create_objective(
+        max_sensi_order=0
+    )
     objective(par)
     with pytest.raises(ValueError):
         objective(par, sensi_orders=(1,))
diff --git a/test/petab/test_petab_suite.py b/test/petab/test_petab_suite.py
index 87219c6d8..820adf686 100644
--- a/test/petab/test_petab_suite.py
+++ b/test/petab/test_petab_suite.py
@@ -82,11 +82,12 @@ def _execute_case(case, model_type, version):
             yaml_file, output_folder=output_folder
         )
         petab_problem = importer.petab_problem
-    model = importer.create_model(generate_sensitivity_code=False)
-    obj = importer.create_objective(model=model)
+    factory = importer.create_objective_creator()
+    model = factory.create_model(generate_sensitivity_code=False)
+    obj = factory.create_objective(model=model)
 
     # the scaled parameters
-    problem_parameters = importer.petab_problem.x_nominal_scaled
+    problem_parameters = factory.petab_problem.x_nominal_scaled
 
     # simulate
     ret = obj(problem_parameters, sensi_orders=(0,), return_dict=True)
diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py
index f422fe8bf..8b2e3cbaa 100644
--- a/test/profile/test_profile.py
+++ b/test/profile/test_profile.py
@@ -68,7 +68,7 @@ def test_default_profiling(self):
             steps = result.profile_result.list[i_run][0]["ratio_path"].size
             if method == "adaptive_step_regression":
                 self.assertTrue(
-                    steps < 20,
+                    steps < 100,
                     "Profiling with regression based "
                     "proposal needed too many steps.",
                 )
@@ -79,7 +79,7 @@ def test_default_profiling(self):
                 )
             elif method == "adaptive_step_order_1":
                 self.assertTrue(
-                    steps < 25,
+                    steps < 100,
                     "Profiling with 1st order based "
                     "proposal needed too many steps.",
                 )
@@ -90,7 +90,7 @@ def test_default_profiling(self):
                 )
             elif method == "adaptive_step_order_0":
                 self.assertTrue(
-                    steps < 100,
+                    steps < 300,
                     "Profiling with 0th order based "
                     "proposal needed too many steps.",
                 )
@@ -479,6 +479,7 @@ def test_gh1165(lb, ub):
         progress_bar=False,
         profile_options=profile.ProfileOptions(
             min_step_size=0.1,
+            max_step_size=1.0,
             delta_ratio_max=0.05,
             default_step_size=0.5,
             ratio_min=0.01,
diff --git a/test/run_notebook.sh b/test/run_notebook.sh
index 297c2a9d3..693baaa73 100755
--- a/test/run_notebook.sh
+++ b/test/run_notebook.sh
@@ -38,6 +38,7 @@ nbs_2=(
   'sampler_study.ipynb'
   'sampling_diagnostics.ipynb'
   'model_selection.ipynb'
+  'model_evidence_and_bayes_factors.ipynb'
 )
 
 # All tested notebooks
diff --git a/test/sample/test_sample.py b/test/sample/test_sample.py
index b2d246111..0371faf1c 100644
--- a/test/sample/test_sample.py
+++ b/test/sample/test_sample.py
@@ -1,150 +1,42 @@
 """Tests for `pypesto.sample` methods."""
 
-import os
 
 import numpy as np
 import pytest
-import scipy.optimize as so
 from scipy.integrate import quad
-from scipy.stats import ks_2samp, kstest, multivariate_normal, norm, uniform
+from scipy.stats import ks_2samp, kstest, norm
 
 import pypesto
 import pypesto.optimize as optimize
 import pypesto.sample as sample
 from pypesto.C import OBJECTIVE_NEGLOGLIKE, OBJECTIVE_NEGLOGPOST
+from pypesto.objective import (
+    AggregatedObjective,
+    NegLogParameterPriors,
+    Objective,
+)
 
-
-def gaussian_llh(x):
-    return float(norm.logpdf(x).item())
-
-
-def gaussian_problem():
-    def nllh(x):
-        return -gaussian_llh(x)
-
-    objective = pypesto.Objective(fun=nllh)
-    problem = pypesto.Problem(objective=objective, lb=[-10], ub=[10])
-    return problem
-
-
-def gaussian_mixture_llh(x):
-    return np.log(
-        0.3 * multivariate_normal.pdf(x, mean=-1.5, cov=0.1)
-        + 0.7 * multivariate_normal.pdf(x, mean=2.5, cov=0.2)
-    )
-
-
-def gaussian_mixture_problem():
-    """Problem based on a mixture of gaussians."""
-
-    def nllh(x):
-        return -gaussian_mixture_llh(x)
-
-    objective = pypesto.Objective(fun=nllh)
-    problem = pypesto.Problem(
-        objective=objective, lb=[-10], ub=[10], x_names=["x"]
-    )
-    return problem
-
-
-def gaussian_mixture_separated_modes_llh(x):
-    return np.log(
-        0.5 * multivariate_normal.pdf(x, mean=-1.0, cov=0.7)
-        + 0.5 * multivariate_normal.pdf(x, mean=100.0, cov=0.8)
-    )
-
-
-def gaussian_mixture_separated_modes_problem():
-    """Problem based on a mixture of gaussians with far/separated modes."""
-
-    def nllh(x):
-        return -gaussian_mixture_separated_modes_llh(x)
-
-    objective = pypesto.Objective(fun=nllh)
-    problem = pypesto.Problem(
-        objective=objective, lb=[-100], ub=[200], x_names=["x"]
-    )
-    return problem
-
-
-def rosenbrock_problem():
-    """Problem based on rosenbrock objective.
-
-    Features
-    --------
-    * 3-dim
-    * has fixed parameters
-    * has gradient
-    """
-    objective = pypesto.Objective(fun=so.rosen, grad=so.rosen_der)
-
-    dim_full = 2
-    lb = -5 * np.ones((dim_full, 1))
-    ub = 5 * np.ones((dim_full, 1))
-
-    problem = pypesto.Problem(
-        objective=objective,
-        lb=lb,
-        ub=ub,
-        x_fixed_indices=[1],
-        x_fixed_vals=[2],
-    )
-    return problem
-
-
-def create_petab_problem():
-    import petab.v1 as petab
-
-    import pypesto.petab
-
-    current_path = os.path.dirname(os.path.realpath(__file__))
-    dir_path = os.path.abspath(
-        os.path.join(current_path, "..", "..", "doc", "example")
-    )
-    # import to petab
-    petab_problem = petab.Problem.from_yaml(
-        dir_path + "/conversion_reaction/conversion_reaction.yaml"
-    )
-    # import to pypesto
-    importer = pypesto.petab.PetabImporter(petab_problem)
-    # create problem
-    problem = importer.create_problem()
-
-    return problem
-
-
-def sample_petab_problem():
-    # create problem
-    problem = create_petab_problem()
-
-    sampler = sample.AdaptiveMetropolisSampler(
-        options={
-            "show_progress": False,
-        },
-    )
-    result = sample.sample(
-        problem,
-        n_samples=1000,
-        sampler=sampler,
-        x0=np.array([3, -4]),
-    )
-    return result
-
-
-def prior(x):
-    return multivariate_normal.pdf(x, mean=-1.0, cov=0.7)
-
-
-def likelihood(x):
-    return uniform.pdf(x, loc=-10.0, scale=20.0)[0]
-
-
-def negative_log_posterior(x):
-    return -np.log(likelihood(x)) - np.log(prior(x))
-
-
-def negative_log_prior(x):
-    return -np.log(prior(x))
+from .util import (
+    LB_GAUSSIAN,
+    N_CHAINS,
+    N_SAMPLE_FEW,
+    N_SAMPLE_MANY,
+    N_SAMPLE_SOME,
+    N_STARTS_FEW,
+    N_STARTS_SOME,
+    STATISTIC_TOL,
+    UB_GAUSSIAN,
+    X_NAMES,
+    create_petab_problem,
+    gaussian_llh,
+    gaussian_mixture_problem,
+    gaussian_nllh_grad,
+    gaussian_nllh_hess,
+    gaussian_problem,
+    negative_log_posterior,
+    negative_log_prior,
+    rosenbrock_problem,
+)
 
 
 @pytest.fixture(
@@ -185,16 +77,19 @@ def sampler(request):
             options={
                 "show_progress": False,
             },
-            n_chains=5,
+            n_chains=N_CHAINS,
         )
     elif request.param == "Pymc":
         from pypesto.sample.pymc import PymcSampler
 
-        return PymcSampler(tune=5, progressbar=False)
+        return PymcSampler(tune=5, progressbar=False, chains=N_CHAINS)
     elif request.param == "Emcee":
         return sample.EmceeSampler(nwalkers=10)
     elif request.param == "Dynesty":
-        return sample.DynestySampler(objective_type="negloglike")
+        return sample.DynestySampler(
+            objective_type=OBJECTIVE_NEGLOGLIKE,
+            run_args={"maxiter": N_SAMPLE_FEW},
+        )
 
 
 @pytest.fixture(params=["gaussian", "gaussian_mixture", "rosenbrock"])
@@ -213,7 +108,7 @@ def test_pipeline(sampler, problem):
     optimizer = optimize.ScipyOptimizer(options={"maxiter": 10})
     result = optimize.minimize(
         problem=problem,
-        n_starts=3,
+        n_starts=N_STARTS_FEW,
         optimizer=optimizer,
         progress_bar=False,
     )
@@ -222,11 +117,17 @@ def test_pipeline(sampler, problem):
     result = sample.sample(
         problem=problem,
         sampler=sampler,
-        n_samples=100,
+        n_samples=N_SAMPLE_FEW,
         result=result,
     )
-    # remove warnings in test/sample/test_sample.
-    # Warning here: pypesto/visualize/sampling.py:1104
+    # test dynesty mcmc samples
+    if isinstance(sampler, sample.DynestySampler):
+        trace_original = sampler.get_original_samples().trace_neglogpost
+        trace_mcmc = result.sample_result.trace_neglogpost
+        # Nested sampling: neglogpost is monotonically non-increasing
+        assert (np.diff(trace_original) <= 0).all()
+        # MCMC samples are not
+        assert not (np.diff(trace_mcmc) <= 0).all()
     # geweke test
     sample.geweke_test(result=result)
 
@@ -240,7 +141,7 @@ def test_ground_truth():
         options={
             "show_progress": False,
         },
-        n_chains=5,
+        n_chains=N_CHAINS,
     )
 
     problem = gaussian_problem()
@@ -248,11 +149,12 @@ def test_ground_truth():
     result = optimize.minimize(
         problem,
         progress_bar=False,
+        n_starts=N_STARTS_SOME,
     )
 
     result = sample.sample(
         problem,
-        n_samples=5000,
+        n_samples=N_SAMPLE_MANY,
         result=result,
         sampler=sampler,
     )
@@ -263,135 +165,34 @@ def test_ground_truth():
     # test against different distributions
 
     statistic, pval = kstest(samples, "norm")
-    print(statistic, pval)
-    assert statistic < 0.1
+    assert statistic < STATISTIC_TOL
 
     statistic, pval = kstest(samples, "uniform")
-    print(statistic, pval)
-    assert statistic > 0.1
-
-
-def test_ground_truth_separated_modes():
-    """Test whether we actually retrieve correct distributions."""
-    # use best self-implemented sampler, which has a chance to correctly
-    # sample from the distribution
-
-    # First use parallel tempering with 3 chains
-    sampler = sample.AdaptiveParallelTemperingSampler(
-        internal_sampler=sample.AdaptiveMetropolisSampler(),
-        options={
-            "show_progress": False,
-        },
-        n_chains=3,
-    )
-
-    problem = gaussian_mixture_separated_modes_problem()
-
-    result = sample.sample(
-        problem,
-        n_samples=1e4,
-        sampler=sampler,
-        x0=np.array([0.0]),
-    )
-
-    # get samples of first chain
-    samples = result.sample_result.trace_x[0, :, 0]
-
-    # generate bimodal ground-truth samples
-    # "first" mode centered at -1
-    rvs1 = norm.rvs(size=5000, loc=-1.0, scale=np.sqrt(0.7))
-    # "second" mode centered at 100
-    rvs2 = norm.rvs(size=5001, loc=100.0, scale=np.sqrt(0.8))
-
-    # test for distribution similarity
-    statistic, pval = ks_2samp(np.concatenate([rvs1, rvs2]), samples)
-
-    # only parallel tempering finds both modes
-    print(statistic, pval)
-    assert statistic < 0.2
-
-    # sample using adaptive metropolis (single-chain)
-    # initiated around the "first" mode of the distribution
-    sampler = sample.AdaptiveMetropolisSampler(
-        options={
-            "show_progress": False,
-        },
-    )
-    result = sample.sample(
-        problem,
-        n_samples=1e4,
-        sampler=sampler,
-        x0=np.array([-2.0]),
-    )
-
-    # get samples of first chain
-    samples = result.sample_result.trace_x[0, :, 0]
-
-    # test for distribution similarity
-    statistic, pval = ks_2samp(np.concatenate([rvs1, rvs2]), samples)
-
-    # single-chain adaptive metropolis does not find both modes
-    print(statistic, pval)
-    assert statistic > 0.1
-
-    # actually centered at the "first" mode
-    statistic, pval = ks_2samp(rvs1, samples)
-
-    print(statistic, pval)
-    assert statistic < 0.1
-
-    # sample using adaptive metropolis (single-chain)
-    # initiated around the "second" mode of the distribution
-    sampler = sample.AdaptiveMetropolisSampler(
-        options={
-            "show_progress": False,
-        },
-    )
-    result = sample.sample(
-        problem,
-        n_samples=1e4,
-        sampler=sampler,
-        x0=np.array([120.0]),
-    )
-
-    # get samples of first chain
-    samples = result.sample_result.trace_x[0, :, 0]
-
-    # test for distribution similarity
-    statistic, pval = ks_2samp(np.concatenate([rvs1, rvs2]), samples)
-
-    # single-chain adaptive metropolis does not find both modes
-    print(statistic, pval)
-    assert statistic > 0.1
-
-    # actually centered at the "second" mode
-    statistic, pval = ks_2samp(rvs2, samples)
-
-    print(statistic, pval)
-    assert statistic < 0.1
+    assert statistic > STATISTIC_TOL
 
 
 def test_multiple_startpoints():
     problem = gaussian_problem()
-    x0s = [np.array([0]), np.array([1])]
+    x0s = [np.array([0]), np.array([1]), np.array([2])]
     sampler = sample.ParallelTemperingSampler(
         internal_sampler=sample.MetropolisSampler(),
         options={
             "show_progress": False,
         },
-        n_chains=2,
+        n_chains=N_CHAINS,
     )
     result = sample.sample(
         problem,
-        n_samples=10,
+        n_samples=N_SAMPLE_FEW,
         x0=x0s,
         sampler=sampler,
     )
 
-    assert result.sample_result.trace_neglogpost.shape[0] == 2
+    assert result.sample_result.trace_neglogpost.shape[0] == N_CHAINS
     assert [
         result.sample_result.trace_x[0][0],
         result.sample_result.trace_x[1][0],
+        result.sample_result.trace_x[2][0],
     ] == x0s
 
 
@@ -406,23 +207,20 @@ def test_regularize_covariance():
     assert np.all(np.linalg.eigvals(reg) >= 0)
 
 
-def test_geweke_test_switch():
-    """Check geweke test returns expected burn in index."""
-    warm_up = np.zeros((100, 2))
-    converged = np.ones((901, 2))
-    chain = np.concatenate((warm_up, converged), axis=0)
-    burn_in = sample.diagnostics.burn_in_by_sequential_geweke(chain=chain)
-    assert burn_in == 100
-
-
-def test_geweke_test_switch_short():
-    """Check geweke test returns expected burn in index
-    for small sample numbers."""
-    warm_up = np.zeros((25, 2))
-    converged = np.ones((75, 2))
+@pytest.mark.parametrize(
+    "non_converged_size, converged_size",
+    [
+        (100, 901),  # "Larger" sample numbers
+        (25, 75),  # Small sample numbers
+    ],
+)
+def test_geweke_test_switch(non_converged_size, converged_size):
+    """Check geweke test returns expected burn in index for different chain sizes."""
+    warm_up = np.zeros((non_converged_size, 2))
+    converged = np.ones((converged_size, 2))
     chain = np.concatenate((warm_up, converged), axis=0)
     burn_in = sample.diagnostics.burn_in_by_sequential_geweke(chain=chain)
-    assert burn_in == 25
+    assert burn_in == non_converged_size
 
 
 def test_geweke_test_unconverged():
@@ -438,7 +236,7 @@ def test_geweke_test_unconverged():
     # optimization
     result = optimize.minimize(
         problem=problem,
-        n_starts=3,
+        n_starts=N_STARTS_FEW,
         progress_bar=False,
     )
 
@@ -446,7 +244,7 @@ def test_geweke_test_unconverged():
     result = sample.sample(
         problem,
         sampler=sampler,
-        n_samples=100,
+        n_samples=N_SAMPLE_FEW,
         result=result,
     )
 
@@ -467,7 +265,7 @@ def test_autocorrelation_pipeline():
     # optimization
     result = optimize.minimize(
         problem=problem,
-        n_starts=3,
+        n_starts=N_STARTS_FEW,
         progress_bar=False,
     )
 
@@ -475,7 +273,7 @@ def test_autocorrelation_pipeline():
     result = sample.sample(
         problem=problem,
         sampler=sampler,
-        n_samples=1000,
+        n_samples=N_SAMPLE_SOME,
         result=result,
     )
 
@@ -517,7 +315,7 @@ def test_autocorrelation_short_chain():
     # optimization
     result = optimize.minimize(
         problem=problem,
-        n_starts=3,
+        n_starts=N_STARTS_FEW,
         progress_bar=False,
     )
 
@@ -525,7 +323,7 @@ def test_autocorrelation_short_chain():
     result = sample.sample(
         problem,
         sampler=sampler,
-        n_samples=10,
+        n_samples=N_SAMPLE_FEW,
         result=result,
     )
 
@@ -607,7 +405,7 @@ def test_empty_prior():
 
     result = sample.sample(
         test_problem,
-        n_samples=50,
+        n_samples=N_SAMPLE_FEW,
         sampler=sampler,
         x0=np.array([0.0]),
     )
@@ -635,9 +433,9 @@ def test_prior():
     test_problem = pypesto.Problem(
         objective=posterior_fun,
         x_priors_defs=prior_object,
-        lb=-10,
-        ub=10,
-        x_names=["x"],
+        lb=LB_GAUSSIAN,
+        ub=UB_GAUSSIAN,
+        x_names=X_NAMES,
     )
 
     sampler = sample.AdaptiveMetropolisSampler(
@@ -648,7 +446,7 @@ def test_prior():
 
     result = sample.sample(
         test_problem,
-        n_samples=1e4,
+        n_samples=N_SAMPLE_MANY,
         sampler=sampler,
         x0=np.array([0.0]),
     )
@@ -667,9 +465,7 @@ def test_prior():
 
     # check sample distribution agreement with the ground-truth
     statistic, pval = ks_2samp(rvs, samples)
-    print(statistic, pval)
-
-    assert statistic < 0.1
+    assert statistic < STATISTIC_TOL
 
 
 def test_samples_cis():
@@ -690,7 +486,7 @@ def test_samples_cis():
     # optimization
     result = optimize.minimize(
         problem=problem,
-        n_starts=3,
+        n_starts=N_STARTS_FEW,
         progress_bar=False,
     )
 
@@ -698,7 +494,7 @@ def test_samples_cis():
     result = sample.sample(
         problem=problem,
         sampler=sampler,
-        n_samples=1000,
+        n_samples=N_SAMPLE_SOME,
         result=result,
     )
 
@@ -730,26 +526,6 @@ def test_samples_cis():
         assert lb.shape == ub.shape
 
 
-def test_dynesty_mcmc_samples():
-    problem = gaussian_problem()
-    sampler = sample.DynestySampler(objective_type=OBJECTIVE_NEGLOGLIKE)
-
-    result = sample.sample(
-        problem=problem,
-        sampler=sampler,
-        n_samples=None,
-        filename=None,
-    )
-
-    original_sample_result = sampler.get_original_samples()
-    mcmc_sample_result = result.sample_result
-
-    # Nested sampling function values are monotonically increasing
-    assert (np.diff(original_sample_result.trace_neglogpost) <= 0).all()
-    # MCMC samples are not
-    assert not (np.diff(mcmc_sample_result.trace_neglogpost) <= 0).all()
-
-
 def test_dynesty_posterior():
     # define negative log posterior
     posterior_fun = pypesto.Objective(fun=negative_log_posterior)
@@ -764,21 +540,21 @@ def test_dynesty_posterior():
     test_problem = pypesto.Problem(
         objective=posterior_fun,
         x_priors_defs=prior_object,
-        lb=-10,
-        ub=10,
-        x_names=["x"],
+        lb=LB_GAUSSIAN,
+        ub=UB_GAUSSIAN,
+        x_names=X_NAMES,
     )
 
     # define sampler
     sampler = sample.DynestySampler(
-        objective_type=OBJECTIVE_NEGLOGPOST
+        objective_type=OBJECTIVE_NEGLOGPOST,
+        run_args={"maxiter": N_SAMPLE_FEW},
     )  # default
 
     result = sample.sample(
         problem=test_problem,
         sampler=sampler,
         n_samples=None,
-        filename=None,
     )
 
     original_sample_result = sampler.get_original_samples()
@@ -797,7 +573,7 @@ def test_thermodynamic_integration():
 
     # approximation should be better for more chains
     n_chains = 10
-    tol = 1
+    tol = 2
     sampler = sample.ParallelTemperingSampler(
         internal_sampler=sample.AdaptiveMetropolisSampler(),
         options={"show_progress": False, "beta_init": "beta_decay"},
@@ -811,17 +587,38 @@ def test_thermodynamic_integration():
 
     result = sample.sample(
         problem,
-        n_samples=2000,
+        n_samples=2 * N_SAMPLE_SOME,
         result=result,
         sampler=sampler,
     )
 
     # compute the log evidence using trapezoid and simpson rule
-    log_evidence = sampler.compute_log_evidence(result, method="trapezoid")
-    log_evidence_not_all = sampler.compute_log_evidence(
+    log_evidence = sample.evidence.parallel_tempering_log_evidence(
+        result, method="trapezoid"
+    )
+    log_evidence_not_all = sample.evidence.parallel_tempering_log_evidence(
         result, method="trapezoid", use_all_chains=False
     )
-    log_evidence_simps = sampler.compute_log_evidence(result, method="simpson")
+    log_evidence_simps = sample.evidence.parallel_tempering_log_evidence(
+        result, method="simpson"
+    )
+
+    # use steppingstone sampling
+    log_evidence_steppingstone = (
+        sample.evidence.parallel_tempering_log_evidence(
+            result, method="steppingstone"
+        )
+    )
+
+    # harmonic mean log evidence
+    harmonic_evidence = sample.evidence.harmonic_mean_log_evidence(result)
+    # compute the log evidence using stabilized harmonic mean
+    prior_samples = np.random.uniform(problem.lb, problem.ub, size=100)
+    harmonic_stabilized_evidence = sample.evidence.harmonic_mean_log_evidence(
+        result=result,
+        prior_samples=prior_samples,
+        neg_log_likelihood_fun=problem.objective,
+    )
 
     # compute evidence
     evidence = quad(
@@ -836,3 +633,72 @@ def test_thermodynamic_integration():
     assert np.isclose(log_evidence, np.log(evidence[0]), atol=tol)
     assert np.isclose(log_evidence_not_all, np.log(evidence[0]), atol=tol)
     assert np.isclose(log_evidence_simps, np.log(evidence[0]), atol=tol)
+    assert np.isclose(
+        log_evidence_steppingstone, np.log(evidence[0]), atol=tol
+    )
+    assert np.isclose(harmonic_evidence, np.log(evidence[0]), atol=tol)
+    assert np.isclose(
+        harmonic_stabilized_evidence, np.log(evidence[0]), atol=tol
+    )
+
+
+def test_laplace_approximation_log_evidence():
+    """Test the laplace approximation of the log evidence."""
+    log_evidence_true = 21.2  # approximated by hand
+
+    problem = create_petab_problem()
+
+    # optimize first; the Hessian-based approximation uses the first result
+    result = optimize.minimize(
+        problem=problem,
+        n_starts=N_STARTS_SOME,
+        progress_bar=False,
+    )
+    log_evidence = sample.evidence.laplace_approximation_log_evidence(
+        problem, result.optimize_result.x[0]
+    )
+    assert np.isclose(log_evidence, log_evidence_true, atol=0.1)
+
+
+@pytest.mark.flaky(reruns=3)
+def test_bridge_sampling():
+    tol = 2
+    # define problem
+    objective = Objective(
+        fun=lambda x: -gaussian_llh(x),
+        grad=gaussian_nllh_grad,
+        hess=gaussian_nllh_hess,
+    )
+    prior_true = NegLogParameterPriors(
+        [
+            {
+                "index": 0,
+                "density_fun": lambda x: (1 / (10 + 10)),
+                "density_dx": lambda x: 0,
+                "density_ddx": lambda x: 0,
+            },
+        ]
+    )
+    problem = pypesto.Problem(
+        objective=AggregatedObjective([objective, prior_true]),
+        lb=LB_GAUSSIAN,
+        ub=UB_GAUSSIAN,
+        x_names=X_NAMES,
+    )
+
+    # run optimization and MCMC
+    result = optimize.minimize(
+        problem, progress_bar=False, n_starts=N_STARTS_SOME
+    )
+    result = sample.sample(
+        problem,
+        n_samples=N_SAMPLE_SOME,
+        result=result,
+    )
+
+    # compute the log evidence using bridge sampling, compare to Laplace
+    bridge_log_evidence = sample.evidence.bridge_sampling_log_evidence(result)
+    laplace = sample.evidence.laplace_approximation_log_evidence(
+        problem, result.optimize_result.x[0]
+    )
+    assert np.isclose(bridge_log_evidence, laplace, atol=tol)
diff --git a/test/sample/util.py b/test/sample/util.py
new file mode 100644
index 000000000..63521eb82
--- /dev/null
+++ b/test/sample/util.py
@@ -0,0 +1,145 @@
+"""Utility functions and constants for tests. Mainly problem definitions."""
+
+
+import numpy as np
+import scipy.optimize as so
+from scipy.stats import multivariate_normal, norm, uniform
+
+import pypesto
+
+# Constants for Gaussian problems or Uniform with Gaussian prior
+MU = 0  # Gaussian mean
+SIGMA = 1  # Gaussian standard deviation
+LB_GAUSSIAN = [-10]  # Lower bound for Gaussian problem
+UB_GAUSSIAN = [10]  # Upper bound for Gaussian problem
+LB_GAUSSIAN_MODES = [-100]  # Lower bound for Gaussian modes problem
+UB_GAUSSIAN_MODES = [200]  # Upper bound for Gaussian modes problem
+X_NAMES = ["x"]  # Parameter names
+MIXTURE_WEIGHTS = [0.3, 0.7]  # Weights for Gaussian mixture model
+MIXTURE_MEANS = [-1.5, 2.5]  # Means for Gaussian mixture model
+MIXTURE_COVS = [0.1, 0.2]  # Covariances for Gaussian mixture model
+
+# Constants for general testing
+N_STARTS_FEW = 5  # Number of starts for tests that don't require convergence
+N_STARTS_SOME = 10  # Number of starts for tests that converge reliably
+N_SAMPLE_FEW = 100  # Number of samples for tests that don't require convergence
+N_SAMPLE_SOME = 1000  # Number of samples for tests that converge reliably
+N_SAMPLE_MANY = 5000  # Number of samples for tests that require convergence
+STATISTIC_TOL = 0.2  # Tolerance when comparing distributions
+N_CHAINS = 3  # Number of chains for ParallelTempering
+
+
+def gaussian_llh(x):
+    """Log-likelihood for Gaussian."""
+    return float(norm.logpdf(x, loc=MU, scale=SIGMA).item())
+
+
+def gaussian_nllh_grad(x):
+    """Negative log-likelihood gradient for Gaussian."""
+    return np.array([((x - MU) / (SIGMA**2))])
+
+
+def gaussian_nllh_hess(x):
+    """Negative log-likelihood Hessian for Gaussian."""
+    return np.array([(1 / (SIGMA**2))])
+
+
+def gaussian_problem():
+    """Defines a simple Gaussian problem."""
+
+    def nllh(x):
+        return -gaussian_llh(x)
+
+    objective = pypesto.Objective(fun=nllh)
+    problem = pypesto.Problem(
+        objective=objective, lb=LB_GAUSSIAN, ub=UB_GAUSSIAN
+    )
+    return problem
+
+
+def gaussian_mixture_llh(x):
+    """Log-likelihood for Gaussian mixture model."""
+    return np.log(
+        MIXTURE_WEIGHTS[0]
+        * multivariate_normal.pdf(
+            x, mean=MIXTURE_MEANS[0], cov=MIXTURE_COVS[0]
+        )
+        + MIXTURE_WEIGHTS[1]
+        * multivariate_normal.pdf(
+            x, mean=MIXTURE_MEANS[1], cov=MIXTURE_COVS[1]
+        )
+    )
+
+
+def gaussian_mixture_problem():
+    """Problem based on a mixture of Gaussians."""
+
+    def nllh(x):
+        return -gaussian_mixture_llh(x)
+
+    objective = pypesto.Objective(fun=nllh)
+    problem = pypesto.Problem(
+        objective=objective, lb=LB_GAUSSIAN, ub=UB_GAUSSIAN, x_names=X_NAMES
+    )
+    return problem
+
+
+def rosenbrock_problem():
+    """Problem based on Rosenbrock objective."""
+    objective = pypesto.Objective(fun=so.rosen, grad=so.rosen_der)
+
+    dim_full = 2
+    lb = -5 * np.ones((dim_full, 1))
+    ub = 5 * np.ones((dim_full, 1))
+
+    problem = pypesto.Problem(
+        objective=objective,
+        lb=lb,
+        ub=ub,
+        x_fixed_indices=[1],
+        x_fixed_vals=[2],
+    )
+    return problem
+
+
+def create_petab_problem():
+    """Creates the conversion reaction problem from its PEtab files."""
+    import os
+
+    import petab.v1 as petab
+
+    import pypesto.petab
+
+    current_path = os.path.dirname(os.path.realpath(__file__))
+    dir_path = os.path.abspath(
+        os.path.join(current_path, "..", "..", "doc", "example")
+    )
+
+    petab_problem = petab.Problem.from_yaml(
+        dir_path + "/conversion_reaction/conversion_reaction.yaml"
+    )
+
+    importer = pypesto.petab.PetabImporter(petab_problem)
+    problem = importer.create_problem()
+
+    return problem
+
+
+def prior(x):
+    """Gaussian prior density at x (mean -1.0, covariance 0.7)."""
+    return multivariate_normal.pdf(x, mean=-1.0, cov=0.7)
+
+
+def likelihood(x):
+    """Uniform likelihood density on [-10, 10] for the first entry of x."""
+    return uniform.pdf(x, loc=-10.0, scale=20.0)[0]
+
+
+def negative_log_posterior(x):
+    """Calculates the negative log posterior."""
+    return -np.log(likelihood(x)) - np.log(prior(x))
+
+
+def negative_log_prior(x):
+    """Calculates the negative log prior."""
+    return -np.log(prior(x))
diff --git a/test/variational/test_variational.py b/test/variational/test_variational.py
index c3b829bf3..f8e66c3a4 100644
--- a/test/variational/test_variational.py
+++ b/test/variational/test_variational.py
@@ -1,29 +1,15 @@
 """Tests for `pypesto.sample` methods."""
 
-import pytest
 from scipy.stats import kstest
 
 import pypesto.optimize as optimize
 from pypesto.variational import variational_fit
 
-from ..sample.test_sample import (
-    gaussian_mixture_problem,
-    gaussian_problem,
-    rosenbrock_problem,
-)
+from ..sample.test_sample import problem  # noqa: F401, fixture from sampling
+from ..sample.util import STATISTIC_TOL, gaussian_problem
 
 
-@pytest.fixture(params=["gaussian", "gaussian_mixture", "rosenbrock"])
-def problem(request):
-    if request.param == "gaussian":
-        return gaussian_problem()
-    if request.param == "gaussian_mixture":
-        return gaussian_mixture_problem()
-    elif request.param == "rosenbrock":
-        return rosenbrock_problem()
-
-
-def test_pipeline(problem):
+def test_pipeline(problem):  # noqa: F811
     """Check that a typical pipeline runs through."""
     # optimization
     optimizer = optimize.ScipyOptimizer(options={"maxiter": 10})
@@ -35,7 +21,7 @@ def test_pipeline(problem):
     )
 
     # sample
-    result = variational_fit(
+    variational_fit(
         problem=problem,
         n_iterations=100,
         n_samples=10,
@@ -45,15 +31,15 @@ def test_pipeline(problem):
 
 def test_ground_truth():
     """Test whether we actually retrieve correct distributions."""
-    problem = gaussian_problem()
+    problem_gaussian = gaussian_problem()
 
     result = optimize.minimize(
-        problem,
+        problem_gaussian,
         progress_bar=False,
     )
 
     result = variational_fit(
-        problem,
+        problem_gaussian,
         n_iterations=10000,
         n_samples=5000,
         result=result,
@@ -65,8 +51,8 @@ def test_ground_truth():
     # test against different distributions
     statistic, pval = kstest(samples, "norm")
     print(statistic, pval)
-    assert statistic < 0.1
+    assert statistic < STATISTIC_TOL
 
     statistic, pval = kstest(samples, "uniform")
     print(statistic, pval)
-    assert statistic > 0.1
+    assert statistic > STATISTIC_TOL
diff --git a/tox.ini b/tox.ini
index 94479aced..6ad9f8719 100644
--- a/tox.ini
+++ b/tox.ini
@@ -79,10 +79,10 @@ extras = test,amici,petab,pyswarm,roadrunner
 deps =
     git+https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab.git@master\#subdirectory=src/python
     git+https://github.com/AMICI-dev/amici.git@develop\#egg=amici&subdirectory=python/sdist
-
 commands =
     python3 -m pip install git+https://github.com/PEtab-dev/petab_test_suite@main
-    python3 -m pip install git+https://github.com/FFroehlich/pysb@fix_pattern_matching
+    python3 -m pip install git+https://github.com/pysb/pysb@master
+    python3 -m pip install -U copasi-basico[petab]
     python3 -m pip install -U sympy
     pytest --cov=pypesto --cov-report=xml --cov-append \
         test/petab
@@ -129,7 +129,7 @@ description =
 
 [testenv:notebooks1]
 allowlist_externals = bash
-extras = example,amici,petab,pyswarm,pymc3,cma,nlopt,fides,roadrunner
+extras = example,amici,petab,pyswarms,pymc3,cma,nlopt,fides,roadrunner
 commands =
     bash test/run_notebook.sh 1
 description =
@@ -148,6 +148,8 @@ description =
 [testenv:doc]
 extras =
     doc,amici,petab,aesara,jax,select,roadrunner
+deps =
+    numpy < 2.0
 commands =
     sphinx-build -W -b html doc/ doc/_build/html
 description =