Skip to content

Commit

Permalink
Remove chaospy dependency (#304)
Browse files — browse the repository at this point in the history
  • Loading branch information
segsell authored Apr 22, 2022
1 parent 17ea3b2 commit 33da89f
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 68 deletions.
1 change: 0 additions & 1 deletion .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ requirements:
- sqlalchemy >=1.3
- seaborn
- dill
- chaospy
- pybaum

test:
Expand Down
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ chronological order. We follow `semantic versioning <https://semver.org/>`_ and
releases are available on `Anaconda.org
<https://anaconda.org/OpenSourceEconomics/estimagic>`_.

0.2.4
-----

- :gh:`304` Removes the chaospy dependency (:ghuser:`segsell`).

0.2.3
-----

Expand Down
1 change: 0 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@
"tornado",
"petsc4py",
"statsmodels",
"chaospy",
]

extlinks = {
Expand Down
4 changes: 1 addition & 3 deletions docs/source/development/styleguide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,4 @@ Styleguide for the documentation

- Format.
The code formatting in .rst files is ensured by blacken-docs. For Jupyter
notebooks, use the
`jupyterlab-code-formatter <https://jupyterlab-code-formatter.readthedocs.io/en/latest/>`_
with the black formatter.
notebooks, use the jupyterlab-code-formatter with the black formatter.
1 change: 0 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ dependencies:
- nlopt
- sphinx-panels
- pygmo
- chaospy
- nb_black
- pybaum

Expand Down
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ install_requires =
sqlalchemy>=1.3
seaborn
dill
chaospy


[options.packages.find]
Expand Down
2 changes: 1 addition & 1 deletion src/estimagic/optimization/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ def _fill_multistart_options_with_defaults(options, params, x, params_to_interna
"n_samples": 10 * len(x),
"share_optimizations": 0.1,
"sampling_distribution": "uniform",
"sampling_method": "sobol" if len(x) <= 30 else "random",
"sampling_method": "sobol" if len(x) <= 200 else "random",
"mixing_weight_method": "tiktak",
"mixing_weight_bounds": (0.1, 0.995),
"convergence_relative_params_tolerance": 0.01,
Expand Down
88 changes: 48 additions & 40 deletions src/estimagic/optimization/tiktak.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,17 @@
First implemented in Python by Alisdair McKay
(`GitHub Repository <https://github.com/amckay/TikTak>`_)
"""
import warnings
from functools import partial

import chaospy
import numpy as np
from chaospy.distributions import Triangle
from chaospy.distributions import Uniform
from estimagic import batch_evaluators as be
from estimagic.optimization.optimization_logging import log_scheduled_steps_and_get_ids
from estimagic.optimization.optimization_logging import update_step_status
from estimagic.parameters.parameter_conversion import get_internal_bounds
from scipy.stats import qmc
from scipy.stats import triang


def run_multistart_optimization(
Expand Down Expand Up @@ -199,7 +197,6 @@ def determine_steps(n_samples, n_optimizations):
Returns:
list: List of dictionaries with information on each step.
"""
exploration_step = {
"type": "exploration",
Expand Down Expand Up @@ -230,58 +227,69 @@ def draw_exploration_sample(
):
"""Get a sample of parameter values for the first stage of the tiktak algorithm.
The sample is created randomly or using low a low discrepancy sequence. Different
The sample is created randomly or using a low discrepancy sequence. Different
distributions are available.
Args:
x (np.ndarray): Internal parameter vector,
lower (np.ndarray): Vector of internal lower bounds.
upper (np.ndarray): Vector of internal upper bounts.
n_samples (int): Number of sampled points on
which to do one function evaluation. Default is 10 * n_params.
sampling_distribution (str): One of "uniform", "triangle". Default is
"uniform" as in the original tiktak algorithm.
sampling_method (str): One of "random", "sobol", "halton",
"hammersley", "korobov", "latin_hypercube" and "chebyshev" or a numpy array
or DataFrame with custom points. Default is sobol for problems with up to 30
parameters and random for problems with more than 30 parameters.
x (np.ndarray): Internal parameter vector of shape (n_params,).
lower (np.ndarray): Vector of internal lower bounds of shape (n_params,).
upper (np.ndarray): Vector of internal upper bounds of shape (n_params,).
n_samples (int): Number of sample points on which one function evaluation
shall be performed. Default is 10 * n_params.
sampling_distribution (str): One of "uniform", "triangular". Default is
"uniform", as in the original tiktak algorithm.
sampling_method (str): One of "sobol", "halton", "latin_hypercube" or
"random". Default is sobol for problems with up to 200 parameters
and random for problems with more than 200 parameters.
seed (int): Random seed.
Returns:
np.ndarray: Numpy array of shape n_samples, len(params). Each row is a vector
of parameter values.
np.ndarray: Numpy array of shape (n_samples, n_params).
Each row represents a vector of parameter values.
"""
valid_rules = [
"random",
"sobol",
"halton",
"hammersley",
"korobov",
"latin_hypercube",
]
valid_rules = ["sobol", "halton", "latin_hypercube", "random"]
valid_distributions = ["uniform", "triangular"]

if sampling_method not in valid_rules:
raise ValueError(
f"Invalid rule: {sampling_method}. Must be one of\n\n{valid_rules}\n\n"
)

if sampling_distribution == "uniform":
dist_list = [Uniform(lb, ub) for lb, ub in zip(lower, upper)]
elif sampling_distribution == "triangle":
dist_list = [Triangle(lb, mp, ub) for lb, mp, ub in zip(lower, x, upper)]
else:
if sampling_distribution not in valid_distributions:
raise ValueError(f"Unsupported distribution: {sampling_distribution}")

joint_distribution = chaospy.J(*dist_list)
if sampling_method == "sobol":
# Draw `n` points from the open interval (lower, upper)^d.
# Note that scipy uses the half-open interval [lower, upper)^d internally.
# We apply a burn-in phase of 1, i.e. we skip the first point in the sequence
# and thus exclude the lower bound.
sampler = qmc.Sobol(d=len(lower), scramble=False, seed=seed)
_ = sampler.fast_forward(1)
sample_unscaled = sampler.random(n=n_samples)

np.random.seed(seed)
elif sampling_method == "halton":
sampler = qmc.Halton(d=len(lower), scramble=False, seed=seed)
sample_unscaled = sampler.random(n=n_samples)

elif sampling_method == "latin_hypercube":
sampler = qmc.LatinHypercube(d=len(lower), strength=1, seed=seed)
sample_unscaled = sampler.random(n=n_samples)

elif sampling_method == "random":
np.random.seed(seed)
sample_unscaled = np.random.sample(size=(n_samples, len(lower)))

if sampling_distribution == "uniform":
sample_scaled = qmc.scale(sample_unscaled, lower, upper)
elif sampling_distribution == "triangular":
sample_scaled = triang.ppf(
sample_unscaled,
c=(x - lower) / (upper - lower),
loc=lower,
scale=upper - lower,
)

sample = joint_distribution.sample(
size=n_samples,
rule=sampling_method,
).T
return sample
return sample_scaled


def get_internal_sampling_bounds(params, constraints):
Expand Down
31 changes: 13 additions & 18 deletions tests/optimization/test_tiktak.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,24 @@ def test_process_multistart_sample(sample, params):
aaae(calculated, expeceted)


distributions = ["triangle", "uniform"]
rules = [
"random",
"sobol",
"halton",
"hammersley",
"korobov",
"latin_hypercube",
# chebyshev generated samples of the wrong size!
]
test_cases = list(product(distributions, rules))


@pytest.mark.parametrize("dist, rule", test_cases)
def test_draw_exploration_sample(dist, rule):
dim = 2
distributions = ["uniform", "triangular"]
rules = ["sobol", "halton", "latin_hypercube", "random"]
lower = [np.zeros(dim), np.ones(dim) * 0.5, -np.ones(dim)]
upper = [np.ones(dim), np.ones(dim) * 0.75, np.ones(dim) * 2]
test_cases = list(product(distributions, rules, lower, upper))


@pytest.mark.parametrize("dist, rule, lower, upper", test_cases)
def test_draw_exploration_sample(dist, rule, lower, upper):
results = []

for _ in range(2):
results.append(
draw_exploration_sample(
x=np.array([0.5, 0.5]),
lower=np.zeros(2),
upper=np.ones(2),
x=np.ones_like(lower) * 0.5,
lower=lower,
upper=upper,
n_samples=3,
sampling_distribution=dist,
sampling_method=rule,
Expand Down
2 changes: 0 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ conda_deps =
cyipopt
nlopt
pygmo
chaospy
pybaum
commands = pytest {posargs}

Expand Down Expand Up @@ -75,7 +74,6 @@ conda_deps =
cyipopt
nlopt
pygmo
chaospy
pybaum
jax
commands = pytest {posargs}
Expand Down

0 comments on commit 33da89f

Please sign in to comment.