From 3fe7a5b6989a2a52974fb2d08a83430df8bd5618 Mon Sep 17 00:00:00 2001
From: Jason Chow
Date: Fri, 8 Nov 2024 22:11:53 -0800
Subject: [PATCH 1/2] derivativeGP gpu support (#444)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary: Add GPU support for the derivative GP. This model isn't like a normal
model that can show up in a live experiment with a config, but we should still
make it work on GPU. Most of this was straightforward, but it required some
fairly arcane shenanigans to override GPyTorch's underlying handling of
train_inputs, which in turn required some equally arcane mypy workarounds. A
simplified sketch of the resulting train_inputs pattern is included at the end
of this patch series.

Differential Revision: D65515631
---
 aepsych/config.py                      |  2 +-
 aepsych/likelihoods/bernoulli.py       |  8 +++---
 aepsych/likelihoods/semi_p.py          |  6 ++--
 aepsych/means/constant_partial_grad.py |  2 +-
 aepsych/models/base.py                 | 12 +++++---
 aepsych/models/derivative_gp.py        |  5 +++-
 aepsych/plotting.py                    |  1 -
 tests_gpu/models/test_derivative_gp.py | 39 ++++++++++++++++++++++++++
 8 files changed, 60 insertions(+), 15 deletions(-)
 create mode 100644 tests_gpu/models/test_derivative_gp.py

diff --git a/aepsych/config.py b/aepsych/config.py
index afbc323be..e7c34698f 100644
--- a/aepsych/config.py
+++ b/aepsych/config.py
@@ -16,13 +16,13 @@
     Callable,
     ClassVar,
     Dict,
-    Dict,
     List,
     Mapping,
     Optional,
     Sequence,
     TypeVar,
 )
+
 import botorch
 import gpytorch
 import numpy as np
diff --git a/aepsych/likelihoods/bernoulli.py b/aepsych/likelihoods/bernoulli.py
index 7f6fc66ac..8a71c2fdd 100644
--- a/aepsych/likelihoods/bernoulli.py
+++ b/aepsych/likelihoods/bernoulli.py
@@ -19,7 +19,7 @@ class BernoulliObjectiveLikelihood(_OneDimensionalLikelihood):
     def __init__(self, objective: Callable) -> None:
         """Initialize BernoulliObjectiveLikelihood.
-
+
         Args:
             objective (Callable): Objective function that maps function samples to probabilities."""
         super().__init__()
@@ -42,13 +42,13 @@ def forward(
     @classmethod
     def from_config(cls, config: Config) -> "BernoulliObjectiveLikelihood":
         """Create an instance from a configuration object.
-
+
         Args:
             config (Config): Configuration object.
-
+
         Returns:
             BernoulliObjectiveLikelihood: BernoulliObjectiveLikelihood instance.
         """
         objective_cls = config.getobj(cls.__name__, "objective")
         objective = objective_cls.from_config(config)
-        return cls(objective=objective)
\ No newline at end of file
+        return cls(objective=objective)
diff --git a/aepsych/likelihoods/semi_p.py b/aepsych/likelihoods/semi_p.py
index f6a337488..2d9bf9b9b 100644
--- a/aepsych/likelihoods/semi_p.py
+++ b/aepsych/likelihoods/semi_p.py
@@ -111,10 +111,10 @@ def expected_log_prob(
         # modified, TODO fixme upstream (cc @bletham)
         def log_prob_lambda(function_samples: torch.Tensor) -> torch.Tensor:
             """Lambda function to compute the log probability.
-
+
             Args:
                 function_samples (torch.Tensor): Function samples.
-
+
             Returns:
                 torch.Tensor: Log probability.
""" @@ -142,4 +142,4 @@ def from_config(cls, config: Config) -> "LinearBernoulliLikelihood": else: objective = objective - return cls(objective=objective) \ No newline at end of file + return cls(objective=objective) diff --git a/aepsych/means/constant_partial_grad.py b/aepsych/means/constant_partial_grad.py index ead7ee6ed..e0af2c29a 100644 --- a/aepsych/means/constant_partial_grad.py +++ b/aepsych/means/constant_partial_grad.py @@ -26,6 +26,6 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: idx = input[..., -1].to(dtype=torch.long) > 0 mean_fit = super(ConstantMeanPartialObsGrad, self).forward(input[..., ~idx, :]) sz = mean_fit.shape[:-1] + torch.Size([input.shape[-2]]) - mean = torch.zeros(sz) + mean = torch.zeros(sz).to(input) mean[~idx] = mean_fit return mean diff --git a/aepsych/models/base.py b/aepsych/models/base.py index 67f2af75a..0490a20da 100644 --- a/aepsych/models/base.py +++ b/aepsych/models/base.py @@ -116,7 +116,7 @@ class AEPsychMixin(GPyTorchModel): extremum_solver = "Nelder-Mead" outcome_types: List[str] = [] - train_inputs: Optional[Tuple[torch.Tensor]] + train_inputs: Optional[Tuple[torch.Tensor, ...]] train_targets: Optional[torch.Tensor] @property @@ -398,7 +398,7 @@ def p_below_threshold( class AEPsychModelDeviceMixin(AEPsychMixin): - _train_inputs: Optional[Tuple[torch.Tensor]] + _train_inputs: Optional[Tuple[torch.Tensor, ...]] _train_targets: Optional[torch.Tensor] def set_train_data(self, inputs=None, targets=None, strict=False): @@ -423,13 +423,17 @@ def device(self) -> torch.device: return next(self.parameters()).device @property - def train_inputs(self) -> Optional[Tuple[torch.Tensor]]: + def train_inputs(self) -> Optional[Tuple[torch.Tensor, ...]]: if self._train_inputs is None: return None # makes sure the tensors are on the right device, move in place + _train_inputs = [] for input in self._train_inputs: - input.to(self.device) + _train_inputs.append(input.to(self.device)) + + _tuple_inputs: Tuple[torch.Tensor, ...] = tuple(_train_inputs) + self._train_inputs = _tuple_inputs return self._train_inputs diff --git a/aepsych/models/derivative_gp.py b/aepsych/models/derivative_gp.py index b338f5a7a..3b5a92292 100644 --- a/aepsych/models/derivative_gp.py +++ b/aepsych/models/derivative_gp.py @@ -13,6 +13,7 @@ import torch from aepsych.kernels.rbf_partial_grad import RBFKernelPartialObsGrad from aepsych.means.constant_partial_grad import ConstantMeanPartialObsGrad +from aepsych.models.base import AEPsychModelDeviceMixin from botorch.models.gpytorch import GPyTorchModel from gpytorch.distributions import MultivariateNormal from gpytorch.kernels import Kernel @@ -22,7 +23,9 @@ from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy -class MixedDerivativeVariationalGP(gpytorch.models.ApproximateGP, GPyTorchModel): +class MixedDerivativeVariationalGP( + gpytorch.models.ApproximateGP, AEPsychModelDeviceMixin, GPyTorchModel +): """A variational GP with mixed derivative observations. For more on GPs with derivative observations, see e.g. Riihimaki & Vehtari 2010. 
diff --git a/aepsych/plotting.py b/aepsych/plotting.py
index bb4cab779..12f0e60f5 100644
--- a/aepsych/plotting.py
+++ b/aepsych/plotting.py
@@ -10,7 +10,6 @@
 import matplotlib.pyplot as plt
 import numpy as np
-
 import torch
 from aepsych.strategy import Strategy
 from aepsych.utils import get_lse_contour, get_lse_interval, make_scaled_sobol
diff --git a/tests_gpu/models/test_derivative_gp.py b/tests_gpu/models/test_derivative_gp.py
new file mode 100644
index 000000000..200ef62eb
--- /dev/null
+++ b/tests_gpu/models/test_derivative_gp.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from aepsych import Config, SequentialStrategy
+from aepsych.models.derivative_gp import MixedDerivativeVariationalGP
+from botorch.fit import fit_gpytorch_mll
+from botorch.utils.testing import BotorchTestCase
+from gpytorch.likelihoods import BernoulliLikelihood
+from gpytorch.mlls.variational_elbo import VariationalELBO
+
+
+class TestDerivativeGP(BotorchTestCase):
+    def test_MixedDerivativeVariationalGP_gpu(self):
+        train_x = torch.cat(
+            (torch.tensor([1.0, 2.0, 3.0, 4.0]).unsqueeze(1), torch.zeros(4, 1)), dim=1
+        )
+        train_y = torch.tensor([1.0, 2.0, 3.0, 4.0])
+        m = MixedDerivativeVariationalGP(
+            train_x=train_x,
+            train_y=train_y,
+            inducing_points=train_x,
+            fixed_prior_mean=0.5,
+        ).cuda()
+
+        self.assertEqual(m.mean_module.constant.item(), 0.5)
+        self.assertEqual(
+            m.covar_module.base_kernel.raw_lengthscale.shape, torch.Size([1, 1])
+        )
+        mll = VariationalELBO(
+            likelihood=BernoulliLikelihood(), model=m, num_data=train_y.numel()
+        ).cuda()
+        mll = fit_gpytorch_mll(mll)
+        test_x = torch.tensor([[1.0, 0], [3.0, 1.0]]).cuda()
+        m(test_x)

From 7cc9f82f24a6f59974467827da55f22ac4ba5134 Mon Sep 17 00:00:00 2001
From: Jason Chow
Date: Fri, 8 Nov 2024 22:11:53 -0800
Subject: [PATCH 2/2] monotonic projection gp gpu support

Summary: MonotonicProjectionGP is a subclass of GPClassificationGP, so we just
need to make sure the methods specific to it can handle device changes.
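
In practice, this is the behavior the change enables (a minimal sketch
mirroring the new GPU test below; config_str, X, and y are the ones defined in
that test, so this is not a standalone script):

    import torch
    from aepsych.config import Config
    from aepsych.models.monotonic_projection_gp import MonotonicProjectionGP

    # config_str, X, y as in tests_gpu/models/test_monotonic_projection_gp.py
    model = MonotonicProjectionGP.from_config(Config(config_str=config_str))
    model.cuda()                     # move model parameters to the GPU
    model.fit(X, y)                  # CPU training tensors are fine
    post = model.posterior(torch.zeros(3, 3))  # posterior() now moves X and X_aug to model.device
    mu = post.mean.squeeze().cpu()   # results live on the GPU; move to CPU before comparing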
Differential Revision: D65625994
---
 aepsych/models/monotonic_projection_gp.py    |  16 ++-
 .../models/test_monotonic_projection_gp.py   | 114 ++++++++++++++++++
 2 files changed, 124 insertions(+), 6 deletions(-)
 create mode 100644 tests_gpu/models/test_monotonic_projection_gp.py

diff --git a/aepsych/models/monotonic_projection_gp.py b/aepsych/models/monotonic_projection_gp.py
index 61bec1648..3de672c8e 100644
--- a/aepsych/models/monotonic_projection_gp.py
+++ b/aepsych/models/monotonic_projection_gp.py
@@ -136,14 +136,17 @@ def posterior(
             # using numpy because torch doesn't support vectorized linspace,
             # pytorch/issues/61292
             grid: Union[np.ndarray, torch.Tensor] = np.linspace(
-                self.lb[dim],
-                X[:, dim].numpy(),
+                self.lb[dim].cpu().numpy(),
+                X[:, dim].cpu().numpy(),
                 s + 1,
             )  # (s+1 x n)
             grid = torch.tensor(grid[:-1, :], dtype=X.dtype)  # Drop x; (s x n)
             X_aug[(1 + i * s) : (1 + (i + 1) * s), :, dim] = grid
         # X_aug[0, :, :] is X, and then subsequent indices are points in the grids
         # Predict marginal distributions on X_aug
+
+        X = X.to(self.device)
+        X_aug = X_aug.to(self.device)
         with torch.no_grad():
             post_aug = super().posterior(X=X_aug)
             mu_aug = post_aug.mean.squeeze()  # (m*s+1 x n)
@@ -158,12 +161,13 @@ def posterior(
         # Adjust the whole covariance matrix to accomadate the projected marginals
         with torch.no_grad():
             post = super().posterior(X=X)
-            R = cov2corr(post.distribution.covariance_matrix.squeeze().numpy())
-            S_proj = torch.tensor(corr2cov(R, sigma_proj.numpy()), dtype=X.dtype)
+            R = cov2corr(post.distribution.covariance_matrix.squeeze().cpu().numpy())
+            S_proj = torch.tensor(corr2cov(R, sigma_proj.cpu().numpy()), dtype=X.dtype)
         mvn_proj = gpytorch.distributions.MultivariateNormal(
-            mu_proj.unsqueeze(0),
-            S_proj.unsqueeze(0),
+            mu_proj.unsqueeze(0).to(self.device),
+            S_proj.unsqueeze(0).to(self.device),
         )
+
         return GPyTorchPosterior(mvn_proj)
 
     def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor:
diff --git a/tests_gpu/models/test_monotonic_projection_gp.py b/tests_gpu/models/test_monotonic_projection_gp.py
new file mode 100644
index 000000000..3c526946d
--- /dev/null
+++ b/tests_gpu/models/test_monotonic_projection_gp.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+import torch
+
+# run on single threads to keep us from deadlocking weirdly in CI
+if "CI" in os.environ or "SANDCASTLE" in os.environ:
+    torch.set_num_threads(1)
+
+import numpy as np
+from aepsych.config import Config
+from aepsych.models.monotonic_projection_gp import MonotonicProjectionGP
+from sklearn.datasets import make_classification
+
+
+class MonotonicProjectionGPtest(unittest.TestCase):
+    def setUp(self):
+        np.random.seed(1)
+        torch.manual_seed(1)
+        X, y = make_classification(
+            n_samples=25,
+            n_features=3,
+            n_redundant=0,
+            n_informative=3,
+            random_state=1,
+            n_clusters_per_class=1,
+        )
+        self.X, self.y = torch.Tensor(X), torch.Tensor(y)
+
+    def test_posterior_gpu(self):
+        X, y = self.X, self.y
+        config_str = """
+            [common]
+            parnames = [x, y, z]
+            lb = [-4, -4, -4]
+            ub = [4, 4, 4]
+            stimuli_per_trial = 1
+            outcome_types = [binary]
+
+            strategy_names = [init_strat]
+
+            [init_strat]
+            generator = OptimizeAcqfGenerator
+            model = MonotonicProjectionGP
+
+            [MonotonicProjectionGP]
+            monotonic_dims = [0, 1]
+            inducing_size = 10
+
+            [default_mean_covar_factory]
+            lengthscale_prior = gamma
+            fixed_kernel_amplitude = False
+        """
+        config = Config(config_str=config_str)
+        model = MonotonicProjectionGP.from_config(config)
+        model.cuda()
+        model.fit(X, y)
+
+        # Check that it is monotonic in all dims
+        for i in range(2):
+            Xtest = torch.zeros(3, 3)
+            Xtest[:, i] = torch.tensor([-1, 0, 1])
+            post = model.posterior(Xtest)
+            mu = post.mean.squeeze()
+            self.assertTrue(
+                torch.equal(
+                    torch.tensor([0, 1, 2], dtype=torch.long),
+                    torch.argsort(mu).cpu(),
+                )
+            )
+
+        # Check that min_f_val is respected
+        config_str = """
+            [common]
+            parnames = [x, y, z]
+            lb = [-4, -4, -4]
+            ub = [4, 4, 4]
+            stimuli_per_trial = 1
+            outcome_types = [binary]
+
+            strategy_names = [init_strat]
+
+            [init_strat]
+            generator = OptimizeAcqfGenerator
+            model = MonotonicProjectionGP
+
+            [MonotonicProjectionGP]
+            monotonic_dims = [0]
+            inducing_size=10
+            min_f_val = 5.0
+
+            [default_mean_covar_factory]
+            lengthscale_prior = gamma
+            fixed_kernel_amplitude = False
+        """
+        config = Config(config_str=config_str)
+        model = MonotonicProjectionGP.from_config(config)
+        post = model.posterior(Xtest)
+        mu = post.mean.squeeze()
+        self.assertTrue(mu.min().item() >= 4.9)
+        # And in samples
+        samps = model.sample(Xtest, num_samples=10)
+        self.assertTrue(samps.min().item() >= 4.9)
+
+
+if __name__ == "__main__":
+    unittest.main()
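
For reference, the train_inputs handling that PATCH 1/2 adds via
AEPsychModelDeviceMixin hinges on one detail: Tensor.to() returns a new tensor
rather than moving the original in place, so looping over the cached inputs and
calling .to(device) on each one accomplishes nothing; the tuple has to be
rebuilt and re-cached. Below is a simplified, self-contained sketch of that
pattern (DeviceAwareModel and its methods are illustrative stand-ins, not the
actual AEPsych or GPyTorch classes):

    import torch
    from typing import Optional, Tuple


    class DeviceAwareModel(torch.nn.Module):
        """Illustrative stand-in for the AEPsychModelDeviceMixin pattern."""

        def __init__(self) -> None:
            super().__init__()
            # Any registered parameter pins down the model's device.
            self.offset = torch.nn.Parameter(torch.zeros(1))
            self._train_inputs: Optional[Tuple[torch.Tensor, ...]] = None

        def set_train_data(self, inputs: Optional[Tuple[torch.Tensor, ...]] = None) -> None:
            # Cache the tensors as given; device placement happens lazily on access.
            if inputs is not None:
                self._train_inputs = inputs

        @property
        def device(self) -> torch.device:
            return next(self.parameters()).device

        @property
        def train_inputs(self) -> Optional[Tuple[torch.Tensor, ...]]:
            if self._train_inputs is None:
                return None
            # Tensor.to() is not in-place, so rebuild the cached tuple on the
            # model's current device instead of calling .to() and dropping the result.
            self._train_inputs = tuple(t.to(self.device) for t in self._train_inputs)
            return self._train_inputs


    # Usage: after moving the model, the cached data follows on access.
    m = DeviceAwareModel()
    m.set_train_data((torch.randn(10, 2),))
    if torch.cuda.is_available():
        m = m.cuda()
    print(m.train_inputs[0].device)  # cuda:0 if a GPU is available, else cpu

With this in place, calling .cuda() on the model and then reading train_inputs
hands back tensors on the model's device, which is what the new GPU tests rely
on.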