From 3fe7a5b6989a2a52974fb2d08a83430df8bd5618 Mon Sep 17 00:00:00 2001
From: Jason Chow
Date: Fri, 8 Nov 2024 22:11:53 -0800
Subject: [PATCH 1/2] derivativeGP gpu support (#444)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary: Add GPU support for the derivative GP. This model isn't like a normal
model that can show up in a live experiment with a config, but we should still
make it work on GPU. Most of this was straightforward, but it required some
fairly arcane shenanigans to override GPyTorch's underlying handling of
train_inputs, which in turn required some equally arcane mypy workarounds. A
simplified sketch of the resulting train_inputs pattern is included at the end
of this patch series.

Differential Revision: D65515631
---
 aepsych/config.py                      |  2 +-
 aepsych/likelihoods/bernoulli.py       |  8 +++---
 aepsych/likelihoods/semi_p.py          |  6 ++--
 aepsych/means/constant_partial_grad.py |  2 +-
 aepsych/models/base.py                 | 12 +++++---
 aepsych/models/derivative_gp.py        |  5 +++-
 aepsych/plotting.py                    |  1 -
 tests_gpu/models/test_derivative_gp.py | 39 ++++++++++++++++++++++++++
 8 files changed, 60 insertions(+), 15 deletions(-)
 create mode 100644 tests_gpu/models/test_derivative_gp.py

diff --git a/aepsych/config.py b/aepsych/config.py
index afbc323be..e7c34698f 100644
--- a/aepsych/config.py
+++ b/aepsych/config.py
@@ -16,13 +16,13 @@
     Callable,
     ClassVar,
     Dict,
-    Dict,
     List,
     Mapping,
     Optional,
     Sequence,
     TypeVar,
 )
+
 import botorch
 import gpytorch
 import numpy as np
diff --git a/aepsych/likelihoods/bernoulli.py b/aepsych/likelihoods/bernoulli.py
index 7f6fc66ac..8a71c2fdd 100644
--- a/aepsych/likelihoods/bernoulli.py
+++ b/aepsych/likelihoods/bernoulli.py
@@ -19,7 +19,7 @@ class BernoulliObjectiveLikelihood(_OneDimensionalLikelihood):
     def __init__(self, objective: Callable) -> None:
         """Initialize BernoulliObjectiveLikelihood.
-
+
         Args:
             objective (Callable): Objective function that maps function samples to probabilities."""
         super().__init__()
@@ -42,13 +42,13 @@ def forward(
     @classmethod
     def from_config(cls, config: Config) -> "BernoulliObjectiveLikelihood":
         """Create an instance from a configuration object.
-
+
         Args:
             config (Config): Configuration object.
-
+
         Returns:
             BernoulliObjectiveLikelihood: BernoulliObjectiveLikelihood instance.
         """
         objective_cls = config.getobj(cls.__name__, "objective")
         objective = objective_cls.from_config(config)
-        return cls(objective=objective)
\ No newline at end of file
+        return cls(objective=objective)
diff --git a/aepsych/likelihoods/semi_p.py b/aepsych/likelihoods/semi_p.py
index f6a337488..2d9bf9b9b 100644
--- a/aepsych/likelihoods/semi_p.py
+++ b/aepsych/likelihoods/semi_p.py
@@ -111,10 +111,10 @@ def expected_log_prob(
         # modified, TODO fixme upstream (cc @bletham)
         def log_prob_lambda(function_samples: torch.Tensor) -> torch.Tensor:
             """Lambda function to compute the log probability.
-
+
             Args:
                 function_samples (torch.Tensor): Function samples.
-
+
             Returns:
                 torch.Tensor: Log probability.
""" @@ -142,4 +142,4 @@ def from_config(cls, config: Config) -> "LinearBernoulliLikelihood": else: objective = objective - return cls(objective=objective) \ No newline at end of file + return cls(objective=objective) diff --git a/aepsych/means/constant_partial_grad.py b/aepsych/means/constant_partial_grad.py index ead7ee6ed..e0af2c29a 100644 --- a/aepsych/means/constant_partial_grad.py +++ b/aepsych/means/constant_partial_grad.py @@ -26,6 +26,6 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: idx = input[..., -1].to(dtype=torch.long) > 0 mean_fit = super(ConstantMeanPartialObsGrad, self).forward(input[..., ~idx, :]) sz = mean_fit.shape[:-1] + torch.Size([input.shape[-2]]) - mean = torch.zeros(sz) + mean = torch.zeros(sz).to(input) mean[~idx] = mean_fit return mean diff --git a/aepsych/models/base.py b/aepsych/models/base.py index 67f2af75a..0490a20da 100644 --- a/aepsych/models/base.py +++ b/aepsych/models/base.py @@ -116,7 +116,7 @@ class AEPsychMixin(GPyTorchModel): extremum_solver = "Nelder-Mead" outcome_types: List[str] = [] - train_inputs: Optional[Tuple[torch.Tensor]] + train_inputs: Optional[Tuple[torch.Tensor, ...]] train_targets: Optional[torch.Tensor] @property @@ -398,7 +398,7 @@ def p_below_threshold( class AEPsychModelDeviceMixin(AEPsychMixin): - _train_inputs: Optional[Tuple[torch.Tensor]] + _train_inputs: Optional[Tuple[torch.Tensor, ...]] _train_targets: Optional[torch.Tensor] def set_train_data(self, inputs=None, targets=None, strict=False): @@ -423,13 +423,17 @@ def device(self) -> torch.device: return next(self.parameters()).device @property - def train_inputs(self) -> Optional[Tuple[torch.Tensor]]: + def train_inputs(self) -> Optional[Tuple[torch.Tensor, ...]]: if self._train_inputs is None: return None # makes sure the tensors are on the right device, move in place + _train_inputs = [] for input in self._train_inputs: - input.to(self.device) + _train_inputs.append(input.to(self.device)) + + _tuple_inputs: Tuple[torch.Tensor, ...] = tuple(_train_inputs) + self._train_inputs = _tuple_inputs return self._train_inputs diff --git a/aepsych/models/derivative_gp.py b/aepsych/models/derivative_gp.py index b338f5a7a..3b5a92292 100644 --- a/aepsych/models/derivative_gp.py +++ b/aepsych/models/derivative_gp.py @@ -13,6 +13,7 @@ import torch from aepsych.kernels.rbf_partial_grad import RBFKernelPartialObsGrad from aepsych.means.constant_partial_grad import ConstantMeanPartialObsGrad +from aepsych.models.base import AEPsychModelDeviceMixin from botorch.models.gpytorch import GPyTorchModel from gpytorch.distributions import MultivariateNormal from gpytorch.kernels import Kernel @@ -22,7 +23,9 @@ from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy -class MixedDerivativeVariationalGP(gpytorch.models.ApproximateGP, GPyTorchModel): +class MixedDerivativeVariationalGP( + gpytorch.models.ApproximateGP, AEPsychModelDeviceMixin, GPyTorchModel +): """A variational GP with mixed derivative observations. For more on GPs with derivative observations, see e.g. Riihimaki & Vehtari 2010. 
diff --git a/aepsych/plotting.py b/aepsych/plotting.py
index bb4cab779..12f0e60f5 100644
--- a/aepsych/plotting.py
+++ b/aepsych/plotting.py
@@ -10,7 +10,6 @@
 import matplotlib.pyplot as plt
 import numpy as np
-
 import torch
 from aepsych.strategy import Strategy
 from aepsych.utils import get_lse_contour, get_lse_interval, make_scaled_sobol
diff --git a/tests_gpu/models/test_derivative_gp.py b/tests_gpu/models/test_derivative_gp.py
new file mode 100644
index 000000000..200ef62eb
--- /dev/null
+++ b/tests_gpu/models/test_derivative_gp.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from aepsych import Config, SequentialStrategy
+from aepsych.models.derivative_gp import MixedDerivativeVariationalGP
+from botorch.fit import fit_gpytorch_mll
+from botorch.utils.testing import BotorchTestCase
+from gpytorch.likelihoods import BernoulliLikelihood
+from gpytorch.mlls.variational_elbo import VariationalELBO
+
+
+class TestDerivativeGP(BotorchTestCase):
+    def test_MixedDerivativeVariationalGP_gpu(self):
+        train_x = torch.cat(
+            (torch.tensor([1.0, 2.0, 3.0, 4.0]).unsqueeze(1), torch.zeros(4, 1)), dim=1
+        )
+        train_y = torch.tensor([1.0, 2.0, 3.0, 4.0])
+        m = MixedDerivativeVariationalGP(
+            train_x=train_x,
+            train_y=train_y,
+            inducing_points=train_x,
+            fixed_prior_mean=0.5,
+        ).cuda()
+
+        self.assertEqual(m.mean_module.constant.item(), 0.5)
+        self.assertEqual(
+            m.covar_module.base_kernel.raw_lengthscale.shape, torch.Size([1, 1])
+        )
+        mll = VariationalELBO(
+            likelihood=BernoulliLikelihood(), model=m, num_data=train_y.numel()
+        ).cuda()
+        mll = fit_gpytorch_mll(mll)
+        test_x = torch.tensor([[1.0, 0], [3.0, 1.0]]).cuda()
+        m(test_x)

From 7cc9f82f24a6f59974467827da55f22ac4ba5134 Mon Sep 17 00:00:00 2001
From: Jason Chow
Date: Fri, 8 Nov 2024 22:11:53 -0800
Subject: [PATCH 2/2] monotonic projection gp gpu support

Summary: MonotonicProjectionGP is a subclass of GPClassificationGP, so we just
need to make sure the methods specific to it can handle device changes.
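
In practice, this is the behavior the change enables (a minimal sketch
mirroring the new GPU test below; config_str, X, and y are the ones defined in
that test, so this is not a standalone script):

    import torch
    from aepsych.config import Config
    from aepsych.models.monotonic_projection_gp import MonotonicProjectionGP

    # config_str, X, y as in tests_gpu/models/test_monotonic_projection_gp.py
    model = MonotonicProjectionGP.from_config(Config(config_str=config_str))
    model.cuda()                     # move model parameters to the GPU
    model.fit(X, y)                  # CPU training tensors are fine
    post = model.posterior(torch.zeros(3, 3))  # posterior() now moves X and X_aug to model.device
    mu = post.mean.squeeze().cpu()   # results live on the GPU; move to CPU before comparing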
Differential Revision: D65625994
---
 aepsych/models/monotonic_projection_gp.py    |  16 ++-
 .../models/test_monotonic_projection_gp.py   | 114 ++++++++++++++++++
 2 files changed, 124 insertions(+), 6 deletions(-)
 create mode 100644 tests_gpu/models/test_monotonic_projection_gp.py

diff --git a/aepsych/models/monotonic_projection_gp.py b/aepsych/models/monotonic_projection_gp.py
index 61bec1648..3de672c8e 100644
--- a/aepsych/models/monotonic_projection_gp.py
+++ b/aepsych/models/monotonic_projection_gp.py
@@ -136,14 +136,17 @@ def posterior(
             # using numpy because torch doesn't support vectorized linspace,
             # pytorch/issues/61292
             grid: Union[np.ndarray, torch.Tensor] = np.linspace(
-                self.lb[dim],
-                X[:, dim].numpy(),
+                self.lb[dim].cpu().numpy(),
+                X[:, dim].cpu().numpy(),
                 s + 1,
             )  # (s+1 x n)
             grid = torch.tensor(grid[:-1, :], dtype=X.dtype)  # Drop x; (s x n)
             X_aug[(1 + i * s) : (1 + (i + 1) * s), :, dim] = grid
         # X_aug[0, :, :] is X, and then subsequent indices are points in the grids
         # Predict marginal distributions on X_aug
+
+        X = X.to(self.device)
+        X_aug = X_aug.to(self.device)
         with torch.no_grad():
             post_aug = super().posterior(X=X_aug)
             mu_aug = post_aug.mean.squeeze()  # (m*s+1 x n)
@@ -158,12 +161,13 @@ def posterior(
         # Adjust the whole covariance matrix to accomadate the projected marginals
         with torch.no_grad():
             post = super().posterior(X=X)
-            R = cov2corr(post.distribution.covariance_matrix.squeeze().numpy())
-            S_proj = torch.tensor(corr2cov(R, sigma_proj.numpy()), dtype=X.dtype)
+            R = cov2corr(post.distribution.covariance_matrix.squeeze().cpu().numpy())
+            S_proj = torch.tensor(corr2cov(R, sigma_proj.cpu().numpy()), dtype=X.dtype)
         mvn_proj = gpytorch.distributions.MultivariateNormal(
-            mu_proj.unsqueeze(0),
-            S_proj.unsqueeze(0),
+            mu_proj.unsqueeze(0).to(self.device),
+            S_proj.unsqueeze(0).to(self.device),
         )
+
         return GPyTorchPosterior(mvn_proj)
 
     def sample(self, x: torch.Tensor, num_samples: int) -> torch.Tensor:
diff --git a/tests_gpu/models/test_monotonic_projection_gp.py b/tests_gpu/models/test_monotonic_projection_gp.py
new file mode 100644
index 000000000..3c526946d
--- /dev/null
+++ b/tests_gpu/models/test_monotonic_projection_gp.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+import torch
+
+# run on single threads to keep us from deadlocking weirdly in CI
+if "CI" in os.environ or "SANDCASTLE" in os.environ:
+    torch.set_num_threads(1)
+
+import numpy as np
+from aepsych.config import Config
+from aepsych.models.monotonic_projection_gp import MonotonicProjectionGP
+from sklearn.datasets import make_classification
+
+
+class MonotonicProjectionGPtest(unittest.TestCase):
+    def setUp(self):
+        np.random.seed(1)
+        torch.manual_seed(1)
+        X, y = make_classification(
+            n_samples=25,
+            n_features=3,
+            n_redundant=0,
+            n_informative=3,
+            random_state=1,
+            n_clusters_per_class=1,
+        )
+        self.X, self.y = torch.Tensor(X), torch.Tensor(y)
+
+    def test_posterior_gpu(self):
+        X, y = self.X, self.y
+        config_str = """
+            [common]
+            parnames = [x, y, z]
+            lb = [-4, -4, -4]
+            ub = [4, 4, 4]
+            stimuli_per_trial = 1
+            outcome_types = [binary]
+
+            strategy_names = [init_strat]
+
+            [init_strat]
+            generator = OptimizeAcqfGenerator
+            model = MonotonicProjectionGP
+
+            [MonotonicProjectionGP]
+            monotonic_dims = [0, 1]
+            inducing_size = 10
+
+            [default_mean_covar_factory]
+            lengthscale_prior = gamma
+            fixed_kernel_amplitude = False
+        """
+        config = Config(config_str=config_str)
+        model = MonotonicProjectionGP.from_config(config)
+        model.cuda()
+        model.fit(X, y)
+
+        # Check that it is monotonic in all dims
+        for i in range(2):
+            Xtest = torch.zeros(3, 3)
+            Xtest[:, i] = torch.tensor([-1, 0, 1])
+            post = model.posterior(Xtest)
+            mu = post.mean.squeeze()
+            self.assertTrue(
+                torch.equal(
+                    torch.tensor([0, 1, 2], dtype=torch.long),
+                    torch.argsort(mu).cpu(),
+                )
+            )
+
+        # Check that min_f_val is respected
+        config_str = """
+            [common]
+            parnames = [x, y, z]
+            lb = [-4, -4, -4]
+            ub = [4, 4, 4]
+            stimuli_per_trial = 1
+            outcome_types = [binary]
+
+            strategy_names = [init_strat]
+
+            [init_strat]
+            generator = OptimizeAcqfGenerator
+            model = MonotonicProjectionGP
+
+            [MonotonicProjectionGP]
+            monotonic_dims = [0]
+            inducing_size=10
+            min_f_val = 5.0
+
+            [default_mean_covar_factory]
+            lengthscale_prior = gamma
+            fixed_kernel_amplitude = False
+        """
+        config = Config(config_str=config_str)
+        model = MonotonicProjectionGP.from_config(config)
+        post = model.posterior(Xtest)
+        mu = post.mean.squeeze()
+        self.assertTrue(mu.min().item() >= 4.9)
+        # And in samples
+        samps = model.sample(Xtest, num_samples=10)
+        self.assertTrue(samps.min().item() >= 4.9)
+
+
+if __name__ == "__main__":
+    unittest.main()
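
For reference, the train_inputs handling that PATCH 1/2 adds via
AEPsychModelDeviceMixin hinges on one detail: Tensor.to() returns a new tensor
rather than moving the original in place, so looping over the cached inputs and
calling .to(device) on each one accomplishes nothing; the tuple has to be
rebuilt and re-cached. Below is a simplified, self-contained sketch of that
pattern (DeviceAwareModel and its methods are illustrative stand-ins, not the
actual AEPsych or GPyTorch classes):

    import torch
    from typing import Optional, Tuple


    class DeviceAwareModel(torch.nn.Module):
        """Illustrative stand-in for the AEPsychModelDeviceMixin pattern."""

        def __init__(self) -> None:
            super().__init__()
            # Any registered parameter pins down the model's device.
            self.offset = torch.nn.Parameter(torch.zeros(1))
            self._train_inputs: Optional[Tuple[torch.Tensor, ...]] = None

        def set_train_data(self, inputs: Optional[Tuple[torch.Tensor, ...]] = None) -> None:
            # Cache the tensors as given; device placement happens lazily on access.
            if inputs is not None:
                self._train_inputs = inputs

        @property
        def device(self) -> torch.device:
            return next(self.parameters()).device

        @property
        def train_inputs(self) -> Optional[Tuple[torch.Tensor, ...]]:
            if self._train_inputs is None:
                return None
            # Tensor.to() is not in-place, so rebuild the cached tuple on the
            # model's current device instead of calling .to() and dropping the result.
            self._train_inputs = tuple(t.to(self.device) for t in self._train_inputs)
            return self._train_inputs


    # Usage: after moving the model, the cached data follows on access.
    m = DeviceAwareModel()
    m.set_train_data((torch.randn(10, 2),))
    if torch.cuda.is_available():
        m = m.cuda()
    print(m.train_inputs[0].device)  # cuda:0 if a GPU is available, else cpu

With this in place, calling .cuda() on the model and then reading train_inputs
hands back tensors on the model's device, which is what the new GPU tests rely
on.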