Delete select_inducing_points utility function (facebookresearch#480)
Summary:
Pull Request resolved: facebookresearch#480

Replaces the unnecessary utility function with direct calls to the allocate_inducing_points method on the allocator classes.

This diff also removes support for selecting allocators via legacy strings (see the sketch below).
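
For illustration, a minimal before/after sketch of the calling convention this diff changes. Argument names are taken from the diffs below; the import path for KMeansAllocator, the stand-in kernel, and the random inputs are assumptions for the example, and the commented-out "before" call no longer exists after this commit:

```python
import torch
from gpytorch.kernels import MaternKernel

from aepsych.models.inducing_points import KMeansAllocator  # assumed import path

train_X = torch.rand(100, 1)  # hypothetical training inputs
covar = MaternKernel(nu=2.5)  # stand-in covariance module

# Before (removed by this commit): a free function that also accepted
# legacy strings such as "sobol" or "kmeans++" for the allocator:
# points = select_inducing_points(
#     inducing_size=20,
#     allocator="kmeans++",
#     X=train_X,
# )

# After: instantiate an allocator and call its method directly, as the
# models in this diff now do.
allocator = KMeansAllocator(dim=1)
points = allocator.allocate_inducing_points(
    inputs=train_X,
    covar_module=covar,
    num_inducing=20,
)
```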

Differential Revision: D67068021
JasonKChow authored and facebook-github-bot committed Dec 12, 2024
1 parent 9ac822b commit e606e74
Showing 7 changed files with 49 additions and 188 deletions.
13 changes: 5 additions & 8 deletions aepsych/models/gp_classification.py
@@ -16,7 +16,6 @@
 from aepsych.factory.default import default_mean_covar_factory
 from aepsych.models.base import AEPsychModelDeviceMixin
 from aepsych.models.inducing_points import AutoAllocator
-from aepsych.models.utils import select_inducing_points
 from aepsych.utils import get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
 from botorch.models.utils.inducing_point_allocators import InducingPointAllocator
@@ -100,9 +99,8 @@ def __init__(
         )
 
         self.inducing_point_method = inducing_point_method
-        inducing_points = select_inducing_points(
-            allocator=self.inducing_point_method,
-            inducing_size=self.inducing_size,
+        inducing_points = self.inducing_point_method.allocate_inducing_points(
+            num_inducing=self.inducing_size,
             covar_module=covar_module or default_covar,
         )
 
@@ -201,11 +199,10 @@ def _reset_variational_strategy(self) -> None:
         if self.train_inputs is not None:
             # remember original device
             device = self.device
-            inducing_points = select_inducing_points(
-                allocator=self.inducing_point_method,
-                inducing_size=self.inducing_size,
+            inducing_points = self.inducing_point_method.allocate_inducing_points(
+                num_inducing=self.inducing_size,
                 covar_module=self.covar_module,
-                X=self.train_inputs[0],
+                inputs=self.train_inputs[0],
             ).to(device)
             variational_distribution = CholeskyVariationalDistribution(
                 inducing_points.size(0), batch_shape=torch.Size([self._batch_size])
2 changes: 0 additions & 2 deletions aepsych/models/gp_regression.py
@@ -14,8 +14,6 @@
 from aepsych.config import Config
 from aepsych.factory.default import default_mean_covar_factory
 from aepsych.models.base import AEPsychModelDeviceMixin
-from aepsych.models.inducing_points import AutoAllocator
-from aepsych.models.utils import select_inducing_points
 from aepsych.utils import get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
 from botorch.models.utils.inducing_point_allocators import InducingPointAllocator
15 changes: 6 additions & 9 deletions aepsych/models/monotonic_rejection_gp.py
@@ -19,7 +19,6 @@
 from aepsych.means.constant_partial_grad import ConstantMeanPartialObsGrad
 from aepsych.models.base import AEPsychMixin
 from aepsych.models.inducing_points import AutoAllocator, SobolAllocator
-from aepsych.models.utils import select_inducing_points
 from aepsych.utils import _process_bounds, get_optimizer_options, promote_0d
 from botorch.fit import fit_gpytorch_mll
 from botorch.models.utils.inducing_point_allocators import InducingPointAllocator
@@ -100,10 +99,9 @@ def __init__(
         # TODO: This allocator *must* be SobolAllocator and not the set one. This
         # suggests that this model doesn't actually use data for inducing
         # points properly.
-        inducing_points = select_inducing_points(
-            allocator=SobolAllocator(bounds=torch.stack([lb, ub]), dim=self.dim),
-            inducing_size=self.inducing_size,
-        )
+        inducing_points = SobolAllocator(
+            bounds=torch.stack([lb, ub]), dim=self.dim
+        ).allocate_inducing_points(num_inducing=self.inducing_size)
 
         inducing_points_aug = self._augment_with_deriv_index(inducing_points, 0)
         variational_distribution = CholeskyVariationalDistribution(
@@ -168,11 +166,10 @@ def fit(self, train_x: torch.Tensor, train_y: torch.Tensor, **kwargs) -> None:
         """
         self.set_train_data(train_x, train_y)
 
-        self.inducing_points = select_inducing_points(
-            allocator=self.inducing_point_method,
-            inducing_size=self.inducing_size,
+        self.inducing_points = self.inducing_point_method.allocate_inducing_points(
+            num_inducing=self.inducing_size,
             covar_module=self.covar_module,
-            X=self.train_inputs[0],
+            inputs=self.train_inputs[0],
         )
         self._set_model(train_x, train_y)
 
1 change: 0 additions & 1 deletion aepsych/models/semi_p.py
@@ -20,7 +20,6 @@
 from aepsych.likelihoods import BernoulliObjectiveLikelihood, LinearBernoulliLikelihood
 from aepsych.models import GPClassificationModel
 from aepsych.models.inducing_points.auto import AutoAllocator
-from aepsych.models.utils import select_inducing_points
 from aepsych.utils import get_optimizer_options, promote_0d
 from aepsych.utils_logging import getLogger
 from botorch.acquisition.objective import PosteriorTransform
77 changes: 0 additions & 77 deletions aepsych/models/utils.py
@@ -61,83 +61,6 @@ def compute_p_quantile(
     return norm.cdf(f_mean + norm.icdf(alpha) * f_std)
 
 
-def select_inducing_points(
-    inducing_size: int,
-    allocator: Union[str, InducingPointAllocator],
-    covar_module: Optional[torch.nn.Module] = None,
-    X: Optional[torch.Tensor] = None,
-    bounds: Optional[torch.Tensor] = None,
-) -> torch.Tensor:
-    """
-    Select inducing points using a specified allocator instance or legacy method.
-    Args:
-        inducing_size (int): Number of inducing points.
-        allocator (Union[str, InducingPointAllocator]): An inducing point allocator or a legacy string indicating method.
-        covar_module (torch.nn.Module, optional): Covariance module, required for some allocators.
-        X (torch.Tensor, optional): Input data tensor, required for most allocators.
-        bounds (torch.Tensor, optional): Bounds for Sobol sampling in legacy mode.
-    Returns:
-        torch.Tensor: Selected inducing points.
-    """
-    # Handle legacy string methods with a deprecation warning
-    if isinstance(allocator, str):
-        warnings.warn(
-            f"Using string '{allocator}' for inducing point method is deprecated. "
-            "Please use an InducingPointAllocator class instead.",
-            DeprecationWarning,
-        )
-
-        if allocator == "sobol":
-            assert (
-                bounds is not None
-            ), "Bounds must be provided for Sobol inducing points!"
-            inducing_points = (
-                draw_sobol_samples(bounds=bounds, n=inducing_size, q=1)
-                .squeeze()
-                .to(bounds.device)
-            )
-            if inducing_points.ndim == 1:
-                inducing_points = inducing_points.view(-1, 1)
-            return inducing_points
-
-        assert X is not None, "Must pass X for non-Sobol inducing point selection!"
-
-        unique_X = torch.unique(X, dim=0)
-        if allocator == "auto":
-            if unique_X.shape[0] <= inducing_size:
-                return unique_X
-            else:
-                allocator = "kmeans++"
-
-        if allocator == "pivoted_chol":
-            inducing_point_allocator = GreedyVarianceReduction(dim=X.shape[1])
-            inducing_points = inducing_point_allocator.allocate_inducing_points(
-                inputs=X,
-                covar_module=covar_module,
-                num_inducing=inducing_size,
-                input_batch_shape=torch.Size([]),
-            ).to(X.device)
-        elif allocator == "kmeans++":
-            inducing_points = torch.tensor(
-                kmeans2(unique_X.cpu().numpy(), inducing_size, minit="++")[0],
-                dtype=X.dtype,
-            ).to(X.device)
-
-        return inducing_points
-
-    # Call allocate_inducing_points with allocator instance
-    if isinstance(allocator, InducingPointAllocator):
-        inducing_points = allocator.allocate_inducing_points(
-            inputs=X,
-            covar_module=covar_module,
-            num_inducing=inducing_size,
-            input_batch_shape=torch.Size([]),
-        )
-
-    return inducing_points
-
-
 def get_probability_space(
     likelihood: Likelihood, posterior: GPyTorchPosterior
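For readers migrating call sites, a rough correspondence between the retired strings and the allocator classes can be read off this commit: the deleted branches above map "pivoted_chol" to GreedyVarianceReduction and "kmeans++" to a k-means++ selection, while the test changes below exercise AutoAllocator, SobolAllocator, KMeansAllocator, and FixedAllocator. A hypothetical lookup table, not part of AEPsych and with an assumed import path:

```python
from aepsych.models.inducing_points import (  # assumed import path
    AutoAllocator,
    GreedyVarianceReduction,
    KMeansAllocator,
    SobolAllocator,
)

# Hypothetical migration aid: retired legacy string -> allocator class.
LEGACY_ALLOCATORS = {
    "auto": AutoAllocator,  # data-dependent choice, like the old "auto" branch
    "sobol": SobolAllocator,  # quasi-random points within bounds
    "kmeans++": KMeansAllocator,  # cluster centers of the unique inputs
    "pivoted_chol": GreedyVarianceReduction,  # per the deleted branch above
}
```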
117 changes: 38 additions & 79 deletions tests/models/test_utils.py
@@ -17,7 +17,6 @@
     KMeansAllocator,
     SobolAllocator,
 )
-from aepsych.models.utils import select_inducing_points
 from sklearn.datasets import make_classification
 
 
@@ -35,9 +34,6 @@ def test_select_inducing_points(self):
         )
         X, y = torch.Tensor(X), torch.Tensor(y)
         inducing_size = 20
-        lb = torch.Tensor([-3])
-        ub = torch.Tensor([3])
-        bounds = torch.stack([lb, ub])
 
         model = GPClassificationModel(
             dim=1,
@@ -47,93 +43,56 @@ def test_select_inducing_points(self):
         model.set_train_data(X[:10, ...], y[:10])
 
         # (inducing point selection sorts the inputs so we sort X to verify)
-        self.assertTrue(
-            np.allclose(
-                select_inducing_points(
-                    allocator=AutoAllocator(dim=1),
-                    inducing_size=inducing_size,
-                    covar_module=model.covar_module,
-                    X=model.train_inputs[0],
-                ),
-                X[:10].sort(0).values,
-            )
-        )
+        allocator = AutoAllocator(dim=1)
+        points = allocator.allocate_inducing_points(
+            inputs=model.train_inputs[0],
+            covar_module=model.covar_module,
+            num_inducing=inducing_size,
+        )
+        self.assertTrue(np.allclose(points, X[:10].sort(0).values))
 
         model.set_train_data(X, y)
 
-        self.assertTrue(
-            len(
-                select_inducing_points(
-                    allocator=AutoAllocator(dim=1),
-                    inducing_size=inducing_size,
-                    covar_module=model.covar_module,
-                    X=model.train_inputs[0],
-                )
-            )
-            <= 20
-        )
+        points = allocator.allocate_inducing_points(
+            inputs=model.train_inputs[0],
+            covar_module=model.covar_module,
+            num_inducing=inducing_size,
+        )
+        self.assertTrue(len(points) <= 20)
 
-        self.assertTrue(
-            len(
-                select_inducing_points(
-                    allocator=GreedyVarianceReduction(dim=1),
-                    inducing_size=inducing_size,
-                    covar_module=model.covar_module,
-                    X=model.train_inputs[0],
-                )
-            )
-            <= 20
-        )
+        allocator = GreedyVarianceReduction(dim=1)
+        points = allocator.allocate_inducing_points(
+            inputs=model.train_inputs[0],
+            num_inducing=inducing_size,
+            covar_module=model.covar_module,
+        )
+        self.assertTrue(len(points) <= 20)
 
-        self.assertEqual(
-            len(
-                select_inducing_points(
-                    allocator=KMeansAllocator(dim=1),
-                    inducing_size=inducing_size,
-                    covar_module=model.covar_module,
-                    X=model.train_inputs[0],
-                )
-            ),
-            20,
-        )
-        self.assertTrue(
-            len(
-                select_inducing_points(
-                    allocator="auto",
-                    inducing_size=inducing_size,
-                    covar_module=model.covar_module,
-                    X=model.train_inputs[0],
-                )
-            )
-            <= 20
-        )
+        allocator = KMeansAllocator(dim=1)
+        points = allocator.allocate_inducing_points(
+            inputs=model.train_inputs[0],
+            num_inducing=inducing_size,
+            covar_module=model.covar_module,
+        )
+        self.assertEqual(len(points), 20)
 
-        self.assertTrue(
-            len(
-                select_inducing_points(
-                    allocator=SobolAllocator(
-                        bounds=torch.stack([torch.tensor([0]), torch.tensor([1])]),
-                        dim=1,
-                    ),
-                    inducing_size=inducing_size,
-                    covar_module=model.covar_module,
-                    X=model.train_inputs[0],
-                )
-            )
-            <= 20
-        )
+        allocator = SobolAllocator(
+            bounds=torch.stack([torch.tensor([0]), torch.tensor([1])]), dim=1
+        )
+        points = allocator.allocate_inducing_points(
+            inputs=model.train_inputs[0],
+            num_inducing=inducing_size,
+            covar_module=model.covar_module,
+        )
+        self.assertTrue(len(points) <= 20)
 
-        self.assertTrue(
-            len(
-                select_inducing_points(
-                    allocator=FixedAllocator(
-                        points=torch.tensor([[0], [1], [2], [3]]), dim=1
-                    ),
-                    inducing_size=inducing_size,
-                    covar_module=model.covar_module,
-                    X=model.train_inputs[0],
-                )
-            )
-            <= 20
-        )
+        allocator = FixedAllocator(points=torch.tensor([[0], [1], [2], [3]]), dim=1)
+        points = allocator.allocate_inducing_points(
+            inputs=model.train_inputs[0],
+            num_inducing=inducing_size,
+            covar_module=model.covar_module,
+        )
+        self.assertTrue(len(points) <= 20)
 
 
 if __name__ == "__main__":
12 changes: 0 additions & 12 deletions tests/test_points_allocators.py
@@ -10,7 +10,6 @@
     KMeansAllocator,
     SobolAllocator,
 )
-from aepsych.models.utils import select_inducing_points
 from aepsych.strategy import Strategy
 from aepsych.transforms.parameters import ParameterTransforms, transform_options
 
@@ -411,17 +410,6 @@ def test_allocator_model_fit(self):
             strat.model.variational_strategy.inducing_points.shape, train_X.shape
         )
 
-    def test_select_inducing_points_legacy(self):
-        with self.assertWarns(DeprecationWarning):
-            # Call select_inducing_points directly with a string for allocator to trigger the warning
-            bounds = torch.tensor([[0.0], [1.0]])
-            points = select_inducing_points(
-                inducing_size=5,
-                allocator="sobol",  # Legacy string argument to trigger DeprecationWarning
-                bounds=bounds,
-            )
-            self.assertEqual(points.shape, (5, 1))
-
 
 if __name__ == "__main__":
     unittest.main()
