
Fix inference_mode (#885)
Summary:
Fixes: #875

Test Plan:
Tested locally with tutorials/quantize_vit/run_vit_b_quant.py, modified to run:
```
with torch.inference_mode():
    benchmark_model(model, 20, inputs)
```

The issue could not be reproduced in unit tests, so the fix is verified manually with the script above.
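
Background on the failure mode: under `torch.inference_mode()` the autograd dispatch keys are skipped, so `torch.nn.functional.linear` can reach the Python dispatch layer as the un-decomposed `aten.linear.default` instead of the usual decomposition (e.g. `aten.t.default` + `aten.mm.default`), which is why a handler must be registered for `aten.linear.default` as well. A minimal probe sketch, assuming standard PyTorch dispatch behavior (`OpLogger` is an illustrative name, not a torchao API):

```python
import torch
from torch.utils._python_dispatch import TorchDispatchMode

class OpLogger(TorchDispatchMode):
    """Print each ATen op that reaches the Python dispatch layer."""
    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        print(func)
        return func(*args, **(kwargs or {}))

x, w = torch.randn(2, 4), torch.randn(3, 4)

with OpLogger():
    torch.nn.functional.linear(x, w)   # logs the decomposition, e.g. aten.mm.default

with torch.inference_mode(), OpLogger():
    torch.nn.functional.linear(x, w)   # logs aten.linear.default directly
```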

jerryzh168 authored Sep 13, 2024
1 parent 3fa38aa commit 90c8cbd
Showing 2 changed files with 2 additions and 2 deletions.
torchao/dtypes/affine_quantized_tensor.py (1 addition, 1 deletion):

```diff
@@ -1483,7 +1483,7 @@ def _register_aqt_quantized_linear_dispatches():

 _register_aqt_quantized_linear_dispatches()

-@implements(torch.nn.functional.linear)
+@implements([torch.nn.functional.linear, aten.linear.default])
 def _(func, types, args, kwargs):
     input_tensor, weight_tensor, bias = (
         args[0],
```
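
The fix registers one handler for both the Python-level function and the ATen op. As the diff shows, torchao's `implements` decorator accepts either a single op or a list; a simplified sketch of that registration pattern (the real decorator also wires the table into `__torch_function__`/`__torch_dispatch__` lookups, and `_DISPATCH_TABLE` is an illustrative name):

```python
# Simplified sketch of the dispatch-registration pattern used by
# torchao tensor subclasses.
_DISPATCH_TABLE = {}

def implements(aten_ops_or_torch_fns):
    # Accept a single op/function or a list, so one handler can back both
    # torch.nn.functional.linear (torch_function level) and
    # aten.linear.default (torch_dispatch level).
    if not isinstance(aten_ops_or_torch_fns, (list, tuple)):
        aten_ops_or_torch_fns = [aten_ops_or_torch_fns]

    def decorator(func):
        for op in aten_ops_or_torch_fns:
            _DISPATCH_TABLE[op] = func
        return func

    return decorator
```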
torchao/quantization/linear_activation_quantized_tensor.py (1 addition, 1 deletion):

```diff
@@ -91,7 +91,7 @@ def to(self, *args, **kwargs):

 implements = LinearActivationQuantizedTensor.implements

-@implements(torch.nn.functional.linear)
+@implements([torch.nn.functional.linear, aten.linear.default])
 def _(func, types, args, kwargs):
     input_tensor, weight_tensor, bias = (
         args[0],
```
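
With both registrations in place, a quantized model should run under inference mode. A minimal end-to-end check, assuming torchao's `quantize_` and `int8_weight_only` APIs (a sketch, not part of the commit's test plan):

```python
import torch
import torch.nn as nn
from torchao.quantization import quantize_, int8_weight_only

# Quantize a toy model, then run it under inference mode; before this
# fix, aten.linear.default had no registered dispatch handler and the
# forward pass failed under torch.inference_mode().
model = nn.Sequential(nn.Linear(16, 16)).eval()
quantize_(model, int8_weight_only())

x = torch.randn(1, 16)
with torch.inference_mode():
    out = model(x)
print(out.shape)
```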
