Ipsedo · Ipsedo · Jul 4, 2024 · Jul 4, 2024
diff --git a/kan/networks/__init__.py b/kan/networks/__init__.py
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
-from .conv import Conv2dKan, Conv2dKanLayers
+from .conv import Conv2dKan
 from .hermite import Hermite
-from .linear import LinearKAN, LinearKanLayers
+from .linear import LinearKAN
 from .mlp import MLP
+from .networks import Conv2dKanLayers, LinearKanLayers
 from .spline import BSpline
 from .utils import ActivationFunction, InfoModule
diff --git a/kan/networks/conv.py b/kan/networks/conv.py
@@ -1,13 +1,12 @@
 # -*- coding: utf-8 -*-
-from typing import Callable, List, Tuple
+from typing import Callable
 
 import torch as th
 from torch import nn
 from torch.nn import functional as F
 from torch.nn.init import normal_, xavier_normal_
 
-from .linear import LinearKAN
-from .utils import ActivationFunction, InfoModule
+from .utils import ActivationFunction
 
 
 # pylint: disable=too-many-instance-attributes
@@ -27,7 +26,11 @@ def __init__(
         self.__act_fun = act_fun
         self.__res_act_fun = res_act_fun
 
-        self.__w = nn.Parameter(
+        self.__w_b = nn.Parameter(
+            th.ones(in_channels, out_channels, kernel_size * kernel_size, 1)
+        )
+
+        self.__w_s = nn.Parameter(
             th.ones(in_channels, out_channels, kernel_size * kernel_size, 1)
         )
 
@@ -41,7 +44,8 @@ def __init__(
             )
         )
 
-        xavier_normal_(self.__w)
+        xavier_normal_(self.__w_b, 1)
+        normal_(self.__w_s, 0, 1e-3)
         normal_(self.__c, 0, 1e-1)
 
         self.__in_channels = in_channels
@@ -66,8 +70,11 @@ def __unfold(self, x: th.Tensor) -> th.Tensor:
 
     def __activation(self, windowed_x: th.Tensor) -> th.Tensor:
         # sum over function approximation
-        return self.__res_act_fun(windowed_x) + th.sum(
-            self.__c * self.__act_fun(windowed_x), dim=-1
+        return th.sum(
+            self.__w_b * self.__res_act_fun(windowed_x)
+            + self.__w_s
+            * th.sum(self.__c * self.__act_fun(windowed_x), dim=-1),
+            dim=1,
         )
 
     def forward(self, x: th.Tensor) -> th.Tensor:
@@ -79,47 +86,8 @@ def forward(self, x: th.Tensor) -> th.Tensor:
         output_height = self.__get_output_size(h)
         output_width = self.__get_output_size(w)
 
-        # sum over input space : dim=1
         # sum over window : dim=2
         return th.sum(
-            th.sum(self.__w * self.__activation(self.__unfold(x)), dim=1),
+            self.__activation(self.__unfold(x)),
             dim=2,
         ).view(b, -1, output_height, output_width)
-
-
-class Conv2dKanLayers(nn.Sequential, InfoModule):
-    def __init__(
-        self,
-        channels: List[Tuple[int, int]],
-        kernel_sizes: List[int],
-        strides: List[int],
-        paddings: List[int],
-        linear_sizes: List[Tuple[int, int]],
-        act_fun: ActivationFunction,
-        res_act_fun: Callable[[th.Tensor], th.Tensor],
-    ) -> None:
-        assert (
-            len(channels) == len(kernel_sizes) == len(strides) == len(paddings)
-        )
-
-        conv_layers = [
-            nn.Sequential(
-                nn.BatchNorm2d(c_i, affine=False),
-                Conv2dKan(c_i, c_o, k, s, p, act_fun, res_act_fun),
-            )
-            for (c_i, c_o), k, s, p in zip(
-                channels, kernel_sizes, strides, paddings
-            )
-        ]
-
-        flatten_layer = [nn.Flatten(1, -1)]
-
-        clf_layers = [
-            nn.Sequential(
-                nn.BatchNorm1d(i, affine=False),
-                LinearKAN(i, o, act_fun, res_act_fun),
-            )
-            for i, o in linear_sizes
-        ]
-
-        super().__init__(*conv_layers + flatten_layer + clf_layers)
diff --git a/kan/networks/linear.py b/kan/networks/linear.py
@@ -1,11 +1,11 @@
 # -*- coding: utf-8 -*-
-from typing import Callable, List, Tuple
+from typing import Callable
 
 import torch as th
 from torch import nn
 from torch.nn.init import normal_, xavier_normal_
 
-from .utils import ActivationFunction, InfoModule
+from .utils import ActivationFunction
 
 
 class LinearKAN(nn.Module):
@@ -23,12 +23,14 @@ def __init__(
         self.__act_fun = act_fun
         self.__res_act_fun = res_act_fun
 
-        self.__w = nn.Parameter(th.ones(in_features, out_features))
+        self.__w_b = nn.Parameter(th.ones(in_features, out_features))
+        self.__w_s = nn.Parameter(th.ones(in_features, out_features))
         self.__c = nn.Parameter(
             th.ones(in_features, out_features, self.__act_fun.get_size())
         )
 
-        xavier_normal_(self.__w)
+        xavier_normal_(self.__w_b, 1)
+        normal_(self.__w_s, 0, 1e-3)
         normal_(self.__c, 0, 1e-1)
 
     def forward(self, x: th.Tensor) -> th.Tensor:
@@ -39,22 +41,7 @@ def forward(self, x: th.Tensor) -> th.Tensor:
         x = x.unsqueeze(2)
 
         return th.sum(
-            self.__w
-            * (
-                self.__res_act_fun(x)
-                + th.sum(self.__c * self.__act_fun(x), dim=-1)
-            ),
-            1,  # sum over input space
-        )
-
-
-class LinearKanLayers(nn.Sequential, InfoModule):
-    def __init__(
-        self,
-        layers: List[Tuple[int, int]],
-        act_fun: ActivationFunction,
-        res_act_fun: Callable[[th.Tensor], th.Tensor],
-    ) -> None:
-        super().__init__(
-            *[LinearKAN(c_i, c_o, act_fun, res_act_fun) for c_i, c_o in layers]
+            self.__w_b * self.__res_act_fun(x)
+            + self.__w_s * th.sum(self.__c * self.__act_fun(x), dim=-1),
+            -2,  # sum over input space
         )
diff --git a/kan/networks/networks.py b/kan/networks/networks.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+from typing import Callable, List, Tuple
+
+import torch as th
+from torch import nn
+
+from .conv import Conv2dKan
+from .linear import LinearKAN
+from .utils import ActivationFunction, InfoModule
+
+
+class LinearKanLayers(nn.Sequential, InfoModule):
+    """Sequential stack of ``LinearKAN`` layers.
+
+    One ``LinearKAN`` is built per ``(c_i, c_o)`` pair in ``layers``, in
+    order; every layer receives the same ``act_fun`` and ``res_act_fun``.
+    """
+
+    def __init__(
+        self,
+        layers: List[Tuple[int, int]],
+        act_fun: ActivationFunction,
+        res_act_fun: Callable[[th.Tensor], th.Tensor],
+    ) -> None:
+        super().__init__(
+            *[LinearKAN(c_i, c_o, act_fun, res_act_fun) for c_i, c_o in layers]
+        )
+
+
+class Conv2dKanLayers(nn.Sequential, InfoModule):
+    """Convolutional KAN blocks, a flatten, then linear KAN blocks.
+
+    Each convolutional block is ``BatchNorm2d(affine=False)`` followed by
+    ``Conv2dKan``; each classifier block is ``BatchNorm1d(affine=False)``
+    followed by ``LinearKAN``. ``nn.Flatten(1, -1)`` sits between the two.
+
+    Raises:
+        ValueError: if ``channels``, ``kernel_sizes``, ``strides`` and
+            ``paddings`` do not all have the same length.
+    """
+
+    def __init__(
+        self,
+        channels: List[Tuple[int, int]],
+        kernel_sizes: List[int],
+        strides: List[int],
+        paddings: List[int],
+        linear_sizes: List[Tuple[int, int]],
+        act_fun: ActivationFunction,
+        res_act_fun: Callable[[th.Tensor], th.Tensor],
+    ) -> None:
+        # Explicit check instead of ``assert``: asserts vanish under
+        # ``python -O`` and ``zip`` below would then truncate silently.
+        if not (
+            len(channels) == len(kernel_sizes) == len(strides) == len(paddings)
+        ):
+            raise ValueError(
+                "channels, kernel_sizes, strides and paddings must have "
+                "the same length"
+            )
+
+        conv_layers = [
+            nn.Sequential(
+                nn.BatchNorm2d(c_i, affine=False),
+                Conv2dKan(c_i, c_o, k, s, p, act_fun, res_act_fun),
+            )
+            for (c_i, c_o), k, s, p in zip(
+                channels, kernel_sizes, strides, paddings
+            )
+        ]
+
+        flatten_layer = [nn.Flatten(1, -1)]
+
+        clf_layers = [
+            nn.Sequential(
+                nn.BatchNorm1d(i, affine=False),
+                LinearKAN(i, o, act_fun, res_act_fun),
+            )
+            for i, o in linear_sizes
+        ]
+
+        super().__init__(*conv_layers + flatten_layer + clf_layers)