Merge pull request #53 from kotaro-kinoshita/fix/apache-license
Add license information
kotaro-kinoshita authored Dec 5, 2024
2 parents b4e6830 + 5f5fba0 commit 3cbe59b
Showing 5 changed files with 155 additions and 36 deletions.
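
This merge adds the Apache License 2.0 notice to the five files below (all carrying a `Copyright 2023 lyuwenyu` line, i.e. code derived from RT-DETR) and re-wraps a number of long lines. One way to confirm the headers landed is to scan the first lines of each touched file; the script below is a hypothetical check (the file list is taken from this diff, everything else is illustrative):

```python
from pathlib import Path

# the five files touched by this commit
FILES = [
    "src/yomitoku/models/layers/activate.py",
    "src/yomitoku/models/layers/rtdetr_backbone.py",
    "src/yomitoku/models/layers/rtdetr_hybrid_encoder.py",
    "src/yomitoku/models/layers/rtdetrv2_decoder.py",
    "src/yomitoku/postprocessor/rtdetr_postprocessor.py",
]

for rel in FILES:
    # the added header always contains this Apache-2.0 sentence
    head = "\n".join(Path(rel).read_text(encoding="utf-8").splitlines()[:20])
    status = "ok" if "Licensed under the Apache License, Version 2.0" in head else "missing"
    print(f"{status:7s} {rel}")
```
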
13 changes: 13 additions & 0 deletions src/yomitoku/models/layers/activate.py
@@ -1,3 +1,16 @@
# Copyright(c) 2023 lyuwenyu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch.nn as nn


34 changes: 28 additions & 6 deletions src/yomitoku/models/layers/rtdetr_backbone.py
@@ -1,4 +1,16 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
# Copyright 2023 lyuwenyu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import OrderedDict

@@ -47,7 +59,9 @@ def forward(self, x):
class BasicBlock(nn.Module):
expansion = 1

def __init__(self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"):
def __init__(
self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
):
super().__init__()

self.shortcut = shortcut
@@ -86,7 +100,9 @@ def forward(self, x):
class BottleNeck(nn.Module):
expansion = 4

def __init__(self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"):
def __init__(
self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"
):
super().__init__()

if variant == "a":
@@ -109,13 +125,17 @@ def __init__(self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"):
("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
(
"conv",
ConvNormLayer(ch_in, ch_out * self.expansion, 1, 1),
ConvNormLayer(
ch_in, ch_out * self.expansion, 1, 1
),
),
]
)
)
else:
self.short = ConvNormLayer(ch_in, ch_out * self.expansion, 1, stride)
self.short = ConvNormLayer(
ch_in, ch_out * self.expansion, 1, stride
)

self.act = nn.Identity() if act is None else get_activation(act)

@@ -136,7 +156,9 @@ def forward(self, x):


class Blocks(nn.Module):
def __init__(self, block, ch_in, ch_out, count, stage_num, act="relu", variant="b"):
def __init__(
self, block, ch_in, ch_out, count, stage_num, act="relu", variant="b"
):
super().__init__()

self.blocks = nn.ModuleList()
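
In the backbone hunks above, the reformatted `nn.Sequential(OrderedDict([...]))` block is the `variant == "d"` downsampling shortcut: a 2x2 average pool followed by a 1x1 projection (the ResNet-D trick), versus a strided 1x1 projection otherwise. A self-contained sketch of that shortcut, substituting plain `nn.Conv2d` + `nn.BatchNorm2d` for the repository's `ConvNormLayer` (an assumption for illustration only):

```python
import torch
import torch.nn as nn


class ShortcutD(nn.Module):
    """ResNet-D style projection shortcut: AvgPool2d, then 1x1 conv + norm.

    Stand-in for the ConvNormLayer-based block in the diff; shapes only.
    """

    def __init__(self, ch_in: int, ch_out: int, stride: int):
        super().__init__()
        layers = []
        if stride == 2:
            # matches nn.AvgPool2d(2, 2, 0, ceil_mode=True) in the hunk above
            layers.append(nn.AvgPool2d(2, 2, 0, ceil_mode=True))
        layers += [
            nn.Conv2d(ch_in, ch_out, kernel_size=1, bias=False),
            nn.BatchNorm2d(ch_out),
        ]
        self.short = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.short(x)


if __name__ == "__main__":
    x = torch.randn(1, 64, 56, 56)
    print(ShortcutD(64, 256, stride=2)(x).shape)  # torch.Size([1, 256, 28, 28])
```
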
38 changes: 31 additions & 7 deletions src/yomitoku/models/layers/rtdetr_hybrid_encoder.py
@@ -1,4 +1,16 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
# Copyright 2023 lyuwenyu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
from collections import OrderedDict
@@ -240,7 +252,9 @@ def __init__(
for in_channel in in_channels:
if version == "v1":
proj = nn.Sequential(
nn.Conv2d(in_channel, hidden_dim, kernel_size=1, bias=False),
nn.Conv2d(
in_channel, hidden_dim, kernel_size=1, bias=False
),
nn.BatchNorm2d(hidden_dim),
)
elif version == "v2":
@@ -276,7 +290,9 @@ def __init__(

self.encoder = nn.ModuleList(
[
TransformerEncoder(copy.deepcopy(encoder_layer), num_encoder_layers)
TransformerEncoder(
copy.deepcopy(encoder_layer), num_encoder_layers
)
for _ in range(len(use_encoder_idx))
]
)
@@ -331,7 +347,9 @@ def _reset_parameters(self):
# self.register_buffer(f'pos_embed{idx}', pos_embed)

@staticmethod
def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
def build_2d_sincos_position_embedding(
w, h, embed_dim=256, temperature=10000.0
):
""" """
grid_w = torch.arange(int(w), dtype=torch.float32)
grid_h = torch.arange(int(h), dtype=torch.float32)
@@ -369,7 +387,9 @@ def forward(self, feats):
src_flatten.device
)

memory: torch.Tensor = self.encoder[i](src_flatten, pos_embed=pos_embed)
memory: torch.Tensor = self.encoder[i](
src_flatten, pos_embed=pos_embed
)
proj_feats[enc_ind] = (
memory.permute(0, 2, 1)
.reshape(-1, self.hidden_dim, h, w)
@@ -381,9 +401,13 @@ def forward(self, feats):
for idx in range(len(self.in_channels) - 1, 0, -1):
feat_heigh = inner_outs[0]
feat_low = proj_feats[idx - 1]
feat_heigh = self.lateral_convs[len(self.in_channels) - 1 - idx](feat_heigh)
feat_heigh = self.lateral_convs[len(self.in_channels) - 1 - idx](
feat_heigh
)
inner_outs[0] = feat_heigh
upsample_feat = F.interpolate(feat_heigh, scale_factor=2.0, mode="nearest")
upsample_feat = F.interpolate(
feat_heigh, scale_factor=2.0, mode="nearest"
)
inner_out = self.fpn_blocks[len(self.in_channels) - 1 - idx](
torch.concat([upsample_feat, feat_low], dim=1)
)
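
For reference, the `build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0)` call that gets re-wrapped above builds the fixed positional encoding fed to the transformer encoder layers. The function body is not part of this hunk, so the following is an independent sketch of a standard 2D sine-cosine embedding with the same signature:

```python
import torch


def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
    """Return a [1, w*h, embed_dim] fixed sin-cos embedding over a w x h grid."""
    grid_w = torch.arange(int(w), dtype=torch.float32)
    grid_h = torch.arange(int(h), dtype=torch.float32)
    grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij")
    assert embed_dim % 4 == 0, "embed_dim must be divisible by 4 for 2D sin-cos"
    pos_dim = embed_dim // 4
    omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim
    omega = 1.0 / (temperature**omega)
    out_w = grid_w.flatten()[..., None] @ omega[None]  # [w*h, pos_dim]
    out_h = grid_h.flatten()[..., None] @ omega[None]  # [w*h, pos_dim]
    return torch.concat(
        [out_w.sin(), out_w.cos(), out_h.sin(), out_h.cos()], dim=1
    )[None, :, :]


print(build_2d_sincos_position_embedding(20, 20, 256).shape)  # torch.Size([1, 400, 256])
```
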
74 changes: 56 additions & 18 deletions src/yomitoku/models/layers/rtdetrv2_decoder.py
@@ -1,4 +1,17 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
# Scene Text Recognition Model Hub
# Copyright 2023 lyuwenyu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import functools
@@ -27,7 +40,9 @@ def inverse_sigmoid(x: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:


class MLP(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act="relu"):
def __init__(
self, input_dim, hidden_dim, output_dim, num_layers, act="relu"
):
super().__init__()
self.num_layers = num_layers
h = [hidden_dim] * (num_layers - 1)
@@ -178,7 +193,9 @@ def forward(
elif reference_points.shape[-1] == 4:
# reference_points [8, 480, None, 1, 4]
# sampling_offsets [8, 480, 8, 12, 2]
num_points_scale = self.num_points_scale.to(dtype=query.dtype).unsqueeze(-1)
num_points_scale = self.num_points_scale.to(
dtype=query.dtype
).unsqueeze(-1)
offset = (
sampling_offsets
* num_points_scale
@@ -313,7 +330,9 @@ def deformable_attention_core_func_v2(
_, Len_q, _, _, _ = sampling_locations.shape

split_shape = [h * w for h, w in value_spatial_shapes]
value_list = value.permute(0, 2, 3, 1).flatten(0, 1).split(split_shape, dim=-1)
value_list = (
value.permute(0, 2, 3, 1).flatten(0, 1).split(split_shape, dim=-1)
)

# sampling_offsets [8, 480, 8, 12, 2]
if method == "default":
@@ -342,7 +361,8 @@ def deformable_attention_core_func_v2(
elif method == "discrete":
# n * m, seq, n, 2
sampling_coord = (
sampling_grid_l * torch.tensor([[w, h]], device=value.device) + 0.5
sampling_grid_l * torch.tensor([[w, h]], device=value.device)
+ 0.5
).to(torch.int64)

# FIX ME? for rectangle input
@@ -369,7 +389,9 @@ def deformable_attention_core_func_v2(
attn_weights = attention_weights.permute(0, 2, 1, 3).reshape(
bs * n_head, 1, Len_q, sum(num_points_list)
)
weighted_sample_locs = torch.concat(sampling_value_list, dim=-1) * attn_weights
weighted_sample_locs = (
torch.concat(sampling_value_list, dim=-1) * attn_weights
)
output = weighted_sample_locs.sum(-1).reshape(bs, n_head * c, Len_q)

return output.permute(0, 2, 1)
@@ -584,7 +606,9 @@ def _build_input_proj_layer(self, feat_channels):
[
(
"conv",
nn.Conv2d(in_channels, self.hidden_dim, 1, bias=False),
nn.Conv2d(
in_channels, self.hidden_dim, 1, bias=False
),
),
(
"norm",
@@ -665,9 +689,13 @@ def _generate_anchors(
torch.arange(h), torch.arange(w), indexing="ij"
)
grid_xy = torch.stack([grid_x, grid_y], dim=-1)
grid_xy = (grid_xy.unsqueeze(0) + 0.5) / torch.tensor([w, h], dtype=dtype)
grid_xy = (grid_xy.unsqueeze(0) + 0.5) / torch.tensor(
[w, h], dtype=dtype
)
wh = torch.ones_like(grid_xy) * grid_size * (2.0**lvl)
lvl_anchors = torch.concat([grid_xy, wh], dim=-1).reshape(-1, h * w, 4)
lvl_anchors = torch.concat([grid_xy, wh], dim=-1).reshape(
-1, h * w, 4
)
anchors.append(lvl_anchors)

anchors = torch.concat(anchors, dim=1).to(device)
@@ -701,18 +729,22 @@ def _get_decoder_input(
)

enc_topk_bboxes_list, enc_topk_logits_list = [], []
enc_topk_memory, enc_topk_logits, enc_topk_bbox_unact = self._select_topk(
output_memory,
enc_outputs_logits,
enc_outputs_coord_unact,
self.num_queries,
enc_topk_memory, enc_topk_logits, enc_topk_bbox_unact = (
self._select_topk(
output_memory,
enc_outputs_logits,
enc_outputs_coord_unact,
self.num_queries,
)
)

# if self.num_select_queries != self.num_queries:
# raise NotImplementedError('')

if self.learn_query_content:
content = self.tgt_embed.weight.unsqueeze(0).tile([memory.shape[0], 1, 1])
content = self.tgt_embed.weight.unsqueeze(0).tile(
[memory.shape[0], 1, 1]
)
else:
content = enc_topk_memory.detach()

@@ -739,7 +771,9 @@ def _select_topk(
topk: int,
):
if self.query_select_method == "default":
_, topk_ind = torch.topk(outputs_logits.max(-1).values, topk, dim=-1)
_, topk_ind = torch.topk(
outputs_logits.max(-1).values, topk, dim=-1
)

elif self.query_select_method == "one2many":
_, topk_ind = torch.topk(outputs_logits.flatten(1), topk, dim=-1)
@@ -752,12 +786,16 @@

topk_coords = outputs_coords_unact.gather(
dim=1,
index=topk_ind.unsqueeze(-1).repeat(1, 1, outputs_coords_unact.shape[-1]),
index=topk_ind.unsqueeze(-1).repeat(
1, 1, outputs_coords_unact.shape[-1]
),
)

topk_logits = outputs_logits.gather(
dim=1,
index=topk_ind.unsqueeze(-1).repeat(1, 1, outputs_logits.shape[-1]),
index=topk_ind.unsqueeze(-1).repeat(
1, 1, outputs_logits.shape[-1]
),
)

topk_memory = memory.gather(
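
Most of the decoder hunks above only re-wrap the top-k query selection in `_select_topk`: each query is scored by its best class logit, `torch.topk` picks the indices, and `gather` pulls the matching rows from the unactivated boxes, the logits, and the encoder memory. A small standalone sketch of that pattern (tensor sizes invented for illustration):

```python
import torch

bs, num_queries, num_classes, hidden_dim, topk = 2, 300, 80, 256, 100

logits = torch.randn(bs, num_queries, num_classes)
boxes_unact = torch.randn(bs, num_queries, 4)
memory = torch.randn(bs, num_queries, hidden_dim)

# "default" selection: rank queries by their best class logit
_, topk_ind = torch.topk(logits.max(-1).values, topk, dim=-1)  # [bs, topk]

# gather the same rows from each per-query tensor
topk_boxes = boxes_unact.gather(
    dim=1, index=topk_ind.unsqueeze(-1).repeat(1, 1, boxes_unact.shape[-1])
)
topk_logits = logits.gather(
    dim=1, index=topk_ind.unsqueeze(-1).repeat(1, 1, logits.shape[-1])
)
topk_memory = memory.gather(
    dim=1, index=topk_ind.unsqueeze(-1).repeat(1, 1, memory.shape[-1])
)
print(topk_boxes.shape, topk_logits.shape, topk_memory.shape)
# torch.Size([2, 100, 4]) torch.Size([2, 100, 80]) torch.Size([2, 100, 256])
```
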
32 changes: 27 additions & 5 deletions src/yomitoku/postprocessor/rtdetr_postprocessor.py
@@ -1,4 +1,17 @@
"""Copyright(c) 2023 lyuwenyu. All Rights Reserved."""
# Copyright 2023 lyuwenyu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import torch
import torch.nn as nn
@@ -41,12 +54,16 @@ def forward(self, outputs, orig_target_sizes: torch.Tensor, threshold):
logits, boxes = outputs["pred_logits"], outputs["pred_boxes"]
# orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)

bbox_pred = torchvision.ops.box_convert(boxes, in_fmt="cxcywh", out_fmt="xyxy")
bbox_pred = torchvision.ops.box_convert(
boxes, in_fmt="cxcywh", out_fmt="xyxy"
)
bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1)

if self.use_focal_loss:
scores = F.sigmoid(logits)
scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
scores, index = torch.topk(
scores.flatten(1), self.num_top_queries, dim=-1
)
# TODO for older tensorrt
# labels = index % self.num_classes
labels = mod(index, self.num_classes)
@@ -60,7 +77,9 @@ def forward(self, outputs, orig_target_sizes: torch.Tensor, threshold):
scores = F.softmax(logits)[:, :, :-1]
scores, labels = scores.max(dim=-1)
if scores.shape[1] > self.num_top_queries:
scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
scores, index = torch.topk(
scores, self.num_top_queries, dim=-1
)
labels = torch.gather(labels, dim=1, index=index)
boxes = torch.gather(
boxes,
@@ -78,7 +97,10 @@ def forward(self, outputs, orig_target_sizes: torch.Tensor, threshold):

labels = (
torch.tensor(
[mscoco_label2category[int(x.item())] for x in labels.flatten()]
[
mscoco_label2category[int(x.item())]
for x in labels.flatten()
]
)
.to(boxes.device)
.reshape(labels.shape)
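
The post-processor hunks re-wrap the box decoding path: `torchvision.ops.box_convert` turns the normalized `cxcywh` predictions into `xyxy`, and multiplying by `orig_target_sizes.repeat(1, 2).unsqueeze(1)` scales them back to pixel coordinates of each original image (sizes given as width, height). A minimal sketch with dummy tensors:

```python
import torch
import torchvision

# two images, three normalized cxcywh boxes each
boxes = torch.tensor([[[0.5, 0.5, 0.2, 0.3],
                       [0.25, 0.75, 0.1, 0.1],
                       [0.6, 0.4, 0.3, 0.2]]]).repeat(2, 1, 1)
orig_target_sizes = torch.tensor([[640.0, 480.0], [1280.0, 720.0]])  # (width, height)

bbox_pred = torchvision.ops.box_convert(boxes, in_fmt="cxcywh", out_fmt="xyxy")
# scale (x1, y1, x2, y2) by (w, h, w, h) of each original image
bbox_pred = bbox_pred * orig_target_sizes.repeat(1, 2).unsqueeze(1)
print(bbox_pred[0, 0])  # tensor([256., 168., 384., 312.])
```
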
