From c8dfa80ee472abeed84b7896bd75cf5bb8a09217 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 1 Jan 2023 21:33:59 +0000 Subject: [PATCH 01/55] [py_src] handle `I`, `U`, `Y` and `V` --- py_src/erl_enum_expression_generator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/py_src/erl_enum_expression_generator.py b/py_src/erl_enum_expression_generator.py index 6eef502b..0e454a1c 100644 --- a/py_src/erl_enum_expression_generator.py +++ b/py_src/erl_enum_expression_generator.py @@ -14,8 +14,12 @@ def generic_visit(self, node): if type(node) is ast.Expression: self.visit(node.body) elif type(node) is ast.Constant: - self.expression = f'{node.value}' - self.expression_erlang = f'{node.value}' + if node.value in ['I', 'Y', 'U', 'V']: + self.expression = f'{ord(node.value)}' + self.expression_erlang = f'{ord(node.value)}' + else: + self.expression = f'{node.value}' + self.expression_erlang = f'{node.value}' elif type(node) is ast.UnaryOp: op = ErlEnumExpressionGenerator() op.visit(node.op) From 22d332037a86dda7c8cf16069cde219ac4992630 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 1 Jan 2023 22:03:52 +0000 Subject: [PATCH 02/55] [py_src] fix argument type spec in docs --- py_src/func_variant.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/py_src/func_variant.py b/py_src/func_variant.py index d985e688..f5b6c423 100644 --- a/py_src/func_variant.py +++ b/py_src/func_variant.py @@ -386,7 +386,10 @@ def inline_docs_elixir(self, is_instance_method: bool, module_name: str) -> str: if len(positional_args) > 0: parameter_info_doc.write("\n ##### Positional Arguments\n") for (arg_name, _, argtype) in positional_args: - argtype1 = map_argtype_in_docs('elixir', argtype, classname=self.classname) + argtype1 = map_argtype_in_docs('elixir', argtype) #, classname=self.classname) + if argtype1.startswith("Evision."): + if not argtype1.endswith(".t()"): + argtype1 = f"{argtype1}.t()" normalized_arg_name = map_argname('elixir', arg_name) normalized_arg_name = normalized_arg_name.replace(":", "") if parameter_info.get(normalized_arg_name, None) is None: @@ -404,7 +407,10 @@ def inline_docs_elixir(self, is_instance_method: bool, module_name: str) -> str: if len(optional_args) > 0: parameter_info_doc.write(" ##### Keyword Arguments\n") for (arg_name, _, argtype) in optional_args: - argtype1 = map_argtype_in_docs('elixir', argtype, classname=self.classname) + argtype1 = map_argtype_in_docs('elixir', argtype)# , classname=self.classname) + if argtype1.startswith("Evision."): + if not argtype1.endswith(".t()"): + argtype1 = f"{argtype1}.t()" normalized_arg_name = map_argname('elixir', arg_name) normalized_arg_name = normalized_arg_name.replace(":", "") if parameter_info.get(normalized_arg_name, None) is None: @@ -425,16 +431,31 @@ def inline_docs_elixir(self, is_instance_method: bool, module_name: str) -> str: out_args_name = [o[0] for o in self.py_outlist] if len(out_args_name) > 0 and (out_args_name[0] in ['retval', 'self']) and self.py_outlist[0][1] == -1: if out_args_name[0] == 'retval': - return_values.insert(0, ('retval', map_argtype_in_docs('elixir', self.rettype, classname=self.classname))) + rettype_docs = map_argtype_in_docs('elixir', self.rettype) + if rettype_docs.startswith("Evision."): + if not rettype_docs.endswith(".t()"): + rettype_docs = f"{rettype_docs}.t()" + return_values.insert(0, ('retval', rettype_docs)) elif out_args_name[0] == 'self': - return_values.insert(0, ('self', map_argtype_in_docs('elixir', self.name, 
classname=self.classname))) + selftype_docs = map_argtype_in_docs('elixir', self.name) + if selftype_docs.startswith("Evision."): + if not selftype_docs.endswith(".t()"): + selftype_docs = f"{selftype_docs}.t()" + return_values.insert(0, ('self', selftype_docs)) elif self.isconstructor: - return_values.insert(0, ('self', map_argtype_in_docs('elixir', self.classname, classname=self.classname))) + selftype_docs = map_argtype_in_docs('elixir', self.classname) + if selftype_docs.startswith("Evision."): + if not selftype_docs.endswith(".t()"): + selftype_docs = f"{selftype_docs}.t()" + return_values.insert(0, ('self', map_argtype_in_docs('elixir', selftype_docs))) if len(return_values) > 0: parameter_info_doc.write(" ##### Return\n") for (arg_name, argtype) in return_values: - argtype1 = map_argtype_in_docs('elixir', argtype, classname=self.classname) + argtype1 = map_argtype_in_docs('elixir', argtype)# , classname=self.classname) + if argtype1.startswith("Evision."): + if not argtype1.endswith(".t()"): + argtype1 = f"{argtype1}.t()" normalized_arg_name = map_argname('elixir', arg_name) normalized_arg_name = normalized_arg_name.replace(":", "") if parameter_info.get(normalized_arg_name, None) is None: From 43d46a8bd72758deb8c278d7a207dc6b8716ee57 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 1 Jan 2023 22:05:09 +0000 Subject: [PATCH 03/55] [py_src] handle `N` --- py_src/erl_enum_expression_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py_src/erl_enum_expression_generator.py b/py_src/erl_enum_expression_generator.py index 0e454a1c..55403c49 100644 --- a/py_src/erl_enum_expression_generator.py +++ b/py_src/erl_enum_expression_generator.py @@ -14,7 +14,7 @@ def generic_visit(self, node): if type(node) is ast.Expression: self.visit(node.body) elif type(node) is ast.Constant: - if node.value in ['I', 'Y', 'U', 'V']: + if node.value in ['I', 'Y', 'U', 'V', 'N']: self.expression = f'{ord(node.value)}' self.expression_erlang = f'{ord(node.value)}' else: From 4120ac6827c388724025b0f205a54a07674440d5 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 1 Jan 2023 22:05:38 +0000 Subject: [PATCH 04/55] [py_src] handle cuda modules --- mix.exs | 4 ++- py_src/helper.py | 74 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/mix.exs b/mix.exs index 9b03d137..dc1eea9a 100644 --- a/mix.exs +++ b/mix.exs @@ -860,7 +860,7 @@ defmodule Evision.MixProject do cudaoptflow: false, cudastereo: false, cudawarping: false, - cudev: false, + cudev: true, ] } defp module_configuration, do: @module_configuration @@ -885,9 +885,11 @@ defmodule Evision.MixProject do defp generate_cmake_options() do mc = module_configuration() enable_opencv_contrib = true + enable_opencv_cuda = true all_modules = Enum.map(mc.opencv, fn {m, _} -> m end) ++ Enum.map(mc.opencv_contrib, fn {m, _} -> m end) enabled_modules = Enum.filter(mc.opencv, fn {_, e} -> e end) ++ (if enable_opencv_contrib do Enum.filter(mc.opencv_contrib, fn {_, e} -> e end) else [] end) + ++ (if enable_opencv_cuda do Enum.filter(mc.cuda, fn {_, e} -> e end) else [] end) disabled_modules = Enum.filter(mc.opencv, fn {_, e} -> !e end) ++ (if enable_opencv_contrib do Enum.filter(mc.opencv_contrib, fn {_, e} -> !e end) else [] end) enabled_modules = Keyword.keys(enabled_modules) diff --git a/py_src/helper.py b/py_src/helper.py index baaf392d..b9594ac7 100644 --- a/py_src/helper.py +++ b/py_src/helper.py @@ -175,7 +175,8 @@ def make_elixir_module_names(module_name: Optional[str] = None, 
separated_ns: Op 'cuda': 'CUDA', 'hfs': 'HFS', 'dnn_superres': "DNNSuperRes", - 'DnnSuperResImpl': 'DNNSuperResImpl' + 'DnnSuperResImpl': 'DNNSuperResImpl', + 'cudacodec': 'CUDACodec' } if module_name is not None: return mapping.get(module_name, f"{module_name[0].upper()}{module_name[1:]}") @@ -349,11 +350,20 @@ def is_basic_types(argtype: str): if argtype.startswith("vector<"): argtype = argtype[len("vector<"):-1] return is_basic_types(argtype) - return argtype in ['bool', 'float', 'double', 'uchar', 'string', 'void*', 'String', 'c_string', 'unsigned'] or \ + return argtype in ['bool', 'float', 'double', 'string', 'void*', 'String', 'c_string'] or \ is_int_type(argtype) or is_tuple_type(argtype) def is_int_type(argtype): int_types = [ + 'uchar', + 'uint8_t', + 'uint16_t', + 'uint32_t', + 'uint64_t', + 'int8_t', + 'int16_t', + 'int32_t', + 'int64_t', 'unsigned', 'int', 'size_t', @@ -390,7 +400,8 @@ def is_int_type(argtype): "VolumeType", "Volume", "kinfu_VolumeType", - "text_decoder_mode" + "text_decoder_mode", + "cuda_ConnectedComponentsAlgorithmsTypes" ] return argtype in int_types @@ -564,7 +575,7 @@ def get_elixir_module_name(cname, double_quote_if_has_dot=False): elixir_module_name = f'"{elixir_module_name}"' return elixir_module_name -def is_struct(argtype: str, also_get: Optional[str] = None, classname: Optional[str] = None): +def is_struct(argtype: str, also_get: Optional[str] = None, classname: Optional[str] = None, decl: list=None): argtype = argtype.replace("std::", "").replace("cv::", "").replace("::", "_") special_structs = { # todo: UMat should be in its own module @@ -832,7 +843,22 @@ def is_struct(argtype: str, also_get: Optional[str] = None, classname: Optional[ "Pose3D": "Evision.PPFMatch3D.Pose3D", "PPF3DDetector": "Evision.PPFMatch3D.PPF3DDetector", - "WeChatQRCode": "Evision.WeChatQRCode.WeChatQRCode" + "WeChatQRCode": "Evision.WeChatQRCode.WeChatQRCode", + + # CUDA + "cuda_BackgroundSubtractorMOG": "Evision.CUDA.BackgroundSubtractorMOG", + "cuda_BackgroundSubtractorMOG2": "Evision.CUDA.BackgroundSubtractorMOG2", + "cuda_CascadeClassifier": "Evision.CUDA.CascadeClassifier", + "cuda_CLAHE": "Evision.CUDA.CLAHE", + "cudacodec_VideoWriter": "Evision.CUDACodec.VideoWriter", + "cuda_DescriptorMatcher": "Evision.CUDA.DescriptorMatcher", + "cuda_DisparityBilateralFilter": "Evision.CUDA.DisparityBilateralFilter", + "cuda_FastFeatureDetector": "Evision.CUDA.FastFeatureDetector", + "cuda_ORB": "Evision.CUDA.ORB", + "cuda_StereoBeliefPropagation": "Evision.CUDA.StereoBeliefPropagation", + "cuda_StereoBM": "Evision.CUDA.StereoBM", + "cuda_StereoConstantSpaceBP": "Evision.CUDA.StereoConstantSpaceBP", + "cuda_StereoSGM": "Evision.CUDA.StereoSGM", } # argtype => classname => module name @@ -904,7 +930,8 @@ def is_struct(argtype: str, also_get: Optional[str] = None, classname: Optional[ "ximgproc": "XImgProc", "xphoto": "XPhoto", "detail": "Detail", - "utils": "Utils" + "utils": "Utils", + "cudacodec": "CUDACodec" } argtype = argtype.strip() @@ -924,6 +951,9 @@ def is_struct(argtype: str, also_get: Optional[str] = None, classname: Optional[ if not arg_is_struct: if is_basic_types(argtype): return False + if classname is not None and decl is not None and len(classname) == 0 and len(decl) > 0: + if decl[0].startswith('cv.'): + classname = decl[0][3:].replace('.', '_') if classname: module_class = classname.split("_", maxsplit=2) if len(module_class) == 2: @@ -939,6 +969,9 @@ def is_struct(argtype: str, also_get: Optional[str] = None, classname: Optional[ elif module_name == 'cuda' and 
argtype == "cuda_GpuMat": arg_is_struct = True argtype = "Evision.CUDA.GpuMat" + elif module_name == 'cuda' and class_name == "Filter": + arg_is_struct = True + argtype = "Evision.CUDA.Filter" elif module_name == 'detail' and argtype in ['vector', 'vector']: arg_is_struct = True if argtype == 'vector': @@ -990,7 +1023,7 @@ def is_struct(argtype: str, also_get: Optional[str] = None, classname: Optional[ else: return arg_is_struct, second_ret -def map_argtype_in_docs(kind: str, argtype: str, classname: str) -> str: +def map_argtype_in_docs(kind: str, argtype: str, classname: str="") -> str: if kind == 'elixir': return map_argtype_in_docs_elixir(kind, argtype, classname) elif kind == 'erlang': @@ -1177,8 +1210,8 @@ def map_argtype_in_spec_erlang(classname: str, argtype: str, is_in: bool, decl: argtype_inner = ", ".join([map_argtype_in_spec_erlang(classname, a.strip(), is_in, decl) for a in argtype[len('std::pair<'):-1].split(",")]) spec_type = '{' + argtype_inner + '}' return spec_type - elif is_struct(argtype, classname=classname): - _, struct_name = is_struct(argtype, also_get='struct_name', classname=classname) + elif is_struct(argtype, classname=classname, decl=decl): + _, struct_name = is_struct(argtype, also_get='struct_name', classname=classname, decl=decl) ty = struct_name.replace('.', '_').lower() return f'#{ty}' + '{}' elif argtype in manual_type_spec_map: @@ -1210,10 +1243,12 @@ def map_argtype_in_spec_erlang(classname: str, argtype: str, is_in: bool, decl: return '#evision_aruco_board{}' else: print(f'warning: generate_spec: unknown argtype `{argtype}`, input_arg? {is_in}, class={classname}') + raise RuntimeError("erlang spec") return 'term()' def map_argtype_in_spec_elixir(classname: str, argtype: str, is_in: bool, decl: list) -> str: global vec_out_types + argtype = argtype.strip() if len(argtype) > 0 and argtype[-1] == '*': if argtype == 'char*' or argtype == 'uchar*': return 'binary()' @@ -1227,10 +1262,6 @@ def map_argtype_in_spec_elixir(classname: str, argtype: str, is_in: bool, decl: if argtype.startswith("cv::"): argtype = argtype[4:] - argtype = argtype.strip() - if argtype.startswith("cv::"): - argtype = argtype[4:] - if is_int_type(argtype): return 'integer()' elif argtype == 'bool': @@ -1247,7 +1278,7 @@ def map_argtype_in_spec_elixir(classname: str, argtype: str, is_in: bool, decl: return ':ok' elif argtype == 'Range': return '{integer(), integer()} | :all' - elif is_in and argtype in ['Mat', 'UMat', 'cv::Mat', 'cv::UMat']: + elif is_in and argtype in ['Mat', 'UMat', 'cv::Mat', 'cv::UMat', 'cuda::GpuMat']: return 'Evision.Mat.maybe_mat_in()' elif argtype in evision_structrised_classes: return f'Evision.{argtype}.t()' @@ -1272,8 +1303,8 @@ def map_argtype_in_spec_elixir(classname: str, argtype: str, is_in: bool, decl: argtype_inner = ", ".join([map_argtype_in_spec_elixir(classname, a.strip(), is_in, decl) for a in argtype[len('std::pair<'):-1].split(",")]) spec_type = '{' + argtype_inner + '}' return spec_type - elif is_struct(argtype, classname=classname): - _, struct_name = is_struct(argtype, also_get='struct_name', classname=classname) + elif is_struct(argtype, classname=classname, decl=decl): + _, struct_name = is_struct(argtype, also_get='struct_name', classname=classname, decl=decl) return f'{struct_name}.t()' elif argtype in manual_type_spec_map: return manual_type_spec_map[argtype] @@ -1298,10 +1329,11 @@ def map_argtype_in_spec_elixir(classname: str, argtype: str, is_in: bool, decl: return f'Evision.CUDA.GpuMat.t()' if argtype == 'IndexParams' or argtype == 
'SearchParams' or argtype == 'Moments': return f'map()' - if argtype in ['Board', 'Dictionary'] and len(decl) > 0 and decl[0].startswith("cv.aruco."): - return f'Evision.ArUco.Board.t()' - if argtype in ['Board', 'Dictionary'] and len(decl) > 0 and decl[0].startswith("cv.aruco."): - return f'Evision.ArUco.Board.t()' + if len(decl) > 0 and decl[0].startswith("cv.aruco.") and argtype in ['Board', 'Dictionary']: + if argtype == 'Board': + return f'Evision.ArUco.Board.t()' + elif argtype == 'Dictionary': + return f'Evision.ArUco.Dictionary.t()' else: print(f'warning: generate_spec: unknown argtype `{argtype}`, input_arg? {is_in}, class={classname}') return 'term()' @@ -1329,7 +1361,7 @@ def map_argtype_to_guard_elixir(argname, argtype, classname: Optional[str] = Non elif is_struct(argtype, classname=classname): _, struct_name = is_struct(argtype, also_get='struct_name', classname=classname) if struct_name == 'Evision.Mat': - return f'(is_struct({argname}, Evision.Mat) or is_struct({argname}, Nx.Tensor))' + return f'(is_struct({argname}, Evision.Mat) or is_struct({argname}, Evision.CUDA.GpuMat) or is_struct({argname}, Nx.Tensor))' else: return f'is_struct({argname}, {struct_name})' elif is_ref_or_struct(argtype): From 6f5a4fd71bfe7e555611385a77fa8f3047f603f6 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 2 Jan 2023 01:17:54 +0000 Subject: [PATCH 05/55] improved handlings for Ptr --- c_src/erlcompat.hpp | 27 ++++++++++++++++++--------- py_src/evision_templates.py | 9 +++++++-- py_src/func_info.py | 20 ++++++++++++++------ py_src/helper.py | 10 ++-------- 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/c_src/erlcompat.hpp b/c_src/erlcompat.hpp index 4ace8713..5e90b243 100644 --- a/c_src/erlcompat.hpp +++ b/c_src/erlcompat.hpp @@ -68,15 +68,24 @@ ERL_NIF_TERM evision_from(ErlNifEnv *env, const TYPE& src) return evision_from(env, ptr); \ } -#define CV_ERL_TO_CLASS_PTR(TYPE) \ -template<> \ -bool evision_to(ErlNifEnv *env, ERL_NIF_TERM dst, TYPE*& src, const ArgInfo& info) \ -{ \ - Ptr ptr; \ - \ - if (!evision_to(env, dst, ptr, info)) return false; \ - src = ptr; \ - return true; \ +#define CV_ERL_TO_CLASS_PTR(TYPE) \ +template<> \ +bool evision_to(ErlNifEnv *env, ERL_NIF_TERM dst, TYPE*& src, const ArgInfo& info) \ +{ \ + Ptr ptr; \ + \ + if (!evision_to(env, dst, ptr, info)) { \ + if (info.outputarg) return true; \ + return info.has_default; \ + } \ + if (ptr.get() == nullptr && info.has_default) { \ + return true; \ + } else { \ + if (ptr.get() == nullptr && info.outputarg) return true; \ + src = ptr; \ + } \ + \ + return ptr.get() == nullptr; \ } #define CV_ERL_FROM_CLASS_PTR(TYPE) \ diff --git a/py_src/evision_templates.py b/py_src/evision_templates.py index 778f4726..98069d6d 100644 --- a/py_src/evision_templates.py +++ b/py_src/evision_templates.py @@ -397,8 +397,11 @@ def __enabled_modules__ do } static bool to(ErlNifEnv *env, ERL_NIF_TERM src, ${cname}& dst, const ArgInfo& info) { - if(!src || evision::nif::check_nil(env, src)) - return true; + if (evision::nif::check_nil(env, src)) { + if (info.outputarg) return true; + return info.has_default; + } + ${cname} * dst_ = nullptr; if (evision_${name}_getp(env, src, dst_)) { @@ -406,6 +409,8 @@ def __enabled_modules__ do return true; } ${mappable_code} + if (info.has_default) return true; + failmsg(env, "Expected ${cname} for argument '%s'", info.name); return false; } diff --git a/py_src/func_info.py b/py_src/func_info.py index 1e9180c5..fb74737c 100644 --- a/py_src/func_info.py +++ b/py_src/func_info.py @@ -205,6 +205,12 @@ 
def gen_code(self, codegen): if tp.endswith("*"): defval0 = "0" tp_candidates = [a.tp, normalize_class_name(self.namespace + "." + a.tp), normalize_class_name(self.classname + "." + a.tp)] + + if "::" in tp: + underscore_type = tp.replace("::", "_") + else: + underscore_type = tp_candidates[1].replace(".", "_") + if any(tp in codegen.enums.keys() for tp in tp_candidates): defval0 = "static_cast<%s>(%d)" % (a.tp, 0) @@ -217,7 +223,7 @@ def gen_code(self, codegen): defval = f"static_cast>({a.defval})" arg_type_info = ArgTypeInfo(f"std::underlying_type_t<{arg_type_info.atype}>", arg_type_info.format_str, defval, True, True) a.defval = defval - + defval = a.defval if not defval: defval = arg_type_info.default_value @@ -252,6 +258,8 @@ def gen_code(self, codegen): code_cvt_list.append("convert_to_char(env, %s, &%s, %s)" % (erl_term, a.name, a.crepr(defval))) elif a.tp == 'FileStorage': code_cvt_list.append("evision_to_safe(env, %s, ptr_%s, %s)" % (erl_term, a.name, a.crepr(defval))) + elif underscore_type in all_classes and all_classes[underscore_type].issimple is False: + code_cvt_list.append("evision_to_safe(env, %s, ptr_%s, %s)" % (erl_term, a.name, a.crepr(defval))) else: code_cvt_list.append("evision_to_safe(env, %s, %s, %s)" % (erl_term, a.name, a.crepr(defval))) if elixir_argname == 'outBlobNames': @@ -264,18 +272,18 @@ def gen_code(self, codegen): if arg_type_info.atype == "QRCodeEncoder_Params": code_decl += " QRCodeEncoder::Params %s=%s;\n" % (a.name, defval) else: - if arg_type_info.atype == "FileStorage": - code_decl += " Ptr<%s> %s=ptr_%s;\n" % (arg_type_info.atype, a.name, defval) - code_from_ptr += "%s& %s = *ptr_%s.get();\n" % (arg_type_info.atype, a.name, a.name) + if arg_type_info.atype == "FileStorage" or (underscore_type in all_classes and all_classes[underscore_type].issimple is False): + code_decl += " Ptr<%s> ptr_%s;\n" % (arg_type_info.atype, a.name,) + code_from_ptr += " %s %s; if (ptr_%s.get()) { %s = *ptr_%s.get(); } else { %s = %s; }\n " % (arg_type_info.atype, a.name, a.name, a.name, a.name, a.name, defval) else: code_decl += " %s %s=%s;\n" % (arg_type_info.atype, a.name, defval) else: if a.name == "nodeName": code_decl += " %s %s = String();\n" % (arg_type_info.atype, a.name) else: - if arg_type_info.atype == "FileStorage": + if arg_type_info.atype == "FileStorage" or (underscore_type in all_classes and all_classes[underscore_type].issimple is False): code_decl += " Ptr<%s> ptr_%s;\n" % (arg_type_info.atype, a.name) - code_from_ptr += "%s& %s = *ptr_%s.get();\n" % (arg_type_info.atype, a.name, a.name) + code_from_ptr += " %s %s; if (ptr_%s.get()) { %s = *ptr_%s.get(); }\n " % (arg_type_info.atype, a.name, a.name, a.name, a.name) else: code_decl += " %s %s;\n" % (arg_type_info.atype, a.name) diff --git a/py_src/helper.py b/py_src/helper.py index b9594ac7..dda6dc09 100644 --- a/py_src/helper.py +++ b/py_src/helper.py @@ -1278,7 +1278,7 @@ def map_argtype_in_spec_elixir(classname: str, argtype: str, is_in: bool, decl: return ':ok' elif argtype == 'Range': return '{integer(), integer()} | :all' - elif is_in and argtype in ['Mat', 'UMat', 'cv::Mat', 'cv::UMat', 'cuda::GpuMat']: + elif is_in and argtype in ['Mat', 'UMat', 'cv::Mat', 'cv::UMat']: return 'Evision.Mat.maybe_mat_in()' elif argtype in evision_structrised_classes: return f'Evision.{argtype}.t()' @@ -1325,8 +1325,6 @@ def map_argtype_in_spec_elixir(classname: str, argtype: str, is_in: bool, decl: else: if argtype == 'LayerId': return 'term()' - if argtype == 'GpuMat' or argtype == 'cuda::GpuMat': - return 
f'Evision.CUDA.GpuMat.t()' if argtype == 'IndexParams' or argtype == 'SearchParams' or argtype == 'Moments': return f'map()' if len(decl) > 0 and decl[0].startswith("cv.aruco.") and argtype in ['Board', 'Dictionary']: @@ -1361,7 +1359,7 @@ def map_argtype_to_guard_elixir(argname, argtype, classname: Optional[str] = Non elif is_struct(argtype, classname=classname): _, struct_name = is_struct(argtype, also_get='struct_name', classname=classname) if struct_name == 'Evision.Mat': - return f'(is_struct({argname}, Evision.Mat) or is_struct({argname}, Evision.CUDA.GpuMat) or is_struct({argname}, Nx.Tensor))' + return f'(is_struct({argname}, Evision.Mat) or is_struct({argname}, Nx.Tensor))' else: return f'is_struct({argname}, {struct_name})' elif is_ref_or_struct(argtype): @@ -1371,8 +1369,6 @@ def map_argtype_to_guard_elixir(argname, argtype, classname: Optional[str] = Non else: if argtype == 'LayerId': return '' - if argtype == 'GpuMat' or argtype == 'cuda::GpuMat': - return f'is_list({argname})' if argtype == 'IndexParams' or argtype == 'SearchParams' or argtype == 'Moments': return f'is_map({argname})' else: @@ -1412,8 +1408,6 @@ def map_argtype_to_guard_erlang(argname, argtype, classname: Optional[str] = Non else: if argtype == 'LayerId': return '' - if argtype == 'GpuMat' or argtype == 'cuda::GpuMat': - return f'is_list({argname})' if argtype == 'IndexParams' or argtype == 'SearchParams' or argtype == 'Moments': return f'is_map({argname})' else: From b779dab382272418d19fd9dc2ae86f76a0308257 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 2 Jan 2023 01:18:25 +0000 Subject: [PATCH 06/55] structurise Evision.CUDA.GpuMat --- c_src/evision.cpp | 81 ++++++++++++++++++++++++------ c_src/modules/evision_mat_api.h | 9 +++- py_src/evision_structures.py | 89 +++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 16 deletions(-) diff --git a/c_src/evision.cpp b/c_src/evision.cpp index 8047eca3..b73181ff 100644 --- a/c_src/evision.cpp +++ b/c_src/evision.cpp @@ -208,6 +208,56 @@ ERL_NIF_TERM evision_from_as_map(ErlNifEnv *env, const T& src, ERL_NIF_TERM res_ } } +template<> +ERL_NIF_TERM evision_from_as_map(ErlNifEnv *env, const cv::Ptr& src, ERL_NIF_TERM res_term, const char * class_name, bool& success) { + const size_t num_items = 7; + size_t item_index = 0; + + ERL_NIF_TERM keys[num_items]; + ERL_NIF_TERM values[num_items]; + + keys[item_index] = enif_make_atom(env, "ref"); + values[item_index] = res_term; + item_index++; + + keys[item_index] = enif_make_atom(env, "class"); + values[item_index] = enif_make_atom(env, class_name); + item_index++; + + keys[item_index] = enif_make_atom(env, "channels"); + values[item_index] = enif_make_int(env, src->channels()); + item_index++; + + keys[item_index] = enif_make_atom(env, "type"); + values[item_index] = __evision_get_mat_type(env, src->type()); + item_index++; + + keys[item_index] = enif_make_atom(env, "raw_type"); + values[item_index] = enif_make_int(env, src->type()); + item_index++; + + keys[item_index] = enif_make_atom(env, "elemSize"); + values[item_index] = enif_make_int(env, src->elemSize()); + item_index++; + + keys[item_index] = enif_make_atom(env, "shape"); + ERL_NIF_TERM shape[3]; + shape[0] = enif_make_int(env, src->rows); + shape[1] = enif_make_int(env, src->cols); + shape[2] = enif_make_int(env, src->channels()); + values[item_index] = enif_make_tuple_from_array(env, shape, 3); + item_index++; + + ERL_NIF_TERM map; + if (enif_make_map_from_arrays(env, keys, values, item_index, &map)) { + success = true; + return map; + } else { 
+ success = false; + return evision::nif::error(env, "enif_make_map_from_arrays failed in evision_from_as_map"); + } +} + template <> ERL_NIF_TERM evision_from_as_binary(ErlNifEnv *env, const std::vector& src, bool& success) { size_t n = static_cast(src.size()); @@ -518,14 +568,19 @@ struct Evision_Converter< cv::Ptr > { static ERL_NIF_TERM from(ErlNifEnv *env, const cv::Ptr& p) { - if (!p) - evision::nif::atom(env, "nil"); + if (!p) { + return evision::nif::atom(env, "nil"); + } + return evision_from(env, *p); } static bool to(ErlNifEnv * env, ERL_NIF_TERM o, Ptr& p, const ArgInfo& info) { - if (evision::nif::check_nil(env, o)) - return true; + if (evision::nif::check_nil(env, o)) { + if (info.outputarg) return true; + return info.has_default; + } + p = makePtr(); return evision_to(env, o, *p, info); } @@ -538,12 +593,16 @@ bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, void*& ptr, const ArgInfo& inf return true; } - CV_UNUSED(info); - ErlNifSInt64 i64; - if (!enif_get_int64(env, obj, (ErlNifSInt64 *)&i64)) - return false; + if (!enif_get_int64(env, obj, (ErlNifSInt64 *)&i64)) { + return info.has_default; + } + ptr = reinterpret_cast(i64); + if (ptr == nullptr && info.has_default) { + return true; + } + return ptr != nullptr; } @@ -2234,12 +2293,6 @@ static int convert_to_char(ErlNifEnv *env, ERL_NIF_TERM o, char *dst, const ArgI /************************************************************************/ -struct ConstDef -{ - const char * name; - long long val; -}; - #include "evision_generated_modules_content.h" static void destruct_Mat(ErlNifEnv *env, void *args) { diff --git a/c_src/modules/evision_mat_api.h b/c_src/modules/evision_mat_api.h index 354f5a66..cb931e18 100644 --- a/c_src/modules/evision_mat_api.h +++ b/c_src/modules/evision_mat_api.h @@ -3,8 +3,7 @@ #include -static ERL_NIF_TERM _evision_get_mat_type(ErlNifEnv *env, const cv::Mat& img) { - int type = img.type(); +static ERL_NIF_TERM __evision_get_mat_type(ErlNifEnv *env, int type) { uint8_t depth = type & CV_MAT_DEPTH_MASK; switch ( depth ) { @@ -20,6 +19,11 @@ static ERL_NIF_TERM _evision_get_mat_type(ErlNifEnv *env, const cv::Mat& img) { } } +static ERL_NIF_TERM _evision_get_mat_type(ErlNifEnv *env, const cv::Mat& img) { + int type = img.type(); + return __evision_get_mat_type(env, type); +} + static ERL_NIF_TERM _evision_get_mat_shape(ErlNifEnv *env, const cv::Mat& img) { cv::MatSize size = img.size; int channels = img.channels(); @@ -40,6 +44,7 @@ static ERL_NIF_TERM _evision_get_mat_shape(ErlNifEnv *env, const cv::Mat& img) { shape[dims - 1] = enif_make_int(env, channels); } ERL_NIF_TERM ret = enif_make_tuple_from_array(env, shape, dims); + enif_free(shape); return ret; } diff --git a/py_src/evision_structures.py b/py_src/evision_structures.py index 69239139..2acb45b4 100644 --- a/py_src/evision_structures.py +++ b/py_src/evision_structures.py @@ -75,6 +75,91 @@ def __to_struct__(pass_through) do evision_internal_structurise:to_struct(Any). """ +gpumat_struct_elixir = ' @typedoc """\n' + \ +""" Type that represents an `Evision.CUDA.GpuMat` struct. + + - **channels**: `int`. + + The number of matrix channels. + + - **type**: `Evision.Mat.mat_type()`. + + Type of the matrix elements, following `:nx`'s convention. + + - **raw_type**: `int`. + + The raw value returned from `int cv::Mat::type()`. + + - **shape**: `tuple`. + + The shape of the matrix. + + - **elemSize**: `integer()`. + + Element size in bytes. + + - **ref**: `reference`. + + The underlying erlang resource variable. 
+ +""" + \ + ' """' + \ +""" + @type t :: %__MODULE__{ + channels: integer(), + type: Evision.Mat.mat_type(), + raw_type: integer(), + shape: tuple(), + elemSize: integer(), + ref: reference() + } + @enforce_keys [:channels, :type, :raw_type, :shape, :elemSize, :ref] + defstruct [:channels, :type, :raw_type, :shape, :elemSize, :ref] + alias __MODULE__, as: T + + @doc false + def __to_struct__(%{ + :class => :"CUDA.GpuMat", + :channels => channels, + :type => type, + :raw_type => raw_type, + :shape => shape, + :elemSize => elemSize, + :ref => ref + }) do + %T{ + channels: channels, + type: type, + raw_type: raw_type, + shape: shape, + elemSize: elemSize, + ref: ref + } + end + + def __to_struct__({:ok, mat = %{:class => :"CUDA.GpuMat"}}) do + {:ok, __to_struct__(mat)} + end + + def __to_struct__(pass_through) do + Evision.Internal.Structurise.to_struct(pass_through) + end +""" + +gpumat_struct_erlang = """ +'__to_struct__'(#{class := 'CUDA.GpuMat', ref := Ref, channels := Channels, type := Type, raw_type := RawType, shape := Shape, elemSize := ElemSize}) -> + #evision_cuda_gpumat{ + channels = Channels, + type = Type, + raw_type = RawType, + shape = Shape, + ref = Ref, + elemSize = ElemSize + }; +'__to_struct__'(Any) -> + evision_internal_structurise:to_struct(Any). +""" + generic_struct_template_elixir = Template( ' @typedoc """\n' ' Type that represents an `Evision.${elixir_module_name}` struct.\n\n' @@ -121,6 +206,10 @@ def __to_struct__(ret) do "VideoCapture": { "elixir": videocapture_struct_elixir, "erlang": videocapture_struct_erlang + }, + "CUDA.GpuMat": { + "elixir": gpumat_struct_elixir, + "erlang": gpumat_struct_erlang } } From 68e1c580ab1c1b3ba6ebf5f70cb3683df48c0c4a Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sat, 14 Jan 2023 11:51:34 +0000 Subject: [PATCH 07/55] emit warning when EVISION_ENABLE_CUDA=true while EVISION_ENABLE_CONTRIB=false --- mix.exs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mix.exs b/mix.exs index 11567b76..41536688 100644 --- a/mix.exs +++ b/mix.exs @@ -902,17 +902,25 @@ defmodule Evision.MixProject do defp generate_cmake_options() do mc = module_configuration() + enable_cuda = System.get_env("EVISION_ENABLE_CUDA", "false") + enable_opencv_cuda = enable_cuda == "true" + if enable_opencv_cuda do + System.put_env("EVISION_ENABLE_CONTRIB", "true") + else + System.put_env("EVISION_ENABLE_CONTRIB", "false") + end enable_contrib = System.get_env("EVISION_ENABLE_CONTRIB", "true") enable_opencv_contrib = enable_contrib == "true" + if enable_opencv_cuda and !enable_opencv_contrib do + Logger.warning("EVISION_ENABLE_CUDA is set to true, while EVISION_ENABLE_CONTRIB is set to false. 
CUDA support will NOT be available.") + end if enable_opencv_contrib do System.put_env("EVISION_ENABLE_CONTRIB", "true") else System.put_env("EVISION_ENABLE_CONTRIB", "false") end - enable_opencv_cuda = true - all_modules = Enum.map(mc.opencv, fn {m, _} -> m end) ++ Enum.map(mc.opencv_contrib, fn {m, _} -> m end) enabled_modules = Enum.filter(mc.opencv, fn {_, e} -> e end) ++ (if enable_opencv_contrib do Enum.filter(mc.opencv_contrib, fn {_, e} -> e end) else [] end) From 93ee2239e98adaf305000f71c9505183d205b77a Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sat, 14 Jan 2023 11:54:28 +0000 Subject: [PATCH 08/55] updated readme --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index af1c7acc..99c09571 100644 --- a/README.md +++ b/README.md @@ -298,6 +298,21 @@ Defaults to `true` because for precompiled binaries, including these "extra" mod However, 20 MBs for Nerves users can be a huge deal (still depending on your device, for example, +20 MBs is often much more acceptable for RPIs as they are usually equipped with >= 8 GB microSD cards while being absolutely a luxury thing for some other embedded devices). +#### EVISION_ENABLE_CUDA +Set environment variable `EVISION_ENABLE_CONTRIB` to `true` to enable CUDA support from [opencv_contrib](https://github.com/opencv/opencv_contrib). Defaults to `false`. + +Note that `EVISION_ENABLE_CONTRIB` will need to be `true` as well. + +```bash +# enable CUDA support +export EVISION_ENABLE_CUDA=true +# opencv_contrib modules is enabled by default +export EVISION_ENABLE_CONTRIB=true + +# disable CUDA support (default) +export EVISION_ENABLE_CUDA=false +``` + #### EVISION_PRECOMPILED_CACHE_DIR ```shell # optional. From 07f59281b34c6b2f577900a45a5425f8a991b50b Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 20:42:01 +0000 Subject: [PATCH 09/55] [py_src] class => `Evision.CUDA.GpuMat` --- py_src/evision_structures.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py_src/evision_structures.py b/py_src/evision_structures.py index 72a8a5f8..82f2fc18 100644 --- a/py_src/evision_structures.py +++ b/py_src/evision_structures.py @@ -119,7 +119,7 @@ def __to_struct__(pass_through) do @doc false def __to_struct__(%{ - :class => :"CUDA.GpuMat", + :class => Evision.CUDA.GpuMat, :channels => channels, :type => type, :raw_type => raw_type, @@ -137,7 +137,7 @@ def __to_struct__(%{ } end - def __to_struct__({:ok, mat = %{:class => :"CUDA.GpuMat"}}) do + def __to_struct__({:ok, mat = %{:class => Evision.CUDA.GpuMat}}) do {:ok, __to_struct__(mat)} end @@ -147,7 +147,7 @@ def __to_struct__(pass_through) do """ gpumat_struct_erlang = """ -'__to_struct__'(#{class := 'CUDA.GpuMat', ref := Ref, channels := Channels, type := Type, raw_type := RawType, shape := Shape, elemSize := ElemSize}) -> +'__to_struct__'(#{class := 'Elixir.Evision.CUDA.GpuMat', ref := Ref, channels := Channels, type := Type, raw_type := RawType, shape := Shape, elemSize := ElemSize}) -> #evision_cuda_gpumat{ channels = Channels, type = Type, From aeb4e39c3e13aeeddcfb15cdb1d03385c087d19d Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 20:42:42 +0000 Subject: [PATCH 10/55] [py_src] map module name `cudacodec` => `CUDACodec` --- py_src/helper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/py_src/helper.py b/py_src/helper.py index 30966dc3..87ea5178 100644 --- a/py_src/helper.py +++ b/py_src/helper.py @@ -165,6 +165,7 @@ def handle_inline_math_escaping(text, start_pos=0): "bioinspired": "Bioinspired", "ccm": 
"CCM", "cuda": "CUDA", + "cudacodec": "CUDACodec", "colored_kinfu": "ColoredKinFu", "detail": "Detail", "dnn": "DNN", From 49e28b22a8de30bfafa54f74ac67494e33b373d5 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 20:43:18 +0000 Subject: [PATCH 11/55] [py_src] try variants with most restrictions first --- py_src/func_info.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/py_src/func_info.py b/py_src/func_info.py index ab92d126..4a226585 100644 --- a/py_src/func_info.py +++ b/py_src/func_info.py @@ -169,7 +169,18 @@ def gen_code(self, codegen): all_code_variants = [] + variants = {} + sorted_variants = [] for v in self.variants: + count = 0 + for a_index, a in enumerate(v.args): + if a.py_inputarg and len(a.defval) == 0: + count += 1 + variants[v] = count + for k, _ in reversed(sorted(variants.items(), key=lambda item: item[1])): + sorted_variants.append(k) + + for v in sorted_variants: code_decl = "" code_ret = "" code_cvt_list = [] From 79209f6c17368ee09c2b101af46142ee554d5604 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 20:44:03 +0000 Subject: [PATCH 12/55] [py_src] append `-D WITH_CUDA=ON` if CUDA support is requested --- mix.exs | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/mix.exs b/mix.exs index fe000b98..755ad657 100644 --- a/mix.exs +++ b/mix.exs @@ -866,17 +866,17 @@ defmodule Evision.MixProject do ], cuda: [ - cudaarithm: false, - cudabgsegm: false, - cudacodec: false, - cudafeatures2d: false, - cudafilters: false, - cudaimgproc: false, - cudalegacy: false, - cudaobjdetect: false, - cudaoptflow: false, - cudastereo: false, - cudawarping: false, + cudaarithm: true, + cudabgsegm: true, + cudacodec: true, + cudafeatures2d: true, + cudafilters: true, + cudaimgproc: true, + cudalegacy: true, + cudaobjdetect: true, + cudaoptflow: true, + cudastereo: true, + cudawarping: true, cudev: true, ] } @@ -905,9 +905,9 @@ defmodule Evision.MixProject do enable_cuda = System.get_env("EVISION_ENABLE_CUDA", "false") enable_opencv_cuda = enable_cuda == "true" if enable_opencv_cuda do - System.put_env("EVISION_ENABLE_CONTRIB", "true") + System.put_env("EVISION_ENABLE_CUDA", "true") else - System.put_env("EVISION_ENABLE_CONTRIB", "false") + System.put_env("EVISION_ENABLE_CUDA", "false") end enable_contrib = System.get_env("EVISION_ENABLE_CONTRIB", "true") @@ -978,6 +978,12 @@ defmodule Evision.MixProject do |> Enum.join(" ")) <> " " + options = if enable_opencv_cuda and enable_opencv_contrib do + "#{options} -D WITH_CUDA=ON" + else + options + end + {options, enabled_modules |> Enum.map(&Atom.to_string(&1)) |> Enum.join(",")} end From 13cd370b740c6652d2827985a1c4d5dbcdd634cd Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 20:44:33 +0000 Subject: [PATCH 13/55] [ci-linux] minor update --- .github/workflows/linux-x86_64.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux-x86_64.yml b/.github/workflows/linux-x86_64.yml index e3f38769..dbc9ad1d 100644 --- a/.github/workflows/linux-x86_64.yml +++ b/.github/workflows/linux-x86_64.yml @@ -168,8 +168,8 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip libncurses5-dev erlang-inets erlang-os-mon erlang-runtime-tools erlang-ssl erlang-dev python3 ca-certificates - sudo apt-get install -y libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg + sudo 
apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip python3 ca-certificates \ + libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg - name: Cache mix packages id: cache-mix-deps From da17ce048626eb266416cb106cd1451eb5e6a134 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 21:07:57 +0000 Subject: [PATCH 14/55] check minimal number of required args --- py_src/func_info.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/py_src/func_info.py b/py_src/func_info.py index 4a226585..4119fb10 100644 --- a/py_src/func_info.py +++ b/py_src/func_info.py @@ -146,9 +146,16 @@ def gen_code(self, codegen): if fname in special_handling_funcs(): return "" code = "%s\n{\n" % (proto,) - code += " using namespace %s;\n int error_flag = false;\n ERL_NIF_TERM error_term = 0;\n std::map erl_terms;\n" % self.namespace.replace('.', '::') - code += " int nif_opts_index = %d; // <- autogenerated value \n if (nif_opts_index < argc) {\n" % (opt_arg_index, ) - code += " evision::nif::parse_arg(env, nif_opts_index, argv, erl_terms);\n }\n" + code += f""" using namespace {self.namespace.replace('.', '::')}; + int error_flag = false; + ERL_NIF_TERM error_term = 0; + std::map erl_terms; + int nif_opts_index = {opt_arg_index}; + if (nif_opts_index < argc) {{ + evision::nif::parse_arg(env, nif_opts_index, argv, erl_terms); + }} + const size_t num_kw_args = erl_terms.size(); +""" selfinfo = None ismethod = self.classname != "" and not self.isconstructor @@ -191,6 +198,10 @@ def gen_code(self, codegen): if v.isphantom and ismethod and not self.is_static: code_args += "_self_" + min_kw_count = len(v.py_arglist[:v.pos_end]) + if min_kw_count > 0: + code_cvt_list.append(f"num_kw_args >= {min_kw_count}") + # declare all the C function arguments, # add necessary conversions from Erlang objects to code_cvt_list, # form the function/method call, From 77f35f8c97d68aa068fcc5fd6af0603c9b1aff22 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 22:48:51 +0000 Subject: [PATCH 15/55] allow Scalar to be nil when default value is available or is output --- c_src/evision.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_src/evision.cpp b/c_src/evision.cpp index 7bb877e2..685ee302 100644 --- a/c_src/evision.cpp +++ b/c_src/evision.cpp @@ -614,7 +614,7 @@ static ERL_NIF_TERM evision_from(ErlNifEnv *env, void*& ptr) static bool evision_to(ErlNifEnv *env, ERL_NIF_TERM o, Scalar& s, const ArgInfo& info) { if (evision::nif::check_nil(env, o)) { - return true; + return info.has_default || info.outputarg; } double dval; From b7c372be1c26ad5ad0724b8826e57ee07f4ae716 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 22:49:18 +0000 Subject: [PATCH 16/55] sort by number of output variables desc --- py_src/func_info.py | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/py_src/func_info.py b/py_src/func_info.py index 4119fb10..7d0f0314 100644 --- a/py_src/func_info.py +++ b/py_src/func_info.py @@ -150,7 +150,7 @@ def gen_code(self, codegen): int error_flag = false; ERL_NIF_TERM error_term = 0; std::map erl_terms; - int nif_opts_index = {opt_arg_index}; + const int nif_opts_index = {opt_arg_index}; if (nif_opts_index < argc) {{ evision::nif::parse_arg(env, nif_opts_index, argv, erl_terms); }} @@ -178,14 +178,50 @@ def gen_code(self, codegen): variants = {} sorted_variants = [] + counts = set() for v in self.variants: count = 0 
for a_index, a in enumerate(v.args): if a.py_inputarg and len(a.defval) == 0: count += 1 variants[v] = count - for k, _ in reversed(sorted(variants.items(), key=lambda item: item[1])): - sorted_variants.append(k) + counts.add(count) + for k, v in reversed(sorted(variants.items(), key=lambda item: item[1])): + sorted_variants.append((k, v)) + + none_umat = {} + umat_ones = {} + for v, count in sorted_variants: + is_umat = False + output_count = 0 + for a in v.args: + if a.py_inputarg and 'UMat' in a.tp: + is_umat = True + if a.py_outputarg: + output_count += 1 + if not is_umat: + if none_umat.get(count, None) is None: + none_umat[count] = [] + none_umat[count].append((v, output_count)) + else: + if umat_ones.get(count, None) is None: + umat_ones[count] = [] + umat_ones[count].append((v, output_count)) + + # sort by number of output variables desc + input_variants = list(reversed(sorted(list(counts)))) + for c in input_variants: + if none_umat.get(c, None) is not None: + none_umat[c] = sorted(none_umat[c], key=lambda item: -item[1]) + if umat_ones.get(c, None) is not None: + umat_ones[c] = sorted(umat_ones[c], key=lambda item: -item[1]) + + sorted_variants = [] + for c in input_variants: + if none_umat.get(c, None) is not None: + sorted_variants.extend([f[0] for f in none_umat[c]]) + if umat_ones.get(c, None) is not None: + sorted_variants.extend([f[0] for f in umat_ones[c]]) for v in sorted_variants: code_decl = "" From 28c0e68f68097da8dc739ddf63bcc311e1ad9125 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 22:50:43 +0000 Subject: [PATCH 17/55] [test] remove lines related to Winograd --- test/dnn_detection_model_test.exs | 7 ------- test/dnn_detection_test.exs | 5 ----- test/dnn_text_detection_model_db_test.exs | 7 ------- test/dnn_text_detection_model_east_test.exs | 7 ------- 4 files changed, 26 deletions(-) diff --git a/test/dnn_detection_model_test.exs b/test/dnn_detection_model_test.exs index 7ecac137..281c9165 100644 --- a/test/dnn_detection_model_test.exs +++ b/test/dnn_detection_model_test.exs @@ -17,13 +17,6 @@ defmodule Evision.DNN.DetectionModel.Test do net = Evision.DNN.readNet(weights, config: config, framework: "") - # disable Winograd, OpenCV 4.7.0 - # https://github.com/opencv/opencv/issues/23080 - enable_winograd = System.get_env("ENABLE_WINOGRAD", "no") - if enable_winograd == "no" do - Evision.DNN.Net.enableWinograd(net, false) - end - model = DetectionModel.detectionModel(net) model = DetectionModel.setInputParams(model, scale: 1.0, diff --git a/test/dnn_detection_test.exs b/test/dnn_detection_test.exs index 4fc9706e..17ee06a0 100644 --- a/test/dnn_detection_test.exs +++ b/test/dnn_detection_test.exs @@ -111,11 +111,6 @@ defmodule Evision.DNN.Test do model = Evision.DNN.Net.setInput(model, blob, name: "", scalefactor: 1.0, mean: {0, 0, 0}) - enable_winograd = System.get_env("ENABLE_WINOGRAD", "no") - if enable_winograd == "no" do - Evision.DNN.Net.enableWinograd(model, false) - end - start_time = :os.system_time(:millisecond) detections = Evision.DNN.Net.forward(model, outBlobNames: out_names) end_time = :os.system_time(:millisecond) diff --git a/test/dnn_text_detection_model_db_test.exs b/test/dnn_text_detection_model_db_test.exs index 17b60219..731503c7 100644 --- a/test/dnn_text_detection_model_db_test.exs +++ b/test/dnn_text_detection_model_db_test.exs @@ -17,13 +17,6 @@ defmodule Evision.DNN.TextDetectionModelDB.Test do net = Evision.DNN.readNet(weights, config: "", framework: "") - # disable Winograd, OpenCV 4.7.0 - # 
https://github.com/opencv/opencv/issues/23080 - enable_winograd = System.get_env("ENABLE_WINOGRAD", "no") - if enable_winograd == "no" do - Evision.DNN.Net.enableWinograd(net, false) - end - model = TextDetectionModelDB.textDetectionModelDB(net) |> TextDetectionModelDB.setInputParams( diff --git a/test/dnn_text_detection_model_east_test.exs b/test/dnn_text_detection_model_east_test.exs index 9a5b2117..6e8a7b54 100644 --- a/test/dnn_text_detection_model_east_test.exs +++ b/test/dnn_text_detection_model_east_test.exs @@ -21,13 +21,6 @@ defmodule Evision.DNN.TextDetectionModelEAST.Test do net = Evision.DNN.readNet(weights, config: "", framework: "") - # disable Winograd, OpenCV 4.7.0 - # https://github.com/opencv/opencv/issues/23080 - enable_winograd = System.get_env("ENABLE_WINOGRAD", "no") - if enable_winograd == "no" do - Evision.DNN.Net.enableWinograd(net, false) - end - model = TextDetectionModelEAST.textDetectionModelEAST(net) |> TextDetectionModelEAST.setInputParams( From 687a3ca3fb0be9ef203964cb84c29f60910f5307 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:16:29 +0000 Subject: [PATCH 18/55] [c_src] `evision_to>` returns true if info.has_default || info.outputarg --- c_src/evision.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c_src/evision.cpp b/c_src/evision.cpp index 685ee302..12aaa8fb 100644 --- a/c_src/evision.cpp +++ b/c_src/evision.cpp @@ -1629,7 +1629,7 @@ bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, std::vector& value, const if (info.name != nullptr && strncmp(info.name, "netInputShape", 13) == 0) { return false; } - return true; + return info.has_default || info.outputarg; } return evisionVecConverter::to(env, obj, value, info); } @@ -1647,7 +1647,7 @@ static bool evision_to_generic_vec(ErlNifEnv *env, ERL_NIF_TERM obj, std::vector if (info.name != nullptr && strncmp(info.name, "netInputShape", 13) == 0) { return false; } - return true; + return info.has_default || info.outputarg; } if (!enif_is_list(env, obj)) From e42d1f3f3e401b5fbf09cd8fec19f7ce70577d0e Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:17:59 +0000 Subject: [PATCH 19/55] mix format --- lib/evision_mat.ex | 12 ++- lib/evision_wx.ex | 11 ++- lib/smartcell/evision_zoo.ex | 2 + lib/smartcell/ml_rtrees.ex | 3 +- lib/smartcell/ml_svm.ex | 7 +- lib/zoo/zoo.ex | 149 ++++++++++++++++++++++-------- mix.exs | 65 +++++++++---- test/dnn_detection_model_test.exs | 16 ++-- test/dnn_detection_test.exs | 10 +- test/evision_gpumat_test.exs | 109 ++++++++++++++++++++++ test/evision_keypoint.exs | 8 ++ test/evision_mat_test.exs | 92 ++++++++++++++---- test/evision_test.exs | 3 +- test/orb_test.exs | 4 +- test/pca_test.exs | 29 +++++- test/videowriter_test.exs | 25 +++-- 16 files changed, 434 insertions(+), 111 deletions(-) create mode 100644 test/evision_gpumat_test.exs create mode 100644 test/evision_keypoint.exs diff --git a/lib/evision_mat.ex b/lib/evision_mat.ex index 263590bc..a6fee257 100644 --- a/lib/evision_mat.ex +++ b/lib/evision_mat.ex @@ -332,6 +332,7 @@ defmodule Evision.Mat do else {mat, false} end + with_mat = if with_mat.dims != tuple_size(with_mat.shape) do Evision.Mat.channel_as_last_dim(with_mat) @@ -340,13 +341,15 @@ defmodule Evision.Mat do end ranges = __standardise_range_list__(ranges, true) + ranges = if tuple_size(mat.shape) > Enum.count(ranges) do extend = - for i <- Enum.count(ranges)..tuple_size(mat.shape)-1, reduce: [] do + for i <- Enum.count(ranges)..(tuple_size(mat.shape) - 1), reduce: [] do acc -> [{0, elem(mat.shape, i)} | acc] end 
+ ranges ++ Enum.reverse(extend) else ranges @@ -355,9 +358,10 @@ defmodule Evision.Mat do with_mat = __from_struct__(with_mat) mat = __from_struct__(mat) - res = Evision.Internal.Structurise.to_struct( - :evision_nif.mat_update_roi(mat: mat, ranges: ranges, with_mat: with_mat) - ) + res = + Evision.Internal.Structurise.to_struct( + :evision_nif.mat_update_roi(mat: mat, ranges: ranges, with_mat: with_mat) + ) if bring_back do Evision.Mat.last_dim_as_channel(res) diff --git a/lib/evision_wx.ex b/lib/evision_wx.ex index 546dfd22..7bc8cd31 100644 --- a/lib/evision_wx.ex +++ b/lib/evision_wx.ex @@ -28,10 +28,12 @@ defmodule Evision.Wx do windows = Process.get(@process_env_key, %{}) window = Map.get(windows, window_name) - wx_pid = case window do - nil -> nil - {_, _, _, pid} -> pid - end + + wx_pid = + case window do + nil -> nil + {_, _, _, pid} -> pid + end window = if window == nil || !Process.alive?(wx_pid) do @@ -64,6 +66,7 @@ defmodule Evision.Wx do catch :error, {_, {:wxWindow, :close, _}} -> :ok end + Process.put(@process_env_key, Map.delete(windows, window_name)) end diff --git a/lib/smartcell/evision_zoo.ex b/lib/smartcell/evision_zoo.ex index e6d049c7..76a62e68 100644 --- a/lib/smartcell/evision_zoo.ex +++ b/lib/smartcell/evision_zoo.ex @@ -66,6 +66,7 @@ else defp field_defaults_for(task_id, variant_id) do variant = variant_by_id(task_id, variant_id) + if variant == nil do %{} else @@ -78,6 +79,7 @@ else @impl true def handle_connect(ctx) do {backend_options, target_options} = Evision.Zoo.available_backend_and_target() + {:ok, %{ id: ctx.assigns.id, diff --git a/lib/smartcell/ml_rtrees.ex b/lib/smartcell/ml_rtrees.ex index f7c50f37..70250dc4 100644 --- a/lib/smartcell/ml_rtrees.ex +++ b/lib/smartcell/ml_rtrees.ex @@ -204,7 +204,8 @@ else unquote(ESCH.quoted_var(attrs["to_variable"])) = Evision.ML.RTrees.setTermCriteria( unquote(ESCH.quoted_var(attrs["to_variable"])), - {Evision.Constant.cv_MAX_ITER() + Evision.Constant.cv_EPS(), unquote(count), unquote(eps)} + {Evision.Constant.cv_MAX_ITER() + Evision.Constant.cv_EPS(), unquote(count), + unquote(eps)} ) end end diff --git a/lib/smartcell/ml_svm.ex b/lib/smartcell/ml_svm.ex index e273ea49..596d352f 100644 --- a/lib/smartcell/ml_svm.ex +++ b/lib/smartcell/ml_svm.ex @@ -188,7 +188,9 @@ else quote do unquote(ESCH.quoted_var(attrs["to_variable"])) = Evision.ML.SVM.create() - |> Evision.ML.SVM.setType(unquote(ESCH.quoted_var("Evision.Constant.cv_#{attrs["type"]}()"))) + |> Evision.ML.SVM.setType( + unquote(ESCH.quoted_var("Evision.Constant.cv_#{attrs["type"]}()")) + ) |> Evision.ML.SVM.setKernel( unquote(ESCH.quoted_var("Evision.Constant.cv_#{attrs["kernel_type"]}()")) ) @@ -333,7 +335,8 @@ else unquote(ESCH.quoted_var(attrs["to_variable"])) = Evision.ML.SVM.setTermCriteria( unquote(ESCH.quoted_var(attrs["to_variable"])), - {Evision.Constant.cv_MAX_ITER() + Evision.Constant.cv_EPS(), unquote(count), unquote(eps)} + {Evision.Constant.cv_MAX_ITER() + Evision.Constant.cv_EPS(), unquote(count), + unquote(eps)} ) end end diff --git a/lib/zoo/zoo.ex b/lib/zoo/zoo.ex index ed7ee693..1e3c7791 100644 --- a/lib/zoo/zoo.ex +++ b/lib/zoo/zoo.ex @@ -4,29 +4,29 @@ defmodule Evision.Zoo do """ @type smartcell_option :: %{ - value: Strung.t(), - label: Strung.t() - } + value: Strung.t(), + label: Strung.t() + } @type smartcell_param :: %{ - field: String.t(), - label: String.t(), - type: atom(), - default: term(), - is_option: boolean() | nil, - options: [smartcell_option()] - } + field: String.t(), + label: String.t(), + type: atom(), + default: term(), 
+ is_option: boolean() | nil, + options: [smartcell_option()] + } @type smartcell_param_map :: %{ - name: String.t(), - params: [smartcell_param()] - } + name: String.t(), + params: [smartcell_param()] + } @type smartcell_params :: [smartcell_param_map()] @type variant :: %{ - id: String.t(), - name: String.t(), - docs_url: String.t(), - params: smartcell_params(), - docs: String.t() - } + id: String.t(), + name: String.t(), + docs_url: String.t(), + params: smartcell_params(), + docs: String.t() + } @type smartcell_tasks :: [variant()] def download(file_url, filename, opts \\ []) @@ -58,12 +58,36 @@ defmodule Evision.Zoo do def backends do %{ - "opencv" => {Evision.Constant.cv_DNN_BACKEND_OPENCV(), "OpenCV", quote do Evision.Constant.cv_DNN_BACKEND_OPENCV() end}, - "cuda" => {Evision.Constant.cv_DNN_BACKEND_CUDA(), "CUDA", quote do Evision.Constant.cv_DNN_BACKEND_CUDA() end}, - "halide" => {Evision.Constant.cv_DNN_BACKEND_HALIDE(), "Halide", quote do Evision.Constant.cv_DNN_BACKEND_HALIDE() end}, - "inference_engine" => {Evision.Constant.cv_DNN_BACKEND_INFERENCE_ENGINE(), "Inference Engine", quote do Evision.Constant.cv_DNN_BACKEND_INFERENCE_ENGINE() end}, - "timvx" => {Evision.Constant.cv_DNN_BACKEND_TIMVX(), "TIMVX", quote do Evision.Constant.cv_DNN_BACKEND_TIMVX() end}, - "vkcom" => {Evision.Constant.cv_DNN_BACKEND_VKCOM(), "VKCOM", quote do Evision.Constant.cv_DNN_BACKEND_VKCOM() end}, + "opencv" => + {Evision.Constant.cv_DNN_BACKEND_OPENCV(), "OpenCV", + quote do + Evision.Constant.cv_DNN_BACKEND_OPENCV() + end}, + "cuda" => + {Evision.Constant.cv_DNN_BACKEND_CUDA(), "CUDA", + quote do + Evision.Constant.cv_DNN_BACKEND_CUDA() + end}, + "halide" => + {Evision.Constant.cv_DNN_BACKEND_HALIDE(), "Halide", + quote do + Evision.Constant.cv_DNN_BACKEND_HALIDE() + end}, + "inference_engine" => + {Evision.Constant.cv_DNN_BACKEND_INFERENCE_ENGINE(), "Inference Engine", + quote do + Evision.Constant.cv_DNN_BACKEND_INFERENCE_ENGINE() + end}, + "timvx" => + {Evision.Constant.cv_DNN_BACKEND_TIMVX(), "TIMVX", + quote do + Evision.Constant.cv_DNN_BACKEND_TIMVX() + end}, + "vkcom" => + {Evision.Constant.cv_DNN_BACKEND_VKCOM(), "VKCOM", + quote do + Evision.Constant.cv_DNN_BACKEND_VKCOM() + end} } end @@ -77,23 +101,56 @@ defmodule Evision.Zoo do Evision.Constant.cv_DNN_TARGET_MYRIAD() => %{value: "myriad", label: "Myriad"}, Evision.Constant.cv_DNN_TARGET_NPU() => %{value: "npu", label: "NPU"}, Evision.Constant.cv_DNN_TARGET_OPENCL() => %{value: "opencl", label: "OpenCL"}, - Evision.Constant.cv_DNN_TARGET_OPENCL_FP16() => %{value: "opencl_fp16", label: "OpenCL FP16"}, - Evision.Constant.cv_DNN_TARGET_VULKAN() => %{value: "vulkan", label: "Vulkan"}, + Evision.Constant.cv_DNN_TARGET_OPENCL_FP16() => %{ + value: "opencl_fp16", + label: "OpenCL FP16" + }, + Evision.Constant.cv_DNN_TARGET_VULKAN() => %{value: "vulkan", label: "Vulkan"} } end def targets_reverse_lookup do %{ - "cpu" => quote do Evision.Constant.cv_DNN_TARGET_CPU() end, - "cuda" => quote do Evision.Constant.cv_DNN_TARGET_CUDA() end, - "cuda_fp16" => quote do Evision.Constant.cv_DNN_TARGET_CUDA_FP16() end, - "fpga" => quote do Evision.Constant.cv_DNN_TARGET_FPGA() end, - "hddl" => quote do Evision.Constant.cv_DNN_TARGET_HDDL() end, - "myriad" => quote do Evision.Constant.cv_DNN_TARGET_MYRIAD() end, - "npu" => quote do Evision.Constant.cv_DNN_TARGET_NPU() end, - "opencl" => quote do Evision.Constant.cv_DNN_TARGET_OPENCL() end, - "opencl_fp16" => quote do Evision.Constant.cv_DNN_TARGET_OPENCL_FP16() end, - "vulkan" => quote do 
Evision.Constant.cv_DNN_TARGET_VULKAN() end, + "cpu" => + quote do + Evision.Constant.cv_DNN_TARGET_CPU() + end, + "cuda" => + quote do + Evision.Constant.cv_DNN_TARGET_CUDA() + end, + "cuda_fp16" => + quote do + Evision.Constant.cv_DNN_TARGET_CUDA_FP16() + end, + "fpga" => + quote do + Evision.Constant.cv_DNN_TARGET_FPGA() + end, + "hddl" => + quote do + Evision.Constant.cv_DNN_TARGET_HDDL() + end, + "myriad" => + quote do + Evision.Constant.cv_DNN_TARGET_MYRIAD() + end, + "npu" => + quote do + Evision.Constant.cv_DNN_TARGET_NPU() + end, + "opencl" => + quote do + Evision.Constant.cv_DNN_TARGET_OPENCL() + end, + "opencl_fp16" => + quote do + Evision.Constant.cv_DNN_TARGET_OPENCL_FP16() + end, + "vulkan" => + quote do + Evision.Constant.cv_DNN_TARGET_VULKAN() + end } end @@ -102,24 +159,30 @@ defmodule Evision.Zoo do targets = targets() {backend_options, target_options} = - Enum.reduce(Map.to_list(backends), {[], []}, fn {backend_value, {backend_id, backend_label, _}}, {backend_options, target_options} -> + Enum.reduce(Map.to_list(backends), {[], []}, fn {backend_value, + {backend_id, backend_label, _}}, + {backend_options, target_options} -> available_targets = Evision.DNN.getAvailableTargets(backend_id) + if Enum.count(available_targets) == 0 do {backend_options, target_options} else target_options = Enum.reduce(available_targets, target_options, fn t, target_options -> target = Map.get(targets, t) + if target do [target | target_options] else target_options end end) + backend_options = [%{value: backend_value, label: backend_label} | backend_options] {backend_options, target_options} end end) + {Enum.reverse(backend_options), Enum.reverse(target_options)} end @@ -127,21 +190,27 @@ defmodule Evision.Zoo do backend = attrs["backend"] backends = backends() selected_backend = Map.get(backends, backend) + backend = if selected_backend do elem(selected_backend, 2) else - quote do Evision.Constant.cv_DNN_BACKEND_OPENCV() end + quote do + Evision.Constant.cv_DNN_BACKEND_OPENCV() + end end targets_reverse_lookup = targets_reverse_lookup() target = attrs["target"] selected_target = Map.get(targets_reverse_lookup, target) + target = if selected_target do selected_target else - quote do Evision.Constant.cv_DNN_TARGET_CPU() end + quote do + Evision.Constant.cv_DNN_TARGET_CPU() + end end {backend, target} diff --git a/mix.exs b/mix.exs index 755ad657..c51424c8 100644 --- a/mix.exs +++ b/mix.exs @@ -47,11 +47,13 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do def current_target_nif_url(nif_version, version \\ Metadata.version()) do {target, _} = get_target() enable_contrib = System.get_env("EVISION_ENABLE_CONTRIB", "true") == "true" + if enable_contrib do System.put_env("EVISION_ENABLE_CONTRIB", "true") else System.put_env("EVISION_ENABLE_CONTRIB", "false") end + get_download_url(target, version, nif_version, enable_contrib) end @@ -342,7 +344,7 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do def preferred_eccs do # TLS curves: X25519, prime256v1, secp384r1 preferred_eccs = [:secp256r1, :secp384r1] - :ssl.eccs() -- (:ssl.eccs() -- preferred_eccs) + :ssl.eccs() -- :ssl.eccs() -- preferred_eccs end def secure_ssl? 
do @@ -385,10 +387,12 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do end def filename(target, version, nif_version, enable_contrib, with_ext \\ "") - def filename(target, version, nif_version, _enable_contrib=false, with_ext) do + + def filename(target, version, nif_version, _enable_contrib = false, with_ext) do "evision-nif_#{nif_version}-#{target}-#{version}#{with_ext}" end - def filename(target, version, nif_version, _enable_contrib=true, with_ext) do + + def filename(target, version, nif_version, _enable_contrib = true, with_ext) do "evision-nif_#{nif_version}-#{target}-contrib-#{version}#{with_ext}" end @@ -819,14 +823,12 @@ defmodule Evision.MixProject do ts: true, video: true, videoio: true, - gapi: false, world: false, python2: false, python3: false, java: false ], - opencv_contrib: [ aruco: true, barcode: true, @@ -862,9 +864,8 @@ defmodule Evision.MixProject do optflow: false, sfm: false, videostab: false, - xobjdetect: false, + xobjdetect: false ], - cuda: [ cudaarithm: true, cudabgsegm: true, @@ -877,7 +878,7 @@ defmodule Evision.MixProject do cudaoptflow: true, cudastereo: true, cudawarping: true, - cudev: true, + cudev: true ] } defp module_configuration, do: @module_configuration @@ -904,6 +905,7 @@ defmodule Evision.MixProject do enable_cuda = System.get_env("EVISION_ENABLE_CUDA", "false") enable_opencv_cuda = enable_cuda == "true" + if enable_opencv_cuda do System.put_env("EVISION_ENABLE_CUDA", "true") else @@ -912,21 +914,43 @@ defmodule Evision.MixProject do enable_contrib = System.get_env("EVISION_ENABLE_CONTRIB", "true") enable_opencv_contrib = enable_contrib == "true" + if enable_opencv_cuda and !enable_opencv_contrib do - Logger.warning("EVISION_ENABLE_CUDA is set to true, while EVISION_ENABLE_CONTRIB is set to false. CUDA support will NOT be available.") + Logger.warning( + "EVISION_ENABLE_CUDA is set to true, while EVISION_ENABLE_CONTRIB is set to false. CUDA support will NOT be available." 
+ ) end + if enable_opencv_contrib do System.put_env("EVISION_ENABLE_CONTRIB", "true") else System.put_env("EVISION_ENABLE_CONTRIB", "false") end - all_modules = Enum.map(mc.opencv, fn {m, _} -> m end) ++ Enum.map(mc.opencv_contrib, fn {m, _} -> m end) - enabled_modules = Enum.filter(mc.opencv, fn {_, e} -> e end) - ++ (if enable_opencv_contrib do Enum.filter(mc.opencv_contrib, fn {_, e} -> e end) else [] end) - ++ (if enable_opencv_cuda do Enum.filter(mc.cuda, fn {_, e} -> e end) else [] end) - disabled_modules = Enum.filter(mc.opencv, fn {_, e} -> !e end) - ++ (if enable_opencv_contrib do Enum.filter(mc.opencv_contrib, fn {_, e} -> !e end) else [] end) + all_modules = + Enum.map(mc.opencv, fn {m, _} -> m end) ++ Enum.map(mc.opencv_contrib, fn {m, _} -> m end) + + enabled_modules = + Enum.filter(mc.opencv, fn {_, e} -> e end) ++ + if enable_opencv_contrib do + Enum.filter(mc.opencv_contrib, fn {_, e} -> e end) + else + [] + end ++ + if enable_opencv_cuda do + Enum.filter(mc.cuda, fn {_, e} -> e end) + else + [] + end + + disabled_modules = + Enum.filter(mc.opencv, fn {_, e} -> !e end) ++ + if enable_opencv_contrib do + Enum.filter(mc.opencv_contrib, fn {_, e} -> !e end) + else + [] + end + enabled_modules = Keyword.keys(enabled_modules) disabled_modules = Keyword.keys(disabled_modules) enabled_img_codecs = Application.get_env(:evision, :enabled_img_codecs, @enabled_img_codecs) @@ -978,11 +1002,12 @@ defmodule Evision.MixProject do |> Enum.join(" ")) <> " " - options = if enable_opencv_cuda and enable_opencv_contrib do - "#{options} -D WITH_CUDA=ON" - else - options - end + options = + if enable_opencv_cuda and enable_opencv_contrib do + "#{options} -D WITH_CUDA=ON" + else + options + end {options, enabled_modules |> Enum.map(&Atom.to_string(&1)) |> Enum.join(",")} end diff --git a/test/dnn_detection_model_test.exs b/test/dnn_detection_model_test.exs index 281c9165..4c74d6eb 100644 --- a/test/dnn_detection_model_test.exs +++ b/test/dnn_detection_model_test.exs @@ -18,13 +18,15 @@ defmodule Evision.DNN.DetectionModel.Test do net = Evision.DNN.readNet(weights, config: config, framework: "") model = DetectionModel.detectionModel(net) - model = DetectionModel.setInputParams(model, - scale: 1.0, - size: {416, 416}, - mean: {0, 0, 0}, - swapRB: true, - crop: false - ) + + model = + DetectionModel.setInputParams(model, + scale: 1.0, + size: {416, 416}, + mean: {0, 0, 0}, + swapRB: true, + crop: false + ) {classes, _, _} = DetectionModel.detect(model, mat) diff --git a/test/dnn_detection_test.exs b/test/dnn_detection_test.exs index 17ee06a0..0eab5516 100644 --- a/test/dnn_detection_test.exs +++ b/test/dnn_detection_test.exs @@ -34,7 +34,15 @@ defmodule Evision.DNN.Test do 255 }) - mat = Evision.putText(mat, text, {l, top}, Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(), 0.5, {0, 0, 255}) + mat = + Evision.putText( + mat, + text, + {l, top}, + Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(), + 0.5, + {0, 0, 255} + ) _visualise_pred(mat, labels, outs) end diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs new file mode 100644 index 00000000..2b95cbf0 --- /dev/null +++ b/test/evision_gpumat_test.exs @@ -0,0 +1,109 @@ +defmodule Evision.CUDA.GpuMat.Test do + use ExUnit.Case + + alias Evision.Mat + alias Evision.CUDA.GpuMat + + @tag :require_cuda + describe "Basic Operations" do + test "load an image from file" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = GpuMat.gpuMat(mat) + + %GpuMat{ + channels: 3, + type: {:u, 8}, + raw_type: 16, + 
shape: {2, 3, 3}, + elemSize: 3 + } = gpumat + end + + test "explicitly upload an Evision.Mat" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = GpuMat.gpuMat() + + gpumat = Evision.CUDA.GpuMat.upload(gpumat, mat) + assert Evision.CUDA.GpuMat.cudaPtr(gpumat) > 0 + + %GpuMat{ + channels: 3, + type: {:u, 8}, + raw_type: 16, + shape: {2, 3, 3}, + elemSize: 3 + } = gpumat + end + + test "manually allocate a GpuMat" do + gpumat = Evision.CUDA.GpuMat.gpuMat(1000, 1200, Evision.Constant.cv_8UC3()) + assert Evision.CUDA.GpuMat.cudaPtr(gpumat) > 0 + + %GpuMat{ + channels: 3, + type: {:u, 8}, + raw_type: 16, + shape: {1000, 1200, 3}, + elemSize: 3 + } = gpumat + end + + test "split channels" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = GpuMat.gpuMat(mat) + [b, g, r] = Evision.CUDA.split(gpumat) + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = b + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = g + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = r + end + + test "transpose" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = GpuMat.gpuMat(mat) + [b, _, _] = Evision.CUDA.split(gpumat) + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = b + + bT = Evision.CUDA.transpose(b) + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {3, 2, 1}, + elemSize: 1 + } = bT + end + end +end diff --git a/test/evision_keypoint.exs b/test/evision_keypoint.exs new file mode 100644 index 00000000..a5991c07 --- /dev/null +++ b/test/evision_keypoint.exs @@ -0,0 +1,8 @@ +defmodule Evision.Mat.Test do + use ExUnit.Case + + test "Keypoints" do + kp = Evision.KeyPoint.convert([{23.3, 23.3}]) + IO.inspect(kp) + end +end diff --git a/test/evision_mat_test.exs b/test/evision_mat_test.exs index 5c57fdbd..c4f71f66 100644 --- a/test/evision_mat_test.exs +++ b/test/evision_mat_test.exs @@ -89,40 +89,96 @@ defmodule Evision.Mat.Test do @tag :nx test "update_roi" do - image_tensor = Evision.Mat.literal([[[2,2,2]], [[2,3,4]], [[4,5,6]]], :u8) + image_tensor = Evision.Mat.literal([[[2, 2, 2]], [[2, 3, 4]], [[4, 5, 6]]], :u8) image_2d = Evision.Mat.last_dim_as_channel(image_tensor) Evision.Mat.to_nx(image_2d) - patch_tensor = Evision.Mat.literal([[[7,8]], [[9,10]]], :u8) + patch_tensor = Evision.Mat.literal([[[7, 8]], [[9, 10]]], :u8) patch_2d = Evision.Mat.last_dim_as_channel(patch_tensor) Evision.Mat.to_nx(patch_2d) flatlist = [2, 7, 8, 2, 9, 10, 4, 5, 6] roi = [{0, 2}, {0, 1}, {1, 3}] - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor))) + + assert flatlist == + Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor)) + ) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d)) + ) + + assert flatlist == + Nx.to_flat_list( 
+ Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor)) + ) flatlist = [2, 2, 2, 2, 7, 8, 4, 9, 10] roi = [{1, 3}, {0, 1}, {1, 3}] - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor))) + + assert flatlist == + Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor)) + ) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d)) + ) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor)) + ) flatlist = [2, 2, 2, 7, 8, 4, 9, 10, 6] roi = [{1, 3}, {0, 1}, {0, 2}] - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor))) + + assert flatlist == + Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor)) + ) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d)) + ) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor)) + ) flatlist = [7, 8, 2, 9, 10, 4, 4, 5, 6] roi = [{0, 2}, {0, 1}, {0, 2}] - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d))) - assert flatlist == Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor))) + + assert flatlist == + Nx.to_flat_list(Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_2d))) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_2d, roi, patch_tensor)) + ) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_2d)) + ) + + assert flatlist == + Nx.to_flat_list( + Evision.Mat.to_nx(Evision.Mat.update_roi(image_tensor, roi, patch_tensor)) + ) end end diff --git a/test/evision_test.exs b/test/evision_test.exs index 3d95b2a4..fafaf85e 100644 --- a/test/evision_test.exs +++ b/test/evision_test.exs @@ -99,7 +99,8 @@ defmodule Evision.Test do encoded = Evision.imencode(".png", mat) assert is_binary(encoded) - %Mat{shape: ^shape, type: ^type} = Evision.imdecode(encoded, Evision.Constant.cv_IMREAD_ANYCOLOR()) + %Mat{shape: ^shape, type: ^type} = + Evision.imdecode(encoded, Evision.Constant.cv_IMREAD_ANYCOLOR()) end test "Evision.resize" do diff --git a/test/orb_test.exs 
b/test/orb_test.exs index 11235b68..b1227f68 100644 --- a/test/orb_test.exs +++ b/test/orb_test.exs @@ -4,11 +4,11 @@ defmodule Evision.ORB.Test do @moduletag timeout: 120_000 test "detect keypoints in an image" do - img = - %Evision.Mat{} = Evision.imread(Path.join([__DIR__, "testdata", "pca_test.jpg"]), flags: 0) + img = %Evision.Mat{} = Evision.imread(Path.join([__DIR__, "testdata", "pca_test.jpg"])) orb = %Evision.ORB{} = Evision.ORB.create() kp = Evision.ORB.detect(orb, img) + assert Enum.count(kp) != 0 {kp, _des} = Evision.ORB.compute(orb, img, kp) assert Enum.count(kp) != 0 end diff --git a/test/pca_test.exs b/test/pca_test.exs index cbf0e6e5..393a87c9 100644 --- a/test/pca_test.exs +++ b/test/pca_test.exs @@ -13,18 +13,28 @@ defmodule Evision.PCA.Test do %Mat{} = src = - Evision.line(src, {px, py}, {qx, qy}, colour, thickness: 1, style: Evision.Constant.cv_LINE_AA()) + Evision.line(src, {px, py}, {qx, qy}, colour, + thickness: 1, + style: Evision.Constant.cv_LINE_AA() + ) px = trunc(qx + 9 * :math.cos(angle + :math.pi() / 4)) py = trunc(qy + 9 * :math.sin(angle + :math.pi() / 4)) %Mat{} = src = - Evision.line(src, {px, py}, {qx, qy}, colour, thickness: 1, style: Evision.Constant.cv_LINE_AA()) + Evision.line(src, {px, py}, {qx, qy}, colour, + thickness: 1, + style: Evision.Constant.cv_LINE_AA() + ) px = trunc(qx + 9 * :math.cos(angle - :math.pi() / 4)) py = trunc(qy + 9 * :math.sin(angle - :math.pi() / 4)) - Evision.line(src, {px, py}, {qx, qy}, colour, thickness: 1, style: Evision.Constant.cv_LINE_AA()) + + Evision.line(src, {px, py}, {qx, qy}, colour, + thickness: 1, + style: Evision.Constant.cv_LINE_AA() + ) end @tag :nx @@ -35,10 +45,19 @@ defmodule Evision.PCA.Test do ) {_, bw} = - Evision.threshold(gray, 50, 255, Evision.Constant.cv_THRESH_BINARY() ||| Evision.Constant.cv_THRESH_OTSU()) + Evision.threshold( + gray, + 50, + 255, + Evision.Constant.cv_THRESH_BINARY() ||| Evision.Constant.cv_THRESH_OTSU() + ) {contours, _} = - Evision.findContours(bw, Evision.Constant.cv_RETR_LIST(), Evision.Constant.cv_CHAIN_APPROX_NONE()) + Evision.findContours( + bw, + Evision.Constant.cv_RETR_LIST(), + Evision.Constant.cv_CHAIN_APPROX_NONE() + ) contours = contours diff --git a/test/videowriter_test.exs b/test/videowriter_test.exs index 9d32d536..02be83b2 100644 --- a/test/videowriter_test.exs +++ b/test/videowriter_test.exs @@ -8,8 +8,12 @@ defmodule Evision.VideoWriter.Test do reader = Evision.VideoCapture.videoCapture(input_video_file) # mp4v fourcc = Evision.VideoWriter.fourcc(109, 112, 52, 118) - height = Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_HEIGHT()) |> trunc() - width = Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_WIDTH()) |> trunc() + + height = + Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_HEIGHT()) |> trunc() + + width = + Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_WIDTH()) |> trunc() writer = Evision.VideoWriter.videoWriter( @@ -26,10 +30,19 @@ defmodule Evision.VideoWriter.Test do # verify reader = Evision.VideoCapture.videoCapture(output_video_file) - ^output_fps = Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FPS()) |> trunc() - ^height = Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_HEIGHT()) |> trunc() - ^width = Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_WIDTH()) |> trunc() - w_frames_count = Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_COUNT()) + + ^output_fps = + 
Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FPS()) |> trunc() + + ^height = + Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_HEIGHT()) |> trunc() + + ^width = + Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_WIDTH()) |> trunc() + + w_frames_count = + Evision.VideoCapture.get(reader, Evision.Constant.cv_CAP_PROP_FRAME_COUNT()) + Evision.VideoCapture.release(reader) assert w_frames_count == output_fps * output_seconds From 12a54177b4076656d21710c8873cc6551edc3ac8 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:19:31 +0000 Subject: [PATCH 20/55] [test] added a simple `Evision.KeyPoint` test --- test/{evision_keypoint.exs => evision_keypoint_test.exs} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename test/{evision_keypoint.exs => evision_keypoint_test.exs} (80%) diff --git a/test/evision_keypoint.exs b/test/evision_keypoint_test.exs similarity index 80% rename from test/evision_keypoint.exs rename to test/evision_keypoint_test.exs index a5991c07..50e3dfa7 100644 --- a/test/evision_keypoint.exs +++ b/test/evision_keypoint_test.exs @@ -3,6 +3,6 @@ defmodule Evision.Mat.Test do test "Keypoints" do kp = Evision.KeyPoint.convert([{23.3, 23.3}]) - IO.inspect(kp) + assert Enum.count(kp) == 1 end end From 255560f82417b9babd4c5dcab0a88eb9e84dff7a Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:21:43 +0000 Subject: [PATCH 21/55] [ci] add CUDA workflow --- .github/workflows/linux-cuda-gnu.yml | 159 +++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 .github/workflows/linux-cuda-gnu.yml diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml new file mode 100644 index 00000000..c94c2500 --- /dev/null +++ b/.github/workflows/linux-cuda-gnu.yml @@ -0,0 +1,159 @@ +name: linux-x86_64 + +on: + pull_request: + types: [ labeled ] + paths-ignore: + - '*.md' + - '**/*.md' + - 'LICENSE*' + - 'examples/**' + - 'nerves/**' + - 'Makefile.win' + - 'cc_toolchain/**' + - 'checksum.exs' + - '.github/FUNDING.yml' + - '.github/workflows/nerves-*' + - '.github/workflows/macos-*' + - '.github/workflows/windows-*' + - '.github/workflows/test-*.yml' + - '.github/workflows/linux-x86_64.yml' + - '.github/workflows/linux-arm64.yml' + - '.github/workflows/linux-armv7.yml' + - '.github/workflows/linux-ppc64le.yml' + - '.github/workflows/linux-s390x.yml' + - '.github/workflows/linux-precompile-*.yml' + push: + branches: + - main + paths-ignore: + - '*.md' + - '**/*.md' + - 'LICENSE*' + - 'examples/**' + - 'nerves/**' + - 'Makefile.win' + - 'cc_toolchain/**' + - 'checksum.exs' + - '.github/FUNDING.yml' + - '.github/workflows/nerves-*' + - '.github/workflows/macos-*' + - '.github/workflows/windows-*' + - '.github/workflows/test-*.yml' + - '.github/workflows/linux-x86_64.yml' + - '.github/workflows/linux-arm64.yml' + - '.github/workflows/linux-armv7.yml' + - '.github/workflows/linux-ppc64le.yml' + - '.github/workflows/linux-s390x.yml' + - '.github/workflows/linux-precompile-*.yml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + linux_cuda: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + include: + - container: nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04 + cuda_ver: "11.1.1" + - container: nvidia/cuda:11.4.3-cudnn8-devel-ubuntu20.04 + cuda_ver: "11.4.3" + - container: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 + cuda_ver: "11.8.0" + container: ${{ matrix.container }} + env: + # container env vars + # ref 
link: https://github.com/elixir-nx/xla/blob/main/.github/workflows/release.yml + ImageOS: ubuntu20 + LANG: en_US.UTF-8 + LANGUAGE: en_US:en + LC_ALL: en_US.UTF-8 + DEBIAN_FRONTEND: noninteractive + # evision related env vars + MIX_ENV: test + OPENCV_VER: "4.7.0" + OTP_VERSION: "25.1.2" + ELIXIR_VERSION: "1.14.2" + EVISION_PREFER_PRECOMPILED: "false" + EVISION_ENABLE_CUDA: "true" + steps: + - uses: actions/checkout@v3 + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip python3 ca-certificates \ + libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg + echo "$LANG UTF-8" >> /etc/locale.gen + locale-gen + update-locale LANG=$LANG + + - uses: erlef/setup-beam@v1 + with: + otp-version: ${{ env.OTP_VERSION }} + elixir-version: ${{ env.ELIXIR_VERSION }} + + - name: Cache mix packages + id: cache-mix-deps + uses: actions/cache@v3 + with: + key: deps-${{ hashFiles('mix.lock') }}-${{ env.OTP_VERSION }}-${{ env.ELIXIR_VERSION }} + path: | + ./deps + + - name: Get mix deps + if: steps.cache-mix-deps.outputs.cache-hit != 'true' + run: | + mix deps.get + + - name: Cache OpenCV + id: cache-opencv + uses: actions/cache@v3 + with: + key: opencv-with-contrib-${{ env.OPENCV_VER }} + path: | + ./3rd_party + + - name: Download OpenCV + if: steps.cache-opencv.outputs.cache-hit != 'true' + run: | + bash scripts/download_opencv.sh ${OPENCV_VER} 3rd_party/cache 3rd_party/opencv/ + bash scripts/download_opencv_contrib.sh ${OPENCV_VER} 3rd_party/cache 3rd_party/opencv/ + + - name: Cache compiled OpenCV + id: cache-mix-compile_opencv + uses: actions/cache@v3 + with: + key: compiled-opencv-${{ env.OPENCV_VER }}-cuda${{ matrix.cuda_ver }}-x86_64-linux-gnu-${{ hashFiles('Makefile') }} + path: | + ./_build/${{ env.MIX_ENV }}/lib/evision + ./c_src/headers.txt + ./c_src/configuration.private.hpp + + - name: Compile OpenCV + if: steps.cache-mix-compile_opencv.outputs.cache-hit != 'true' + run: | + mix compile_opencv + + - name: Mix Compile + run: | + rm -f _build/${{ env.MIX_ENV }}/lib/evision/priv/evision.so + ls -la ./c_src + mix compile + ls -la ./lib/generated + + - name: Cache testdata + id: cache-mix-testdata + uses: actions/cache@v3 + with: + key: testdata-${{ hashFiles('test/downloading_list.txt') }} + path: | + ./test/testdata + + - name: Mix Test + run: | + mix test --include require_downloading --include require_ffmpeg --include require_cuda From ac74d986110a93a1bac538298d236f5248c2f142 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:21:55 +0000 Subject: [PATCH 22/55] [ci] add GitHub CUDA workflow --- .github/workflows/linux-cuda-gnu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index c94c2500..bd67f635 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -52,7 +52,7 @@ concurrency: cancel-in-progress: true jobs: - linux_cuda: + x86_64-gnu-cuda: runs-on: ubuntu-20.04 strategy: fail-fast: false From 901e5a029c32d85c9b2fcbc3dc884edc709d255d Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:25:41 +0000 Subject: [PATCH 23/55] [ci] updated CI name --- .github/workflows/linux-cuda-gnu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index bd67f635..453abd6a 100644 --- 
a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -1,4 +1,4 @@ -name: linux-x86_64 +name: linux-cuda on: pull_request: From 329f7ca0058688b839604a1fb2c4d041cde57cc3 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:32:47 +0000 Subject: [PATCH 24/55] [ci] updated `paths-ignore` --- .github/workflows/linux-cuda-gnu.yml | 5 +++-- .github/workflows/linux-x86_64.yml | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index 453abd6a..8dd3a550 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -53,6 +53,7 @@ concurrency: jobs: x86_64-gnu-cuda: + if: ${{ github.event.label.name == 'cuda' }} runs-on: ubuntu-20.04 strategy: fail-fast: false @@ -85,8 +86,8 @@ jobs: - name: Install system dependencies run: | - sudo apt-get update - sudo apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip python3 ca-certificates \ + apt-get update + apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip python3 ca-certificates \ libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg echo "$LANG UTF-8" >> /etc/locale.gen locale-gen diff --git a/.github/workflows/linux-x86_64.yml b/.github/workflows/linux-x86_64.yml index dbc9ad1d..c8c8ed4e 100644 --- a/.github/workflows/linux-x86_64.yml +++ b/.github/workflows/linux-x86_64.yml @@ -20,6 +20,7 @@ on: - '.github/workflows/linux-armv7.yml' - '.github/workflows/linux-ppc64le.yml' - '.github/workflows/linux-s390x.yml' + - '.github/workflows/linux-cuda-*.yml' - '.github/workflows/linux-precompile-*.yml' push: branches: @@ -46,6 +47,7 @@ on: - '.github/workflows/linux-armv7.yml' - '.github/workflows/linux-ppc64le.yml' - '.github/workflows/linux-s390x.yml' + - '.github/workflows/linux-cuda-*.yml' - '.github/workflows/linux-precompile-*.yml' concurrency: From 8205a4c597da4e985348311ab590d99879c80dca Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:35:13 +0000 Subject: [PATCH 25/55] [ci-linux-cuda] trigger on `labeled, synchronize, opened, reopened` PR events --- .github/workflows/linux-cuda-gnu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index 8dd3a550..1a1d449e 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -2,7 +2,7 @@ name: linux-cuda on: pull_request: - types: [ labeled ] + types: [ labeled, synchronize, opened, reopened ] paths-ignore: - '*.md' - '**/*.md' @@ -53,7 +53,7 @@ concurrency: jobs: x86_64-gnu-cuda: - if: ${{ github.event.label.name == 'cuda' }} + if: contains(github.event.pull_request.labels.*.name, 'cuda') runs-on: ubuntu-20.04 strategy: fail-fast: false From 69d462e4093c2067d22af19a3ad6e4f899a91a51 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:39:52 +0000 Subject: [PATCH 26/55] [ci-linux-cuda] install locales --- .github/workflows/linux-cuda-gnu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index 1a1d449e..d3f47d1e 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -88,7 +88,7 @@ jobs: run: | apt-get update apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip python3 
ca-certificates \ - libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg + libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg locales echo "$LANG UTF-8" >> /etc/locale.gen locale-gen update-locale LANG=$LANG From 4ccc64f184292d21dc8fe0e8d66596c30b2e5e52 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:45:18 +0000 Subject: [PATCH 27/55] [ci-linux-cuda] install system dependencies --- .github/workflows/linux-cuda-gnu.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index d3f47d1e..23b70e44 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -77,8 +77,8 @@ jobs: # evision related env vars MIX_ENV: test OPENCV_VER: "4.7.0" - OTP_VERSION: "25.1.2" - ELIXIR_VERSION: "1.14.2" + OTP_VERSION: "25.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" EVISION_ENABLE_CUDA: "true" steps: @@ -88,10 +88,10 @@ jobs: run: | apt-get update apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip python3 ca-certificates \ - libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg locales - echo "$LANG UTF-8" >> /etc/locale.gen + libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavresample-dev ffmpeg locales curl wget cmake + echo "${LANG} UTF-8" >> /etc/locale.gen locale-gen - update-locale LANG=$LANG + update-locale LANG=${LANG} - uses: erlef/setup-beam@v1 with: From 702be872d141818e33c0bbf32be5d2e059072d47 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Sun, 15 Jan 2023 23:45:44 +0000 Subject: [PATCH 28/55] [ci] bump otp version 25.2, elixir version 1.14.3 --- .github/workflows/linux-precompile-gnu.yml | 4 ++-- .github/workflows/linux-precompile-musl.yml | 8 ++++---- .github/workflows/linux-x86_64.yml | 8 ++++---- .github/workflows/nerves-build.yml | 4 ++-- .github/workflows/windows-precompile.yml | 4 ++-- .github/workflows/windows-x86_64.yml | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/linux-precompile-gnu.yml b/.github/workflows/linux-precompile-gnu.yml index 066456b9..aa54326b 100644 --- a/.github/workflows/linux-precompile-gnu.yml +++ b/.github/workflows/linux-precompile-gnu.yml @@ -15,9 +15,9 @@ jobs: env: OPENCV_VER: "4.7.0" MIX_ENV: prod - OTP_VERSION: "25.1.2" + OTP_VERSION: "25.2" NIF_VERSION: "2.16" - ELIXIR_VERSION: "1.14.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" EVISION_GENERATE_LANG: "erlang,elixir" strategy: diff --git a/.github/workflows/linux-precompile-musl.yml b/.github/workflows/linux-precompile-musl.yml index 8b84c705..e8230b2a 100644 --- a/.github/workflows/linux-precompile-musl.yml +++ b/.github/workflows/linux-precompile-musl.yml @@ -16,9 +16,9 @@ jobs: env: MIX_ENV: prod OPENCV_VER: "4.7.0" - OTP_VERSION: "25.1.2" + OTP_VERSION: "25.2" NIF_VERSION: "2.16" - ELIXIR_VERSION: "1.14.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" EVISION_GENERATE_LANG: "erlang,elixir" steps: @@ -170,9 +170,9 @@ jobs: env: OPENCV_VER: "4.7.0" MIX_ENV: prod - OTP_VERSION: "25.1.2" + OTP_VERSION: "25.2" NIF_VERSION: "2.16" - ELIXIR_VERSION: "1.14.2" + ELIXIR_VERSION: "1.14.3" ZIG_VERSION: "0.8.0" EVISION_PREFER_PRECOMPILED: "false" EVISION_GENERATE_LANG: "erlang,elixir" diff --git a/.github/workflows/linux-x86_64.yml b/.github/workflows/linux-x86_64.yml index c8c8ed4e..2cae8ecc 100644 --- 
a/.github/workflows/linux-x86_64.yml +++ b/.github/workflows/linux-x86_64.yml @@ -61,8 +61,8 @@ jobs: env: MIX_ENV: test OPENCV_VER: "4.7.0" - OTP_VERSION: "25.1.2" - ELIXIR_VERSION: "1.14.2" + OTP_VERSION: "25.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" steps: - uses: actions/checkout@v3 @@ -155,8 +155,8 @@ jobs: env: MIX_ENV: test OPENCV_VER: "4.7.0" - OTP_VERSION: "25.1.2" - ELIXIR_VERSION: "1.14.2" + OTP_VERSION: "25.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" steps: diff --git a/.github/workflows/nerves-build.yml b/.github/workflows/nerves-build.yml index 5fc50b06..36a9e363 100644 --- a/.github/workflows/nerves-build.yml +++ b/.github/workflows/nerves-build.yml @@ -49,8 +49,8 @@ jobs: NERVES_PROJ_NAME: nerves_evision OPENCV_VER: "4.7.0" NERVES_LIVEBOOK_VER: "v0.7.0" - OTP_VERSION: "25.1.2" - ELIXIR_VERSION: "1.14.2" + OTP_VERSION: "25.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" strategy: diff --git a/.github/workflows/windows-precompile.yml b/.github/workflows/windows-precompile.yml index b902afb0..6796c73f 100644 --- a/.github/workflows/windows-precompile.yml +++ b/.github/workflows/windows-precompile.yml @@ -15,9 +15,9 @@ jobs: env: MIX_ENV: prod OPENCV_VER: "4.7.0" - OTP_VERSION: "25.1.2" + OTP_VERSION: "25.2" NIF_VERSION: "2.16" - ELIXIR_VERSION: "1.14.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" EVISION_GENERATE_LANG: "erlang,elixir" MAKE: "nmake" diff --git a/.github/workflows/windows-x86_64.yml b/.github/workflows/windows-x86_64.yml index fb6cb2d3..f315fd6b 100644 --- a/.github/workflows/windows-x86_64.yml +++ b/.github/workflows/windows-x86_64.yml @@ -47,8 +47,8 @@ jobs: env: MIX_ENV: test OPENCV_VER: "4.7.0" - OTP_VERSION: "25.1.2" - ELIXIR_VERSION: "1.14.2" + OTP_VERSION: "25.2" + ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" MAKE: "nmake" From 4b243b852c87f51b7e8d69facf9f736160b0bf97 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 00:19:19 +0000 Subject: [PATCH 29/55] [test] updated tests --- test/test_helper.exs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_helper.exs b/test/test_helper.exs index 975b057d..a4406716 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -74,10 +74,11 @@ ExUnit.configure( # and there is perhaps no way to test input from a camera # (could set up a virtual camera, but let's leave that for now) require_ffmpeg: true, - dnn: !Enum.member?(compiled_modules, "dnn"), - ml: !Enum.member?(compiled_modules, "ml"), - photo: !Enum.member?(compiled_modules, "photo"), - video: !Enum.member?(compiled_modules, "video") + require_cuda: true, + dnn: !compiled_modules.dnn, + ml: !compiled_modules.ml, + photo: !compiled_modules.photo, + video: !compiled_modules.video ] ) From 35b8198851a1db8a1f607486f8cf0ce65b1b0d81 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 00:22:16 +0000 Subject: [PATCH 30/55] [test] cannot test CUDA related functions in GitHub Actions --- .github/workflows/linux-cuda-gnu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index 23b70e44..68fc58cc 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -157,4 +157,4 @@ jobs: - name: Mix Test run: | - mix test --include require_downloading --include require_ffmpeg --include require_cuda + mix test --include require_downloading --include require_ffmpeg From 
d0419311c977be8dc2b206122c170634a28f5b99 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 00:59:33 +0000 Subject: [PATCH 31/55] [test] test if `Evision.CUDA.GpuMat` is loaded --- test/evision_gpumat_test.exs | 203 ++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 99 deletions(-) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 2b95cbf0..006bdeee 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -1,109 +1,114 @@ defmodule Evision.CUDA.GpuMat.Test do use ExUnit.Case + @compile {:no_warn_undefined, Evision.CUDA.GpuMat} + alias Evision.Mat - alias Evision.CUDA.GpuMat @tag :require_cuda describe "Basic Operations" do - test "load an image from file" do - %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) - - gpumat = GpuMat.gpuMat(mat) - - %GpuMat{ - channels: 3, - type: {:u, 8}, - raw_type: 16, - shape: {2, 3, 3}, - elemSize: 3 - } = gpumat - end - - test "explicitly upload an Evision.Mat" do - %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) - - gpumat = GpuMat.gpuMat() - - gpumat = Evision.CUDA.GpuMat.upload(gpumat, mat) - assert Evision.CUDA.GpuMat.cudaPtr(gpumat) > 0 - - %GpuMat{ - channels: 3, - type: {:u, 8}, - raw_type: 16, - shape: {2, 3, 3}, - elemSize: 3 - } = gpumat - end - - test "manually allocate a GpuMat" do - gpumat = Evision.CUDA.GpuMat.gpuMat(1000, 1200, Evision.Constant.cv_8UC3()) - assert Evision.CUDA.GpuMat.cudaPtr(gpumat) > 0 - - %GpuMat{ - channels: 3, - type: {:u, 8}, - raw_type: 16, - shape: {1000, 1200, 3}, - elemSize: 3 - } = gpumat - end - - test "split channels" do - %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) - - gpumat = GpuMat.gpuMat(mat) - [b, g, r] = Evision.CUDA.split(gpumat) - - %GpuMat{ - channels: 1, - type: {:u, 8}, - raw_type: 0, - shape: {2, 3, 1}, - elemSize: 1 - } = b - - %GpuMat{ - channels: 1, - type: {:u, 8}, - raw_type: 0, - shape: {2, 3, 1}, - elemSize: 1 - } = g - - %GpuMat{ - channels: 1, - type: {:u, 8}, - raw_type: 0, - shape: {2, 3, 1}, - elemSize: 1 - } = r - end - - test "transpose" do - %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) - - gpumat = GpuMat.gpuMat(mat) - [b, _, _] = Evision.CUDA.split(gpumat) - - %GpuMat{ - channels: 1, - type: {:u, 8}, - raw_type: 0, - shape: {2, 3, 1}, - elemSize: 1 - } = b - - bT = Evision.CUDA.transpose(b) - - %GpuMat{ - channels: 1, - type: {:u, 8}, - raw_type: 0, - shape: {3, 2, 1}, - elemSize: 1 - } = bT + if Code.ensure_loaded?(Evision.CUDA.GpuMat) do + alias Evision.CUDA.GpuMat + + test "load an image from file" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = Evision.CUDA.GpuMat.gpuMat(mat) + + %GpuMat{ + channels: 3, + type: {:u, 8}, + raw_type: 16, + shape: {2, 3, 3}, + elemSize: 3 + } = gpumat + end + + test "explicitly upload an Evision.Mat" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = GpuMat.gpuMat() + + gpumat = Evision.CUDA.GpuMat.upload(gpumat, mat) + assert Evision.CUDA.GpuMat.cudaPtr(gpumat) > 0 + + %GpuMat{ + channels: 3, + type: {:u, 8}, + raw_type: 16, + shape: {2, 3, 3}, + elemSize: 3 + } = gpumat + end + + test "manually allocate a GpuMat" do + gpumat = Evision.CUDA.GpuMat.gpuMat(1000, 1200, Evision.Constant.cv_8UC3()) + assert Evision.CUDA.GpuMat.cudaPtr(gpumat) > 0 + + %GpuMat{ + channels: 3, + type: {:u, 8}, + raw_type: 16, + shape: {1000, 1200, 3}, + elemSize: 3 + } = gpumat + end + + 
test "split channels" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = GpuMat.gpuMat(mat) + [b, g, r] = Evision.CUDA.split(gpumat) + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = b + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = g + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = r + end + + test "transpose" do + %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) + + gpumat = GpuMat.gpuMat(mat) + [b, _, _] = Evision.CUDA.split(gpumat) + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {2, 3, 1}, + elemSize: 1 + } = b + + bT = Evision.CUDA.transpose(b) + + %GpuMat{ + channels: 1, + type: {:u, 8}, + raw_type: 0, + shape: {3, 2, 1}, + elemSize: 1 + } = bT + end end end end From c02a2dc9d1abc566886b102e943e2562400ee230 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 02:20:21 +0000 Subject: [PATCH 32/55] [test] added some tests for `Evision.CUDA.GpuMat` --- test/evision_gpumat_test.exs | 48 +++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 006bdeee..9e6226c6 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -13,7 +13,7 @@ defmodule Evision.CUDA.GpuMat.Test do test "load an image from file" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) - gpumat = Evision.CUDA.GpuMat.gpuMat(mat) + gpumat = GpuMat.gpuMat(mat) %GpuMat{ channels: 3, @@ -85,6 +85,52 @@ defmodule Evision.CUDA.GpuMat.Test do } = r end + test "abs" do + t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :s8) + mat = Evision.CUDA.GpuMat.gpuMat(t) + ret = Evision.CUDA.abs(mat) + bin = Evision.Mat.to_binary(Evision.CUDA.GpuMat.download(ret)) + assert bin == Nx.to_binary(Nx.abs(t)) + end + + test "absSum" do + t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + abs_sum = Nx.to_number(Nx.sum(Nx.abs(t))) + {^abs_sum, 0.0, 0.0, 0.0} = Evision.CUDA.absSum(t) + end + + test "absSum with mask" do + t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + m = Nx.tensor([[1, 0, 0], [0, 0, 1]], type: :u8) + abs_sum = Nx.to_number(Nx.sum(Nx.abs(Nx.multiply(t, m)))) + {^abs_sum, 0.0, 0.0, 0.0} = Evision.CUDA.absSum(t, mask: m) + end + + test "absdiff" do + t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + absdiff = Nx.to_binary(Nx.abs(Nx.subtract(t1, t2))) + assert absdiff == Evision.Mat.to_binary(Evision.CUDA.absdiff(t1, t2)) + end + + test "add" do + t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + sum = Nx.to_binary(Nx.add(t1, t2)) + assert sum == Evision.Mat.to_binary(Evision.CUDA.add(t1, t2)) + end + + test "addWeighted" do + t1 = Nx.tensor([[100, 200, 300], [400, 500, 600]], type: :f32) + alpha = 0.1 + t2 = Nx.tensor([[1000, 2000, 3000], [4000, 5000, 6000]], type: :f32) + beta = 0.2 + gamma = 10 + + weighted_sum = Nx.to_binary(Nx.add(Nx.add(Nx.multiply(t1, alpha), Nx.multiply(t2, beta)), gamma)) + assert weighted_sum == Evision.Mat.to_binary(Evision.CUDA.addWeighted(t1, alpha, t2, beta, gamma)) + end + test "transpose" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) From 28aa3844a1c257833f7ebac61aa3c1afc45edcaf Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 03:30:50 
+0000 Subject: [PATCH 33/55] [test] added tests for `Evision.CUDA.calcNorm` --- test/evision_gpumat_test.exs | 65 ++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 9e6226c6..55fb0fb0 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -1,15 +1,13 @@ -defmodule Evision.CUDA.GpuMat.Test do - use ExUnit.Case +if !Code.ensure_loaded?(Evision.CUDA.GpuMat) do +else + defmodule Evision.CUDA.GpuMat.Test do + use ExUnit.Case - @compile {:no_warn_undefined, Evision.CUDA.GpuMat} - - alias Evision.Mat - - @tag :require_cuda - describe "Basic Operations" do - if Code.ensure_loaded?(Evision.CUDA.GpuMat) do - alias Evision.CUDA.GpuMat + alias Evision.Mat + alias Evision.CUDA.GpuMat + @tag :require_cuda + describe "Basic Operations" do test "load an image from file" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) @@ -131,6 +129,53 @@ defmodule Evision.CUDA.GpuMat.Test do assert weighted_sum == Evision.Mat.to_binary(Evision.CUDA.addWeighted(t1, alpha, t2, beta, gamma)) end + test "calcHist" do + t = Nx.tensor([[10, 10, 20], [20, 20, 30]], type: :u8) + + # The input matrix should have been uploaded to GPU + {:error, _} = Evision.CUDA.calcHist(t) + + %GpuMat{} = result = Evision.CUDA.calcHist(GpuMat.gpuMat(t)) + %Mat{} = downloaded = GpuMat.download(result) + + ret = Nx.to_flat_list(Evision.Mat.to_nx(downloaded)) + expected = + Nx.broadcast(Nx.tensor(0, type: :s32), {256}) + |> Nx.put_slice([10], Nx.tensor([2])) + |> Nx.put_slice([20], Nx.tensor([3])) + |> Nx.put_slice([30], Nx.tensor([1])) + |> Nx.to_flat_list() + + assert ret == expected + end + + test "calcNorm L1" do + t = Nx.tensor([[10, 10, 20], [20, 20, 30]], type: :u8) + + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_L1)) + expected = Nx.to_binary(Nx.as_type(Nx.sum(Nx.abs(t)), :f64)) + + assert norm_bin == expected + end + + test "calcNorm L2" do + t = Nx.tensor([[1, 1]], type: :u8) + + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_L2)) + expected = Nx.to_binary(Nx.sqrt(Nx.as_type(Nx.sum(Nx.power(t, 2)), :f64))) + + assert norm_bin == expected + end + + test "calcNorm INF" do + t = Nx.tensor([1, 42], type: :u8) + + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_INF)) + expected = Nx.to_binary(Nx.as_type(Nx.take(t, Nx.argmax(t)), :s32)) + + assert norm_bin == expected + end + test "transpose" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) From 17983e553b197d6f76618e63b7090ca4b74fa184 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 03:38:21 +0000 Subject: [PATCH 34/55] [test] added tests for `Evision.CUDA.calcNormDiff` --- test/evision_gpumat_test.exs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 55fb0fb0..61e9c0d2 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -176,6 +176,37 @@ else assert norm_bin == expected end + test "calcNormDiff L1" do + t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) + t2 = Nx.tensor([[9, 9], [19, 19]], type: :u8) + + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_L1)) + expected = Nx.to_binary(Nx.as_type(Nx.sum(Nx.abs(Nx.subtract(t1, t2))), :s32)) + + assert norm_bin == expected + end + + test "calcNormDiff L2" do + 
t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) + t2 = Nx.tensor([[9, 9], [19, 19]], type: :u8) + + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_L2)) + expected = Nx.to_binary(Nx.as_type(Nx.sqrt(Nx.sum(Nx.abs(Nx.subtract(t1, t2)))), :f64)) + + assert norm_bin == expected + end + + test "calcNormDiff INF" do + t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) + t2 = Nx.tensor([[9, 9], [19, 15]], type: :u8) + + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_INF)) + diff = Nx.flatten(Nx.abs(Nx.subtract(t1, t2))) + expected = Nx.to_binary(Nx.as_type(Nx.take(diff, Nx.argmax(diff)), :s32)) + + assert norm_bin == expected + end + test "transpose" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) From c083544bf9a8ebc31aacba5f5622eddba85d853f Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 04:13:09 +0000 Subject: [PATCH 35/55] [test] added tests for `Evision.CUDA.{subtract,sum}` --- test/evision_gpumat_test.exs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 61e9c0d2..0d4b23ae 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -207,6 +207,28 @@ else assert norm_bin == expected end + test "subtract" do + t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + diff = Nx.to_binary(Nx.subtract(t1, t2)) + assert diff == Evision.Mat.to_binary(Evision.CUDA.subtract(t1, t2)) + end + + test "sum" do + t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + sum = Nx.to_number(Nx.sum(t)) + {cuda_sum, 0.0, 0.0, 0.0} = Evision.CUDA.sum(t) + assert sum == cuda_sum + end + + test "sum with mask" do + t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + m = Nx.tensor([[1, 0, 0], [0, 0, 1]], type: :u8) + sum = Nx.to_number(Nx.sum(Nx.multiply(t, m))) + {cuda_sum, 0.0, 0.0, 0.0} = Evision.CUDA.sum(t, mask: m) + assert sum == cuda_sum + end + test "transpose" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) From 1b6566ae4bdee9293673389c401bbe7a8535b45d Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 05:16:08 +0000 Subject: [PATCH 36/55] [test] set PKG_CONFIG_PATH --- .github/workflows/linux-cuda-gnu.yml | 1 + .github/workflows/linux-x86_64.yml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index 68fc58cc..5325929a 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -81,6 +81,7 @@ jobs: ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" EVISION_ENABLE_CUDA: "true" + PKG_CONFIG_PATH: "/usr/lib/x86_64-linux-gnu/pkgconfig" steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/linux-x86_64.yml b/.github/workflows/linux-x86_64.yml index 2cae8ecc..ffa45702 100644 --- a/.github/workflows/linux-x86_64.yml +++ b/.github/workflows/linux-x86_64.yml @@ -158,10 +158,10 @@ jobs: OTP_VERSION: "25.2" ELIXIR_VERSION: "1.14.3" EVISION_PREFER_PRECOMPILED: "false" - + PKG_CONFIG_PATH: "/usr/lib/x86_64-linux-gnu/pkgconfig" steps: - uses: actions/checkout@v3 - + - uses: erlef/setup-beam@v1 with: otp-version: ${{ env.OTP_VERSION }} @@ -237,4 +237,4 @@ jobs: - name: Mix Test run: | - mix test --include require_downloading --include require_ffmpeg --exclude may_crash + mix test --include require_downloading --include 
require_ffmpeg From 4f28f9e4e802266b10449d3fb546d23f0825e8ce Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 05:17:27 +0000 Subject: [PATCH 37/55] remove default value for basic data types --- c_src/evision.cpp | 24 +++++++++++------------- py_src/func_info.py | 2 +- py_src/helper.py | 10 +++++----- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/c_src/evision.cpp b/c_src/evision.cpp index 12aaa8fb..875ad676 100644 --- a/c_src/evision.cpp +++ b/c_src/evision.cpp @@ -702,7 +702,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, bool& value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } if (enif_is_atom(env, obj)) @@ -755,7 +755,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, unsigned int& value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } uint32_t u32; @@ -776,7 +776,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, int& value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } int32_t i32; @@ -797,11 +797,9 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, unsigned long &val, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } - CV_UNUSED(info); - ErlNifUInt64 u64; if (!enif_get_uint64(env, obj, (ErlNifUInt64 *)&u64)) return false; @@ -813,7 +811,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, unsigned long long & value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } ErlNifUInt64 u64; @@ -834,7 +832,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, int64_t& value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } ErlNifSInt64 i64; @@ -896,7 +894,7 @@ bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, uchar& value, const ArgInfo& i value = cv::saturate_cast(i32); return i32 != -1; } else { - return false; + return info.has_default || info.outputarg; } } @@ -938,7 +936,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, char& value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } int32_t i32; @@ -963,7 +961,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, double& value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } double f64; @@ -991,7 +989,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, float& value, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } ErlNifSInt64 i64; @@ -1436,7 +1434,7 @@ template<> bool evision_to(ErlNifEnv *env, ERL_NIF_TERM obj, TermCriteria& dst, const ArgInfo& info) { if (evision::nif::check_nil(env, obj)) { - return true; + return info.has_default || info.outputarg; } const ERL_NIF_TERM *terms; diff --git a/py_src/func_info.py b/py_src/func_info.py index 7d0f0314..88012b7d 100644 --- a/py_src/func_info.py +++ b/py_src/func_info.py @@ -326,7 +326,7 @@ def gen_code(self, codegen): all_cargs.append([arg_type_info, parse_name]) - if defval: + if defval and len(defval) > 0: if arg_type_info.atype == 
"QRCodeEncoder_Params": code_decl += " QRCodeEncoder::Params %s=%s;\n" % (a.name, defval) else: diff --git a/py_src/helper.py b/py_src/helper.py index 87ea5178..551ccfc9 100644 --- a/py_src/helper.py +++ b/py_src/helper.py @@ -17,11 +17,11 @@ ArgTypeInfo.__new__.__defaults__ = (False,) simple_argtype_mapping = { - "bool": ArgTypeInfo("bool", FormatStrings.unsigned_char, "0", True, False), - "size_t": ArgTypeInfo("size_t", FormatStrings.unsigned_long_long, "0", True, False), - "int": ArgTypeInfo("int", FormatStrings.int, "0", True, False), - "float": ArgTypeInfo("float", FormatStrings.float, "0.f", True, False), - "double": ArgTypeInfo("double", FormatStrings.double, "0", True, False), + "bool": ArgTypeInfo("bool", FormatStrings.unsigned_char, "", True, False), + "size_t": ArgTypeInfo("size_t", FormatStrings.unsigned_long_long, "", True, False), + "int": ArgTypeInfo("int", FormatStrings.int, "", True, False), + "float": ArgTypeInfo("float", FormatStrings.float, "", True, False), + "double": ArgTypeInfo("double", FormatStrings.double, "", True, False), "c_string": ArgTypeInfo("char*", FormatStrings.string, '(char*)""', False, False), "string": ArgTypeInfo("std::string", FormatStrings.object, None, True, False), "Stream": ArgTypeInfo("Stream", FormatStrings.object, 'Stream::Null()', True, False), From d3c56d77e3a51dae88e2dbbe0afcebb36f10e263 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 05:19:54 +0000 Subject: [PATCH 38/55] only comple `Evision.CUDA.GpuMat.Test` if `Evision.CUDA.split/1` exists --- test/evision_gpumat_test.exs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 0d4b23ae..6b4b8e98 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -1,4 +1,4 @@ -if !Code.ensure_loaded?(Evision.CUDA.GpuMat) do +if !Kernel.function_exported?(Evision.CUDA, :split, 1) do else defmodule Evision.CUDA.GpuMat.Test do use ExUnit.Case @@ -207,6 +207,21 @@ else assert norm_bin == expected end + test "multiply" do + t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + product = Nx.to_binary(Nx.multiply(t1, t2)) + assert product == Evision.Mat.to_binary(Evision.CUDA.multiply(t1, t2)) + end + + test "multiply with scale" do + t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + scale = 2.0 + product = Nx.to_binary(Nx.multiply(Nx.multiply(t1, t2), scale)) + assert product == Evision.Mat.to_binary(Evision.CUDA.multiply(t1, t2, scale: scale)) + end + test "subtract" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From d8422b0facff40791224f2717520b9a4d2b4084a Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 05:20:08 +0000 Subject: [PATCH 39/55] only comple `Evision.CUDA.GpuMat.Test` if `Evision.CUDA.split/1` exists --- test/evision_gpumat_test.exs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 6b4b8e98..26868287 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -1,4 +1,6 @@ if !Kernel.function_exported?(Evision.CUDA, :split, 1) do + defmodule Evision.CUDA.GpuMat.Test do + end else defmodule Evision.CUDA.GpuMat.Test do use ExUnit.Case From 5aa0242d5020734ef5bfefc27bea43bb1763588b Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 05:25:24 +0000 Subject: [PATCH 40/55] [test] updated 
VideoCapture and KeyPoint tests --- test/evision_keypoint_test.exs | 2 +- test/videocapture_test.exs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/evision_keypoint_test.exs b/test/evision_keypoint_test.exs index 50e3dfa7..75ee9f38 100644 --- a/test/evision_keypoint_test.exs +++ b/test/evision_keypoint_test.exs @@ -1,4 +1,4 @@ -defmodule Evision.Mat.Test do +defmodule Evision.KeyPoint.Test do use ExUnit.Case test "Keypoints" do diff --git a/test/videocapture_test.exs b/test/videocapture_test.exs index 6cde1679..5296d9df 100644 --- a/test/videocapture_test.exs +++ b/test/videocapture_test.exs @@ -6,8 +6,7 @@ defmodule Evision.VideoCapture.Test do @tag :video @tag :require_ffmpeg test "open a video file and read one frame" do - video = - Evision.VideoCapture.videoCapture(Path.join([__DIR__, "testdata", "videocapture_test.mp4"])) + video = Evision.VideoCapture.videoCapture(Path.join([__DIR__, "testdata", "videocapture_test.mp4"])) %Evision.VideoCapture{ isOpened: true, From 9dee2c10e5e0c7849b619a4956037a58406944ce Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 05:53:34 +0000 Subject: [PATCH 41/55] [test] added tests for `Evision.CUDA.{calcSqrSum,calcSum,cartToPolar,compare}` --- test/evision_gpumat_test.exs | 108 ++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 26868287..398e8e58 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -1,4 +1,4 @@ -if !Kernel.function_exported?(Evision.CUDA, :split, 1) do +if !Code.ensure_loaded?(Evision.CUDA.DFT) do defmodule Evision.CUDA.GpuMat.Test do end else @@ -209,6 +209,112 @@ else assert norm_bin == expected end + test "calcSqrSum" do + t = Nx.tensor([[1, 1], [2, 2]], type: :u8) + + sum = Evision.Mat.to_binary(Evision.CUDA.calcSqrSum(t)) + expected = Nx.to_binary(Nx.as_type(Nx.sum(Nx.power(t, 2)), :f64)) + + assert sum == expected + end + + test "calcSum" do + t = Nx.tensor([[1, 1], [2, 2]], type: :u8) + + sum = Evision.Mat.to_binary(Evision.CUDA.calcSum(t)) + expected = Nx.to_binary(Nx.as_type(Nx.sum(t), :f64)) + + assert sum == expected + end + + test "cartToPolar" do + real = Nx.tensor([1, 2, 3, 4], type: :f32) + imag = Nx.tensor([1, 2, 3, 4], type: :f32) + + {magnitude, angle} = Evision.CUDA.cartToPolar(real, imag) + magnitude = Nx.reshape(Evision.Mat.to_nx(magnitude, Nx.BinaryBackend), {:auto}) + expected_magnitude = Nx.tensor([1.414213, 2.828427, 4.242640, 5.656854]) + assert Nx.to_number(Nx.all_close(magnitude, expected_magnitude, rtol: 0.0001)) == 1 + + angle = Nx.reshape(Evision.Mat.to_nx(angle, Nx.BinaryBackend), {:auto}) + expected_angle = Nx.tensor([0.7853981, 0.7853981, 0.7853981, 0.7853981]) + assert Nx.to_number(Nx.all_close(angle, expected_angle, rtol: 0.0001)) == 1 + end + + test "cartToPolar (angleInDegrees)" do + real = Nx.tensor([1, 2, 3, 4], type: :f32) + imag = Nx.tensor([1, 2, 3, 4], type: :f32) + + {magnitude, angle} = Evision.CUDA.cartToPolar(real, imag, angleInDegrees: true) + magnitude = Nx.reshape(Evision.Mat.to_nx(magnitude, Nx.BinaryBackend), {:auto}) + expected_magnitude = Nx.tensor([1.414213, 2.828427, 4.242640, 5.656854]) + assert Nx.to_number(Nx.all_close(magnitude, expected_magnitude, rtol: 0.0001)) == 1 + + angle = Nx.reshape(Evision.Mat.to_nx(angle, Nx.BinaryBackend), {:auto}) + expected_angle = Nx.tensor([45.0, 45.0, 45.0, 45.0]) + assert Nx.to_number(Nx.all_close(angle, expected_angle, rtol: 0.0001)) == 1 + end + + test "compare 
CMP_EQ" do + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) + t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) + + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_EQ)) + expected = Nx.to_binary(Nx.multiply(Nx.equal(t1, t2), 255)) + + assert ret == expected + end + + test "compare CMP_GT" do + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) + t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) + + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_GT)) + expected = Nx.to_binary(Nx.multiply(Nx.greater(t1, t2), 255)) + + assert ret == expected + end + + test "compare CMP_GE" do + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) + t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) + + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_GE)) + expected = Nx.to_binary(Nx.multiply(Nx.greater_equal(t1, t2), 255)) + + assert ret == expected + end + + test "compare CMP_LT" do + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) + t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) + + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_LT)) + expected = Nx.to_binary(Nx.multiply(Nx.less(t1, t2), 255)) + + assert ret == expected + end + + test "compare CMP_LE" do + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) + t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) + + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_LE)) + expected = Nx.to_binary(Nx.multiply(Nx.less_equal(t1, t2), 255)) + + assert ret == expected + end + + test "compare CMP_NE" do + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) + t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) + + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_NE)) + expected = Nx.to_binary(Nx.multiply(Nx.not_equal(t1, t2), 255)) + + assert ret == expected + end + test "multiply" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From a97b2ec75be059f8ef49a69c1a09fbe5a7936d4d Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 05:58:56 +0000 Subject: [PATCH 42/55] [test] added tests for `Evision.CUDA.filp` --- test/evision_gpumat_test.exs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 398e8e58..45ed9316 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -315,6 +315,30 @@ else assert ret == expected end + test "flip (x-axis)" do + t = Nx.tensor([ + [0, 1, 0, 2], + [3, 0, 4, 0]], + type: :u8) + assert [3, 0, 4, 0, 0, 1, 0, 2] == Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 0))) + end + + test "flip (y-axis)" do + t = Nx.tensor([ + [0, 1, 0, 2], + [3, 0, 4, 0]], + type: :u8) + assert [2, 0, 1, 0, 0, 4, 0, 3] == Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 1))) + end + + test "flip (both axes)" do + t = Nx.tensor([ + [0, 1, 0, 2], + [3, 0, 4, 0]], + type: :u8) + assert [0, 4, 0, 3, 2, 0, 1, 0] == Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, -1))) + end + test "multiply" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From e45b8e0e24b80a5390693c7a2fc7c531b61f15e8 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 06:09:33 +0000 Subject: [PATCH 43/55] [test] added tests for `Evision.CUDA.gemm` --- test/evision_gpumat_test.exs | 64 ++++++++++++++++++++++++++++++++++++ 1 file 
changed, 64 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 45ed9316..2c8cfda3 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -339,6 +339,70 @@ else assert [0, 4, 0, 3, 2, 0, 1, 0] == Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, -1))) end + test "gemm" do + # t1.shape == {2, 3}, t2.shape == {3, 2}, t3.shape == {2, 2} + t1 = Nx.tensor([[1, 2, 3], [3, 4, 5]], type: :f32) + t2 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) + t3 = Nx.tensor([[1000, 2000], [3000, 4000]], type: :f32) + alpha = 0.5 + beta = 1.0 + + expected = Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(t1, t2), alpha), + Nx.multiply(t3, beta)) + ) + assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta)) + end + + test "gemm (GEMM_1_T)" do + # t1.shape == {3, 2}, t2.shape == {3, 2}, t3.shape == {2, 2} + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) + t2 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) + t3 = Nx.tensor([[1000, 2000], [3000, 4000]], type: :f32) + alpha = 0.5 + beta = 1.0 + + expected = Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(Nx.transpose(t1), t2), alpha), + Nx.multiply(t3, beta)) + ) + assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta, flags: Evision.Constant.cv_GEMM_1_T())) + end + + test "gemm (GEMM_3_T)" do + # t1.shape == {2, 3}, t2.shape == {3, 2}, t3.shape == {2, 2} + t1 = Nx.tensor([[1, 2, 3], [3, 4, 5]], type: :f32) + t2 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) + t3 = Nx.tensor([[1000, 3000], [2000, 4000]], type: :f32) + alpha = 0.5 + beta = 1.0 + + expected = Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(t1, t2), alpha), + Nx.multiply(Nx.transpose(t3), beta)) + ) + assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta, flags: Evision.Constant.cv_GEMM_3_T())) + end + + test "gemm (GEMM_1_T + GEMM_3_T)" do + # t1.shape == {2, 3}, t2.shape == {3, 2}, t3.shape == {2, 2} + t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) + t2 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) + t3 = Nx.tensor([[1000, 3000], [2000, 4000]], type: :f32) + alpha = 0.5 + beta = 1.0 + + expected = Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(Nx.transpose(t1), t2), alpha), + Nx.multiply(Nx.transpose(t3), beta)) + ) + assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta, flags: Evision.Constant.cv_GEMM_1_T() + Evision.Constant.cv_GEMM_3_T())) + end + test "multiply" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From 6f459786ca5d84b6a5a1cd513306e0884bae661a Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 06:16:40 +0000 Subject: [PATCH 44/55] [test] added tests for `Evision.CUDA.polarToCart` --- test/evision_gpumat_test.exs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 2c8cfda3..17cc45ce 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -418,6 +418,34 @@ else assert product == Evision.Mat.to_binary(Evision.CUDA.multiply(t1, t2, scale: scale)) end + test "polarToCart" do + magnitude = Nx.tensor([1.414213, 2.828427, 4.242640, 5.656854], type: :f32) + angle = Nx.tensor([0.7853981, 0.7853981, 0.7853981, 0.7853981], type: :f32) + + {real, imag} = Evision.CUDA.polarToCart(magnitude, angle) + real = Nx.reshape(Evision.Mat.to_nx(real, Nx.BinaryBackend), {:auto}) + expected_real = Nx.tensor([1, 2, 3, 4]) + assert 
Nx.to_number(Nx.all_close(real, expected_real, rtol: 0.0001)) == 1 + + imag = Nx.reshape(Evision.Mat.to_nx(imag, Nx.BinaryBackend), {:auto}) + expected_imag = Nx.tensor([1, 2, 3, 4]) + assert Nx.to_number(Nx.all_close(imag, expected_imag, rtol: 0.0001)) == 1 + end + + test "polarToCart (angleInDegrees)" do + magnitude = Nx.tensor([1.414213, 2.828427, 4.242640, 5.656854], type: :f32) + angle = Nx.tensor([45.0, 45.0, 45.0, 45.0], type: :f32) + + {real, imag} = Evision.CUDA.polarToCart(magnitude, angle, angleInDegrees: true) + real = Nx.reshape(Evision.Mat.to_nx(real, Nx.BinaryBackend), {:auto}) + expected_real = Nx.tensor([1, 2, 3, 4]) + assert Nx.to_number(Nx.all_close(real, expected_real, rtol: 0.0001)) == 1 + + imag = Nx.reshape(Evision.Mat.to_nx(imag, Nx.BinaryBackend), {:auto}) + expected_imag = Nx.tensor([1, 2, 3, 4]) + assert Nx.to_number(Nx.all_close(imag, expected_imag, rtol: 0.0001)) == 1 + end + test "subtract" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From f1d87f7a83191d5459aed7998684eab52ce8fa4b Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 06:20:22 +0000 Subject: [PATCH 45/55] [test] added tests for `Evision.CUDA.pow` --- test/evision_gpumat_test.exs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 17cc45ce..8378c567 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -446,6 +446,25 @@ else assert Nx.to_number(Nx.all_close(imag, expected_imag, rtol: 0.0001)) == 1 end + test "pow" do + t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + power = 0 + expected = Nx.to_binary(Nx.power(t, power)) + assert expected == Evision.Mat.to_binary(Evision.CUDA.pow(t, power)) + + power = 1 + expected = Nx.power(t, power) + assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 + + power = 2 + expected = Nx.power(t, power) + assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 + + power = 3 + expected = Nx.power(t, power) + assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 + end + test "subtract" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From e18a3bfcffd47df9e8a0fda5f5356e29878d458b Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 06:46:40 +0000 Subject: [PATCH 46/55] [Evision.Mat] support negative index --- lib/evision_mat.ex | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/lib/evision_mat.ex b/lib/evision_mat.ex index a6fee257..dadbd90b 100644 --- a/lib/evision_mat.ex +++ b/lib/evision_mat.ex @@ -165,9 +165,14 @@ defmodule Evision.Mat do end end + defp __handle_negative_range__(value, bound) when value < 0 do + value + bound + end + defp __handle_negative_range__(value, _bound), do: value + @doc false - def __standardise_range_list__(ranges, inclusive_range) do - Enum.map(ranges, fn r -> + def __standardise_range_list__(ranges, shape, inclusive_range) do + Enum.map(Enum.zip(ranges, Tuple.to_list(shape)), fn {r, dim} -> case r do :all -> :all @@ -175,11 +180,21 @@ defmodule Evision.Mat do {first, last} -> # {_, _} is cv::Range # hence we don't need to do anything to it + first = __handle_negative_range__(first, dim) + last = __handle_negative_range__(last, 
dim) {first, last} first..last//step -> # first..last//step is Elixir.Range # 0..0 should give [0] if `inclusive_range` is true + step = + if step == -1 and (last < 0 or first < 0) do + 1 + else + step + end + first = __handle_negative_range__(first, dim) + last = __handle_negative_range__(last, dim) {first, last} = __from_elixir_range__(first..last//step, allowed_step_size: [1]) if inclusive_range do @@ -191,6 +206,7 @@ defmodule Evision.Mat do end number when is_integer(number) -> + number = __handle_negative_range__(number, dim) # cv::Range is [start, end) # while Elixir.Range is [first, last] if inclusive_range do @@ -316,8 +332,9 @@ defmodule Evision.Mat do @spec roi(maybe_mat_in(), [{integer(), integer()} | Range.t() | :all]) :: maybe_mat_out() def roi(mat, ranges) when is_list(ranges) do + shape = mat.shape mat = __from_struct__(mat) - ranges = __standardise_range_list__(ranges, true) + ranges = __standardise_range_list__(ranges, shape, true) :evision_nif.mat_roi(mat: mat, ranges: ranges) |> Evision.Internal.Structurise.to_struct() @@ -340,7 +357,7 @@ defmodule Evision.Mat do with_mat end - ranges = __standardise_range_list__(ranges, true) + ranges = __standardise_range_list__(ranges, mat.shape, true) ranges = if tuple_size(mat.shape) > Enum.count(ranges) do @@ -801,7 +818,7 @@ defmodule Evision.Mat do @spec fetch(Evision.Mat.t(), list() | integer()) :: {:ok, maybe_mat_out() | nil} def fetch(mat, key) when is_list(key) do ranges = __generate_complete_range__(mat.dims, key) - ranges = __standardise_range_list__(ranges, true) + ranges = __standardise_range_list__(ranges, mat.shape, true) {:ok, roi(mat, ranges)} end From 83b97a563bfbdb7d4e2d22f0dbfcca6d7209c89d Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 06:47:22 +0000 Subject: [PATCH 47/55] [test] added tests for `Evision.CUDA.{sqr,sqrIntegral}` --- test/evision_gpumat_test.exs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 8378c567..aa574e63 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -465,6 +465,19 @@ else assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 end + test "sqr" do + t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) + expected = Nx.power(t, 2) + assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.sqr(t), Nx.BinaryBackend), rtol: 0.0001)) == 1 + end + + test "sqrIntegral" do + t = Nx.tensor([1, 2, 3, 4, 5, 6], type: :u8) + expected = Nx.as_type(Nx.cumulative_sum(Nx.power(t, 2)), :f64) + sqr_sum = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.sqrIntegral(t)[[1..-1, 1]], Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, sqr_sum, rtol: 0.0001)) == 1 + end + test "subtract" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From 534f115830f8a58dba50ff73ae69b41a089305bd Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 06:53:40 +0000 Subject: [PATCH 48/55] [test] added tests for `Evision.CUDA.{sqrSum,sqrt}` --- test/evision_gpumat_test.exs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index aa574e63..b5ce5dad 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -478,6 +478,26 @@ else assert Nx.to_number(Nx.all_close(expected, sqr_sum, rtol: 0.0001)) == 1 end + test "sqrSum" do + t = Nx.tensor([1, 2, 
3, 4, 5, 6], type: :u8) + expected = Nx.to_number(Nx.as_type(Nx.sum(Nx.power(t, 2)), :f64)) + {^expected, 0.0, 0.0, 0.0} = Evision.CUDA.sqrSum(t) + end + + test "sqrt" do + t = Nx.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], type: :f32) + expected = Nx.sqrt(t) + sqrt = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.sqrt(t), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, sqrt, rtol: 0.0001)) == 1 + end + + test "sqrt (integer)" do + t = Nx.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], type: :u8) + expected = Nx.as_type(Nx.round(Nx.sqrt(t)), :u8) + sqrt = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.sqrt(t), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, sqrt, rtol: 0.0001)) == 1 + end + test "subtract" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From 62f9eaea4c0d9b015f658b56fc59b1f8e0cfbea9 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 07:11:24 +0000 Subject: [PATCH 49/55] [test] added tests for `Evision.CUDA.rshift` --- test/evision_gpumat_test.exs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index b5ce5dad..b89d3dbf 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -465,6 +465,25 @@ else assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 end + test "rshift" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) + expected = Nx.right_shift(t, 1) + ret = Evision.Mat.to_nx(Evision.CUDA.rshift(t, {1}), Nx.BinaryBackend) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "rshift (3-channel)" do + t = Nx.reshape(Nx.tensor([128, 128, 128, 64, 64, 64, 32, 32, 32], type: :u8), {3, 3}) + expected = Nx.right_shift(t, Nx.tensor([1, 2, 3])) + + ret = Nx.squeeze( + Evision.Mat.to_nx( + Evision.CUDA.rshift( + Evision.Mat.last_dim_as_channel( + Nx.reshape(t, {1, 3, 3})), {1, 2, 3}), Nx.BinaryBackend)) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + test "sqr" do t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) expected = Nx.power(t, 2) From c744be86c5734efc05fa0f8bed0ae92d3c5ef201 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 07:25:12 +0000 Subject: [PATCH 50/55] [test] added tests for `Evision.CUDA.reduce` --- test/evision_gpumat_test.exs | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index b89d3dbf..baf55d04 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -465,6 +465,62 @@ else assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 end + test "reduce SUM by row" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) + expected = Nx.reduce(t, Nx.tensor(0), [axes: [0], keep_axes: true], fn x, y -> Nx.add(x, y) end) + ret = Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_SUM()), Nx.BinaryBackend) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "reduce SUM by col" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) + expected = Nx.reduce(t, Nx.tensor(0), [axes: [1], keep_axes: true], fn x, y -> Nx.add(x, y) end) + ret = Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_SUM()), Nx.BinaryBackend) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 
0.0001)) == 1 + end + + test "reduce AVG by row" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + expected = Nx.divide(Nx.sum(t, axes: [0]), 2) + ret = Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_AVG()), Nx.BinaryBackend) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "reduce AVG by col" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + expected = Nx.divide(Nx.sum(t, axes: [1]), 3) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_AVG()), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "reduce MAX by row" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + expected = Nx.reduce_max(t, axes: [0]) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_MAX()), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "reduce MAX by col" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + expected = Nx.reduce_max(t, axes: [1]) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_MAX()), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "reduce MIN by row" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + expected = Nx.reduce_min(t, axes: [0]) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_MIN()), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "reduce MIN by col" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) + expected = Nx.reduce_min(t, axes: [1]) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_MIN()), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + test "rshift" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) expected = Nx.right_shift(t, 1) From 6e70e41232fc11e65213802cfe38a19b2d25013e Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 07:32:11 +0000 Subject: [PATCH 51/55] [test] added tests for `Evision.CUDA.{lshift,log}` --- test/evision_gpumat_test.exs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index baf55d04..bb40bd88 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -403,6 +403,32 @@ else assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta, flags: Evision.Constant.cv_GEMM_1_T() + Evision.Constant.cv_GEMM_3_T())) end + test "lshift" do + t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) + expected = Nx.left_shift(t, 1) + ret = Evision.Mat.to_nx(Evision.CUDA.lshift(t, {1}), Nx.BinaryBackend) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "lshift (3-channel)" do + t = Nx.reshape(Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1], type: :u8), {3, 3}) + expected = Nx.left_shift(t, Nx.tensor([1, 2, 3])) + + ret = Nx.squeeze( + Evision.Mat.to_nx( + Evision.CUDA.lshift( + Evision.Mat.last_dim_as_channel( + Nx.reshape(t, {1, 3, 3})), {1, 2, 3}), Nx.BinaryBackend)) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "log" do + t = Nx.tensor([1, 10, 100, 1000], type: :f32) + expected = Nx.log(t) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.log(t), Nx.BinaryBackend), {:auto}) 
+ assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + test "multiply" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From 6e6aa6b5abc139f45b75baf66e1d55b70fb3941a Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 07:56:19 +0000 Subject: [PATCH 52/55] [test] added tests for `Evision.CUDA.{magnitude,magnitudeSqr}` --- test/evision_gpumat_test.exs | 59 ++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index bb40bd88..001f1d35 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -315,6 +315,13 @@ else assert ret == expected end + test "exp" do + t = Nx.tensor([1, 2, 3, 4], type: :f32) + expected = Nx.exp(t) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.exp(t), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + test "flip (x-axis)" do t = Nx.tensor([ [0, 1, 0, 2], @@ -429,6 +436,58 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + test "magnitude(x, y)" do + x = Nx.tensor([1, 0, 3, 4], type: :f32) + y = Nx.tensor([1, 2, 0, 4], type: :f32) + expected = Nx.sqrt(Nx.add(Nx.power(x, 2), Nx.power(y, 2))) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitude(x, y), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "magnitude(xy)" do + x = Nx.tensor([1, 0, 3, 4], type: :f32) + y = Nx.tensor([1, 2, 0, 4], type: :f32) + expected = Nx.sqrt(Nx.add(Nx.power(x, 2), Nx.power(y, 2))) + xy = Nx.transpose(Nx.stack([x, y])) + xy = Evision.Mat.last_dim_as_channel(Evision.Mat.from_nx_2d(xy)) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitude(xy), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "magnitudeSqr(x, y)" do + x = Nx.tensor([1, 0, 3, 4], type: :f32) + y = Nx.tensor([1, 2, 0, 4], type: :f32) + expected = Nx.add(Nx.power(x, 2), Nx.power(y, 2)) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitudeSqr(x, y), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "magnitudeSqr(xy)" do + x = Nx.tensor([1, 0, 3, 4], type: :f32) + y = Nx.tensor([1, 2, 0, 4], type: :f32) + expected = Nx.add(Nx.power(x, 2), Nx.power(y, 2)) + xy = Nx.transpose(Nx.stack([x, y])) + xy = Evision.Mat.last_dim_as_channel(Evision.Mat.from_nx_2d(xy)) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitudeSqr(xy), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "max" do + t1 = Nx.tensor([1, 0, 3, 0], type: :f32) + t2 = Nx.tensor([0, 2, 0, 4], type: :f32) + expected = Nx.max(t1, t2) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.max(t1, t2), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + + test "min" do + t1 = Nx.tensor([1, 0, 3, 0], type: :f32) + t2 = Nx.tensor([0, 2, 0, 4], type: :f32) + expected = Nx.min(t1, t2) + ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.min(t1, t2), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 + end + test "multiply" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) From 0378c8c30b377a2d4ef3002071426da5199bd479 Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 
08:32:30 +0000 Subject: [PATCH 53/55] [ci] CUDA precompile --- .github/workflows/linux-cuda-gnu.yml | 3 + .../workflows/linux-precompile-cuda-gnu.yml | 126 ++++++++++++++++++ mix.exs | 54 ++++++-- 3 files changed, 169 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/linux-precompile-cuda-gnu.yml diff --git a/.github/workflows/linux-cuda-gnu.yml b/.github/workflows/linux-cuda-gnu.yml index 5325929a..36cf4e93 100644 --- a/.github/workflows/linux-cuda-gnu.yml +++ b/.github/workflows/linux-cuda-gnu.yml @@ -61,10 +61,13 @@ jobs: include: - container: nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04 cuda_ver: "11.1.1" + cuda_id: "111" - container: nvidia/cuda:11.4.3-cudnn8-devel-ubuntu20.04 cuda_ver: "11.4.3" + cuda_id: "114" - container: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 cuda_ver: "11.8.0" + cuda_id: "118" container: ${{ matrix.container }} env: # container env vars diff --git a/.github/workflows/linux-precompile-cuda-gnu.yml b/.github/workflows/linux-precompile-cuda-gnu.yml new file mode 100644 index 00000000..9fa51800 --- /dev/null +++ b/.github/workflows/linux-precompile-cuda-gnu.yml @@ -0,0 +1,126 @@ +name: linux-precompile-cuda-gnu + +on: + push: + tags: + - 'v*' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + mix_compile: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + include: + - container: nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04 + cuda_ver: "11.1.1" + cuda_id: "111" + - container: nvidia/cuda:11.4.3-cudnn8-devel-ubuntu20.04 + cuda_ver: "11.4.3" + cuda_id: "114" + - container: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 + cuda_ver: "11.8.0" + cuda_id: "118" + container: ${{ matrix.container }} + env: + # container env vars + # ref link: https://github.com/elixir-nx/xla/blob/main/.github/workflows/release.yml + ImageOS: ubuntu20 + LANG: en_US.UTF-8 + LANGUAGE: en_US:en + LC_ALL: en_US.UTF-8 + DEBIAN_FRONTEND: noninteractive + # evision related env vars + MIX_ENV: test + OPENCV_VER: "4.7.0" + OTP_VERSION: "25.2" + ELIXIR_VERSION: "1.14.3" + EVISION_PREFER_PRECOMPILED: "false" + EVISION_ENABLE_CUDA: "true" + PKG_CONFIG_PATH: "/usr/lib/x86_64-linux-gnu/pkgconfig" + steps: + - uses: actions/checkout@v3 + + - name: Install system dependencies + run: | + apt-get update + apt-get install -y build-essential automake autoconf pkg-config bc m4 unzip zip curl git libssl-dev gzip python3 ca-certificates \ + locales curl wget cmake + echo "${LANG} UTF-8" >> /etc/locale.gen + locale-gen + update-locale LANG=${LANG} + + - uses: erlef/setup-beam@v1 + with: + otp-version: ${{ env.OTP_VERSION }} + elixir-version: ${{ env.ELIXIR_VERSION }} + + - name: Cache mix packages + id: cache-mix-deps + uses: actions/cache@v3 + with: + key: deps-${{ hashFiles('mix.lock') }}-${{ env.OTP_VERSION }}-${{ env.ELIXIR_VERSION }} + path: | + ./deps + + - name: Get mix deps + if: steps.cache-mix-deps.outputs.cache-hit != 'true' + run: | + mix deps.get + + - name: Cache OpenCV + id: cache-opencv + uses: actions/cache@v3 + with: + key: opencv-with-contrib-${{ env.OPENCV_VER }} + path: | + ./3rd_party + + - name: Download OpenCV + if: steps.cache-opencv.outputs.cache-hit != 'true' + run: | + bash scripts/download_opencv.sh ${OPENCV_VER} 3rd_party/cache 3rd_party/opencv/ + bash scripts/download_opencv_contrib.sh ${OPENCV_VER} 3rd_party/cache 3rd_party/opencv/ + + - name: Cache compiled OpenCV + id: cache-mix-compile_opencv + uses: actions/cache@v3 + with: + key: precompiled-opencv-${{ env.OPENCV_VER }}-cuda${{ matrix.cuda_ver 
}}-x86_64-linux-gnu-${{ hashFiles('Makefile') }} + path: | + ./_build/${{ env.MIX_ENV }}/lib/evision + ./c_src/headers.txt + ./c_src/configuration.private.hpp + + - name: Compile OpenCV + if: steps.cache-mix-compile_opencv.outputs.cache-hit != 'true' + run: | + mix compile_opencv + + - name: Mix Compile + run: | + rm -f _build/${{ env.MIX_ENV }}/lib/evision/priv/evision.so + ls -la ./c_src + mix compile + ls -la ./lib/generated + + - name: Create precompiled library + run: | + export PKG_NAME=evision-nif_${NIF_VERSION}-x86_64-linux-gnu-contrib-cuda${{ matrix.cuda_ver }}-${GITHUB_REF##*/v} + mkdir -p "${PKG_NAME}" + cp -a _build/${MIX_ENV}/lib/evision/priv "${PKG_NAME}" + cp -a lib/generated "${PKG_NAME}/elixir_generated" + cp -a src/generated "${PKG_NAME}/erlang_generated" + tar -czf "${PKG_NAME}.tar.gz" "${PKG_NAME}" + rm -rf "${PKG_NAME}" + ls -lah "${PKG_NAME}.tar.gz" + mkdir -p artifacts + mv "${PKG_NAME}.tar.gz" artifacts + + - uses: softprops/action-gh-release@v1 + with: + files: artifacts/*.tar.gz diff --git a/mix.exs b/mix.exs index c51424c8..7be8fbaa 100644 --- a/mix.exs +++ b/mix.exs @@ -7,6 +7,8 @@ defmodule Evision.MixProject.Metadata do def opencv_version, do: "4.7.0" # only means compatible. need to write more tests def compatible_opencv_versions, do: ["4.5.3", "4.5.4", "4.5.5", "4.6.0", "4.7.0"] + def default_cuda_version, do: "118" + def all_cuda_version, do: ["111", "114", "118"] end defmodule Mix.Tasks.Compile.EvisionPrecompiled do @@ -38,9 +40,16 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do def available_nif_urls(nif_version, version \\ Metadata.version()) do Enum.reduce(@available_targets, [], fn target, acc -> - no_contrib = get_download_url(target, version, nif_version, false) - with_contrib = get_download_url(target, version, nif_version, true) - [no_contrib, with_contrib] ++ acc + no_contrib = get_download_url(target, version, nif_version, false, false, "") + with_contrib = get_download_url(target, version, nif_version, true, false, "") + + with_cuda = if target == "x86_64-linux-gnu" do + Enum.map(Metadata.all_cuda_version(), fn cuda_ver -> get_download_url(target, version, nif_version, true, true, cuda_ver) end) + else + [] + end + + [no_contrib, with_contrib] ++ with_cuda ++ acc end) end @@ -54,7 +63,18 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do System.put_env("EVISION_ENABLE_CONTRIB", "false") end - get_download_url(target, version, nif_version, enable_contrib) + enable_cuda = System.get_env("EVISION_ENABLE_CUDA", "false") == "true" + + cuda_version = + if enable_cuda do + System.put_env("EVISION_ENABLE_CUDA", "true") + System.get_env("EVISION_CUDA_VERSION", Metadata.default_cuda_version()) + else + System.put_env("EVISION_ENABLE_CUDA", "false") + "" + end + + get_download_url(target, version, nif_version, enable_contrib, enable_cuda, cuda_version) end def checksum_file(app \\ Mix.Project.config()[:app]) when is_atom(app) do @@ -386,18 +406,22 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do end end - def filename(target, version, nif_version, enable_contrib, with_ext \\ "") + def filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version, with_ext \\ "") - def filename(target, version, nif_version, _enable_contrib = false, with_ext) do + def filename(target, version, nif_version, _enable_contrib = false, _enable_cuda, _cuda_version, with_ext) do "evision-nif_#{nif_version}-#{target}-#{version}#{with_ext}" end - def filename(target, version, nif_version, _enable_contrib = true, with_ext) do + def filename(target, 
version, nif_version, _enable_contrib = true, _enable_cuda=false, _cuda_version, with_ext) do "evision-nif_#{nif_version}-#{target}-contrib-#{version}#{with_ext}" end - def get_download_url(target, version, nif_version, enable_contrib) do - tar_file = filename(target, version, nif_version, enable_contrib, ".tar.gz") + def filename(target, version, nif_version, _enable_contrib = true, _enable_cuda=true, cuda_version, with_ext) do + "evision-nif_#{nif_version}-#{target}-contrib-cuda#{cuda_version}-#{version}#{with_ext}" + end + + def get_download_url(target, version, nif_version, enable_contrib, enable_cuda, cuda_version) do + tar_file = filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version, ".tar.gz") "#{Metadata.github_url()}/releases/download/v#{version}/#{tar_file}" end @@ -434,9 +458,9 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do Path.join([build_path, "lib", "#{app}", "priv"]) end - def prepare(target, os, version, nif_version, enable_contrib) do - name = filename(target, version, nif_version, enable_contrib) - filename = filename(target, version, nif_version, enable_contrib, ".tar.gz") + def prepare(target, os, version, nif_version, enable_contrib, enable_cuda, cuda_version) do + name = filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version) + filename = filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version, ".tar.gz") cache_dir = cache_dir() cache_file = Path.join([cache_dir, filename]) unarchive_dest_dir = Path.join([cache_dir, name]) @@ -509,7 +533,7 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do end if needs_download do - download_url = get_download_url(target, version, nif_version, enable_contrib) + download_url = get_download_url(target, version, nif_version, enable_contrib, enable_cuda, cuda_version) {:ok, _} = Application.ensure_all_started(:inets) {:ok, _} = Application.ensure_all_started(:ssl) @@ -645,7 +669,9 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do version = Metadata.version() nif_version = get_nif_version() enable_contrib = System.get_env("EVISION_ENABLE_CONTRIB", "true") == "true" - prepare(target, os, version, nif_version, enable_contrib) + enable_cuda = System.get_env("EVISION_ENABLE_CUDA", "false") == "true" + cuda_version = System.get_env("EVISION_CUDA_VERSION", Metadata.default_cuda_version()) + prepare(target, os, version, nif_version, enable_contrib, enable_cuda, cuda_version) else _ -> raise RuntimeError, "Cannot use precompiled binaries." 
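
Note on the CUDA precompiled artifacts added in this patch: selection happens entirely through environment variables (EVISION_ENABLE_CUDA and EVISION_CUDA_VERSION, with EVISION_ENABLE_CONTRIB defaulting to "true"), and CUDA artifacts are only published for the x86_64-linux-gnu target. Below is a minimal sketch of how the artifact name and download URL end up being assembled, mirroring the filename/7 and get_download_url/6 clauses above. The NIF version "2.16" is an assumed example value (it is resolved at compile time by get_nif_version/0), and the release version shown is the one current at this point in the series.

    # Sketch only -- mirrors the naming scheme in mix.exs above; not part of the patch.
    # A build-time opt-in would presumably look like:
    #   EVISION_ENABLE_CUDA=true EVISION_CUDA_VERSION=111 mix compile
    nif_version = "2.16"                                          # assumed example; see get_nif_version/0
    target = "x86_64-linux-gnu"                                   # the only target with CUDA artifacts
    cuda_version = System.get_env("EVISION_CUDA_VERSION", "118")  # one of "111", "114", "118"
    version = "0.1.26-rc2"
    tar_file = "evision-nif_#{nif_version}-#{target}-contrib-cuda#{cuda_version}-#{version}.tar.gz"
    url = "https://github.com/cocoa-xu/evision/releases/download/v#{version}/#{tar_file}"
    IO.puts(url)
    # => https://github.com/cocoa-xu/evision/releases/download/v0.1.26-rc2/evision-nif_2.16-x86_64-linux-gnu-contrib-cuda118-0.1.26-rc2.tar.gz
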
From 0e8dba155bb2c75633344470d5b45b8fbf62051f Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 08:33:12 +0000 Subject: [PATCH 54/55] 0.1.26-rc3 --- lib/evision_mat.ex | 2 + mix.exs | 74 ++++++-- test/evision_gpumat_test.exs | 344 +++++++++++++++++++++++++++-------- test/videocapture_test.exs | 3 +- 4 files changed, 329 insertions(+), 94 deletions(-) diff --git a/lib/evision_mat.ex b/lib/evision_mat.ex index dadbd90b..335feb92 100644 --- a/lib/evision_mat.ex +++ b/lib/evision_mat.ex @@ -168,6 +168,7 @@ defmodule Evision.Mat do defp __handle_negative_range__(value, bound) when value < 0 do value + bound end + defp __handle_negative_range__(value, _bound), do: value @doc false @@ -193,6 +194,7 @@ defmodule Evision.Mat do else step end + first = __handle_negative_range__(first, dim) last = __handle_negative_range__(last, dim) {first, last} = __from_elixir_range__(first..last//step, allowed_step_size: [1]) diff --git a/mix.exs b/mix.exs index 7be8fbaa..174415f4 100644 --- a/mix.exs +++ b/mix.exs @@ -2,7 +2,7 @@ defmodule Evision.MixProject.Metadata do @moduledoc false def app, do: :evision - def version, do: "0.1.26-rc2" + def version, do: "0.1.26-rc3" def github_url, do: "https://github.com/cocoa-xu/evision" def opencv_version, do: "4.7.0" # only means compatible. need to write more tests @@ -43,11 +43,14 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do no_contrib = get_download_url(target, version, nif_version, false, false, "") with_contrib = get_download_url(target, version, nif_version, true, false, "") - with_cuda = if target == "x86_64-linux-gnu" do - Enum.map(Metadata.all_cuda_version(), fn cuda_ver -> get_download_url(target, version, nif_version, true, true, cuda_ver) end) - else - [] - end + with_cuda = + if target == "x86_64-linux-gnu" do + Enum.map(Metadata.all_cuda_version(), fn cuda_ver -> + get_download_url(target, version, nif_version, true, true, cuda_ver) + end) + else + [] + end [no_contrib, with_contrib] ++ with_cuda ++ acc end) @@ -406,22 +409,56 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do end end - def filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version, with_ext \\ "") + def filename( + target, + version, + nif_version, + enable_contrib, + enable_cuda, + cuda_version, + with_ext \\ "" + ) - def filename(target, version, nif_version, _enable_contrib = false, _enable_cuda, _cuda_version, with_ext) do + def filename( + target, + version, + nif_version, + _enable_contrib = false, + _enable_cuda, + _cuda_version, + with_ext + ) do "evision-nif_#{nif_version}-#{target}-#{version}#{with_ext}" end - def filename(target, version, nif_version, _enable_contrib = true, _enable_cuda=false, _cuda_version, with_ext) do + def filename( + target, + version, + nif_version, + _enable_contrib = true, + _enable_cuda = false, + _cuda_version, + with_ext + ) do "evision-nif_#{nif_version}-#{target}-contrib-#{version}#{with_ext}" end - def filename(target, version, nif_version, _enable_contrib = true, _enable_cuda=true, cuda_version, with_ext) do + def filename( + target, + version, + nif_version, + _enable_contrib = true, + _enable_cuda = true, + cuda_version, + with_ext + ) do "evision-nif_#{nif_version}-#{target}-contrib-cuda#{cuda_version}-#{version}#{with_ext}" end def get_download_url(target, version, nif_version, enable_contrib, enable_cuda, cuda_version) do - tar_file = filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version, ".tar.gz") + tar_file = + filename(target, version, nif_version, enable_contrib, 
enable_cuda, cuda_version, ".tar.gz") + "#{Metadata.github_url()}/releases/download/v#{version}/#{tar_file}" end @@ -460,7 +497,10 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do def prepare(target, os, version, nif_version, enable_contrib, enable_cuda, cuda_version) do name = filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version) - filename = filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version, ".tar.gz") + + filename = + filename(target, version, nif_version, enable_contrib, enable_cuda, cuda_version, ".tar.gz") + cache_dir = cache_dir() cache_file = Path.join([cache_dir, filename]) unarchive_dest_dir = Path.join([cache_dir, name]) @@ -533,7 +573,15 @@ defmodule Mix.Tasks.Compile.EvisionPrecompiled do end if needs_download do - download_url = get_download_url(target, version, nif_version, enable_contrib, enable_cuda, cuda_version) + download_url = + get_download_url( + target, + version, + nif_version, + enable_contrib, + enable_cuda, + cuda_version + ) {:ok, _} = Application.ensure_all_started(:inets) {:ok, _} = Application.ensure_all_started(:ssl) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 001f1d35..02d3ccad 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -127,8 +127,11 @@ else beta = 0.2 gamma = 10 - weighted_sum = Nx.to_binary(Nx.add(Nx.add(Nx.multiply(t1, alpha), Nx.multiply(t2, beta)), gamma)) - assert weighted_sum == Evision.Mat.to_binary(Evision.CUDA.addWeighted(t1, alpha, t2, beta, gamma)) + weighted_sum = + Nx.to_binary(Nx.add(Nx.add(Nx.multiply(t1, alpha), Nx.multiply(t2, beta)), gamma)) + + assert weighted_sum == + Evision.Mat.to_binary(Evision.CUDA.addWeighted(t1, alpha, t2, beta, gamma)) end test "calcHist" do @@ -141,6 +144,7 @@ else %Mat{} = downloaded = GpuMat.download(result) ret = Nx.to_flat_list(Evision.Mat.to_nx(downloaded)) + expected = Nx.broadcast(Nx.tensor(0, type: :s32), {256}) |> Nx.put_slice([10], Nx.tensor([2])) @@ -154,7 +158,7 @@ else test "calcNorm L1" do t = Nx.tensor([[10, 10, 20], [20, 20, 30]], type: :u8) - norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_L1)) + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_L1())) expected = Nx.to_binary(Nx.as_type(Nx.sum(Nx.abs(t)), :f64)) assert norm_bin == expected @@ -163,7 +167,7 @@ else test "calcNorm L2" do t = Nx.tensor([[1, 1]], type: :u8) - norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_L2)) + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_L2())) expected = Nx.to_binary(Nx.sqrt(Nx.as_type(Nx.sum(Nx.power(t, 2)), :f64))) assert norm_bin == expected @@ -172,7 +176,7 @@ else test "calcNorm INF" do t = Nx.tensor([1, 42], type: :u8) - norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_INF)) + norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNorm(t, Evision.Constant.cv_NORM_INF())) expected = Nx.to_binary(Nx.as_type(Nx.take(t, Nx.argmax(t)), :s32)) assert norm_bin == expected @@ -182,7 +186,11 @@ else t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) t2 = Nx.tensor([[9, 9], [19, 19]], type: :u8) - norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_L1)) + norm_bin = + Evision.Mat.to_binary( + Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_L1()) + ) + expected = Nx.to_binary(Nx.as_type(Nx.sum(Nx.abs(Nx.subtract(t1, t2))), :s32)) assert norm_bin == 
expected @@ -192,7 +200,11 @@ else t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) t2 = Nx.tensor([[9, 9], [19, 19]], type: :u8) - norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_L2)) + norm_bin = + Evision.Mat.to_binary( + Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_L2()) + ) + expected = Nx.to_binary(Nx.as_type(Nx.sqrt(Nx.sum(Nx.abs(Nx.subtract(t1, t2)))), :f64)) assert norm_bin == expected @@ -202,7 +214,11 @@ else t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) t2 = Nx.tensor([[9, 9], [19, 15]], type: :u8) - norm_bin = Evision.Mat.to_binary(Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_INF)) + norm_bin = + Evision.Mat.to_binary( + Evision.CUDA.calcNormDiff(t1, t2, normType: Evision.Constant.cv_NORM_INF()) + ) + diff = Nx.flatten(Nx.abs(Nx.subtract(t1, t2))) expected = Nx.to_binary(Nx.as_type(Nx.take(diff, Nx.argmax(diff)), :s32)) @@ -259,7 +275,7 @@ else t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) - ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_EQ)) + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_EQ())) expected = Nx.to_binary(Nx.multiply(Nx.equal(t1, t2), 255)) assert ret == expected @@ -269,7 +285,7 @@ else t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) - ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_GT)) + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_GT())) expected = Nx.to_binary(Nx.multiply(Nx.greater(t1, t2), 255)) assert ret == expected @@ -279,7 +295,7 @@ else t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) - ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_GE)) + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_GE())) expected = Nx.to_binary(Nx.multiply(Nx.greater_equal(t1, t2), 255)) assert ret == expected @@ -289,7 +305,7 @@ else t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) - ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_LT)) + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_LT())) expected = Nx.to_binary(Nx.multiply(Nx.less(t1, t2), 255)) assert ret == expected @@ -299,7 +315,7 @@ else t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) - ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_LE)) + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_LE())) expected = Nx.to_binary(Nx.multiply(Nx.less_equal(t1, t2), 255)) assert ret == expected @@ -309,7 +325,7 @@ else t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) - ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_NE)) + ret = Evision.Mat.to_binary(Evision.CUDA.compare(t1, t2, Evision.Constant.cv_CMP_NE())) expected = Nx.to_binary(Nx.multiply(Nx.not_equal(t1, t2), 255)) assert ret == expected @@ -323,27 +339,45 @@ else end test "flip (x-axis)" do - t = Nx.tensor([ - [0, 1, 0, 2], - [3, 0, 4, 0]], - type: :u8) - assert [3, 0, 4, 0, 0, 1, 0, 2] == Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 0))) + t = + Nx.tensor( + [ + [0, 1, 
0, 2], + [3, 0, 4, 0] + ], + type: :u8 + ) + + assert [3, 0, 4, 0, 0, 1, 0, 2] == + Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 0))) end test "flip (y-axis)" do - t = Nx.tensor([ - [0, 1, 0, 2], - [3, 0, 4, 0]], - type: :u8) - assert [2, 0, 1, 0, 0, 4, 0, 3] == Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 1))) + t = + Nx.tensor( + [ + [0, 1, 0, 2], + [3, 0, 4, 0] + ], + type: :u8 + ) + + assert [2, 0, 1, 0, 0, 4, 0, 3] == + Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 1))) end test "flip (both axes)" do - t = Nx.tensor([ - [0, 1, 0, 2], - [3, 0, 4, 0]], - type: :u8) - assert [0, 4, 0, 3, 2, 0, 1, 0] == Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, -1))) + t = + Nx.tensor( + [ + [0, 1, 0, 2], + [3, 0, 4, 0] + ], + type: :u8 + ) + + assert [0, 4, 0, 3, 2, 0, 1, 0] == + Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, -1))) end test "gemm" do @@ -354,11 +388,14 @@ else alpha = 0.5 beta = 1.0 - expected = Nx.to_binary( - Nx.add( - Nx.multiply(Nx.dot(t1, t2), alpha), - Nx.multiply(t3, beta)) - ) + expected = + Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(t1, t2), alpha), + Nx.multiply(t3, beta) + ) + ) + assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta)) end @@ -370,12 +407,20 @@ else alpha = 0.5 beta = 1.0 - expected = Nx.to_binary( - Nx.add( - Nx.multiply(Nx.dot(Nx.transpose(t1), t2), alpha), - Nx.multiply(t3, beta)) - ) - assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta, flags: Evision.Constant.cv_GEMM_1_T())) + expected = + Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(Nx.transpose(t1), t2), alpha), + Nx.multiply(t3, beta) + ) + ) + + assert expected == + Evision.Mat.to_binary( + Evision.CUDA.gemm(t1, t2, alpha, t3, beta, + flags: Evision.Constant.cv_GEMM_1_T() + ) + ) end test "gemm (GEMM_3_T)" do @@ -386,12 +431,20 @@ else alpha = 0.5 beta = 1.0 - expected = Nx.to_binary( - Nx.add( - Nx.multiply(Nx.dot(t1, t2), alpha), - Nx.multiply(Nx.transpose(t3), beta)) - ) - assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta, flags: Evision.Constant.cv_GEMM_3_T())) + expected = + Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(t1, t2), alpha), + Nx.multiply(Nx.transpose(t3), beta) + ) + ) + + assert expected == + Evision.Mat.to_binary( + Evision.CUDA.gemm(t1, t2, alpha, t3, beta, + flags: Evision.Constant.cv_GEMM_3_T() + ) + ) end test "gemm (GEMM_1_T + GEMM_3_T)" do @@ -402,12 +455,20 @@ else alpha = 0.5 beta = 1.0 - expected = Nx.to_binary( - Nx.add( - Nx.multiply(Nx.dot(Nx.transpose(t1), t2), alpha), - Nx.multiply(Nx.transpose(t3), beta)) - ) - assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta, flags: Evision.Constant.cv_GEMM_1_T() + Evision.Constant.cv_GEMM_3_T())) + expected = + Nx.to_binary( + Nx.add( + Nx.multiply(Nx.dot(Nx.transpose(t1), t2), alpha), + Nx.multiply(Nx.transpose(t3), beta) + ) + ) + + assert expected == + Evision.Mat.to_binary( + Evision.CUDA.gemm(t1, t2, alpha, t3, beta, + flags: Evision.Constant.cv_GEMM_1_T() + Evision.Constant.cv_GEMM_3_T() + ) + ) end test "lshift" do @@ -421,11 +482,17 @@ else t = Nx.reshape(Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1], type: :u8), {3, 3}) expected = Nx.left_shift(t, Nx.tensor([1, 2, 3])) - ret = Nx.squeeze( - Evision.Mat.to_nx( - Evision.CUDA.lshift( - Evision.Mat.last_dim_as_channel( - Nx.reshape(t, {1, 3, 3})), {1, 2, 3}), Nx.BinaryBackend)) + ret = + Nx.squeeze( + Evision.Mat.to_nx( + Evision.CUDA.lshift( + Evision.Mat.last_dim_as_channel(Nx.reshape(t, {1, 3, 3})), + {1, 
2, 3} + ), + Nx.BinaryBackend + ) + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end @@ -440,7 +507,10 @@ else x = Nx.tensor([1, 0, 3, 4], type: :f32) y = Nx.tensor([1, 2, 0, 4], type: :f32) expected = Nx.sqrt(Nx.add(Nx.power(x, 2), Nx.power(y, 2))) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitude(x, y), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitude(x, y), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end @@ -458,7 +528,13 @@ else x = Nx.tensor([1, 0, 3, 4], type: :f32) y = Nx.tensor([1, 2, 0, 4], type: :f32) expected = Nx.add(Nx.power(x, 2), Nx.power(y, 2)) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitudeSqr(x, y), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape( + Evision.Mat.to_nx(Evision.CUDA.magnitudeSqr(x, y), Nx.BinaryBackend), + {:auto} + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end @@ -468,7 +544,10 @@ else expected = Nx.add(Nx.power(x, 2), Nx.power(y, 2)) xy = Nx.transpose(Nx.stack([x, y])) xy = Evision.Mat.last_dim_as_channel(Evision.Mat.from_nx_2d(xy)) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitudeSqr(xy), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.magnitudeSqr(xy), Nx.BinaryBackend), {:auto}) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end @@ -539,70 +618,158 @@ else power = 1 expected = Nx.power(t, power) - assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 + + assert Nx.to_number( + Nx.all_close( + expected, + Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), + rtol: 0.0001 + ) + ) == 1 power = 2 expected = Nx.power(t, power) - assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 + + assert Nx.to_number( + Nx.all_close( + expected, + Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), + rtol: 0.0001 + ) + ) == 1 power = 3 expected = Nx.power(t, power) - assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), rtol: 0.0001)) == 1 + + assert Nx.to_number( + Nx.all_close( + expected, + Evision.Mat.to_nx(Evision.CUDA.pow(t, power), Nx.BinaryBackend), + rtol: 0.0001 + ) + ) == 1 end test "reduce SUM by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) - expected = Nx.reduce(t, Nx.tensor(0), [axes: [0], keep_axes: true], fn x, y -> Nx.add(x, y) end) - ret = Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_SUM()), Nx.BinaryBackend) + + expected = + Nx.reduce(t, Nx.tensor(0), [axes: [0], keep_axes: true], fn x, y -> Nx.add(x, y) end) + + ret = + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_SUM()), + Nx.BinaryBackend + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end test "reduce SUM by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) - expected = Nx.reduce(t, Nx.tensor(0), [axes: [1], keep_axes: true], fn x, y -> Nx.add(x, y) end) - ret = Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_SUM()), Nx.BinaryBackend) + + expected = + Nx.reduce(t, Nx.tensor(0), [axes: [1], keep_axes: true], fn x, y -> Nx.add(x, y) end) + + ret = + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_SUM()), + Nx.BinaryBackend + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 
0.0001)) == 1 end test "reduce AVG by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.divide(Nx.sum(t, axes: [0]), 2) - ret = Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_AVG()), Nx.BinaryBackend) + + ret = + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_AVG()), + Nx.BinaryBackend + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end test "reduce AVG by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.divide(Nx.sum(t, axes: [1]), 3) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_AVG()), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape( + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_AVG()), + Nx.BinaryBackend + ), + {:auto} + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end test "reduce MAX by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_max(t, axes: [0]) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_MAX()), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape( + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_MAX()), + Nx.BinaryBackend + ), + {:auto} + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end test "reduce MAX by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_max(t, axes: [1]) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_MAX()), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape( + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_MAX()), + Nx.BinaryBackend + ), + {:auto} + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end test "reduce MIN by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_min(t, axes: [0]) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_MIN()), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape( + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 0, Evision.Constant.cv_REDUCE_MIN()), + Nx.BinaryBackend + ), + {:auto} + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end test "reduce MIN by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_min(t, axes: [1]) - ret = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_MIN()), Nx.BinaryBackend), {:auto}) + + ret = + Nx.reshape( + Evision.Mat.to_nx( + Evision.CUDA.reduce(t, 1, Evision.Constant.cv_REDUCE_MIN()), + Nx.BinaryBackend + ), + {:auto} + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end @@ -617,24 +784,41 @@ else t = Nx.reshape(Nx.tensor([128, 128, 128, 64, 64, 64, 32, 32, 32], type: :u8), {3, 3}) expected = Nx.right_shift(t, Nx.tensor([1, 2, 3])) - ret = Nx.squeeze( - Evision.Mat.to_nx( - Evision.CUDA.rshift( - Evision.Mat.last_dim_as_channel( - Nx.reshape(t, {1, 3, 3})), {1, 2, 3}), Nx.BinaryBackend)) + ret = + Nx.squeeze( + Evision.Mat.to_nx( + Evision.CUDA.rshift( + Evision.Mat.last_dim_as_channel(Nx.reshape(t, {1, 3, 3})), + {1, 2, 3} + ), + Nx.BinaryBackend + ) + ) + assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end test "sqr" do t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) expected = Nx.power(t, 2) - assert Nx.to_number(Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.sqr(t), Nx.BinaryBackend), rtol: 0.0001)) == 1 + + assert 
Nx.to_number( + Nx.all_close(expected, Evision.Mat.to_nx(Evision.CUDA.sqr(t), Nx.BinaryBackend), + rtol: 0.0001 + ) + ) == 1 end test "sqrIntegral" do t = Nx.tensor([1, 2, 3, 4, 5, 6], type: :u8) expected = Nx.as_type(Nx.cumulative_sum(Nx.power(t, 2)), :f64) - sqr_sum = Nx.reshape(Evision.Mat.to_nx(Evision.CUDA.sqrIntegral(t)[[1..-1, 1]], Nx.BinaryBackend), {:auto}) + + sqr_sum = + Nx.reshape( + Evision.Mat.to_nx(Evision.CUDA.sqrIntegral(t)[[1..-1, 1]], Nx.BinaryBackend), + {:auto} + ) + assert Nx.to_number(Nx.all_close(expected, sqr_sum, rtol: 0.0001)) == 1 end diff --git a/test/videocapture_test.exs b/test/videocapture_test.exs index 5296d9df..6cde1679 100644 --- a/test/videocapture_test.exs +++ b/test/videocapture_test.exs @@ -6,7 +6,8 @@ defmodule Evision.VideoCapture.Test do @tag :video @tag :require_ffmpeg test "open a video file and read one frame" do - video = Evision.VideoCapture.videoCapture(Path.join([__DIR__, "testdata", "videocapture_test.mp4"])) + video = + Evision.VideoCapture.videoCapture(Path.join([__DIR__, "testdata", "videocapture_test.mp4"])) %Evision.VideoCapture{ isOpened: true, From d4eb6c74cb2f73d4453846d1b7d970fd8b8fd49f Mon Sep 17 00:00:00 2001 From: Cocoa Date: Mon, 16 Jan 2023 08:51:02 +0000 Subject: [PATCH 55/55] tag each test --- test/evision_gpumat_test.exs | 69 +++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/test/evision_gpumat_test.exs b/test/evision_gpumat_test.exs index 02d3ccad..37855545 100644 --- a/test/evision_gpumat_test.exs +++ b/test/evision_gpumat_test.exs @@ -8,8 +8,8 @@ else alias Evision.Mat alias Evision.CUDA.GpuMat - @tag :require_cuda describe "Basic Operations" do + @tag :require_cuda test "load an image from file" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) @@ -24,6 +24,7 @@ else } = gpumat end + @tag :require_cuda test "explicitly upload an Evision.Mat" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) @@ -41,6 +42,7 @@ else } = gpumat end + @tag :require_cuda test "manually allocate a GpuMat" do gpumat = Evision.CUDA.GpuMat.gpuMat(1000, 1200, Evision.Constant.cv_8UC3()) assert Evision.CUDA.GpuMat.cudaPtr(gpumat) > 0 @@ -54,6 +56,7 @@ else } = gpumat end + @tag :require_cuda test "split channels" do %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"])) @@ -85,6 +88,7 @@ else } = r end + @tag :require_cuda test "abs" do t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :s8) mat = Evision.CUDA.GpuMat.gpuMat(t) @@ -93,12 +97,14 @@ else assert bin == Nx.to_binary(Nx.abs(t)) end + @tag :require_cuda test "absSum" do t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) abs_sum = Nx.to_number(Nx.sum(Nx.abs(t))) {^abs_sum, 0.0, 0.0, 0.0} = Evision.CUDA.absSum(t) end + @tag :require_cuda test "absSum with mask" do t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) m = Nx.tensor([[1, 0, 0], [0, 0, 1]], type: :u8) @@ -106,6 +112,7 @@ else {^abs_sum, 0.0, 0.0, 0.0} = Evision.CUDA.absSum(t, mask: m) end + @tag :require_cuda test "absdiff" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) @@ -113,6 +120,7 @@ else assert absdiff == Evision.Mat.to_binary(Evision.CUDA.absdiff(t1, t2)) end + @tag :require_cuda test "add" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) @@ -120,6 +128,7 @@ else assert sum == Evision.Mat.to_binary(Evision.CUDA.add(t1, t2)) end + @tag :require_cuda test "addWeighted" do t1 = 
Nx.tensor([[100, 200, 300], [400, 500, 600]], type: :f32) alpha = 0.1 @@ -134,6 +143,7 @@ else Evision.Mat.to_binary(Evision.CUDA.addWeighted(t1, alpha, t2, beta, gamma)) end + @tag :require_cuda test "calcHist" do t = Nx.tensor([[10, 10, 20], [20, 20, 30]], type: :u8) @@ -155,6 +165,7 @@ else assert ret == expected end + @tag :require_cuda test "calcNorm L1" do t = Nx.tensor([[10, 10, 20], [20, 20, 30]], type: :u8) @@ -164,6 +175,7 @@ else assert norm_bin == expected end + @tag :require_cuda test "calcNorm L2" do t = Nx.tensor([[1, 1]], type: :u8) @@ -173,6 +185,7 @@ else assert norm_bin == expected end + @tag :require_cuda test "calcNorm INF" do t = Nx.tensor([1, 42], type: :u8) @@ -182,6 +195,7 @@ else assert norm_bin == expected end + @tag :require_cuda test "calcNormDiff L1" do t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) t2 = Nx.tensor([[9, 9], [19, 19]], type: :u8) @@ -196,6 +210,7 @@ else assert norm_bin == expected end + @tag :require_cuda test "calcNormDiff L2" do t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) t2 = Nx.tensor([[9, 9], [19, 19]], type: :u8) @@ -210,6 +225,7 @@ else assert norm_bin == expected end + @tag :require_cuda test "calcNormDiff INF" do t1 = Nx.tensor([[10, 10], [20, 20]], type: :u8) t2 = Nx.tensor([[9, 9], [19, 15]], type: :u8) @@ -225,6 +241,7 @@ else assert norm_bin == expected end + @tag :require_cuda test "calcSqrSum" do t = Nx.tensor([[1, 1], [2, 2]], type: :u8) @@ -234,6 +251,7 @@ else assert sum == expected end + @tag :require_cuda test "calcSum" do t = Nx.tensor([[1, 1], [2, 2]], type: :u8) @@ -243,6 +261,7 @@ else assert sum == expected end + @tag :require_cuda test "cartToPolar" do real = Nx.tensor([1, 2, 3, 4], type: :f32) imag = Nx.tensor([1, 2, 3, 4], type: :f32) @@ -257,6 +276,7 @@ else assert Nx.to_number(Nx.all_close(angle, expected_angle, rtol: 0.0001)) == 1 end + @tag :require_cuda test "cartToPolar (angleInDegrees)" do real = Nx.tensor([1, 2, 3, 4], type: :f32) imag = Nx.tensor([1, 2, 3, 4], type: :f32) @@ -271,6 +291,7 @@ else assert Nx.to_number(Nx.all_close(angle, expected_angle, rtol: 0.0001)) == 1 end + @tag :require_cuda test "compare CMP_EQ" do t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) @@ -281,6 +302,7 @@ else assert ret == expected end + @tag :require_cuda test "compare CMP_GT" do t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) @@ -291,6 +313,7 @@ else assert ret == expected end + @tag :require_cuda test "compare CMP_GE" do t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) @@ -301,6 +324,7 @@ else assert ret == expected end + @tag :require_cuda test "compare CMP_LT" do t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) @@ -311,6 +335,7 @@ else assert ret == expected end + @tag :require_cuda test "compare CMP_LE" do t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) @@ -321,6 +346,7 @@ else assert ret == expected end + @tag :require_cuda test "compare CMP_NE" do t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :u8) t2 = Nx.tensor([[5, 6], [3, 4], [1, 2]], type: :u8) @@ -331,6 +357,7 @@ else assert ret == expected end + @tag :require_cuda test "exp" do t = Nx.tensor([1, 2, 3, 4], type: :f32) expected = Nx.exp(t) @@ -338,6 +365,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "flip (x-axis)" do t = 
Nx.tensor( @@ -352,6 +380,7 @@ else Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 0))) end + @tag :require_cuda test "flip (y-axis)" do t = Nx.tensor( @@ -366,6 +395,7 @@ else Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, 1))) end + @tag :require_cuda test "flip (both axes)" do t = Nx.tensor( @@ -380,6 +410,7 @@ else Nx.to_flat_list(Evision.Mat.to_nx(Evision.CUDA.flip(t, -1))) end + @tag :require_cuda test "gemm" do # t1.shape == {2, 3}, t2.shape == {3, 2}, t3.shape == {2, 2} t1 = Nx.tensor([[1, 2, 3], [3, 4, 5]], type: :f32) @@ -399,6 +430,7 @@ else assert expected == Evision.Mat.to_binary(Evision.CUDA.gemm(t1, t2, alpha, t3, beta)) end + @tag :require_cuda test "gemm (GEMM_1_T)" do # t1.shape == {3, 2}, t2.shape == {3, 2}, t3.shape == {2, 2} t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) @@ -423,6 +455,7 @@ else ) end + @tag :require_cuda test "gemm (GEMM_3_T)" do # t1.shape == {2, 3}, t2.shape == {3, 2}, t3.shape == {2, 2} t1 = Nx.tensor([[1, 2, 3], [3, 4, 5]], type: :f32) @@ -447,6 +480,7 @@ else ) end + @tag :require_cuda test "gemm (GEMM_1_T + GEMM_3_T)" do # t1.shape == {2, 3}, t2.shape == {3, 2}, t3.shape == {2, 2} t1 = Nx.tensor([[1, 2], [3, 4], [5, 6]], type: :f32) @@ -471,6 +505,7 @@ else ) end + @tag :require_cuda test "lshift" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) expected = Nx.left_shift(t, 1) @@ -478,6 +513,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "lshift (3-channel)" do t = Nx.reshape(Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1], type: :u8), {3, 3}) expected = Nx.left_shift(t, Nx.tensor([1, 2, 3])) @@ -496,6 +532,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "log" do t = Nx.tensor([1, 10, 100, 1000], type: :f32) expected = Nx.log(t) @@ -503,6 +540,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "magnitude(x, y)" do x = Nx.tensor([1, 0, 3, 4], type: :f32) y = Nx.tensor([1, 2, 0, 4], type: :f32) @@ -514,6 +552,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "magnitude(xy)" do x = Nx.tensor([1, 0, 3, 4], type: :f32) y = Nx.tensor([1, 2, 0, 4], type: :f32) @@ -524,6 +563,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "magnitudeSqr(x, y)" do x = Nx.tensor([1, 0, 3, 4], type: :f32) y = Nx.tensor([1, 2, 0, 4], type: :f32) @@ -538,6 +578,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "magnitudeSqr(xy)" do x = Nx.tensor([1, 0, 3, 4], type: :f32) y = Nx.tensor([1, 2, 0, 4], type: :f32) @@ -551,6 +592,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "max" do t1 = Nx.tensor([1, 0, 3, 0], type: :f32) t2 = Nx.tensor([0, 2, 0, 4], type: :f32) @@ -559,6 +601,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "min" do t1 = Nx.tensor([1, 0, 3, 0], type: :f32) t2 = Nx.tensor([0, 2, 0, 4], type: :f32) @@ -567,6 +610,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "multiply" do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) @@ -574,6 +618,7 @@ else assert product == Evision.Mat.to_binary(Evision.CUDA.multiply(t1, t2)) end + @tag :require_cuda test "multiply with scale" 
do t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) @@ -582,6 +627,7 @@ else assert product == Evision.Mat.to_binary(Evision.CUDA.multiply(t1, t2, scale: scale)) end + @tag :require_cuda test "polarToCart" do magnitude = Nx.tensor([1.414213, 2.828427, 4.242640, 5.656854], type: :f32) angle = Nx.tensor([0.7853981, 0.7853981, 0.7853981, 0.7853981], type: :f32) @@ -596,6 +642,7 @@ else assert Nx.to_number(Nx.all_close(imag, expected_imag, rtol: 0.0001)) == 1 end + @tag :require_cuda test "polarToCart (angleInDegrees)" do magnitude = Nx.tensor([1.414213, 2.828427, 4.242640, 5.656854], type: :f32) angle = Nx.tensor([45.0, 45.0, 45.0, 45.0], type: :f32) @@ -610,6 +657,7 @@ else assert Nx.to_number(Nx.all_close(imag, expected_imag, rtol: 0.0001)) == 1 end + @tag :require_cuda test "pow" do t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) power = 0 @@ -650,6 +698,7 @@ else ) == 1 end + @tag :require_cuda test "reduce SUM by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) @@ -665,6 +714,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "reduce SUM by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) @@ -680,6 +730,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "reduce AVG by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.divide(Nx.sum(t, axes: [0]), 2) @@ -693,6 +744,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "reduce AVG by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.divide(Nx.sum(t, axes: [1]), 3) @@ -709,6 +761,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "reduce MAX by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_max(t, axes: [0]) @@ -725,6 +778,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "reduce MAX by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_max(t, axes: [1]) @@ -741,6 +795,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "reduce MIN by row" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_min(t, axes: [0]) @@ -757,6 +812,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "reduce MIN by col" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32) expected = Nx.reduce_min(t, axes: [1]) @@ -773,6 +829,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "rshift" do t = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :u8) expected = Nx.right_shift(t, 1) @@ -780,6 +837,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "rshift (3-channel)" do t = Nx.reshape(Nx.tensor([128, 128, 128, 64, 64, 64, 32, 32, 32], type: :u8), {3, 3}) expected = Nx.right_shift(t, Nx.tensor([1, 2, 3])) @@ -798,6 +856,7 @@ else assert Nx.to_number(Nx.all_close(expected, ret, rtol: 0.0001)) == 1 end + @tag :require_cuda test "sqr" do t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32) expected = Nx.power(t, 2) @@ -809,6 +868,7 @@ else ) == 1 end + @tag :require_cuda test "sqrIntegral" do t = Nx.tensor([1, 2, 3, 4, 5, 6], type: :u8) expected = Nx.as_type(Nx.cumulative_sum(Nx.power(t, 
2)), :f64)
@@ -822,12 +882,14 @@ else
         assert Nx.to_number(Nx.all_close(expected, sqr_sum, rtol: 0.0001)) == 1
       end
 
+      @tag :require_cuda
       test "sqrSum" do
         t = Nx.tensor([1, 2, 3, 4, 5, 6], type: :u8)
         expected = Nx.to_number(Nx.as_type(Nx.sum(Nx.power(t, 2)), :f64))
         {^expected, 0.0, 0.0, 0.0} = Evision.CUDA.sqrSum(t)
       end
 
+      @tag :require_cuda
       test "sqrt" do
         t = Nx.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], type: :f32)
         expected = Nx.sqrt(t)
@@ -835,6 +897,7 @@ else
         assert Nx.to_number(Nx.all_close(expected, sqrt, rtol: 0.0001)) == 1
       end
 
+      @tag :require_cuda
       test "sqrt (integer)" do
         t = Nx.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], type: :u8)
         expected = Nx.as_type(Nx.round(Nx.sqrt(t)), :u8)
@@ -842,6 +905,7 @@ else
         assert Nx.to_number(Nx.all_close(expected, sqrt, rtol: 0.0001)) == 1
       end
 
+      @tag :require_cuda
       test "subtract" do
         t1 = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32)
         t2 = Nx.tensor([[0, 1, 2], [3, 4, 5]], type: :f32)
@@ -849,6 +913,7 @@ else
         assert diff == Evision.Mat.to_binary(Evision.CUDA.subtract(t1, t2))
       end
 
+      @tag :require_cuda
       test "sum" do
         t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32)
         sum = Nx.to_number(Nx.sum(t))
@@ -856,6 +921,7 @@ else
         assert sum == cuda_sum
       end
 
+      @tag :require_cuda
       test "sum with mask" do
         t = Nx.tensor([[-1, 2, -3], [4, -5, 6]], type: :f32)
         m = Nx.tensor([[1, 0, 0], [0, 0, 1]], type: :u8)
@@ -864,6 +930,7 @@ else
         assert sum == cuda_sum
       end
 
+      @tag :require_cuda
       test "transpose" do
         %Mat{} = mat = Evision.imread(Path.join([__DIR__, "testdata", "test.png"]))