diff --git a/external/README.md b/external/README.md new file mode 100644 index 000000000..9cd00ef70 --- /dev/null +++ b/external/README.md @@ -0,0 +1,38 @@ +# Overview + +An example of using external native types in Warp. + +The [wim](wim) subdirectory contains an independent library with [header-only types](wim/wim.h). It also defines a [C-style public interface](wim/wim.cpp) used for the [Python bindings](wim/__init__.py). Normally, there would be more code there, but this is a minimal viable example. For simplicity, importing the `wim` Python module will build and load the native library and initialize the Python bindings. This will happen automatically when running the example, so no need to build it separately. + +The file [wim_warp.h](wim_warp.h) is a header that will be included by Warp when building kernels. It imports the types into the `wp` namespace, which is currently necessary, but may change in the future. It also defines some useful functions that will be exposed to Warp code generation. + +The file [wim_types.py](wim_types.py) defines the Python versions of the custom types and registers the native utility functions as buitin functions that are available in kernels. + +The file [wim_paint.py](wim_paint.py) is the main program for the example. It creates an image and draws shapes using Warp kernels. + +# Prerequisites + +* Linux is required +* CUDA Toolkit installed in `/usr/local/cuda` (Note that `cuda_path` can be modified in [wim/__init__.py](wim/__init__.py)). This is needed for building the "external" `wim` library. +* `pip install matplotlib` for showing and saving the generated image. +* `pip install torch` for an optional interop example! + +# Running + +From the repo root: + +```bash +$ python external/wim_paint.py +``` + +If PyTorch is installed, the example will also demonstrate inverting the image using PyTorch. + +# Limitations and Future Work + +* The fact that Warp builds kernels with its own custom CRT might be a stumbling block for users who want to include their own external headers. For example, including standard library headers fails during kernel compilation. + +* Currently, Warp doesn't have "proper" support for custom native types. Our codegen assumes that all types and builtins are in the `wp::` namespace. I was able to hack around that, but it's not clean (or clear to external users). + +* Warp supports accessing public struct/class members using `Type.vars`, but there's no way to expose getters and setters (or other methods) from native classes. Free functions/builtins can be used to get around it, but some users might prefer OO syntax. + +* Using custom native types in Warp arrays is not fully supported yet. In this example, I substituted the built-in `vec3f` for `Color` to expose the image data as a Warp array, but this kind of substitution would not work for all cases. diff --git a/external/wim/__init__.py b/external/wim/__init__.py new file mode 100644 index 000000000..7ecc7643e --- /dev/null +++ b/external/wim/__init__.py @@ -0,0 +1,39 @@ +import ctypes +import os +import subprocess + +_lib_dir = os.path.abspath(os.path.dirname(__file__)) +_lib_path = os.path.join(_lib_dir, "wim.so") + + +def _build_lib(cuda_path="/usr/local/cuda"): + build_cmd = [os.path.join(cuda_path, "bin", "nvcc"), + "-shared", + "-Xcompiler", "-fPIC", + os.path.join(_lib_dir, "wim.cpp"), + "-o", _lib_path] + subprocess.run(build_cmd, check=True) + + +def _load_lib(): + lib_dir = os.path.abspath(os.path.dirname(__file__)) + return ctypes.CDLL(os.path.join(lib_dir, _lib_path)) + + +# build the lib +_build_lib() + +# load the lib +_core = _load_lib() + +# bindings for CPU images +_core.create_image_cpu.argtypes = [ctypes.c_int, ctypes.c_int] +_core.create_image_cpu.restype = ctypes.c_void_p +_core.destroy_image_cpu.argtypes = [ctypes.c_void_p] +_core.destroy_image_cpu.restype = None + +# bindings for GPU images +_core.create_image_cuda.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_int] +_core.create_image_cuda.restype = ctypes.c_void_p +_core.destroy_image_cuda.argtypes = [ctypes.c_int, ctypes.c_void_p] +_core.destroy_image_cuda.restype = None diff --git a/external/wim/wim.cpp b/external/wim/wim.cpp new file mode 100644 index 000000000..d97cfe8a1 --- /dev/null +++ b/external/wim/wim.cpp @@ -0,0 +1,78 @@ +#include "wim.h" + +#include + +#include + +#if defined(_WIN32) + #define WIM_API __declspec(dllexport) +#else + #define WIM_API __attribute__ ((visibility ("default"))) +#endif + +#define check_cuda(code) (wim::check_cuda_result(code, __FILE__, __LINE__)) + +// internal stuff +namespace wim +{ + +bool check_cuda_result(cudaError_t code, const char* file, int line) +{ + if (code == cudaSuccess) + return true; + + fprintf(stderr, "WIM CUDA error %u: %s (%s:%d)\n", unsigned(code), cudaGetErrorString(code), file, line); + return false; +} + +} + +// API for Python bindings +extern "C" +{ + +WIM_API wim::Image* create_image_cpu(int width, int height) +{ + wim::Color* data = new wim::Color[width * height]; + wim::Image* img = new wim::Image(width, height, data); + return img; +} + +WIM_API void destroy_image_cpu(wim::Image* img) +{ + if (img) + { + delete [] img->getData(); + delete img; + } +} + +WIM_API wim::Image* create_image_cuda(int device, int width, int height) +{ + if (!check_cuda(cudaSetDevice(device))) + return nullptr; + + wim::Color* data = nullptr; + if (!check_cuda(cudaMalloc(&data, width * height * sizeof(wim::Color)))) + return nullptr; + if (!check_cuda(cudaMemset(data, 0, width * height * sizeof(wim::Color)))) + return nullptr; + + wim::Image* img = new wim::Image(width, height, data); + return img; +} + +WIM_API void destroy_image_cuda(int device, wim::Image* img) +{ + if (!img) + return; + + if (!check_cuda(cudaSetDevice(device))) + return; + + check_cuda(cudaFree(img->getData())); + + delete img; +} + +} diff --git a/external/wim/wim.h b/external/wim/wim.h new file mode 100644 index 000000000..537cf5f45 --- /dev/null +++ b/external/wim/wim.h @@ -0,0 +1,110 @@ +#pragma once + +#if !defined(__CUDACC__) + #define CUDA_CALLABLE + #define CUDA_CALLABLE_DEVICE +#else + #define CUDA_CALLABLE __host__ __device__ + #define CUDA_CALLABLE_DEVICE __device__ +#endif + +// our amazing Wraparound IMage lib +namespace wim +{ + +struct Color +{ + float r, g, b; + + CUDA_CALLABLE Color(float r = 0.0f, float g = 0.0f, float b = 0.0f) + : r(r), g(g), b(b) + { + } +}; + +struct Coord +{ + int x, y; + + CUDA_CALLABLE Coord(int x = 0, int y = 0) + : x(x), y(y) + { + } +}; + +class Image +{ + int mWidth; + int mHeight; + Color* mData; + +public: + CUDA_CALLABLE Image() + : mWidth(0), mHeight(0), mData(nullptr) + { + } + + CUDA_CALLABLE Image(int width, int height, Color* data) + : mWidth(width), mHeight(height), mData(data) + { + } + + CUDA_CALLABLE int getWidth() const + { + return mWidth; + } + + CUDA_CALLABLE int getHeight() const + { + return mHeight; + } + + CUDA_CALLABLE const Color* getData() const + { + return mData; + } + + CUDA_CALLABLE Color* getData() + { + return mData; + } + + CUDA_CALLABLE Coord wrapCoord(const Coord& coord) const + { + int x = coord.x; + int y = coord.y; + + while (x < 0) + x += mWidth; + while (x >= mWidth) + x -= mWidth; + + while (y < 0) + y += mHeight; + while (y >= mHeight) + y -= mHeight; + + return Coord(x, y); + } + + CUDA_CALLABLE Color getPixel(const Coord& coord) const + { + if (mData) + { + Coord wc = wrapCoord(coord); + return mData[wc.y * mWidth + wc.x]; + } + return Color(1.0f, 0.0f, 1.0f); + } + + CUDA_CALLABLE void setPixel(const Coord& coord, const Color& color) + { + if (mData) + { + Coord wc = wrapCoord(coord); + mData[wc.y * mWidth + wc.x] = color; + } + } +}; + +} // end of namespace wim diff --git a/external/wim_paint.py b/external/wim_paint.py new file mode 100644 index 000000000..5a5057ca8 --- /dev/null +++ b/external/wim_paint.py @@ -0,0 +1,196 @@ +import warp as wp + +import wim_types +from wim_types import Coord, Color, Image + + +# print some info about an image +@wp.kernel +def print_image_info_kernel(img: Image): + width = wp.img_width(img) + height = wp.img_height(img) + data = wp.img_data(img) # this is a Warp array which wraps the image data + + wp.printf("Dimensions: %dx%d, data array shape: (%d, %d)\n", width, height, data.shape[0], data.shape[1]) + + if width > 0 and height > 0: + # middle pixel coordinates + x = width // 2 + y = height // 2 + + # demonstrate accessing elements as Colors using new builtins + color = wp.img_get_pixel(img, wp.Coord_(x, y)) + wp.printf("Middle pixel color: (%g, %g, %g)\n", color.r, color.g, color.b) + + # demonstrate accessing elements through Warp array + value = data[y, x] + wp.printf("Middle array value: (%g, %g, %g)\n", value[0], value[1], value[2]) + + +# fill image with a constant color +@wp.kernel +def fill_kernel(img: Image, color: Color): + y, x = wp.tid() + coord = wp.Coord_(x, y) + wp.img_set_pixel(img, coord, color) + + +# fill a rectangle centered at `pos` +@wp.kernel +def fill_rect_kernel(img: Image, half_width: int, half_height: int, pos: Coord, color: Color): + j, i = wp.tid() + i -= half_width + j -= half_height + coord = wp.Coord_(pos.x + i, pos.y + j) + wp.img_set_pixel(img, coord, color) + + +# fill a circle centered at `pos` +@wp.kernel +def fill_circle_kernel(img: Image, radius: int, pos: Coord, color: Color): + j, i = wp.tid() + i -= radius + j -= radius + if i * i + j * j <= radius * radius: + x = pos.x + i + y = pos.y + j + coord = wp.Coord_(x, y) + wp.img_set_pixel(img, coord, color) + + +# blur the image using a simple weighted sum over the neighbours +@wp.kernel +def blur_kernel(img: Image): + y, x = wp.tid() + + c00 = wp.img_get_pixel(img, wp.Coord_(x - 1, y - 1)) + c01 = wp.img_get_pixel(img, wp.Coord_(x, y - 1)) + c02 = wp.img_get_pixel(img, wp.Coord_(x + 1, y - 1)) + c10 = wp.img_get_pixel(img, wp.Coord_(x - 1, y)) + c11 = wp.img_get_pixel(img, wp.Coord_(x, y)) + c12 = wp.img_get_pixel(img, wp.Coord_(x + 1, y)) + c20 = wp.img_get_pixel(img, wp.Coord_(x - 1, y + 1)) + c21 = wp.img_get_pixel(img, wp.Coord_(x, y + 1)) + c22 = wp.img_get_pixel(img, wp.Coord_(x + 1, y + 1)) + + c = (c00 + c02 + c20 + c22) + 2.0 * (c01 + c21 + c10 + c12) + 4.0 * c11 + c = (1.0 / 16.0) * c + + wp.img_set_pixel(img, wp.Coord_(x, y), c) + + +# fill image with constant color +def fill(img: Image, color: Color): + domain = (img.height, img.width) + wp.launch(fill_kernel, dim=domain, inputs=[img, color]) + + +# draw a width x height rectangle centered at pos +def draw_rect(img: Image, width: int, height: int, pos: Coord, color: Color): + domain = (height, width) + wp.launch(fill_rect_kernel, dim=domain, inputs=[img, width//2, height//2, pos, color]) + + +# draw a circle with the given radius centered at pos +def draw_circle(img: Image, radius: int, pos: Coord, color: Color): + domain = (2 * radius, 2 * radius) + wp.launch(fill_circle_kernel, dim=domain, inputs=[img, radius, pos, color]) + + +# blur the image +def blur(img: Image): + domain = (img.height, img.width) + wp.launch(blur_kernel, dim=domain, inputs=[img]) + + +# make awesome art +def create_example_image(): + + # create image + img = Image(800, 600) + + # fill with background color + fill(img, Color(0.3, 0.0, 0.3)) + + # concentric circles in the corners + for iter in range(10): + g = iter / 10 + r = 20 + (10 - iter - 1) * 20 + draw_circle(img, r, Coord(0, 0), Color(0, g, 1)) + + for _ in range(500): + blur(img) + + for iter in range(10): + # rectangle crossing the vertical edges + if iter == 0: + draw_rect(img, 200, 300, Coord(img.width//2, 0), Color(1, 0, 0)) + # rectangle crossing the horizontal edges + elif iter == 9: + draw_rect(img, 200, 50, Coord(0, img.height//2), Color(1, 1, 0)) + + for _ in range(20): + blur(img) + + # center pieces + draw_rect(img, 100, 100, Coord(img.width//2, img.height//2), Color(0.5, 0.2, 0.5)) + draw_circle(img, 30, Coord(img.width//2, img.height//2), Color(0.9, 0.7, 0.9)) + + return img + + +# show the image and optionally save it if save_path is specified +def show_image(img, title, save_path=None): + + # get the image data as a numpy array + img_data = img.data_array.numpy() + + if save_path is not None: + import matplotlib.image as img + img.imsave(save_path, img_data) + + import matplotlib.pyplot as plt + fig = plt.figure(title) + plt.imshow(img_data) + plt.show() + + +wp.init() + +# It's a good idea to always clear the kernel cache when developing new native or codegen features +wp.build.clear_kernel_cache() + +# !!! DO THIS BEFORE LOADING MODULES OR LAUNCHING KERNELS +wim_types.register() + +with wp.ScopedDevice("cuda:0"): + + # create an image on the current device + img = create_example_image() + + # print image info + print("===== Image info:") + wp.launch(print_image_info_kernel, dim=1, inputs=[img]) + + # show and save the image + show_image(img, "Result", save_path="result.png") + + # run some post-processing using PyTorch if it's installed to demonstrate interop + try: + import torch + + # wrap the image data as a PyTorch tensor (no copy) + t = wp.to_torch(img.data_array, requires_grad=False) + + # invert the image in-place using PyTorch + torch.sub(1, t, out=t) + + # print image info + print("===== Inverted image info:") + wp.launch(print_image_info_kernel, dim=1, inputs=[img]) + + # show and save the image + show_image(img, "Inverted using PyTorch", save_path="result_inverted.png") + + except ImportError: + print("Torch is not installed, couldn't post-process image") diff --git a/external/wim_types.py b/external/wim_types.py new file mode 100644 index 000000000..805df8ca6 --- /dev/null +++ b/external/wim_types.py @@ -0,0 +1,218 @@ +import ctypes +import os +import warp as wp + +# our external lib bindings (importing it will build the native lib and initialize the Python bindings) +import wim + + +class Coord: + + # define variables accessible in kernels (e.g., coord.x) + vars = { + "x": wp.codegen.Var("x", int), + "y": wp.codegen.Var("y", int), + } + + # struct that corresponds to the native Coord type + # - used when packing arguments for kernels (pass-by-value) + # - binary layout of fields must match native type + class _type_(ctypes.Structure): + + _fields_ = [ + ("x", ctypes.c_int), + ("y", ctypes.c_int), + ] + + def __init__(self, coord): + self.x = coord.x + self.y = coord.y + + def __init__(self, x=0, y=0): + self.x = x + self.y = y + + # HACK: used when packing kernel argument as `arg_type._type_(value.value)` in `pack_arg()` during `wp.launch()` + @property + def value(self): + return self + + +class Color: + + # define variables accessible in kernels + vars = { + "r": wp.codegen.Var("r", float), + "g": wp.codegen.Var("g", float), + "b": wp.codegen.Var("b", float), + } + + # struct that corresponds to the native Color type + # - used when packing arguments for kernels (pass-by-value) + # - binary layout of fields must match native type + class _type_(ctypes.Structure): + + _fields_ = [ + ("r", ctypes.c_float), + ("g", ctypes.c_float), + ("b", ctypes.c_float), + ] + + def __init__(self, color): + self.r = color.r + self.g = color.g + self.b = color.b + + def __init__(self, r=0, g=0, b=0): + self.r = r + self.g = g + self.b = b + + # HACK: used when packing kernel argument as `arg_type._type_(value.value)` in `pack_arg()` during `wp.launch()` + @property + def value(self): + return self + + +class Image: + + # struct that corresponds to the native Image type + # - used when packing arguments for kernels (pass-by-value) + # - binary layout of fields must match native type + class _type_(ctypes.Structure): + + _fields_ = [ + ("width", ctypes.c_int), + ("height", ctypes.c_int), + ("data", ctypes.c_void_p), + ] + + def __init__(self, img): + self.width = img.width + self.height = img.height + self.data = img.data + + def __init__(self, width: int, height: int, device=None): + + # image shape + self.width = width + self.height = height + + self.device = wp.get_device(device) + + # pointer to the native wim::Image class (on CPU) + self.ptr = None + + if self.device.is_cpu: + self.ptr = wim._core.create_image_cpu(width, height) + elif self.device.is_cuda: + self.ptr = wim._core.create_image_cuda(self.device.ordinal, width, height) + else: + raise ValueError(f"Invalid device {device}") + + # get pointer to the data, which could be on CPU or GPU + img_ptr = ctypes.cast(self.ptr, ctypes.POINTER(self._type_)) + self.data = img_ptr.contents.data + + def __del__(self): + if self.ptr: + if self.device.is_cpu: + wim._core.destroy_image_cpu(self.ptr) + else: + wim._core.destroy_image_cuda(self.device.ordinal, self.ptr) + + # HACK: used when packing kernel argument as `arg_type._type_(value.value)` in `pack_arg()` during `wp.launch()` + @property + def value(self): + return self + + # return the data as a Warp array on the correct device + # TODO: can't currently use arrays of custom native types, so using vec3f instead + @property + def data_array(self): + shape = (self.height, self.width) + return wp.array(ptr=self.data, shape=shape, dtype=wp.vec3f, owner=False) + + +def _add_header(path): + include_directive = f"#include \"{path}\"\n" + # add this header for all native modules + wp.codegen.cpu_module_header += include_directive + wp.codegen.cuda_module_header += include_directive + + +def _register_headers(): + include_path = os.path.abspath(os.path.dirname(__file__)) + _add_header(f"{include_path}/wim_warp.h") + + +def _register_builtins(): + + # Coord constructor + wp.context.add_builtin( + "Coord_", + input_types={"x": int, "y": int}, + value_type=Coord, + missing_grad=True, + ) + + # Color addition + wp.context.add_builtin( + "add", + input_types={"a": Color, "b": Color}, + value_type=Color, + missing_grad=True, + ) + + # Color scaling + wp.context.add_builtin( + "mul", + input_types={"s": float, "c": Color}, + value_type=Color, + missing_grad=True, + ) + + # get image width + wp.context.add_builtin( + "img_width", + input_types={"img": Image}, + value_type=int, + missing_grad=True, + ) + + # get image height + wp.context.add_builtin( + "img_height", + input_types={"img": Image}, + value_type=int, + missing_grad=True, + ) + + # get image data as a Warp array + wp.context.add_builtin( + "img_data", + input_types={"img": Image}, + value_type=wp.array2d(dtype=wp.vec3f), + missing_grad=True, + ) + + # get pixel + wp.context.add_builtin( + "img_get_pixel", + input_types={"img": Image, "coord": Coord}, + value_type=Color, + missing_grad=True, + ) + + # set pixel + wp.context.add_builtin( + "img_set_pixel", + input_types={"img": Image, "coord": Coord, "color": Color}, + value_type=None, + missing_grad=True, + ) + + +def register(): + _register_headers() + _register_builtins() diff --git a/external/wim_warp.h b/external/wim_warp.h new file mode 100644 index 000000000..5fd606b86 --- /dev/null +++ b/external/wim_warp.h @@ -0,0 +1,70 @@ +#pragma once + +// TODO: may need to add a mechanism for include paths +#include "wim/wim.h" + +// include some Warp types so we can expose the image data as a Warp array +#include "../warp/native/array.h" +#include "../warp/native/vec.h" + +// TODO: currently, all types and builtins need to be in the wp:: namespace +namespace wp +{ + +// import types into this namespace +using Color = ::wim::Color; +using Coord = ::wim::Coord; +using Image = ::wim::Image; + +// Coord constructor exposed as a free function +CUDA_CALLABLE inline Coord Coord_(int x, int y) +{ + return Coord(x, y); +} + +// overload operator+ for colors +CUDA_CALLABLE inline Color add(const Color& a, const Color& b) +{ + return Color(a.r + b.r, a.g + b.g, a.b + b.b); +} + +// overload operator* for scaling colors +CUDA_CALLABLE inline Color mul(float s, const Color& c) +{ + return Color(s * c.r, s * c.g, s * c.b); +} + +// get image width (can't be exposed as a named var directly, because the member is private) +CUDA_CALLABLE inline int img_width(const Image& img) +{ + return img.getWidth(); +} + +// get image height (can't be exposed as a named var directly, because the member is private) +CUDA_CALLABLE inline int img_height(const Image& img) +{ + return img.getHeight(); +} + +// get image data as a Warp array +CUDA_CALLABLE inline array_t img_data(Image& img) +{ + Color* data = img.getData(); + + // TODO: can't currently use array of custom native types, so use vec3f + return array_t((vec3f*)data, img.getHeight(), img.getWidth()); +} + +// get pixel +CUDA_CALLABLE inline Color img_get_pixel(const Image& img, const Coord& coord) +{ + return img.getPixel(coord); +} + +// set pixel +CUDA_CALLABLE inline void img_set_pixel(Image& img, const Coord& coord, const Color& color) +{ + img.setPixel(coord, color); +} + +}