diff --git a/.gitattributes b/.gitattributes
index b7d08089e..fe825aa40 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,4 @@
*.hip gitlab-language=cuda linguist-language=Cuda
+*.sln text eol=crlf
+*.vcxproj text eol=crlf
+*.vcxproj.filters text eol=crlf
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6dd911d80..5f588ae5c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -29,6 +29,12 @@ include:
- /gpus-nvcc.yaml
- /rules.yaml
+variables:
+ # suppressing 186 allows us to write `assert(a && "message")`.
+ CUDA_FLAGS: "-Xcompiler -Wall,-Wextra,-Werror --Werror all-warnings --diag-suppress 186"
+ CXX_FLAGS: "-Wall -Wextra -Werror"
+ HIP_FLAGS: "-Wall -Wextra -Werror"
+
stages:
- lint
- build
@@ -98,7 +104,7 @@ build:make-rocm:
- rocm-build
needs: []
script:
- - cd $CI_PROJECT_DIR && make -j $(nproc)
+ - cd $CI_PROJECT_DIR && make CXXFLAGS="$HIP_FLAGS" -j $(nproc)
build:make-cuda:
image: $DOCKER_TAG_PREFIX:cuda-ubuntu
@@ -109,7 +115,7 @@ build:make-cuda:
- nvcc-build
needs: []
script:
- - cd $CI_PROJECT_DIR && make GPU_RUNTIME=CUDA -j $(nproc)
+ - cd $CI_PROJECT_DIR && make CXXFLAGS="$CUDA_FLAGS" GPU_RUNTIME=CUDA -j $(nproc)
.build:cmake:
stage: build
@@ -132,6 +138,15 @@ build:cmake-rocm:
-S $CI_PROJECT_DIR
-B $CI_PROJECT_DIR/build
-D CMAKE_HIP_ARCHITECTURES="$GPU_TARGETS"
+ -D CMAKE_CXX_FLAGS="$CXX_FLAGS"
+ -D CMAKE_HIP_FLAGS="$HIP_FLAGS"
+ | tee cmake_log.txt
+ # check if all dependencies were found
+ - |-
+ if grep -q "Could NOT find" cmake_log.txt; then
+ echo "Some CMake libraries could not be found"
+ exit 1
+ fi
- cmake --build $CI_PROJECT_DIR/build
build:cmake-cuda:
@@ -145,6 +160,15 @@ build:cmake-cuda:
-S $CI_PROJECT_DIR
-B $CI_PROJECT_DIR/build
-D GPU_RUNTIME=CUDA
+ -D CMAKE_CXX_FLAGS="$CXX_FLAGS"
+ -D CMAKE_CUDA_FLAGS="$CUDA_FLAGS"
+ | tee cmake_log.txt
+ # check if all dependencies were found
+ - |-
+ if grep -q "Could NOT find" cmake_log.txt; then
+ echo "Some CMake libraries could not be found"
+ exit 1
+ fi
- cmake --build $CI_PROJECT_DIR/build
.test:
@@ -190,16 +214,19 @@ test:rocm-windows-vs2019:
- >
& "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/MSBuild/Current/Bin/MSBuild.exe"
/maxCpuCount
+ /warnAsError
"/p:Configuration=$BUILD_TYPE"
"$CI_PROJECT_DIR"
- |-
Get-ChildItem -Path "$CI_PROJECT_DIR/$BUILD_TYPE" -Filter "*_vs2019.exe" |
ForEach-Object {
- echo "--" $_.Name
- & "$_"
- if (!$?) {
- throw "{0} returned: {1}" -f $_.Name, $LASTEXITCODE
- }
+ if (("hip_vulkan_interop_vs2019.exe","hip_opengl_interop_vs2019.exe") -NotContains $_.Name) {
+ echo "--" $_.Name
+ & "$_"
+ if (!$?) {
+ throw "{0} returned: {1}" -f $_.Name, $LASTEXITCODE
+ }
+ }
}
test:rocm-windows-cmake:
@@ -210,8 +237,16 @@ test:rocm-windows-cmake:
-S "$CI_PROJECT_DIR"
-B "$CI_PROJECT_DIR/build"
-G Ninja
+ -D CMAKE_CXX_FLAGS="$CXX_FLAGS"
+ -D CMAKE_HIP_FLAGS="$HIP_FLAGS"
-D CMAKE_BUILD_TYPE="$BUILD_TYPE"
-D CMAKE_HIP_ARCHITECTURES=gfx1030
-D CMAKE_RC_COMPILER="C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64/rc.exe"
+ -D CMAKE_TOOLCHAIN_FILE="C:/Tools/Microsoft/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ | Tee-Object -filepath cmake_log.txt
+ - |-
+ if (Select-String -Path cmake_log.txt -Pattern "Could NOT find") {
+ throw "Some cmake libraries are missing"
+ }
- cmake --build "$CI_PROJECT_DIR/build"
- cd "$CI_PROJECT_DIR/build" && ctest --output-on-failure
diff --git a/Applications/CMakeLists.txt b/Applications/CMakeLists.txt
new file mode 100644
index 000000000..98a45b32b
--- /dev/null
+++ b/Applications/CMakeLists.txt
@@ -0,0 +1,26 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Aggregating project: configures every example of the Applications directory in one build tree.
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(Applications LANGUAGES CXX)
+
+# Each example lives in its own sub-directory with its own CMakeLists.txt.
+add_subdirectory(floyd_warshall)
diff --git a/Applications/Makefile b/Applications/Makefile
new file mode 100644
index 000000000..d1a397102
--- /dev/null
+++ b/Applications/Makefile
@@ -0,0 +1,34 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Sub-directories of this directory; each one is built by recursing into it with $(MAKE).
+EXAMPLES := \
+ floyd_warshall
+
+# Default goal: visit every example directory.
+all: $(EXAMPLES)
+
+# `clean` walks the same prerequisite chain as `all`. The target-specific TARGET
+# variable is inherited by the prerequisites, so every sub-make below is invoked
+# with the `clean` goal instead of its default goal.
+clean: TARGET=clean
+clean: all
+
+# Recurse into the example directory named by the target. TARGET is not assigned
+# anywhere else in this file, so for a regular build it expands to nothing and the
+# sub-make builds its own default goal.
+$(EXAMPLES):
+ $(MAKE) -C $@ $(TARGET)
+
+# The example names are existing directories, so they have to be phony for the
+# recursive invocation to run on every build.
+.PHONY: all clean $(EXAMPLES)
diff --git a/Applications/README.md b/Applications/README.md
new file mode 100644
index 000000000..626d275df
--- /dev/null
+++ b/Applications/README.md
@@ -0,0 +1,43 @@
+# Applications Examples
+
+## Summary
+The examples in this subdirectory showcase several GPU-implementations of finance, computer science, physics, etc. models or algorithms that additionally offer a command line application. The examples are built on Linux for the ROCm (AMD GPU) backend. Some examples additionally support the CUDA (NVIDIA GPU) backend.
+
+## Prerequisites
+### Linux
+- [CMake](https://cmake.org/download/) (at least version 3.21)
+- OR GNU Make - available via the distribution's package manager
+- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
+
+### Windows
+- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
+- ROCm toolchain for Windows (No public release yet)
+ - The Visual Studio ROCm extension needs to be installed to build with the solution files.
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
+
+## Building
+### Linux
+Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../Dockerfiles/) to build and run the examples in a containerized environment.
+
+#### Using CMake
+All examples in the `Applications` subdirectory can either be built by a single CMake project or be built independently.
+
+- `$ cd Applications`
+- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA, when supported)
+- `$ cmake --build build`
+
+#### Using Make
+All examples can be built by a single invocation to Make or be built independently.
+
+- `$ cd Applications`
+- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA, when supported)
+
+### Windows
+#### Visual Studio
+Visual Studio solution files are available for the individual examples. To build all supported HIP runtime examples open the top level solution file [ROCm-Examples-VS2019.sln](../ROCm-Examples-VS2019.sln) and filter for Applications.
+
+For more detailed build instructions refer to the top level [README.md](../README.md#visual-studio).
+
+#### CMake
+All examples in the `Applications` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../README.md#cmake-2).
diff --git a/Applications/floyd_warshall/.gitignore b/Applications/floyd_warshall/.gitignore
new file mode 100644
index 000000000..fa39f0305
--- /dev/null
+++ b/Applications/floyd_warshall/.gitignore
@@ -0,0 +1 @@
+applications_floyd_warshall
diff --git a/Applications/floyd_warshall/CMakeLists.txt b/Applications/floyd_warshall/CMakeLists.txt
new file mode 100644
index 000000000..1c6e60abe
--- /dev/null
+++ b/Applications/floyd_warshall/CMakeLists.txt
@@ -0,0 +1,58 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name shared by the CMake project, the executable target and the ctest test.
+set(example_name applications_floyd_warshall)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+# GPU_RUNTIME is a user-facing cache option; the STRINGS property lists the
+# accepted values for it.
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+# Abort configuration early when an unsupported runtime was requested.
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+# The runtime name doubles as the CMake language name (HIP or CUDA), so the
+# language-specific standard settings can be addressed generically.
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+# ROCM_ROOT is only used as the package search prefix when the user did not
+# provide a CMAKE_PREFIX_PATH of their own.
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+# Shared helper headers (example_utils.hpp, cmdparser.hpp) live in the repository's Common directory.
+set(include_dirs "../../Common")
+# For examples targeting NVIDIA, include the HIP header directory.
+if(GPU_RUNTIME STREQUAL "CUDA")
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+# The .hip extension is compiled with whichever language was selected above.
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/Applications/floyd_warshall/Makefile b/Applications/floyd_warshall/Makefile
new file mode 100644
index 000000000..64a44225e
--- /dev/null
+++ b/Applications/floyd_warshall/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name of the executable produced by this Makefile.
+EXAMPLE := applications_floyd_warshall
+COMMON_INCLUDE_DIR := ../../Common
+# Default backend; override on the command line, e.g. `make GPU_RUNTIME=CUDA`.
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+# Compiler driver; `?=` keeps a value already supplied by the environment.
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+# The I-prefixed variables hold the flags this Makefile requires. The user-supplied
+# CXXFLAGS/CPPFLAGS/LDFLAGS/LDLIBS are appended to them further below, so overriding
+# those on the command line cannot drop the mandatory flags.
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+# Backend-specific flags. For CUDA the source is passed with `-x cu` and the HIP
+# headers are added as a system include directory; for HIP only default warning
+# flags are set, and only when CXXFLAGS was not already defined.
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+# Append the user-provided flags after the mandatory ones.
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+# First rule of the file and therefore the default goal. Only the first
+# prerequisite ($<, i.e. main.hip) is compiled; the two headers are listed so that
+# editing them triggers a rebuild.
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/cmdparser.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+# Remove the built executable.
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/Applications/floyd_warshall/README.md b/Applications/floyd_warshall/README.md
new file mode 100644
index 000000000..bb028a5d8
--- /dev/null
+++ b/Applications/floyd_warshall/README.md
@@ -0,0 +1,60 @@
+# Applications Floyd-Warshall Example
+
+## Description
+This example showcases a GPU implementation of the [Floyd-Warshall algorithm](https://en.wikipedia.org/wiki/Floyd%E2%80%93Warshall_algorithm), which computes the shortest path between each pair of nodes in a given directed and (in this case) complete graph $G = (V, E, \omega)$. The key point of this implementation is that each kernel launch represents a step $k$ of the traditional CPU-implemented algorithm. Therefore, the kernel is launched as many times as the graph has nodes $\left(n = \vert V \vert \right)$.
+
+In this example, there are `iterations` (consecutive) executions of the algorithm on the same graph. As each execution requires an unmodified graph input, multiple copy operations are required. Hence, the performance of the example can be improved by using _pinned memory_.
+
+Pinned memory is simply a special kind of memory that cannot be paged out of the physical memory of a process, meaning that the virtual addresses associated with it are always mapped to physical memory. When copying data from/to the host to/from the GPU, the host source/destination must be pinned memory and, in case it is not, an extra allocation of pinned memory is first performed (copying the data residing in or being copied to the non-pinned host memory) and then the actual copy of the data takes place.
+
+Therefore, using pinned memory saves around 2x the time needed to copy from/to host memory. In this example, performance is improved by using this type of memory, given that there are `iterations` (consecutive) executions of the algorithm on the same graph.
+
+### Application flow
+1. Default values for the number of nodes of the graph and the number of iterations for the algorithm execution are set.
+2. Command line arguments are parsed (if any) and the previous values are updated.
+3. A number of constants are defined for kernel execution and input/output data size.
+4. Host memory is allocated for the distance matrix and initialized with the increasing sequence $1,2,3,\dots$ . These values represent the weights of the edges of the graph.
+5. Host memory is allocated for the adjacency matrix and initialized such that the initial path between each pair of vertices $x,y \in V$ ($x \neq y$) is the edge $(x,y)$.
+6. Pinned memory is allocated and mapped to device memory. The latter is initialized with the input matrices (distance and adjacency) representing the graph $G$ and the Floyd-Warshall kernel is executed for each node of the graph.
+7. The resulting distance and adjacency matrices are copied to the host and pinned memory is freed.
+8. The mean time in milliseconds needed for each iteration is printed to standard output.
+9. The results obtained are compared with the CPU implementation of the algorithm. The result of the comparison is printed to the standard output.
+
+
+### Command line interface
+There are three parameters available:
+- `-h` displays information about the available parameters and their default values.
+- `-n nodes` sets `nodes` as the number of nodes of the graph to which the Floyd-Warshall algorithm will be applied. It must be a (positive) multiple of `block_size` (= 16). Its default value is 16.
+- `-i iterations` sets `iterations` as the number of times that the algorithm will be applied to the (same) graph. It must be an integer greater than 0. Its default value is 1.
+
+## Key APIs and Concepts
+- For this GPU implementation of the Floyd-Warshall algorithm, the main kernel (`floyd_warshall_kernel`) is launched in a 2-dimensional grid. Each thread in the grid computes the shortest path between two nodes of the graph at a certain step $k$ $\left(0 \leq k < n \right)$. The threads compare the previously computed shortest paths using only the nodes in $V'=\{v_0,v_1,...,v_{k-1}\} \subseteq V$ as intermediate nodes with the paths that include node $v_k$ as an intermediate node, and take the shortest option. Therefore, the kernel is launched $n$ times.
+- For improved performance, pinned memory is used to pass the results obtained in each iteration to the next one. With `hipHostMalloc` pinned host memory (accessible by the device) can be allocated, and `hipHostFree` frees it. In this example, host pinned memory is allocated using the `hipHostMallocMapped` flag, which indicates that `hipHostMalloc` must map the allocation into the address space of the current device. The device pointer to such allocated pinned memory is obtained with `hipHostGetDevicePointer`. Beware that an excessive allocation of pinned memory can slow down the host execution, as the program is left with less physical memory available to map the rest of the virtual addresses used.
+- With `hipMemcpy` data bytes can be transferred from host to device (using `hipMemcpyHostToDevice`) or from device to host (using `hipMemcpyDeviceToHost`), among others.
+- `hipLaunchKernelGGL` queues the kernel execution on the device. All the kernels are launched on the `hipStreamDefault`, meaning that these executions are performed in order. `hipGetLastError` returns the last error produced by any runtime API call, allowing to check if any kernel launch resulted in error.
+- `hipEventCreate` creates the events used to measure kernel execution time, `hipEventRecord` starts recording an event and `hipEventSynchronize` waits for all the previous work in the stream when the specified event was recorded. With these three functions the start and stop times of the kernel can be measured, and with `hipEventElapsedTime` the kernel execution time (in milliseconds) can be obtained.
+
+## Demonstrated API Calls
+
+### HIP runtime
+#### Device symbols
+- `blockIdx`
+- `blockDim`
+- `threadIdx`
+
+#### Host symbols
+- `__global__`
+- `hipEventCreate`
+- `hipEventElapsedTime`
+- `hipEventRecord`
+- `hipEventSynchronize`
+- `hipGetLastError`
+- `hipHostFree`
+- `hipHostGetDevicePointer`
+- `hipHostMalloc`
+- `hipHostMallocMapped`
+- `hipLaunchKernelGGL`
+- `hipMemcpy`
+- `hipMemcpyDeviceToHost`
+- `hipMemcpyHostToDevice`
+- `hipStreamDefault`
diff --git a/Applications/floyd_warshall/floyd_warshall_vs2019.sln b/Applications/floyd_warshall/floyd_warshall_vs2019.sln
new file mode 100644
index 000000000..0a2620727
--- /dev/null
+++ b/Applications/floyd_warshall/floyd_warshall_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "floyd_warshall_vs2019", "floyd_warshall_vs2019.vcxproj", "{FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Debug|x64.ActiveCfg = Debug|x64
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Debug|x64.Build.0 = Debug|x64
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Release|x64.ActiveCfg = Release|x64
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {D7C4B290-7C93-4D26-85D9-364F6A448EE0}
+ EndGlobalSection
+EndGlobal
diff --git a/Applications/floyd_warshall/floyd_warshall_vs2019.vcxproj b/Applications/floyd_warshall/floyd_warshall_vs2019.vcxproj
new file mode 100644
index 000000000..45ed99a5b
--- /dev/null
+++ b/Applications/floyd_warshall/floyd_warshall_vs2019.vcxproj
@@ -0,0 +1,104 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+ 15.0
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}
+ Win32Proj
+ floyd_warshall_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ applications_$(ProjectName)
+
+
+ false
+ applications_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ true
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ true
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/Applications/floyd_warshall/floyd_warshall_vs2019.vcxproj.filters b/Applications/floyd_warshall/floyd_warshall_vs2019.vcxproj.filters
new file mode 100644
index 000000000..a36242334
--- /dev/null
+++ b/Applications/floyd_warshall/floyd_warshall_vs2019.vcxproj.filters
@@ -0,0 +1,30 @@
+
+
+
+
+ {2932a426-602b-4926-887e-27c50ba7eab7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {ed043ec4-e8ac-4831-93f5-a58546ec7bea}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {0da954bd-e555-4454-b082-b68d10c753b9}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/Applications/floyd_warshall/main.hip b/Applications/floyd_warshall/main.hip
new file mode 100644
index 000000000..4fc2625c2
--- /dev/null
+++ b/Applications/floyd_warshall/main.hip
@@ -0,0 +1,281 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "cmdparser.hpp"
+#include "example_utils.hpp"
+
+#include <hip/hip_runtime.h>
+
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+/// \brief Implements the k-th (0 <= k < nodes) step of the Floyd-Warshall algorithm. That is,
+/// given a directed and weighted graph G = (V,E,w) (also complete in this example), it
+/// computes the shortest path between every pair of vertices only considering as intermediate
+/// nodes in the path the ones in the subset V' = {v_0,v_1,...,v_k} of V.
+///
+/// Each thread of the 2-dimensional grid processes the single matrix entry (y, x).
+///
+/// \param part_adjacency_matrix - row-major nodes x nodes distance matrix, updated in place.
+/// \param part_next_matrix - row-major nodes x nodes matrix; entry (y, x) is set to \p k
+/// whenever the path through v_k is shorter than the current one.
+/// \param nodes - number of nodes of the graph (row length of both matrices).
+/// \param k - index of the node that is tried as intermediate node in this step.
+__global__ void floyd_warshall_kernel(unsigned int*      part_adjacency_matrix,
+                                      unsigned int*      part_next_matrix,
+                                      const unsigned int nodes,
+                                      const unsigned int k)
+{
+    // Compute the pair of vertices whose shortest path this thread is going to process.
+    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+    // Threads that fall outside the matrix have nothing to do. This keeps the kernel safe
+    // for launch configurations that are not an exact cover of the matrix.
+    if(x >= nodes || y >= nodes)
+    {
+        return;
+    }
+
+    // Get the current distance between the two vertices (only with intermediate nodes in
+    // {v_0,v_1,...,v_{k-1}}) and compute the distance using node v_k as intermediate. Note that
+    // d_x_k_y is the shortest path between x and y with node v_k as intermediate, because
+    // otherwise we could find a shorter path between y and v_k or/and v_k and x using
+    // intermediate nodes from {v_0,v_1,...,v_{k-1}} and thus contradicting the fact that the
+    // current paths between those two pairs of nodes are already the shortest possible.
+    //
+    // The distances are kept as unsigned int, exactly like the matrix elements and like the
+    // CPU reference implementation, so both implementations compare the same values.
+    const unsigned int d_x_y = part_adjacency_matrix[y * nodes + x];
+    const unsigned int d_x_k_y
+        = part_adjacency_matrix[y * nodes + k] + part_adjacency_matrix[k * nodes + x];
+
+    // If the path with intermediate nodes in {v_0, ..., v_{k-1}} is longer than the one
+    // with intermediate node v_k, update matrices so the latter is selected as the
+    // shortest path between x and y with intermediate nodes in {v_0, ..., v_k}.
+    if(d_x_k_y < d_x_y)
+    {
+        part_adjacency_matrix[y * nodes + x] = d_x_k_y;
+        part_next_matrix[y * nodes + x]      = k;
+    }
+}
+
+/// \brief Sequential host implementation of the Floyd-Warshall algorithm, used to verify
+/// the results computed on the device.
+/// \param adjacency_matrix - row-major nodes x nodes distance matrix, updated in place.
+/// \param next_matrix - row-major nodes x nodes matrix; an entry is set to the index of the
+/// intermediate node whenever a shorter path through that node is found.
+/// \param nodes - number of nodes of the graph.
+void floyd_warshall_reference(unsigned int*      adjacency_matrix,
+                              unsigned int*      next_matrix,
+                              const unsigned int nodes)
+{
+    for(unsigned int via = 0; via < nodes; ++via)
+    {
+        // Row holding the distances from the intermediate node v_via to every other node.
+        const unsigned int* const via_row = adjacency_matrix + via * nodes;
+
+        for(unsigned int src = 0; src < nodes; ++src)
+        {
+            unsigned int* const dist_row = adjacency_matrix + src * nodes;
+            unsigned int* const next_row = next_matrix + src * nodes;
+
+            for(unsigned int dst = 0; dst < nodes; ++dst)
+            {
+                // Length of the path src -> v_via -> dst, where both halves only use
+                // intermediate nodes from {v_0, ..., v_{via-1}}.
+                const unsigned int through_via = dist_row[via] + via_row[dst];
+
+                // Keep the current path unless going through v_via is strictly shorter.
+                if(through_via >= dist_row[dst])
+                {
+                    continue;
+                }
+
+                dist_row[dst] = through_via;
+                next_row[dst] = via;
+            }
+        }
+    }
+}
+
+/// \brief Adds to a command line parser the necessary options for this example.
+/// \tparam BlockSize - number of threads per kernel block dimension; the default number of
+/// nodes is checked at compile time to be a positive multiple of it.
+/// \param parser - parser to which the "n" (nodes) and "i" (iterations) options are added.
+template<unsigned int BlockSize>
+void configure_parser(cli::Parser& parser)
+{
+    // Default parameters.
+    constexpr unsigned int nodes      = 16;
+    constexpr unsigned int iterations = 1;
+
+    // Validate the defaults at compile time with the same rules that are applied to the
+    // values provided by the user at run time.
+    static_assert(nodes > 0 && nodes % BlockSize == 0,
+                  "Number of nodes must be a positive multiple of BlockSize");
+    static_assert(iterations > 0, "Number of iterations must be at least 1");
+
+    // Add options to the command line parser.
+    parser.set_optional("n", "nodes", nodes, "Number of nodes in the graph.");
+    parser.set_optional("i",
+                        "iterations",
+                        iterations,
+                        "Number of times the algorithm is executed.");
+}
+
+/// \brief Runs the Floyd-Warshall algorithm on the GPU for a complete graph, reports the mean
+/// kernel time per iteration and validates the result against the CPU reference.
+/// \returns 0 when validation passes, \p error_exit_code when the command line values are
+/// invalid or the validation fails.
+int main(int argc, char* argv[])
+{
+    // Number of threads in each kernel block dimension.
+    constexpr unsigned int block_size = 16;
+
+    // Parse user input.
+    cli::Parser parser(argc, argv);
+    configure_parser<block_size>(parser);
+    parser.run_and_exit_if_error();
+
+    // Get number of nodes and iterations from the command line, if provided.
+    const unsigned int nodes      = parser.get<unsigned int>("n");
+    const unsigned int iterations = parser.get<unsigned int>("i");
+
+    // Check values provided. Zero nodes is rejected explicitly: it is a multiple of
+    // block_size but would produce an empty launch grid. Invalid input is reported with a
+    // non-zero exit code so that callers (scripts, ctest) can detect the failure.
+    if(nodes == 0 || nodes % block_size != 0)
+    {
+        std::cout << "Number of nodes must be a positive multiple of block_size ("
+                  << std::to_string(block_size) << ")." << std::endl;
+        return error_exit_code;
+    }
+    if(iterations == 0)
+    {
+        std::cout << "Number of iterations must be at least 1." << std::endl;
+        return error_exit_code;
+    }
+
+    // Total number of elements and bytes of the input matrices.
+    const unsigned int size       = nodes * nodes;
+    const unsigned int size_bytes = nodes * nodes * sizeof(unsigned int);
+
+    // Number of threads in each kernel block and number of blocks in the grid.
+    const dim3 block_dim(block_size, block_size);
+    const dim3 grid_dim(nodes / block_size, nodes / block_size);
+
+    // Allocate host input adjacency matrix initialized with the increasing sequence 1,2,3,... .
+    // Overwrite diagonal values (distance from a node to itself) to 0.
+    std::vector<unsigned int> adjacency_matrix(size);
+    std::iota(adjacency_matrix.begin(), adjacency_matrix.end(), 1);
+    for(unsigned int x = 0; x < nodes; x++)
+    {
+        adjacency_matrix[x * nodes + x] = 0;
+    }
+
+    // Allocate host input matrix for the reconstruction of the paths obtained and initialize such
+    // that the path from node x to node y is just the edge (x,y) for any pair of nodes x and y.
+    std::vector<unsigned int> next_matrix(size);
+    for(unsigned int x = 0; x < nodes; x++)
+    {
+        for(unsigned int y = 0; y < x; y++)
+        {
+            next_matrix[x * nodes + y] = x;
+            next_matrix[y * nodes + x] = y;
+        }
+        next_matrix[x * nodes + x] = x;
+    }
+
+    // Allocate host memory for the CPU implementation and copy input data.
+    std::vector<unsigned int> expected_adjacency_matrix(adjacency_matrix);
+    std::vector<unsigned int> expected_next_matrix(next_matrix);
+
+    // Declare host input (pinned) memory for incremental results from kernel executions.
+    unsigned int* part_adjacency_matrix = nullptr;
+    unsigned int* part_next_matrix      = nullptr;
+
+    // Cumulative variable to compute the mean time per iteration of the algorithm.
+    double kernel_time = 0;
+
+    std::cout << "Executing Floyd-Warshall algorithm for " << iterations
+              << " iterations with a complete graph of " << nodes << " nodes." << std::endl;
+
+    // Allocate pinned host memory mapped to device memory.
+    HIP_CHECK(hipHostMalloc(&part_adjacency_matrix, size_bytes, hipHostMallocMapped));
+    HIP_CHECK(hipHostMalloc(&part_next_matrix, size_bytes, hipHostMallocMapped));
+
+    // Get device pointers to the pinned host memory allocations for the input matrices. They
+    // have the element type of the matrices and are the pointers handed to the kernel.
+    unsigned int* d_adjacency_matrix = nullptr;
+    unsigned int* d_next_matrix      = nullptr;
+    HIP_CHECK(
+        hipHostGetDevicePointer((void**)&d_adjacency_matrix, part_adjacency_matrix, 0 /*flags*/));
+    HIP_CHECK(hipHostGetDevicePointer((void**)&d_next_matrix, part_next_matrix, 0 /*flags*/));
+
+    // Create the events used to measure the execution time of the kernels. They are created
+    // once, reused by every iteration and destroyed after the last one.
+    hipEvent_t start, stop;
+    HIP_CHECK(hipEventCreate(&start));
+    HIP_CHECK(hipEventCreate(&stop));
+
+    // Run iterations times the Floyd-Warshall GPU algorithm.
+    for(unsigned int i = 0; i < iterations; ++i)
+    {
+        // Copy input data from host to device memory, so every iteration starts from the
+        // unmodified input graph.
+        HIP_CHECK(hipMemcpy(d_adjacency_matrix,
+                            adjacency_matrix.data(),
+                            size_bytes,
+                            hipMemcpyHostToDevice));
+        HIP_CHECK(hipMemcpy(d_next_matrix, next_matrix.data(), size_bytes, hipMemcpyHostToDevice));
+
+        float kernel_ms{};
+
+        // Floyd-Warshall GPU algorithm: launch Floyd-Warshall kernel for each node of the graph.
+        for(unsigned int k = 0; k < nodes; ++k)
+        {
+            // Record the start event.
+            HIP_CHECK(hipEventRecord(start, hipStreamDefault));
+
+            // Launch Floyd-Warshall kernel on the default stream.
+            hipLaunchKernelGGL(floyd_warshall_kernel,
+                               grid_dim,
+                               block_dim,
+                               0,
+                               hipStreamDefault,
+                               d_adjacency_matrix,
+                               d_next_matrix,
+                               nodes,
+                               k);
+
+            // Check if the kernel launch was successful.
+            HIP_CHECK(hipGetLastError());
+
+            // Record the stop event and wait until the kernel execution finishes.
+            HIP_CHECK(hipEventRecord(stop, hipStreamDefault));
+            HIP_CHECK(hipEventSynchronize(stop));
+
+            // Get the execution time of the kernel and add it to the total count.
+            HIP_CHECK(hipEventElapsedTime(&kernel_ms, start, stop));
+            kernel_time += kernel_ms;
+        }
+    }
+
+    // Release the timing events.
+    HIP_CHECK(hipEventDestroy(start));
+    HIP_CHECK(hipEventDestroy(stop));
+
+    // Copy results back to host.
+    HIP_CHECK(
+        hipMemcpy(adjacency_matrix.data(), d_adjacency_matrix, size_bytes, hipMemcpyDeviceToHost));
+    HIP_CHECK(hipMemcpy(next_matrix.data(), d_next_matrix, size_bytes, hipMemcpyDeviceToHost));
+
+    // Free pinned host memory.
+    HIP_CHECK(hipHostFree(part_adjacency_matrix));
+    HIP_CHECK(hipHostFree(part_next_matrix));
+
+    // Print the mean time per iteration (in milliseconds) of the algorithm.
+    kernel_time /= iterations;
+    std::cout << "The mean time needed for each iteration has been " << kernel_time << "ms."
+              << std::endl;
+
+    // Execute CPU algorithm.
+    floyd_warshall_reference(expected_adjacency_matrix.data(), expected_next_matrix.data(), nodes);
+
+    // Verify results: every mismatching element of either matrix counts as one error.
+    unsigned int errors = 0;
+    std::cout << "Validating results with CPU implementation." << std::endl;
+    for(unsigned int i = 0; i < size; ++i)
+    {
+        errors += (adjacency_matrix[i] != expected_adjacency_matrix[i]);
+        errors += (next_matrix[i] != expected_next_matrix[i]);
+    }
+
+    if(errors)
+    {
+        std::cout << "Validation failed with " << errors << " errors." << std::endl;
+        return error_exit_code;
+    }
+    else
+    {
+        std::cout << "Validation passed." << std::endl;
+    }
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a96b6852d..56b6a04f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,8 +21,9 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(ROCMm-SDK-Examples)
+project(ROCMm-SDK-Examples LANGUAGES CXX)
enable_testing()
+add_subdirectory(Applications)
add_subdirectory(HIP-Basic)
add_subdirectory(Libraries)
diff --git a/Common/cmdparser.hpp b/Common/cmdparser.hpp
index a2a566b81..c7acd5147 100644
--- a/Common/cmdparser.hpp
+++ b/Common/cmdparser.hpp
@@ -433,11 +433,8 @@ class Parser
[this](CallbackArgs& args)
{
args.output << this->usage();
-#pragma warning(push)
-#pragma warning(disable : 4702)
exit(0);
return false;
-#pragma warning(pop)
}),
"",
true);
@@ -765,4 +762,4 @@ class Parser
std::vector _arguments;
std::vector _commands;
};
-} // namespace cli
\ No newline at end of file
+} // namespace cli
diff --git a/Common/example_utils.hpp b/Common/example_utils.hpp
index 9e555e501..a63bbcd17 100644
--- a/Common/example_utils.hpp
+++ b/Common/example_utils.hpp
@@ -28,6 +28,7 @@
#include
#include
#include
+#include
#include
@@ -52,7 +53,7 @@ constexpr int error_exit_code = -1;
/// must be dereferencable in host code. Its value type must be formattable to
/// \p std::ostream.
template
-std::string format_range(const BidirectionalIterator begin, const BidirectionalIterator end)
+inline std::string format_range(const BidirectionalIterator begin, const BidirectionalIterator end)
{
std::stringstream sstream;
sstream << "[ ";
@@ -74,10 +75,10 @@ std::string format_range(const BidirectionalIterator begin, const BidirectionalI
/// \tparam BidirectionalIteratorU - must implement the BidirectionalIterator concept and
/// must be dereferencable in host code. Its value type must be formattable to \p std::ostream.
template
-std::string format_pairs(const BidirectionalIteratorT begin_a,
- const BidirectionalIteratorT end_a,
- const BidirectionalIteratorU begin_b,
- const BidirectionalIteratorU end_b)
+inline std::string format_pairs(const BidirectionalIteratorT begin_a,
+ const BidirectionalIteratorT end_a,
+ const BidirectionalIteratorU begin_b,
+ const BidirectionalIteratorU end_b)
{
(void)end_b;
assert(std::distance(begin_a, end_a) == std::distance(begin_b, end_b));
@@ -101,7 +102,7 @@ std::string format_pairs(const BidirectionalIteratorT begin_a,
/// \brief A function to parse a string for an int. If the string is a valid integer then return true
/// else if it has non-numeric character then return false.
-bool parse_int_string(const std::string& str, int& out)
+inline bool parse_int_string(const std::string& str, int& out)
{
try
{
@@ -133,16 +134,17 @@ class HostClock
this->reset_timer();
}
- void reset_timer()
+ inline void reset_timer()
{
this->elapsed_time = std::chrono::steady_clock::duration(0);
}
- void start_timer()
+ inline void start_timer()
{
this->start_time = std::chrono::steady_clock::now();
}
- void stop_timer()
+
+ inline void stop_timer()
{
const auto end_time = std::chrono::steady_clock::now();
this->elapsed_time += end_time - this->start_time;
@@ -150,10 +152,21 @@ class HostClock
/// @brief Returns time elapsed in Seconds
/// @return type double that contains the elapsed time in Seconds
- double get_elapsed_time() const
+ inline double get_elapsed_time() const
{
return std::chrono::duration_cast>(this->elapsed_time)
.count();
}
};
+
+/// \brief Returns ceil(dividend / divisor), where \p dividend is an integer and
+/// \p divisor is an unsigned integer.
+/// \tparam T - must be an integral type (checked with \p std::is_integral).
+/// \tparam U - must be an unsigned type (checked with \p std::is_unsigned).
+/// \note Evaluated as (dividend + divisor - 1) / divisor, so \p divisor must not be zero.
+template<typename T,
+         typename U,
+         std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<U>::value, int> = 0>
+__host__ __device__ auto ceiling_div(const T& dividend, const U& divisor)
+{
+    return (dividend + divisor - 1) / divisor;
+}
+
#endif // COMMON_EXAMPLE_UTILS_HPP
diff --git a/External/KHR/khrplatform.h b/External/KHR/khrplatform.h
new file mode 100644
index 000000000..01646449c
--- /dev/null
+++ b/External/KHR/khrplatform.h
@@ -0,0 +1,311 @@
+#ifndef __khrplatform_h_
+#define __khrplatform_h_
+
+/*
+** Copyright (c) 2008-2018 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+*/
+
+/* Khronos platform-specific types and definitions.
+ *
+ * The master copy of khrplatform.h is maintained in the Khronos EGL
+ * Registry repository at https://github.com/KhronosGroup/EGL-Registry
+ * The last semantic modification to khrplatform.h was at commit ID:
+ * 67a3e0864c2d75ea5287b9f3d2eb74a745936692
+ *
+ * Adopters may modify this file to suit their platform. Adopters are
+ * encouraged to submit platform specific modifications to the Khronos
+ * group so that they can be included in future versions of this file.
+ * Please submit changes by filing pull requests or issues on
+ * the EGL Registry repository linked above.
+ *
+ *
+ * See the Implementer's Guidelines for information about where this file
+ * should be located on your system and for more details of its use:
+ * http://www.khronos.org/registry/implementers_guide.pdf
+ *
+ * This file should be included as
+ * #include <KHR/khrplatform.h>
+ * by Khronos client API header files that use its types and defines.
+ *
+ * The types in khrplatform.h should only be used to define API-specific types.
+ *
+ * Types defined in khrplatform.h:
+ * khronos_int8_t signed 8 bit
+ * khronos_uint8_t unsigned 8 bit
+ * khronos_int16_t signed 16 bit
+ * khronos_uint16_t unsigned 16 bit
+ * khronos_int32_t signed 32 bit
+ * khronos_uint32_t unsigned 32 bit
+ * khronos_int64_t signed 64 bit
+ * khronos_uint64_t unsigned 64 bit
+ * khronos_intptr_t signed same number of bits as a pointer
+ * khronos_uintptr_t unsigned same number of bits as a pointer
+ * khronos_ssize_t signed size
+ * khronos_usize_t unsigned size
+ * khronos_float_t signed 32 bit floating point
+ * khronos_time_ns_t unsigned 64 bit time in nanoseconds
+ * khronos_utime_nanoseconds_t unsigned time interval or absolute time in
+ * nanoseconds
+ * khronos_stime_nanoseconds_t signed time interval in nanoseconds
+ * khronos_boolean_enum_t enumerated boolean type. This should
+ * only be used as a base type when a client API's boolean type is
+ * an enum. Client APIs which use an integer or other type for
+ * booleans cannot use this as the base type for their boolean.
+ *
+ * Tokens defined in khrplatform.h:
+ *
+ * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values.
+ *
+ * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0.
+ * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0.
+ *
+ * Calling convention macros defined in this file:
+ * KHRONOS_APICALL
+ * KHRONOS_APIENTRY
+ * KHRONOS_APIATTRIBUTES
+ *
+ * These may be used in function prototypes as:
+ *
+ * KHRONOS_APICALL void KHRONOS_APIENTRY funcname(
+ * int arg1,
+ * int arg2) KHRONOS_APIATTRIBUTES;
+ */
+
+#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
+# define KHRONOS_STATIC 1
+#endif
+
+/*-------------------------------------------------------------------------
+ * Definition of KHRONOS_APICALL
+ *-------------------------------------------------------------------------
+ * This precedes the return type of the function in the function prototype.
+ */
+#if defined(KHRONOS_STATIC)
+ /* If the preprocessor constant KHRONOS_STATIC is defined, make the
+ * header compatible with static linking. */
+# define KHRONOS_APICALL
+#elif defined(_WIN32)
+# define KHRONOS_APICALL __declspec(dllimport)
+#elif defined (__SYMBIAN32__)
+# define KHRONOS_APICALL IMPORT_C
+#elif defined(__ANDROID__)
+# define KHRONOS_APICALL __attribute__((visibility("default")))
+#else
+# define KHRONOS_APICALL
+#endif
+
+/*-------------------------------------------------------------------------
+ * Definition of KHRONOS_APIENTRY
+ *-------------------------------------------------------------------------
+ * This follows the return type of the function and precedes the function
+ * name in the function prototype.
+ */
+#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
+ /* Win32 but not WinCE */
+# define KHRONOS_APIENTRY __stdcall
+#else
+# define KHRONOS_APIENTRY
+#endif
+
+/*-------------------------------------------------------------------------
+ * Definition of KHRONOS_APIATTRIBUTES
+ *-------------------------------------------------------------------------
+ * This follows the closing parenthesis of the function prototype arguments.
+ */
+#if defined (__ARMCC_2__)
+#define KHRONOS_APIATTRIBUTES __softfp
+#else
+#define KHRONOS_APIATTRIBUTES
+#endif
+
+/*-------------------------------------------------------------------------
+ * basic type definitions
+ *-----------------------------------------------------------------------*/
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__)
+
+
+/*
+ * Using <stdint.h>
+ */
+#include <stdint.h>
+typedef int32_t khronos_int32_t;
+typedef uint32_t khronos_uint32_t;
+typedef int64_t khronos_int64_t;
+typedef uint64_t khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64 1
+#define KHRONOS_SUPPORT_FLOAT 1
+/*
+ * To support platform where unsigned long cannot be used interchangeably with
+ * inptr_t (e.g. CHERI-extended ISAs), we can use the stdint.h intptr_t.
+ * Ideally, we could just use (u)intptr_t everywhere, but this could result in
+ * ABI breakage if khronos_uintptr_t is changed from unsigned long to
+ * unsigned long long or similar (this results in different C++ name mangling).
+ * To avoid changes for existing platforms, we restrict usage of intptr_t to
+ * platforms where the size of a pointer is larger than the size of long.
+ */
+#if defined(__SIZEOF_LONG__) && defined(__SIZEOF_POINTER__)
+#if __SIZEOF_POINTER__ > __SIZEOF_LONG__
+#define KHRONOS_USE_INTPTR_T
+#endif
+#endif
+
+#elif defined(__VMS ) || defined(__sgi)
+
+/*
+ * Using <inttypes.h>
+ */
+#include <inttypes.h>
+typedef int32_t khronos_int32_t;
+typedef uint32_t khronos_uint32_t;
+typedef int64_t khronos_int64_t;
+typedef uint64_t khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64 1
+#define KHRONOS_SUPPORT_FLOAT 1
+
+#elif defined(_WIN32) && !defined(__SCITECH_SNAP__)
+
+/*
+ * Win32
+ */
+typedef __int32 khronos_int32_t;
+typedef unsigned __int32 khronos_uint32_t;
+typedef __int64 khronos_int64_t;
+typedef unsigned __int64 khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64 1
+#define KHRONOS_SUPPORT_FLOAT 1
+
+#elif defined(__sun__) || defined(__digital__)
+
+/*
+ * Sun or Digital
+ */
+typedef int khronos_int32_t;
+typedef unsigned int khronos_uint32_t;
+#if defined(__arch64__) || defined(_LP64)
+typedef long int khronos_int64_t;
+typedef unsigned long int khronos_uint64_t;
+#else
+typedef long long int khronos_int64_t;
+typedef unsigned long long int khronos_uint64_t;
+#endif /* __arch64__ */
+#define KHRONOS_SUPPORT_INT64 1
+#define KHRONOS_SUPPORT_FLOAT 1
+
+#elif 0
+
+/*
+ * Hypothetical platform with no float or int64 support
+ */
+typedef int khronos_int32_t;
+typedef unsigned int khronos_uint32_t;
+#define KHRONOS_SUPPORT_INT64 0
+#define KHRONOS_SUPPORT_FLOAT 0
+
+#else
+
+/*
+ * Generic fallback
+ */
+#include <stdint.h>
+typedef int32_t khronos_int32_t;
+typedef uint32_t khronos_uint32_t;
+typedef int64_t khronos_int64_t;
+typedef uint64_t khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64 1
+#define KHRONOS_SUPPORT_FLOAT 1
+
+#endif
+
+
+/*
+ * Types that are (so far) the same on all platforms
+ */
+typedef signed char khronos_int8_t;
+typedef unsigned char khronos_uint8_t;
+typedef signed short int khronos_int16_t;
+typedef unsigned short int khronos_uint16_t;
+
+/*
+ * Types that differ between LLP64 and LP64 architectures - in LLP64,
+ * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
+ * to be the only LLP64 architecture in current use.
+ */
+#ifdef KHRONOS_USE_INTPTR_T
+typedef intptr_t khronos_intptr_t;
+typedef uintptr_t khronos_uintptr_t;
+#elif defined(_WIN64)
+typedef signed long long int khronos_intptr_t;
+typedef unsigned long long int khronos_uintptr_t;
+#else
+typedef signed long int khronos_intptr_t;
+typedef unsigned long int khronos_uintptr_t;
+#endif
+
+#if defined(_WIN64)
+typedef signed long long int khronos_ssize_t;
+typedef unsigned long long int khronos_usize_t;
+#else
+typedef signed long int khronos_ssize_t;
+typedef unsigned long int khronos_usize_t;
+#endif
+
+#if KHRONOS_SUPPORT_FLOAT
+/*
+ * Float type
+ */
+typedef float khronos_float_t;
+#endif
+
+#if KHRONOS_SUPPORT_INT64
+/* Time types
+ *
+ * These types can be used to represent a time interval in nanoseconds or
+ * an absolute Unadjusted System Time. Unadjusted System Time is the number
+ * of nanoseconds since some arbitrary system event (e.g. since the last
+ * time the system booted). The Unadjusted System Time is an unsigned
+ * 64 bit value that wraps back to 0 every 584 years. Time intervals
+ * may be either signed or unsigned.
+ */
+typedef khronos_uint64_t khronos_utime_nanoseconds_t;
+typedef khronos_int64_t khronos_stime_nanoseconds_t;
+#endif
+
+/*
+ * Dummy value used to pad enum types to 32 bits.
+ */
+#ifndef KHRONOS_MAX_ENUM
+#define KHRONOS_MAX_ENUM 0x7FFFFFFF
+#endif
+
+/*
+ * Enumerated boolean type
+ *
+ * Values other than zero should be considered to be true. Therefore
+ * comparisons should not be made against KHRONOS_TRUE.
+ */
+typedef enum {
+ KHRONOS_FALSE = 0,
+ KHRONOS_TRUE = 1,
+ KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM
+} khronos_boolean_enum_t;
+
+#endif /* __khrplatform_h_ */
diff --git a/External/glad/glad.cpp b/External/glad/glad.cpp
new file mode 100644
index 000000000..d44908d22
--- /dev/null
+++ b/External/glad/glad.cpp
@@ -0,0 +1,1947 @@
+/*
+
+ OpenGL loader generated by glad 0.1.36 on Fri Oct 28 09:33:23 2022.
+
+ Language/Generator: C/C++
+ Specification: gl
+ APIs: gl=3.3
+ Profile: compatibility
+ Extensions:
+ GL_ARB_debug_output
+ Loader: True
+ Local files: False
+ Omit khrplatform: False
+ Reproducible: False
+
+ Commandline:
+ --profile="compatibility" --api="gl=3.3" --generator="c" --spec="gl" --extensions="GL_ARB_debug_output"
+ Online:
+ https://glad.dav1d.de/#profile=compatibility&language=c&specification=gl&loader=on&api=gl%3D3.3&extensions=GL_ARB_debug_output
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glad/glad.h>
+
+static void* get_proc(const char* namez);
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+    #ifndef _WINDOWS_
+        #undef APIENTRY
+    #endif
+    #include <windows.h>
+// Module handle for opengl32.dll; set by open_gl() and reset to NULL by close_gl().
+static HMODULE libGL;
+
+// Signature of the loader entry point looked up as "wglGetProcAddress" in open_gl().
+typedef void*(APIENTRYP PFNWGLGETPROCADDRESSPROC_PRIVATE)(const char*);
+static PFNWGLGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr;
+
+    // Define HAVE_WINAPIFAMILY when <winapifamily.h> can be included: probe with
+    // __has_include where the compiler provides it, otherwise use the MSVC version check.
+    #ifdef _MSC_VER
+        #ifdef __has_include
+            #if __has_include(<winapifamily.h>)
+                #define HAVE_WINAPIFAMILY 1
+            #endif
+        #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_
+            #define HAVE_WINAPIFAMILY 1
+        #endif
+    #endif
+
+    // IS_UWP is defined for the app partition without the desktop partition;
+    // open_gl() skips loading opengl32.dll in that configuration.
+    #ifdef HAVE_WINAPIFAMILY
+        #include <winapifamily.h>
+        #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) \
+            && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+            #define IS_UWP 1
+        #endif
+    #endif
+
+// Loads opengl32.dll into libGL and resolves "wglGetProcAddress" into
+// gladGetProcAddressPtr. Returns non-zero only when both steps succeed;
+// always returns 0 when IS_UWP is defined.
+static int open_gl(void)
+{
+    #ifndef IS_UWP
+    void (*wgl_entry)(void);
+
+    libGL = LoadLibraryW(L"opengl32.dll");
+    if(libGL == NULL)
+    {
+        return 0;
+    }
+
+    // Go through a generic function pointer before casting to the loader type.
+    wgl_entry             = (void (*)(void))GetProcAddress(libGL, "wglGetProcAddress");
+    gladGetProcAddressPtr = (PFNWGLGETPROCADDRESSPROC_PRIVATE)wgl_entry;
+    return gladGetProcAddressPtr != NULL;
+    #else
+    return 0;
+    #endif
+}
+
+// Releases the library held in libGL, if any, and resets the handle to NULL.
+static void close_gl(void)
+{
+    if(libGL == NULL)
+    {
+        return;
+    }
+
+    FreeLibrary((HMODULE)libGL);
+    libGL = NULL;
+}
+#else
+    #include <dlfcn.h>
+// Handle returned by dlopen() for the OpenGL library; reset to NULL by close_gl().
+static void* libGL;
+
+    #if !defined(__APPLE__) && !defined(__HAIKU__)
+// Signature of the loader entry point looked up as "glXGetProcAddressARB" in open_gl().
+typedef void*(APIENTRYP PFNGLXGETPROCADDRESSPROC_PRIVATE)(const char*);
+static PFNGLXGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr;
+    #endif
+
+// Tries each candidate library name with dlopen() and stops at the first one
+// that loads. On Apple and Haiku a successful dlopen() is enough (returns 1);
+// elsewhere the result is whether "glXGetProcAddressARB" could be resolved.
+// Returns 0 when no candidate could be opened.
+static int open_gl(void)
+{
+    #ifdef __APPLE__
+    static const char* NAMES[]
+        = {"../Frameworks/OpenGL.framework/OpenGL",
+           "/Library/Frameworks/OpenGL.framework/OpenGL",
+           "/System/Library/Frameworks/OpenGL.framework/OpenGL",
+           "/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL"};
+    #else
+    static const char* NAMES[] = {"libGL.so.1", "libGL.so"};
+    #endif
+    const size_t name_count = sizeof(NAMES) / sizeof(NAMES[0]);
+    size_t       i;
+
+    for(i = 0; i < name_count; ++i)
+    {
+        libGL = dlopen(NAMES[i], RTLD_NOW | RTLD_GLOBAL);
+        if(libGL == NULL)
+        {
+            // This candidate is not available; try the next one.
+            continue;
+        }
+
+    #if defined(__APPLE__) || defined(__HAIKU__)
+        return 1;
+    #else
+        gladGetProcAddressPtr
+            = (PFNGLXGETPROCADDRESSPROC_PRIVATE)dlsym(libGL, "glXGetProcAddressARB");
+        return gladGetProcAddressPtr != NULL;
+    #endif
+    }
+
+    return 0;
+}
+
+// Closes the library held in libGL, if any, and resets the handle to NULL.
+static void close_gl(void)
+{
+    if(libGL == NULL)
+    {
+        return;
+    }
+
+    dlclose(libGL);
+    libGL = NULL;
+}
+#endif
+
+// Resolves the symbol \p namez from the library opened by open_gl().
+// Where a loader entry point exists (not Apple/Haiku) it is asked first; if it
+// yields NULL, or is unavailable, the library's own symbol table is queried.
+// Returns NULL when no library is open or the symbol cannot be found.
+static void* get_proc(const char* namez)
+{
+    void* proc = NULL;
+
+    if(libGL == NULL)
+    {
+        return NULL;
+    }
+
+#if !defined(__APPLE__) && !defined(__HAIKU__)
+    if(gladGetProcAddressPtr != NULL)
+    {
+        proc = gladGetProcAddressPtr(namez);
+    }
+#endif
+    if(proc != NULL)
+    {
+        return proc;
+    }
+
+    // Fall back to a direct lookup in the loaded library.
+#if defined(_WIN32) || defined(__CYGWIN__)
+    return (void*)GetProcAddress((HMODULE)libGL, namez);
+#else
+    return dlsym(libGL, namez);
+#endif
+}
+
+// Opens the GL library with open_gl(), loads the entry points through
+// gladLoadGLLoader() using get_proc as the resolver, then calls close_gl().
+// Returns the result of gladLoadGLLoader(), or 0 when open_gl() fails.
+int gladLoadGL(void)
+{
+    int loaded;
+
+    if(!open_gl())
+    {
+        return 0;
+    }
+
+    loaded = gladLoadGLLoader(&get_proc);
+    close_gl();
+    return loaded;
+}
+
+struct gladGLversionStruct GLVersion = {0, 0};
+
+#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0)
+ #define _GLAD_IS_SOME_NEW_VERSION 1
+#endif
+
+static int max_loaded_major;
+
+static const char* exts = NULL;
+static int num_exts_i = 0;
+static char** exts_i = NULL;
+
+// Caches the extension list of the current context.
+//
+// When max_loaded_major < 3 (or _GLAD_IS_SOME_NEW_VERSION is not defined) the
+// pointer returned by glGetString(GL_EXTENSIONS) is stored in exts.
+// Otherwise GL_NUM_EXTENSIONS is queried and every glGetStringi() name is
+// copied into a malloc'ed array exts_i, which free_exts() releases.
+//
+// Returns 1 on success and 0 when exts_i is still NULL after the allocation
+// attempt. Individual entries of exts_i may be NULL (see below).
+static int get_exts(void)
+{
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+    if(max_loaded_major < 3)
+    {
+#endif
+        exts = (const char*)glGetString(GL_EXTENSIONS);
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+    }
+    else
+    {
+        unsigned int index;
+
+        num_exts_i = 0;
+        glGetIntegerv(GL_NUM_EXTENSIONS, &num_exts_i);
+        if(num_exts_i > 0)
+        {
+            exts_i = (char**)malloc((size_t)num_exts_i * (sizeof *exts_i));
+        }
+
+        if(exts_i == NULL)
+        {
+            return 0;
+        }
+
+        for(index = 0; index < (unsigned)num_exts_i; index++)
+        {
+            const char* gl_str_tmp = (const char*)glGetStringi(GL_EXTENSIONS, index);
+            char*       local_str  = NULL;
+
+            // glGetStringi() returns NULL on error; do not pass that to strlen().
+            // A NULL slot is stored instead, which has_ext() already skips.
+            if(gl_str_tmp != NULL)
+            {
+                const size_t len = strlen(gl_str_tmp);
+
+                local_str = (char*)malloc((len + 1) * sizeof(char));
+                if(local_str != NULL)
+                {
+                    // Copy the name including its terminating NUL.
+                    memcpy(local_str, gl_str_tmp, (len + 1) * sizeof(char));
+                }
+            }
+            exts_i[index] = local_str;
+        }
+    }
+#endif
+    return 1;
+}
+
+// Releases the extension-name array built by get_exts(): frees each of the
+// num_exts_i entries, then the array itself, and resets exts_i to NULL.
+// Does nothing when exts_i is already NULL.
+static void free_exts(void)
+{
+    int i;
+
+    if(exts_i == NULL)
+    {
+        return;
+    }
+
+    for(i = 0; i < num_exts_i; ++i)
+    {
+        free(exts_i[i]);
+    }
+    free(exts_i);
+    exts_i = NULL;
+}
+
+// Returns 1 when the extension name `ext` is present in the extension data
+// cached by get_exts(), 0 otherwise.
+//
+// When max_loaded_major < 3 (or _GLAD_IS_SOME_NEW_VERSION is not defined) the
+// single string `exts` is scanned for `ext` as a whole space-delimited token.
+// Otherwise `ext` is compared against every entry of the exts_i array.
+static int has_ext(const char* ext)
+{
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+ if(max_loaded_major < 3)
+ {
+#endif
+ const char* extensions;
+ const char* loc;
+ const char* terminator;
+ extensions = exts;
+ if(extensions == NULL || ext == NULL)
+ {
+ return 0;
+ }
+
+ while(1)
+ {
+ // Find the next occurrence of ext at or after the current position.
+ loc = strstr(extensions, ext);
+ if(loc == NULL)
+ {
+ return 0;
+ }
+
+ // Accept the hit only if it is a whole token: it starts at the current
+ // position or after a space, and ends at a space or the end of the string.
+ terminator = loc + strlen(ext);
+ if((loc == extensions || *(loc - 1) == ' ')
+ && (*terminator == ' ' || *terminator == '\0'))
+ {
+ return 1;
+ }
+ // Partial match (ext is a substring of a longer name): continue after it.
+ extensions = terminator;
+ }
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+ }
+ else
+ {
+ int index;
+ if(exts_i == NULL)
+ return 0;
+ for(index = 0; index < num_exts_i; index++)
+ {
+ const char* e = exts_i[index];
+
+ // Entries can be NULL (get_exts() stores NULL when its copy allocation fails).
+ if(exts_i[index] != NULL && strcmp(e, ext) == 0)
+ {
+ return 1;
+ }
+ }
+ }
+#endif
+
+ return 0;
+}
+int GLAD_GL_VERSION_1_0 = 0;
+int GLAD_GL_VERSION_1_1 = 0;
+int GLAD_GL_VERSION_1_2 = 0;
+int GLAD_GL_VERSION_1_3 = 0;
+int GLAD_GL_VERSION_1_4 = 0;
+int GLAD_GL_VERSION_1_5 = 0;
+int GLAD_GL_VERSION_2_0 = 0;
+int GLAD_GL_VERSION_2_1 = 0;
+int GLAD_GL_VERSION_3_0 = 0;
+int GLAD_GL_VERSION_3_1 = 0;
+int GLAD_GL_VERSION_3_2 = 0;
+int GLAD_GL_VERSION_3_3 = 0;
+PFNGLACCUMPROC glad_glAccum = NULL;
+PFNGLACTIVETEXTUREPROC glad_glActiveTexture = NULL;
+PFNGLALPHAFUNCPROC glad_glAlphaFunc = NULL;
+PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident = NULL;
+PFNGLARRAYELEMENTPROC glad_glArrayElement = NULL;
+PFNGLATTACHSHADERPROC glad_glAttachShader = NULL;
+PFNGLBEGINPROC glad_glBegin = NULL;
+PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender = NULL;
+PFNGLBEGINQUERYPROC glad_glBeginQuery = NULL;
+PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback = NULL;
+PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation = NULL;
+PFNGLBINDBUFFERPROC glad_glBindBuffer = NULL;
+PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase = NULL;
+PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange = NULL;
+PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation = NULL;
+PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed = NULL;
+PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer = NULL;
+PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL;
+PFNGLBINDSAMPLERPROC glad_glBindSampler = NULL;
+PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL;
+PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray = NULL;
+PFNGLBITMAPPROC glad_glBitmap = NULL;
+PFNGLBLENDCOLORPROC glad_glBlendColor = NULL;
+PFNGLBLENDEQUATIONPROC glad_glBlendEquation = NULL;
+PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate = NULL;
+PFNGLBLENDFUNCPROC glad_glBlendFunc = NULL;
+PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL;
+PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer = NULL;
+PFNGLBUFFERDATAPROC glad_glBufferData = NULL;
+PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL;
+PFNGLCALLLISTPROC glad_glCallList = NULL;
+PFNGLCALLLISTSPROC glad_glCallLists = NULL;
+PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL;
+PFNGLCLAMPCOLORPROC glad_glClampColor = NULL;
+PFNGLCLEARPROC glad_glClear = NULL;
+PFNGLCLEARACCUMPROC glad_glClearAccum = NULL;
+PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi = NULL;
+PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv = NULL;
+PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv = NULL;
+PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv = NULL;
+PFNGLCLEARCOLORPROC glad_glClearColor = NULL;
+PFNGLCLEARDEPTHPROC glad_glClearDepth = NULL;
+PFNGLCLEARINDEXPROC glad_glClearIndex = NULL;
+PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL;
+PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture = NULL;
+PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync = NULL;
+PFNGLCLIPPLANEPROC glad_glClipPlane = NULL;
+PFNGLCOLOR3BPROC glad_glColor3b = NULL;
+PFNGLCOLOR3BVPROC glad_glColor3bv = NULL;
+PFNGLCOLOR3DPROC glad_glColor3d = NULL;
+PFNGLCOLOR3DVPROC glad_glColor3dv = NULL;
+PFNGLCOLOR3FPROC glad_glColor3f = NULL;
+PFNGLCOLOR3FVPROC glad_glColor3fv = NULL;
+PFNGLCOLOR3IPROC glad_glColor3i = NULL;
+PFNGLCOLOR3IVPROC glad_glColor3iv = NULL;
+PFNGLCOLOR3SPROC glad_glColor3s = NULL;
+PFNGLCOLOR3SVPROC glad_glColor3sv = NULL;
+PFNGLCOLOR3UBPROC glad_glColor3ub = NULL;
+PFNGLCOLOR3UBVPROC glad_glColor3ubv = NULL;
+PFNGLCOLOR3UIPROC glad_glColor3ui = NULL;
+PFNGLCOLOR3UIVPROC glad_glColor3uiv = NULL;
+PFNGLCOLOR3USPROC glad_glColor3us = NULL;
+PFNGLCOLOR3USVPROC glad_glColor3usv = NULL;
+PFNGLCOLOR4BPROC glad_glColor4b = NULL;
+PFNGLCOLOR4BVPROC glad_glColor4bv = NULL;
+PFNGLCOLOR4DPROC glad_glColor4d = NULL;
+PFNGLCOLOR4DVPROC glad_glColor4dv = NULL;
+PFNGLCOLOR4FPROC glad_glColor4f = NULL;
+PFNGLCOLOR4FVPROC glad_glColor4fv = NULL;
+PFNGLCOLOR4IPROC glad_glColor4i = NULL;
+PFNGLCOLOR4IVPROC glad_glColor4iv = NULL;
+PFNGLCOLOR4SPROC glad_glColor4s = NULL;
+PFNGLCOLOR4SVPROC glad_glColor4sv = NULL;
+PFNGLCOLOR4UBPROC glad_glColor4ub = NULL;
+PFNGLCOLOR4UBVPROC glad_glColor4ubv = NULL;
+PFNGLCOLOR4UIPROC glad_glColor4ui = NULL;
+PFNGLCOLOR4UIVPROC glad_glColor4uiv = NULL;
+PFNGLCOLOR4USPROC glad_glColor4us = NULL;
+PFNGLCOLOR4USVPROC glad_glColor4usv = NULL;
+PFNGLCOLORMASKPROC glad_glColorMask = NULL;
+PFNGLCOLORMASKIPROC glad_glColorMaski = NULL;
+PFNGLCOLORMATERIALPROC glad_glColorMaterial = NULL;
+PFNGLCOLORP3UIPROC glad_glColorP3ui = NULL;
+PFNGLCOLORP3UIVPROC glad_glColorP3uiv = NULL;
+PFNGLCOLORP4UIPROC glad_glColorP4ui = NULL;
+PFNGLCOLORP4UIVPROC glad_glColorP4uiv = NULL;
+PFNGLCOLORPOINTERPROC glad_glColorPointer = NULL;
+PFNGLCOMPILESHADERPROC glad_glCompileShader = NULL;
+PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D = NULL;
+PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D = NULL;
+PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D = NULL;
+PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D = NULL;
+PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL;
+PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D = NULL;
+PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData = NULL;
+PFNGLCOPYPIXELSPROC glad_glCopyPixels = NULL;
+PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D = NULL;
+PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL;
+PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D = NULL;
+PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL;
+PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D = NULL;
+PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL;
+PFNGLCREATESHADERPROC glad_glCreateShader = NULL;
+PFNGLCULLFACEPROC glad_glCullFace = NULL;
+PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers = NULL;
+PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers = NULL;
+PFNGLDELETELISTSPROC glad_glDeleteLists = NULL;
+PFNGLDELETEPROGRAMPROC glad_glDeleteProgram = NULL;
+PFNGLDELETEQUERIESPROC glad_glDeleteQueries = NULL;
+PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers = NULL;
+PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers = NULL;
+PFNGLDELETESHADERPROC glad_glDeleteShader = NULL;
+PFNGLDELETESYNCPROC glad_glDeleteSync = NULL;
+PFNGLDELETETEXTURESPROC glad_glDeleteTextures = NULL;
+PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays = NULL;
+PFNGLDEPTHFUNCPROC glad_glDepthFunc = NULL;
+PFNGLDEPTHMASKPROC glad_glDepthMask = NULL;
+PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL;
+PFNGLDETACHSHADERPROC glad_glDetachShader = NULL;
+PFNGLDISABLEPROC glad_glDisable = NULL;
+PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState = NULL;
+PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL;
+PFNGLDISABLEIPROC glad_glDisablei = NULL;
+PFNGLDRAWARRAYSPROC glad_glDrawArrays = NULL;
+PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced = NULL;
+PFNGLDRAWBUFFERPROC glad_glDrawBuffer = NULL;
+PFNGLDRAWBUFFERSPROC glad_glDrawBuffers = NULL;
+PFNGLDRAWELEMENTSPROC glad_glDrawElements = NULL;
+PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex = NULL;
+PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced = NULL;
+PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex = NULL;
+PFNGLDRAWPIXELSPROC glad_glDrawPixels = NULL;
+PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements = NULL;
+PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex = NULL;
+PFNGLEDGEFLAGPROC glad_glEdgeFlag = NULL;
+PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer = NULL;
+PFNGLEDGEFLAGVPROC glad_glEdgeFlagv = NULL;
+PFNGLENABLEPROC glad_glEnable = NULL;
+PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState = NULL;
+PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray = NULL;
+PFNGLENABLEIPROC glad_glEnablei = NULL;
+PFNGLENDPROC glad_glEnd = NULL;
+PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender = NULL;
+PFNGLENDLISTPROC glad_glEndList = NULL;
+PFNGLENDQUERYPROC glad_glEndQuery = NULL;
+PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback = NULL;
+PFNGLEVALCOORD1DPROC glad_glEvalCoord1d = NULL;
+PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv = NULL;
+PFNGLEVALCOORD1FPROC glad_glEvalCoord1f = NULL;
+PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv = NULL;
+PFNGLEVALCOORD2DPROC glad_glEvalCoord2d = NULL;
+PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv = NULL;
+PFNGLEVALCOORD2FPROC glad_glEvalCoord2f = NULL;
+PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv = NULL;
+PFNGLEVALMESH1PROC glad_glEvalMesh1 = NULL;
+PFNGLEVALMESH2PROC glad_glEvalMesh2 = NULL;
+PFNGLEVALPOINT1PROC glad_glEvalPoint1 = NULL;
+PFNGLEVALPOINT2PROC glad_glEvalPoint2 = NULL;
+PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer = NULL;
+PFNGLFENCESYNCPROC glad_glFenceSync = NULL;
+PFNGLFINISHPROC glad_glFinish = NULL;
+PFNGLFLUSHPROC glad_glFlush = NULL;
+PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange = NULL;
+PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer = NULL;
+PFNGLFOGCOORDDPROC glad_glFogCoordd = NULL;
+PFNGLFOGCOORDDVPROC glad_glFogCoorddv = NULL;
+PFNGLFOGCOORDFPROC glad_glFogCoordf = NULL;
+PFNGLFOGCOORDFVPROC glad_glFogCoordfv = NULL;
+PFNGLFOGFPROC glad_glFogf = NULL;
+PFNGLFOGFVPROC glad_glFogfv = NULL;
+PFNGLFOGIPROC glad_glFogi = NULL;
+PFNGLFOGIVPROC glad_glFogiv = NULL;
+PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL;
+PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture = NULL;
+PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D = NULL;
+PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D = NULL;
+PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D = NULL;
+PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer = NULL;
+PFNGLFRONTFACEPROC glad_glFrontFace = NULL;
+PFNGLFRUSTUMPROC glad_glFrustum = NULL;
+PFNGLGENBUFFERSPROC glad_glGenBuffers = NULL;
+PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers = NULL;
+PFNGLGENLISTSPROC glad_glGenLists = NULL;
+PFNGLGENQUERIESPROC glad_glGenQueries = NULL;
+PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers = NULL;
+PFNGLGENSAMPLERSPROC glad_glGenSamplers = NULL;
+PFNGLGENTEXTURESPROC glad_glGenTextures = NULL;
+PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays = NULL; /* stays NULL until a loader assigns it */
+PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL;
+PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL;
+PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform = NULL;
+PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName = NULL;
+PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv = NULL;
+PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName = NULL;
+PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv = NULL;
+PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders = NULL;
+PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation = NULL;
+PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v = NULL;
+PFNGLGETBOOLEANVPROC glad_glGetBooleanv = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v = NULL;
+PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL;
+PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL;
+PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData = NULL;
+PFNGLGETCLIPPLANEPROC glad_glGetClipPlane = NULL;
+PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage = NULL;
+PFNGLGETDOUBLEVPROC glad_glGetDoublev = NULL;
+PFNGLGETERRORPROC glad_glGetError = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLGETFLOATVPROC glad_glGetFloatv = NULL;
+PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex = NULL;
+PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation = NULL;
+PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv = NULL;
+PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v = NULL;
+PFNGLGETINTEGER64VPROC glad_glGetInteger64v = NULL;
+PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v = NULL;
+PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL;
+PFNGLGETLIGHTFVPROC glad_glGetLightfv = NULL;
+PFNGLGETLIGHTIVPROC glad_glGetLightiv = NULL;
+PFNGLGETMAPDVPROC glad_glGetMapdv = NULL;
+PFNGLGETMAPFVPROC glad_glGetMapfv = NULL;
+PFNGLGETMAPIVPROC glad_glGetMapiv = NULL;
+PFNGLGETMATERIALFVPROC glad_glGetMaterialfv = NULL;
+PFNGLGETMATERIALIVPROC glad_glGetMaterialiv = NULL;
+PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv = NULL;
+PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv = NULL;
+PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv = NULL;
+PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv = NULL;
+PFNGLGETPOINTERVPROC glad_glGetPointerv = NULL;
+PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple = NULL;
+PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog = NULL;
+PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL;
+PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v = NULL;
+PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv = NULL;
+PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v = NULL;
+PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv = NULL;
+PFNGLGETQUERYIVPROC glad_glGetQueryiv = NULL;
+PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv = NULL;
+PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv = NULL;
+PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv = NULL;
+PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv = NULL;
+PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv = NULL;
+PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog = NULL;
+PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource = NULL;
+PFNGLGETSHADERIVPROC glad_glGetShaderiv = NULL;
+PFNGLGETSTRINGPROC glad_glGetString = NULL;
+PFNGLGETSTRINGIPROC glad_glGetStringi = NULL;
+PFNGLGETSYNCIVPROC glad_glGetSynciv = NULL;
+PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv = NULL;
+PFNGLGETTEXENVIVPROC glad_glGetTexEnviv = NULL;
+PFNGLGETTEXGENDVPROC glad_glGetTexGendv = NULL;
+PFNGLGETTEXGENFVPROC glad_glGetTexGenfv = NULL;
+PFNGLGETTEXGENIVPROC glad_glGetTexGeniv = NULL;
+PFNGLGETTEXIMAGEPROC glad_glGetTexImage = NULL;
+PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv = NULL;
+PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv = NULL;
+PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv = NULL;
+PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv = NULL;
+PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL;
+PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL;
+PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying = NULL;
+PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex = NULL;
+PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices = NULL;
+PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL;
+PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL;
+PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL;
+PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv = NULL;
+PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv = NULL;
+PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv = NULL;
+PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL;
+PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv = NULL;
+PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL;
+PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL;
+PFNGLHINTPROC glad_glHint = NULL; /* assigned in load_GL_VERSION_1_0 via load("glHint") */
+PFNGLINDEXMASKPROC glad_glIndexMask = NULL;
+PFNGLINDEXPOINTERPROC glad_glIndexPointer = NULL;
+PFNGLINDEXDPROC glad_glIndexd = NULL;
+PFNGLINDEXDVPROC glad_glIndexdv = NULL;
+PFNGLINDEXFPROC glad_glIndexf = NULL;
+PFNGLINDEXFVPROC glad_glIndexfv = NULL;
+PFNGLINDEXIPROC glad_glIndexi = NULL;
+PFNGLINDEXIVPROC glad_glIndexiv = NULL;
+PFNGLINDEXSPROC glad_glIndexs = NULL;
+PFNGLINDEXSVPROC glad_glIndexsv = NULL;
+PFNGLINDEXUBPROC glad_glIndexub = NULL;
+PFNGLINDEXUBVPROC glad_glIndexubv = NULL;
+PFNGLINITNAMESPROC glad_glInitNames = NULL;
+PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays = NULL;
+PFNGLISBUFFERPROC glad_glIsBuffer = NULL;
+PFNGLISENABLEDPROC glad_glIsEnabled = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLISENABLEDIPROC glad_glIsEnabledi = NULL;
+PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer = NULL;
+PFNGLISLISTPROC glad_glIsList = NULL;
+PFNGLISPROGRAMPROC glad_glIsProgram = NULL;
+PFNGLISQUERYPROC glad_glIsQuery = NULL;
+PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer = NULL;
+PFNGLISSAMPLERPROC glad_glIsSampler = NULL;
+PFNGLISSHADERPROC glad_glIsShader = NULL;
+PFNGLISSYNCPROC glad_glIsSync = NULL;
+PFNGLISTEXTUREPROC glad_glIsTexture = NULL;
+PFNGLISVERTEXARRAYPROC glad_glIsVertexArray = NULL;
+PFNGLLIGHTMODELFPROC glad_glLightModelf = NULL; /* stays NULL until a loader assigns it */
+PFNGLLIGHTMODELFVPROC glad_glLightModelfv = NULL;
+PFNGLLIGHTMODELIPROC glad_glLightModeli = NULL;
+PFNGLLIGHTMODELIVPROC glad_glLightModeliv = NULL;
+PFNGLLIGHTFPROC glad_glLightf = NULL;
+PFNGLLIGHTFVPROC glad_glLightfv = NULL;
+PFNGLLIGHTIPROC glad_glLighti = NULL;
+PFNGLLIGHTIVPROC glad_glLightiv = NULL;
+PFNGLLINESTIPPLEPROC glad_glLineStipple = NULL;
+PFNGLLINEWIDTHPROC glad_glLineWidth = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL;
+PFNGLLISTBASEPROC glad_glListBase = NULL;
+PFNGLLOADIDENTITYPROC glad_glLoadIdentity = NULL;
+PFNGLLOADMATRIXDPROC glad_glLoadMatrixd = NULL;
+PFNGLLOADMATRIXFPROC glad_glLoadMatrixf = NULL;
+PFNGLLOADNAMEPROC glad_glLoadName = NULL;
+PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd = NULL;
+PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf = NULL;
+PFNGLLOGICOPPROC glad_glLogicOp = NULL;
+PFNGLMAP1DPROC glad_glMap1d = NULL;
+PFNGLMAP1FPROC glad_glMap1f = NULL;
+PFNGLMAP2DPROC glad_glMap2d = NULL;
+PFNGLMAP2FPROC glad_glMap2f = NULL;
+PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL;
+PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange = NULL;
+PFNGLMAPGRID1DPROC glad_glMapGrid1d = NULL;
+PFNGLMAPGRID1FPROC glad_glMapGrid1f = NULL;
+PFNGLMAPGRID2DPROC glad_glMapGrid2d = NULL;
+PFNGLMAPGRID2FPROC glad_glMapGrid2f = NULL;
+PFNGLMATERIALFPROC glad_glMaterialf = NULL;
+PFNGLMATERIALFVPROC glad_glMaterialfv = NULL;
+PFNGLMATERIALIPROC glad_glMateriali = NULL;
+PFNGLMATERIALIVPROC glad_glMaterialiv = NULL;
+PFNGLMATRIXMODEPROC glad_glMatrixMode = NULL;
+PFNGLMULTMATRIXDPROC glad_glMultMatrixd = NULL;
+PFNGLMULTMATRIXFPROC glad_glMultMatrixf = NULL;
+PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd = NULL;
+PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf = NULL;
+PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays = NULL;
+PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements = NULL;
+PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex = NULL;
+PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d = NULL;
+PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv = NULL;
+PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f = NULL;
+PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv = NULL;
+PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i = NULL;
+PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv = NULL;
+PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s = NULL;
+PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv = NULL;
+PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d = NULL;
+PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv = NULL;
+PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f = NULL;
+PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv = NULL;
+PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i = NULL;
+PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv = NULL;
+PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s = NULL;
+PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv = NULL;
+PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d = NULL;
+PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv = NULL;
+PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f = NULL;
+PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv = NULL;
+PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i = NULL;
+PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv = NULL;
+PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s = NULL;
+PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv = NULL;
+PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d = NULL;
+PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv = NULL;
+PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f = NULL;
+PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv = NULL;
+PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i = NULL;
+PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv = NULL;
+PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s = NULL;
+PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv = NULL;
+PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui = NULL;
+PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv = NULL;
+PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui = NULL;
+PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv = NULL;
+PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui = NULL;
+PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv = NULL;
+PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui = NULL;
+PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv = NULL;
+PFNGLNEWLISTPROC glad_glNewList = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLNORMAL3BPROC glad_glNormal3b = NULL;
+PFNGLNORMAL3BVPROC glad_glNormal3bv = NULL;
+PFNGLNORMAL3DPROC glad_glNormal3d = NULL;
+PFNGLNORMAL3DVPROC glad_glNormal3dv = NULL;
+PFNGLNORMAL3FPROC glad_glNormal3f = NULL;
+PFNGLNORMAL3FVPROC glad_glNormal3fv = NULL;
+PFNGLNORMAL3IPROC glad_glNormal3i = NULL;
+PFNGLNORMAL3IVPROC glad_glNormal3iv = NULL;
+PFNGLNORMAL3SPROC glad_glNormal3s = NULL;
+PFNGLNORMAL3SVPROC glad_glNormal3sv = NULL;
+PFNGLNORMALP3UIPROC glad_glNormalP3ui = NULL;
+PFNGLNORMALP3UIVPROC glad_glNormalP3uiv = NULL;
+PFNGLNORMALPOINTERPROC glad_glNormalPointer = NULL;
+PFNGLORTHOPROC glad_glOrtho = NULL;
+PFNGLPASSTHROUGHPROC glad_glPassThrough = NULL;
+PFNGLPIXELMAPFVPROC glad_glPixelMapfv = NULL;
+PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv = NULL;
+PFNGLPIXELMAPUSVPROC glad_glPixelMapusv = NULL;
+PFNGLPIXELSTOREFPROC glad_glPixelStoref = NULL;
+PFNGLPIXELSTOREIPROC glad_glPixelStorei = NULL;
+PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf = NULL;
+PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi = NULL;
+PFNGLPIXELZOOMPROC glad_glPixelZoom = NULL;
+PFNGLPOINTPARAMETERFPROC glad_glPointParameterf = NULL;
+PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv = NULL;
+PFNGLPOINTPARAMETERIPROC glad_glPointParameteri = NULL;
+PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv = NULL;
+PFNGLPOINTSIZEPROC glad_glPointSize = NULL;
+PFNGLPOLYGONMODEPROC glad_glPolygonMode = NULL;
+PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL;
+PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple = NULL;
+PFNGLPOPATTRIBPROC glad_glPopAttrib = NULL;
+PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib = NULL;
+PFNGLPOPMATRIXPROC glad_glPopMatrix = NULL;
+PFNGLPOPNAMEPROC glad_glPopName = NULL;
+PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex = NULL;
+PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures = NULL;
+PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex = NULL;
+PFNGLPUSHATTRIBPROC glad_glPushAttrib = NULL;
+PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib = NULL;
+PFNGLPUSHMATRIXPROC glad_glPushMatrix = NULL;
+PFNGLPUSHNAMEPROC glad_glPushName = NULL;
+PFNGLQUERYCOUNTERPROC glad_glQueryCounter = NULL;
+PFNGLRASTERPOS2DPROC glad_glRasterPos2d = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv = NULL;
+PFNGLRASTERPOS2FPROC glad_glRasterPos2f = NULL;
+PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv = NULL;
+PFNGLRASTERPOS2IPROC glad_glRasterPos2i = NULL;
+PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv = NULL;
+PFNGLRASTERPOS2SPROC glad_glRasterPos2s = NULL;
+PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv = NULL;
+PFNGLRASTERPOS3DPROC glad_glRasterPos3d = NULL;
+PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv = NULL;
+PFNGLRASTERPOS3FPROC glad_glRasterPos3f = NULL;
+PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv = NULL;
+PFNGLRASTERPOS3IPROC glad_glRasterPos3i = NULL;
+PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv = NULL;
+PFNGLRASTERPOS3SPROC glad_glRasterPos3s = NULL;
+PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv = NULL;
+PFNGLRASTERPOS4DPROC glad_glRasterPos4d = NULL;
+PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv = NULL;
+PFNGLRASTERPOS4FPROC glad_glRasterPos4f = NULL;
+PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv = NULL;
+PFNGLRASTERPOS4IPROC glad_glRasterPos4i = NULL;
+PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv = NULL;
+PFNGLRASTERPOS4SPROC glad_glRasterPos4s = NULL;
+PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv = NULL;
+PFNGLREADBUFFERPROC glad_glReadBuffer = NULL;
+PFNGLREADPIXELSPROC glad_glReadPixels = NULL;
+PFNGLRECTDPROC glad_glRectd = NULL;
+PFNGLRECTDVPROC glad_glRectdv = NULL;
+PFNGLRECTFPROC glad_glRectf = NULL;
+PFNGLRECTFVPROC glad_glRectfv = NULL;
+PFNGLRECTIPROC glad_glRecti = NULL;
+PFNGLRECTIVPROC glad_glRectiv = NULL;
+PFNGLRECTSPROC glad_glRects = NULL;
+PFNGLRECTSVPROC glad_glRectsv = NULL;
+PFNGLRENDERMODEPROC glad_glRenderMode = NULL;
+PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage = NULL;
+PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample = NULL;
+PFNGLROTATEDPROC glad_glRotated = NULL;
+PFNGLROTATEFPROC glad_glRotatef = NULL;
+PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage = NULL;
+PFNGLSAMPLEMASKIPROC glad_glSampleMaski = NULL;
+PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv = NULL;
+PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv = NULL;
+PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf = NULL;
+PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv = NULL;
+PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri = NULL;
+PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv = NULL;
+PFNGLSCALEDPROC glad_glScaled = NULL;
+PFNGLSCALEFPROC glad_glScalef = NULL;
+PFNGLSCISSORPROC glad_glScissor = NULL;
+PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b = NULL;
+PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv = NULL;
+PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d = NULL;
+PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv = NULL;
+PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f = NULL;
+PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv = NULL;
+PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i = NULL;
+PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv = NULL;
+PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s = NULL;
+PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv = NULL;
+PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub = NULL;
+PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv = NULL;
+PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui = NULL;
+PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv = NULL;
+PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us = NULL;
+PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv = NULL;
+PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui = NULL;
+PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv = NULL;
+PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer = NULL;
+PFNGLSELECTBUFFERPROC glad_glSelectBuffer = NULL;
+PFNGLSHADEMODELPROC glad_glShadeModel = NULL;
+PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL;
+PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL;
+PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL;
+PFNGLSTENCILMASKPROC glad_glStencilMask = NULL;
+PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate = NULL;
+PFNGLSTENCILOPPROC glad_glStencilOp = NULL;
+PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate = NULL;
+PFNGLTEXBUFFERPROC glad_glTexBuffer = NULL; /* stays NULL until a loader assigns it */
+PFNGLTEXCOORD1DPROC glad_glTexCoord1d = NULL;
+PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv = NULL;
+PFNGLTEXCOORD1FPROC glad_glTexCoord1f = NULL;
+PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv = NULL;
+PFNGLTEXCOORD1IPROC glad_glTexCoord1i = NULL;
+PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv = NULL;
+PFNGLTEXCOORD1SPROC glad_glTexCoord1s = NULL;
+PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv = NULL;
+PFNGLTEXCOORD2DPROC glad_glTexCoord2d = NULL;
+PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv = NULL;
+PFNGLTEXCOORD2FPROC glad_glTexCoord2f = NULL;
+PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv = NULL;
+PFNGLTEXCOORD2IPROC glad_glTexCoord2i = NULL;
+PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv = NULL;
+PFNGLTEXCOORD2SPROC glad_glTexCoord2s = NULL;
+PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv = NULL;
+PFNGLTEXCOORD3DPROC glad_glTexCoord3d = NULL;
+PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv = NULL;
+PFNGLTEXCOORD3FPROC glad_glTexCoord3f = NULL;
+PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv = NULL;
+PFNGLTEXCOORD3IPROC glad_glTexCoord3i = NULL;
+PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv = NULL;
+PFNGLTEXCOORD3SPROC glad_glTexCoord3s = NULL;
+PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv = NULL;
+PFNGLTEXCOORD4DPROC glad_glTexCoord4d = NULL;
+PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv = NULL;
+PFNGLTEXCOORD4FPROC glad_glTexCoord4f = NULL;
+PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv = NULL;
+PFNGLTEXCOORD4IPROC glad_glTexCoord4i = NULL;
+PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv = NULL;
+PFNGLTEXCOORD4SPROC glad_glTexCoord4s = NULL;
+PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv = NULL;
+PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui = NULL;
+PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv = NULL;
+PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui = NULL;
+PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv = NULL;
+PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui = NULL;
+PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv = NULL;
+PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui = NULL;
+PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv = NULL;
+PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer = NULL;
+PFNGLTEXENVFPROC glad_glTexEnvf = NULL;
+PFNGLTEXENVFVPROC glad_glTexEnvfv = NULL;
+PFNGLTEXENVIPROC glad_glTexEnvi = NULL;
+PFNGLTEXENVIVPROC glad_glTexEnviv = NULL;
+PFNGLTEXGENDPROC glad_glTexGend = NULL;
+PFNGLTEXGENDVPROC glad_glTexGendv = NULL;
+PFNGLTEXGENFPROC glad_glTexGenf = NULL;
+PFNGLTEXGENFVPROC glad_glTexGenfv = NULL;
+PFNGLTEXGENIPROC glad_glTexGeni = NULL;
+PFNGLTEXGENIVPROC glad_glTexGeniv = NULL;
+PFNGLTEXIMAGE1DPROC glad_glTexImage1D = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLTEXIMAGE2DPROC glad_glTexImage2D = NULL;
+PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample = NULL;
+PFNGLTEXIMAGE3DPROC glad_glTexImage3D = NULL;
+PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample = NULL;
+PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv = NULL;
+PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv = NULL;
+PFNGLTEXPARAMETERFPROC glad_glTexParameterf = NULL;
+PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv = NULL;
+PFNGLTEXPARAMETERIPROC glad_glTexParameteri = NULL;
+PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv = NULL;
+PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D = NULL;
+PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL;
+PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D = NULL;
+PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings = NULL;
+PFNGLTRANSLATEDPROC glad_glTranslated = NULL;
+PFNGLTRANSLATEFPROC glad_glTranslatef = NULL;
+PFNGLUNIFORM1FPROC glad_glUniform1f = NULL;
+PFNGLUNIFORM1FVPROC glad_glUniform1fv = NULL;
+PFNGLUNIFORM1IPROC glad_glUniform1i = NULL;
+PFNGLUNIFORM1IVPROC glad_glUniform1iv = NULL;
+PFNGLUNIFORM1UIPROC glad_glUniform1ui = NULL;
+PFNGLUNIFORM1UIVPROC glad_glUniform1uiv = NULL;
+PFNGLUNIFORM2FPROC glad_glUniform2f = NULL;
+PFNGLUNIFORM2FVPROC glad_glUniform2fv = NULL;
+PFNGLUNIFORM2IPROC glad_glUniform2i = NULL;
+PFNGLUNIFORM2IVPROC glad_glUniform2iv = NULL;
+PFNGLUNIFORM2UIPROC glad_glUniform2ui = NULL;
+PFNGLUNIFORM2UIVPROC glad_glUniform2uiv = NULL;
+PFNGLUNIFORM3FPROC glad_glUniform3f = NULL;
+PFNGLUNIFORM3FVPROC glad_glUniform3fv = NULL;
+PFNGLUNIFORM3IPROC glad_glUniform3i = NULL;
+PFNGLUNIFORM3IVPROC glad_glUniform3iv = NULL;
+PFNGLUNIFORM3UIPROC glad_glUniform3ui = NULL;
+PFNGLUNIFORM3UIVPROC glad_glUniform3uiv = NULL;
+PFNGLUNIFORM4FPROC glad_glUniform4f = NULL;
+PFNGLUNIFORM4FVPROC glad_glUniform4fv = NULL;
+PFNGLUNIFORM4IPROC glad_glUniform4i = NULL;
+PFNGLUNIFORM4IVPROC glad_glUniform4iv = NULL;
+PFNGLUNIFORM4UIPROC glad_glUniform4ui = NULL;
+PFNGLUNIFORM4UIVPROC glad_glUniform4uiv = NULL;
+PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding = NULL;
+PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv = NULL;
+PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv = NULL;
+PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv = NULL;
+PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv = NULL;
+PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv = NULL;
+PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv = NULL;
+PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv = NULL;
+PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv = NULL;
+PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv = NULL;
+PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL;
+PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL;
+PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL;
+PFNGLVERTEX2DPROC glad_glVertex2d = NULL; /* stays NULL until a loader assigns it */
+PFNGLVERTEX2DVPROC glad_glVertex2dv = NULL;
+PFNGLVERTEX2FPROC glad_glVertex2f = NULL;
+PFNGLVERTEX2FVPROC glad_glVertex2fv = NULL;
+PFNGLVERTEX2IPROC glad_glVertex2i = NULL;
+PFNGLVERTEX2IVPROC glad_glVertex2iv = NULL;
+PFNGLVERTEX2SPROC glad_glVertex2s = NULL;
+PFNGLVERTEX2SVPROC glad_glVertex2sv = NULL;
+PFNGLVERTEX3DPROC glad_glVertex3d = NULL;
+PFNGLVERTEX3DVPROC glad_glVertex3dv = NULL;
+PFNGLVERTEX3FPROC glad_glVertex3f = NULL;
+PFNGLVERTEX3FVPROC glad_glVertex3fv = NULL;
+PFNGLVERTEX3IPROC glad_glVertex3i = NULL;
+PFNGLVERTEX3IVPROC glad_glVertex3iv = NULL;
+PFNGLVERTEX3SPROC glad_glVertex3s = NULL;
+PFNGLVERTEX3SVPROC glad_glVertex3sv = NULL;
+PFNGLVERTEX4DPROC glad_glVertex4d = NULL;
+PFNGLVERTEX4DVPROC glad_glVertex4dv = NULL;
+PFNGLVERTEX4FPROC glad_glVertex4f = NULL;
+PFNGLVERTEX4FVPROC glad_glVertex4fv = NULL;
+PFNGLVERTEX4IPROC glad_glVertex4i = NULL;
+PFNGLVERTEX4IVPROC glad_glVertex4iv = NULL;
+PFNGLVERTEX4SPROC glad_glVertex4s = NULL;
+PFNGLVERTEX4SVPROC glad_glVertex4sv = NULL;
+PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d = NULL;
+PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv = NULL;
+PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL;
+PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv = NULL;
+PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s = NULL;
+PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv = NULL;
+PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d = NULL;
+PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv = NULL;
+PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f = NULL;
+PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv = NULL;
+PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s = NULL;
+PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv = NULL;
+PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d = NULL;
+PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv = NULL;
+PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f = NULL;
+PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv = NULL;
+PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s = NULL;
+PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv = NULL;
+PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv = NULL;
+PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv = NULL;
+PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv = NULL;
+PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub = NULL;
+PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv = NULL;
+PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv = NULL;
+PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv = NULL;
+PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv = NULL;
+PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d = NULL;
+PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv = NULL;
+PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f = NULL;
+PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv = NULL;
+PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv = NULL;
+PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s = NULL;
+PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv = NULL;
+PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv = NULL;
+PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv = NULL;
+PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv = NULL;
+PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor = NULL;
+PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i = NULL;
+PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv = NULL;
+PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui = NULL;
+PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv = NULL;
+PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i = NULL;
+PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv = NULL;
+PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui = NULL;
+PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv = NULL;
+PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i = NULL;
+PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv = NULL;
+PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui = NULL;
+PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv = NULL;
+PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv = NULL;
+PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i = NULL;
+PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv = NULL;
+PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv = NULL;
+PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv = NULL;
+PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui = NULL;
+PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv = NULL;
+PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv = NULL;
+PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer = NULL;
+PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui = NULL;
+PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv = NULL;
+PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui = NULL;
+PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv = NULL;
+PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui = NULL;
+PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv = NULL;
+PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui = NULL;
+PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv = NULL;
+PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer = NULL;
+PFNGLVERTEXP2UIPROC glad_glVertexP2ui = NULL;
+PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv = NULL;
+PFNGLVERTEXP3UIPROC glad_glVertexP3ui = NULL;
+PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv = NULL;
+PFNGLVERTEXP4UIPROC glad_glVertexP4ui = NULL;
+PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv = NULL;
+PFNGLVERTEXPOINTERPROC glad_glVertexPointer = NULL;
+PFNGLVIEWPORTPROC glad_glViewport = NULL; /* assigned in load_GL_VERSION_1_0 */
+PFNGLWAITSYNCPROC glad_glWaitSync = NULL;
+PFNGLWINDOWPOS2DPROC glad_glWindowPos2d = NULL;
+PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv = NULL;
+PFNGLWINDOWPOS2FPROC glad_glWindowPos2f = NULL;
+PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv = NULL;
+PFNGLWINDOWPOS2IPROC glad_glWindowPos2i = NULL;
+PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv = NULL;
+PFNGLWINDOWPOS2SPROC glad_glWindowPos2s = NULL;
+PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv = NULL;
+PFNGLWINDOWPOS3DPROC glad_glWindowPos3d = NULL;
+PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv = NULL;
+PFNGLWINDOWPOS3FPROC glad_glWindowPos3f = NULL;
+PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv = NULL;
+PFNGLWINDOWPOS3IPROC glad_glWindowPos3i = NULL;
+PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv = NULL;
+PFNGLWINDOWPOS3SPROC glad_glWindowPos3s = NULL;
+PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv = NULL;
+int GLAD_GL_ARB_debug_output = 0; /* 0 by default; presumably an availability flag - confirm */
+PFNGLDEBUGMESSAGECONTROLARBPROC glad_glDebugMessageControlARB = NULL; /* NULL until assigned */
+PFNGLDEBUGMESSAGEINSERTARBPROC glad_glDebugMessageInsertARB = NULL;
+PFNGLDEBUGMESSAGECALLBACKARBPROC glad_glDebugMessageCallbackARB = NULL;
+PFNGLGETDEBUGMESSAGELOGARBPROC glad_glGetDebugMessageLogARB = NULL;
+static void load_GL_VERSION_1_0(GLADloadproc load)
+{
+ if(!GLAD_GL_VERSION_1_0)
+ return;
+ glad_glCullFace = (PFNGLCULLFACEPROC)load("glCullFace");
+ glad_glFrontFace = (PFNGLFRONTFACEPROC)load("glFrontFace");
+ glad_glHint = (PFNGLHINTPROC)load("glHint");
+ glad_glLineWidth = (PFNGLLINEWIDTHPROC)load("glLineWidth");
+ glad_glPointSize = (PFNGLPOINTSIZEPROC)load("glPointSize");
+ glad_glPolygonMode = (PFNGLPOLYGONMODEPROC)load("glPolygonMode");
+ glad_glScissor = (PFNGLSCISSORPROC)load("glScissor");
+ glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC)load("glTexParameterf");
+ glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC)load("glTexParameterfv");
+ glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC)load("glTexParameteri");
+ glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC)load("glTexParameteriv");
+ glad_glTexImage1D = (PFNGLTEXIMAGE1DPROC)load("glTexImage1D");
+ glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC)load("glTexImage2D");
+ glad_glDrawBuffer = (PFNGLDRAWBUFFERPROC)load("glDrawBuffer");
+ glad_glClear = (PFNGLCLEARPROC)load("glClear");
+ glad_glClearColor = (PFNGLCLEARCOLORPROC)load("glClearColor");
+ glad_glClearStencil = (PFNGLCLEARSTENCILPROC)load("glClearStencil");
+ glad_glClearDepth = (PFNGLCLEARDEPTHPROC)load("glClearDepth");
+ glad_glStencilMask = (PFNGLSTENCILMASKPROC)load("glStencilMask");
+ glad_glColorMask = (PFNGLCOLORMASKPROC)load("glColorMask");
+ glad_glDepthMask = (PFNGLDEPTHMASKPROC)load("glDepthMask");
+ glad_glDisable = (PFNGLDISABLEPROC)load("glDisable");
+ glad_glEnable = (PFNGLENABLEPROC)load("glEnable");
+ glad_glFinish = (PFNGLFINISHPROC)load("glFinish");
+ glad_glFlush = (PFNGLFLUSHPROC)load("glFlush");
+ glad_glBlendFunc = (PFNGLBLENDFUNCPROC)load("glBlendFunc");
+ glad_glLogicOp = (PFNGLLOGICOPPROC)load("glLogicOp");
+ glad_glStencilFunc = (PFNGLSTENCILFUNCPROC)load("glStencilFunc");
+ glad_glStencilOp = (PFNGLSTENCILOPPROC)load("glStencilOp");
+ glad_glDepthFunc = (PFNGLDEPTHFUNCPROC)load("glDepthFunc");
+ glad_glPixelStoref = (PFNGLPIXELSTOREFPROC)load("glPixelStoref");
+ glad_glPixelStorei = (PFNGLPIXELSTOREIPROC)load("glPixelStorei");
+ glad_glReadBuffer = (PFNGLREADBUFFERPROC)load("glReadBuffer");
+ glad_glReadPixels = (PFNGLREADPIXELSPROC)load("glReadPixels");
+ glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC)load("glGetBooleanv");
+ glad_glGetDoublev = (PFNGLGETDOUBLEVPROC)load("glGetDoublev");
+ glad_glGetError = (PFNGLGETERRORPROC)load("glGetError");
+ glad_glGetFloatv = (PFNGLGETFLOATVPROC)load("glGetFloatv");
+ glad_glGetIntegerv = (PFNGLGETINTEGERVPROC)load("glGetIntegerv");
+ glad_glGetString = (PFNGLGETSTRINGPROC)load("glGetString");
+ glad_glGetTexImage = (PFNGLGETTEXIMAGEPROC)load("glGetTexImage");
+ glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC)load("glGetTexParameterfv");
+ glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC)load("glGetTexParameteriv");
+ glad_glGetTexLevelParameterfv
+ = (PFNGLGETTEXLEVELPARAMETERFVPROC)load("glGetTexLevelParameterfv");
+ glad_glGetTexLevelParameteriv
+ = (PFNGLGETTEXLEVELPARAMETERIVPROC)load("glGetTexLevelParameteriv");
+ glad_glIsEnabled = (PFNGLISENABLEDPROC)load("glIsEnabled");
+ glad_glDepthRange = (PFNGLDEPTHRANGEPROC)load("glDepthRange");
+ glad_glViewport = (PFNGLVIEWPORTPROC)load("glViewport");
+ glad_glNewList = (PFNGLNEWLISTPROC)load("glNewList");
+ glad_glEndList = (PFNGLENDLISTPROC)load("glEndList");
+ glad_glCallList = (PFNGLCALLLISTPROC)load("glCallList");
+ glad_glCallLists = (PFNGLCALLLISTSPROC)load("glCallLists");
+ glad_glDeleteLists = (PFNGLDELETELISTSPROC)load("glDeleteLists");
+ glad_glGenLists = (PFNGLGENLISTSPROC)load("glGenLists");
+ glad_glListBase = (PFNGLLISTBASEPROC)load("glListBase");
+ glad_glBegin = (PFNGLBEGINPROC)load("glBegin");
+ glad_glBitmap = (PFNGLBITMAPPROC)load("glBitmap");
+ glad_glColor3b = (PFNGLCOLOR3BPROC)load("glColor3b");
+ glad_glColor3bv = (PFNGLCOLOR3BVPROC)load("glColor3bv");
+ glad_glColor3d = (PFNGLCOLOR3DPROC)load("glColor3d");
+ glad_glColor3dv = (PFNGLCOLOR3DVPROC)load("glColor3dv");
+ glad_glColor3f = (PFNGLCOLOR3FPROC)load("glColor3f");
+ glad_glColor3fv = (PFNGLCOLOR3FVPROC)load("glColor3fv");
+ glad_glColor3i = (PFNGLCOLOR3IPROC)load("glColor3i");
+ glad_glColor3iv = (PFNGLCOLOR3IVPROC)load("glColor3iv");
+ glad_glColor3s = (PFNGLCOLOR3SPROC)load("glColor3s");
+ glad_glColor3sv = (PFNGLCOLOR3SVPROC)load("glColor3sv");
+ glad_glColor3ub = (PFNGLCOLOR3UBPROC)load("glColor3ub");
+ glad_glColor3ubv = (PFNGLCOLOR3UBVPROC)load("glColor3ubv");
+ glad_glColor3ui = (PFNGLCOLOR3UIPROC)load("glColor3ui");
+ glad_glColor3uiv = (PFNGLCOLOR3UIVPROC)load("glColor3uiv");
+ glad_glColor3us = (PFNGLCOLOR3USPROC)load("glColor3us");
+ glad_glColor3usv = (PFNGLCOLOR3USVPROC)load("glColor3usv");
+ glad_glColor4b = (PFNGLCOLOR4BPROC)load("glColor4b");
+ glad_glColor4bv = (PFNGLCOLOR4BVPROC)load("glColor4bv");
+ glad_glColor4d = (PFNGLCOLOR4DPROC)load("glColor4d");
+ glad_glColor4dv = (PFNGLCOLOR4DVPROC)load("glColor4dv");
+ glad_glColor4f = (PFNGLCOLOR4FPROC)load("glColor4f");
+ glad_glColor4fv = (PFNGLCOLOR4FVPROC)load("glColor4fv");
+ glad_glColor4i = (PFNGLCOLOR4IPROC)load("glColor4i");
+ glad_glColor4iv = (PFNGLCOLOR4IVPROC)load("glColor4iv");
+ glad_glColor4s = (PFNGLCOLOR4SPROC)load("glColor4s");
+ glad_glColor4sv = (PFNGLCOLOR4SVPROC)load("glColor4sv");
+ glad_glColor4ub = (PFNGLCOLOR4UBPROC)load("glColor4ub");
+ glad_glColor4ubv = (PFNGLCOLOR4UBVPROC)load("glColor4ubv");
+ glad_glColor4ui = (PFNGLCOLOR4UIPROC)load("glColor4ui");
+ glad_glColor4uiv = (PFNGLCOLOR4UIVPROC)load("glColor4uiv");
+ glad_glColor4us = (PFNGLCOLOR4USPROC)load("glColor4us");
+ glad_glColor4usv = (PFNGLCOLOR4USVPROC)load("glColor4usv");
+ glad_glEdgeFlag = (PFNGLEDGEFLAGPROC)load("glEdgeFlag");
+ glad_glEdgeFlagv = (PFNGLEDGEFLAGVPROC)load("glEdgeFlagv");
+ glad_glEnd = (PFNGLENDPROC)load("glEnd");
+ glad_glIndexd = (PFNGLINDEXDPROC)load("glIndexd");
+ glad_glIndexdv = (PFNGLINDEXDVPROC)load("glIndexdv");
+ glad_glIndexf = (PFNGLINDEXFPROC)load("glIndexf");
+ glad_glIndexfv = (PFNGLINDEXFVPROC)load("glIndexfv");
+ glad_glIndexi = (PFNGLINDEXIPROC)load("glIndexi");
+ glad_glIndexiv = (PFNGLINDEXIVPROC)load("glIndexiv");
+ glad_glIndexs = (PFNGLINDEXSPROC)load("glIndexs");
+ glad_glIndexsv = (PFNGLINDEXSVPROC)load("glIndexsv");
+ glad_glNormal3b = (PFNGLNORMAL3BPROC)load("glNormal3b");
+ glad_glNormal3bv = (PFNGLNORMAL3BVPROC)load("glNormal3bv");
+ glad_glNormal3d = (PFNGLNORMAL3DPROC)load("glNormal3d");
+ glad_glNormal3dv = (PFNGLNORMAL3DVPROC)load("glNormal3dv");
+ glad_glNormal3f = (PFNGLNORMAL3FPROC)load("glNormal3f");
+ glad_glNormal3fv = (PFNGLNORMAL3FVPROC)load("glNormal3fv");
+ glad_glNormal3i = (PFNGLNORMAL3IPROC)load("glNormal3i");
+ glad_glNormal3iv = (PFNGLNORMAL3IVPROC)load("glNormal3iv");
+ glad_glNormal3s = (PFNGLNORMAL3SPROC)load("glNormal3s");
+ glad_glNormal3sv = (PFNGLNORMAL3SVPROC)load("glNormal3sv");
+ glad_glRasterPos2d = (PFNGLRASTERPOS2DPROC)load("glRasterPos2d");
+ glad_glRasterPos2dv = (PFNGLRASTERPOS2DVPROC)load("glRasterPos2dv");
+ glad_glRasterPos2f = (PFNGLRASTERPOS2FPROC)load("glRasterPos2f");
+ glad_glRasterPos2fv = (PFNGLRASTERPOS2FVPROC)load("glRasterPos2fv");
+ glad_glRasterPos2i = (PFNGLRASTERPOS2IPROC)load("glRasterPos2i");
+ glad_glRasterPos2iv = (PFNGLRASTERPOS2IVPROC)load("glRasterPos2iv");
+ glad_glRasterPos2s = (PFNGLRASTERPOS2SPROC)load("glRasterPos2s");
+ glad_glRasterPos2sv = (PFNGLRASTERPOS2SVPROC)load("glRasterPos2sv");
+ glad_glRasterPos3d = (PFNGLRASTERPOS3DPROC)load("glRasterPos3d");
+ glad_glRasterPos3dv = (PFNGLRASTERPOS3DVPROC)load("glRasterPos3dv");
+ glad_glRasterPos3f = (PFNGLRASTERPOS3FPROC)load("glRasterPos3f");
+ glad_glRasterPos3fv = (PFNGLRASTERPOS3FVPROC)load("glRasterPos3fv");
+ glad_glRasterPos3i = (PFNGLRASTERPOS3IPROC)load("glRasterPos3i");
+ glad_glRasterPos3iv = (PFNGLRASTERPOS3IVPROC)load("glRasterPos3iv");
+ glad_glRasterPos3s = (PFNGLRASTERPOS3SPROC)load("glRasterPos3s");
+ glad_glRasterPos3sv = (PFNGLRASTERPOS3SVPROC)load("glRasterPos3sv");
+ glad_glRasterPos4d = (PFNGLRASTERPOS4DPROC)load("glRasterPos4d");
+ glad_glRasterPos4dv = (PFNGLRASTERPOS4DVPROC)load("glRasterPos4dv");
+ glad_glRasterPos4f = (PFNGLRASTERPOS4FPROC)load("glRasterPos4f");
+ glad_glRasterPos4fv = (PFNGLRASTERPOS4FVPROC)load("glRasterPos4fv");
+ glad_glRasterPos4i = (PFNGLRASTERPOS4IPROC)load("glRasterPos4i");
+ glad_glRasterPos4iv = (PFNGLRASTERPOS4IVPROC)load("glRasterPos4iv");
+ glad_glRasterPos4s = (PFNGLRASTERPOS4SPROC)load("glRasterPos4s");
+ glad_glRasterPos4sv = (PFNGLRASTERPOS4SVPROC)load("glRasterPos4sv");
+ glad_glRectd = (PFNGLRECTDPROC)load("glRectd");
+ glad_glRectdv = (PFNGLRECTDVPROC)load("glRectdv");
+ glad_glRectf = (PFNGLRECTFPROC)load("glRectf");
+ glad_glRectfv = (PFNGLRECTFVPROC)load("glRectfv");
+ glad_glRecti = (PFNGLRECTIPROC)load("glRecti");
+ glad_glRectiv = (PFNGLRECTIVPROC)load("glRectiv");
+ glad_glRects = (PFNGLRECTSPROC)load("glRects");
+ glad_glRectsv = (PFNGLRECTSVPROC)load("glRectsv");
+ glad_glTexCoord1d = (PFNGLTEXCOORD1DPROC)load("glTexCoord1d");
+ glad_glTexCoord1dv = (PFNGLTEXCOORD1DVPROC)load("glTexCoord1dv");
+ glad_glTexCoord1f = (PFNGLTEXCOORD1FPROC)load("glTexCoord1f");
+ glad_glTexCoord1fv = (PFNGLTEXCOORD1FVPROC)load("glTexCoord1fv");
+ glad_glTexCoord1i = (PFNGLTEXCOORD1IPROC)load("glTexCoord1i");
+ glad_glTexCoord1iv = (PFNGLTEXCOORD1IVPROC)load("glTexCoord1iv");
+ glad_glTexCoord1s = (PFNGLTEXCOORD1SPROC)load("glTexCoord1s");
+ glad_glTexCoord1sv = (PFNGLTEXCOORD1SVPROC)load("glTexCoord1sv");
+ glad_glTexCoord2d = (PFNGLTEXCOORD2DPROC)load("glTexCoord2d");
+ glad_glTexCoord2dv = (PFNGLTEXCOORD2DVPROC)load("glTexCoord2dv");
+ glad_glTexCoord2f = (PFNGLTEXCOORD2FPROC)load("glTexCoord2f");
+ glad_glTexCoord2fv = (PFNGLTEXCOORD2FVPROC)load("glTexCoord2fv");
+ glad_glTexCoord2i = (PFNGLTEXCOORD2IPROC)load("glTexCoord2i");
+ glad_glTexCoord2iv = (PFNGLTEXCOORD2IVPROC)load("glTexCoord2iv");
+ glad_glTexCoord2s = (PFNGLTEXCOORD2SPROC)load("glTexCoord2s");
+ glad_glTexCoord2sv = (PFNGLTEXCOORD2SVPROC)load("glTexCoord2sv");
+ glad_glTexCoord3d = (PFNGLTEXCOORD3DPROC)load("glTexCoord3d");
+ glad_glTexCoord3dv = (PFNGLTEXCOORD3DVPROC)load("glTexCoord3dv");
+ glad_glTexCoord3f = (PFNGLTEXCOORD3FPROC)load("glTexCoord3f");
+ glad_glTexCoord3fv = (PFNGLTEXCOORD3FVPROC)load("glTexCoord3fv");
+ glad_glTexCoord3i = (PFNGLTEXCOORD3IPROC)load("glTexCoord3i");
+ glad_glTexCoord3iv = (PFNGLTEXCOORD3IVPROC)load("glTexCoord3iv");
+ glad_glTexCoord3s = (PFNGLTEXCOORD3SPROC)load("glTexCoord3s");
+ glad_glTexCoord3sv = (PFNGLTEXCOORD3SVPROC)load("glTexCoord3sv");
+ glad_glTexCoord4d = (PFNGLTEXCOORD4DPROC)load("glTexCoord4d");
+ glad_glTexCoord4dv = (PFNGLTEXCOORD4DVPROC)load("glTexCoord4dv");
+ glad_glTexCoord4f = (PFNGLTEXCOORD4FPROC)load("glTexCoord4f");
+ glad_glTexCoord4fv = (PFNGLTEXCOORD4FVPROC)load("glTexCoord4fv");
+ glad_glTexCoord4i = (PFNGLTEXCOORD4IPROC)load("glTexCoord4i");
+ glad_glTexCoord4iv = (PFNGLTEXCOORD4IVPROC)load("glTexCoord4iv");
+ glad_glTexCoord4s = (PFNGLTEXCOORD4SPROC)load("glTexCoord4s");
+ glad_glTexCoord4sv = (PFNGLTEXCOORD4SVPROC)load("glTexCoord4sv");
+ glad_glVertex2d = (PFNGLVERTEX2DPROC)load("glVertex2d");
+ glad_glVertex2dv = (PFNGLVERTEX2DVPROC)load("glVertex2dv");
+ glad_glVertex2f = (PFNGLVERTEX2FPROC)load("glVertex2f");
+ glad_glVertex2fv = (PFNGLVERTEX2FVPROC)load("glVertex2fv");
+ glad_glVertex2i = (PFNGLVERTEX2IPROC)load("glVertex2i");
+ glad_glVertex2iv = (PFNGLVERTEX2IVPROC)load("glVertex2iv");
+ glad_glVertex2s = (PFNGLVERTEX2SPROC)load("glVertex2s");
+ glad_glVertex2sv = (PFNGLVERTEX2SVPROC)load("glVertex2sv");
+ glad_glVertex3d = (PFNGLVERTEX3DPROC)load("glVertex3d");
+ glad_glVertex3dv = (PFNGLVERTEX3DVPROC)load("glVertex3dv");
+ glad_glVertex3f = (PFNGLVERTEX3FPROC)load("glVertex3f");
+ glad_glVertex3fv = (PFNGLVERTEX3FVPROC)load("glVertex3fv");
+ glad_glVertex3i = (PFNGLVERTEX3IPROC)load("glVertex3i");
+ glad_glVertex3iv = (PFNGLVERTEX3IVPROC)load("glVertex3iv");
+ glad_glVertex3s = (PFNGLVERTEX3SPROC)load("glVertex3s");
+ glad_glVertex3sv = (PFNGLVERTEX3SVPROC)load("glVertex3sv");
+ glad_glVertex4d = (PFNGLVERTEX4DPROC)load("glVertex4d");
+ glad_glVertex4dv = (PFNGLVERTEX4DVPROC)load("glVertex4dv");
+ glad_glVertex4f = (PFNGLVERTEX4FPROC)load("glVertex4f");
+ glad_glVertex4fv = (PFNGLVERTEX4FVPROC)load("glVertex4fv");
+ glad_glVertex4i = (PFNGLVERTEX4IPROC)load("glVertex4i");
+ glad_glVertex4iv = (PFNGLVERTEX4IVPROC)load("glVertex4iv");
+ glad_glVertex4s = (PFNGLVERTEX4SPROC)load("glVertex4s");
+ glad_glVertex4sv = (PFNGLVERTEX4SVPROC)load("glVertex4sv");
+ glad_glClipPlane = (PFNGLCLIPPLANEPROC)load("glClipPlane");
+ glad_glColorMaterial = (PFNGLCOLORMATERIALPROC)load("glColorMaterial");
+ glad_glFogf = (PFNGLFOGFPROC)load("glFogf");
+ glad_glFogfv = (PFNGLFOGFVPROC)load("glFogfv");
+ glad_glFogi = (PFNGLFOGIPROC)load("glFogi");
+ glad_glFogiv = (PFNGLFOGIVPROC)load("glFogiv");
+ glad_glLightf = (PFNGLLIGHTFPROC)load("glLightf");
+ glad_glLightfv = (PFNGLLIGHTFVPROC)load("glLightfv");
+ glad_glLighti = (PFNGLLIGHTIPROC)load("glLighti");
+ glad_glLightiv = (PFNGLLIGHTIVPROC)load("glLightiv");
+ glad_glLightModelf = (PFNGLLIGHTMODELFPROC)load("glLightModelf");
+ glad_glLightModelfv = (PFNGLLIGHTMODELFVPROC)load("glLightModelfv");
+ glad_glLightModeli = (PFNGLLIGHTMODELIPROC)load("glLightModeli");
+ glad_glLightModeliv = (PFNGLLIGHTMODELIVPROC)load("glLightModeliv");
+ glad_glLineStipple = (PFNGLLINESTIPPLEPROC)load("glLineStipple");
+ glad_glMaterialf = (PFNGLMATERIALFPROC)load("glMaterialf");
+ glad_glMaterialfv = (PFNGLMATERIALFVPROC)load("glMaterialfv");
+ glad_glMateriali = (PFNGLMATERIALIPROC)load("glMateriali");
+ glad_glMaterialiv = (PFNGLMATERIALIVPROC)load("glMaterialiv");
+ glad_glPolygonStipple = (PFNGLPOLYGONSTIPPLEPROC)load("glPolygonStipple");
+ glad_glShadeModel = (PFNGLSHADEMODELPROC)load("glShadeModel");
+ glad_glTexEnvf = (PFNGLTEXENVFPROC)load("glTexEnvf");
+ glad_glTexEnvfv = (PFNGLTEXENVFVPROC)load("glTexEnvfv");
+ glad_glTexEnvi = (PFNGLTEXENVIPROC)load("glTexEnvi");
+ glad_glTexEnviv = (PFNGLTEXENVIVPROC)load("glTexEnviv");
+ glad_glTexGend = (PFNGLTEXGENDPROC)load("glTexGend");
+ glad_glTexGendv = (PFNGLTEXGENDVPROC)load("glTexGendv");
+ glad_glTexGenf = (PFNGLTEXGENFPROC)load("glTexGenf");
+ glad_glTexGenfv = (PFNGLTEXGENFVPROC)load("glTexGenfv");
+ glad_glTexGeni = (PFNGLTEXGENIPROC)load("glTexGeni");
+ glad_glTexGeniv = (PFNGLTEXGENIVPROC)load("glTexGeniv");
+ glad_glFeedbackBuffer = (PFNGLFEEDBACKBUFFERPROC)load("glFeedbackBuffer");
+ glad_glSelectBuffer = (PFNGLSELECTBUFFERPROC)load("glSelectBuffer");
+ glad_glRenderMode = (PFNGLRENDERMODEPROC)load("glRenderMode");
+ glad_glInitNames = (PFNGLINITNAMESPROC)load("glInitNames");
+ glad_glLoadName = (PFNGLLOADNAMEPROC)load("glLoadName");
+ glad_glPassThrough = (PFNGLPASSTHROUGHPROC)load("glPassThrough");
+ glad_glPopName = (PFNGLPOPNAMEPROC)load("glPopName");
+ glad_glPushName = (PFNGLPUSHNAMEPROC)load("glPushName");
+ glad_glClearAccum = (PFNGLCLEARACCUMPROC)load("glClearAccum");
+ glad_glClearIndex = (PFNGLCLEARINDEXPROC)load("glClearIndex");
+ glad_glIndexMask = (PFNGLINDEXMASKPROC)load("glIndexMask");
+ glad_glAccum = (PFNGLACCUMPROC)load("glAccum");
+ glad_glPopAttrib = (PFNGLPOPATTRIBPROC)load("glPopAttrib");
+ glad_glPushAttrib = (PFNGLPUSHATTRIBPROC)load("glPushAttrib");
+ glad_glMap1d = (PFNGLMAP1DPROC)load("glMap1d");
+ glad_glMap1f = (PFNGLMAP1FPROC)load("glMap1f");
+ glad_glMap2d = (PFNGLMAP2DPROC)load("glMap2d");
+ glad_glMap2f = (PFNGLMAP2FPROC)load("glMap2f");
+ glad_glMapGrid1d = (PFNGLMAPGRID1DPROC)load("glMapGrid1d");
+ glad_glMapGrid1f = (PFNGLMAPGRID1FPROC)load("glMapGrid1f");
+ glad_glMapGrid2d = (PFNGLMAPGRID2DPROC)load("glMapGrid2d");
+ glad_glMapGrid2f = (PFNGLMAPGRID2FPROC)load("glMapGrid2f");
+ glad_glEvalCoord1d = (PFNGLEVALCOORD1DPROC)load("glEvalCoord1d");
+ glad_glEvalCoord1dv = (PFNGLEVALCOORD1DVPROC)load("glEvalCoord1dv");
+ glad_glEvalCoord1f = (PFNGLEVALCOORD1FPROC)load("glEvalCoord1f");
+ glad_glEvalCoord1fv = (PFNGLEVALCOORD1FVPROC)load("glEvalCoord1fv");
+ glad_glEvalCoord2d = (PFNGLEVALCOORD2DPROC)load("glEvalCoord2d");
+ glad_glEvalCoord2dv = (PFNGLEVALCOORD2DVPROC)load("glEvalCoord2dv");
+ glad_glEvalCoord2f = (PFNGLEVALCOORD2FPROC)load("glEvalCoord2f");
+ glad_glEvalCoord2fv = (PFNGLEVALCOORD2FVPROC)load("glEvalCoord2fv");
+ glad_glEvalMesh1 = (PFNGLEVALMESH1PROC)load("glEvalMesh1");
+ glad_glEvalPoint1 = (PFNGLEVALPOINT1PROC)load("glEvalPoint1");
+ glad_glEvalMesh2 = (PFNGLEVALMESH2PROC)load("glEvalMesh2");
+ glad_glEvalPoint2 = (PFNGLEVALPOINT2PROC)load("glEvalPoint2");
+ glad_glAlphaFunc = (PFNGLALPHAFUNCPROC)load("glAlphaFunc");
+ glad_glPixelZoom = (PFNGLPIXELZOOMPROC)load("glPixelZoom");
+ glad_glPixelTransferf = (PFNGLPIXELTRANSFERFPROC)load("glPixelTransferf");
+ glad_glPixelTransferi = (PFNGLPIXELTRANSFERIPROC)load("glPixelTransferi");
+ glad_glPixelMapfv = (PFNGLPIXELMAPFVPROC)load("glPixelMapfv");
+ glad_glPixelMapuiv = (PFNGLPIXELMAPUIVPROC)load("glPixelMapuiv");
+ glad_glPixelMapusv = (PFNGLPIXELMAPUSVPROC)load("glPixelMapusv");
+ glad_glCopyPixels = (PFNGLCOPYPIXELSPROC)load("glCopyPixels");
+ glad_glDrawPixels = (PFNGLDRAWPIXELSPROC)load("glDrawPixels");
+ glad_glGetClipPlane = (PFNGLGETCLIPPLANEPROC)load("glGetClipPlane");
+ glad_glGetLightfv = (PFNGLGETLIGHTFVPROC)load("glGetLightfv");
+ glad_glGetLightiv = (PFNGLGETLIGHTIVPROC)load("glGetLightiv");
+ glad_glGetMapdv = (PFNGLGETMAPDVPROC)load("glGetMapdv");
+ glad_glGetMapfv = (PFNGLGETMAPFVPROC)load("glGetMapfv");
+ glad_glGetMapiv = (PFNGLGETMAPIVPROC)load("glGetMapiv");
+ glad_glGetMaterialfv = (PFNGLGETMATERIALFVPROC)load("glGetMaterialfv");
+ glad_glGetMaterialiv = (PFNGLGETMATERIALIVPROC)load("glGetMaterialiv");
+ glad_glGetPixelMapfv = (PFNGLGETPIXELMAPFVPROC)load("glGetPixelMapfv");
+ glad_glGetPixelMapuiv = (PFNGLGETPIXELMAPUIVPROC)load("glGetPixelMapuiv");
+ glad_glGetPixelMapusv = (PFNGLGETPIXELMAPUSVPROC)load("glGetPixelMapusv");
+ glad_glGetPolygonStipple = (PFNGLGETPOLYGONSTIPPLEPROC)load("glGetPolygonStipple");
+ glad_glGetTexEnvfv = (PFNGLGETTEXENVFVPROC)load("glGetTexEnvfv");
+ glad_glGetTexEnviv = (PFNGLGETTEXENVIVPROC)load("glGetTexEnviv");
+ glad_glGetTexGendv = (PFNGLGETTEXGENDVPROC)load("glGetTexGendv");
+ glad_glGetTexGenfv = (PFNGLGETTEXGENFVPROC)load("glGetTexGenfv");
+ glad_glGetTexGeniv = (PFNGLGETTEXGENIVPROC)load("glGetTexGeniv");
+ glad_glIsList = (PFNGLISLISTPROC)load("glIsList");
+ glad_glFrustum = (PFNGLFRUSTUMPROC)load("glFrustum");
+ glad_glLoadIdentity = (PFNGLLOADIDENTITYPROC)load("glLoadIdentity");
+ glad_glLoadMatrixf = (PFNGLLOADMATRIXFPROC)load("glLoadMatrixf");
+ glad_glLoadMatrixd = (PFNGLLOADMATRIXDPROC)load("glLoadMatrixd");
+ glad_glMatrixMode = (PFNGLMATRIXMODEPROC)load("glMatrixMode");
+ glad_glMultMatrixf = (PFNGLMULTMATRIXFPROC)load("glMultMatrixf");
+ glad_glMultMatrixd = (PFNGLMULTMATRIXDPROC)load("glMultMatrixd");
+ glad_glOrtho = (PFNGLORTHOPROC)load("glOrtho");
+ glad_glPopMatrix = (PFNGLPOPMATRIXPROC)load("glPopMatrix");
+ glad_glPushMatrix = (PFNGLPUSHMATRIXPROC)load("glPushMatrix");
+ glad_glRotated = (PFNGLROTATEDPROC)load("glRotated");
+ glad_glRotatef = (PFNGLROTATEFPROC)load("glRotatef");
+ glad_glScaled = (PFNGLSCALEDPROC)load("glScaled");
+ glad_glScalef = (PFNGLSCALEFPROC)load("glScalef");
+ glad_glTranslated = (PFNGLTRANSLATEDPROC)load("glTranslated");
+ glad_glTranslatef = (PFNGLTRANSLATEFPROC)load("glTranslatef");
+}
+static void load_GL_VERSION_1_1(GLADloadproc load) /* Looks up each OpenGL 1.1 function by name through `load` and stores it in the matching glad_ variable. */
+{
+ if(!GLAD_GL_VERSION_1_1) /* Flag is zero: skip the whole group and leave every variable below untouched. */
+ return;
+ glad_glDrawArrays = (PFNGLDRAWARRAYSPROC)load("glDrawArrays"); /* The value returned by load() is cast and stored as-is; nothing here checks it. */
+ glad_glDrawElements = (PFNGLDRAWELEMENTSPROC)load("glDrawElements");
+ glad_glGetPointerv = (PFNGLGETPOINTERVPROC)load("glGetPointerv");
+ glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC)load("glPolygonOffset");
+ glad_glCopyTexImage1D = (PFNGLCOPYTEXIMAGE1DPROC)load("glCopyTexImage1D");
+ glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC)load("glCopyTexImage2D");
+ glad_glCopyTexSubImage1D = (PFNGLCOPYTEXSUBIMAGE1DPROC)load("glCopyTexSubImage1D");
+ glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC)load("glCopyTexSubImage2D");
+ glad_glTexSubImage1D = (PFNGLTEXSUBIMAGE1DPROC)load("glTexSubImage1D");
+ glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC)load("glTexSubImage2D");
+ glad_glBindTexture = (PFNGLBINDTEXTUREPROC)load("glBindTexture");
+ glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC)load("glDeleteTextures");
+ glad_glGenTextures = (PFNGLGENTEXTURESPROC)load("glGenTextures");
+ glad_glIsTexture = (PFNGLISTEXTUREPROC)load("glIsTexture");
+ glad_glArrayElement = (PFNGLARRAYELEMENTPROC)load("glArrayElement");
+ glad_glColorPointer = (PFNGLCOLORPOINTERPROC)load("glColorPointer");
+ glad_glDisableClientState = (PFNGLDISABLECLIENTSTATEPROC)load("glDisableClientState");
+ glad_glEdgeFlagPointer = (PFNGLEDGEFLAGPOINTERPROC)load("glEdgeFlagPointer");
+ glad_glEnableClientState = (PFNGLENABLECLIENTSTATEPROC)load("glEnableClientState");
+ glad_glIndexPointer = (PFNGLINDEXPOINTERPROC)load("glIndexPointer");
+ glad_glInterleavedArrays = (PFNGLINTERLEAVEDARRAYSPROC)load("glInterleavedArrays");
+ glad_glNormalPointer = (PFNGLNORMALPOINTERPROC)load("glNormalPointer");
+ glad_glTexCoordPointer = (PFNGLTEXCOORDPOINTERPROC)load("glTexCoordPointer");
+ glad_glVertexPointer = (PFNGLVERTEXPOINTERPROC)load("glVertexPointer");
+ glad_glAreTexturesResident = (PFNGLARETEXTURESRESIDENTPROC)load("glAreTexturesResident");
+ glad_glPrioritizeTextures = (PFNGLPRIORITIZETEXTURESPROC)load("glPrioritizeTextures");
+ glad_glIndexub = (PFNGLINDEXUBPROC)load("glIndexub");
+ glad_glIndexubv = (PFNGLINDEXUBVPROC)load("glIndexubv");
+ glad_glPopClientAttrib = (PFNGLPOPCLIENTATTRIBPROC)load("glPopClientAttrib");
+ glad_glPushClientAttrib = (PFNGLPUSHCLIENTATTRIBPROC)load("glPushClientAttrib");
+}
+static void load_GL_VERSION_1_2(GLADloadproc load) /* Looks up the four OpenGL 1.2 functions by name through `load` and stores each in its glad_ variable. */
+{
+ if(!GLAD_GL_VERSION_1_2) /* Flag is zero: skip the whole group and leave the variables below untouched. */
+ return;
+ glad_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC)load("glDrawRangeElements"); /* The value returned by load() is cast and stored as-is; nothing here checks it. */
+ glad_glTexImage3D = (PFNGLTEXIMAGE3DPROC)load("glTexImage3D");
+ glad_glTexSubImage3D = (PFNGLTEXSUBIMAGE3DPROC)load("glTexSubImage3D");
+ glad_glCopyTexSubImage3D = (PFNGLCOPYTEXSUBIMAGE3DPROC)load("glCopyTexSubImage3D");
+}
+static void load_GL_VERSION_1_3(GLADloadproc load) /* Looks up each OpenGL 1.3 function by name through `load` and stores it in the matching glad_ variable. */
+{
+ if(!GLAD_GL_VERSION_1_3) /* Flag is zero: skip the whole group and leave every variable below untouched. */
+ return;
+ glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC)load("glActiveTexture"); /* The value returned by load() is cast and stored as-is; nothing here checks it. */
+ glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC)load("glSampleCoverage");
+ glad_glCompressedTexImage3D = (PFNGLCOMPRESSEDTEXIMAGE3DPROC)load("glCompressedTexImage3D");
+ glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)load("glCompressedTexImage2D");
+ glad_glCompressedTexImage1D = (PFNGLCOMPRESSEDTEXIMAGE1DPROC)load("glCompressedTexImage1D");
+ glad_glCompressedTexSubImage3D /* Same assignment pattern as the rest, only wrapped onto two lines. */
+ = (PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)load("glCompressedTexSubImage3D");
+ glad_glCompressedTexSubImage2D
+ = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)load("glCompressedTexSubImage2D");
+ glad_glCompressedTexSubImage1D
+ = (PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)load("glCompressedTexSubImage1D");
+ glad_glGetCompressedTexImage = (PFNGLGETCOMPRESSEDTEXIMAGEPROC)load("glGetCompressedTexImage");
+ glad_glClientActiveTexture = (PFNGLCLIENTACTIVETEXTUREPROC)load("glClientActiveTexture");
+ glad_glMultiTexCoord1d = (PFNGLMULTITEXCOORD1DPROC)load("glMultiTexCoord1d");
+ glad_glMultiTexCoord1dv = (PFNGLMULTITEXCOORD1DVPROC)load("glMultiTexCoord1dv");
+ glad_glMultiTexCoord1f = (PFNGLMULTITEXCOORD1FPROC)load("glMultiTexCoord1f");
+ glad_glMultiTexCoord1fv = (PFNGLMULTITEXCOORD1FVPROC)load("glMultiTexCoord1fv");
+ glad_glMultiTexCoord1i = (PFNGLMULTITEXCOORD1IPROC)load("glMultiTexCoord1i");
+ glad_glMultiTexCoord1iv = (PFNGLMULTITEXCOORD1IVPROC)load("glMultiTexCoord1iv");
+ glad_glMultiTexCoord1s = (PFNGLMULTITEXCOORD1SPROC)load("glMultiTexCoord1s");
+ glad_glMultiTexCoord1sv = (PFNGLMULTITEXCOORD1SVPROC)load("glMultiTexCoord1sv");
+ glad_glMultiTexCoord2d = (PFNGLMULTITEXCOORD2DPROC)load("glMultiTexCoord2d");
+ glad_glMultiTexCoord2dv = (PFNGLMULTITEXCOORD2DVPROC)load("glMultiTexCoord2dv");
+ glad_glMultiTexCoord2f = (PFNGLMULTITEXCOORD2FPROC)load("glMultiTexCoord2f");
+ glad_glMultiTexCoord2fv = (PFNGLMULTITEXCOORD2FVPROC)load("glMultiTexCoord2fv");
+ glad_glMultiTexCoord2i = (PFNGLMULTITEXCOORD2IPROC)load("glMultiTexCoord2i");
+ glad_glMultiTexCoord2iv = (PFNGLMULTITEXCOORD2IVPROC)load("glMultiTexCoord2iv");
+ glad_glMultiTexCoord2s = (PFNGLMULTITEXCOORD2SPROC)load("glMultiTexCoord2s");
+ glad_glMultiTexCoord2sv = (PFNGLMULTITEXCOORD2SVPROC)load("glMultiTexCoord2sv");
+ glad_glMultiTexCoord3d = (PFNGLMULTITEXCOORD3DPROC)load("glMultiTexCoord3d");
+ glad_glMultiTexCoord3dv = (PFNGLMULTITEXCOORD3DVPROC)load("glMultiTexCoord3dv");
+ glad_glMultiTexCoord3f = (PFNGLMULTITEXCOORD3FPROC)load("glMultiTexCoord3f");
+ glad_glMultiTexCoord3fv = (PFNGLMULTITEXCOORD3FVPROC)load("glMultiTexCoord3fv");
+ glad_glMultiTexCoord3i = (PFNGLMULTITEXCOORD3IPROC)load("glMultiTexCoord3i");
+ glad_glMultiTexCoord3iv = (PFNGLMULTITEXCOORD3IVPROC)load("glMultiTexCoord3iv");
+ glad_glMultiTexCoord3s = (PFNGLMULTITEXCOORD3SPROC)load("glMultiTexCoord3s");
+ glad_glMultiTexCoord3sv = (PFNGLMULTITEXCOORD3SVPROC)load("glMultiTexCoord3sv");
+ glad_glMultiTexCoord4d = (PFNGLMULTITEXCOORD4DPROC)load("glMultiTexCoord4d");
+ glad_glMultiTexCoord4dv = (PFNGLMULTITEXCOORD4DVPROC)load("glMultiTexCoord4dv");
+ glad_glMultiTexCoord4f = (PFNGLMULTITEXCOORD4FPROC)load("glMultiTexCoord4f");
+ glad_glMultiTexCoord4fv = (PFNGLMULTITEXCOORD4FVPROC)load("glMultiTexCoord4fv");
+ glad_glMultiTexCoord4i = (PFNGLMULTITEXCOORD4IPROC)load("glMultiTexCoord4i");
+ glad_glMultiTexCoord4iv = (PFNGLMULTITEXCOORD4IVPROC)load("glMultiTexCoord4iv");
+ glad_glMultiTexCoord4s = (PFNGLMULTITEXCOORD4SPROC)load("glMultiTexCoord4s");
+ glad_glMultiTexCoord4sv = (PFNGLMULTITEXCOORD4SVPROC)load("glMultiTexCoord4sv");
+ glad_glLoadTransposeMatrixf = (PFNGLLOADTRANSPOSEMATRIXFPROC)load("glLoadTransposeMatrixf");
+ glad_glLoadTransposeMatrixd = (PFNGLLOADTRANSPOSEMATRIXDPROC)load("glLoadTransposeMatrixd");
+ glad_glMultTransposeMatrixf = (PFNGLMULTTRANSPOSEMATRIXFPROC)load("glMultTransposeMatrixf");
+ glad_glMultTransposeMatrixd = (PFNGLMULTTRANSPOSEMATRIXDPROC)load("glMultTransposeMatrixd");
+}
+static void load_GL_VERSION_1_4(GLADloadproc load) /* Looks up each OpenGL 1.4 function by name through `load` and stores it in the matching glad_ variable. */
+{
+ if(!GLAD_GL_VERSION_1_4) /* Flag is zero: skip the whole group and leave every variable below untouched. */
+ return;
+ glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC)load("glBlendFuncSeparate"); /* The value returned by load() is cast and stored as-is; nothing here checks it. */
+ glad_glMultiDrawArrays = (PFNGLMULTIDRAWARRAYSPROC)load("glMultiDrawArrays");
+ glad_glMultiDrawElements = (PFNGLMULTIDRAWELEMENTSPROC)load("glMultiDrawElements");
+ glad_glPointParameterf = (PFNGLPOINTPARAMETERFPROC)load("glPointParameterf");
+ glad_glPointParameterfv = (PFNGLPOINTPARAMETERFVPROC)load("glPointParameterfv");
+ glad_glPointParameteri = (PFNGLPOINTPARAMETERIPROC)load("glPointParameteri");
+ glad_glPointParameteriv = (PFNGLPOINTPARAMETERIVPROC)load("glPointParameteriv");
+ glad_glFogCoordf = (PFNGLFOGCOORDFPROC)load("glFogCoordf");
+ glad_glFogCoordfv = (PFNGLFOGCOORDFVPROC)load("glFogCoordfv");
+ glad_glFogCoordd = (PFNGLFOGCOORDDPROC)load("glFogCoordd");
+ glad_glFogCoorddv = (PFNGLFOGCOORDDVPROC)load("glFogCoorddv");
+ glad_glFogCoordPointer = (PFNGLFOGCOORDPOINTERPROC)load("glFogCoordPointer");
+ glad_glSecondaryColor3b = (PFNGLSECONDARYCOLOR3BPROC)load("glSecondaryColor3b");
+ glad_glSecondaryColor3bv = (PFNGLSECONDARYCOLOR3BVPROC)load("glSecondaryColor3bv");
+ glad_glSecondaryColor3d = (PFNGLSECONDARYCOLOR3DPROC)load("glSecondaryColor3d");
+ glad_glSecondaryColor3dv = (PFNGLSECONDARYCOLOR3DVPROC)load("glSecondaryColor3dv");
+ glad_glSecondaryColor3f = (PFNGLSECONDARYCOLOR3FPROC)load("glSecondaryColor3f");
+ glad_glSecondaryColor3fv = (PFNGLSECONDARYCOLOR3FVPROC)load("glSecondaryColor3fv");
+ glad_glSecondaryColor3i = (PFNGLSECONDARYCOLOR3IPROC)load("glSecondaryColor3i");
+ glad_glSecondaryColor3iv = (PFNGLSECONDARYCOLOR3IVPROC)load("glSecondaryColor3iv");
+ glad_glSecondaryColor3s = (PFNGLSECONDARYCOLOR3SPROC)load("glSecondaryColor3s");
+ glad_glSecondaryColor3sv = (PFNGLSECONDARYCOLOR3SVPROC)load("glSecondaryColor3sv");
+ glad_glSecondaryColor3ub = (PFNGLSECONDARYCOLOR3UBPROC)load("glSecondaryColor3ub");
+ glad_glSecondaryColor3ubv = (PFNGLSECONDARYCOLOR3UBVPROC)load("glSecondaryColor3ubv");
+ glad_glSecondaryColor3ui = (PFNGLSECONDARYCOLOR3UIPROC)load("glSecondaryColor3ui");
+ glad_glSecondaryColor3uiv = (PFNGLSECONDARYCOLOR3UIVPROC)load("glSecondaryColor3uiv");
+ glad_glSecondaryColor3us = (PFNGLSECONDARYCOLOR3USPROC)load("glSecondaryColor3us");
+ glad_glSecondaryColor3usv = (PFNGLSECONDARYCOLOR3USVPROC)load("glSecondaryColor3usv");
+ glad_glSecondaryColorPointer = (PFNGLSECONDARYCOLORPOINTERPROC)load("glSecondaryColorPointer");
+ glad_glWindowPos2d = (PFNGLWINDOWPOS2DPROC)load("glWindowPos2d");
+ glad_glWindowPos2dv = (PFNGLWINDOWPOS2DVPROC)load("glWindowPos2dv");
+ glad_glWindowPos2f = (PFNGLWINDOWPOS2FPROC)load("glWindowPos2f");
+ glad_glWindowPos2fv = (PFNGLWINDOWPOS2FVPROC)load("glWindowPos2fv");
+ glad_glWindowPos2i = (PFNGLWINDOWPOS2IPROC)load("glWindowPos2i");
+ glad_glWindowPos2iv = (PFNGLWINDOWPOS2IVPROC)load("glWindowPos2iv");
+ glad_glWindowPos2s = (PFNGLWINDOWPOS2SPROC)load("glWindowPos2s");
+ glad_glWindowPos2sv = (PFNGLWINDOWPOS2SVPROC)load("glWindowPos2sv");
+ glad_glWindowPos3d = (PFNGLWINDOWPOS3DPROC)load("glWindowPos3d");
+ glad_glWindowPos3dv = (PFNGLWINDOWPOS3DVPROC)load("glWindowPos3dv");
+ glad_glWindowPos3f = (PFNGLWINDOWPOS3FPROC)load("glWindowPos3f");
+ glad_glWindowPos3fv = (PFNGLWINDOWPOS3FVPROC)load("glWindowPos3fv");
+ glad_glWindowPos3i = (PFNGLWINDOWPOS3IPROC)load("glWindowPos3i");
+ glad_glWindowPos3iv = (PFNGLWINDOWPOS3IVPROC)load("glWindowPos3iv");
+ glad_glWindowPos3s = (PFNGLWINDOWPOS3SPROC)load("glWindowPos3s");
+ glad_glWindowPos3sv = (PFNGLWINDOWPOS3SVPROC)load("glWindowPos3sv");
+ glad_glBlendColor = (PFNGLBLENDCOLORPROC)load("glBlendColor");
+ glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC)load("glBlendEquation");
+}
+static void load_GL_VERSION_1_5(GLADloadproc load) /* Looks up each OpenGL 1.5 function by name through `load` and stores it in the matching glad_ variable. */
+{
+ if(!GLAD_GL_VERSION_1_5) /* Flag is zero: skip the whole group and leave every variable below untouched. */
+ return;
+ glad_glGenQueries = (PFNGLGENQUERIESPROC)load("glGenQueries"); /* The value returned by load() is cast and stored as-is; nothing here checks it. */
+ glad_glDeleteQueries = (PFNGLDELETEQUERIESPROC)load("glDeleteQueries");
+ glad_glIsQuery = (PFNGLISQUERYPROC)load("glIsQuery");
+ glad_glBeginQuery = (PFNGLBEGINQUERYPROC)load("glBeginQuery");
+ glad_glEndQuery = (PFNGLENDQUERYPROC)load("glEndQuery");
+ glad_glGetQueryiv = (PFNGLGETQUERYIVPROC)load("glGetQueryiv");
+ glad_glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC)load("glGetQueryObjectiv");
+ glad_glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC)load("glGetQueryObjectuiv");
+ glad_glBindBuffer = (PFNGLBINDBUFFERPROC)load("glBindBuffer");
+ glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)load("glDeleteBuffers");
+ glad_glGenBuffers = (PFNGLGENBUFFERSPROC)load("glGenBuffers");
+ glad_glIsBuffer = (PFNGLISBUFFERPROC)load("glIsBuffer");
+ glad_glBufferData = (PFNGLBUFFERDATAPROC)load("glBufferData");
+ glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC)load("glBufferSubData");
+ glad_glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC)load("glGetBufferSubData");
+ glad_glMapBuffer = (PFNGLMAPBUFFERPROC)load("glMapBuffer");
+ glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC)load("glUnmapBuffer");
+ glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC)load("glGetBufferParameteriv");
+ glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC)load("glGetBufferPointerv");
+}
+static void load_GL_VERSION_2_0(GLADloadproc load) /* Looks up each OpenGL 2.0 function by name through `load` and stores it in the matching glad_ variable. */
+{
+ if(!GLAD_GL_VERSION_2_0) /* Flag is zero: skip the whole group and leave every variable below untouched. */
+ return;
+ glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC)load("glBlendEquationSeparate"); /* The value returned by load() is cast and stored as-is; nothing here checks it. */
+ glad_glDrawBuffers = (PFNGLDRAWBUFFERSPROC)load("glDrawBuffers");
+ glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC)load("glStencilOpSeparate");
+ glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC)load("glStencilFuncSeparate");
+ glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC)load("glStencilMaskSeparate");
+ glad_glAttachShader = (PFNGLATTACHSHADERPROC)load("glAttachShader");
+ glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC)load("glBindAttribLocation");
+ glad_glCompileShader = (PFNGLCOMPILESHADERPROC)load("glCompileShader");
+ glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC)load("glCreateProgram");
+ glad_glCreateShader = (PFNGLCREATESHADERPROC)load("glCreateShader");
+ glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC)load("glDeleteProgram");
+ glad_glDeleteShader = (PFNGLDELETESHADERPROC)load("glDeleteShader");
+ glad_glDetachShader = (PFNGLDETACHSHADERPROC)load("glDetachShader");
+ glad_glDisableVertexAttribArray /* Same assignment pattern as the rest, only wrapped onto two lines. */
+ = (PFNGLDISABLEVERTEXATTRIBARRAYPROC)load("glDisableVertexAttribArray");
+ glad_glEnableVertexAttribArray
+ = (PFNGLENABLEVERTEXATTRIBARRAYPROC)load("glEnableVertexAttribArray");
+ glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC)load("glGetActiveAttrib");
+ glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC)load("glGetActiveUniform");
+ glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC)load("glGetAttachedShaders");
+ glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC)load("glGetAttribLocation");
+ glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC)load("glGetProgramiv");
+ glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC)load("glGetProgramInfoLog");
+ glad_glGetShaderiv = (PFNGLGETSHADERIVPROC)load("glGetShaderiv");
+ glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC)load("glGetShaderInfoLog");
+ glad_glGetShaderSource = (PFNGLGETSHADERSOURCEPROC)load("glGetShaderSource");
+ glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC)load("glGetUniformLocation");
+ glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC)load("glGetUniformfv");
+ glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC)load("glGetUniformiv");
+ glad_glGetVertexAttribdv = (PFNGLGETVERTEXATTRIBDVPROC)load("glGetVertexAttribdv");
+ glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC)load("glGetVertexAttribfv");
+ glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC)load("glGetVertexAttribiv");
+ glad_glGetVertexAttribPointerv
+ = (PFNGLGETVERTEXATTRIBPOINTERVPROC)load("glGetVertexAttribPointerv");
+ glad_glIsProgram = (PFNGLISPROGRAMPROC)load("glIsProgram");
+ glad_glIsShader = (PFNGLISSHADERPROC)load("glIsShader");
+ glad_glLinkProgram = (PFNGLLINKPROGRAMPROC)load("glLinkProgram");
+ glad_glShaderSource = (PFNGLSHADERSOURCEPROC)load("glShaderSource");
+ glad_glUseProgram = (PFNGLUSEPROGRAMPROC)load("glUseProgram");
+ glad_glUniform1f = (PFNGLUNIFORM1FPROC)load("glUniform1f");
+ glad_glUniform2f = (PFNGLUNIFORM2FPROC)load("glUniform2f");
+ glad_glUniform3f = (PFNGLUNIFORM3FPROC)load("glUniform3f");
+ glad_glUniform4f = (PFNGLUNIFORM4FPROC)load("glUniform4f");
+ glad_glUniform1i = (PFNGLUNIFORM1IPROC)load("glUniform1i");
+ glad_glUniform2i = (PFNGLUNIFORM2IPROC)load("glUniform2i");
+ glad_glUniform3i = (PFNGLUNIFORM3IPROC)load("glUniform3i");
+ glad_glUniform4i = (PFNGLUNIFORM4IPROC)load("glUniform4i");
+ glad_glUniform1fv = (PFNGLUNIFORM1FVPROC)load("glUniform1fv");
+ glad_glUniform2fv = (PFNGLUNIFORM2FVPROC)load("glUniform2fv");
+ glad_glUniform3fv = (PFNGLUNIFORM3FVPROC)load("glUniform3fv");
+ glad_glUniform4fv = (PFNGLUNIFORM4FVPROC)load("glUniform4fv");
+ glad_glUniform1iv = (PFNGLUNIFORM1IVPROC)load("glUniform1iv");
+ glad_glUniform2iv = (PFNGLUNIFORM2IVPROC)load("glUniform2iv");
+ glad_glUniform3iv = (PFNGLUNIFORM3IVPROC)load("glUniform3iv");
+ glad_glUniform4iv = (PFNGLUNIFORM4IVPROC)load("glUniform4iv");
+ glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC)load("glUniformMatrix2fv");
+ glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC)load("glUniformMatrix3fv");
+ glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC)load("glUniformMatrix4fv");
+ glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC)load("glValidateProgram");
+ glad_glVertexAttrib1d = (PFNGLVERTEXATTRIB1DPROC)load("glVertexAttrib1d");
+ glad_glVertexAttrib1dv = (PFNGLVERTEXATTRIB1DVPROC)load("glVertexAttrib1dv");
+ glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC)load("glVertexAttrib1f");
+ glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC)load("glVertexAttrib1fv");
+ glad_glVertexAttrib1s = (PFNGLVERTEXATTRIB1SPROC)load("glVertexAttrib1s");
+ glad_glVertexAttrib1sv = (PFNGLVERTEXATTRIB1SVPROC)load("glVertexAttrib1sv");
+ glad_glVertexAttrib2d = (PFNGLVERTEXATTRIB2DPROC)load("glVertexAttrib2d");
+ glad_glVertexAttrib2dv = (PFNGLVERTEXATTRIB2DVPROC)load("glVertexAttrib2dv");
+ glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC)load("glVertexAttrib2f");
+ glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC)load("glVertexAttrib2fv");
+ glad_glVertexAttrib2s = (PFNGLVERTEXATTRIB2SPROC)load("glVertexAttrib2s");
+ glad_glVertexAttrib2sv = (PFNGLVERTEXATTRIB2SVPROC)load("glVertexAttrib2sv");
+ glad_glVertexAttrib3d = (PFNGLVERTEXATTRIB3DPROC)load("glVertexAttrib3d");
+ glad_glVertexAttrib3dv = (PFNGLVERTEXATTRIB3DVPROC)load("glVertexAttrib3dv");
+ glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC)load("glVertexAttrib3f");
+ glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC)load("glVertexAttrib3fv");
+ glad_glVertexAttrib3s = (PFNGLVERTEXATTRIB3SPROC)load("glVertexAttrib3s");
+ glad_glVertexAttrib3sv = (PFNGLVERTEXATTRIB3SVPROC)load("glVertexAttrib3sv");
+ glad_glVertexAttrib4Nbv = (PFNGLVERTEXATTRIB4NBVPROC)load("glVertexAttrib4Nbv");
+ glad_glVertexAttrib4Niv = (PFNGLVERTEXATTRIB4NIVPROC)load("glVertexAttrib4Niv");
+ glad_glVertexAttrib4Nsv = (PFNGLVERTEXATTRIB4NSVPROC)load("glVertexAttrib4Nsv");
+ glad_glVertexAttrib4Nub = (PFNGLVERTEXATTRIB4NUBPROC)load("glVertexAttrib4Nub");
+ glad_glVertexAttrib4Nubv = (PFNGLVERTEXATTRIB4NUBVPROC)load("glVertexAttrib4Nubv");
+ glad_glVertexAttrib4Nuiv = (PFNGLVERTEXATTRIB4NUIVPROC)load("glVertexAttrib4Nuiv");
+ glad_glVertexAttrib4Nusv = (PFNGLVERTEXATTRIB4NUSVPROC)load("glVertexAttrib4Nusv");
+ glad_glVertexAttrib4bv = (PFNGLVERTEXATTRIB4BVPROC)load("glVertexAttrib4bv");
+ glad_glVertexAttrib4d = (PFNGLVERTEXATTRIB4DPROC)load("glVertexAttrib4d");
+ glad_glVertexAttrib4dv = (PFNGLVERTEXATTRIB4DVPROC)load("glVertexAttrib4dv");
+ glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC)load("glVertexAttrib4f");
+ glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC)load("glVertexAttrib4fv");
+ glad_glVertexAttrib4iv = (PFNGLVERTEXATTRIB4IVPROC)load("glVertexAttrib4iv");
+ glad_glVertexAttrib4s = (PFNGLVERTEXATTRIB4SPROC)load("glVertexAttrib4s");
+ glad_glVertexAttrib4sv = (PFNGLVERTEXATTRIB4SVPROC)load("glVertexAttrib4sv");
+ glad_glVertexAttrib4ubv = (PFNGLVERTEXATTRIB4UBVPROC)load("glVertexAttrib4ubv");
+ glad_glVertexAttrib4uiv = (PFNGLVERTEXATTRIB4UIVPROC)load("glVertexAttrib4uiv");
+ glad_glVertexAttrib4usv = (PFNGLVERTEXATTRIB4USVPROC)load("glVertexAttrib4usv");
+ glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC)load("glVertexAttribPointer");
+}
+static void load_GL_VERSION_2_1(GLADloadproc load)
+{ /* Resolves the GL 2.1 non-square glUniformMatrix*fv entry points through load(). */
+ if(!GLAD_GL_VERSION_2_1) /* flag is derived from the GL_VERSION string in find_coreGL() */
+ return; /* version below 2.1: the pointers are left as they were */
+ glad_glUniformMatrix2x3fv = (PFNGLUNIFORMMATRIX2X3FVPROC)load("glUniformMatrix2x3fv");
+ glad_glUniformMatrix3x2fv = (PFNGLUNIFORMMATRIX3X2FVPROC)load("glUniformMatrix3x2fv");
+ glad_glUniformMatrix2x4fv = (PFNGLUNIFORMMATRIX2X4FVPROC)load("glUniformMatrix2x4fv");
+ glad_glUniformMatrix4x2fv = (PFNGLUNIFORMMATRIX4X2FVPROC)load("glUniformMatrix4x2fv");
+ glad_glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC)load("glUniformMatrix3x4fv");
+ glad_glUniformMatrix4x3fv = (PFNGLUNIFORMMATRIX4X3FVPROC)load("glUniformMatrix4x3fv");
+}
+static void load_GL_VERSION_3_0(GLADloadproc load)
+{ /* Resolves the entry points this loader associates with GL 3.0 through load(). */
+ if(!GLAD_GL_VERSION_3_0) /* set in find_coreGL(): true for 3.0 and anything newer */
+ return; /* older version: the pointers are left as they were */
+ glad_glColorMaski = (PFNGLCOLORMASKIPROC)load("glColorMaski");
+ glad_glGetBooleani_v = (PFNGLGETBOOLEANI_VPROC)load("glGetBooleani_v");
+ glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)load("glGetIntegeri_v");
+ glad_glEnablei = (PFNGLENABLEIPROC)load("glEnablei");
+ glad_glDisablei = (PFNGLDISABLEIPROC)load("glDisablei");
+ glad_glIsEnabledi = (PFNGLISENABLEDIPROC)load("glIsEnabledi");
+ glad_glBeginTransformFeedback
+ = (PFNGLBEGINTRANSFORMFEEDBACKPROC)load("glBeginTransformFeedback");
+ glad_glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC)load("glEndTransformFeedback");
+ glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC)load("glBindBufferRange");
+ glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC)load("glBindBufferBase");
+ glad_glTransformFeedbackVaryings
+ = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC)load("glTransformFeedbackVaryings");
+ glad_glGetTransformFeedbackVarying
+ = (PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)load("glGetTransformFeedbackVarying");
+ glad_glClampColor = (PFNGLCLAMPCOLORPROC)load("glClampColor");
+ glad_glBeginConditionalRender
+ = (PFNGLBEGINCONDITIONALRENDERPROC)load("glBeginConditionalRender");
+ glad_glEndConditionalRender = (PFNGLENDCONDITIONALRENDERPROC)load("glEndConditionalRender");
+ glad_glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC)load("glVertexAttribIPointer");
+ glad_glGetVertexAttribIiv = (PFNGLGETVERTEXATTRIBIIVPROC)load("glGetVertexAttribIiv");
+ glad_glGetVertexAttribIuiv = (PFNGLGETVERTEXATTRIBIUIVPROC)load("glGetVertexAttribIuiv");
+ glad_glVertexAttribI1i = (PFNGLVERTEXATTRIBI1IPROC)load("glVertexAttribI1i");
+ glad_glVertexAttribI2i = (PFNGLVERTEXATTRIBI2IPROC)load("glVertexAttribI2i");
+ glad_glVertexAttribI3i = (PFNGLVERTEXATTRIBI3IPROC)load("glVertexAttribI3i");
+ glad_glVertexAttribI4i = (PFNGLVERTEXATTRIBI4IPROC)load("glVertexAttribI4i");
+ glad_glVertexAttribI1ui = (PFNGLVERTEXATTRIBI1UIPROC)load("glVertexAttribI1ui");
+ glad_glVertexAttribI2ui = (PFNGLVERTEXATTRIBI2UIPROC)load("glVertexAttribI2ui");
+ glad_glVertexAttribI3ui = (PFNGLVERTEXATTRIBI3UIPROC)load("glVertexAttribI3ui");
+ glad_glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC)load("glVertexAttribI4ui");
+ glad_glVertexAttribI1iv = (PFNGLVERTEXATTRIBI1IVPROC)load("glVertexAttribI1iv");
+ glad_glVertexAttribI2iv = (PFNGLVERTEXATTRIBI2IVPROC)load("glVertexAttribI2iv");
+ glad_glVertexAttribI3iv = (PFNGLVERTEXATTRIBI3IVPROC)load("glVertexAttribI3iv");
+ glad_glVertexAttribI4iv = (PFNGLVERTEXATTRIBI4IVPROC)load("glVertexAttribI4iv");
+ glad_glVertexAttribI1uiv = (PFNGLVERTEXATTRIBI1UIVPROC)load("glVertexAttribI1uiv");
+ glad_glVertexAttribI2uiv = (PFNGLVERTEXATTRIBI2UIVPROC)load("glVertexAttribI2uiv");
+ glad_glVertexAttribI3uiv = (PFNGLVERTEXATTRIBI3UIVPROC)load("glVertexAttribI3uiv");
+ glad_glVertexAttribI4uiv = (PFNGLVERTEXATTRIBI4UIVPROC)load("glVertexAttribI4uiv");
+ glad_glVertexAttribI4bv = (PFNGLVERTEXATTRIBI4BVPROC)load("glVertexAttribI4bv");
+ glad_glVertexAttribI4sv = (PFNGLVERTEXATTRIBI4SVPROC)load("glVertexAttribI4sv");
+ glad_glVertexAttribI4ubv = (PFNGLVERTEXATTRIBI4UBVPROC)load("glVertexAttribI4ubv");
+ glad_glVertexAttribI4usv = (PFNGLVERTEXATTRIBI4USVPROC)load("glVertexAttribI4usv");
+ glad_glGetUniformuiv = (PFNGLGETUNIFORMUIVPROC)load("glGetUniformuiv");
+ glad_glBindFragDataLocation = (PFNGLBINDFRAGDATALOCATIONPROC)load("glBindFragDataLocation");
+ glad_glGetFragDataLocation = (PFNGLGETFRAGDATALOCATIONPROC)load("glGetFragDataLocation");
+ glad_glUniform1ui = (PFNGLUNIFORM1UIPROC)load("glUniform1ui");
+ glad_glUniform2ui = (PFNGLUNIFORM2UIPROC)load("glUniform2ui");
+ glad_glUniform3ui = (PFNGLUNIFORM3UIPROC)load("glUniform3ui");
+ glad_glUniform4ui = (PFNGLUNIFORM4UIPROC)load("glUniform4ui");
+ glad_glUniform1uiv = (PFNGLUNIFORM1UIVPROC)load("glUniform1uiv");
+ glad_glUniform2uiv = (PFNGLUNIFORM2UIVPROC)load("glUniform2uiv");
+ glad_glUniform3uiv = (PFNGLUNIFORM3UIVPROC)load("glUniform3uiv");
+ glad_glUniform4uiv = (PFNGLUNIFORM4UIVPROC)load("glUniform4uiv");
+ glad_glTexParameterIiv = (PFNGLTEXPARAMETERIIVPROC)load("glTexParameterIiv");
+ glad_glTexParameterIuiv = (PFNGLTEXPARAMETERIUIVPROC)load("glTexParameterIuiv");
+ glad_glGetTexParameterIiv = (PFNGLGETTEXPARAMETERIIVPROC)load("glGetTexParameterIiv");
+ glad_glGetTexParameterIuiv = (PFNGLGETTEXPARAMETERIUIVPROC)load("glGetTexParameterIuiv");
+ glad_glClearBufferiv = (PFNGLCLEARBUFFERIVPROC)load("glClearBufferiv");
+ glad_glClearBufferuiv = (PFNGLCLEARBUFFERUIVPROC)load("glClearBufferuiv");
+ glad_glClearBufferfv = (PFNGLCLEARBUFFERFVPROC)load("glClearBufferfv");
+ glad_glClearBufferfi = (PFNGLCLEARBUFFERFIPROC)load("glClearBufferfi");
+ glad_glGetStringi = (PFNGLGETSTRINGIPROC)load("glGetStringi");
+ glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC)load("glIsRenderbuffer");
+ glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC)load("glBindRenderbuffer");
+ glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC)load("glDeleteRenderbuffers");
+ glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC)load("glGenRenderbuffers");
+ glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC)load("glRenderbufferStorage");
+ glad_glGetRenderbufferParameteriv
+ = (PFNGLGETRENDERBUFFERPARAMETERIVPROC)load("glGetRenderbufferParameteriv");
+ glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC)load("glIsFramebuffer");
+ glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC)load("glBindFramebuffer");
+ glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC)load("glDeleteFramebuffers");
+ glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC)load("glGenFramebuffers");
+ glad_glCheckFramebufferStatus
+ = (PFNGLCHECKFRAMEBUFFERSTATUSPROC)load("glCheckFramebufferStatus");
+ glad_glFramebufferTexture1D = (PFNGLFRAMEBUFFERTEXTURE1DPROC)load("glFramebufferTexture1D");
+ glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC)load("glFramebufferTexture2D");
+ glad_glFramebufferTexture3D = (PFNGLFRAMEBUFFERTEXTURE3DPROC)load("glFramebufferTexture3D");
+ glad_glFramebufferRenderbuffer
+ = (PFNGLFRAMEBUFFERRENDERBUFFERPROC)load("glFramebufferRenderbuffer");
+ glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load(
+ "glGetFramebufferAttachmentParameteriv");
+ glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC)load("glGenerateMipmap");
+ glad_glBlitFramebuffer = (PFNGLBLITFRAMEBUFFERPROC)load("glBlitFramebuffer");
+ glad_glRenderbufferStorageMultisample
+ = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glRenderbufferStorageMultisample");
+ glad_glFramebufferTextureLayer
+ = (PFNGLFRAMEBUFFERTEXTURELAYERPROC)load("glFramebufferTextureLayer");
+ glad_glMapBufferRange = (PFNGLMAPBUFFERRANGEPROC)load("glMapBufferRange");
+ glad_glFlushMappedBufferRange
+ = (PFNGLFLUSHMAPPEDBUFFERRANGEPROC)load("glFlushMappedBufferRange");
+ glad_glBindVertexArray = (PFNGLBINDVERTEXARRAYPROC)load("glBindVertexArray");
+ glad_glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSPROC)load("glDeleteVertexArrays");
+ glad_glGenVertexArrays = (PFNGLGENVERTEXARRAYSPROC)load("glGenVertexArrays");
+ glad_glIsVertexArray = (PFNGLISVERTEXARRAYPROC)load("glIsVertexArray");
+}
+static void load_GL_VERSION_3_1(GLADloadproc load)
+{ /* Resolves the GL 3.1 entry points via load(); the last three repeat 3.0 assignments. */
+ if(!GLAD_GL_VERSION_3_1) /* set in find_coreGL(): true for 3.1 and anything newer */
+ return; /* older version: the pointers are left as they were */
+ glad_glDrawArraysInstanced = (PFNGLDRAWARRAYSINSTANCEDPROC)load("glDrawArraysInstanced");
+ glad_glDrawElementsInstanced = (PFNGLDRAWELEMENTSINSTANCEDPROC)load("glDrawElementsInstanced");
+ glad_glTexBuffer = (PFNGLTEXBUFFERPROC)load("glTexBuffer");
+ glad_glPrimitiveRestartIndex = (PFNGLPRIMITIVERESTARTINDEXPROC)load("glPrimitiveRestartIndex");
+ glad_glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC)load("glCopyBufferSubData");
+ glad_glGetUniformIndices = (PFNGLGETUNIFORMINDICESPROC)load("glGetUniformIndices");
+ glad_glGetActiveUniformsiv = (PFNGLGETACTIVEUNIFORMSIVPROC)load("glGetActiveUniformsiv");
+ glad_glGetActiveUniformName = (PFNGLGETACTIVEUNIFORMNAMEPROC)load("glGetActiveUniformName");
+ glad_glGetUniformBlockIndex = (PFNGLGETUNIFORMBLOCKINDEXPROC)load("glGetUniformBlockIndex");
+ glad_glGetActiveUniformBlockiv
+ = (PFNGLGETACTIVEUNIFORMBLOCKIVPROC)load("glGetActiveUniformBlockiv");
+ glad_glGetActiveUniformBlockName
+ = (PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)load("glGetActiveUniformBlockName");
+ glad_glUniformBlockBinding = (PFNGLUNIFORMBLOCKBINDINGPROC)load("glUniformBlockBinding");
+ glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC)load("glBindBufferRange");
+ glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC)load("glBindBufferBase");
+ glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)load("glGetIntegeri_v");
+}
+static void load_GL_VERSION_3_2(GLADloadproc load)
+{ /* Resolves the entry points this loader associates with GL 3.2 through load(). */
+ if(!GLAD_GL_VERSION_3_2) /* set in find_coreGL(): true for 3.2 and anything newer */
+ return; /* older version: the pointers are left as they were */
+ glad_glDrawElementsBaseVertex
+ = (PFNGLDRAWELEMENTSBASEVERTEXPROC)load("glDrawElementsBaseVertex");
+ glad_glDrawRangeElementsBaseVertex
+ = (PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)load("glDrawRangeElementsBaseVertex");
+ glad_glDrawElementsInstancedBaseVertex
+ = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)load("glDrawElementsInstancedBaseVertex");
+ glad_glMultiDrawElementsBaseVertex
+ = (PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)load("glMultiDrawElementsBaseVertex");
+ glad_glProvokingVertex = (PFNGLPROVOKINGVERTEXPROC)load("glProvokingVertex");
+ glad_glFenceSync = (PFNGLFENCESYNCPROC)load("glFenceSync");
+ glad_glIsSync = (PFNGLISSYNCPROC)load("glIsSync");
+ glad_glDeleteSync = (PFNGLDELETESYNCPROC)load("glDeleteSync");
+ glad_glClientWaitSync = (PFNGLCLIENTWAITSYNCPROC)load("glClientWaitSync");
+ glad_glWaitSync = (PFNGLWAITSYNCPROC)load("glWaitSync");
+ glad_glGetInteger64v = (PFNGLGETINTEGER64VPROC)load("glGetInteger64v");
+ glad_glGetSynciv = (PFNGLGETSYNCIVPROC)load("glGetSynciv");
+ glad_glGetInteger64i_v = (PFNGLGETINTEGER64I_VPROC)load("glGetInteger64i_v");
+ glad_glGetBufferParameteri64v
+ = (PFNGLGETBUFFERPARAMETERI64VPROC)load("glGetBufferParameteri64v");
+ glad_glFramebufferTexture = (PFNGLFRAMEBUFFERTEXTUREPROC)load("glFramebufferTexture");
+ glad_glTexImage2DMultisample = (PFNGLTEXIMAGE2DMULTISAMPLEPROC)load("glTexImage2DMultisample");
+ glad_glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC)load("glTexImage3DMultisample");
+ glad_glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC)load("glGetMultisamplefv");
+ glad_glSampleMaski = (PFNGLSAMPLEMASKIPROC)load("glSampleMaski");
+}
+static void load_GL_VERSION_3_3(GLADloadproc load)
+{ /* Resolves the entry points this loader associates with GL 3.3 through load(). */
+ if(!GLAD_GL_VERSION_3_3) /* set in find_coreGL(): true for 3.3 and anything newer */
+ return; /* older version: the pointers are left as they were */
+ glad_glBindFragDataLocationIndexed
+ = (PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)load("glBindFragDataLocationIndexed");
+ glad_glGetFragDataIndex = (PFNGLGETFRAGDATAINDEXPROC)load("glGetFragDataIndex");
+ glad_glGenSamplers = (PFNGLGENSAMPLERSPROC)load("glGenSamplers");
+ glad_glDeleteSamplers = (PFNGLDELETESAMPLERSPROC)load("glDeleteSamplers");
+ glad_glIsSampler = (PFNGLISSAMPLERPROC)load("glIsSampler");
+ glad_glBindSampler = (PFNGLBINDSAMPLERPROC)load("glBindSampler");
+ glad_glSamplerParameteri = (PFNGLSAMPLERPARAMETERIPROC)load("glSamplerParameteri");
+ glad_glSamplerParameteriv = (PFNGLSAMPLERPARAMETERIVPROC)load("glSamplerParameteriv");
+ glad_glSamplerParameterf = (PFNGLSAMPLERPARAMETERFPROC)load("glSamplerParameterf");
+ glad_glSamplerParameterfv = (PFNGLSAMPLERPARAMETERFVPROC)load("glSamplerParameterfv");
+ glad_glSamplerParameterIiv = (PFNGLSAMPLERPARAMETERIIVPROC)load("glSamplerParameterIiv");
+ glad_glSamplerParameterIuiv = (PFNGLSAMPLERPARAMETERIUIVPROC)load("glSamplerParameterIuiv");
+ glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC)load("glGetSamplerParameteriv");
+ glad_glGetSamplerParameterIiv
+ = (PFNGLGETSAMPLERPARAMETERIIVPROC)load("glGetSamplerParameterIiv");
+ glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC)load("glGetSamplerParameterfv");
+ glad_glGetSamplerParameterIuiv
+ = (PFNGLGETSAMPLERPARAMETERIUIVPROC)load("glGetSamplerParameterIuiv");
+ glad_glQueryCounter = (PFNGLQUERYCOUNTERPROC)load("glQueryCounter");
+ glad_glGetQueryObjecti64v = (PFNGLGETQUERYOBJECTI64VPROC)load("glGetQueryObjecti64v");
+ glad_glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC)load("glGetQueryObjectui64v");
+ glad_glVertexAttribDivisor = (PFNGLVERTEXATTRIBDIVISORPROC)load("glVertexAttribDivisor");
+ glad_glVertexAttribP1ui = (PFNGLVERTEXATTRIBP1UIPROC)load("glVertexAttribP1ui");
+ glad_glVertexAttribP1uiv = (PFNGLVERTEXATTRIBP1UIVPROC)load("glVertexAttribP1uiv");
+ glad_glVertexAttribP2ui = (PFNGLVERTEXATTRIBP2UIPROC)load("glVertexAttribP2ui");
+ glad_glVertexAttribP2uiv = (PFNGLVERTEXATTRIBP2UIVPROC)load("glVertexAttribP2uiv");
+ glad_glVertexAttribP3ui = (PFNGLVERTEXATTRIBP3UIPROC)load("glVertexAttribP3ui");
+ glad_glVertexAttribP3uiv = (PFNGLVERTEXATTRIBP3UIVPROC)load("glVertexAttribP3uiv");
+ glad_glVertexAttribP4ui = (PFNGLVERTEXATTRIBP4UIPROC)load("glVertexAttribP4ui");
+ glad_glVertexAttribP4uiv = (PFNGLVERTEXATTRIBP4UIVPROC)load("glVertexAttribP4uiv");
+ glad_glVertexP2ui = (PFNGLVERTEXP2UIPROC)load("glVertexP2ui");
+ glad_glVertexP2uiv = (PFNGLVERTEXP2UIVPROC)load("glVertexP2uiv");
+ glad_glVertexP3ui = (PFNGLVERTEXP3UIPROC)load("glVertexP3ui");
+ glad_glVertexP3uiv = (PFNGLVERTEXP3UIVPROC)load("glVertexP3uiv");
+ glad_glVertexP4ui = (PFNGLVERTEXP4UIPROC)load("glVertexP4ui");
+ glad_glVertexP4uiv = (PFNGLVERTEXP4UIVPROC)load("glVertexP4uiv");
+ glad_glTexCoordP1ui = (PFNGLTEXCOORDP1UIPROC)load("glTexCoordP1ui");
+ glad_glTexCoordP1uiv = (PFNGLTEXCOORDP1UIVPROC)load("glTexCoordP1uiv");
+ glad_glTexCoordP2ui = (PFNGLTEXCOORDP2UIPROC)load("glTexCoordP2ui");
+ glad_glTexCoordP2uiv = (PFNGLTEXCOORDP2UIVPROC)load("glTexCoordP2uiv");
+ glad_glTexCoordP3ui = (PFNGLTEXCOORDP3UIPROC)load("glTexCoordP3ui");
+ glad_glTexCoordP3uiv = (PFNGLTEXCOORDP3UIVPROC)load("glTexCoordP3uiv");
+ glad_glTexCoordP4ui = (PFNGLTEXCOORDP4UIPROC)load("glTexCoordP4ui");
+ glad_glTexCoordP4uiv = (PFNGLTEXCOORDP4UIVPROC)load("glTexCoordP4uiv");
+ glad_glMultiTexCoordP1ui = (PFNGLMULTITEXCOORDP1UIPROC)load("glMultiTexCoordP1ui");
+ glad_glMultiTexCoordP1uiv = (PFNGLMULTITEXCOORDP1UIVPROC)load("glMultiTexCoordP1uiv");
+ glad_glMultiTexCoordP2ui = (PFNGLMULTITEXCOORDP2UIPROC)load("glMultiTexCoordP2ui");
+ glad_glMultiTexCoordP2uiv = (PFNGLMULTITEXCOORDP2UIVPROC)load("glMultiTexCoordP2uiv");
+ glad_glMultiTexCoordP3ui = (PFNGLMULTITEXCOORDP3UIPROC)load("glMultiTexCoordP3ui");
+ glad_glMultiTexCoordP3uiv = (PFNGLMULTITEXCOORDP3UIVPROC)load("glMultiTexCoordP3uiv");
+ glad_glMultiTexCoordP4ui = (PFNGLMULTITEXCOORDP4UIPROC)load("glMultiTexCoordP4ui");
+ glad_glMultiTexCoordP4uiv = (PFNGLMULTITEXCOORDP4UIVPROC)load("glMultiTexCoordP4uiv");
+ glad_glNormalP3ui = (PFNGLNORMALP3UIPROC)load("glNormalP3ui");
+ glad_glNormalP3uiv = (PFNGLNORMALP3UIVPROC)load("glNormalP3uiv");
+ glad_glColorP3ui = (PFNGLCOLORP3UIPROC)load("glColorP3ui");
+ glad_glColorP3uiv = (PFNGLCOLORP3UIVPROC)load("glColorP3uiv");
+ glad_glColorP4ui = (PFNGLCOLORP4UIPROC)load("glColorP4ui");
+ glad_glColorP4uiv = (PFNGLCOLORP4UIVPROC)load("glColorP4uiv");
+ glad_glSecondaryColorP3ui = (PFNGLSECONDARYCOLORP3UIPROC)load("glSecondaryColorP3ui");
+ glad_glSecondaryColorP3uiv = (PFNGLSECONDARYCOLORP3UIVPROC)load("glSecondaryColorP3uiv");
+}
+static void load_GL_ARB_debug_output(GLADloadproc load)
+{ /* Resolves the four GL_ARB_debug_output entry points through load(). */
+ if(!GLAD_GL_ARB_debug_output) /* set by find_extensionsGL() from has_ext() */
+ return; /* extension not reported: the pointers are left as they were */
+ glad_glDebugMessageControlARB
+ = (PFNGLDEBUGMESSAGECONTROLARBPROC)load("glDebugMessageControlARB");
+ glad_glDebugMessageInsertARB = (PFNGLDEBUGMESSAGEINSERTARBPROC)load("glDebugMessageInsertARB");
+ glad_glDebugMessageCallbackARB
+ = (PFNGLDEBUGMESSAGECALLBACKARBPROC)load("glDebugMessageCallbackARB");
+ glad_glGetDebugMessageLogARB = (PFNGLGETDEBUGMESSAGELOGARBPROC)load("glGetDebugMessageLogARB");
+}
+static int find_extensionsGL(void)
+{
+    if(get_exts() == 0) /* extension list could not be obtained: report failure to the caller */
+        return 0;
+    GLAD_GL_ARB_debug_output = has_ext("GL_ARB_debug_output"); /* the only extension tracked */
+    free_exts(); /* always paired with the successful get_exts() above */
+    return 1;
+}
+
+static void find_coreGL(void)
+{
+    /* Parses glGetString(GL_VERSION) into GLVersion and the GLAD_GL_VERSION_x_y flags.
+     * Thank you @elmindreda
+     * https://github.com/elmindreda/greg/blob/master/templates/greg.c.in#L176
+     * https://github.com/glfw/glfw/blob/master/src/context.c#L36
+     */
+    int i, major = 0, minor = 0; /* zeroed: sscanf leaves unmatched fields unwritten */
+
+    const char* version;
+    const char* prefixes[] = {"OpenGL ES-CM ", "OpenGL ES-CL ", "OpenGL ES ", NULL};
+
+    version = (const char*)glGetString(GL_VERSION);
+    if(!version)
+        return;
+    /* Step over an OpenGL ES prefix, if any, so the string begins at "major.minor". */
+    for(i = 0; prefixes[i]; i++)
+    {
+        const size_t length = strlen(prefixes[i]);
+        if(strncmp(version, prefixes[i], length) == 0)
+        {
+            version += length;
+            break;
+        }
+    }
+
+/* PR #18 */
+#ifdef _MSC_VER
+    sscanf_s(version, "%d.%d", &major, &minor);
+#else
+    sscanf(version, "%d.%d", &major, &minor);
+#endif
+    /* A version that fails to parse is reported as 0.0, which clears every flag below. */
+    GLVersion.major = major;
+    GLVersion.minor = minor;
+    max_loaded_major = major;
+    GLAD_GL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1;
+    GLAD_GL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1;
+    GLAD_GL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1;
+    GLAD_GL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1;
+    GLAD_GL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1;
+    GLAD_GL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1;
+    GLAD_GL_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2;
+    GLAD_GL_VERSION_2_1 = (major == 2 && minor >= 1) || major > 2;
+    GLAD_GL_VERSION_3_0 = (major == 3 && minor >= 0) || major > 3;
+    GLAD_GL_VERSION_3_1 = (major == 3 && minor >= 1) || major > 3;
+    GLAD_GL_VERSION_3_2 = (major == 3 && minor >= 2) || major > 3;
+    GLAD_GL_VERSION_3_3 = (major == 3 && minor >= 3) || major > 3;
+    if(GLVersion.major > 3 || (GLVersion.major >= 3 && GLVersion.minor >= 3))
+    {
+        max_loaded_major = 3; /* clamp for 3.3 and newer; this loader was generated for gl=3.3 */
+    }
+}
+
+int gladLoadGLLoader(GLADloadproc load)
+{
+    /* Runs the GL 1.0-3.3 loaders and the GL_ARB_debug_output loader with `load`; returns 0 if
+     * glGetString or its version string is missing, find_extensionsGL() fails, or version is 0.0. */
+    GLVersion.minor = 0;
+    GLVersion.major = 0;
+    glGetString = (PFNGLGETSTRINGPROC)load("glGetString");
+    if(glGetString == NULL || glGetString(GL_VERSION) == NULL)
+        return 0;
+    find_coreGL(); /* fills GLVersion and the GLAD_GL_VERSION_x_y flags the loaders test */
+    load_GL_VERSION_1_0(load);
+    load_GL_VERSION_1_1(load);
+    load_GL_VERSION_1_2(load);
+    load_GL_VERSION_1_3(load);
+    load_GL_VERSION_1_4(load);
+    load_GL_VERSION_1_5(load);
+    load_GL_VERSION_2_0(load);
+    load_GL_VERSION_2_1(load);
+    load_GL_VERSION_3_0(load);
+    load_GL_VERSION_3_1(load);
+    load_GL_VERSION_3_2(load);
+    load_GL_VERSION_3_3(load);
+
+    if(find_extensionsGL() == 0)
+        return 0;
+    load_GL_ARB_debug_output(load);
+    return !(GLVersion.major == 0 && GLVersion.minor == 0);
+}
diff --git a/External/glad/glad.h b/External/glad/glad.h
new file mode 100644
index 000000000..0a3533813
--- /dev/null
+++ b/External/glad/glad.h
@@ -0,0 +1,3649 @@
+/*
+
+ OpenGL loader generated by glad 0.1.36 on Fri Oct 28 09:33:23 2022.
+
+ Language/Generator: C/C++
+ Specification: gl
+ APIs: gl=3.3
+ Profile: compatibility
+ Extensions:
+ GL_ARB_debug_output
+ Loader: True
+ Local files: False
+ Omit khrplatform: False
+ Reproducible: False
+
+ Commandline:
+ --profile="compatibility" --api="gl=3.3" --generator="c" --spec="gl" --extensions="GL_ARB_debug_output"
+ Online:
+ https://glad.dav1d.de/#profile=compatibility&language=c&specification=gl&loader=on&api=gl%3D3.3&extensions=GL_ARB_debug_output
+*/
+
+
+#ifndef __glad_h_
+#define __glad_h_
+
+#ifdef __gl_h_
+#error OpenGL header already included, remove this include, glad already provides it
+#endif
+#define __gl_h_
+
+#if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__)
+#define APIENTRY __stdcall
+#endif
+
+#ifndef APIENTRY /* everywhere else APIENTRY expands to nothing */
+#define APIENTRY
+#endif
+#ifndef APIENTRYP /* "pointer to APIENTRY function" declarator fragment */
+#define APIENTRYP APIENTRY *
+#endif
+
+#ifndef GLAPIENTRY /* alias of APIENTRY unless already defined */
+#define GLAPIENTRY APIENTRY
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct gladGLversionStruct { /* major/minor pair; the GLVersion global declared below has this type */
+ int major; /* both fields are reset to 0 by gladLoadGLLoader() and filled by find_coreGL() */
+ int minor;
+};
+
+typedef void* (* GLADloadproc)(const char *name); /* callback: GL function name -> its address */
+
+#ifndef GLAPI /* prefix for glad's public declarations; a definition supplied earlier wins */
+# if defined(GLAD_GLAPI_EXPORT) /* opt-in: add import/export decoration */
+# if defined(_WIN32) || defined(__CYGWIN__)
+# if defined(GLAD_GLAPI_EXPORT_BUILD) /* defined: dllexport; otherwise: dllimport */
+# if defined(__GNUC__)
+# define GLAPI __attribute__ ((dllexport)) extern
+# else
+# define GLAPI __declspec(dllexport) extern
+# endif
+# else
+# if defined(__GNUC__)
+# define GLAPI __attribute__ ((dllimport)) extern
+# else
+# define GLAPI __declspec(dllimport) extern
+# endif
+# endif
+# elif defined(__GNUC__) && defined(GLAD_GLAPI_EXPORT_BUILD)
+# define GLAPI __attribute__ ((visibility ("default"))) extern
+# else
+# define GLAPI extern
+# endif
+# else /* GLAD_GLAPI_EXPORT not defined: plain extern */
+# define GLAPI extern
+# endif
+#endif
+
+GLAPI struct gladGLversionStruct GLVersion; /* version parsed from GL_VERSION; 0.0 until loaded */
+
+GLAPI int gladLoadGL(void);
+
+GLAPI int gladLoadGLLoader(GLADloadproc); /* returns 0 on failure, 1 once a version was parsed */
+
+#include <KHR/khrplatform.h> /* provides the khronos_* types the typedefs below are built on */
+typedef unsigned int GLenum;
+typedef unsigned char GLboolean;
+typedef unsigned int GLbitfield;
+typedef void GLvoid;
+typedef khronos_int8_t GLbyte;
+typedef khronos_uint8_t GLubyte;
+typedef khronos_int16_t GLshort;
+typedef khronos_uint16_t GLushort;
+typedef int GLint;
+typedef unsigned int GLuint;
+typedef khronos_int32_t GLclampx;
+typedef int GLsizei;
+typedef khronos_float_t GLfloat;
+typedef khronos_float_t GLclampf;
+typedef double GLdouble;
+typedef double GLclampd;
+typedef void *GLeglClientBufferEXT;
+typedef void *GLeglImageOES;
+typedef char GLchar;
+typedef char GLcharARB;
+#ifdef __APPLE__ /* GLhandleARB is a pointer on Apple and an unsigned int elsewhere */
+typedef void *GLhandleARB;
+#else
+typedef unsigned int GLhandleARB;
+#endif
+typedef khronos_uint16_t GLhalf;
+typedef khronos_uint16_t GLhalfARB;
+typedef khronos_int32_t GLfixed;
+typedef khronos_intptr_t GLintptr;
+typedef khronos_intptr_t GLintptrARB;
+typedef khronos_ssize_t GLsizeiptr;
+typedef khronos_ssize_t GLsizeiptrARB;
+typedef khronos_int64_t GLint64;
+typedef khronos_int64_t GLint64EXT;
+typedef khronos_uint64_t GLuint64;
+typedef khronos_uint64_t GLuint64EXT;
+typedef struct __GLsync *GLsync;
+struct _cl_context;
+struct _cl_event;
+typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam);
+typedef void (APIENTRY *GLDEBUGPROCARB)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam);
+typedef void (APIENTRY *GLDEBUGPROCKHR)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam);
+typedef void (APIENTRY *GLDEBUGPROCAMD)(GLuint id,GLenum category,GLenum severity,GLsizei length,const GLchar *message,void *userParam);
+typedef unsigned short GLhalfNV;
+typedef GLintptr GLvdpauSurfaceNV;
+typedef void (APIENTRY *GLVULKANPROCNV)(void);
+#define GL_DEPTH_BUFFER_BIT 0x00000100
+#define GL_STENCIL_BUFFER_BIT 0x00000400
+#define GL_COLOR_BUFFER_BIT 0x00004000
+#define GL_FALSE 0
+#define GL_TRUE 1
+#define GL_POINTS 0x0000
+#define GL_LINES 0x0001
+#define GL_LINE_LOOP 0x0002
+#define GL_LINE_STRIP 0x0003
+#define GL_TRIANGLES 0x0004
+#define GL_TRIANGLE_STRIP 0x0005
+#define GL_TRIANGLE_FAN 0x0006
+#define GL_QUADS 0x0007
+#define GL_NEVER 0x0200
+#define GL_LESS 0x0201
+#define GL_EQUAL 0x0202
+#define GL_LEQUAL 0x0203
+#define GL_GREATER 0x0204
+#define GL_NOTEQUAL 0x0205
+#define GL_GEQUAL 0x0206
+#define GL_ALWAYS 0x0207
+#define GL_ZERO 0
+#define GL_ONE 1
+#define GL_SRC_COLOR 0x0300
+#define GL_ONE_MINUS_SRC_COLOR 0x0301
+#define GL_SRC_ALPHA 0x0302
+#define GL_ONE_MINUS_SRC_ALPHA 0x0303
+#define GL_DST_ALPHA 0x0304
+#define GL_ONE_MINUS_DST_ALPHA 0x0305
+#define GL_DST_COLOR 0x0306
+#define GL_ONE_MINUS_DST_COLOR 0x0307
+#define GL_SRC_ALPHA_SATURATE 0x0308
+#define GL_NONE 0
+#define GL_FRONT_LEFT 0x0400
+#define GL_FRONT_RIGHT 0x0401
+#define GL_BACK_LEFT 0x0402
+#define GL_BACK_RIGHT 0x0403
+#define GL_FRONT 0x0404
+#define GL_BACK 0x0405
+#define GL_LEFT 0x0406
+#define GL_RIGHT 0x0407
+#define GL_FRONT_AND_BACK 0x0408
+#define GL_NO_ERROR 0
+#define GL_INVALID_ENUM 0x0500
+#define GL_INVALID_VALUE 0x0501
+#define GL_INVALID_OPERATION 0x0502
+#define GL_OUT_OF_MEMORY 0x0505
+#define GL_CW 0x0900
+#define GL_CCW 0x0901
+#define GL_POINT_SIZE 0x0B11
+#define GL_POINT_SIZE_RANGE 0x0B12
+#define GL_POINT_SIZE_GRANULARITY 0x0B13
+#define GL_LINE_SMOOTH 0x0B20
+#define GL_LINE_WIDTH 0x0B21
+#define GL_LINE_WIDTH_RANGE 0x0B22
+#define GL_LINE_WIDTH_GRANULARITY 0x0B23
+#define GL_POLYGON_MODE 0x0B40
+#define GL_POLYGON_SMOOTH 0x0B41
+#define GL_CULL_FACE 0x0B44
+#define GL_CULL_FACE_MODE 0x0B45
+#define GL_FRONT_FACE 0x0B46
+#define GL_DEPTH_RANGE 0x0B70
+#define GL_DEPTH_TEST 0x0B71
+#define GL_DEPTH_WRITEMASK 0x0B72
+#define GL_DEPTH_CLEAR_VALUE 0x0B73
+#define GL_DEPTH_FUNC 0x0B74
+#define GL_STENCIL_TEST 0x0B90
+#define GL_STENCIL_CLEAR_VALUE 0x0B91
+#define GL_STENCIL_FUNC 0x0B92
+#define GL_STENCIL_VALUE_MASK 0x0B93
+#define GL_STENCIL_FAIL 0x0B94
+#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95
+#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96
+#define GL_STENCIL_REF 0x0B97
+#define GL_STENCIL_WRITEMASK 0x0B98
+#define GL_VIEWPORT 0x0BA2
+#define GL_DITHER 0x0BD0
+#define GL_BLEND_DST 0x0BE0
+#define GL_BLEND_SRC 0x0BE1
+#define GL_BLEND 0x0BE2
+#define GL_LOGIC_OP_MODE 0x0BF0
+#define GL_DRAW_BUFFER 0x0C01
+#define GL_READ_BUFFER 0x0C02
+#define GL_SCISSOR_BOX 0x0C10
+#define GL_SCISSOR_TEST 0x0C11
+#define GL_COLOR_CLEAR_VALUE 0x0C22
+#define GL_COLOR_WRITEMASK 0x0C23
+#define GL_DOUBLEBUFFER 0x0C32
+#define GL_STEREO 0x0C33
+#define GL_LINE_SMOOTH_HINT 0x0C52
+#define GL_POLYGON_SMOOTH_HINT 0x0C53
+#define GL_UNPACK_SWAP_BYTES 0x0CF0
+#define GL_UNPACK_LSB_FIRST 0x0CF1
+#define GL_UNPACK_ROW_LENGTH 0x0CF2
+#define GL_UNPACK_SKIP_ROWS 0x0CF3
+#define GL_UNPACK_SKIP_PIXELS 0x0CF4
+#define GL_UNPACK_ALIGNMENT 0x0CF5
+#define GL_PACK_SWAP_BYTES 0x0D00
+#define GL_PACK_LSB_FIRST 0x0D01
+#define GL_PACK_ROW_LENGTH 0x0D02
+#define GL_PACK_SKIP_ROWS 0x0D03
+#define GL_PACK_SKIP_PIXELS 0x0D04
+#define GL_PACK_ALIGNMENT 0x0D05
+#define GL_MAX_TEXTURE_SIZE 0x0D33
+#define GL_MAX_VIEWPORT_DIMS 0x0D3A
+#define GL_SUBPIXEL_BITS 0x0D50
+#define GL_TEXTURE_1D 0x0DE0
+#define GL_TEXTURE_2D 0x0DE1
+#define GL_TEXTURE_WIDTH 0x1000
+#define GL_TEXTURE_HEIGHT 0x1001
+#define GL_TEXTURE_BORDER_COLOR 0x1004
+#define GL_DONT_CARE 0x1100
+#define GL_FASTEST 0x1101
+#define GL_NICEST 0x1102
+#define GL_BYTE 0x1400
+#define GL_UNSIGNED_BYTE 0x1401
+#define GL_SHORT 0x1402
+#define GL_UNSIGNED_SHORT 0x1403
+#define GL_INT 0x1404
+#define GL_UNSIGNED_INT 0x1405
+#define GL_FLOAT 0x1406
+#define GL_STACK_OVERFLOW 0x0503
+#define GL_STACK_UNDERFLOW 0x0504
+#define GL_CLEAR 0x1500
+#define GL_AND 0x1501
+#define GL_AND_REVERSE 0x1502
+#define GL_COPY 0x1503
+#define GL_AND_INVERTED 0x1504
+#define GL_NOOP 0x1505
+#define GL_XOR 0x1506
+#define GL_OR 0x1507
+#define GL_NOR 0x1508
+#define GL_EQUIV 0x1509
+#define GL_INVERT 0x150A
+#define GL_OR_REVERSE 0x150B
+#define GL_COPY_INVERTED 0x150C
+#define GL_OR_INVERTED 0x150D
+#define GL_NAND 0x150E
+#define GL_SET 0x150F
+#define GL_TEXTURE 0x1702
+#define GL_COLOR 0x1800
+#define GL_DEPTH 0x1801
+#define GL_STENCIL 0x1802
+#define GL_STENCIL_INDEX 0x1901
+#define GL_DEPTH_COMPONENT 0x1902
+#define GL_RED 0x1903
+#define GL_GREEN 0x1904
+#define GL_BLUE 0x1905
+#define GL_ALPHA 0x1906
+#define GL_RGB 0x1907
+#define GL_RGBA 0x1908
+#define GL_POINT 0x1B00
+#define GL_LINE 0x1B01
+#define GL_FILL 0x1B02
+#define GL_KEEP 0x1E00
+#define GL_REPLACE 0x1E01
+#define GL_INCR 0x1E02
+#define GL_DECR 0x1E03
+#define GL_VENDOR 0x1F00
+#define GL_RENDERER 0x1F01
+#define GL_VERSION 0x1F02
+#define GL_EXTENSIONS 0x1F03
+#define GL_NEAREST 0x2600
+#define GL_LINEAR 0x2601
+#define GL_NEAREST_MIPMAP_NEAREST 0x2700
+#define GL_LINEAR_MIPMAP_NEAREST 0x2701
+#define GL_NEAREST_MIPMAP_LINEAR 0x2702
+#define GL_LINEAR_MIPMAP_LINEAR 0x2703
+#define GL_TEXTURE_MAG_FILTER 0x2800
+#define GL_TEXTURE_MIN_FILTER 0x2801
+#define GL_TEXTURE_WRAP_S 0x2802
+#define GL_TEXTURE_WRAP_T 0x2803
+#define GL_REPEAT 0x2901
+#define GL_CURRENT_BIT 0x00000001
+#define GL_POINT_BIT 0x00000002
+#define GL_LINE_BIT 0x00000004
+#define GL_POLYGON_BIT 0x00000008
+#define GL_POLYGON_STIPPLE_BIT 0x00000010
+#define GL_PIXEL_MODE_BIT 0x00000020
+#define GL_LIGHTING_BIT 0x00000040
+#define GL_FOG_BIT 0x00000080
+#define GL_ACCUM_BUFFER_BIT 0x00000200
+#define GL_VIEWPORT_BIT 0x00000800
+#define GL_TRANSFORM_BIT 0x00001000
+#define GL_ENABLE_BIT 0x00002000
+#define GL_HINT_BIT 0x00008000
+#define GL_EVAL_BIT 0x00010000
+#define GL_LIST_BIT 0x00020000
+#define GL_TEXTURE_BIT 0x00040000
+#define GL_SCISSOR_BIT 0x00080000
+#define GL_ALL_ATTRIB_BITS 0xFFFFFFFF
+#define GL_QUAD_STRIP 0x0008
+#define GL_POLYGON 0x0009
+#define GL_ACCUM 0x0100
+#define GL_LOAD 0x0101
+#define GL_RETURN 0x0102
+#define GL_MULT 0x0103
+#define GL_ADD 0x0104
+#define GL_AUX0 0x0409
+#define GL_AUX1 0x040A
+#define GL_AUX2 0x040B
+#define GL_AUX3 0x040C
+#define GL_2D 0x0600
+#define GL_3D 0x0601
+#define GL_3D_COLOR 0x0602
+#define GL_3D_COLOR_TEXTURE 0x0603
+#define GL_4D_COLOR_TEXTURE 0x0604
+#define GL_PASS_THROUGH_TOKEN 0x0700
+#define GL_POINT_TOKEN 0x0701
+#define GL_LINE_TOKEN 0x0702
+#define GL_POLYGON_TOKEN 0x0703
+#define GL_BITMAP_TOKEN 0x0704
+#define GL_DRAW_PIXEL_TOKEN 0x0705
+#define GL_COPY_PIXEL_TOKEN 0x0706
+#define GL_LINE_RESET_TOKEN 0x0707
+#define GL_EXP 0x0800
+#define GL_EXP2 0x0801
+#define GL_COEFF 0x0A00
+#define GL_ORDER 0x0A01
+#define GL_DOMAIN 0x0A02
+#define GL_PIXEL_MAP_I_TO_I 0x0C70
+#define GL_PIXEL_MAP_S_TO_S 0x0C71
+#define GL_PIXEL_MAP_I_TO_R 0x0C72
+#define GL_PIXEL_MAP_I_TO_G 0x0C73
+#define GL_PIXEL_MAP_I_TO_B 0x0C74
+#define GL_PIXEL_MAP_I_TO_A 0x0C75
+#define GL_PIXEL_MAP_R_TO_R 0x0C76
+#define GL_PIXEL_MAP_G_TO_G 0x0C77
+#define GL_PIXEL_MAP_B_TO_B 0x0C78
+#define GL_PIXEL_MAP_A_TO_A 0x0C79
+#define GL_CURRENT_COLOR 0x0B00
+#define GL_CURRENT_INDEX 0x0B01
+#define GL_CURRENT_NORMAL 0x0B02
+#define GL_CURRENT_TEXTURE_COORDS 0x0B03
+#define GL_CURRENT_RASTER_COLOR 0x0B04
+#define GL_CURRENT_RASTER_INDEX 0x0B05
+#define GL_CURRENT_RASTER_TEXTURE_COORDS 0x0B06
+#define GL_CURRENT_RASTER_POSITION 0x0B07
+#define GL_CURRENT_RASTER_POSITION_VALID 0x0B08
+#define GL_CURRENT_RASTER_DISTANCE 0x0B09
+#define GL_POINT_SMOOTH 0x0B10
+#define GL_LINE_STIPPLE 0x0B24
+#define GL_LINE_STIPPLE_PATTERN 0x0B25
+#define GL_LINE_STIPPLE_REPEAT 0x0B26
+#define GL_LIST_MODE 0x0B30
+#define GL_MAX_LIST_NESTING 0x0B31
+#define GL_LIST_BASE 0x0B32
+#define GL_LIST_INDEX 0x0B33
+#define GL_POLYGON_STIPPLE 0x0B42
+#define GL_EDGE_FLAG 0x0B43
+#define GL_LIGHTING 0x0B50
+#define GL_LIGHT_MODEL_LOCAL_VIEWER 0x0B51
+#define GL_LIGHT_MODEL_TWO_SIDE 0x0B52
+#define GL_LIGHT_MODEL_AMBIENT 0x0B53
+#define GL_SHADE_MODEL 0x0B54
+#define GL_COLOR_MATERIAL_FACE 0x0B55
+#define GL_COLOR_MATERIAL_PARAMETER 0x0B56
+#define GL_COLOR_MATERIAL 0x0B57
+#define GL_FOG 0x0B60
+#define GL_FOG_INDEX 0x0B61
+#define GL_FOG_DENSITY 0x0B62
+#define GL_FOG_START 0x0B63
+#define GL_FOG_END 0x0B64
+#define GL_FOG_MODE 0x0B65
+#define GL_FOG_COLOR 0x0B66
+#define GL_ACCUM_CLEAR_VALUE 0x0B80
+#define GL_MATRIX_MODE 0x0BA0
+#define GL_NORMALIZE 0x0BA1
+#define GL_MODELVIEW_STACK_DEPTH 0x0BA3
+#define GL_PROJECTION_STACK_DEPTH 0x0BA4
+#define GL_TEXTURE_STACK_DEPTH 0x0BA5
+#define GL_MODELVIEW_MATRIX 0x0BA6
+#define GL_PROJECTION_MATRIX 0x0BA7
+#define GL_TEXTURE_MATRIX 0x0BA8
+#define GL_ATTRIB_STACK_DEPTH 0x0BB0
+#define GL_ALPHA_TEST 0x0BC0
+#define GL_ALPHA_TEST_FUNC 0x0BC1
+#define GL_ALPHA_TEST_REF 0x0BC2
+#define GL_LOGIC_OP 0x0BF1
+#define GL_AUX_BUFFERS 0x0C00
+#define GL_INDEX_CLEAR_VALUE 0x0C20
+#define GL_INDEX_WRITEMASK 0x0C21
+#define GL_INDEX_MODE 0x0C30
+#define GL_RGBA_MODE 0x0C31
+#define GL_RENDER_MODE 0x0C40
+#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50
+#define GL_POINT_SMOOTH_HINT 0x0C51
+#define GL_FOG_HINT 0x0C54
+#define GL_TEXTURE_GEN_S 0x0C60
+#define GL_TEXTURE_GEN_T 0x0C61
+#define GL_TEXTURE_GEN_R 0x0C62
+#define GL_TEXTURE_GEN_Q 0x0C63
+#define GL_PIXEL_MAP_I_TO_I_SIZE 0x0CB0
+#define GL_PIXEL_MAP_S_TO_S_SIZE 0x0CB1
+#define GL_PIXEL_MAP_I_TO_R_SIZE 0x0CB2
+#define GL_PIXEL_MAP_I_TO_G_SIZE 0x0CB3
+#define GL_PIXEL_MAP_I_TO_B_SIZE 0x0CB4
+#define GL_PIXEL_MAP_I_TO_A_SIZE 0x0CB5
+#define GL_PIXEL_MAP_R_TO_R_SIZE 0x0CB6
+#define GL_PIXEL_MAP_G_TO_G_SIZE 0x0CB7
+#define GL_PIXEL_MAP_B_TO_B_SIZE 0x0CB8
+#define GL_PIXEL_MAP_A_TO_A_SIZE 0x0CB9
+#define GL_MAP_COLOR 0x0D10
+#define GL_MAP_STENCIL 0x0D11
+#define GL_INDEX_SHIFT 0x0D12
+#define GL_INDEX_OFFSET 0x0D13
+#define GL_RED_SCALE 0x0D14
+#define GL_RED_BIAS 0x0D15
+#define GL_ZOOM_X 0x0D16
+#define GL_ZOOM_Y 0x0D17
+#define GL_GREEN_SCALE 0x0D18
+#define GL_GREEN_BIAS 0x0D19
+#define GL_BLUE_SCALE 0x0D1A
+#define GL_BLUE_BIAS 0x0D1B
+#define GL_ALPHA_SCALE 0x0D1C
+#define GL_ALPHA_BIAS 0x0D1D
+#define GL_DEPTH_SCALE 0x0D1E
+#define GL_DEPTH_BIAS 0x0D1F
+#define GL_MAX_EVAL_ORDER 0x0D30
+#define GL_MAX_LIGHTS 0x0D31
+#define GL_MAX_CLIP_PLANES 0x0D32
+#define GL_MAX_PIXEL_MAP_TABLE 0x0D34
+#define GL_MAX_ATTRIB_STACK_DEPTH 0x0D35
+#define GL_MAX_MODELVIEW_STACK_DEPTH 0x0D36
+#define GL_MAX_NAME_STACK_DEPTH 0x0D37
+#define GL_MAX_PROJECTION_STACK_DEPTH 0x0D38
+#define GL_MAX_TEXTURE_STACK_DEPTH 0x0D39
+#define GL_INDEX_BITS 0x0D51
+#define GL_RED_BITS 0x0D52
+#define GL_GREEN_BITS 0x0D53
+#define GL_BLUE_BITS 0x0D54
+#define GL_ALPHA_BITS 0x0D55
+#define GL_DEPTH_BITS 0x0D56
+#define GL_STENCIL_BITS 0x0D57
+#define GL_ACCUM_RED_BITS 0x0D58
+#define GL_ACCUM_GREEN_BITS 0x0D59
+#define GL_ACCUM_BLUE_BITS 0x0D5A
+#define GL_ACCUM_ALPHA_BITS 0x0D5B
+#define GL_NAME_STACK_DEPTH 0x0D70
+#define GL_AUTO_NORMAL 0x0D80
+#define GL_MAP1_COLOR_4 0x0D90
+#define GL_MAP1_INDEX 0x0D91
+#define GL_MAP1_NORMAL 0x0D92
+#define GL_MAP1_TEXTURE_COORD_1 0x0D93
+#define GL_MAP1_TEXTURE_COORD_2 0x0D94
+#define GL_MAP1_TEXTURE_COORD_3 0x0D95
+#define GL_MAP1_TEXTURE_COORD_4 0x0D96
+#define GL_MAP1_VERTEX_3 0x0D97
+#define GL_MAP1_VERTEX_4 0x0D98
+#define GL_MAP2_COLOR_4 0x0DB0
+#define GL_MAP2_INDEX 0x0DB1
+#define GL_MAP2_NORMAL 0x0DB2
+#define GL_MAP2_TEXTURE_COORD_1 0x0DB3
+#define GL_MAP2_TEXTURE_COORD_2 0x0DB4
+#define GL_MAP2_TEXTURE_COORD_3 0x0DB5
+#define GL_MAP2_TEXTURE_COORD_4 0x0DB6
+#define GL_MAP2_VERTEX_3 0x0DB7
+#define GL_MAP2_VERTEX_4 0x0DB8
+#define GL_MAP1_GRID_DOMAIN 0x0DD0
+#define GL_MAP1_GRID_SEGMENTS 0x0DD1
+#define GL_MAP2_GRID_DOMAIN 0x0DD2
+#define GL_MAP2_GRID_SEGMENTS 0x0DD3
+#define GL_TEXTURE_COMPONENTS 0x1003
+#define GL_TEXTURE_BORDER 0x1005
+#define GL_AMBIENT 0x1200
+#define GL_DIFFUSE 0x1201
+#define GL_SPECULAR 0x1202
+#define GL_POSITION 0x1203
+#define GL_SPOT_DIRECTION 0x1204
+#define GL_SPOT_EXPONENT 0x1205
+#define GL_SPOT_CUTOFF 0x1206
+#define GL_CONSTANT_ATTENUATION 0x1207
+#define GL_LINEAR_ATTENUATION 0x1208
+#define GL_QUADRATIC_ATTENUATION 0x1209
+#define GL_COMPILE 0x1300
+#define GL_COMPILE_AND_EXECUTE 0x1301
+#define GL_2_BYTES 0x1407
+#define GL_3_BYTES 0x1408
+#define GL_4_BYTES 0x1409
+#define GL_EMISSION 0x1600
+#define GL_SHININESS 0x1601
+#define GL_AMBIENT_AND_DIFFUSE 0x1602
+#define GL_COLOR_INDEXES 0x1603
+#define GL_MODELVIEW 0x1700
+#define GL_PROJECTION 0x1701
+#define GL_COLOR_INDEX 0x1900
+#define GL_LUMINANCE 0x1909
+#define GL_LUMINANCE_ALPHA 0x190A
+#define GL_BITMAP 0x1A00
+#define GL_RENDER 0x1C00
+#define GL_FEEDBACK 0x1C01
+#define GL_SELECT 0x1C02
+#define GL_FLAT 0x1D00
+#define GL_SMOOTH 0x1D01
+#define GL_S 0x2000
+#define GL_T 0x2001
+#define GL_R 0x2002
+#define GL_Q 0x2003
+#define GL_MODULATE 0x2100
+#define GL_DECAL 0x2101
+#define GL_TEXTURE_ENV_MODE 0x2200
+#define GL_TEXTURE_ENV_COLOR 0x2201
+#define GL_TEXTURE_ENV 0x2300
+#define GL_EYE_LINEAR 0x2400
+#define GL_OBJECT_LINEAR 0x2401
+#define GL_SPHERE_MAP 0x2402
+#define GL_TEXTURE_GEN_MODE 0x2500
+#define GL_OBJECT_PLANE 0x2501
+#define GL_EYE_PLANE 0x2502
+#define GL_CLAMP 0x2900
+#define GL_CLIP_PLANE0 0x3000
+#define GL_CLIP_PLANE1 0x3001
+#define GL_CLIP_PLANE2 0x3002
+#define GL_CLIP_PLANE3 0x3003
+#define GL_CLIP_PLANE4 0x3004
+#define GL_CLIP_PLANE5 0x3005
+#define GL_LIGHT0 0x4000
+#define GL_LIGHT1 0x4001
+#define GL_LIGHT2 0x4002
+#define GL_LIGHT3 0x4003
+#define GL_LIGHT4 0x4004
+#define GL_LIGHT5 0x4005
+#define GL_LIGHT6 0x4006
+#define GL_LIGHT7 0x4007
+#define GL_COLOR_LOGIC_OP 0x0BF2
+#define GL_POLYGON_OFFSET_UNITS 0x2A00
+#define GL_POLYGON_OFFSET_POINT 0x2A01
+#define GL_POLYGON_OFFSET_LINE 0x2A02
+#define GL_POLYGON_OFFSET_FILL 0x8037
+#define GL_POLYGON_OFFSET_FACTOR 0x8038
+#define GL_TEXTURE_BINDING_1D 0x8068
+#define GL_TEXTURE_BINDING_2D 0x8069
+#define GL_TEXTURE_INTERNAL_FORMAT 0x1003
+#define GL_TEXTURE_RED_SIZE 0x805C
+#define GL_TEXTURE_GREEN_SIZE 0x805D
+#define GL_TEXTURE_BLUE_SIZE 0x805E
+#define GL_TEXTURE_ALPHA_SIZE 0x805F
+#define GL_DOUBLE 0x140A
+#define GL_PROXY_TEXTURE_1D 0x8063
+#define GL_PROXY_TEXTURE_2D 0x8064
+#define GL_R3_G3_B2 0x2A10
+#define GL_RGB4 0x804F
+#define GL_RGB5 0x8050
+#define GL_RGB8 0x8051
+#define GL_RGB10 0x8052
+#define GL_RGB12 0x8053
+#define GL_RGB16 0x8054
+#define GL_RGBA2 0x8055
+#define GL_RGBA4 0x8056
+#define GL_RGB5_A1 0x8057
+#define GL_RGBA8 0x8058
+#define GL_RGB10_A2 0x8059
+#define GL_RGBA12 0x805A
+#define GL_RGBA16 0x805B
+#define GL_CLIENT_PIXEL_STORE_BIT 0x00000001
+#define GL_CLIENT_VERTEX_ARRAY_BIT 0x00000002
+#define GL_CLIENT_ALL_ATTRIB_BITS 0xFFFFFFFF
+#define GL_VERTEX_ARRAY_POINTER 0x808E
+#define GL_NORMAL_ARRAY_POINTER 0x808F
+#define GL_COLOR_ARRAY_POINTER 0x8090
+#define GL_INDEX_ARRAY_POINTER 0x8091
+#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092
+#define GL_EDGE_FLAG_ARRAY_POINTER 0x8093
+#define GL_FEEDBACK_BUFFER_POINTER 0x0DF0
+#define GL_SELECTION_BUFFER_POINTER 0x0DF3
+#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1
+#define GL_INDEX_LOGIC_OP 0x0BF1
+#define GL_MAX_CLIENT_ATTRIB_STACK_DEPTH 0x0D3B
+#define GL_FEEDBACK_BUFFER_SIZE 0x0DF1
+#define GL_FEEDBACK_BUFFER_TYPE 0x0DF2
+#define GL_SELECTION_BUFFER_SIZE 0x0DF4
+#define GL_VERTEX_ARRAY 0x8074
+#define GL_NORMAL_ARRAY 0x8075
+#define GL_COLOR_ARRAY 0x8076
+#define GL_INDEX_ARRAY 0x8077
+#define GL_TEXTURE_COORD_ARRAY 0x8078
+#define GL_EDGE_FLAG_ARRAY 0x8079
+#define GL_VERTEX_ARRAY_SIZE 0x807A
+#define GL_VERTEX_ARRAY_TYPE 0x807B
+#define GL_VERTEX_ARRAY_STRIDE 0x807C
+#define GL_NORMAL_ARRAY_TYPE 0x807E
+#define GL_NORMAL_ARRAY_STRIDE 0x807F
+#define GL_COLOR_ARRAY_SIZE 0x8081
+#define GL_COLOR_ARRAY_TYPE 0x8082
+#define GL_COLOR_ARRAY_STRIDE 0x8083
+#define GL_INDEX_ARRAY_TYPE 0x8085
+#define GL_INDEX_ARRAY_STRIDE 0x8086
+#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088
+#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089
+#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A
+#define GL_EDGE_FLAG_ARRAY_STRIDE 0x808C
+#define GL_TEXTURE_LUMINANCE_SIZE 0x8060
+#define GL_TEXTURE_INTENSITY_SIZE 0x8061
+#define GL_TEXTURE_PRIORITY 0x8066
+#define GL_TEXTURE_RESIDENT 0x8067
+#define GL_ALPHA4 0x803B
+#define GL_ALPHA8 0x803C
+#define GL_ALPHA12 0x803D
+#define GL_ALPHA16 0x803E
+#define GL_LUMINANCE4 0x803F
+#define GL_LUMINANCE8 0x8040
+#define GL_LUMINANCE12 0x8041
+#define GL_LUMINANCE16 0x8042
+#define GL_LUMINANCE4_ALPHA4 0x8043
+#define GL_LUMINANCE6_ALPHA2 0x8044
+#define GL_LUMINANCE8_ALPHA8 0x8045
+#define GL_LUMINANCE12_ALPHA4 0x8046
+#define GL_LUMINANCE12_ALPHA12 0x8047
+#define GL_LUMINANCE16_ALPHA16 0x8048
+#define GL_INTENSITY 0x8049
+#define GL_INTENSITY4 0x804A
+#define GL_INTENSITY8 0x804B
+#define GL_INTENSITY12 0x804C
+#define GL_INTENSITY16 0x804D
+#define GL_V2F 0x2A20
+#define GL_V3F 0x2A21
+#define GL_C4UB_V2F 0x2A22
+#define GL_C4UB_V3F 0x2A23
+#define GL_C3F_V3F 0x2A24
+#define GL_N3F_V3F 0x2A25
+#define GL_C4F_N3F_V3F 0x2A26
+#define GL_T2F_V3F 0x2A27
+#define GL_T4F_V4F 0x2A28
+#define GL_T2F_C4UB_V3F 0x2A29
+#define GL_T2F_C3F_V3F 0x2A2A
+#define GL_T2F_N3F_V3F 0x2A2B
+#define GL_T2F_C4F_N3F_V3F 0x2A2C
+#define GL_T4F_C4F_N3F_V4F 0x2A2D
+#define GL_UNSIGNED_BYTE_3_3_2 0x8032
+#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033
+#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034
+#define GL_UNSIGNED_INT_8_8_8_8 0x8035
+#define GL_UNSIGNED_INT_10_10_10_2 0x8036
+#define GL_TEXTURE_BINDING_3D 0x806A
+#define GL_PACK_SKIP_IMAGES 0x806B
+#define GL_PACK_IMAGE_HEIGHT 0x806C
+#define GL_UNPACK_SKIP_IMAGES 0x806D
+#define GL_UNPACK_IMAGE_HEIGHT 0x806E
+#define GL_TEXTURE_3D 0x806F
+#define GL_PROXY_TEXTURE_3D 0x8070
+#define GL_TEXTURE_DEPTH 0x8071
+#define GL_TEXTURE_WRAP_R 0x8072
+#define GL_MAX_3D_TEXTURE_SIZE 0x8073
+#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362
+#define GL_UNSIGNED_SHORT_5_6_5 0x8363
+#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364
+#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365
+#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366
+#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367
+#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368
+#define GL_BGR 0x80E0
+#define GL_BGRA 0x80E1
+#define GL_MAX_ELEMENTS_VERTICES 0x80E8
+#define GL_MAX_ELEMENTS_INDICES 0x80E9
+#define GL_CLAMP_TO_EDGE 0x812F
+#define GL_TEXTURE_MIN_LOD 0x813A
+#define GL_TEXTURE_MAX_LOD 0x813B
+#define GL_TEXTURE_BASE_LEVEL 0x813C
+#define GL_TEXTURE_MAX_LEVEL 0x813D
+#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12
+#define GL_SMOOTH_POINT_SIZE_GRANULARITY 0x0B13
+#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22
+#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23
+#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E
+#define GL_RESCALE_NORMAL 0x803A
+#define GL_LIGHT_MODEL_COLOR_CONTROL 0x81F8
+#define GL_SINGLE_COLOR 0x81F9
+#define GL_SEPARATE_SPECULAR_COLOR 0x81FA
+#define GL_ALIASED_POINT_SIZE_RANGE 0x846D
+#define GL_TEXTURE0 0x84C0
+#define GL_TEXTURE1 0x84C1
+#define GL_TEXTURE2 0x84C2
+#define GL_TEXTURE3 0x84C3
+#define GL_TEXTURE4 0x84C4
+#define GL_TEXTURE5 0x84C5
+#define GL_TEXTURE6 0x84C6
+#define GL_TEXTURE7 0x84C7
+#define GL_TEXTURE8 0x84C8
+#define GL_TEXTURE9 0x84C9
+#define GL_TEXTURE10 0x84CA
+#define GL_TEXTURE11 0x84CB
+#define GL_TEXTURE12 0x84CC
+#define GL_TEXTURE13 0x84CD
+#define GL_TEXTURE14 0x84CE
+#define GL_TEXTURE15 0x84CF
+#define GL_TEXTURE16 0x84D0
+#define GL_TEXTURE17 0x84D1
+#define GL_TEXTURE18 0x84D2
+#define GL_TEXTURE19 0x84D3
+#define GL_TEXTURE20 0x84D4
+#define GL_TEXTURE21 0x84D5
+#define GL_TEXTURE22 0x84D6
+#define GL_TEXTURE23 0x84D7
+#define GL_TEXTURE24 0x84D8
+#define GL_TEXTURE25 0x84D9
+#define GL_TEXTURE26 0x84DA
+#define GL_TEXTURE27 0x84DB
+#define GL_TEXTURE28 0x84DC
+#define GL_TEXTURE29 0x84DD
+#define GL_TEXTURE30 0x84DE
+#define GL_TEXTURE31 0x84DF
+#define GL_ACTIVE_TEXTURE 0x84E0
+#define GL_MULTISAMPLE 0x809D
+#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E
+#define GL_SAMPLE_ALPHA_TO_ONE 0x809F
+#define GL_SAMPLE_COVERAGE 0x80A0
+#define GL_SAMPLE_BUFFERS 0x80A8
+#define GL_SAMPLES 0x80A9
+#define GL_SAMPLE_COVERAGE_VALUE 0x80AA
+#define GL_SAMPLE_COVERAGE_INVERT 0x80AB
+#define GL_TEXTURE_CUBE_MAP 0x8513
+#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A
+#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B
+#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C
+#define GL_COMPRESSED_RGB 0x84ED
+#define GL_COMPRESSED_RGBA 0x84EE
+#define GL_TEXTURE_COMPRESSION_HINT 0x84EF
+#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0
+#define GL_TEXTURE_COMPRESSED 0x86A1
+#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2
+#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3
+#define GL_CLAMP_TO_BORDER 0x812D
+#define GL_CLIENT_ACTIVE_TEXTURE 0x84E1
+#define GL_MAX_TEXTURE_UNITS 0x84E2
+#define GL_TRANSPOSE_MODELVIEW_MATRIX 0x84E3
+#define GL_TRANSPOSE_PROJECTION_MATRIX 0x84E4
+#define GL_TRANSPOSE_TEXTURE_MATRIX 0x84E5
+#define GL_TRANSPOSE_COLOR_MATRIX 0x84E6
+#define GL_MULTISAMPLE_BIT 0x20000000
+#define GL_NORMAL_MAP 0x8511
+#define GL_REFLECTION_MAP 0x8512
+#define GL_COMPRESSED_ALPHA 0x84E9
+#define GL_COMPRESSED_LUMINANCE 0x84EA
+#define GL_COMPRESSED_LUMINANCE_ALPHA 0x84EB
+#define GL_COMPRESSED_INTENSITY 0x84EC
+#define GL_COMBINE 0x8570
+#define GL_COMBINE_RGB 0x8571
+#define GL_COMBINE_ALPHA 0x8572
+#define GL_SOURCE0_RGB 0x8580
+#define GL_SOURCE1_RGB 0x8581
+#define GL_SOURCE2_RGB 0x8582
+#define GL_SOURCE0_ALPHA 0x8588
+#define GL_SOURCE1_ALPHA 0x8589
+#define GL_SOURCE2_ALPHA 0x858A
+#define GL_OPERAND0_RGB 0x8590
+#define GL_OPERAND1_RGB 0x8591
+#define GL_OPERAND2_RGB 0x8592
+#define GL_OPERAND0_ALPHA 0x8598
+#define GL_OPERAND1_ALPHA 0x8599
+#define GL_OPERAND2_ALPHA 0x859A
+#define GL_RGB_SCALE 0x8573
+#define GL_ADD_SIGNED 0x8574
+#define GL_INTERPOLATE 0x8575
+#define GL_SUBTRACT 0x84E7
+#define GL_CONSTANT 0x8576
+#define GL_PRIMARY_COLOR 0x8577
+#define GL_PREVIOUS 0x8578
+#define GL_DOT3_RGB 0x86AE
+#define GL_DOT3_RGBA 0x86AF
+#define GL_BLEND_DST_RGB 0x80C8
+#define GL_BLEND_SRC_RGB 0x80C9
+#define GL_BLEND_DST_ALPHA 0x80CA
+#define GL_BLEND_SRC_ALPHA 0x80CB
+#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128
+#define GL_DEPTH_COMPONENT16 0x81A5
+#define GL_DEPTH_COMPONENT24 0x81A6
+#define GL_DEPTH_COMPONENT32 0x81A7
+#define GL_MIRRORED_REPEAT 0x8370
+#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD
+#define GL_TEXTURE_LOD_BIAS 0x8501
+#define GL_INCR_WRAP 0x8507
+#define GL_DECR_WRAP 0x8508
+#define GL_TEXTURE_DEPTH_SIZE 0x884A
+#define GL_TEXTURE_COMPARE_MODE 0x884C
+#define GL_TEXTURE_COMPARE_FUNC 0x884D
+#define GL_POINT_SIZE_MIN 0x8126
+#define GL_POINT_SIZE_MAX 0x8127
+#define GL_POINT_DISTANCE_ATTENUATION 0x8129
+#define GL_GENERATE_MIPMAP 0x8191
+#define GL_GENERATE_MIPMAP_HINT 0x8192
+#define GL_FOG_COORDINATE_SOURCE 0x8450
+#define GL_FOG_COORDINATE 0x8451
+#define GL_FRAGMENT_DEPTH 0x8452
+#define GL_CURRENT_FOG_COORDINATE 0x8453
+#define GL_FOG_COORDINATE_ARRAY_TYPE 0x8454
+#define GL_FOG_COORDINATE_ARRAY_STRIDE 0x8455
+#define GL_FOG_COORDINATE_ARRAY_POINTER 0x8456
+#define GL_FOG_COORDINATE_ARRAY 0x8457
+#define GL_COLOR_SUM 0x8458
+#define GL_CURRENT_SECONDARY_COLOR 0x8459
+#define GL_SECONDARY_COLOR_ARRAY_SIZE 0x845A
+#define GL_SECONDARY_COLOR_ARRAY_TYPE 0x845B
+#define GL_SECONDARY_COLOR_ARRAY_STRIDE 0x845C
+#define GL_SECONDARY_COLOR_ARRAY_POINTER 0x845D
+#define GL_SECONDARY_COLOR_ARRAY 0x845E
+#define GL_TEXTURE_FILTER_CONTROL 0x8500
+#define GL_DEPTH_TEXTURE_MODE 0x884B
+#define GL_COMPARE_R_TO_TEXTURE 0x884E
+#define GL_BLEND_COLOR 0x8005
+#define GL_BLEND_EQUATION 0x8009
+#define GL_CONSTANT_COLOR 0x8001
+#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002
+#define GL_CONSTANT_ALPHA 0x8003
+#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004
+#define GL_FUNC_ADD 0x8006
+#define GL_FUNC_REVERSE_SUBTRACT 0x800B
+#define GL_FUNC_SUBTRACT 0x800A
+#define GL_MIN 0x8007
+#define GL_MAX 0x8008
+#define GL_BUFFER_SIZE 0x8764
+#define GL_BUFFER_USAGE 0x8765
+#define GL_QUERY_COUNTER_BITS 0x8864
+#define GL_CURRENT_QUERY 0x8865
+#define GL_QUERY_RESULT 0x8866
+#define GL_QUERY_RESULT_AVAILABLE 0x8867
+#define GL_ARRAY_BUFFER 0x8892
+#define GL_ELEMENT_ARRAY_BUFFER 0x8893
+#define GL_ARRAY_BUFFER_BINDING 0x8894
+#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895
+#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F
+#define GL_READ_ONLY 0x88B8
+#define GL_WRITE_ONLY 0x88B9
+#define GL_READ_WRITE 0x88BA
+#define GL_BUFFER_ACCESS 0x88BB
+#define GL_BUFFER_MAPPED 0x88BC
+#define GL_BUFFER_MAP_POINTER 0x88BD
+#define GL_STREAM_DRAW 0x88E0
+#define GL_STREAM_READ 0x88E1
+#define GL_STREAM_COPY 0x88E2
+#define GL_STATIC_DRAW 0x88E4
+#define GL_STATIC_READ 0x88E5
+#define GL_STATIC_COPY 0x88E6
+#define GL_DYNAMIC_DRAW 0x88E8
+#define GL_DYNAMIC_READ 0x88E9
+#define GL_DYNAMIC_COPY 0x88EA
+#define GL_SAMPLES_PASSED 0x8914
+#define GL_SRC1_ALPHA 0x8589
+#define GL_VERTEX_ARRAY_BUFFER_BINDING 0x8896
+#define GL_NORMAL_ARRAY_BUFFER_BINDING 0x8897
+#define GL_COLOR_ARRAY_BUFFER_BINDING 0x8898
+#define GL_INDEX_ARRAY_BUFFER_BINDING 0x8899
+#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING 0x889A
+#define GL_EDGE_FLAG_ARRAY_BUFFER_BINDING 0x889B
+#define GL_SECONDARY_COLOR_ARRAY_BUFFER_BINDING 0x889C
+#define GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING 0x889D
+#define GL_WEIGHT_ARRAY_BUFFER_BINDING 0x889E
+#define GL_FOG_COORD_SRC 0x8450
+#define GL_FOG_COORD 0x8451
+#define GL_CURRENT_FOG_COORD 0x8453
+#define GL_FOG_COORD_ARRAY_TYPE 0x8454
+#define GL_FOG_COORD_ARRAY_STRIDE 0x8455
+#define GL_FOG_COORD_ARRAY_POINTER 0x8456
+#define GL_FOG_COORD_ARRAY 0x8457
+#define GL_FOG_COORD_ARRAY_BUFFER_BINDING 0x889D
+#define GL_SRC0_RGB 0x8580
+#define GL_SRC1_RGB 0x8581
+#define GL_SRC2_RGB 0x8582
+#define GL_SRC0_ALPHA 0x8588
+#define GL_SRC2_ALPHA 0x858A
+#define GL_BLEND_EQUATION_RGB 0x8009
+#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622
+#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623
+#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624
+#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625
+#define GL_CURRENT_VERTEX_ATTRIB 0x8626
+#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642
+#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645
+#define GL_STENCIL_BACK_FUNC 0x8800
+#define GL_STENCIL_BACK_FAIL 0x8801
+#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802
+#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803
+#define GL_MAX_DRAW_BUFFERS 0x8824
+#define GL_DRAW_BUFFER0 0x8825
+#define GL_DRAW_BUFFER1 0x8826
+#define GL_DRAW_BUFFER2 0x8827
+#define GL_DRAW_BUFFER3 0x8828
+#define GL_DRAW_BUFFER4 0x8829
+#define GL_DRAW_BUFFER5 0x882A
+#define GL_DRAW_BUFFER6 0x882B
+#define GL_DRAW_BUFFER7 0x882C
+#define GL_DRAW_BUFFER8 0x882D
+#define GL_DRAW_BUFFER9 0x882E
+#define GL_DRAW_BUFFER10 0x882F
+#define GL_DRAW_BUFFER11 0x8830
+#define GL_DRAW_BUFFER12 0x8831
+#define GL_DRAW_BUFFER13 0x8832
+#define GL_DRAW_BUFFER14 0x8833
+#define GL_DRAW_BUFFER15 0x8834
+#define GL_BLEND_EQUATION_ALPHA 0x883D
+#define GL_MAX_VERTEX_ATTRIBS 0x8869
+#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A
+#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872
+#define GL_FRAGMENT_SHADER 0x8B30
+#define GL_VERTEX_SHADER 0x8B31
+#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49
+#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A
+#define GL_MAX_VARYING_FLOATS 0x8B4B
+#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C
+#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D
+#define GL_SHADER_TYPE 0x8B4F
+#define GL_FLOAT_VEC2 0x8B50
+#define GL_FLOAT_VEC3 0x8B51
+#define GL_FLOAT_VEC4 0x8B52
+#define GL_INT_VEC2 0x8B53
+#define GL_INT_VEC3 0x8B54
+#define GL_INT_VEC4 0x8B55
+#define GL_BOOL 0x8B56
+#define GL_BOOL_VEC2 0x8B57
+#define GL_BOOL_VEC3 0x8B58
+#define GL_BOOL_VEC4 0x8B59
+#define GL_FLOAT_MAT2 0x8B5A
+#define GL_FLOAT_MAT3 0x8B5B
+#define GL_FLOAT_MAT4 0x8B5C
+#define GL_SAMPLER_1D 0x8B5D
+#define GL_SAMPLER_2D 0x8B5E
+#define GL_SAMPLER_3D 0x8B5F
+#define GL_SAMPLER_CUBE 0x8B60
+#define GL_SAMPLER_1D_SHADOW 0x8B61
+#define GL_SAMPLER_2D_SHADOW 0x8B62
+#define GL_DELETE_STATUS 0x8B80
+#define GL_COMPILE_STATUS 0x8B81
+#define GL_LINK_STATUS 0x8B82
+#define GL_VALIDATE_STATUS 0x8B83
+#define GL_INFO_LOG_LENGTH 0x8B84
+#define GL_ATTACHED_SHADERS 0x8B85
+#define GL_ACTIVE_UNIFORMS 0x8B86
+#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87
+#define GL_SHADER_SOURCE_LENGTH 0x8B88
+#define GL_ACTIVE_ATTRIBUTES 0x8B89
+#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A
+#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B
+#define GL_SHADING_LANGUAGE_VERSION 0x8B8C
+#define GL_CURRENT_PROGRAM 0x8B8D
+#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0
+#define GL_LOWER_LEFT 0x8CA1
+#define GL_UPPER_LEFT 0x8CA2
+#define GL_STENCIL_BACK_REF 0x8CA3
+#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4
+#define GL_STENCIL_BACK_WRITEMASK 0x8CA5
+#define GL_VERTEX_PROGRAM_TWO_SIDE 0x8643
+#define GL_POINT_SPRITE 0x8861
+#define GL_COORD_REPLACE 0x8862
+#define GL_MAX_TEXTURE_COORDS 0x8871
+#define GL_PIXEL_PACK_BUFFER 0x88EB
+#define GL_PIXEL_UNPACK_BUFFER 0x88EC
+#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED
+#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF
+#define GL_FLOAT_MAT2x3 0x8B65
+#define GL_FLOAT_MAT2x4 0x8B66
+#define GL_FLOAT_MAT3x2 0x8B67
+#define GL_FLOAT_MAT3x4 0x8B68
+#define GL_FLOAT_MAT4x2 0x8B69
+#define GL_FLOAT_MAT4x3 0x8B6A
+#define GL_SRGB 0x8C40
+#define GL_SRGB8 0x8C41
+#define GL_SRGB_ALPHA 0x8C42
+#define GL_SRGB8_ALPHA8 0x8C43
+#define GL_COMPRESSED_SRGB 0x8C48
+#define GL_COMPRESSED_SRGB_ALPHA 0x8C49
+#define GL_CURRENT_RASTER_SECONDARY_COLOR 0x845F
+#define GL_SLUMINANCE_ALPHA 0x8C44
+#define GL_SLUMINANCE8_ALPHA8 0x8C45
+#define GL_SLUMINANCE 0x8C46
+#define GL_SLUMINANCE8 0x8C47
+#define GL_COMPRESSED_SLUMINANCE 0x8C4A
+#define GL_COMPRESSED_SLUMINANCE_ALPHA 0x8C4B
+#define GL_COMPARE_REF_TO_TEXTURE 0x884E
+#define GL_CLIP_DISTANCE0 0x3000
+#define GL_CLIP_DISTANCE1 0x3001
+#define GL_CLIP_DISTANCE2 0x3002
+#define GL_CLIP_DISTANCE3 0x3003
+#define GL_CLIP_DISTANCE4 0x3004
+#define GL_CLIP_DISTANCE5 0x3005
+#define GL_CLIP_DISTANCE6 0x3006
+#define GL_CLIP_DISTANCE7 0x3007
+#define GL_MAX_CLIP_DISTANCES 0x0D32
+#define GL_MAJOR_VERSION 0x821B
+#define GL_MINOR_VERSION 0x821C
+#define GL_NUM_EXTENSIONS 0x821D
+#define GL_CONTEXT_FLAGS 0x821E
+#define GL_COMPRESSED_RED 0x8225
+#define GL_COMPRESSED_RG 0x8226
+#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x00000001
+#define GL_RGBA32F 0x8814
+#define GL_RGB32F 0x8815
+#define GL_RGBA16F 0x881A
+#define GL_RGB16F 0x881B
+#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD
+#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF
+#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904
+#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905
+#define GL_CLAMP_READ_COLOR 0x891C
+#define GL_FIXED_ONLY 0x891D
+#define GL_MAX_VARYING_COMPONENTS 0x8B4B
+#define GL_TEXTURE_1D_ARRAY 0x8C18
+#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19
+#define GL_TEXTURE_2D_ARRAY 0x8C1A
+#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B
+#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C
+#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D
+#define GL_R11F_G11F_B10F 0x8C3A
+#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B
+#define GL_RGB9_E5 0x8C3D
+#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E
+#define GL_TEXTURE_SHARED_SIZE 0x8C3F
+#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76
+#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80
+#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83
+#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84
+#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85
+#define GL_PRIMITIVES_GENERATED 0x8C87
+#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88
+#define GL_RASTERIZER_DISCARD 0x8C89
+#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B
+#define GL_INTERLEAVED_ATTRIBS 0x8C8C
+#define GL_SEPARATE_ATTRIBS 0x8C8D
+#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E
+#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F
+#define GL_RGBA32UI 0x8D70
+#define GL_RGB32UI 0x8D71
+#define GL_RGBA16UI 0x8D76
+#define GL_RGB16UI 0x8D77
+#define GL_RGBA8UI 0x8D7C
+#define GL_RGB8UI 0x8D7D
+#define GL_RGBA32I 0x8D82
+#define GL_RGB32I 0x8D83
+#define GL_RGBA16I 0x8D88
+#define GL_RGB16I 0x8D89
+#define GL_RGBA8I 0x8D8E
+#define GL_RGB8I 0x8D8F
+#define GL_RED_INTEGER 0x8D94
+#define GL_GREEN_INTEGER 0x8D95
+#define GL_BLUE_INTEGER 0x8D96
+#define GL_RGB_INTEGER 0x8D98
+#define GL_RGBA_INTEGER 0x8D99
+#define GL_BGR_INTEGER 0x8D9A
+#define GL_BGRA_INTEGER 0x8D9B
+#define GL_SAMPLER_1D_ARRAY 0x8DC0
+#define GL_SAMPLER_2D_ARRAY 0x8DC1
+#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3
+#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4
+#define GL_SAMPLER_CUBE_SHADOW 0x8DC5
+#define GL_UNSIGNED_INT_VEC2 0x8DC6
+#define GL_UNSIGNED_INT_VEC3 0x8DC7
+#define GL_UNSIGNED_INT_VEC4 0x8DC8
+#define GL_INT_SAMPLER_1D 0x8DC9
+#define GL_INT_SAMPLER_2D 0x8DCA
+#define GL_INT_SAMPLER_3D 0x8DCB
+#define GL_INT_SAMPLER_CUBE 0x8DCC
+#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE
+#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF
+#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1
+#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2
+#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3
+#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4
+#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6
+#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7
+#define GL_QUERY_WAIT 0x8E13
+#define GL_QUERY_NO_WAIT 0x8E14
+#define GL_QUERY_BY_REGION_WAIT 0x8E15
+#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16
+#define GL_BUFFER_ACCESS_FLAGS 0x911F
+#define GL_BUFFER_MAP_LENGTH 0x9120
+#define GL_BUFFER_MAP_OFFSET 0x9121
+#define GL_DEPTH_COMPONENT32F 0x8CAC
+#define GL_DEPTH32F_STENCIL8 0x8CAD
+#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD
+#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506
+#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210
+#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211
+#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212
+#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213
+#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214
+#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215
+#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216
+#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217
+#define GL_FRAMEBUFFER_DEFAULT 0x8218
+#define GL_FRAMEBUFFER_UNDEFINED 0x8219
+#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A
+#define GL_MAX_RENDERBUFFER_SIZE 0x84E8
+#define GL_DEPTH_STENCIL 0x84F9
+#define GL_UNSIGNED_INT_24_8 0x84FA
+#define GL_DEPTH24_STENCIL8 0x88F0
+#define GL_TEXTURE_STENCIL_SIZE 0x88F1
+#define GL_TEXTURE_RED_TYPE 0x8C10
+#define GL_TEXTURE_GREEN_TYPE 0x8C11
+#define GL_TEXTURE_BLUE_TYPE 0x8C12
+#define GL_TEXTURE_ALPHA_TYPE 0x8C13
+#define GL_TEXTURE_DEPTH_TYPE 0x8C16
+#define GL_UNSIGNED_NORMALIZED 0x8C17
+#define GL_FRAMEBUFFER_BINDING 0x8CA6
+#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6
+#define GL_RENDERBUFFER_BINDING 0x8CA7
+#define GL_READ_FRAMEBUFFER 0x8CA8
+#define GL_DRAW_FRAMEBUFFER 0x8CA9
+#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA
+#define GL_RENDERBUFFER_SAMPLES 0x8CAB
+#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0
+#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4
+#define GL_FRAMEBUFFER_COMPLETE 0x8CD5
+#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6
+#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7
+#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB
+#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC
+#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD
+#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF
+#define GL_COLOR_ATTACHMENT0 0x8CE0
+#define GL_COLOR_ATTACHMENT1 0x8CE1
+#define GL_COLOR_ATTACHMENT2 0x8CE2
+#define GL_COLOR_ATTACHMENT3 0x8CE3
+#define GL_COLOR_ATTACHMENT4 0x8CE4
+#define GL_COLOR_ATTACHMENT5 0x8CE5
+#define GL_COLOR_ATTACHMENT6 0x8CE6
+#define GL_COLOR_ATTACHMENT7 0x8CE7
+#define GL_COLOR_ATTACHMENT8 0x8CE8
+#define GL_COLOR_ATTACHMENT9 0x8CE9
+#define GL_COLOR_ATTACHMENT10 0x8CEA
+#define GL_COLOR_ATTACHMENT11 0x8CEB
+#define GL_COLOR_ATTACHMENT12 0x8CEC
+#define GL_COLOR_ATTACHMENT13 0x8CED
+#define GL_COLOR_ATTACHMENT14 0x8CEE
+#define GL_COLOR_ATTACHMENT15 0x8CEF
+#define GL_COLOR_ATTACHMENT16 0x8CF0
+#define GL_COLOR_ATTACHMENT17 0x8CF1
+#define GL_COLOR_ATTACHMENT18 0x8CF2
+#define GL_COLOR_ATTACHMENT19 0x8CF3
+#define GL_COLOR_ATTACHMENT20 0x8CF4
+#define GL_COLOR_ATTACHMENT21 0x8CF5
+#define GL_COLOR_ATTACHMENT22 0x8CF6
+#define GL_COLOR_ATTACHMENT23 0x8CF7
+#define GL_COLOR_ATTACHMENT24 0x8CF8
+#define GL_COLOR_ATTACHMENT25 0x8CF9
+#define GL_COLOR_ATTACHMENT26 0x8CFA
+#define GL_COLOR_ATTACHMENT27 0x8CFB
+#define GL_COLOR_ATTACHMENT28 0x8CFC
+#define GL_COLOR_ATTACHMENT29 0x8CFD
+#define GL_COLOR_ATTACHMENT30 0x8CFE
+#define GL_COLOR_ATTACHMENT31 0x8CFF
+#define GL_DEPTH_ATTACHMENT 0x8D00
+#define GL_STENCIL_ATTACHMENT 0x8D20
+#define GL_FRAMEBUFFER 0x8D40
+#define GL_RENDERBUFFER 0x8D41
+#define GL_RENDERBUFFER_WIDTH 0x8D42
+#define GL_RENDERBUFFER_HEIGHT 0x8D43
+#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44
+#define GL_STENCIL_INDEX1 0x8D46
+#define GL_STENCIL_INDEX4 0x8D47
+#define GL_STENCIL_INDEX8 0x8D48
+#define GL_STENCIL_INDEX16 0x8D49
+#define GL_RENDERBUFFER_RED_SIZE 0x8D50
+#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51
+#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52
+#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53
+#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54
+#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55
+#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56
+#define GL_MAX_SAMPLES 0x8D57
+#define GL_INDEX 0x8222
+#define GL_TEXTURE_LUMINANCE_TYPE 0x8C14
+#define GL_TEXTURE_INTENSITY_TYPE 0x8C15
+#define GL_FRAMEBUFFER_SRGB 0x8DB9
+#define GL_HALF_FLOAT 0x140B
+#define GL_MAP_READ_BIT 0x0001
+#define GL_MAP_WRITE_BIT 0x0002
+#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004
+#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008
+#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010
+#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020
+#define GL_COMPRESSED_RED_RGTC1 0x8DBB
+#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC
+#define GL_COMPRESSED_RG_RGTC2 0x8DBD
+#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE
+#define GL_RG 0x8227
+#define GL_RG_INTEGER 0x8228
+#define GL_R8 0x8229
+#define GL_R16 0x822A
+#define GL_RG8 0x822B
+#define GL_RG16 0x822C
+#define GL_R16F 0x822D
+#define GL_R32F 0x822E
+#define GL_RG16F 0x822F
+#define GL_RG32F 0x8230
+#define GL_R8I 0x8231
+#define GL_R8UI 0x8232
+#define GL_R16I 0x8233
+#define GL_R16UI 0x8234
+#define GL_R32I 0x8235
+#define GL_R32UI 0x8236
+#define GL_RG8I 0x8237
+#define GL_RG8UI 0x8238
+#define GL_RG16I 0x8239
+#define GL_RG16UI 0x823A
+#define GL_RG32I 0x823B
+#define GL_RG32UI 0x823C
+#define GL_VERTEX_ARRAY_BINDING 0x85B5
+#define GL_CLAMP_VERTEX_COLOR 0x891A
+#define GL_CLAMP_FRAGMENT_COLOR 0x891B
+#define GL_ALPHA_INTEGER 0x8D97
+#define GL_SAMPLER_2D_RECT 0x8B63
+#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64
+#define GL_SAMPLER_BUFFER 0x8DC2
+#define GL_INT_SAMPLER_2D_RECT 0x8DCD
+#define GL_INT_SAMPLER_BUFFER 0x8DD0
+#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5
+#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8
+#define GL_TEXTURE_BUFFER 0x8C2A
+#define GL_MAX_TEXTURE_BUFFER_SIZE 0x8C2B
+#define GL_TEXTURE_BINDING_BUFFER 0x8C2C
+#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING 0x8C2D
+#define GL_TEXTURE_RECTANGLE 0x84F5
+#define GL_TEXTURE_BINDING_RECTANGLE 0x84F6
+#define GL_PROXY_TEXTURE_RECTANGLE 0x84F7
+#define GL_MAX_RECTANGLE_TEXTURE_SIZE 0x84F8
+#define GL_R8_SNORM 0x8F94
+#define GL_RG8_SNORM 0x8F95
+#define GL_RGB8_SNORM 0x8F96
+#define GL_RGBA8_SNORM 0x8F97
+#define GL_R16_SNORM 0x8F98
+#define GL_RG16_SNORM 0x8F99
+#define GL_RGB16_SNORM 0x8F9A
+#define GL_RGBA16_SNORM 0x8F9B
+#define GL_SIGNED_NORMALIZED 0x8F9C
+#define GL_PRIMITIVE_RESTART 0x8F9D
+#define GL_PRIMITIVE_RESTART_INDEX 0x8F9E
+#define GL_COPY_READ_BUFFER 0x8F36
+#define GL_COPY_WRITE_BUFFER 0x8F37
+#define GL_UNIFORM_BUFFER 0x8A11 /* stands apart numerically from the 0x8A28..0x8A46 run below */
+#define GL_UNIFORM_BUFFER_BINDING 0x8A28 /* first of a contiguous token run 0x8A28..0x8A46 */
+#define GL_UNIFORM_BUFFER_START 0x8A29
+#define GL_UNIFORM_BUFFER_SIZE 0x8A2A
+#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B /* GL_MAX_ tokens: one per stage name (VERTEX, GEOMETRY, FRAGMENT) plus COMBINED */
+#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C
+#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D
+#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E
+#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F
+#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30
+#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31
+#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32
+#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33
+#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34
+#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35
+#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36
+#define GL_UNIFORM_TYPE 0x8A37 /* GL_UNIFORM_ tokens without BLOCK in the name: 0x8A37..0x8A3E */
+#define GL_UNIFORM_SIZE 0x8A38
+#define GL_UNIFORM_NAME_LENGTH 0x8A39
+#define GL_UNIFORM_BLOCK_INDEX 0x8A3A
+#define GL_UNIFORM_OFFSET 0x8A3B
+#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C
+#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D
+#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E
+#define GL_UNIFORM_BLOCK_BINDING 0x8A3F /* GL_UNIFORM_BLOCK_ tokens: 0x8A3F..0x8A46 */
+#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40
+#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41
+#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42
+#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 /* last token of the contiguous run */
+#define GL_INVALID_INDEX 0xFFFFFFFF /* all 32 bits set; literal has no suffix, so its C type follows the compiler's integer-literal rules */
+#define GL_CONTEXT_CORE_PROFILE_BIT 0x00000001 /* single-bit flag (bit 0) */
+#define GL_CONTEXT_COMPATIBILITY_PROFILE_BIT 0x00000002 /* single-bit flag (bit 1); presumably tested against GL_CONTEXT_PROFILE_MASK below - confirm against the GL spec */
+#define GL_LINES_ADJACENCY 0x000A /* _ADJACENCY tokens: contiguous small values 0x000A..0x000D */
+#define GL_LINE_STRIP_ADJACENCY 0x000B
+#define GL_TRIANGLES_ADJACENCY 0x000C
+#define GL_TRIANGLE_STRIP_ADJACENCY 0x000D
+#define GL_PROGRAM_POINT_SIZE 0x8642
+#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS 0x8C29
+#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED 0x8DA7
+#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS 0x8DA8
+#define GL_GEOMETRY_SHADER 0x8DD9
+#define GL_GEOMETRY_VERTICES_OUT 0x8916 /* GL_GEOMETRY_ tokens: contiguous 0x8916..0x8918 */
+#define GL_GEOMETRY_INPUT_TYPE 0x8917
+#define GL_GEOMETRY_OUTPUT_TYPE 0x8918
+#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS 0x8DDF /* contiguous 0x8DDF..0x8DE1 */
+#define GL_MAX_GEOMETRY_OUTPUT_VERTICES 0x8DE0
+#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS 0x8DE1
+#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 /* contiguous 0x9122..0x9126, ending with GL_CONTEXT_PROFILE_MASK */
+#define GL_MAX_GEOMETRY_INPUT_COMPONENTS 0x9123
+#define GL_MAX_GEOMETRY_OUTPUT_COMPONENTS 0x9124
+#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125
+#define GL_CONTEXT_PROFILE_MASK 0x9126
+#define GL_DEPTH_CLAMP 0x864F
+#define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C /* provoking-vertex tokens: contiguous 0x8E4C..0x8E4F */
+#define GL_FIRST_VERTEX_CONVENTION 0x8E4D
+#define GL_LAST_VERTEX_CONVENTION 0x8E4E
+#define GL_PROVOKING_VERTEX 0x8E4F
+#define GL_TEXTURE_CUBE_MAP_SEAMLESS 0x884F
+#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 /* first of a contiguous token run 0x9111..0x911D */
+#define GL_OBJECT_TYPE 0x9112
+#define GL_SYNC_CONDITION 0x9113
+#define GL_SYNC_STATUS 0x9114
+#define GL_SYNC_FLAGS 0x9115
+#define GL_SYNC_FENCE 0x9116
+#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
+#define GL_UNSIGNALED 0x9118
+#define GL_SIGNALED 0x9119
+#define GL_ALREADY_SIGNALED 0x911A
+#define GL_TIMEOUT_EXPIRED 0x911B
+#define GL_CONDITION_SATISFIED 0x911C
+#define GL_WAIT_FAILED 0x911D /* last token of the contiguous 0x9111..0x911D run */
+#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFF /* sixteen hex F digits = all 64 bits set; unsuffixed, so it only fits a 64-bit unsigned type */
+#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 /* single-bit flag (bit 0) */
+#define GL_SAMPLE_POSITION 0x8E50 /* contiguous 0x8E50..0x8E52 */
+#define GL_SAMPLE_MASK 0x8E51
+#define GL_SAMPLE_MASK_VALUE 0x8E52
+#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59
+#define GL_TEXTURE_2D_MULTISAMPLE 0x9100 /* first of a contiguous token run 0x9100..0x9110 */
+#define GL_PROXY_TEXTURE_2D_MULTISAMPLE 0x9101
+#define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102
+#define GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9103
+#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104
+#define GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY 0x9105
+#define GL_TEXTURE_SAMPLES 0x9106
+#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107
+#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 /* sampler tokens: plain, INT_, UNSIGNED_INT_, then the _ARRAY forms */
+#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109
+#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A
+#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B
+#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C
+#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D
+#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E
+#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F
+#define GL_MAX_INTEGER_SAMPLES 0x9110 /* last token of the contiguous 0x9100..0x9110 run */
+#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE
+#define GL_SRC1_COLOR 0x88F9 /* SRC1 tokens: contiguous 0x88F9..0x88FC; no GL_SRC1_ALPHA appears in this run */
+#define GL_ONE_MINUS_SRC1_COLOR 0x88FA
+#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB
+#define GL_MAX_DUAL_SOURCE_DRAW_BUFFERS 0x88FC
+#define GL_ANY_SAMPLES_PASSED 0x8C2F
+#define GL_SAMPLER_BINDING 0x8919
+#define GL_RGB10_A2UI 0x906F
+#define GL_TEXTURE_SWIZZLE_R 0x8E42 /* swizzle tokens: contiguous 0x8E42..0x8E46, one per channel then the combined RGBA token */
+#define GL_TEXTURE_SWIZZLE_G 0x8E43
+#define GL_TEXTURE_SWIZZLE_B 0x8E44
+#define GL_TEXTURE_SWIZZLE_A 0x8E45
+#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46
+#define GL_TIME_ELAPSED 0x88BF
+#define GL_TIMESTAMP 0x8E28
+#define GL_INT_2_10_10_10_REV 0x8D9F /* last constant before the GL_VERSION_1_0 declaration group opens */
+#ifndef GL_VERSION_1_0
+#define GL_VERSION_1_0 1 /* marks this declaration group as emitted; pairs with the #ifndef GL_VERSION_1_0 guard on the preceding line */
+GLAPI int GLAD_GL_VERSION_1_0; /* int flag declared for this group; presumably set by the loader when GL 1.0 is usable - confirm in the glad source file */
+typedef void (APIENTRYP PFNGLCULLFACEPROC)(GLenum mode); /* pattern used by every entry below: (1) PFN...PROC is the function-pointer type */
+GLAPI PFNGLCULLFACEPROC glad_glCullFace; /* (2) glad_glX is a GLAPI-declared pointer variable of that type (GLAPI and APIENTRYP are defined outside this view) */
+#define glCullFace glad_glCullFace /* (3) macro glX redirects ordinary call sites to the pointer variable */
+typedef void (APIENTRYP PFNGLFRONTFACEPROC)(GLenum mode);
+GLAPI PFNGLFRONTFACEPROC glad_glFrontFace;
+#define glFrontFace glad_glFrontFace
+typedef void (APIENTRYP PFNGLHINTPROC)(GLenum target, GLenum mode);
+GLAPI PFNGLHINTPROC glad_glHint;
+#define glHint glad_glHint
+typedef void (APIENTRYP PFNGLLINEWIDTHPROC)(GLfloat width);
+GLAPI PFNGLLINEWIDTHPROC glad_glLineWidth;
+#define glLineWidth glad_glLineWidth
+typedef void (APIENTRYP PFNGLPOINTSIZEPROC)(GLfloat size);
+GLAPI PFNGLPOINTSIZEPROC glad_glPointSize;
+#define glPointSize glad_glPointSize
+typedef void (APIENTRYP PFNGLPOLYGONMODEPROC)(GLenum face, GLenum mode);
+GLAPI PFNGLPOLYGONMODEPROC glad_glPolygonMode;
+#define glPolygonMode glad_glPolygonMode
+typedef void (APIENTRYP PFNGLSCISSORPROC)(GLint x, GLint y, GLsizei width, GLsizei height);
+GLAPI PFNGLSCISSORPROC glad_glScissor;
+#define glScissor glad_glScissor
+typedef void (APIENTRYP PFNGLTEXPARAMETERFPROC)(GLenum target, GLenum pname, GLfloat param); /* glTexParameter: f and i take a scalar param; fv and iv take a const pointer */
+GLAPI PFNGLTEXPARAMETERFPROC glad_glTexParameterf;
+#define glTexParameterf glad_glTexParameterf
+typedef void (APIENTRYP PFNGLTEXPARAMETERFVPROC)(GLenum target, GLenum pname, const GLfloat *params);
+GLAPI PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv;
+#define glTexParameterfv glad_glTexParameterfv
+typedef void (APIENTRYP PFNGLTEXPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
+GLAPI PFNGLTEXPARAMETERIPROC glad_glTexParameteri;
+#define glTexParameteri glad_glTexParameteri
+typedef void (APIENTRYP PFNGLTEXPARAMETERIVPROC)(GLenum target, GLenum pname, const GLint *params);
+GLAPI PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv;
+#define glTexParameteriv glad_glTexParameteriv
+typedef void (APIENTRYP PFNGLTEXIMAGE1DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void *pixels); /* note: internalformat is GLint here while format and type are GLenum */
+GLAPI PFNGLTEXIMAGE1DPROC glad_glTexImage1D;
+#define glTexImage1D glad_glTexImage1D
+typedef void (APIENTRYP PFNGLTEXIMAGE2DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels); /* same as the 1D form plus a height argument */
+GLAPI PFNGLTEXIMAGE2DPROC glad_glTexImage2D;
+#define glTexImage2D glad_glTexImage2D
+typedef void (APIENTRYP PFNGLDRAWBUFFERPROC)(GLenum buf); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLDRAWBUFFERPROC glad_glDrawBuffer; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glDrawBuffer glad_glDrawBuffer /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLCLEARPROC)(GLbitfield mask); /* mask is a GLbitfield, unlike the GLenum selectors around it */
+GLAPI PFNGLCLEARPROC glad_glClear;
+#define glClear glad_glClear
+typedef void (APIENTRYP PFNGLCLEARCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha);
+GLAPI PFNGLCLEARCOLORPROC glad_glClearColor;
+#define glClearColor glad_glClearColor
+typedef void (APIENTRYP PFNGLCLEARSTENCILPROC)(GLint s);
+GLAPI PFNGLCLEARSTENCILPROC glad_glClearStencil;
+#define glClearStencil glad_glClearStencil
+typedef void (APIENTRYP PFNGLCLEARDEPTHPROC)(GLdouble depth); /* depth is a GLdouble in this signature */
+GLAPI PFNGLCLEARDEPTHPROC glad_glClearDepth;
+#define glClearDepth glad_glClearDepth
+typedef void (APIENTRYP PFNGLSTENCILMASKPROC)(GLuint mask);
+GLAPI PFNGLSTENCILMASKPROC glad_glStencilMask;
+#define glStencilMask glad_glStencilMask
+typedef void (APIENTRYP PFNGLCOLORMASKPROC)(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); /* one GLboolean per colour channel */
+GLAPI PFNGLCOLORMASKPROC glad_glColorMask;
+#define glColorMask glad_glColorMask
+typedef void (APIENTRYP PFNGLDEPTHMASKPROC)(GLboolean flag);
+GLAPI PFNGLDEPTHMASKPROC glad_glDepthMask;
+#define glDepthMask glad_glDepthMask
+typedef void (APIENTRYP PFNGLDISABLEPROC)(GLenum cap); /* glDisable and glEnable share the same (GLenum cap) signature */
+GLAPI PFNGLDISABLEPROC glad_glDisable;
+#define glDisable glad_glDisable
+typedef void (APIENTRYP PFNGLENABLEPROC)(GLenum cap);
+GLAPI PFNGLENABLEPROC glad_glEnable;
+#define glEnable glad_glEnable
+typedef void (APIENTRYP PFNGLFINISHPROC)(void); /* glFinish and glFlush take no arguments and return nothing */
+GLAPI PFNGLFINISHPROC glad_glFinish;
+#define glFinish glad_glFinish
+typedef void (APIENTRYP PFNGLFLUSHPROC)(void);
+GLAPI PFNGLFLUSHPROC glad_glFlush;
+#define glFlush glad_glFlush
+typedef void (APIENTRYP PFNGLBLENDFUNCPROC)(GLenum sfactor, GLenum dfactor);
+GLAPI PFNGLBLENDFUNCPROC glad_glBlendFunc;
+#define glBlendFunc glad_glBlendFunc
+typedef void (APIENTRYP PFNGLLOGICOPPROC)(GLenum opcode);
+GLAPI PFNGLLOGICOPPROC glad_glLogicOp;
+#define glLogicOp glad_glLogicOp
+typedef void (APIENTRYP PFNGLSTENCILFUNCPROC)(GLenum func, GLint ref, GLuint mask);
+GLAPI PFNGLSTENCILFUNCPROC glad_glStencilFunc;
+#define glStencilFunc glad_glStencilFunc
+typedef void (APIENTRYP PFNGLSTENCILOPPROC)(GLenum fail, GLenum zfail, GLenum zpass);
+GLAPI PFNGLSTENCILOPPROC glad_glStencilOp;
+#define glStencilOp glad_glStencilOp
+typedef void (APIENTRYP PFNGLDEPTHFUNCPROC)(GLenum func);
+GLAPI PFNGLDEPTHFUNCPROC glad_glDepthFunc;
+#define glDepthFunc glad_glDepthFunc
+typedef void (APIENTRYP PFNGLPIXELSTOREFPROC)(GLenum pname, GLfloat param); /* glPixelStore: f takes GLfloat, i takes GLint */
+GLAPI PFNGLPIXELSTOREFPROC glad_glPixelStoref;
+#define glPixelStoref glad_glPixelStoref
+typedef void (APIENTRYP PFNGLPIXELSTOREIPROC)(GLenum pname, GLint param);
+GLAPI PFNGLPIXELSTOREIPROC glad_glPixelStorei;
+#define glPixelStorei glad_glPixelStorei
+typedef void (APIENTRYP PFNGLREADBUFFERPROC)(GLenum src);
+GLAPI PFNGLREADBUFFERPROC glad_glReadBuffer;
+#define glReadBuffer glad_glReadBuffer
+typedef void (APIENTRYP PFNGLREADPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void *pixels); /* pixels is a non-const void pointer, i.e. a caller-supplied destination */
+GLAPI PFNGLREADPIXELSPROC glad_glReadPixels;
+#define glReadPixels glad_glReadPixels
+typedef void (APIENTRYP PFNGLGETBOOLEANVPROC)(GLenum pname, GLboolean *data); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLGETBOOLEANVPROC glad_glGetBooleanv; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glGetBooleanv glad_glGetBooleanv /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLGETDOUBLEVPROC)(GLenum pname, GLdouble *data); /* glGet...v forms differ only in the pointee type of the non-const data pointer */
+GLAPI PFNGLGETDOUBLEVPROC glad_glGetDoublev;
+#define glGetDoublev glad_glGetDoublev
+typedef GLenum (APIENTRYP PFNGLGETERRORPROC)(void); /* returns a GLenum and takes no arguments */
+GLAPI PFNGLGETERRORPROC glad_glGetError;
+#define glGetError glad_glGetError
+typedef void (APIENTRYP PFNGLGETFLOATVPROC)(GLenum pname, GLfloat *data);
+GLAPI PFNGLGETFLOATVPROC glad_glGetFloatv;
+#define glGetFloatv glad_glGetFloatv
+typedef void (APIENTRYP PFNGLGETINTEGERVPROC)(GLenum pname, GLint *data);
+GLAPI PFNGLGETINTEGERVPROC glad_glGetIntegerv;
+#define glGetIntegerv glad_glGetIntegerv
+typedef const GLubyte * (APIENTRYP PFNGLGETSTRINGPROC)(GLenum name); /* returns a pointer to const GLubyte; ownership of the pointee is not visible here */
+GLAPI PFNGLGETSTRINGPROC glad_glGetString;
+#define glGetString glad_glGetString
+typedef void (APIENTRYP PFNGLGETTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, void *pixels);
+GLAPI PFNGLGETTEXIMAGEPROC glad_glGetTexImage;
+#define glGetTexImage glad_glGetTexImage
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERFVPROC)(GLenum target, GLenum pname, GLfloat *params);
+GLAPI PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv;
+#define glGetTexParameterfv glad_glGetTexParameterfv
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERIVPROC)(GLenum target, GLenum pname, GLint *params);
+GLAPI PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv;
+#define glGetTexParameteriv glad_glGetTexParameteriv
+typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERFVPROC)(GLenum target, GLint level, GLenum pname, GLfloat *params); /* Level variants insert a GLint level argument after target */
+GLAPI PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv;
+#define glGetTexLevelParameterfv glad_glGetTexLevelParameterfv
+typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERIVPROC)(GLenum target, GLint level, GLenum pname, GLint *params);
+GLAPI PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv;
+#define glGetTexLevelParameteriv glad_glGetTexLevelParameteriv
+typedef GLboolean (APIENTRYP PFNGLISENABLEDPROC)(GLenum cap); /* returns GLboolean for the same GLenum cap argument type used by glEnable and glDisable */
+GLAPI PFNGLISENABLEDPROC glad_glIsEnabled;
+#define glIsEnabled glad_glIsEnabled
+typedef void (APIENTRYP PFNGLDEPTHRANGEPROC)(GLdouble n, GLdouble f); /* parameters are named n and f in this header; presumably near and far - confirm against the GL spec */
+GLAPI PFNGLDEPTHRANGEPROC glad_glDepthRange;
+#define glDepthRange glad_glDepthRange
+typedef void (APIENTRYP PFNGLVIEWPORTPROC)(GLint x, GLint y, GLsizei width, GLsizei height); /* same parameter list as the glScissor typedef in this header */
+GLAPI PFNGLVIEWPORTPROC glad_glViewport;
+#define glViewport glad_glViewport
+typedef void (APIENTRYP PFNGLNEWLISTPROC)(GLuint list, GLenum mode); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLNEWLISTPROC glad_glNewList; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glNewList glad_glNewList /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLENDLISTPROC)(void); /* takes no list argument, unlike glNewList */
+GLAPI PFNGLENDLISTPROC glad_glEndList;
+#define glEndList glad_glEndList
+typedef void (APIENTRYP PFNGLCALLLISTPROC)(GLuint list); /* single list identified by a GLuint */
+GLAPI PFNGLCALLLISTPROC glad_glCallList;
+#define glCallList glad_glCallList
+typedef void (APIENTRYP PFNGLCALLLISTSPROC)(GLsizei n, GLenum type, const void *lists); /* plural form: count n, element type tag, and an untyped const array pointer */
+GLAPI PFNGLCALLLISTSPROC glad_glCallLists;
+#define glCallLists glad_glCallLists
+typedef void (APIENTRYP PFNGLDELETELISTSPROC)(GLuint list, GLsizei range); /* first list id plus a GLsizei range */
+GLAPI PFNGLDELETELISTSPROC glad_glDeleteLists;
+#define glDeleteLists glad_glDeleteLists
+typedef GLuint (APIENTRYP PFNGLGENLISTSPROC)(GLsizei range); /* returns a GLuint for a requested GLsizei range */
+GLAPI PFNGLGENLISTSPROC glad_glGenLists;
+#define glGenLists glad_glGenLists
+typedef void (APIENTRYP PFNGLLISTBASEPROC)(GLuint base);
+GLAPI PFNGLLISTBASEPROC glad_glListBase;
+#define glListBase glad_glListBase
+typedef void (APIENTRYP PFNGLBEGINPROC)(GLenum mode); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLBEGINPROC glad_glBegin; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glBegin glad_glBegin /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLBITMAPPROC)(GLsizei width, GLsizei height, GLfloat xorig, GLfloat yorig, GLfloat xmove, GLfloat ymove, const GLubyte *bitmap);
+GLAPI PFNGLBITMAPPROC glad_glBitmap;
+#define glBitmap glad_glBitmap
+typedef void (APIENTRYP PFNGLCOLOR3BPROC)(GLbyte red, GLbyte green, GLbyte blue); /* glColor3 family: three channels; suffix picks the C type: b GLbyte, d GLdouble, f GLfloat, i GLint, s GLshort, ub GLubyte, ui GLuint, us GLushort */
+GLAPI PFNGLCOLOR3BPROC glad_glColor3b;
+#define glColor3b glad_glColor3b
+typedef void (APIENTRYP PFNGLCOLOR3BVPROC)(const GLbyte *v); /* trailing v: same data passed as one const pointer instead of scalars */
+GLAPI PFNGLCOLOR3BVPROC glad_glColor3bv;
+#define glColor3bv glad_glColor3bv
+typedef void (APIENTRYP PFNGLCOLOR3DPROC)(GLdouble red, GLdouble green, GLdouble blue);
+GLAPI PFNGLCOLOR3DPROC glad_glColor3d;
+#define glColor3d glad_glColor3d
+typedef void (APIENTRYP PFNGLCOLOR3DVPROC)(const GLdouble *v);
+GLAPI PFNGLCOLOR3DVPROC glad_glColor3dv;
+#define glColor3dv glad_glColor3dv
+typedef void (APIENTRYP PFNGLCOLOR3FPROC)(GLfloat red, GLfloat green, GLfloat blue);
+GLAPI PFNGLCOLOR3FPROC glad_glColor3f;
+#define glColor3f glad_glColor3f
+typedef void (APIENTRYP PFNGLCOLOR3FVPROC)(const GLfloat *v);
+GLAPI PFNGLCOLOR3FVPROC glad_glColor3fv;
+#define glColor3fv glad_glColor3fv
+typedef void (APIENTRYP PFNGLCOLOR3IPROC)(GLint red, GLint green, GLint blue);
+GLAPI PFNGLCOLOR3IPROC glad_glColor3i;
+#define glColor3i glad_glColor3i
+typedef void (APIENTRYP PFNGLCOLOR3IVPROC)(const GLint *v);
+GLAPI PFNGLCOLOR3IVPROC glad_glColor3iv;
+#define glColor3iv glad_glColor3iv
+typedef void (APIENTRYP PFNGLCOLOR3SPROC)(GLshort red, GLshort green, GLshort blue);
+GLAPI PFNGLCOLOR3SPROC glad_glColor3s;
+#define glColor3s glad_glColor3s
+typedef void (APIENTRYP PFNGLCOLOR3SVPROC)(const GLshort *v);
+GLAPI PFNGLCOLOR3SVPROC glad_glColor3sv;
+#define glColor3sv glad_glColor3sv
+typedef void (APIENTRYP PFNGLCOLOR3UBPROC)(GLubyte red, GLubyte green, GLubyte blue);
+GLAPI PFNGLCOLOR3UBPROC glad_glColor3ub;
+#define glColor3ub glad_glColor3ub
+typedef void (APIENTRYP PFNGLCOLOR3UBVPROC)(const GLubyte *v);
+GLAPI PFNGLCOLOR3UBVPROC glad_glColor3ubv;
+#define glColor3ubv glad_glColor3ubv
+typedef void (APIENTRYP PFNGLCOLOR3UIPROC)(GLuint red, GLuint green, GLuint blue);
+GLAPI PFNGLCOLOR3UIPROC glad_glColor3ui;
+#define glColor3ui glad_glColor3ui
+typedef void (APIENTRYP PFNGLCOLOR3UIVPROC)(const GLuint *v);
+GLAPI PFNGLCOLOR3UIVPROC glad_glColor3uiv;
+#define glColor3uiv glad_glColor3uiv
+typedef void (APIENTRYP PFNGLCOLOR3USPROC)(GLushort red, GLushort green, GLushort blue);
+GLAPI PFNGLCOLOR3USPROC glad_glColor3us;
+#define glColor3us glad_glColor3us
+typedef void (APIENTRYP PFNGLCOLOR3USVPROC)(const GLushort *v);
+GLAPI PFNGLCOLOR3USVPROC glad_glColor3usv;
+#define glColor3usv glad_glColor3usv
+typedef void (APIENTRYP PFNGLCOLOR4BPROC)(GLbyte red, GLbyte green, GLbyte blue, GLbyte alpha); /* glColor4 family: same type suffixes as glColor3, with an added alpha channel */
+GLAPI PFNGLCOLOR4BPROC glad_glColor4b;
+#define glColor4b glad_glColor4b
+typedef void (APIENTRYP PFNGLCOLOR4BVPROC)(const GLbyte *v);
+GLAPI PFNGLCOLOR4BVPROC glad_glColor4bv;
+#define glColor4bv glad_glColor4bv
+typedef void (APIENTRYP PFNGLCOLOR4DPROC)(GLdouble red, GLdouble green, GLdouble blue, GLdouble alpha);
+GLAPI PFNGLCOLOR4DPROC glad_glColor4d;
+#define glColor4d glad_glColor4d
+typedef void (APIENTRYP PFNGLCOLOR4DVPROC)(const GLdouble *v);
+GLAPI PFNGLCOLOR4DVPROC glad_glColor4dv;
+#define glColor4dv glad_glColor4dv
+typedef void (APIENTRYP PFNGLCOLOR4FPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha);
+GLAPI PFNGLCOLOR4FPROC glad_glColor4f;
+#define glColor4f glad_glColor4f
+typedef void (APIENTRYP PFNGLCOLOR4FVPROC)(const GLfloat *v);
+GLAPI PFNGLCOLOR4FVPROC glad_glColor4fv;
+#define glColor4fv glad_glColor4fv
+typedef void (APIENTRYP PFNGLCOLOR4IPROC)(GLint red, GLint green, GLint blue, GLint alpha);
+GLAPI PFNGLCOLOR4IPROC glad_glColor4i;
+#define glColor4i glad_glColor4i
+typedef void (APIENTRYP PFNGLCOLOR4IVPROC)(const GLint *v);
+GLAPI PFNGLCOLOR4IVPROC glad_glColor4iv;
+#define glColor4iv glad_glColor4iv
+typedef void (APIENTRYP PFNGLCOLOR4SPROC)(GLshort red, GLshort green, GLshort blue, GLshort alpha);
+GLAPI PFNGLCOLOR4SPROC glad_glColor4s;
+#define glColor4s glad_glColor4s
+typedef void (APIENTRYP PFNGLCOLOR4SVPROC)(const GLshort *v);
+GLAPI PFNGLCOLOR4SVPROC glad_glColor4sv;
+#define glColor4sv glad_glColor4sv
+typedef void (APIENTRYP PFNGLCOLOR4UBPROC)(GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha);
+GLAPI PFNGLCOLOR4UBPROC glad_glColor4ub;
+#define glColor4ub glad_glColor4ub
+typedef void (APIENTRYP PFNGLCOLOR4UBVPROC)(const GLubyte *v);
+GLAPI PFNGLCOLOR4UBVPROC glad_glColor4ubv;
+#define glColor4ubv glad_glColor4ubv
+typedef void (APIENTRYP PFNGLCOLOR4UIPROC)(GLuint red, GLuint green, GLuint blue, GLuint alpha);
+GLAPI PFNGLCOLOR4UIPROC glad_glColor4ui;
+#define glColor4ui glad_glColor4ui
+typedef void (APIENTRYP PFNGLCOLOR4UIVPROC)(const GLuint *v);
+GLAPI PFNGLCOLOR4UIVPROC glad_glColor4uiv;
+#define glColor4uiv glad_glColor4uiv
+typedef void (APIENTRYP PFNGLCOLOR4USPROC)(GLushort red, GLushort green, GLushort blue, GLushort alpha);
+GLAPI PFNGLCOLOR4USPROC glad_glColor4us;
+#define glColor4us glad_glColor4us
+typedef void (APIENTRYP PFNGLCOLOR4USVPROC)(const GLushort *v);
+GLAPI PFNGLCOLOR4USVPROC glad_glColor4usv;
+#define glColor4usv glad_glColor4usv
+typedef void (APIENTRYP PFNGLEDGEFLAGPROC)(GLboolean flag); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLEDGEFLAGPROC glad_glEdgeFlag; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glEdgeFlag glad_glEdgeFlag /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLEDGEFLAGVPROC)(const GLboolean *flag); /* v form: the flag is passed through a const pointer */
+GLAPI PFNGLEDGEFLAGVPROC glad_glEdgeFlagv;
+#define glEdgeFlagv glad_glEdgeFlagv
+typedef void (APIENTRYP PFNGLENDPROC)(void); /* no arguments, no return value */
+GLAPI PFNGLENDPROC glad_glEnd;
+#define glEnd glad_glEnd
+typedef void (APIENTRYP PFNGLINDEXDPROC)(GLdouble c); /* glIndex family: one value c; suffix picks the C type: d GLdouble, f GLfloat, i GLint, s GLshort */
+GLAPI PFNGLINDEXDPROC glad_glIndexd;
+#define glIndexd glad_glIndexd
+typedef void (APIENTRYP PFNGLINDEXDVPROC)(const GLdouble *c); /* trailing v: c becomes a const pointer of the same element type */
+GLAPI PFNGLINDEXDVPROC glad_glIndexdv;
+#define glIndexdv glad_glIndexdv
+typedef void (APIENTRYP PFNGLINDEXFPROC)(GLfloat c);
+GLAPI PFNGLINDEXFPROC glad_glIndexf;
+#define glIndexf glad_glIndexf
+typedef void (APIENTRYP PFNGLINDEXFVPROC)(const GLfloat *c);
+GLAPI PFNGLINDEXFVPROC glad_glIndexfv;
+#define glIndexfv glad_glIndexfv
+typedef void (APIENTRYP PFNGLINDEXIPROC)(GLint c);
+GLAPI PFNGLINDEXIPROC glad_glIndexi;
+#define glIndexi glad_glIndexi
+typedef void (APIENTRYP PFNGLINDEXIVPROC)(const GLint *c);
+GLAPI PFNGLINDEXIVPROC glad_glIndexiv;
+#define glIndexiv glad_glIndexiv
+typedef void (APIENTRYP PFNGLINDEXSPROC)(GLshort c);
+GLAPI PFNGLINDEXSPROC glad_glIndexs;
+#define glIndexs glad_glIndexs
+typedef void (APIENTRYP PFNGLINDEXSVPROC)(const GLshort *c);
+GLAPI PFNGLINDEXSVPROC glad_glIndexsv;
+#define glIndexsv glad_glIndexsv
+typedef void (APIENTRYP PFNGLNORMAL3BPROC)(GLbyte nx, GLbyte ny, GLbyte nz); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLNORMAL3BPROC glad_glNormal3b; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glNormal3b glad_glNormal3b /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLNORMAL3BVPROC)(const GLbyte *v); /* trailing v: the three components are passed via one const pointer */
+GLAPI PFNGLNORMAL3BVPROC glad_glNormal3bv;
+#define glNormal3bv glad_glNormal3bv
+typedef void (APIENTRYP PFNGLNORMAL3DPROC)(GLdouble nx, GLdouble ny, GLdouble nz); /* suffix picks the C type: b GLbyte, d GLdouble, f GLfloat, i GLint, s GLshort */
+GLAPI PFNGLNORMAL3DPROC glad_glNormal3d;
+#define glNormal3d glad_glNormal3d
+typedef void (APIENTRYP PFNGLNORMAL3DVPROC)(const GLdouble *v);
+GLAPI PFNGLNORMAL3DVPROC glad_glNormal3dv;
+#define glNormal3dv glad_glNormal3dv
+typedef void (APIENTRYP PFNGLNORMAL3FPROC)(GLfloat nx, GLfloat ny, GLfloat nz);
+GLAPI PFNGLNORMAL3FPROC glad_glNormal3f;
+#define glNormal3f glad_glNormal3f
+typedef void (APIENTRYP PFNGLNORMAL3FVPROC)(const GLfloat *v);
+GLAPI PFNGLNORMAL3FVPROC glad_glNormal3fv;
+#define glNormal3fv glad_glNormal3fv
+typedef void (APIENTRYP PFNGLNORMAL3IPROC)(GLint nx, GLint ny, GLint nz);
+GLAPI PFNGLNORMAL3IPROC glad_glNormal3i;
+#define glNormal3i glad_glNormal3i
+typedef void (APIENTRYP PFNGLNORMAL3IVPROC)(const GLint *v);
+GLAPI PFNGLNORMAL3IVPROC glad_glNormal3iv;
+#define glNormal3iv glad_glNormal3iv
+typedef void (APIENTRYP PFNGLNORMAL3SPROC)(GLshort nx, GLshort ny, GLshort nz);
+GLAPI PFNGLNORMAL3SPROC glad_glNormal3s;
+#define glNormal3s glad_glNormal3s
+typedef void (APIENTRYP PFNGLNORMAL3SVPROC)(const GLshort *v);
+GLAPI PFNGLNORMAL3SVPROC glad_glNormal3sv;
+#define glNormal3sv glad_glNormal3sv
+typedef void (APIENTRYP PFNGLRASTERPOS2DPROC)(GLdouble x, GLdouble y); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLRASTERPOS2DPROC glad_glRasterPos2d; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glRasterPos2d glad_glRasterPos2d /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLRASTERPOS2DVPROC)(const GLdouble *v); /* name grammar: digit = component count, letter = C type (d GLdouble, f GLfloat, i GLint, s GLshort), trailing v = const pointer form */
+GLAPI PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv;
+#define glRasterPos2dv glad_glRasterPos2dv
+typedef void (APIENTRYP PFNGLRASTERPOS2FPROC)(GLfloat x, GLfloat y);
+GLAPI PFNGLRASTERPOS2FPROC glad_glRasterPos2f;
+#define glRasterPos2f glad_glRasterPos2f
+typedef void (APIENTRYP PFNGLRASTERPOS2FVPROC)(const GLfloat *v);
+GLAPI PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv;
+#define glRasterPos2fv glad_glRasterPos2fv
+typedef void (APIENTRYP PFNGLRASTERPOS2IPROC)(GLint x, GLint y);
+GLAPI PFNGLRASTERPOS2IPROC glad_glRasterPos2i;
+#define glRasterPos2i glad_glRasterPos2i
+typedef void (APIENTRYP PFNGLRASTERPOS2IVPROC)(const GLint *v);
+GLAPI PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv;
+#define glRasterPos2iv glad_glRasterPos2iv
+typedef void (APIENTRYP PFNGLRASTERPOS2SPROC)(GLshort x, GLshort y);
+GLAPI PFNGLRASTERPOS2SPROC glad_glRasterPos2s;
+#define glRasterPos2s glad_glRasterPos2s
+typedef void (APIENTRYP PFNGLRASTERPOS2SVPROC)(const GLshort *v);
+GLAPI PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv;
+#define glRasterPos2sv glad_glRasterPos2sv
+typedef void (APIENTRYP PFNGLRASTERPOS3DPROC)(GLdouble x, GLdouble y, GLdouble z); /* 3-component forms add z */
+GLAPI PFNGLRASTERPOS3DPROC glad_glRasterPos3d;
+#define glRasterPos3d glad_glRasterPos3d
+typedef void (APIENTRYP PFNGLRASTERPOS3DVPROC)(const GLdouble *v);
+GLAPI PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv;
+#define glRasterPos3dv glad_glRasterPos3dv
+typedef void (APIENTRYP PFNGLRASTERPOS3FPROC)(GLfloat x, GLfloat y, GLfloat z);
+GLAPI PFNGLRASTERPOS3FPROC glad_glRasterPos3f;
+#define glRasterPos3f glad_glRasterPos3f
+typedef void (APIENTRYP PFNGLRASTERPOS3FVPROC)(const GLfloat *v);
+GLAPI PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv;
+#define glRasterPos3fv glad_glRasterPos3fv
+typedef void (APIENTRYP PFNGLRASTERPOS3IPROC)(GLint x, GLint y, GLint z);
+GLAPI PFNGLRASTERPOS3IPROC glad_glRasterPos3i;
+#define glRasterPos3i glad_glRasterPos3i
+typedef void (APIENTRYP PFNGLRASTERPOS3IVPROC)(const GLint *v);
+GLAPI PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv;
+#define glRasterPos3iv glad_glRasterPos3iv
+typedef void (APIENTRYP PFNGLRASTERPOS3SPROC)(GLshort x, GLshort y, GLshort z);
+GLAPI PFNGLRASTERPOS3SPROC glad_glRasterPos3s;
+#define glRasterPos3s glad_glRasterPos3s
+typedef void (APIENTRYP PFNGLRASTERPOS3SVPROC)(const GLshort *v);
+GLAPI PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv;
+#define glRasterPos3sv glad_glRasterPos3sv
+typedef void (APIENTRYP PFNGLRASTERPOS4DPROC)(GLdouble x, GLdouble y, GLdouble z, GLdouble w); /* 4-component forms add w */
+GLAPI PFNGLRASTERPOS4DPROC glad_glRasterPos4d;
+#define glRasterPos4d glad_glRasterPos4d
+typedef void (APIENTRYP PFNGLRASTERPOS4DVPROC)(const GLdouble *v);
+GLAPI PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv;
+#define glRasterPos4dv glad_glRasterPos4dv
+typedef void (APIENTRYP PFNGLRASTERPOS4FPROC)(GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+GLAPI PFNGLRASTERPOS4FPROC glad_glRasterPos4f;
+#define glRasterPos4f glad_glRasterPos4f
+typedef void (APIENTRYP PFNGLRASTERPOS4FVPROC)(const GLfloat *v);
+GLAPI PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv;
+#define glRasterPos4fv glad_glRasterPos4fv
+typedef void (APIENTRYP PFNGLRASTERPOS4IPROC)(GLint x, GLint y, GLint z, GLint w);
+GLAPI PFNGLRASTERPOS4IPROC glad_glRasterPos4i;
+#define glRasterPos4i glad_glRasterPos4i
+typedef void (APIENTRYP PFNGLRASTERPOS4IVPROC)(const GLint *v);
+GLAPI PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv;
+#define glRasterPos4iv glad_glRasterPos4iv
+typedef void (APIENTRYP PFNGLRASTERPOS4SPROC)(GLshort x, GLshort y, GLshort z, GLshort w);
+GLAPI PFNGLRASTERPOS4SPROC glad_glRasterPos4s;
+#define glRasterPos4s glad_glRasterPos4s
+typedef void (APIENTRYP PFNGLRASTERPOS4SVPROC)(const GLshort *v);
+GLAPI PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv;
+#define glRasterPos4sv glad_glRasterPos4sv
+typedef void (APIENTRYP PFNGLRECTDPROC)(GLdouble x1, GLdouble y1, GLdouble x2, GLdouble y2); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLRECTDPROC glad_glRectd; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glRectd glad_glRectd /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLRECTDVPROC)(const GLdouble *v1, const GLdouble *v2); /* v forms take two const pointers (v1, v2) in place of the four scalars x1, y1, x2, y2 */
+GLAPI PFNGLRECTDVPROC glad_glRectdv;
+#define glRectdv glad_glRectdv
+typedef void (APIENTRYP PFNGLRECTFPROC)(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2); /* suffix picks the C type: d GLdouble, f GLfloat, i GLint, s GLshort */
+GLAPI PFNGLRECTFPROC glad_glRectf;
+#define glRectf glad_glRectf
+typedef void (APIENTRYP PFNGLRECTFVPROC)(const GLfloat *v1, const GLfloat *v2);
+GLAPI PFNGLRECTFVPROC glad_glRectfv;
+#define glRectfv glad_glRectfv
+typedef void (APIENTRYP PFNGLRECTIPROC)(GLint x1, GLint y1, GLint x2, GLint y2);
+GLAPI PFNGLRECTIPROC glad_glRecti;
+#define glRecti glad_glRecti
+typedef void (APIENTRYP PFNGLRECTIVPROC)(const GLint *v1, const GLint *v2);
+GLAPI PFNGLRECTIVPROC glad_glRectiv;
+#define glRectiv glad_glRectiv
+typedef void (APIENTRYP PFNGLRECTSPROC)(GLshort x1, GLshort y1, GLshort x2, GLshort y2);
+GLAPI PFNGLRECTSPROC glad_glRects;
+#define glRects glad_glRects
+typedef void (APIENTRYP PFNGLRECTSVPROC)(const GLshort *v1, const GLshort *v2);
+GLAPI PFNGLRECTSVPROC glad_glRectsv;
+#define glRectsv glad_glRectsv
+typedef void (APIENTRYP PFNGLTEXCOORD1DPROC)(GLdouble s); /* each entry in this run is a triple: PFN...PROC function-pointer type, */
+GLAPI PFNGLTEXCOORD1DPROC glad_glTexCoord1d; /* a GLAPI-declared pointer variable glad_glX of that type, */
+#define glTexCoord1d glad_glTexCoord1d /* and a macro glX that redirects call sites to the pointer */
+typedef void (APIENTRYP PFNGLTEXCOORD1DVPROC)(const GLdouble *v); /* name grammar: digit = component count (s, then t, r, q), letter = C type (d GLdouble, f GLfloat, i GLint, s GLshort), trailing v = const pointer form */
+GLAPI PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv;
+#define glTexCoord1dv glad_glTexCoord1dv
+typedef void (APIENTRYP PFNGLTEXCOORD1FPROC)(GLfloat s);
+GLAPI PFNGLTEXCOORD1FPROC glad_glTexCoord1f;
+#define glTexCoord1f glad_glTexCoord1f
+typedef void (APIENTRYP PFNGLTEXCOORD1FVPROC)(const GLfloat *v);
+GLAPI PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv;
+#define glTexCoord1fv glad_glTexCoord1fv
+typedef void (APIENTRYP PFNGLTEXCOORD1IPROC)(GLint s);
+GLAPI PFNGLTEXCOORD1IPROC glad_glTexCoord1i;
+#define glTexCoord1i glad_glTexCoord1i
+typedef void (APIENTRYP PFNGLTEXCOORD1IVPROC)(const GLint *v);
+GLAPI PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv;
+#define glTexCoord1iv glad_glTexCoord1iv
+typedef void (APIENTRYP PFNGLTEXCOORD1SPROC)(GLshort s);
+GLAPI PFNGLTEXCOORD1SPROC glad_glTexCoord1s;
+#define glTexCoord1s glad_glTexCoord1s
+typedef void (APIENTRYP PFNGLTEXCOORD1SVPROC)(const GLshort *v);
+GLAPI PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv;
+#define glTexCoord1sv glad_glTexCoord1sv
+typedef void (APIENTRYP PFNGLTEXCOORD2DPROC)(GLdouble s, GLdouble t); /* 2-component forms add t */
+GLAPI PFNGLTEXCOORD2DPROC glad_glTexCoord2d;
+#define glTexCoord2d glad_glTexCoord2d
+typedef void (APIENTRYP PFNGLTEXCOORD2DVPROC)(const GLdouble *v);
+GLAPI PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv;
+#define glTexCoord2dv glad_glTexCoord2dv
+typedef void (APIENTRYP PFNGLTEXCOORD2FPROC)(GLfloat s, GLfloat t);
+GLAPI PFNGLTEXCOORD2FPROC glad_glTexCoord2f;
+#define glTexCoord2f glad_glTexCoord2f
+typedef void (APIENTRYP PFNGLTEXCOORD2FVPROC)(const GLfloat *v);
+GLAPI PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv;
+#define glTexCoord2fv glad_glTexCoord2fv
+typedef void (APIENTRYP PFNGLTEXCOORD2IPROC)(GLint s, GLint t);
+GLAPI PFNGLTEXCOORD2IPROC glad_glTexCoord2i;
+#define glTexCoord2i glad_glTexCoord2i
+typedef void (APIENTRYP PFNGLTEXCOORD2IVPROC)(const GLint *v);
+GLAPI PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv;
+#define glTexCoord2iv glad_glTexCoord2iv
+typedef void (APIENTRYP PFNGLTEXCOORD2SPROC)(GLshort s, GLshort t);
+GLAPI PFNGLTEXCOORD2SPROC glad_glTexCoord2s;
+#define glTexCoord2s glad_glTexCoord2s
+typedef void (APIENTRYP PFNGLTEXCOORD2SVPROC)(const GLshort *v);
+GLAPI PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv;
+#define glTexCoord2sv glad_glTexCoord2sv
+typedef void (APIENTRYP PFNGLTEXCOORD3DPROC)(GLdouble s, GLdouble t, GLdouble r); /* 3-component forms add r */
+GLAPI PFNGLTEXCOORD3DPROC glad_glTexCoord3d;
+#define glTexCoord3d glad_glTexCoord3d
+typedef void (APIENTRYP PFNGLTEXCOORD3DVPROC)(const GLdouble *v);
+GLAPI PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv;
+#define glTexCoord3dv glad_glTexCoord3dv
+typedef void (APIENTRYP PFNGLTEXCOORD3FPROC)(GLfloat s, GLfloat t, GLfloat r);
+GLAPI PFNGLTEXCOORD3FPROC glad_glTexCoord3f;
+#define glTexCoord3f glad_glTexCoord3f
+typedef void (APIENTRYP PFNGLTEXCOORD3FVPROC)(const GLfloat *v);
+GLAPI PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv;
+#define glTexCoord3fv glad_glTexCoord3fv
+typedef void (APIENTRYP PFNGLTEXCOORD3IPROC)(GLint s, GLint t, GLint r);
+GLAPI PFNGLTEXCOORD3IPROC glad_glTexCoord3i;
+#define glTexCoord3i glad_glTexCoord3i
+typedef void (APIENTRYP PFNGLTEXCOORD3IVPROC)(const GLint *v);
+GLAPI PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv;
+#define glTexCoord3iv glad_glTexCoord3iv
+typedef void (APIENTRYP PFNGLTEXCOORD3SPROC)(GLshort s, GLshort t, GLshort r);
+GLAPI PFNGLTEXCOORD3SPROC glad_glTexCoord3s;
+#define glTexCoord3s glad_glTexCoord3s
+typedef void (APIENTRYP PFNGLTEXCOORD3SVPROC)(const GLshort *v);
+GLAPI PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv;
+#define glTexCoord3sv glad_glTexCoord3sv
+typedef void (APIENTRYP PFNGLTEXCOORD4DPROC)(GLdouble s, GLdouble t, GLdouble r, GLdouble q); /* 4-component forms add q */
+GLAPI PFNGLTEXCOORD4DPROC glad_glTexCoord4d;
+#define glTexCoord4d glad_glTexCoord4d
+typedef void (APIENTRYP PFNGLTEXCOORD4DVPROC)(const GLdouble *v);
+GLAPI PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv;
+#define glTexCoord4dv glad_glTexCoord4dv
+typedef void (APIENTRYP PFNGLTEXCOORD4FPROC)(GLfloat s, GLfloat t, GLfloat r, GLfloat q);
+GLAPI PFNGLTEXCOORD4FPROC glad_glTexCoord4f;
+#define glTexCoord4f glad_glTexCoord4f
+typedef void (APIENTRYP PFNGLTEXCOORD4FVPROC)(const GLfloat *v);
+GLAPI PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv;
+#define glTexCoord4fv glad_glTexCoord4fv
+typedef void (APIENTRYP PFNGLTEXCOORD4IPROC)(GLint s, GLint t, GLint r, GLint q);
+GLAPI PFNGLTEXCOORD4IPROC glad_glTexCoord4i;
+#define glTexCoord4i glad_glTexCoord4i
+typedef void (APIENTRYP PFNGLTEXCOORD4IVPROC)(const GLint *v);
+GLAPI PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv;
+#define glTexCoord4iv glad_glTexCoord4iv
+typedef void (APIENTRYP PFNGLTEXCOORD4SPROC)(GLshort s, GLshort t, GLshort r, GLshort q);
+GLAPI PFNGLTEXCOORD4SPROC glad_glTexCoord4s;
+#define glTexCoord4s glad_glTexCoord4s
+typedef void (APIENTRYP PFNGLTEXCOORD4SVPROC)(const GLshort *v);
+GLAPI PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv;
+#define glTexCoord4sv glad_glTexCoord4sv
+typedef void (APIENTRYP PFNGLVERTEX2DPROC)(GLdouble x, GLdouble y);
+GLAPI PFNGLVERTEX2DPROC glad_glVertex2d;
+#define glVertex2d glad_glVertex2d
+typedef void (APIENTRYP PFNGLVERTEX2DVPROC)(const GLdouble *v);
+GLAPI PFNGLVERTEX2DVPROC glad_glVertex2dv;
+#define glVertex2dv glad_glVertex2dv
+typedef void (APIENTRYP PFNGLVERTEX2FPROC)(GLfloat x, GLfloat y);
+GLAPI PFNGLVERTEX2FPROC glad_glVertex2f;
+#define glVertex2f glad_glVertex2f
+typedef void (APIENTRYP PFNGLVERTEX2FVPROC)(const GLfloat *v);
+GLAPI PFNGLVERTEX2FVPROC glad_glVertex2fv;
+#define glVertex2fv glad_glVertex2fv
+typedef void (APIENTRYP PFNGLVERTEX2IPROC)(GLint x, GLint y);
+GLAPI PFNGLVERTEX2IPROC glad_glVertex2i;
+#define glVertex2i glad_glVertex2i
+typedef void (APIENTRYP PFNGLVERTEX2IVPROC)(const GLint *v);
+GLAPI PFNGLVERTEX2IVPROC glad_glVertex2iv;
+#define glVertex2iv glad_glVertex2iv
+typedef void (APIENTRYP PFNGLVERTEX2SPROC)(GLshort x, GLshort y);
+GLAPI PFNGLVERTEX2SPROC glad_glVertex2s;
+#define glVertex2s glad_glVertex2s
+typedef void (APIENTRYP PFNGLVERTEX2SVPROC)(const GLshort *v);
+GLAPI PFNGLVERTEX2SVPROC glad_glVertex2sv;
+#define glVertex2sv glad_glVertex2sv
+typedef void (APIENTRYP PFNGLVERTEX3DPROC)(GLdouble x, GLdouble y, GLdouble z);
+GLAPI PFNGLVERTEX3DPROC glad_glVertex3d;
+#define glVertex3d glad_glVertex3d
+typedef void (APIENTRYP PFNGLVERTEX3DVPROC)(const GLdouble *v);
+GLAPI PFNGLVERTEX3DVPROC glad_glVertex3dv;
+#define glVertex3dv glad_glVertex3dv
+typedef void (APIENTRYP PFNGLVERTEX3FPROC)(GLfloat x, GLfloat y, GLfloat z);
+GLAPI PFNGLVERTEX3FPROC glad_glVertex3f;
+#define glVertex3f glad_glVertex3f
+typedef void (APIENTRYP PFNGLVERTEX3FVPROC)(const GLfloat *v);
+GLAPI PFNGLVERTEX3FVPROC glad_glVertex3fv;
+#define glVertex3fv glad_glVertex3fv
+typedef void (APIENTRYP PFNGLVERTEX3IPROC)(GLint x, GLint y, GLint z);
+GLAPI PFNGLVERTEX3IPROC glad_glVertex3i;
+#define glVertex3i glad_glVertex3i
+typedef void (APIENTRYP PFNGLVERTEX3IVPROC)(const GLint *v);
+GLAPI PFNGLVERTEX3IVPROC glad_glVertex3iv;
+#define glVertex3iv glad_glVertex3iv
+typedef void (APIENTRYP PFNGLVERTEX3SPROC)(GLshort x, GLshort y, GLshort z);
+GLAPI PFNGLVERTEX3SPROC glad_glVertex3s;
+#define glVertex3s glad_glVertex3s
+typedef void (APIENTRYP PFNGLVERTEX3SVPROC)(const GLshort *v);
+GLAPI PFNGLVERTEX3SVPROC glad_glVertex3sv;
+#define glVertex3sv glad_glVertex3sv
+typedef void (APIENTRYP PFNGLVERTEX4DPROC)(GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+GLAPI PFNGLVERTEX4DPROC glad_glVertex4d;
+#define glVertex4d glad_glVertex4d
+typedef void (APIENTRYP PFNGLVERTEX4DVPROC)(const GLdouble *v);
+GLAPI PFNGLVERTEX4DVPROC glad_glVertex4dv;
+#define glVertex4dv glad_glVertex4dv
+typedef void (APIENTRYP PFNGLVERTEX4FPROC)(GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+GLAPI PFNGLVERTEX4FPROC glad_glVertex4f;
+#define glVertex4f glad_glVertex4f
+typedef void (APIENTRYP PFNGLVERTEX4FVPROC)(const GLfloat *v);
+GLAPI PFNGLVERTEX4FVPROC glad_glVertex4fv;
+#define glVertex4fv glad_glVertex4fv
+typedef void (APIENTRYP PFNGLVERTEX4IPROC)(GLint x, GLint y, GLint z, GLint w);
+GLAPI PFNGLVERTEX4IPROC glad_glVertex4i;
+#define glVertex4i glad_glVertex4i
+typedef void (APIENTRYP PFNGLVERTEX4IVPROC)(const GLint *v);
+GLAPI PFNGLVERTEX4IVPROC glad_glVertex4iv;
+#define glVertex4iv glad_glVertex4iv
+typedef void (APIENTRYP PFNGLVERTEX4SPROC)(GLshort x, GLshort y, GLshort z, GLshort w);
+GLAPI PFNGLVERTEX4SPROC glad_glVertex4s;
+#define glVertex4s glad_glVertex4s
+typedef void (APIENTRYP PFNGLVERTEX4SVPROC)(const GLshort *v);
+GLAPI PFNGLVERTEX4SVPROC glad_glVertex4sv;
+#define glVertex4sv glad_glVertex4sv
+typedef void (APIENTRYP PFNGLCLIPPLANEPROC)(GLenum plane, const GLdouble *equation);
+GLAPI PFNGLCLIPPLANEPROC glad_glClipPlane;
+#define glClipPlane glad_glClipPlane
+typedef void (APIENTRYP PFNGLCOLORMATERIALPROC)(GLenum face, GLenum mode);
+GLAPI PFNGLCOLORMATERIALPROC glad_glColorMaterial;
+#define glColorMaterial glad_glColorMaterial
+typedef void (APIENTRYP PFNGLFOGFPROC)(GLenum pname, GLfloat param);
+GLAPI PFNGLFOGFPROC glad_glFogf;
+#define glFogf glad_glFogf
+typedef void (APIENTRYP PFNGLFOGFVPROC)(GLenum pname, const GLfloat *params);
+GLAPI PFNGLFOGFVPROC glad_glFogfv;
+#define glFogfv glad_glFogfv
+typedef void (APIENTRYP PFNGLFOGIPROC)(GLenum pname, GLint param);
+GLAPI PFNGLFOGIPROC glad_glFogi;
+#define glFogi glad_glFogi
+typedef void (APIENTRYP PFNGLFOGIVPROC)(GLenum pname, const GLint *params);
+GLAPI PFNGLFOGIVPROC glad_glFogiv;
+#define glFogiv glad_glFogiv
+typedef void (APIENTRYP PFNGLLIGHTFPROC)(GLenum light, GLenum pname, GLfloat param);
+GLAPI PFNGLLIGHTFPROC glad_glLightf;
+#define glLightf glad_glLightf
+typedef void (APIENTRYP PFNGLLIGHTFVPROC)(GLenum light, GLenum pname, const GLfloat *params);
+GLAPI PFNGLLIGHTFVPROC glad_glLightfv;
+#define glLightfv glad_glLightfv
+typedef void (APIENTRYP PFNGLLIGHTIPROC)(GLenum light, GLenum pname, GLint param);
+GLAPI PFNGLLIGHTIPROC glad_glLighti;
+#define glLighti glad_glLighti
+typedef void (APIENTRYP PFNGLLIGHTIVPROC)(GLenum light, GLenum pname, const GLint *params);
+GLAPI PFNGLLIGHTIVPROC glad_glLightiv;
+#define glLightiv glad_glLightiv
+typedef void (APIENTRYP PFNGLLIGHTMODELFPROC)(GLenum pname, GLfloat param);
+GLAPI PFNGLLIGHTMODELFPROC glad_glLightModelf;
+#define glLightModelf glad_glLightModelf
+typedef void (APIENTRYP PFNGLLIGHTMODELFVPROC)(GLenum pname, const GLfloat *params);
+GLAPI PFNGLLIGHTMODELFVPROC glad_glLightModelfv;
+#define glLightModelfv glad_glLightModelfv
+typedef void (APIENTRYP PFNGLLIGHTMODELIPROC)(GLenum pname, GLint param);
+GLAPI PFNGLLIGHTMODELIPROC glad_glLightModeli;
+#define glLightModeli glad_glLightModeli
+typedef void (APIENTRYP PFNGLLIGHTMODELIVPROC)(GLenum pname, const GLint *params);
+GLAPI PFNGLLIGHTMODELIVPROC glad_glLightModeliv;
+#define glLightModeliv glad_glLightModeliv
+typedef void (APIENTRYP PFNGLLINESTIPPLEPROC)(GLint factor, GLushort pattern);
+GLAPI PFNGLLINESTIPPLEPROC glad_glLineStipple;
+#define glLineStipple glad_glLineStipple
+typedef void (APIENTRYP PFNGLMATERIALFPROC)(GLenum face, GLenum pname, GLfloat param);
+GLAPI PFNGLMATERIALFPROC glad_glMaterialf;
+#define glMaterialf glad_glMaterialf
+typedef void (APIENTRYP PFNGLMATERIALFVPROC)(GLenum face, GLenum pname, const GLfloat *params);
+GLAPI PFNGLMATERIALFVPROC glad_glMaterialfv;
+#define glMaterialfv glad_glMaterialfv
+typedef void (APIENTRYP PFNGLMATERIALIPROC)(GLenum face, GLenum pname, GLint param);
+GLAPI PFNGLMATERIALIPROC glad_glMateriali;
+#define glMateriali glad_glMateriali
+typedef void (APIENTRYP PFNGLMATERIALIVPROC)(GLenum face, GLenum pname, const GLint *params);
+GLAPI PFNGLMATERIALIVPROC glad_glMaterialiv;
+#define glMaterialiv glad_glMaterialiv
+typedef void (APIENTRYP PFNGLPOLYGONSTIPPLEPROC)(const GLubyte *mask);
+GLAPI PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple;
+#define glPolygonStipple glad_glPolygonStipple
+typedef void (APIENTRYP PFNGLSHADEMODELPROC)(GLenum mode);
+GLAPI PFNGLSHADEMODELPROC glad_glShadeModel;
+#define glShadeModel glad_glShadeModel
+typedef void (APIENTRYP PFNGLTEXENVFPROC)(GLenum target, GLenum pname, GLfloat param);
+GLAPI PFNGLTEXENVFPROC glad_glTexEnvf;
+#define glTexEnvf glad_glTexEnvf
+typedef void (APIENTRYP PFNGLTEXENVFVPROC)(GLenum target, GLenum pname, const GLfloat *params);
+GLAPI PFNGLTEXENVFVPROC glad_glTexEnvfv;
+#define glTexEnvfv glad_glTexEnvfv
+typedef void (APIENTRYP PFNGLTEXENVIPROC)(GLenum target, GLenum pname, GLint param);
+GLAPI PFNGLTEXENVIPROC glad_glTexEnvi;
+#define glTexEnvi glad_glTexEnvi
+typedef void (APIENTRYP PFNGLTEXENVIVPROC)(GLenum target, GLenum pname, const GLint *params);
+GLAPI PFNGLTEXENVIVPROC glad_glTexEnviv;
+#define glTexEnviv glad_glTexEnviv
+typedef void (APIENTRYP PFNGLTEXGENDPROC)(GLenum coord, GLenum pname, GLdouble param);
+GLAPI PFNGLTEXGENDPROC glad_glTexGend;
+#define glTexGend glad_glTexGend
+typedef void (APIENTRYP PFNGLTEXGENDVPROC)(GLenum coord, GLenum pname, const GLdouble *params);
+GLAPI PFNGLTEXGENDVPROC glad_glTexGendv;
+#define glTexGendv glad_glTexGendv
+typedef void (APIENTRYP PFNGLTEXGENFPROC)(GLenum coord, GLenum pname, GLfloat param);
+GLAPI PFNGLTEXGENFPROC glad_glTexGenf;
+#define glTexGenf glad_glTexGenf
+typedef void (APIENTRYP PFNGLTEXGENFVPROC)(GLenum coord, GLenum pname, const GLfloat *params);
+GLAPI PFNGLTEXGENFVPROC glad_glTexGenfv;
+#define glTexGenfv glad_glTexGenfv
+typedef void (APIENTRYP PFNGLTEXGENIPROC)(GLenum coord, GLenum pname, GLint param);
+GLAPI PFNGLTEXGENIPROC glad_glTexGeni;
+#define glTexGeni glad_glTexGeni
+typedef void (APIENTRYP PFNGLTEXGENIVPROC)(GLenum coord, GLenum pname, const GLint *params);
+GLAPI PFNGLTEXGENIVPROC glad_glTexGeniv;
+#define glTexGeniv glad_glTexGeniv
+typedef void (APIENTRYP PFNGLFEEDBACKBUFFERPROC)(GLsizei size, GLenum type, GLfloat *buffer);
+GLAPI PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer;
+#define glFeedbackBuffer glad_glFeedbackBuffer
+typedef void (APIENTRYP PFNGLSELECTBUFFERPROC)(GLsizei size, GLuint *buffer);
+GLAPI PFNGLSELECTBUFFERPROC glad_glSelectBuffer;
+#define glSelectBuffer glad_glSelectBuffer
+typedef GLint (APIENTRYP PFNGLRENDERMODEPROC)(GLenum mode);
+GLAPI PFNGLRENDERMODEPROC glad_glRenderMode;
+#define glRenderMode glad_glRenderMode
+typedef void (APIENTRYP PFNGLINITNAMESPROC)(void);
+GLAPI PFNGLINITNAMESPROC glad_glInitNames;
+#define glInitNames glad_glInitNames
+typedef void (APIENTRYP PFNGLLOADNAMEPROC)(GLuint name);
+GLAPI PFNGLLOADNAMEPROC glad_glLoadName;
+#define glLoadName glad_glLoadName
+typedef void (APIENTRYP PFNGLPASSTHROUGHPROC)(GLfloat token);
+GLAPI PFNGLPASSTHROUGHPROC glad_glPassThrough;
+#define glPassThrough glad_glPassThrough
+typedef void (APIENTRYP PFNGLPOPNAMEPROC)(void);
+GLAPI PFNGLPOPNAMEPROC glad_glPopName;
+#define glPopName glad_glPopName
+typedef void (APIENTRYP PFNGLPUSHNAMEPROC)(GLuint name);
+GLAPI PFNGLPUSHNAMEPROC glad_glPushName;
+#define glPushName glad_glPushName
+typedef void (APIENTRYP PFNGLCLEARACCUMPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha);
+GLAPI PFNGLCLEARACCUMPROC glad_glClearAccum;
+#define glClearAccum glad_glClearAccum
+typedef void (APIENTRYP PFNGLCLEARINDEXPROC)(GLfloat c);
+GLAPI PFNGLCLEARINDEXPROC glad_glClearIndex;
+#define glClearIndex glad_glClearIndex
+typedef void (APIENTRYP PFNGLINDEXMASKPROC)(GLuint mask);
+GLAPI PFNGLINDEXMASKPROC glad_glIndexMask;
+#define glIndexMask glad_glIndexMask
+typedef void (APIENTRYP PFNGLACCUMPROC)(GLenum op, GLfloat value);
+GLAPI PFNGLACCUMPROC glad_glAccum;
+#define glAccum glad_glAccum
+typedef void (APIENTRYP PFNGLPOPATTRIBPROC)(void);
+GLAPI PFNGLPOPATTRIBPROC glad_glPopAttrib;
+#define glPopAttrib glad_glPopAttrib
+typedef void (APIENTRYP PFNGLPUSHATTRIBPROC)(GLbitfield mask);
+GLAPI PFNGLPUSHATTRIBPROC glad_glPushAttrib;
+#define glPushAttrib glad_glPushAttrib
+typedef void (APIENTRYP PFNGLMAP1DPROC)(GLenum target, GLdouble u1, GLdouble u2, GLint stride, GLint order, const GLdouble *points);
+GLAPI PFNGLMAP1DPROC glad_glMap1d;
+#define glMap1d glad_glMap1d
+typedef void (APIENTRYP PFNGLMAP1FPROC)(GLenum target, GLfloat u1, GLfloat u2, GLint stride, GLint order, const GLfloat *points);
+GLAPI PFNGLMAP1FPROC glad_glMap1f;
+#define glMap1f glad_glMap1f
+typedef void (APIENTRYP PFNGLMAP2DPROC)(GLenum target, GLdouble u1, GLdouble u2, GLint ustride, GLint uorder, GLdouble v1, GLdouble v2, GLint vstride, GLint vorder, const GLdouble *points);
+GLAPI PFNGLMAP2DPROC glad_glMap2d;
+#define glMap2d glad_glMap2d
+typedef void (APIENTRYP PFNGLMAP2FPROC)(GLenum target, GLfloat u1, GLfloat u2, GLint ustride, GLint uorder, GLfloat v1, GLfloat v2, GLint vstride, GLint vorder, const GLfloat *points);
+GLAPI PFNGLMAP2FPROC glad_glMap2f;
+#define glMap2f glad_glMap2f
+typedef void (APIENTRYP PFNGLMAPGRID1DPROC)(GLint un, GLdouble u1, GLdouble u2);
+GLAPI PFNGLMAPGRID1DPROC glad_glMapGrid1d;
+#define glMapGrid1d glad_glMapGrid1d
+typedef void (APIENTRYP PFNGLMAPGRID1FPROC)(GLint un, GLfloat u1, GLfloat u2);
+GLAPI PFNGLMAPGRID1FPROC glad_glMapGrid1f;
+#define glMapGrid1f glad_glMapGrid1f
+typedef void (APIENTRYP PFNGLMAPGRID2DPROC)(GLint un, GLdouble u1, GLdouble u2, GLint vn, GLdouble v1, GLdouble v2);
+GLAPI PFNGLMAPGRID2DPROC glad_glMapGrid2d;
+#define glMapGrid2d glad_glMapGrid2d
+typedef void (APIENTRYP PFNGLMAPGRID2FPROC)(GLint un, GLfloat u1, GLfloat u2, GLint vn, GLfloat v1, GLfloat v2);
+GLAPI PFNGLMAPGRID2FPROC glad_glMapGrid2f;
+#define glMapGrid2f glad_glMapGrid2f
+typedef void (APIENTRYP PFNGLEVALCOORD1DPROC)(GLdouble u);
+GLAPI PFNGLEVALCOORD1DPROC glad_glEvalCoord1d;
+#define glEvalCoord1d glad_glEvalCoord1d
+typedef void (APIENTRYP PFNGLEVALCOORD1DVPROC)(const GLdouble *u);
+GLAPI PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv;
+#define glEvalCoord1dv glad_glEvalCoord1dv
+typedef void (APIENTRYP PFNGLEVALCOORD1FPROC)(GLfloat u);
+GLAPI PFNGLEVALCOORD1FPROC glad_glEvalCoord1f;
+#define glEvalCoord1f glad_glEvalCoord1f
+typedef void (APIENTRYP PFNGLEVALCOORD1FVPROC)(const GLfloat *u);
+GLAPI PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv;
+#define glEvalCoord1fv glad_glEvalCoord1fv
+typedef void (APIENTRYP PFNGLEVALCOORD2DPROC)(GLdouble u, GLdouble v);
+GLAPI PFNGLEVALCOORD2DPROC glad_glEvalCoord2d;
+#define glEvalCoord2d glad_glEvalCoord2d
+typedef void (APIENTRYP PFNGLEVALCOORD2DVPROC)(const GLdouble *u);
+GLAPI PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv;
+#define glEvalCoord2dv glad_glEvalCoord2dv
+typedef void (APIENTRYP PFNGLEVALCOORD2FPROC)(GLfloat u, GLfloat v);
+GLAPI PFNGLEVALCOORD2FPROC glad_glEvalCoord2f;
+#define glEvalCoord2f glad_glEvalCoord2f
+typedef void (APIENTRYP PFNGLEVALCOORD2FVPROC)(const GLfloat *u);
+GLAPI PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv;
+#define glEvalCoord2fv glad_glEvalCoord2fv
+typedef void (APIENTRYP PFNGLEVALMESH1PROC)(GLenum mode, GLint i1, GLint i2);
+GLAPI PFNGLEVALMESH1PROC glad_glEvalMesh1;
+#define glEvalMesh1 glad_glEvalMesh1
+typedef void (APIENTRYP PFNGLEVALPOINT1PROC)(GLint i);
+GLAPI PFNGLEVALPOINT1PROC glad_glEvalPoint1;
+#define glEvalPoint1 glad_glEvalPoint1
+typedef void (APIENTRYP PFNGLEVALMESH2PROC)(GLenum mode, GLint i1, GLint i2, GLint j1, GLint j2);
+GLAPI PFNGLEVALMESH2PROC glad_glEvalMesh2;
+#define glEvalMesh2 glad_glEvalMesh2
+typedef void (APIENTRYP PFNGLEVALPOINT2PROC)(GLint i, GLint j);
+GLAPI PFNGLEVALPOINT2PROC glad_glEvalPoint2;
+#define glEvalPoint2 glad_glEvalPoint2
+typedef void (APIENTRYP PFNGLALPHAFUNCPROC)(GLenum func, GLfloat ref);
+GLAPI PFNGLALPHAFUNCPROC glad_glAlphaFunc;
+#define glAlphaFunc glad_glAlphaFunc
+typedef void (APIENTRYP PFNGLPIXELZOOMPROC)(GLfloat xfactor, GLfloat yfactor);
+GLAPI PFNGLPIXELZOOMPROC glad_glPixelZoom;
+#define glPixelZoom glad_glPixelZoom
+typedef void (APIENTRYP PFNGLPIXELTRANSFERFPROC)(GLenum pname, GLfloat param);
+GLAPI PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf;
+#define glPixelTransferf glad_glPixelTransferf
+typedef void (APIENTRYP PFNGLPIXELTRANSFERIPROC)(GLenum pname, GLint param);
+GLAPI PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi;
+#define glPixelTransferi glad_glPixelTransferi
+typedef void (APIENTRYP PFNGLPIXELMAPFVPROC)(GLenum map, GLsizei mapsize, const GLfloat *values);
+GLAPI PFNGLPIXELMAPFVPROC glad_glPixelMapfv;
+#define glPixelMapfv glad_glPixelMapfv
+typedef void (APIENTRYP PFNGLPIXELMAPUIVPROC)(GLenum map, GLsizei mapsize, const GLuint *values);
+GLAPI PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv;
+#define glPixelMapuiv glad_glPixelMapuiv
+typedef void (APIENTRYP PFNGLPIXELMAPUSVPROC)(GLenum map, GLsizei mapsize, const GLushort *values);
+GLAPI PFNGLPIXELMAPUSVPROC glad_glPixelMapusv;
+#define glPixelMapusv glad_glPixelMapusv
+typedef void (APIENTRYP PFNGLCOPYPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum type);
+GLAPI PFNGLCOPYPIXELSPROC glad_glCopyPixels;
+#define glCopyPixels glad_glCopyPixels
+typedef void (APIENTRYP PFNGLDRAWPIXELSPROC)(GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels);
+GLAPI PFNGLDRAWPIXELSPROC glad_glDrawPixels;
+#define glDrawPixels glad_glDrawPixels
+typedef void (APIENTRYP PFNGLGETCLIPPLANEPROC)(GLenum plane, GLdouble *equation);
+GLAPI PFNGLGETCLIPPLANEPROC glad_glGetClipPlane;
+#define glGetClipPlane glad_glGetClipPlane
+typedef void (APIENTRYP PFNGLGETLIGHTFVPROC)(GLenum light, GLenum pname, GLfloat *params);
+GLAPI PFNGLGETLIGHTFVPROC glad_glGetLightfv;
+#define glGetLightfv glad_glGetLightfv
+typedef void (APIENTRYP PFNGLGETLIGHTIVPROC)(GLenum light, GLenum pname, GLint *params);
+GLAPI PFNGLGETLIGHTIVPROC glad_glGetLightiv;
+#define glGetLightiv glad_glGetLightiv
+typedef void (APIENTRYP PFNGLGETMAPDVPROC)(GLenum target, GLenum query, GLdouble *v);
+GLAPI PFNGLGETMAPDVPROC glad_glGetMapdv;
+#define glGetMapdv glad_glGetMapdv
+typedef void (APIENTRYP PFNGLGETMAPFVPROC)(GLenum target, GLenum query, GLfloat *v);
+GLAPI PFNGLGETMAPFVPROC glad_glGetMapfv;
+#define glGetMapfv glad_glGetMapfv
+typedef void (APIENTRYP PFNGLGETMAPIVPROC)(GLenum target, GLenum query, GLint *v);
+GLAPI PFNGLGETMAPIVPROC glad_glGetMapiv;
+#define glGetMapiv glad_glGetMapiv
+typedef void (APIENTRYP PFNGLGETMATERIALFVPROC)(GLenum face, GLenum pname, GLfloat *params);
+GLAPI PFNGLGETMATERIALFVPROC glad_glGetMaterialfv;
+#define glGetMaterialfv glad_glGetMaterialfv
+typedef void (APIENTRYP PFNGLGETMATERIALIVPROC)(GLenum face, GLenum pname, GLint *params);
+GLAPI PFNGLGETMATERIALIVPROC glad_glGetMaterialiv;
+#define glGetMaterialiv glad_glGetMaterialiv
+typedef void (APIENTRYP PFNGLGETPIXELMAPFVPROC)(GLenum map, GLfloat *values);
+GLAPI PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv;
+#define glGetPixelMapfv glad_glGetPixelMapfv
+typedef void (APIENTRYP PFNGLGETPIXELMAPUIVPROC)(GLenum map, GLuint *values);
+GLAPI PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv;
+#define glGetPixelMapuiv glad_glGetPixelMapuiv
+typedef void (APIENTRYP PFNGLGETPIXELMAPUSVPROC)(GLenum map, GLushort *values);
+GLAPI PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv;
+#define glGetPixelMapusv glad_glGetPixelMapusv
+typedef void (APIENTRYP PFNGLGETPOLYGONSTIPPLEPROC)(GLubyte *mask);
+GLAPI PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple;
+#define glGetPolygonStipple glad_glGetPolygonStipple
+typedef void (APIENTRYP PFNGLGETTEXENVFVPROC)(GLenum target, GLenum pname, GLfloat *params);
+GLAPI PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv;
+#define glGetTexEnvfv glad_glGetTexEnvfv
+typedef void (APIENTRYP PFNGLGETTEXENVIVPROC)(GLenum target, GLenum pname, GLint *params);
+GLAPI PFNGLGETTEXENVIVPROC glad_glGetTexEnviv;
+#define glGetTexEnviv glad_glGetTexEnviv
+typedef void (APIENTRYP PFNGLGETTEXGENDVPROC)(GLenum coord, GLenum pname, GLdouble *params);
+GLAPI PFNGLGETTEXGENDVPROC glad_glGetTexGendv;
+#define glGetTexGendv glad_glGetTexGendv
+typedef void (APIENTRYP PFNGLGETTEXGENFVPROC)(GLenum coord, GLenum pname, GLfloat *params);
+GLAPI PFNGLGETTEXGENFVPROC glad_glGetTexGenfv;
+#define glGetTexGenfv glad_glGetTexGenfv
+typedef void (APIENTRYP PFNGLGETTEXGENIVPROC)(GLenum coord, GLenum pname, GLint *params);
+GLAPI PFNGLGETTEXGENIVPROC glad_glGetTexGeniv;
+#define glGetTexGeniv glad_glGetTexGeniv
+typedef GLboolean (APIENTRYP PFNGLISLISTPROC)(GLuint list);
+GLAPI PFNGLISLISTPROC glad_glIsList;
+#define glIsList glad_glIsList
+typedef void (APIENTRYP PFNGLFRUSTUMPROC)(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+GLAPI PFNGLFRUSTUMPROC glad_glFrustum;
+#define glFrustum glad_glFrustum
+typedef void (APIENTRYP PFNGLLOADIDENTITYPROC)(void);
+GLAPI PFNGLLOADIDENTITYPROC glad_glLoadIdentity;
+#define glLoadIdentity glad_glLoadIdentity
+typedef void (APIENTRYP PFNGLLOADMATRIXFPROC)(const GLfloat *m);
+GLAPI PFNGLLOADMATRIXFPROC glad_glLoadMatrixf;
+#define glLoadMatrixf glad_glLoadMatrixf
+typedef void (APIENTRYP PFNGLLOADMATRIXDPROC)(const GLdouble *m);
+GLAPI PFNGLLOADMATRIXDPROC glad_glLoadMatrixd;
+#define glLoadMatrixd glad_glLoadMatrixd
+typedef void (APIENTRYP PFNGLMATRIXMODEPROC)(GLenum mode);
+GLAPI PFNGLMATRIXMODEPROC glad_glMatrixMode;
+#define glMatrixMode glad_glMatrixMode
+typedef void (APIENTRYP PFNGLMULTMATRIXFPROC)(const GLfloat *m);
+GLAPI PFNGLMULTMATRIXFPROC glad_glMultMatrixf;
+#define glMultMatrixf glad_glMultMatrixf
+typedef void (APIENTRYP PFNGLMULTMATRIXDPROC)(const GLdouble *m);
+GLAPI PFNGLMULTMATRIXDPROC glad_glMultMatrixd;
+#define glMultMatrixd glad_glMultMatrixd
+typedef void (APIENTRYP PFNGLORTHOPROC)(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+GLAPI PFNGLORTHOPROC glad_glOrtho;
+#define glOrtho glad_glOrtho
+typedef void (APIENTRYP PFNGLPOPMATRIXPROC)(void);
+GLAPI PFNGLPOPMATRIXPROC glad_glPopMatrix;
+#define glPopMatrix glad_glPopMatrix
+typedef void (APIENTRYP PFNGLPUSHMATRIXPROC)(void);
+GLAPI PFNGLPUSHMATRIXPROC glad_glPushMatrix;
+#define glPushMatrix glad_glPushMatrix
+typedef void (APIENTRYP PFNGLROTATEDPROC)(GLdouble angle, GLdouble x, GLdouble y, GLdouble z);
+GLAPI PFNGLROTATEDPROC glad_glRotated;
+#define glRotated glad_glRotated
+typedef void (APIENTRYP PFNGLROTATEFPROC)(GLfloat angle, GLfloat x, GLfloat y, GLfloat z);
+GLAPI PFNGLROTATEFPROC glad_glRotatef;
+#define glRotatef glad_glRotatef
+typedef void (APIENTRYP PFNGLSCALEDPROC)(GLdouble x, GLdouble y, GLdouble z);
+GLAPI PFNGLSCALEDPROC glad_glScaled;
+#define glScaled glad_glScaled
+typedef void (APIENTRYP PFNGLSCALEFPROC)(GLfloat x, GLfloat y, GLfloat z);
+GLAPI PFNGLSCALEFPROC glad_glScalef;
+#define glScalef glad_glScalef
+typedef void (APIENTRYP PFNGLTRANSLATEDPROC)(GLdouble x, GLdouble y, GLdouble z);
+GLAPI PFNGLTRANSLATEDPROC glad_glTranslated;
+#define glTranslated glad_glTranslated
+typedef void (APIENTRYP PFNGLTRANSLATEFPROC)(GLfloat x, GLfloat y, GLfloat z);
+GLAPI PFNGLTRANSLATEFPROC glad_glTranslatef;
+#define glTranslatef glad_glTranslatef
+#endif
+#ifndef GL_VERSION_1_1 /* OpenGL 1.1 section: skipped entirely if GL_VERSION_1_1 is already defined */
+#define GL_VERSION_1_1 1 /* marks this section as declared so it is not repeated */
+GLAPI int GLAD_GL_VERSION_1_1; /* NOTE(review): presumably a runtime flag set by the loader when 1.1 is available - confirm in the loader source */
+typedef void (APIENTRYP PFNGLDRAWARRAYSPROC)(GLenum mode, GLint first, GLsizei count); /* each entry: (1) function-pointer type, */
+GLAPI PFNGLDRAWARRAYSPROC glad_glDrawArrays; /* (2) GLAPI declaration of a glad_-prefixed pointer of that type, */
+#define glDrawArrays glad_glDrawArrays /* (3) macro so code written as glDrawArrays(...) goes through the pointer */
+typedef void (APIENTRYP PFNGLDRAWELEMENTSPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices);
+GLAPI PFNGLDRAWELEMENTSPROC glad_glDrawElements;
+#define glDrawElements glad_glDrawElements
+typedef void (APIENTRYP PFNGLGETPOINTERVPROC)(GLenum pname, void **params);
+GLAPI PFNGLGETPOINTERVPROC glad_glGetPointerv;
+#define glGetPointerv glad_glGetPointerv
+typedef void (APIENTRYP PFNGLPOLYGONOFFSETPROC)(GLfloat factor, GLfloat units);
+GLAPI PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset;
+#define glPolygonOffset glad_glPolygonOffset
+typedef void (APIENTRYP PFNGLCOPYTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border);
+GLAPI PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D;
+#define glCopyTexImage1D glad_glCopyTexImage1D
+typedef void (APIENTRYP PFNGLCOPYTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border);
+GLAPI PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D;
+#define glCopyTexImage2D glad_glCopyTexImage2D
+typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
+GLAPI PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D;
+#define glCopyTexSubImage1D glad_glCopyTexSubImage1D
+typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+GLAPI PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D;
+#define glCopyTexSubImage2D glad_glCopyTexSubImage2D
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels);
+GLAPI PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D;
+#define glTexSubImage1D glad_glTexSubImage1D
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels);
+GLAPI PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D;
+#define glTexSubImage2D glad_glTexSubImage2D
+typedef void (APIENTRYP PFNGLBINDTEXTUREPROC)(GLenum target, GLuint texture);
+GLAPI PFNGLBINDTEXTUREPROC glad_glBindTexture;
+#define glBindTexture glad_glBindTexture
+typedef void (APIENTRYP PFNGLDELETETEXTURESPROC)(GLsizei n, const GLuint *textures);
+GLAPI PFNGLDELETETEXTURESPROC glad_glDeleteTextures;
+#define glDeleteTextures glad_glDeleteTextures
+typedef void (APIENTRYP PFNGLGENTEXTURESPROC)(GLsizei n, GLuint *textures);
+GLAPI PFNGLGENTEXTURESPROC glad_glGenTextures;
+#define glGenTextures glad_glGenTextures
+typedef GLboolean (APIENTRYP PFNGLISTEXTUREPROC)(GLuint texture);
+GLAPI PFNGLISTEXTUREPROC glad_glIsTexture;
+#define glIsTexture glad_glIsTexture
+typedef void (APIENTRYP PFNGLARRAYELEMENTPROC)(GLint i);
+GLAPI PFNGLARRAYELEMENTPROC glad_glArrayElement;
+#define glArrayElement glad_glArrayElement
+typedef void (APIENTRYP PFNGLCOLORPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLCOLORPOINTERPROC glad_glColorPointer;
+#define glColorPointer glad_glColorPointer
+typedef void (APIENTRYP PFNGLDISABLECLIENTSTATEPROC)(GLenum array);
+GLAPI PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState;
+#define glDisableClientState glad_glDisableClientState
+typedef void (APIENTRYP PFNGLEDGEFLAGPOINTERPROC)(GLsizei stride, const void *pointer);
+GLAPI PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer;
+#define glEdgeFlagPointer glad_glEdgeFlagPointer
+typedef void (APIENTRYP PFNGLENABLECLIENTSTATEPROC)(GLenum array);
+GLAPI PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState;
+#define glEnableClientState glad_glEnableClientState
+typedef void (APIENTRYP PFNGLINDEXPOINTERPROC)(GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLINDEXPOINTERPROC glad_glIndexPointer;
+#define glIndexPointer glad_glIndexPointer
+typedef void (APIENTRYP PFNGLINTERLEAVEDARRAYSPROC)(GLenum format, GLsizei stride, const void *pointer);
+GLAPI PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays;
+#define glInterleavedArrays glad_glInterleavedArrays
+typedef void (APIENTRYP PFNGLNORMALPOINTERPROC)(GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLNORMALPOINTERPROC glad_glNormalPointer;
+#define glNormalPointer glad_glNormalPointer
+typedef void (APIENTRYP PFNGLTEXCOORDPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer;
+#define glTexCoordPointer glad_glTexCoordPointer
+typedef void (APIENTRYP PFNGLVERTEXPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLVERTEXPOINTERPROC glad_glVertexPointer;
+#define glVertexPointer glad_glVertexPointer
+typedef GLboolean (APIENTRYP PFNGLARETEXTURESRESIDENTPROC)(GLsizei n, const GLuint *textures, GLboolean *residences);
+GLAPI PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident;
+#define glAreTexturesResident glad_glAreTexturesResident
+typedef void (APIENTRYP PFNGLPRIORITIZETEXTURESPROC)(GLsizei n, const GLuint *textures, const GLfloat *priorities);
+GLAPI PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures;
+#define glPrioritizeTextures glad_glPrioritizeTextures
+typedef void (APIENTRYP PFNGLINDEXUBPROC)(GLubyte c);
+GLAPI PFNGLINDEXUBPROC glad_glIndexub;
+#define glIndexub glad_glIndexub
+typedef void (APIENTRYP PFNGLINDEXUBVPROC)(const GLubyte *c);
+GLAPI PFNGLINDEXUBVPROC glad_glIndexubv;
+#define glIndexubv glad_glIndexubv
+typedef void (APIENTRYP PFNGLPOPCLIENTATTRIBPROC)(void);
+GLAPI PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib;
+#define glPopClientAttrib glad_glPopClientAttrib
+typedef void (APIENTRYP PFNGLPUSHCLIENTATTRIBPROC)(GLbitfield mask);
+GLAPI PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib;
+#define glPushClientAttrib glad_glPushClientAttrib
+#endif /* GL_VERSION_1_1 */
+#ifndef GL_VERSION_1_2 /* OpenGL 1.2 section: skipped entirely if GL_VERSION_1_2 is already defined */
+#define GL_VERSION_1_2 1 /* marks this section as declared so it is not repeated */
+GLAPI int GLAD_GL_VERSION_1_2; /* NOTE(review): presumably a runtime flag set by the loader when 1.2 is available - confirm in the loader source */
+typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices); /* each entry: (1) function-pointer type, */
+GLAPI PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements; /* (2) GLAPI declaration of a glad_-prefixed pointer of that type, */
+#define glDrawRangeElements glad_glDrawRangeElements /* (3) macro so code written as glDrawRangeElements(...) goes through the pointer */
+typedef void (APIENTRYP PFNGLTEXIMAGE3DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels);
+GLAPI PFNGLTEXIMAGE3DPROC glad_glTexImage3D;
+#define glTexImage3D glad_glTexImage3D
+typedef void (APIENTRYP PFNGLTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels);
+GLAPI PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D;
+#define glTexSubImage3D glad_glTexSubImage3D
+typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
+GLAPI PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D;
+#define glCopyTexSubImage3D glad_glCopyTexSubImage3D
+#endif /* GL_VERSION_1_2 */
+#ifndef GL_VERSION_1_3
+#define GL_VERSION_1_3 1
+GLAPI int GLAD_GL_VERSION_1_3;
+typedef void (APIENTRYP PFNGLACTIVETEXTUREPROC)(GLenum texture);
+GLAPI PFNGLACTIVETEXTUREPROC glad_glActiveTexture;
+#define glActiveTexture glad_glActiveTexture
+typedef void (APIENTRYP PFNGLSAMPLECOVERAGEPROC)(GLfloat value, GLboolean invert);
+GLAPI PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage;
+#define glSampleCoverage glad_glSampleCoverage
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void *data);
+GLAPI PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D;
+#define glCompressedTexImage3D glad_glCompressedTexImage3D
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data);
+GLAPI PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D;
+#define glCompressedTexImage2D glad_glCompressedTexImage2D
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void *data);
+GLAPI PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D;
+#define glCompressedTexImage1D glad_glCompressedTexImage1D
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data);
+GLAPI PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D;
+#define glCompressedTexSubImage3D glad_glCompressedTexSubImage3D
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data);
+GLAPI PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D;
+#define glCompressedTexSubImage2D glad_glCompressedTexSubImage2D
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data);
+GLAPI PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D;
+#define glCompressedTexSubImage1D glad_glCompressedTexSubImage1D
+typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint level, void *img);
+GLAPI PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage;
+#define glGetCompressedTexImage glad_glGetCompressedTexImage
+typedef void (APIENTRYP PFNGLCLIENTACTIVETEXTUREPROC)(GLenum texture);
+GLAPI PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture;
+#define glClientActiveTexture glad_glClientActiveTexture
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1DPROC)(GLenum target, GLdouble s);
+GLAPI PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d;
+#define glMultiTexCoord1d glad_glMultiTexCoord1d
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1DVPROC)(GLenum target, const GLdouble *v);
+GLAPI PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv;
+#define glMultiTexCoord1dv glad_glMultiTexCoord1dv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1FPROC)(GLenum target, GLfloat s);
+GLAPI PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f;
+#define glMultiTexCoord1f glad_glMultiTexCoord1f
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1FVPROC)(GLenum target, const GLfloat *v);
+GLAPI PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv;
+#define glMultiTexCoord1fv glad_glMultiTexCoord1fv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1IPROC)(GLenum target, GLint s);
+GLAPI PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i;
+#define glMultiTexCoord1i glad_glMultiTexCoord1i
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1IVPROC)(GLenum target, const GLint *v);
+GLAPI PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv;
+#define glMultiTexCoord1iv glad_glMultiTexCoord1iv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1SPROC)(GLenum target, GLshort s);
+GLAPI PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s;
+#define glMultiTexCoord1s glad_glMultiTexCoord1s
+typedef void (APIENTRYP PFNGLMULTITEXCOORD1SVPROC)(GLenum target, const GLshort *v);
+GLAPI PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv;
+#define glMultiTexCoord1sv glad_glMultiTexCoord1sv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2DPROC)(GLenum target, GLdouble s, GLdouble t);
+GLAPI PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d;
+#define glMultiTexCoord2d glad_glMultiTexCoord2d
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2DVPROC)(GLenum target, const GLdouble *v);
+GLAPI PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv;
+#define glMultiTexCoord2dv glad_glMultiTexCoord2dv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2FPROC)(GLenum target, GLfloat s, GLfloat t);
+GLAPI PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f;
+#define glMultiTexCoord2f glad_glMultiTexCoord2f
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2FVPROC)(GLenum target, const GLfloat *v);
+GLAPI PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv;
+#define glMultiTexCoord2fv glad_glMultiTexCoord2fv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2IPROC)(GLenum target, GLint s, GLint t);
+GLAPI PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i;
+#define glMultiTexCoord2i glad_glMultiTexCoord2i
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2IVPROC)(GLenum target, const GLint *v);
+GLAPI PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv;
+#define glMultiTexCoord2iv glad_glMultiTexCoord2iv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2SPROC)(GLenum target, GLshort s, GLshort t);
+GLAPI PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s;
+#define glMultiTexCoord2s glad_glMultiTexCoord2s
+typedef void (APIENTRYP PFNGLMULTITEXCOORD2SVPROC)(GLenum target, const GLshort *v);
+GLAPI PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv;
+#define glMultiTexCoord2sv glad_glMultiTexCoord2sv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3DPROC)(GLenum target, GLdouble s, GLdouble t, GLdouble r);
+GLAPI PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d;
+#define glMultiTexCoord3d glad_glMultiTexCoord3d
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3DVPROC)(GLenum target, const GLdouble *v);
+GLAPI PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv;
+#define glMultiTexCoord3dv glad_glMultiTexCoord3dv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3FPROC)(GLenum target, GLfloat s, GLfloat t, GLfloat r);
+GLAPI PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f;
+#define glMultiTexCoord3f glad_glMultiTexCoord3f
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3FVPROC)(GLenum target, const GLfloat *v);
+GLAPI PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv;
+#define glMultiTexCoord3fv glad_glMultiTexCoord3fv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3IPROC)(GLenum target, GLint s, GLint t, GLint r);
+GLAPI PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i;
+#define glMultiTexCoord3i glad_glMultiTexCoord3i
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3IVPROC)(GLenum target, const GLint *v);
+GLAPI PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv;
+#define glMultiTexCoord3iv glad_glMultiTexCoord3iv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3SPROC)(GLenum target, GLshort s, GLshort t, GLshort r);
+GLAPI PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s;
+#define glMultiTexCoord3s glad_glMultiTexCoord3s
+typedef void (APIENTRYP PFNGLMULTITEXCOORD3SVPROC)(GLenum target, const GLshort *v);
+GLAPI PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv;
+#define glMultiTexCoord3sv glad_glMultiTexCoord3sv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4DPROC)(GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q);
+GLAPI PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d;
+#define glMultiTexCoord4d glad_glMultiTexCoord4d
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4DVPROC)(GLenum target, const GLdouble *v);
+GLAPI PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv;
+#define glMultiTexCoord4dv glad_glMultiTexCoord4dv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4FPROC)(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q);
+GLAPI PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f;
+#define glMultiTexCoord4f glad_glMultiTexCoord4f
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4FVPROC)(GLenum target, const GLfloat *v);
+GLAPI PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv;
+#define glMultiTexCoord4fv glad_glMultiTexCoord4fv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4IPROC)(GLenum target, GLint s, GLint t, GLint r, GLint q);
+GLAPI PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i;
+#define glMultiTexCoord4i glad_glMultiTexCoord4i
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4IVPROC)(GLenum target, const GLint *v);
+GLAPI PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv;
+#define glMultiTexCoord4iv glad_glMultiTexCoord4iv
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4SPROC)(GLenum target, GLshort s, GLshort t, GLshort r, GLshort q);
+GLAPI PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s;
+#define glMultiTexCoord4s glad_glMultiTexCoord4s
+typedef void (APIENTRYP PFNGLMULTITEXCOORD4SVPROC)(GLenum target, const GLshort *v);
+GLAPI PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv;
+#define glMultiTexCoord4sv glad_glMultiTexCoord4sv
+typedef void (APIENTRYP PFNGLLOADTRANSPOSEMATRIXFPROC)(const GLfloat *m);
+GLAPI PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf;
+#define glLoadTransposeMatrixf glad_glLoadTransposeMatrixf
+typedef void (APIENTRYP PFNGLLOADTRANSPOSEMATRIXDPROC)(const GLdouble *m);
+GLAPI PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd;
+#define glLoadTransposeMatrixd glad_glLoadTransposeMatrixd
+typedef void (APIENTRYP PFNGLMULTTRANSPOSEMATRIXFPROC)(const GLfloat *m);
+GLAPI PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf;
+#define glMultTransposeMatrixf glad_glMultTransposeMatrixf
+typedef void (APIENTRYP PFNGLMULTTRANSPOSEMATRIXDPROC)(const GLdouble *m);
+GLAPI PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd;
+#define glMultTransposeMatrixd glad_glMultTransposeMatrixd
+#endif
+#ifndef GL_VERSION_1_4 /* Guard: everything up to the matching #endif is skipped when GL_VERSION_1_4 is already defined. */
+#define GL_VERSION_1_4 1
+GLAPI int GLAD_GL_VERSION_1_4; /* Per-version int flag. NOTE(review): presumably set by the loader when GL 1.4 is usable -- confirm against the loader source. */
+typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEPROC)(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); /* Every entry point below uses the same three-line pattern: a function-pointer typedef, ... */
+GLAPI PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate; /* ... a GLAPI-declared pointer variable named glad_<function>, ... */
+#define glBlendFuncSeparate glad_glBlendFuncSeparate /* ... and a macro so that code written against the plain gl* name uses that pointer. */
+typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSPROC)(GLenum mode, const GLint *first, const GLsizei *count, GLsizei drawcount); /* glMultiDraw* entry points. */
+GLAPI PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays;
+#define glMultiDrawArrays glad_glMultiDrawArrays
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSPROC)(GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount);
+GLAPI PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements;
+#define glMultiDrawElements glad_glMultiDrawElements
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFPROC)(GLenum pname, GLfloat param); /* glPointParameter* entry points: scalar (f, i) and pointer (fv, iv) forms. */
+GLAPI PFNGLPOINTPARAMETERFPROC glad_glPointParameterf;
+#define glPointParameterf glad_glPointParameterf
+typedef void (APIENTRYP PFNGLPOINTPARAMETERFVPROC)(GLenum pname, const GLfloat *params);
+GLAPI PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv;
+#define glPointParameterfv glad_glPointParameterfv
+typedef void (APIENTRYP PFNGLPOINTPARAMETERIPROC)(GLenum pname, GLint param);
+GLAPI PFNGLPOINTPARAMETERIPROC glad_glPointParameteri;
+#define glPointParameteri glad_glPointParameteri
+typedef void (APIENTRYP PFNGLPOINTPARAMETERIVPROC)(GLenum pname, const GLint *params);
+GLAPI PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv;
+#define glPointParameteriv glad_glPointParameteriv
+typedef void (APIENTRYP PFNGLFOGCOORDFPROC)(GLfloat coord); /* glFogCoord* entry points: by-value (f, d), pointer (fv, dv), and glFogCoordPointer. */
+GLAPI PFNGLFOGCOORDFPROC glad_glFogCoordf;
+#define glFogCoordf glad_glFogCoordf
+typedef void (APIENTRYP PFNGLFOGCOORDFVPROC)(const GLfloat *coord);
+GLAPI PFNGLFOGCOORDFVPROC glad_glFogCoordfv;
+#define glFogCoordfv glad_glFogCoordfv
+typedef void (APIENTRYP PFNGLFOGCOORDDPROC)(GLdouble coord);
+GLAPI PFNGLFOGCOORDDPROC glad_glFogCoordd;
+#define glFogCoordd glad_glFogCoordd
+typedef void (APIENTRYP PFNGLFOGCOORDDVPROC)(const GLdouble *coord);
+GLAPI PFNGLFOGCOORDDVPROC glad_glFogCoorddv;
+#define glFogCoorddv glad_glFogCoorddv
+typedef void (APIENTRYP PFNGLFOGCOORDPOINTERPROC)(GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer;
+#define glFogCoordPointer glad_glFogCoordPointer
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3BPROC)(GLbyte red, GLbyte green, GLbyte blue); /* glSecondaryColor3* entry points: one by-value and one pointer (v) form per component type (b, d, f, i, s, ub, ui, us). */
+GLAPI PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b;
+#define glSecondaryColor3b glad_glSecondaryColor3b
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3BVPROC)(const GLbyte *v);
+GLAPI PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv;
+#define glSecondaryColor3bv glad_glSecondaryColor3bv
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3DPROC)(GLdouble red, GLdouble green, GLdouble blue);
+GLAPI PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d;
+#define glSecondaryColor3d glad_glSecondaryColor3d
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3DVPROC)(const GLdouble *v);
+GLAPI PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv;
+#define glSecondaryColor3dv glad_glSecondaryColor3dv
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3FPROC)(GLfloat red, GLfloat green, GLfloat blue);
+GLAPI PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f;
+#define glSecondaryColor3f glad_glSecondaryColor3f
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3FVPROC)(const GLfloat *v);
+GLAPI PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv;
+#define glSecondaryColor3fv glad_glSecondaryColor3fv
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3IPROC)(GLint red, GLint green, GLint blue);
+GLAPI PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i;
+#define glSecondaryColor3i glad_glSecondaryColor3i
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3IVPROC)(const GLint *v);
+GLAPI PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv;
+#define glSecondaryColor3iv glad_glSecondaryColor3iv
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3SPROC)(GLshort red, GLshort green, GLshort blue);
+GLAPI PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s;
+#define glSecondaryColor3s glad_glSecondaryColor3s
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3SVPROC)(const GLshort *v);
+GLAPI PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv;
+#define glSecondaryColor3sv glad_glSecondaryColor3sv
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UBPROC)(GLubyte red, GLubyte green, GLubyte blue);
+GLAPI PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub;
+#define glSecondaryColor3ub glad_glSecondaryColor3ub
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UBVPROC)(const GLubyte *v);
+GLAPI PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv;
+#define glSecondaryColor3ubv glad_glSecondaryColor3ubv
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UIPROC)(GLuint red, GLuint green, GLuint blue);
+GLAPI PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui;
+#define glSecondaryColor3ui glad_glSecondaryColor3ui
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3UIVPROC)(const GLuint *v);
+GLAPI PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv;
+#define glSecondaryColor3uiv glad_glSecondaryColor3uiv
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3USPROC)(GLushort red, GLushort green, GLushort blue);
+GLAPI PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us;
+#define glSecondaryColor3us glad_glSecondaryColor3us
+typedef void (APIENTRYP PFNGLSECONDARYCOLOR3USVPROC)(const GLushort *v);
+GLAPI PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv;
+#define glSecondaryColor3usv glad_glSecondaryColor3usv
+typedef void (APIENTRYP PFNGLSECONDARYCOLORPOINTERPROC)(GLint size, GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer;
+#define glSecondaryColorPointer glad_glSecondaryColorPointer
+typedef void (APIENTRYP PFNGLWINDOWPOS2DPROC)(GLdouble x, GLdouble y); /* glWindowPos2* / glWindowPos3* entry points: by-value and pointer (v) forms for d, f, i, s. */
+GLAPI PFNGLWINDOWPOS2DPROC glad_glWindowPos2d;
+#define glWindowPos2d glad_glWindowPos2d
+typedef void (APIENTRYP PFNGLWINDOWPOS2DVPROC)(const GLdouble *v);
+GLAPI PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv;
+#define glWindowPos2dv glad_glWindowPos2dv
+typedef void (APIENTRYP PFNGLWINDOWPOS2FPROC)(GLfloat x, GLfloat y);
+GLAPI PFNGLWINDOWPOS2FPROC glad_glWindowPos2f;
+#define glWindowPos2f glad_glWindowPos2f
+typedef void (APIENTRYP PFNGLWINDOWPOS2FVPROC)(const GLfloat *v);
+GLAPI PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv;
+#define glWindowPos2fv glad_glWindowPos2fv
+typedef void (APIENTRYP PFNGLWINDOWPOS2IPROC)(GLint x, GLint y);
+GLAPI PFNGLWINDOWPOS2IPROC glad_glWindowPos2i;
+#define glWindowPos2i glad_glWindowPos2i
+typedef void (APIENTRYP PFNGLWINDOWPOS2IVPROC)(const GLint *v);
+GLAPI PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv;
+#define glWindowPos2iv glad_glWindowPos2iv
+typedef void (APIENTRYP PFNGLWINDOWPOS2SPROC)(GLshort x, GLshort y);
+GLAPI PFNGLWINDOWPOS2SPROC glad_glWindowPos2s;
+#define glWindowPos2s glad_glWindowPos2s
+typedef void (APIENTRYP PFNGLWINDOWPOS2SVPROC)(const GLshort *v);
+GLAPI PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv;
+#define glWindowPos2sv glad_glWindowPos2sv
+typedef void (APIENTRYP PFNGLWINDOWPOS3DPROC)(GLdouble x, GLdouble y, GLdouble z);
+GLAPI PFNGLWINDOWPOS3DPROC glad_glWindowPos3d;
+#define glWindowPos3d glad_glWindowPos3d
+typedef void (APIENTRYP PFNGLWINDOWPOS3DVPROC)(const GLdouble *v);
+GLAPI PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv;
+#define glWindowPos3dv glad_glWindowPos3dv
+typedef void (APIENTRYP PFNGLWINDOWPOS3FPROC)(GLfloat x, GLfloat y, GLfloat z);
+GLAPI PFNGLWINDOWPOS3FPROC glad_glWindowPos3f;
+#define glWindowPos3f glad_glWindowPos3f
+typedef void (APIENTRYP PFNGLWINDOWPOS3FVPROC)(const GLfloat *v);
+GLAPI PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv;
+#define glWindowPos3fv glad_glWindowPos3fv
+typedef void (APIENTRYP PFNGLWINDOWPOS3IPROC)(GLint x, GLint y, GLint z);
+GLAPI PFNGLWINDOWPOS3IPROC glad_glWindowPos3i;
+#define glWindowPos3i glad_glWindowPos3i
+typedef void (APIENTRYP PFNGLWINDOWPOS3IVPROC)(const GLint *v);
+GLAPI PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv;
+#define glWindowPos3iv glad_glWindowPos3iv
+typedef void (APIENTRYP PFNGLWINDOWPOS3SPROC)(GLshort x, GLshort y, GLshort z);
+GLAPI PFNGLWINDOWPOS3SPROC glad_glWindowPos3s;
+#define glWindowPos3s glad_glWindowPos3s
+typedef void (APIENTRYP PFNGLWINDOWPOS3SVPROC)(const GLshort *v);
+GLAPI PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv;
+#define glWindowPos3sv glad_glWindowPos3sv
+typedef void (APIENTRYP PFNGLBLENDCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); /* glBlendColor and glBlendEquation close out this version block. */
+GLAPI PFNGLBLENDCOLORPROC glad_glBlendColor;
+#define glBlendColor glad_glBlendColor
+typedef void (APIENTRYP PFNGLBLENDEQUATIONPROC)(GLenum mode);
+GLAPI PFNGLBLENDEQUATIONPROC glad_glBlendEquation;
+#define glBlendEquation glad_glBlendEquation
+#endif /* GL_VERSION_1_4 */
+#ifndef GL_VERSION_1_5 /* Guard: everything up to the matching #endif is skipped when GL_VERSION_1_5 is already defined. */
+#define GL_VERSION_1_5 1
+GLAPI int GLAD_GL_VERSION_1_5; /* Per-version int flag. NOTE(review): presumably set by the loader when GL 1.5 is usable -- confirm against the loader source. */
+typedef void (APIENTRYP PFNGLGENQUERIESPROC)(GLsizei n, GLuint *ids); /* Query entry points. Each one is a function-pointer typedef, a GLAPI pointer variable glad_<function>, and a macro aliasing the plain gl* name to that variable. */
+GLAPI PFNGLGENQUERIESPROC glad_glGenQueries;
+#define glGenQueries glad_glGenQueries
+typedef void (APIENTRYP PFNGLDELETEQUERIESPROC)(GLsizei n, const GLuint *ids);
+GLAPI PFNGLDELETEQUERIESPROC glad_glDeleteQueries;
+#define glDeleteQueries glad_glDeleteQueries
+typedef GLboolean (APIENTRYP PFNGLISQUERYPROC)(GLuint id);
+GLAPI PFNGLISQUERYPROC glad_glIsQuery;
+#define glIsQuery glad_glIsQuery
+typedef void (APIENTRYP PFNGLBEGINQUERYPROC)(GLenum target, GLuint id);
+GLAPI PFNGLBEGINQUERYPROC glad_glBeginQuery;
+#define glBeginQuery glad_glBeginQuery
+typedef void (APIENTRYP PFNGLENDQUERYPROC)(GLenum target);
+GLAPI PFNGLENDQUERYPROC glad_glEndQuery;
+#define glEndQuery glad_glEndQuery
+typedef void (APIENTRYP PFNGLGETQUERYIVPROC)(GLenum target, GLenum pname, GLint *params);
+GLAPI PFNGLGETQUERYIVPROC glad_glGetQueryiv;
+#define glGetQueryiv glad_glGetQueryiv
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTIVPROC)(GLuint id, GLenum pname, GLint *params);
+GLAPI PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv;
+#define glGetQueryObjectiv glad_glGetQueryObjectiv
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTUIVPROC)(GLuint id, GLenum pname, GLuint *params);
+GLAPI PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv;
+#define glGetQueryObjectuiv glad_glGetQueryObjectuiv
+typedef void (APIENTRYP PFNGLBINDBUFFERPROC)(GLenum target, GLuint buffer); /* Buffer entry points (glBindBuffer through glGetBufferPointerv), same three-line pattern. */
+GLAPI PFNGLBINDBUFFERPROC glad_glBindBuffer;
+#define glBindBuffer glad_glBindBuffer
+typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC)(GLsizei n, const GLuint *buffers);
+GLAPI PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers;
+#define glDeleteBuffers glad_glDeleteBuffers
+typedef void (APIENTRYP PFNGLGENBUFFERSPROC)(GLsizei n, GLuint *buffers);
+GLAPI PFNGLGENBUFFERSPROC glad_glGenBuffers;
+#define glGenBuffers glad_glGenBuffers
+typedef GLboolean (APIENTRYP PFNGLISBUFFERPROC)(GLuint buffer);
+GLAPI PFNGLISBUFFERPROC glad_glIsBuffer;
+#define glIsBuffer glad_glIsBuffer
+typedef void (APIENTRYP PFNGLBUFFERDATAPROC)(GLenum target, GLsizeiptr size, const void *data, GLenum usage);
+GLAPI PFNGLBUFFERDATAPROC glad_glBufferData;
+#define glBufferData glad_glBufferData
+typedef void (APIENTRYP PFNGLBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, const void *data);
+GLAPI PFNGLBUFFERSUBDATAPROC glad_glBufferSubData;
+#define glBufferSubData glad_glBufferSubData
+typedef void (APIENTRYP PFNGLGETBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, void *data);
+GLAPI PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData;
+#define glGetBufferSubData glad_glGetBufferSubData
+typedef void * (APIENTRYP PFNGLMAPBUFFERPROC)(GLenum target, GLenum access); /* The only entry point in this block whose type returns a pointer (void *). */
+GLAPI PFNGLMAPBUFFERPROC glad_glMapBuffer;
+#define glMapBuffer glad_glMapBuffer
+typedef GLboolean (APIENTRYP PFNGLUNMAPBUFFERPROC)(GLenum target);
+GLAPI PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer;
+#define glUnmapBuffer glad_glUnmapBuffer
+typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint *params);
+GLAPI PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv;
+#define glGetBufferParameteriv glad_glGetBufferParameteriv
+typedef void (APIENTRYP PFNGLGETBUFFERPOINTERVPROC)(GLenum target, GLenum pname, void **params);
+GLAPI PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv;
+#define glGetBufferPointerv glad_glGetBufferPointerv
+#endif /* GL_VERSION_1_5 */
+#ifndef GL_VERSION_2_0
+#define GL_VERSION_2_0 1
+GLAPI int GLAD_GL_VERSION_2_0;
+typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEPROC)(GLenum modeRGB, GLenum modeAlpha);
+GLAPI PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate;
+#define glBlendEquationSeparate glad_glBlendEquationSeparate
+typedef void (APIENTRYP PFNGLDRAWBUFFERSPROC)(GLsizei n, const GLenum *bufs);
+GLAPI PFNGLDRAWBUFFERSPROC glad_glDrawBuffers;
+#define glDrawBuffers glad_glDrawBuffers
+typedef void (APIENTRYP PFNGLSTENCILOPSEPARATEPROC)(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass);
+GLAPI PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate;
+#define glStencilOpSeparate glad_glStencilOpSeparate
+typedef void (APIENTRYP PFNGLSTENCILFUNCSEPARATEPROC)(GLenum face, GLenum func, GLint ref, GLuint mask);
+GLAPI PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate;
+#define glStencilFuncSeparate glad_glStencilFuncSeparate
+typedef void (APIENTRYP PFNGLSTENCILMASKSEPARATEPROC)(GLenum face, GLuint mask);
+GLAPI PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate;
+#define glStencilMaskSeparate glad_glStencilMaskSeparate
+typedef void (APIENTRYP PFNGLATTACHSHADERPROC)(GLuint program, GLuint shader);
+GLAPI PFNGLATTACHSHADERPROC glad_glAttachShader;
+#define glAttachShader glad_glAttachShader
+typedef void (APIENTRYP PFNGLBINDATTRIBLOCATIONPROC)(GLuint program, GLuint index, const GLchar *name);
+GLAPI PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation;
+#define glBindAttribLocation glad_glBindAttribLocation
+typedef void (APIENTRYP PFNGLCOMPILESHADERPROC)(GLuint shader);
+GLAPI PFNGLCOMPILESHADERPROC glad_glCompileShader;
+#define glCompileShader glad_glCompileShader
+typedef GLuint (APIENTRYP PFNGLCREATEPROGRAMPROC)(void);
+GLAPI PFNGLCREATEPROGRAMPROC glad_glCreateProgram;
+#define glCreateProgram glad_glCreateProgram
+typedef GLuint (APIENTRYP PFNGLCREATESHADERPROC)(GLenum type);
+GLAPI PFNGLCREATESHADERPROC glad_glCreateShader;
+#define glCreateShader glad_glCreateShader
+typedef void (APIENTRYP PFNGLDELETEPROGRAMPROC)(GLuint program);
+GLAPI PFNGLDELETEPROGRAMPROC glad_glDeleteProgram;
+#define glDeleteProgram glad_glDeleteProgram
+typedef void (APIENTRYP PFNGLDELETESHADERPROC)(GLuint shader);
+GLAPI PFNGLDELETESHADERPROC glad_glDeleteShader;
+#define glDeleteShader glad_glDeleteShader
+typedef void (APIENTRYP PFNGLDETACHSHADERPROC)(GLuint program, GLuint shader);
+GLAPI PFNGLDETACHSHADERPROC glad_glDetachShader;
+#define glDetachShader glad_glDetachShader
+typedef void (APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC)(GLuint index);
+GLAPI PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray;
+#define glDisableVertexAttribArray glad_glDisableVertexAttribArray
+typedef void (APIENTRYP PFNGLENABLEVERTEXATTRIBARRAYPROC)(GLuint index);
+GLAPI PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray;
+#define glEnableVertexAttribArray glad_glEnableVertexAttribArray
+typedef void (APIENTRYP PFNGLGETACTIVEATTRIBPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name);
+GLAPI PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib;
+#define glGetActiveAttrib glad_glGetActiveAttrib
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name);
+GLAPI PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform;
+#define glGetActiveUniform glad_glGetActiveUniform
+typedef void (APIENTRYP PFNGLGETATTACHEDSHADERSPROC)(GLuint program, GLsizei maxCount, GLsizei *count, GLuint *shaders);
+GLAPI PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders;
+#define glGetAttachedShaders glad_glGetAttachedShaders
+typedef GLint (APIENTRYP PFNGLGETATTRIBLOCATIONPROC)(GLuint program, const GLchar *name);
+GLAPI PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation;
+#define glGetAttribLocation glad_glGetAttribLocation
+typedef void (APIENTRYP PFNGLGETPROGRAMIVPROC)(GLuint program, GLenum pname, GLint *params);
+GLAPI PFNGLGETPROGRAMIVPROC glad_glGetProgramiv;
+#define glGetProgramiv glad_glGetProgramiv
+typedef void (APIENTRYP PFNGLGETPROGRAMINFOLOGPROC)(GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
+GLAPI PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog;
+#define glGetProgramInfoLog glad_glGetProgramInfoLog
+typedef void (APIENTRYP PFNGLGETSHADERIVPROC)(GLuint shader, GLenum pname, GLint *params);
+GLAPI PFNGLGETSHADERIVPROC glad_glGetShaderiv;
+#define glGetShaderiv glad_glGetShaderiv
+typedef void (APIENTRYP PFNGLGETSHADERINFOLOGPROC)(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
+GLAPI PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog;
+#define glGetShaderInfoLog glad_glGetShaderInfoLog
+typedef void (APIENTRYP PFNGLGETSHADERSOURCEPROC)(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *source);
+GLAPI PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource;
+#define glGetShaderSource glad_glGetShaderSource
+typedef GLint (APIENTRYP PFNGLGETUNIFORMLOCATIONPROC)(GLuint program, const GLchar *name);
+GLAPI PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation;
+#define glGetUniformLocation glad_glGetUniformLocation
+typedef void (APIENTRYP PFNGLGETUNIFORMFVPROC)(GLuint program, GLint location, GLfloat *params);
+GLAPI PFNGLGETUNIFORMFVPROC glad_glGetUniformfv;
+#define glGetUniformfv glad_glGetUniformfv
+typedef void (APIENTRYP PFNGLGETUNIFORMIVPROC)(GLuint program, GLint location, GLint *params);
+GLAPI PFNGLGETUNIFORMIVPROC glad_glGetUniformiv;
+#define glGetUniformiv glad_glGetUniformiv
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBDVPROC)(GLuint index, GLenum pname, GLdouble *params);
+GLAPI PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv;
+#define glGetVertexAttribdv glad_glGetVertexAttribdv
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBFVPROC)(GLuint index, GLenum pname, GLfloat *params);
+GLAPI PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv;
+#define glGetVertexAttribfv glad_glGetVertexAttribfv
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIVPROC)(GLuint index, GLenum pname, GLint *params);
+GLAPI PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv;
+#define glGetVertexAttribiv glad_glGetVertexAttribiv
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBPOINTERVPROC)(GLuint index, GLenum pname, void **pointer);
+GLAPI PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv;
+#define glGetVertexAttribPointerv glad_glGetVertexAttribPointerv
+typedef GLboolean (APIENTRYP PFNGLISPROGRAMPROC)(GLuint program);
+GLAPI PFNGLISPROGRAMPROC glad_glIsProgram;
+#define glIsProgram glad_glIsProgram
+typedef GLboolean (APIENTRYP PFNGLISSHADERPROC)(GLuint shader);
+GLAPI PFNGLISSHADERPROC glad_glIsShader;
+#define glIsShader glad_glIsShader
+typedef void (APIENTRYP PFNGLLINKPROGRAMPROC)(GLuint program);
+GLAPI PFNGLLINKPROGRAMPROC glad_glLinkProgram;
+#define glLinkProgram glad_glLinkProgram
+typedef void (APIENTRYP PFNGLSHADERSOURCEPROC)(GLuint shader, GLsizei count, const GLchar *const*string, const GLint *length);
+GLAPI PFNGLSHADERSOURCEPROC glad_glShaderSource;
+#define glShaderSource glad_glShaderSource
+typedef void (APIENTRYP PFNGLUSEPROGRAMPROC)(GLuint program);
+GLAPI PFNGLUSEPROGRAMPROC glad_glUseProgram;
+#define glUseProgram glad_glUseProgram
+typedef void (APIENTRYP PFNGLUNIFORM1FPROC)(GLint location, GLfloat v0);
+GLAPI PFNGLUNIFORM1FPROC glad_glUniform1f;
+#define glUniform1f glad_glUniform1f
+typedef void (APIENTRYP PFNGLUNIFORM2FPROC)(GLint location, GLfloat v0, GLfloat v1);
+GLAPI PFNGLUNIFORM2FPROC glad_glUniform2f;
+#define glUniform2f glad_glUniform2f
+typedef void (APIENTRYP PFNGLUNIFORM3FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
+GLAPI PFNGLUNIFORM3FPROC glad_glUniform3f;
+#define glUniform3f glad_glUniform3f
+typedef void (APIENTRYP PFNGLUNIFORM4FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
+GLAPI PFNGLUNIFORM4FPROC glad_glUniform4f;
+#define glUniform4f glad_glUniform4f
+typedef void (APIENTRYP PFNGLUNIFORM1IPROC)(GLint location, GLint v0);
+GLAPI PFNGLUNIFORM1IPROC glad_glUniform1i;
+#define glUniform1i glad_glUniform1i
+typedef void (APIENTRYP PFNGLUNIFORM2IPROC)(GLint location, GLint v0, GLint v1);
+GLAPI PFNGLUNIFORM2IPROC glad_glUniform2i;
+#define glUniform2i glad_glUniform2i
+typedef void (APIENTRYP PFNGLUNIFORM3IPROC)(GLint location, GLint v0, GLint v1, GLint v2);
+GLAPI PFNGLUNIFORM3IPROC glad_glUniform3i;
+#define glUniform3i glad_glUniform3i
+typedef void (APIENTRYP PFNGLUNIFORM4IPROC)(GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
+GLAPI PFNGLUNIFORM4IPROC glad_glUniform4i;
+#define glUniform4i glad_glUniform4i
+typedef void (APIENTRYP PFNGLUNIFORM1FVPROC)(GLint location, GLsizei count, const GLfloat *value);
+GLAPI PFNGLUNIFORM1FVPROC glad_glUniform1fv;
+#define glUniform1fv glad_glUniform1fv
+typedef void (APIENTRYP PFNGLUNIFORM2FVPROC)(GLint location, GLsizei count, const GLfloat *value);
+GLAPI PFNGLUNIFORM2FVPROC glad_glUniform2fv;
+#define glUniform2fv glad_glUniform2fv
+typedef void (APIENTRYP PFNGLUNIFORM3FVPROC)(GLint location, GLsizei count, const GLfloat *value);
+GLAPI PFNGLUNIFORM3FVPROC glad_glUniform3fv;
+#define glUniform3fv glad_glUniform3fv
+typedef void (APIENTRYP PFNGLUNIFORM4FVPROC)(GLint location, GLsizei count, const GLfloat *value);
+GLAPI PFNGLUNIFORM4FVPROC glad_glUniform4fv;
+#define glUniform4fv glad_glUniform4fv
+typedef void (APIENTRYP PFNGLUNIFORM1IVPROC)(GLint location, GLsizei count, const GLint *value);
+GLAPI PFNGLUNIFORM1IVPROC glad_glUniform1iv;
+#define glUniform1iv glad_glUniform1iv
+typedef void (APIENTRYP PFNGLUNIFORM2IVPROC)(GLint location, GLsizei count, const GLint *value);
+GLAPI PFNGLUNIFORM2IVPROC glad_glUniform2iv;
+#define glUniform2iv glad_glUniform2iv
+typedef void (APIENTRYP PFNGLUNIFORM3IVPROC)(GLint location, GLsizei count, const GLint *value);
+GLAPI PFNGLUNIFORM3IVPROC glad_glUniform3iv;
+#define glUniform3iv glad_glUniform3iv
+typedef void (APIENTRYP PFNGLUNIFORM4IVPROC)(GLint location, GLsizei count, const GLint *value);
+GLAPI PFNGLUNIFORM4IVPROC glad_glUniform4iv;
+#define glUniform4iv glad_glUniform4iv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv;
+#define glUniformMatrix2fv glad_glUniformMatrix2fv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv;
+#define glUniformMatrix3fv glad_glUniformMatrix3fv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv;
+#define glUniformMatrix4fv glad_glUniformMatrix4fv
+typedef void (APIENTRYP PFNGLVALIDATEPROGRAMPROC)(GLuint program);
+GLAPI PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram;
+#define glValidateProgram glad_glValidateProgram
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DPROC)(GLuint index, GLdouble x);
+GLAPI PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d;
+#define glVertexAttrib1d glad_glVertexAttrib1d
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1DVPROC)(GLuint index, const GLdouble *v);
+GLAPI PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv;
+#define glVertexAttrib1dv glad_glVertexAttrib1dv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FPROC)(GLuint index, GLfloat x);
+GLAPI PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f;
+#define glVertexAttrib1f glad_glVertexAttrib1f
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1FVPROC)(GLuint index, const GLfloat *v);
+GLAPI PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv;
+#define glVertexAttrib1fv glad_glVertexAttrib1fv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SPROC)(GLuint index, GLshort x);
+GLAPI PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s;
+#define glVertexAttrib1s glad_glVertexAttrib1s
+typedef void (APIENTRYP PFNGLVERTEXATTRIB1SVPROC)(GLuint index, const GLshort *v);
+GLAPI PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv;
+#define glVertexAttrib1sv glad_glVertexAttrib1sv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DPROC)(GLuint index, GLdouble x, GLdouble y);
+GLAPI PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d;
+#define glVertexAttrib2d glad_glVertexAttrib2d
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2DVPROC)(GLuint index, const GLdouble *v);
+GLAPI PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv;
+#define glVertexAttrib2dv glad_glVertexAttrib2dv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FPROC)(GLuint index, GLfloat x, GLfloat y);
+GLAPI PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f;
+#define glVertexAttrib2f glad_glVertexAttrib2f
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2FVPROC)(GLuint index, const GLfloat *v);
+GLAPI PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv;
+#define glVertexAttrib2fv glad_glVertexAttrib2fv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SPROC)(GLuint index, GLshort x, GLshort y);
+GLAPI PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s;
+#define glVertexAttrib2s glad_glVertexAttrib2s
+typedef void (APIENTRYP PFNGLVERTEXATTRIB2SVPROC)(GLuint index, const GLshort *v);
+GLAPI PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv;
+#define glVertexAttrib2sv glad_glVertexAttrib2sv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z);
+GLAPI PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d;
+#define glVertexAttrib3d glad_glVertexAttrib3d
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3DVPROC)(GLuint index, const GLdouble *v);
+GLAPI PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv;
+#define glVertexAttrib3dv glad_glVertexAttrib3dv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z);
+GLAPI PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f;
+#define glVertexAttrib3f glad_glVertexAttrib3f
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3FVPROC)(GLuint index, const GLfloat *v);
+GLAPI PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv;
+#define glVertexAttrib3fv glad_glVertexAttrib3fv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SPROC)(GLuint index, GLshort x, GLshort y, GLshort z);
+GLAPI PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s;
+#define glVertexAttrib3s glad_glVertexAttrib3s
+typedef void (APIENTRYP PFNGLVERTEXATTRIB3SVPROC)(GLuint index, const GLshort *v);
+GLAPI PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv;
+#define glVertexAttrib3sv glad_glVertexAttrib3sv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NBVPROC)(GLuint index, const GLbyte *v);
+GLAPI PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv;
+#define glVertexAttrib4Nbv glad_glVertexAttrib4Nbv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NIVPROC)(GLuint index, const GLint *v);
+GLAPI PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv;
+#define glVertexAttrib4Niv glad_glVertexAttrib4Niv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NSVPROC)(GLuint index, const GLshort *v);
+GLAPI PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv;
+#define glVertexAttrib4Nsv glad_glVertexAttrib4Nsv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBPROC)(GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w);
+GLAPI PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub;
+#define glVertexAttrib4Nub glad_glVertexAttrib4Nub
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBVPROC)(GLuint index, const GLubyte *v);
+GLAPI PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv;
+#define glVertexAttrib4Nubv glad_glVertexAttrib4Nubv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUIVPROC)(GLuint index, const GLuint *v);
+GLAPI PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv;
+#define glVertexAttrib4Nuiv glad_glVertexAttrib4Nuiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUSVPROC)(GLuint index, const GLushort *v);
+GLAPI PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv;
+#define glVertexAttrib4Nusv glad_glVertexAttrib4Nusv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4BVPROC)(GLuint index, const GLbyte *v);
+GLAPI PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv;
+#define glVertexAttrib4bv glad_glVertexAttrib4bv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
+GLAPI PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d;
+#define glVertexAttrib4d glad_glVertexAttrib4d
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4DVPROC)(GLuint index, const GLdouble *v);
+GLAPI PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv;
+#define glVertexAttrib4dv glad_glVertexAttrib4dv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+GLAPI PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f;
+#define glVertexAttrib4f glad_glVertexAttrib4f
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4FVPROC)(GLuint index, const GLfloat *v);
+GLAPI PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv;
+#define glVertexAttrib4fv glad_glVertexAttrib4fv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4IVPROC)(GLuint index, const GLint *v);
+GLAPI PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv;
+#define glVertexAttrib4iv glad_glVertexAttrib4iv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SPROC)(GLuint index, GLshort x, GLshort y, GLshort z, GLshort w);
+GLAPI PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s;
+#define glVertexAttrib4s glad_glVertexAttrib4s
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4SVPROC)(GLuint index, const GLshort *v);
+GLAPI PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv;
+#define glVertexAttrib4sv glad_glVertexAttrib4sv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UBVPROC)(GLuint index, const GLubyte *v);
+GLAPI PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv;
+#define glVertexAttrib4ubv glad_glVertexAttrib4ubv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4UIVPROC)(GLuint index, const GLuint *v);
+GLAPI PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv;
+#define glVertexAttrib4uiv glad_glVertexAttrib4uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIB4USVPROC)(GLuint index, const GLushort *v);
+GLAPI PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv;
+#define glVertexAttrib4usv glad_glVertexAttrib4usv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void *pointer);
+GLAPI PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer;
+#define glVertexAttribPointer glad_glVertexAttribPointer
+#endif
+#ifndef GL_VERSION_2_1 /* Skipped when GL_VERSION_2_1 is already defined. Each entry point below gets a PFN...PROC function-pointer typedef, a GLAPI glad_gl* pointer variable, and a gl* macro aliasing that variable; all six share the (location, count, transpose, value) parameter list. */
+#define GL_VERSION_2_1 1
+GLAPI int GLAD_GL_VERSION_2_1; /* Flag is only declared here; presumably set by the loader when this version is available - confirm in the loader source. */
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv;
+#define glUniformMatrix2x3fv glad_glUniformMatrix2x3fv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv;
+#define glUniformMatrix3x2fv glad_glUniformMatrix3x2fv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv;
+#define glUniformMatrix2x4fv glad_glUniformMatrix2x4fv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv;
+#define glUniformMatrix4x2fv glad_glUniformMatrix4x2fv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv;
+#define glUniformMatrix3x4fv glad_glUniformMatrix3x4fv
+typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
+GLAPI PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv;
+#define glUniformMatrix4x3fv glad_glUniformMatrix4x3fv
+#endif /* GL_VERSION_2_1 */
+#ifndef GL_VERSION_3_0 /* Skipped when GL_VERSION_3_0 is already defined. Each entry point below gets a PFN...PROC function-pointer typedef, a GLAPI glad_gl* pointer variable, and a gl* macro aliasing that variable. */
+#define GL_VERSION_3_0 1
+GLAPI int GLAD_GL_VERSION_3_0; /* Flag is only declared here; presumably set by the loader when this version is available - confirm in the loader source. */
+typedef void (APIENTRYP PFNGLCOLORMASKIPROC)(GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a);
+GLAPI PFNGLCOLORMASKIPROC glad_glColorMaski;
+#define glColorMaski glad_glColorMaski
+typedef void (APIENTRYP PFNGLGETBOOLEANI_VPROC)(GLenum target, GLuint index, GLboolean *data);
+GLAPI PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v;
+#define glGetBooleani_v glad_glGetBooleani_v
+typedef void (APIENTRYP PFNGLGETINTEGERI_VPROC)(GLenum target, GLuint index, GLint *data);
+GLAPI PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v;
+#define glGetIntegeri_v glad_glGetIntegeri_v
+typedef void (APIENTRYP PFNGLENABLEIPROC)(GLenum target, GLuint index);
+GLAPI PFNGLENABLEIPROC glad_glEnablei;
+#define glEnablei glad_glEnablei
+typedef void (APIENTRYP PFNGLDISABLEIPROC)(GLenum target, GLuint index);
+GLAPI PFNGLDISABLEIPROC glad_glDisablei;
+#define glDisablei glad_glDisablei
+typedef GLboolean (APIENTRYP PFNGLISENABLEDIPROC)(GLenum target, GLuint index);
+GLAPI PFNGLISENABLEDIPROC glad_glIsEnabledi;
+#define glIsEnabledi glad_glIsEnabledi
+typedef void (APIENTRYP PFNGLBEGINTRANSFORMFEEDBACKPROC)(GLenum primitiveMode);
+GLAPI PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback;
+#define glBeginTransformFeedback glad_glBeginTransformFeedback
+typedef void (APIENTRYP PFNGLENDTRANSFORMFEEDBACKPROC)(void);
+GLAPI PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback;
+#define glEndTransformFeedback glad_glEndTransformFeedback
+typedef void (APIENTRYP PFNGLBINDBUFFERRANGEPROC)(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
+GLAPI PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange;
+#define glBindBufferRange glad_glBindBufferRange
+typedef void (APIENTRYP PFNGLBINDBUFFERBASEPROC)(GLenum target, GLuint index, GLuint buffer);
+GLAPI PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase;
+#define glBindBufferBase glad_glBindBufferBase
+typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKVARYINGSPROC)(GLuint program, GLsizei count, const GLchar *const*varyings, GLenum bufferMode);
+GLAPI PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings;
+#define glTransformFeedbackVaryings glad_glTransformFeedbackVaryings
+typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name);
+GLAPI PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying;
+#define glGetTransformFeedbackVarying glad_glGetTransformFeedbackVarying
+typedef void (APIENTRYP PFNGLCLAMPCOLORPROC)(GLenum target, GLenum clamp);
+GLAPI PFNGLCLAMPCOLORPROC glad_glClampColor;
+#define glClampColor glad_glClampColor
+typedef void (APIENTRYP PFNGLBEGINCONDITIONALRENDERPROC)(GLuint id, GLenum mode);
+GLAPI PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender;
+#define glBeginConditionalRender glad_glBeginConditionalRender
+typedef void (APIENTRYP PFNGLENDCONDITIONALRENDERPROC)(void);
+GLAPI PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender;
+#define glEndConditionalRender glad_glEndConditionalRender
+typedef void (APIENTRYP PFNGLVERTEXATTRIBIPOINTERPROC)(GLuint index, GLint size, GLenum type, GLsizei stride, const void *pointer);
+GLAPI PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer;
+#define glVertexAttribIPointer glad_glVertexAttribIPointer
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIIVPROC)(GLuint index, GLenum pname, GLint *params);
+GLAPI PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv;
+#define glGetVertexAttribIiv glad_glGetVertexAttribIiv
+typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIUIVPROC)(GLuint index, GLenum pname, GLuint *params);
+GLAPI PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv;
+#define glGetVertexAttribIuiv glad_glGetVertexAttribIuiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IPROC)(GLuint index, GLint x);
+GLAPI PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i;
+#define glVertexAttribI1i glad_glVertexAttribI1i
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IPROC)(GLuint index, GLint x, GLint y);
+GLAPI PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i;
+#define glVertexAttribI2i glad_glVertexAttribI2i
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IPROC)(GLuint index, GLint x, GLint y, GLint z);
+GLAPI PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i;
+#define glVertexAttribI3i glad_glVertexAttribI3i
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IPROC)(GLuint index, GLint x, GLint y, GLint z, GLint w);
+GLAPI PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i;
+#define glVertexAttribI4i glad_glVertexAttribI4i
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIPROC)(GLuint index, GLuint x);
+GLAPI PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui;
+#define glVertexAttribI1ui glad_glVertexAttribI1ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIPROC)(GLuint index, GLuint x, GLuint y);
+GLAPI PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui;
+#define glVertexAttribI2ui glad_glVertexAttribI2ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z);
+GLAPI PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui;
+#define glVertexAttribI3ui glad_glVertexAttribI3ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
+GLAPI PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui;
+#define glVertexAttribI4ui glad_glVertexAttribI4ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IVPROC)(GLuint index, const GLint *v);
+GLAPI PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv;
+#define glVertexAttribI1iv glad_glVertexAttribI1iv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IVPROC)(GLuint index, const GLint *v);
+GLAPI PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv;
+#define glVertexAttribI2iv glad_glVertexAttribI2iv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IVPROC)(GLuint index, const GLint *v);
+GLAPI PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv;
+#define glVertexAttribI3iv glad_glVertexAttribI3iv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IVPROC)(GLuint index, const GLint *v);
+GLAPI PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv;
+#define glVertexAttribI4iv glad_glVertexAttribI4iv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIVPROC)(GLuint index, const GLuint *v);
+GLAPI PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv;
+#define glVertexAttribI1uiv glad_glVertexAttribI1uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIVPROC)(GLuint index, const GLuint *v);
+GLAPI PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv;
+#define glVertexAttribI2uiv glad_glVertexAttribI2uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIVPROC)(GLuint index, const GLuint *v);
+GLAPI PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv;
+#define glVertexAttribI3uiv glad_glVertexAttribI3uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIVPROC)(GLuint index, const GLuint *v);
+GLAPI PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv;
+#define glVertexAttribI4uiv glad_glVertexAttribI4uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4BVPROC)(GLuint index, const GLbyte *v);
+GLAPI PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv;
+#define glVertexAttribI4bv glad_glVertexAttribI4bv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4SVPROC)(GLuint index, const GLshort *v);
+GLAPI PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv;
+#define glVertexAttribI4sv glad_glVertexAttribI4sv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UBVPROC)(GLuint index, const GLubyte *v);
+GLAPI PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv;
+#define glVertexAttribI4ubv glad_glVertexAttribI4ubv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4USVPROC)(GLuint index, const GLushort *v);
+GLAPI PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv;
+#define glVertexAttribI4usv glad_glVertexAttribI4usv
+typedef void (APIENTRYP PFNGLGETUNIFORMUIVPROC)(GLuint program, GLint location, GLuint *params);
+GLAPI PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv;
+#define glGetUniformuiv glad_glGetUniformuiv
+typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC)(GLuint program, GLuint color, const GLchar *name);
+GLAPI PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation;
+#define glBindFragDataLocation glad_glBindFragDataLocation
+typedef GLint (APIENTRYP PFNGLGETFRAGDATALOCATIONPROC)(GLuint program, const GLchar *name);
+GLAPI PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation;
+#define glGetFragDataLocation glad_glGetFragDataLocation
+typedef void (APIENTRYP PFNGLUNIFORM1UIPROC)(GLint location, GLuint v0);
+GLAPI PFNGLUNIFORM1UIPROC glad_glUniform1ui;
+#define glUniform1ui glad_glUniform1ui
+typedef void (APIENTRYP PFNGLUNIFORM2UIPROC)(GLint location, GLuint v0, GLuint v1);
+GLAPI PFNGLUNIFORM2UIPROC glad_glUniform2ui;
+#define glUniform2ui glad_glUniform2ui
+typedef void (APIENTRYP PFNGLUNIFORM3UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2);
+GLAPI PFNGLUNIFORM3UIPROC glad_glUniform3ui;
+#define glUniform3ui glad_glUniform3ui
+typedef void (APIENTRYP PFNGLUNIFORM4UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+GLAPI PFNGLUNIFORM4UIPROC glad_glUniform4ui;
+#define glUniform4ui glad_glUniform4ui
+typedef void (APIENTRYP PFNGLUNIFORM1UIVPROC)(GLint location, GLsizei count, const GLuint *value);
+GLAPI PFNGLUNIFORM1UIVPROC glad_glUniform1uiv;
+#define glUniform1uiv glad_glUniform1uiv
+typedef void (APIENTRYP PFNGLUNIFORM2UIVPROC)(GLint location, GLsizei count, const GLuint *value);
+GLAPI PFNGLUNIFORM2UIVPROC glad_glUniform2uiv;
+#define glUniform2uiv glad_glUniform2uiv
+typedef void (APIENTRYP PFNGLUNIFORM3UIVPROC)(GLint location, GLsizei count, const GLuint *value);
+GLAPI PFNGLUNIFORM3UIVPROC glad_glUniform3uiv;
+#define glUniform3uiv glad_glUniform3uiv
+typedef void (APIENTRYP PFNGLUNIFORM4UIVPROC)(GLint location, GLsizei count, const GLuint *value);
+GLAPI PFNGLUNIFORM4UIVPROC glad_glUniform4uiv;
+#define glUniform4uiv glad_glUniform4uiv
+typedef void (APIENTRYP PFNGLTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, const GLint *params);
+GLAPI PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv;
+#define glTexParameterIiv glad_glTexParameterIiv
+typedef void (APIENTRYP PFNGLTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, const GLuint *params);
+GLAPI PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv;
+#define glTexParameterIuiv glad_glTexParameterIuiv
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, GLint *params);
+GLAPI PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv;
+#define glGetTexParameterIiv glad_glGetTexParameterIiv
+typedef void (APIENTRYP PFNGLGETTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, GLuint *params);
+GLAPI PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv;
+#define glGetTexParameterIuiv glad_glGetTexParameterIuiv
+typedef void (APIENTRYP PFNGLCLEARBUFFERIVPROC)(GLenum buffer, GLint drawbuffer, const GLint *value);
+GLAPI PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv;
+#define glClearBufferiv glad_glClearBufferiv
+typedef void (APIENTRYP PFNGLCLEARBUFFERUIVPROC)(GLenum buffer, GLint drawbuffer, const GLuint *value);
+GLAPI PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv;
+#define glClearBufferuiv glad_glClearBufferuiv
+typedef void (APIENTRYP PFNGLCLEARBUFFERFVPROC)(GLenum buffer, GLint drawbuffer, const GLfloat *value);
+GLAPI PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv;
+#define glClearBufferfv glad_glClearBufferfv
+typedef void (APIENTRYP PFNGLCLEARBUFFERFIPROC)(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
+GLAPI PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi;
+#define glClearBufferfi glad_glClearBufferfi
+typedef const GLubyte * (APIENTRYP PFNGLGETSTRINGIPROC)(GLenum name, GLuint index);
+GLAPI PFNGLGETSTRINGIPROC glad_glGetStringi;
+#define glGetStringi glad_glGetStringi
+typedef GLboolean (APIENTRYP PFNGLISRENDERBUFFERPROC)(GLuint renderbuffer);
+GLAPI PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer;
+#define glIsRenderbuffer glad_glIsRenderbuffer
+typedef void (APIENTRYP PFNGLBINDRENDERBUFFERPROC)(GLenum target, GLuint renderbuffer);
+GLAPI PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer;
+#define glBindRenderbuffer glad_glBindRenderbuffer
+typedef void (APIENTRYP PFNGLDELETERENDERBUFFERSPROC)(GLsizei n, const GLuint *renderbuffers);
+GLAPI PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers;
+#define glDeleteRenderbuffers glad_glDeleteRenderbuffers
+typedef void (APIENTRYP PFNGLGENRENDERBUFFERSPROC)(GLsizei n, GLuint *renderbuffers);
+GLAPI PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers;
+#define glGenRenderbuffers glad_glGenRenderbuffers
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEPROC)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage;
+#define glRenderbufferStorage glad_glRenderbufferStorage
+typedef void (APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint *params);
+GLAPI PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv;
+#define glGetRenderbufferParameteriv glad_glGetRenderbufferParameteriv
+typedef GLboolean (APIENTRYP PFNGLISFRAMEBUFFERPROC)(GLuint framebuffer);
+GLAPI PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer;
+#define glIsFramebuffer glad_glIsFramebuffer
+typedef void (APIENTRYP PFNGLBINDFRAMEBUFFERPROC)(GLenum target, GLuint framebuffer);
+GLAPI PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer;
+#define glBindFramebuffer glad_glBindFramebuffer
+typedef void (APIENTRYP PFNGLDELETEFRAMEBUFFERSPROC)(GLsizei n, const GLuint *framebuffers);
+GLAPI PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers;
+#define glDeleteFramebuffers glad_glDeleteFramebuffers
+typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSPROC)(GLsizei n, GLuint *framebuffers);
+GLAPI PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers;
+#define glGenFramebuffers glad_glGenFramebuffers
+typedef GLenum (APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSPROC)(GLenum target);
+GLAPI PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus;
+#define glCheckFramebufferStatus glad_glCheckFramebufferStatus
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+GLAPI PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D;
+#define glFramebufferTexture1D glad_glFramebufferTexture1D
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+GLAPI PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D;
+#define glFramebufferTexture2D glad_glFramebufferTexture2D
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
+GLAPI PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D;
+#define glFramebufferTexture3D glad_glFramebufferTexture3D
+typedef void (APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFERPROC)(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
+GLAPI PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer;
+#define glFramebufferRenderbuffer glad_glFramebufferRenderbuffer
+typedef void (APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLenum target, GLenum attachment, GLenum pname, GLint *params);
+GLAPI PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv;
+#define glGetFramebufferAttachmentParameteriv glad_glGetFramebufferAttachmentParameteriv
+typedef void (APIENTRYP PFNGLGENERATEMIPMAPPROC)(GLenum target);
+GLAPI PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap;
+#define glGenerateMipmap glad_glGenerateMipmap
+typedef void (APIENTRYP PFNGLBLITFRAMEBUFFERPROC)(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
+GLAPI PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer;
+#define glBlitFramebuffer glad_glBlitFramebuffer
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample;
+#define glRenderbufferStorageMultisample glad_glRenderbufferStorageMultisample
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
+GLAPI PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer;
+#define glFramebufferTextureLayer glad_glFramebufferTextureLayer
+typedef void * (APIENTRYP PFNGLMAPBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access);
+GLAPI PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange;
+#define glMapBufferRange glad_glMapBufferRange
+typedef void (APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length);
+GLAPI PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange;
+#define glFlushMappedBufferRange glad_glFlushMappedBufferRange
+typedef void (APIENTRYP PFNGLBINDVERTEXARRAYPROC)(GLuint array);
+GLAPI PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray;
+#define glBindVertexArray glad_glBindVertexArray
+typedef void (APIENTRYP PFNGLDELETEVERTEXARRAYSPROC)(GLsizei n, const GLuint *arrays);
+GLAPI PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays;
+#define glDeleteVertexArrays glad_glDeleteVertexArrays
+typedef void (APIENTRYP PFNGLGENVERTEXARRAYSPROC)(GLsizei n, GLuint *arrays);
+GLAPI PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays;
+#define glGenVertexArrays glad_glGenVertexArrays
+typedef GLboolean (APIENTRYP PFNGLISVERTEXARRAYPROC)(GLuint array);
+GLAPI PFNGLISVERTEXARRAYPROC glad_glIsVertexArray;
+#define glIsVertexArray glad_glIsVertexArray
+#endif /* GL_VERSION_3_0 */
+#ifndef GL_VERSION_3_1 /* Skipped when GL_VERSION_3_1 is already defined. Each entry point below gets a PFN...PROC function-pointer typedef, a GLAPI glad_gl* pointer variable, and a gl* macro aliasing that variable. */
+#define GL_VERSION_3_1 1
+GLAPI int GLAD_GL_VERSION_3_1; /* Flag is only declared here; presumably set by the loader when this version is available - confirm in the loader source. */
+typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC)(GLenum mode, GLint first, GLsizei count, GLsizei instancecount);
+GLAPI PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced;
+#define glDrawArraysInstanced glad_glDrawArraysInstanced
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount);
+GLAPI PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced;
+#define glDrawElementsInstanced glad_glDrawElementsInstanced
+typedef void (APIENTRYP PFNGLTEXBUFFERPROC)(GLenum target, GLenum internalformat, GLuint buffer);
+GLAPI PFNGLTEXBUFFERPROC glad_glTexBuffer;
+#define glTexBuffer glad_glTexBuffer
+typedef void (APIENTRYP PFNGLPRIMITIVERESTARTINDEXPROC)(GLuint index);
+GLAPI PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex;
+#define glPrimitiveRestartIndex glad_glPrimitiveRestartIndex
+typedef void (APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC)(GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
+GLAPI PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData;
+#define glCopyBufferSubData glad_glCopyBufferSubData
+typedef void (APIENTRYP PFNGLGETUNIFORMINDICESPROC)(GLuint program, GLsizei uniformCount, const GLchar *const*uniformNames, GLuint *uniformIndices);
+GLAPI PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices;
+#define glGetUniformIndices glad_glGetUniformIndices
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMSIVPROC)(GLuint program, GLsizei uniformCount, const GLuint *uniformIndices, GLenum pname, GLint *params);
+GLAPI PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv;
+#define glGetActiveUniformsiv glad_glGetActiveUniformsiv
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMNAMEPROC)(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformName);
+GLAPI PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName;
+#define glGetActiveUniformName glad_glGetActiveUniformName
+typedef GLuint (APIENTRYP PFNGLGETUNIFORMBLOCKINDEXPROC)(GLuint program, const GLchar *uniformBlockName);
+GLAPI PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex;
+#define glGetUniformBlockIndex glad_glGetUniformBlockIndex
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKIVPROC)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params);
+GLAPI PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv;
+#define glGetActiveUniformBlockiv glad_glGetActiveUniformBlockiv
+typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformBlockName);
+GLAPI PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName;
+#define glGetActiveUniformBlockName glad_glGetActiveUniformBlockName
+typedef void (APIENTRYP PFNGLUNIFORMBLOCKBINDINGPROC)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding);
+GLAPI PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding;
+#define glUniformBlockBinding glad_glUniformBlockBinding
+#endif /* GL_VERSION_3_1 */
+#ifndef GL_VERSION_3_2 /* Skipped when GL_VERSION_3_2 is already defined. Each entry point below gets a PFN...PROC function-pointer typedef, a GLAPI glad_gl* pointer variable, and a gl* macro aliasing that variable. */
+#define GL_VERSION_3_2 1
+GLAPI int GLAD_GL_VERSION_3_2; /* Flag is only declared here; presumably set by the loader when this version is available - confirm in the loader source. */
+typedef void (APIENTRYP PFNGLDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex);
+GLAPI PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex;
+#define glDrawElementsBaseVertex glad_glDrawElementsBaseVertex
+typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex);
+GLAPI PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex;
+#define glDrawRangeElementsBaseVertex glad_glDrawRangeElementsBaseVertex
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex);
+GLAPI PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex;
+#define glDrawElementsInstancedBaseVertex glad_glDrawElementsInstancedBaseVertex
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount, const GLint *basevertex);
+GLAPI PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex;
+#define glMultiDrawElementsBaseVertex glad_glMultiDrawElementsBaseVertex
+typedef void (APIENTRYP PFNGLPROVOKINGVERTEXPROC)(GLenum mode);
+GLAPI PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex;
+#define glProvokingVertex glad_glProvokingVertex
+typedef GLsync (APIENTRYP PFNGLFENCESYNCPROC)(GLenum condition, GLbitfield flags);
+GLAPI PFNGLFENCESYNCPROC glad_glFenceSync;
+#define glFenceSync glad_glFenceSync
+typedef GLboolean (APIENTRYP PFNGLISSYNCPROC)(GLsync sync);
+GLAPI PFNGLISSYNCPROC glad_glIsSync;
+#define glIsSync glad_glIsSync
+typedef void (APIENTRYP PFNGLDELETESYNCPROC)(GLsync sync);
+GLAPI PFNGLDELETESYNCPROC glad_glDeleteSync;
+#define glDeleteSync glad_glDeleteSync
+typedef GLenum (APIENTRYP PFNGLCLIENTWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout);
+GLAPI PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync;
+#define glClientWaitSync glad_glClientWaitSync
+typedef void (APIENTRYP PFNGLWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout);
+GLAPI PFNGLWAITSYNCPROC glad_glWaitSync;
+#define glWaitSync glad_glWaitSync
+typedef void (APIENTRYP PFNGLGETINTEGER64VPROC)(GLenum pname, GLint64 *data);
+GLAPI PFNGLGETINTEGER64VPROC glad_glGetInteger64v;
+#define glGetInteger64v glad_glGetInteger64v
+typedef void (APIENTRYP PFNGLGETSYNCIVPROC)(GLsync sync, GLenum pname, GLsizei count, GLsizei *length, GLint *values);
+GLAPI PFNGLGETSYNCIVPROC glad_glGetSynciv;
+#define glGetSynciv glad_glGetSynciv
+typedef void (APIENTRYP PFNGLGETINTEGER64I_VPROC)(GLenum target, GLuint index, GLint64 *data);
+GLAPI PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v;
+#define glGetInteger64i_v glad_glGetInteger64i_v
+typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERI64VPROC)(GLenum target, GLenum pname, GLint64 *params);
+GLAPI PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v;
+#define glGetBufferParameteri64v glad_glGetBufferParameteri64v
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level);
+GLAPI PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture;
+#define glFramebufferTexture glad_glFramebufferTexture
+typedef void (APIENTRYP PFNGLTEXIMAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations);
+GLAPI PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample;
+#define glTexImage2DMultisample glad_glTexImage2DMultisample
+typedef void (APIENTRYP PFNGLTEXIMAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
+GLAPI PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample;
+#define glTexImage3DMultisample glad_glTexImage3DMultisample
+typedef void (APIENTRYP PFNGLGETMULTISAMPLEFVPROC)(GLenum pname, GLuint index, GLfloat *val);
+GLAPI PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv;
+#define glGetMultisamplefv glad_glGetMultisamplefv
+typedef void (APIENTRYP PFNGLSAMPLEMASKIPROC)(GLuint maskNumber, GLbitfield mask);
+GLAPI PFNGLSAMPLEMASKIPROC glad_glSampleMaski;
+#define glSampleMaski glad_glSampleMaski
+#endif /* GL_VERSION_3_2 */
+#ifndef GL_VERSION_3_3
+#define GL_VERSION_3_3 1
+GLAPI int GLAD_GL_VERSION_3_3;
+typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)(GLuint program, GLuint colorNumber, GLuint index, const GLchar *name);
+GLAPI PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed;
+#define glBindFragDataLocationIndexed glad_glBindFragDataLocationIndexed
+typedef GLint (APIENTRYP PFNGLGETFRAGDATAINDEXPROC)(GLuint program, const GLchar *name);
+GLAPI PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex;
+#define glGetFragDataIndex glad_glGetFragDataIndex
+typedef void (APIENTRYP PFNGLGENSAMPLERSPROC)(GLsizei count, GLuint *samplers);
+GLAPI PFNGLGENSAMPLERSPROC glad_glGenSamplers;
+#define glGenSamplers glad_glGenSamplers
+typedef void (APIENTRYP PFNGLDELETESAMPLERSPROC)(GLsizei count, const GLuint *samplers);
+GLAPI PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers;
+#define glDeleteSamplers glad_glDeleteSamplers
+typedef GLboolean (APIENTRYP PFNGLISSAMPLERPROC)(GLuint sampler);
+GLAPI PFNGLISSAMPLERPROC glad_glIsSampler;
+#define glIsSampler glad_glIsSampler
+typedef void (APIENTRYP PFNGLBINDSAMPLERPROC)(GLuint unit, GLuint sampler);
+GLAPI PFNGLBINDSAMPLERPROC glad_glBindSampler;
+#define glBindSampler glad_glBindSampler
+typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIPROC)(GLuint sampler, GLenum pname, GLint param);
+GLAPI PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri;
+#define glSamplerParameteri glad_glSamplerParameteri
+typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, const GLint *param);
+GLAPI PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv;
+#define glSamplerParameteriv glad_glSamplerParameteriv
+typedef void (APIENTRYP PFNGLSAMPLERPARAMETERFPROC)(GLuint sampler, GLenum pname, GLfloat param);
+GLAPI PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf;
+#define glSamplerParameterf glad_glSamplerParameterf
+typedef void (APIENTRYP PFNGLSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, const GLfloat *param);
+GLAPI PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv;
+#define glSamplerParameterfv glad_glSamplerParameterfv
+typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, const GLint *param);
+GLAPI PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv;
+#define glSamplerParameterIiv glad_glSamplerParameterIiv
+typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, const GLuint *param);
+GLAPI PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv;
+#define glSamplerParameterIuiv glad_glSamplerParameterIuiv
+typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, GLint *params);
+GLAPI PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv;
+#define glGetSamplerParameteriv glad_glGetSamplerParameteriv
+typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, GLint *params);
+GLAPI PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv;
+#define glGetSamplerParameterIiv glad_glGetSamplerParameterIiv
+typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, GLfloat *params);
+GLAPI PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv;
+#define glGetSamplerParameterfv glad_glGetSamplerParameterfv
+typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, GLuint *params);
+GLAPI PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv;
+#define glGetSamplerParameterIuiv glad_glGetSamplerParameterIuiv
+typedef void (APIENTRYP PFNGLQUERYCOUNTERPROC)(GLuint id, GLenum target);
+GLAPI PFNGLQUERYCOUNTERPROC glad_glQueryCounter;
+#define glQueryCounter glad_glQueryCounter
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTI64VPROC)(GLuint id, GLenum pname, GLint64 *params);
+GLAPI PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v;
+#define glGetQueryObjecti64v glad_glGetQueryObjecti64v
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTUI64VPROC)(GLuint id, GLenum pname, GLuint64 *params);
+GLAPI PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v;
+#define glGetQueryObjectui64v glad_glGetQueryObjectui64v
+typedef void (APIENTRYP PFNGLVERTEXATTRIBDIVISORPROC)(GLuint index, GLuint divisor);
+GLAPI PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor;
+#define glVertexAttribDivisor glad_glVertexAttribDivisor
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP1UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
+GLAPI PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui;
+#define glVertexAttribP1ui glad_glVertexAttribP1ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP1UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value);
+GLAPI PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv;
+#define glVertexAttribP1uiv glad_glVertexAttribP1uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP2UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
+GLAPI PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui;
+#define glVertexAttribP2ui glad_glVertexAttribP2ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP2UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value);
+GLAPI PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv;
+#define glVertexAttribP2uiv glad_glVertexAttribP2uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP3UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
+GLAPI PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui;
+#define glVertexAttribP3ui glad_glVertexAttribP3ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP3UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value);
+GLAPI PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv;
+#define glVertexAttribP3uiv glad_glVertexAttribP3uiv
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP4UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
+GLAPI PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui;
+#define glVertexAttribP4ui glad_glVertexAttribP4ui
+typedef void (APIENTRYP PFNGLVERTEXATTRIBP4UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value);
+GLAPI PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv;
+#define glVertexAttribP4uiv glad_glVertexAttribP4uiv
+typedef void (APIENTRYP PFNGLVERTEXP2UIPROC)(GLenum type, GLuint value);
+GLAPI PFNGLVERTEXP2UIPROC glad_glVertexP2ui;
+#define glVertexP2ui glad_glVertexP2ui
+typedef void (APIENTRYP PFNGLVERTEXP2UIVPROC)(GLenum type, const GLuint *value);
+GLAPI PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv;
+#define glVertexP2uiv glad_glVertexP2uiv
+typedef void (APIENTRYP PFNGLVERTEXP3UIPROC)(GLenum type, GLuint value);
+GLAPI PFNGLVERTEXP3UIPROC glad_glVertexP3ui;
+#define glVertexP3ui glad_glVertexP3ui
+typedef void (APIENTRYP PFNGLVERTEXP3UIVPROC)(GLenum type, const GLuint *value);
+GLAPI PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv;
+#define glVertexP3uiv glad_glVertexP3uiv
+typedef void (APIENTRYP PFNGLVERTEXP4UIPROC)(GLenum type, GLuint value);
+GLAPI PFNGLVERTEXP4UIPROC glad_glVertexP4ui;
+#define glVertexP4ui glad_glVertexP4ui
+typedef void (APIENTRYP PFNGLVERTEXP4UIVPROC)(GLenum type, const GLuint *value);
+GLAPI PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv;
+#define glVertexP4uiv glad_glVertexP4uiv
+typedef void (APIENTRYP PFNGLTEXCOORDP1UIPROC)(GLenum type, GLuint coords);
+GLAPI PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui;
+#define glTexCoordP1ui glad_glTexCoordP1ui
+typedef void (APIENTRYP PFNGLTEXCOORDP1UIVPROC)(GLenum type, const GLuint *coords);
+GLAPI PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv;
+#define glTexCoordP1uiv glad_glTexCoordP1uiv
+typedef void (APIENTRYP PFNGLTEXCOORDP2UIPROC)(GLenum type, GLuint coords);
+GLAPI PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui;
+#define glTexCoordP2ui glad_glTexCoordP2ui
+typedef void (APIENTRYP PFNGLTEXCOORDP2UIVPROC)(GLenum type, const GLuint *coords);
+GLAPI PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv;
+#define glTexCoordP2uiv glad_glTexCoordP2uiv
+typedef void (APIENTRYP PFNGLTEXCOORDP3UIPROC)(GLenum type, GLuint coords);
+GLAPI PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui;
+#define glTexCoordP3ui glad_glTexCoordP3ui
+typedef void (APIENTRYP PFNGLTEXCOORDP3UIVPROC)(GLenum type, const GLuint *coords);
+GLAPI PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv;
+#define glTexCoordP3uiv glad_glTexCoordP3uiv
+typedef void (APIENTRYP PFNGLTEXCOORDP4UIPROC)(GLenum type, GLuint coords);
+GLAPI PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui;
+#define glTexCoordP4ui glad_glTexCoordP4ui
+typedef void (APIENTRYP PFNGLTEXCOORDP4UIVPROC)(GLenum type, const GLuint *coords);
+GLAPI PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv;
+#define glTexCoordP4uiv glad_glTexCoordP4uiv
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP1UIPROC)(GLenum texture, GLenum type, GLuint coords);
+GLAPI PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui;
+#define glMultiTexCoordP1ui glad_glMultiTexCoordP1ui
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP1UIVPROC)(GLenum texture, GLenum type, const GLuint *coords);
+GLAPI PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv;
+#define glMultiTexCoordP1uiv glad_glMultiTexCoordP1uiv
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP2UIPROC)(GLenum texture, GLenum type, GLuint coords);
+GLAPI PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui;
+#define glMultiTexCoordP2ui glad_glMultiTexCoordP2ui
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP2UIVPROC)(GLenum texture, GLenum type, const GLuint *coords);
+GLAPI PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv;
+#define glMultiTexCoordP2uiv glad_glMultiTexCoordP2uiv
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP3UIPROC)(GLenum texture, GLenum type, GLuint coords);
+GLAPI PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui;
+#define glMultiTexCoordP3ui glad_glMultiTexCoordP3ui
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP3UIVPROC)(GLenum texture, GLenum type, const GLuint *coords);
+GLAPI PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv;
+#define glMultiTexCoordP3uiv glad_glMultiTexCoordP3uiv
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP4UIPROC)(GLenum texture, GLenum type, GLuint coords);
+GLAPI PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui;
+#define glMultiTexCoordP4ui glad_glMultiTexCoordP4ui
+typedef void (APIENTRYP PFNGLMULTITEXCOORDP4UIVPROC)(GLenum texture, GLenum type, const GLuint *coords);
+GLAPI PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv;
+#define glMultiTexCoordP4uiv glad_glMultiTexCoordP4uiv
+typedef void (APIENTRYP PFNGLNORMALP3UIPROC)(GLenum type, GLuint coords);
+GLAPI PFNGLNORMALP3UIPROC glad_glNormalP3ui;
+#define glNormalP3ui glad_glNormalP3ui
+typedef void (APIENTRYP PFNGLNORMALP3UIVPROC)(GLenum type, const GLuint *coords);
+GLAPI PFNGLNORMALP3UIVPROC glad_glNormalP3uiv;
+#define glNormalP3uiv glad_glNormalP3uiv
+typedef void (APIENTRYP PFNGLCOLORP3UIPROC)(GLenum type, GLuint color);
+GLAPI PFNGLCOLORP3UIPROC glad_glColorP3ui;
+#define glColorP3ui glad_glColorP3ui
+typedef void (APIENTRYP PFNGLCOLORP3UIVPROC)(GLenum type, const GLuint *color);
+GLAPI PFNGLCOLORP3UIVPROC glad_glColorP3uiv;
+#define glColorP3uiv glad_glColorP3uiv
+typedef void (APIENTRYP PFNGLCOLORP4UIPROC)(GLenum type, GLuint color);
+GLAPI PFNGLCOLORP4UIPROC glad_glColorP4ui;
+#define glColorP4ui glad_glColorP4ui
+typedef void (APIENTRYP PFNGLCOLORP4UIVPROC)(GLenum type, const GLuint *color);
+GLAPI PFNGLCOLORP4UIVPROC glad_glColorP4uiv;
+#define glColorP4uiv glad_glColorP4uiv
+typedef void (APIENTRYP PFNGLSECONDARYCOLORP3UIPROC)(GLenum type, GLuint color);
+GLAPI PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui;
+#define glSecondaryColorP3ui glad_glSecondaryColorP3ui
+typedef void (APIENTRYP PFNGLSECONDARYCOLORP3UIVPROC)(GLenum type, const GLuint *color);
+GLAPI PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv;
+#define glSecondaryColorP3uiv glad_glSecondaryColorP3uiv
+#endif
+#define GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB 0x8242
+#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH_ARB 0x8243
+#define GL_DEBUG_CALLBACK_FUNCTION_ARB 0x8244
+#define GL_DEBUG_CALLBACK_USER_PARAM_ARB 0x8245
+#define GL_DEBUG_SOURCE_API_ARB 0x8246
+#define GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB 0x8247
+#define GL_DEBUG_SOURCE_SHADER_COMPILER_ARB 0x8248
+#define GL_DEBUG_SOURCE_THIRD_PARTY_ARB 0x8249
+#define GL_DEBUG_SOURCE_APPLICATION_ARB 0x824A
+#define GL_DEBUG_SOURCE_OTHER_ARB 0x824B
+#define GL_DEBUG_TYPE_ERROR_ARB 0x824C
+#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB 0x824D
+#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB 0x824E
+#define GL_DEBUG_TYPE_PORTABILITY_ARB 0x824F
+#define GL_DEBUG_TYPE_PERFORMANCE_ARB 0x8250
+#define GL_DEBUG_TYPE_OTHER_ARB 0x8251
+#define GL_MAX_DEBUG_MESSAGE_LENGTH_ARB 0x9143
+#define GL_MAX_DEBUG_LOGGED_MESSAGES_ARB 0x9144
+#define GL_DEBUG_LOGGED_MESSAGES_ARB 0x9145
+#define GL_DEBUG_SEVERITY_HIGH_ARB 0x9146
+#define GL_DEBUG_SEVERITY_MEDIUM_ARB 0x9147
+#define GL_DEBUG_SEVERITY_LOW_ARB 0x9148
+#ifndef GL_ARB_debug_output
+#define GL_ARB_debug_output 1
+GLAPI int GLAD_GL_ARB_debug_output;
+typedef void (APIENTRYP PFNGLDEBUGMESSAGECONTROLARBPROC)(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled);
+GLAPI PFNGLDEBUGMESSAGECONTROLARBPROC glad_glDebugMessageControlARB;
+#define glDebugMessageControlARB glad_glDebugMessageControlARB
+typedef void (APIENTRYP PFNGLDEBUGMESSAGEINSERTARBPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf);
+GLAPI PFNGLDEBUGMESSAGEINSERTARBPROC glad_glDebugMessageInsertARB;
+#define glDebugMessageInsertARB glad_glDebugMessageInsertARB
+typedef void (APIENTRYP PFNGLDEBUGMESSAGECALLBACKARBPROC)(GLDEBUGPROCARB callback, const void *userParam);
+GLAPI PFNGLDEBUGMESSAGECALLBACKARBPROC glad_glDebugMessageCallbackARB;
+#define glDebugMessageCallbackARB glad_glDebugMessageCallbackARB
+typedef GLuint (APIENTRYP PFNGLGETDEBUGMESSAGELOGARBPROC)(GLuint count, GLsizei bufSize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog);
+GLAPI PFNGLGETDEBUGMESSAGELOGARBPROC glad_glGetDebugMessageLogARB;
+#define glGetDebugMessageLogARB glad_glGetDebugMessageLogARB
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/HIP-Basic/CMakeLists.txt b/HIP-Basic/CMakeLists.txt
index 34abda256..fbf0ae353 100644
--- a/HIP-Basic/CMakeLists.txt
+++ b/HIP-Basic/CMakeLists.txt
@@ -21,26 +21,63 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(HIP-Basic)
+project(HIP-Basic LANGUAGES CXX)
# Only supported on HIP (not CUDA)
if(NOT "${GPU_RUNTIME}" STREQUAL "CUDA")
add_subdirectory(assembly_to_executable)
add_subdirectory(llvm_ir_to_executable)
+ add_subdirectory(module_api)
endif()
add_subdirectory(bandwidth)
+add_subdirectory(bit_extract)
+add_subdirectory(cooperative_groups)
add_subdirectory(device_query)
+add_subdirectory(device_globals)
add_subdirectory(dynamic_shared)
add_subdirectory(events)
+add_subdirectory(gpu_arch)
if(NOT WIN32)
add_subdirectory(hello_world)
- add_subdirectory(hipify)
+
+ find_package(Perl)
+
+ if(Perl_FOUND)
+ add_subdirectory(hipify)
+ else()
+ message("Perl not found, not building hipify example")
+ endif()
endif()
+add_subdirectory(inline_assembly)
add_subdirectory(matrix_multiplication)
+add_subdirectory(moving_average)
+add_subdirectory(multi_gpu_data_transfer)
add_subdirectory(occupancy)
add_subdirectory(runtime_compilation)
add_subdirectory(saxpy)
add_subdirectory(shared_memory)
+add_subdirectory(static_host_library)
add_subdirectory(streams)
+# temporarily exclude texture management on Windows
+if(NOT WIN32)
+ add_subdirectory(texture_management)
+endif()
add_subdirectory(warp_shuffle)
+
+find_package(glfw3)
+find_package(Vulkan COMPONENTS glslangValidator)
+
+if(NOT glfw3_FOUND)
+ message("GLFW not found, not building OpenGL interop example")
+else()
+ add_subdirectory(opengl_interop)
+endif()
+
+if(NOT glfw3_FOUND)
+ message("GLFW not found, not building Vulkan interop example")
+elseif(NOT Vulkan_FOUND)
+ message("Vulkan not found, not building Vulkan interop example")
+else()
+ add_subdirectory(vulkan_interop)
+endif()
diff --git a/HIP-Basic/Makefile b/HIP-Basic/Makefile
index b04c07496..18913b6ab 100644
--- a/HIP-Basic/Makefile
+++ b/HIP-Basic/Makefile
@@ -22,24 +22,37 @@
EXAMPLES := \
bandwidth \
+ bit_extract \
+ cooperative_groups \
device_query \
+ device_globals \
dynamic_shared \
events \
+ gpu_arch \
hello_world \
hipify \
+ inline_assembly \
matrix_multiplication \
+ moving_average \
+ multi_gpu_data_transfer \
occupancy \
+ opengl_interop \
runtime_compilation \
saxpy \
shared_memory \
streams \
+ static_host_library \
+ texture_management \
+ vulkan_interop \
warp_shuffle
# Only supported on HIP (not CUDA).
ifneq ($(GPU_RUNTIME), CUDA)
- EXAMPLES += \
- assembly_to_executable \
- llvm_ir_to_executable
+ EXAMPLES += \
+ assembly_to_executable \
+ llvm_ir_to_executable \
+ module_api \
+ static_device_library
endif
all: $(EXAMPLES)
diff --git a/HIP-Basic/README.md b/HIP-Basic/README.md
new file mode 100644
index 000000000..3f79faf0d
--- /dev/null
+++ b/HIP-Basic/README.md
@@ -0,0 +1,45 @@
+# HIP-Basic Examples
+
+## Summary
+The examples in this subdirectory showcase the functionality of the HIP runtime. The examples build on Linux for the ROCm (AMD GPU) backend. Some examples additionally support Windows, and some additionally support the CUDA (NVIDIA GPU) backend.
+
+## Prerequisites
+### Linux
+- [CMake](https://cmake.org/download/) (at least version 3.21)
+- OR GNU Make - available via the distribution's package manager
+- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x)
+
+### Windows
+- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
+- ROCm toolchain for Windows (No public release yet)
+ - The Visual Studio ROCm extension needs to be installed to build with the solution files.
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
+
+## Building
+### Linux
+Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../Dockerfiles/) to build and run the examples in a containerized environment.
+
+#### Using CMake
+All examples in the `HIP-Basic` subdirectory can either be built by a single CMake project or be built independently.
+
+- `$ cd HIP-Basic`
+- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA, when supported)
+- `$ cmake --build build`
+
+#### Using Make
+All examples can be built by a single invocation to Make or be built independently.
+
+- `$ cd HIP-Basic`
+- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA, when supported)
+
+### Windows
+Not all HIP runtime examples support building on Windows. See the README file in the directory of the example for more details.
+
+#### Visual Studio
+Visual Studio solution files are available for the individual examples. To build all supported HIP runtime examples, open the top-level solution file [ROCm-Examples-VS2019.sln](../ROCm-Examples-VS2019.sln) and filter for HIP-Basic.
+
+For more detailed build instructions, refer to the top-level [README.md](../README.md#visual-studio).
+
+#### CMake
+All examples in the `HIP-Basic` subdirectory can either be built by a single CMake project or be built independently. For build instructions, refer to the top-level [README.md](../README.md#cmake-2).
diff --git a/HIP-Basic/assembly_to_executable/Makefile b/HIP-Basic/assembly_to_executable/Makefile
index 3d26a352a..371e40ec7 100644
--- a/HIP-Basic/assembly_to_executable/Makefile
+++ b/HIP-Basic/assembly_to_executable/Makefile
@@ -23,10 +23,9 @@ COMMON_INCLUDE_DIR := ../../Common
GPU_RUNTIME ?= HIP
ifneq ($(GPU_RUNTIME), HIP)
-$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
endif
-
# HIP variables
ROCM_INSTALL_DIR := /opt/rocm
HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
@@ -37,11 +36,11 @@ LLVM_MC ?= $(ROCM_INSTALL_DIR)/llvm/bin/llvm-mc
CLANG_OFFLOAD_BUNDLER ?= $(ROCM_INSTALL_DIR)/llvm/bin/clang-offload-bundler
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
# Compile for these GPU architectures
HIP_ARCHITECTURES ?= gfx803;gfx900;gfx906;gfx908;gfx90a;gfx1030
@@ -60,7 +59,7 @@ GPU_ARCH_TRIPLES := $(subst $(space),$(comma),$(GPU_ARCHS:%=hipv4-amdgcn-amd-amd
all: $(EXAMPLE)
$(EXAMPLE): main.o main_device.o
- $(HIPCXX) -o $@ $^
+ $(HIPCXX) $(ILDFLAGS) -o $@ $^ $(ILDLIBS)
main_device.o: hip_obj_gen.mcin offload_bundle.hipfb
$(LLVM_MC) -triple x86_64-unknown-linux-gnu -o $@ $< --filetype=obj
@@ -73,7 +72,7 @@ offload_bundle.hipfb: $(GPU_ARCHS:%=main_%.o)
-output=$@
main.o: main.hip
- $(HIPCXX) $(CXXFLAGS) $(CPPFLAGS) -c --cuda-host-only $<
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) -c --cuda-host-only $<
main_%.o: main_%.s
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$* -o $@ $<
diff --git a/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.sln b/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.sln
index 1ceb39e18..3edd3db04 100644
--- a/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.sln
+++ b/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.sln
@@ -1,25 +1,25 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.32630.194
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "assembly_to_executable_vs2019", "assembly_to_executable_vs2019.vcxproj", "{60B4ADE0-8286-46AE-B884-5DA51B541DED}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|x64 = Debug|x64
- Release|x64 = Release|x64
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.ActiveCfg = Debug|x64
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.Build.0 = Debug|x64
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.ActiveCfg = Release|x64
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.Build.0 = Release|x64
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
- SolutionGuid = {5EAD9B5F-41B6-452E-922F-D5782C75EB8F}
- EndGlobalSection
-EndGlobal
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "assembly_to_executable_vs2019", "assembly_to_executable_vs2019.vcxproj", "{60B4ADE0-8286-46AE-B884-5DA51B541DED}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.ActiveCfg = Debug|x64
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.Build.0 = Debug|x64
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.ActiveCfg = Release|x64
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {5EAD9B5F-41B6-452E-922F-D5782C75EB8F}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj b/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj
index 7783b217a..315ba3ac6 100644
--- a/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj
+++ b/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj
@@ -1,183 +1,183 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
- --cuda-host-only
- --cuda-host-only
-
-
-
-
-
-
-
- Document
- copy %(Identity) "$(IntDir)%(Identity)"
- Copying %(Identity)
- $(IntDir)%(Identity)
- copy %(Identity) "$(IntDir)%(Identity)"
- Copying %(Identity)
- $(IntDir)%(Identity)
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
-
-
- Document
- "$(ClangToolPath)clang+"+ -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
- "$(ClangToolPath)clang+"+ -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
-
-
-
- 15.0
- {60b4ade0-8286-46ae-b884-5da51b541ded}
- Win32Proj
- assembly_to_executable_vs2019
- 10.0
-
-
-
- Application
- true
- HIP
- Unicode
-
-
- Application
- false
- HIP
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- hip_$(ProjectName)
- ClCompile
-
-
- false
- hip_$(ProjectName)
- ClCompile
-
-
- gfx1030
-
-
- gfx1030
-
-
-
- Level1
- __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
-
-
- Console
- true
- $(IntDir)main_device.obj;%(AdditionalDependencies)
-
-
- Compiling Device Assembly %(Identity)
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
- $(IntDir)%(FileName).o
-
-
- "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
-cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
-
-
- Generating Device Offload Object
-
-
- $(IntDIr)main_device.obj
-
-
- $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_objgen_win.mcin;%(Inputs)
-
-
-
-
- Level2
- true
- true
- __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
-
-
- Console
- true
- true
- true
- $(IntDir)main_device.obj;%(AdditionalDependencies)
-
-
- Compiling Device Assembly %(Identity)
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
- $(IntDir)%(FileName).o
-
-
- "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa-gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
-cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
-
-
- Generating Device Offload Object
-
-
- $(IntDIr)main_device.obj
-
-
- $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_objgen_win.mcin;%(Inputs)
-
-
-
-
-
-
-
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+ --cuda-host-only
+ --cuda-host-only
+
+
+
+
+
+
+
+ Document
+ copy %(Identity) "$(IntDir)%(Identity)"
+ Copying %(Identity)
+ $(IntDir)%(Identity)
+ copy %(Identity) "$(IntDir)%(Identity)"
+ Copying %(Identity)
+ $(IntDir)%(Identity)
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
+
+
+
+ 15.0
+ {60b4ade0-8286-46ae-b884-5da51b541ded}
+ Win32Proj
+ assembly_to_executable_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+ ClCompile
+
+
+ false
+ hip_$(ProjectName)
+ ClCompile
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ $(IntDir)main_device.obj;%(AdditionalDependencies)
+
+
+ Compiling Device Assembly %(Identity)
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
+ $(IntDir)%(FileName).o
+
+
+ "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
+cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
+
+
+ Generating Device Offload Object
+
+
+ $(IntDIr)main_device.obj
+
+
+ $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+ $(IntDir)main_device.obj;%(AdditionalDependencies)
+
+
+ Compiling Device Assembly %(Identity)
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
+ $(IntDir)%(FileName).o
+
+
+ "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
+cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
+
+
+ Generating Device Offload Object
+
+
+ $(IntDIr)main_device.obj
+
+
+ $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)
+
+
+
+
+
+
+
diff --git a/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj.filters b/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj.filters
index 205bad8df..bb92852bf 100644
--- a/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj.filters
+++ b/HIP-Basic/assembly_to_executable/assembly_to_executable_vs2019.vcxproj.filters
@@ -1,53 +1,53 @@
-
-
-
-
- {4f2a1544-a556-4afb-b630-36ba54c0ab4a}
- cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
-
-
- {b93521e0-9944-411a-9f6e-4071af6bc7ea}
- h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
-
-
- {972f07c3-b925-4516-bd65-2d5a3f626888}
- rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
-
-
-
-
- Source Files
-
-
-
-
- Header Files
-
-
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
-
+
+
+
+
+ {4f2a1544-a556-4afb-b630-36ba54c0ab4a}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {b93521e0-9944-411a-9f6e-4071af6bc7ea}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {972f07c3-b925-4516-bd65-2d5a3f626888}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+
diff --git a/HIP-Basic/assembly_to_executable/main.hip b/HIP-Basic/assembly_to_executable/main.hip
index 588fc0706..b09dd694a 100644
--- a/HIP-Basic/assembly_to_executable/main.hip
+++ b/HIP-Basic/assembly_to_executable/main.hip
@@ -31,7 +31,7 @@
/// \brief Device function to square each element
/// in the array `in` and write to array `out`.
template
-__global__ void vector_square_kernel(T* out, const T* in, const long long size)
+__global__ void vector_square_kernel(T* out, const T* in, const unsigned long long size)
{
// Get the unique global thread ID
const size_t offset = blockIdx.x * blockDim.x + threadIdx.x;
diff --git a/HIP-Basic/assembly_to_executable/main_gfx1030.s b/HIP-Basic/assembly_to_executable/main_gfx1030.s
index ce4952afa..d6cb4d8ab 100644
--- a/HIP-Basic/assembly_to_executable/main_gfx1030.s
+++ b/HIP-Basic/assembly_to_executable/main_gfx1030.s
@@ -1,55 +1,55 @@
.text
.amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
- .protected _Z20vector_square_kernelIfEvPT_PKS0_x ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_x
- .globl _Z20vector_square_kernelIfEvPT_PKS0_x
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
+ .protected _Z20vector_square_kernelIfEvPT_PKS0_y ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_y
+ .globl _Z20vector_square_kernelIfEvPT_PKS0_y
.p2align 8
- .type _Z20vector_square_kernelIfEvPT_PKS0_x,@function
-_Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
+ .type _Z20vector_square_kernelIfEvPT_PKS0_y,@function
+_Z20vector_square_kernelIfEvPT_PKS0_y: ; @_Z20vector_square_kernelIfEvPT_PKS0_y
; %bb.0:
s_load_dword s0, s[4:5], 0x4
s_load_dwordx2 s[2:3], s[6:7], 0x10
- v_mov_b32_e32 v1, 0
s_waitcnt lgkmcnt(0)
s_and_b32 s0, s0, 0xffff
- s_mul_i32 s8, s8, s0
- v_add_nc_u32_e32 v0, s8, v0
- v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
- s_and_saveexec_b32 s0, vcc_lo
- s_cbranch_execz BB0_3
+ v_mad_u64_u32 v[0:1], null, s8, s0, v[0:1]
+ v_mov_b32_e32 v1, 0
+ s_mov_b32 s0, exec_lo
+ v_cmpx_gt_u64_e64 s[2:3], v[0:1]
+ s_cbranch_execz .LBB0_3
; %bb.1:
s_load_dword s8, s[4:5], 0xc
s_load_dwordx4 s[4:7], s[6:7], 0x0
v_lshlrev_b64 v[2:3], 2, v[0:1]
s_mov_b32 s9, 0
- s_mov_b32 s1, s9
s_waitcnt lgkmcnt(0)
s_lshl_b64 s[10:11], s[8:9], 2
.p2align 6
-BB0_2: ; =>This Inner Loop Header: Depth=1
+.LBB0_2: ; =>This Inner Loop Header: Depth=1
v_add_co_u32 v4, vcc_lo, s6, v2
v_add_co_ci_u32_e32 v5, vcc_lo, s7, v3, vcc_lo
v_add_co_u32 v0, vcc_lo, v0, s8
- v_add_co_ci_u32_e32 v1, vcc_lo, s9, v1, vcc_lo
+ v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
global_load_dword v6, v[4:5], off
v_add_co_u32 v4, vcc_lo, s4, v2
v_add_co_ci_u32_e32 v5, vcc_lo, s5, v3, vcc_lo
v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
v_add_co_u32 v2, s0, v2, s10
v_add_co_ci_u32_e64 v3, s0, s11, v3, s0
- s_or_b32 s1, vcc_lo, s1
+ s_or_b32 s9, vcc_lo, s9
s_waitcnt vmcnt(0)
v_mul_f32_e32 v6, v6, v6
global_store_dword v[4:5], v6, off
- s_andn2_b32 exec_lo, exec_lo, s1
- s_cbranch_execnz BB0_2
-BB0_3:
+ s_andn2_b32 exec_lo, exec_lo, s9
+ s_cbranch_execnz .LBB0_2
+.LBB0_3:
s_endpgm
.section .rodata,#alloc
.p2align 6
- .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_x
+ .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_y
.amdhsa_group_segment_fixed_size 0
.amdhsa_private_segment_fixed_size 0
- .amdhsa_kernarg_size 80
+ .amdhsa_kernarg_size 24
+ .amdhsa_user_sgpr_count 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 0
@@ -77,6 +77,7 @@ BB0_3:
.amdhsa_workgroup_processor_mode 1
.amdhsa_memory_ordered 1
.amdhsa_forward_progress 0
+ .amdhsa_shared_vgpr_count 0
.amdhsa_exception_fp_ieee_invalid_op 0
.amdhsa_exception_fp_denorm_src 0
.amdhsa_exception_fp_ieee_div_zero 0
@@ -85,13 +86,13 @@ BB0_3:
.amdhsa_exception_fp_ieee_inexact 0
.amdhsa_exception_int_div_zero 0
.end_amdhsa_kernel
- .text
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
.Lfunc_end0:
- .size _Z20vector_square_kernelIfEvPT_PKS0_x, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_x
+ .size _Z20vector_square_kernelIfEvPT_PKS0_y, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_y
; -- End function
.section .AMDGPU.csdata
; Kernel info:
-; codeLenInByte = 188
+; codeLenInByte = 212
; NumSgprs: 14
; NumVgprs: 7
; ScratchSize: 0
@@ -147,7 +148,7 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.zero 1
.size _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, 1
- .ident "AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"
+ .ident "AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"
.section ".note.GNU-stack"
.addrsig
.addrsig_sym _ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE
@@ -169,44 +170,19 @@ amdhsa.kernels:
- .offset: 16
.size: 8
.value_kind: by_value
- - .offset: 24
- .size: 8
- .value_kind: hidden_global_offset_x
- - .offset: 32
- .size: 8
- .value_kind: hidden_global_offset_y
- - .offset: 40
- .size: 8
- .value_kind: hidden_global_offset_z
- - .address_space: global
- .offset: 48
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 56
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 64
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 72
- .size: 8
- .value_kind: hidden_multigrid_sync_arg
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
- .kernarg_segment_size: 80
+ .kernarg_segment_size: 24
.language: OpenCL C
.language_version:
- 2
- 0
.max_flat_workgroup_size: 1024
- .name: _Z20vector_square_kernelIfEvPT_PKS0_x
+ .name: _Z20vector_square_kernelIfEvPT_PKS0_y
.private_segment_fixed_size: 0
.sgpr_count: 14
.sgpr_spill_count: 0
- .symbol: _Z20vector_square_kernelIfEvPT_PKS0_x.kd
+ .symbol: _Z20vector_square_kernelIfEvPT_PKS0_y.kd
.vgpr_count: 7
.vgpr_spill_count: 0
.wavefront_size: 32
diff --git a/HIP-Basic/assembly_to_executable/main_gfx803.s b/HIP-Basic/assembly_to_executable/main_gfx803.s
index 7f9c7f3fc..300e7509f 100644
--- a/HIP-Basic/assembly_to_executable/main_gfx803.s
+++ b/HIP-Basic/assembly_to_executable/main_gfx803.s
@@ -1,10 +1,11 @@
.text
.amdgcn_target "amdgcn-amd-amdhsa--gfx803"
- .protected _Z20vector_square_kernelIfEvPT_PKS0_x ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_x
- .globl _Z20vector_square_kernelIfEvPT_PKS0_x
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
+ .protected _Z20vector_square_kernelIfEvPT_PKS0_y ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_y
+ .globl _Z20vector_square_kernelIfEvPT_PKS0_y
.p2align 8
- .type _Z20vector_square_kernelIfEvPT_PKS0_x,@function
-_Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
+ .type _Z20vector_square_kernelIfEvPT_PKS0_y,@function
+_Z20vector_square_kernelIfEvPT_PKS0_y: ; @_Z20vector_square_kernelIfEvPT_PKS0_y
; %bb.0:
s_load_dword s0, s[4:5], 0x4
s_load_dwordx2 s[10:11], s[6:7], 0x10
@@ -15,7 +16,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
v_add_u32_e32 v0, vcc, s8, v0
v_cmp_gt_u64_e32 vcc, s[10:11], v[0:1]
s_and_saveexec_b64 s[0:1], vcc
- s_cbranch_execz BB0_3
+ s_cbranch_execz .LBB0_3
; %bb.1:
s_load_dword s8, s[4:5], 0xc
s_load_dwordx4 s[4:7], s[6:7], 0x0
@@ -24,7 +25,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
s_mov_b64 s[14:15], 0
s_waitcnt lgkmcnt(0)
s_lshl_b64 s[12:13], s[8:9], 2
-BB0_2: ; =>This Inner Loop Header: Depth=1
+.LBB0_2: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v5, s7
v_add_u32_e32 v4, vcc, s6, v2
v_addc_u32_e32 v5, vcc, v5, v3, vcc
@@ -44,15 +45,16 @@ BB0_2: ; =>This Inner Loop Header: Depth=1
v_mul_f32_e32 v6, v6, v6
flat_store_dword v[4:5], v6
s_andn2_b64 exec, exec, s[14:15]
- s_cbranch_execnz BB0_2
-BB0_3:
+ s_cbranch_execnz .LBB0_2
+.LBB0_3:
s_endpgm
.section .rodata,#alloc
.p2align 6
- .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_x
+ .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_y
.amdhsa_group_segment_fixed_size 0
.amdhsa_private_segment_fixed_size 0
- .amdhsa_kernarg_size 80
+ .amdhsa_kernarg_size 24
+ .amdhsa_user_sgpr_count 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 0
@@ -83,13 +85,13 @@ BB0_3:
.amdhsa_exception_fp_ieee_inexact 0
.amdhsa_exception_int_div_zero 0
.end_amdhsa_kernel
- .text
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
.Lfunc_end0:
- .size _Z20vector_square_kernelIfEvPT_PKS0_x, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_x
+ .size _Z20vector_square_kernelIfEvPT_PKS0_y, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_y
; -- End function
.section .AMDGPU.csdata
; Kernel info:
-; codeLenInByte = 200
+; codeLenInByte = 216
; NumSgprs: 18
; NumVgprs: 9
; ScratchSize: 0
@@ -142,7 +144,7 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.zero 1
.size _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, 1
- .ident "AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"
+ .ident "AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"
.section ".note.GNU-stack"
.addrsig
.addrsig_sym _ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE
@@ -164,44 +166,19 @@ amdhsa.kernels:
- .offset: 16
.size: 8
.value_kind: by_value
- - .offset: 24
- .size: 8
- .value_kind: hidden_global_offset_x
- - .offset: 32
- .size: 8
- .value_kind: hidden_global_offset_y
- - .offset: 40
- .size: 8
- .value_kind: hidden_global_offset_z
- - .address_space: global
- .offset: 48
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 56
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 64
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 72
- .size: 8
- .value_kind: hidden_multigrid_sync_arg
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
- .kernarg_segment_size: 80
+ .kernarg_segment_size: 24
.language: OpenCL C
.language_version:
- 2
- 0
.max_flat_workgroup_size: 1024
- .name: _Z20vector_square_kernelIfEvPT_PKS0_x
+ .name: _Z20vector_square_kernelIfEvPT_PKS0_y
.private_segment_fixed_size: 0
.sgpr_count: 18
.sgpr_spill_count: 0
- .symbol: _Z20vector_square_kernelIfEvPT_PKS0_x.kd
+ .symbol: _Z20vector_square_kernelIfEvPT_PKS0_y.kd
.vgpr_count: 9
.vgpr_spill_count: 0
.wavefront_size: 64
diff --git a/HIP-Basic/assembly_to_executable/main_gfx900.s b/HIP-Basic/assembly_to_executable/main_gfx900.s
index 6ca519c15..5f2cad928 100644
--- a/HIP-Basic/assembly_to_executable/main_gfx900.s
+++ b/HIP-Basic/assembly_to_executable/main_gfx900.s
@@ -1,10 +1,11 @@
.text
.amdgcn_target "amdgcn-amd-amdhsa--gfx900"
- .protected _Z20vector_square_kernelIfEvPT_PKS0_x ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_x
- .globl _Z20vector_square_kernelIfEvPT_PKS0_x
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
+ .protected _Z20vector_square_kernelIfEvPT_PKS0_y ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_y
+ .globl _Z20vector_square_kernelIfEvPT_PKS0_y
.p2align 8
- .type _Z20vector_square_kernelIfEvPT_PKS0_x,@function
-_Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
+ .type _Z20vector_square_kernelIfEvPT_PKS0_y,@function
+_Z20vector_square_kernelIfEvPT_PKS0_y: ; @_Z20vector_square_kernelIfEvPT_PKS0_y
; %bb.0:
s_load_dword s0, s[4:5], 0x4
s_load_dwordx2 s[12:13], s[6:7], 0x10
@@ -15,7 +16,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
v_add_u32_e32 v0, s8, v0
v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
s_and_saveexec_b64 s[0:1], vcc
- s_cbranch_execz BB0_3
+ s_cbranch_execz .LBB0_3
; %bb.1:
s_load_dword s14, s[4:5], 0xc
s_load_dwordx4 s[8:11], s[6:7], 0x0
@@ -24,7 +25,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
s_mov_b64 s[6:7], 0
s_waitcnt lgkmcnt(0)
s_lshl_b64 s[4:5], s[14:15], 2
-BB0_2: ; =>This Inner Loop Header: Depth=1
+.LBB0_2: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v5, s11
v_add_co_u32_e32 v4, vcc, s10, v2
v_addc_co_u32_e32 v5, vcc, v5, v3, vcc
@@ -44,15 +45,16 @@ BB0_2: ; =>This Inner Loop Header: Depth=1
v_mul_f32_e32 v6, v6, v6
global_store_dword v[4:5], v6, off
s_andn2_b64 exec, exec, s[6:7]
- s_cbranch_execnz BB0_2
-BB0_3:
+ s_cbranch_execnz .LBB0_2
+.LBB0_3:
s_endpgm
.section .rodata,#alloc
.p2align 6
- .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_x
+ .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_y
.amdhsa_group_segment_fixed_size 0
.amdhsa_private_segment_fixed_size 0
- .amdhsa_kernarg_size 80
+ .amdhsa_kernarg_size 24
+ .amdhsa_user_sgpr_count 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 0
@@ -85,13 +87,13 @@ BB0_3:
.amdhsa_exception_fp_ieee_inexact 0
.amdhsa_exception_int_div_zero 0
.end_amdhsa_kernel
- .text
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
.Lfunc_end0:
- .size _Z20vector_square_kernelIfEvPT_PKS0_x, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_x
+ .size _Z20vector_square_kernelIfEvPT_PKS0_y, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_y
; -- End function
.section .AMDGPU.csdata
; Kernel info:
-; codeLenInByte = 200
+; codeLenInByte = 216
; NumSgprs: 18
; NumVgprs: 9
; ScratchSize: 0
@@ -144,7 +146,7 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.zero 1
.size _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, 1
- .ident "AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"
+ .ident "AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"
.section ".note.GNU-stack"
.addrsig
.addrsig_sym _ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE
@@ -166,44 +168,19 @@ amdhsa.kernels:
- .offset: 16
.size: 8
.value_kind: by_value
- - .offset: 24
- .size: 8
- .value_kind: hidden_global_offset_x
- - .offset: 32
- .size: 8
- .value_kind: hidden_global_offset_y
- - .offset: 40
- .size: 8
- .value_kind: hidden_global_offset_z
- - .address_space: global
- .offset: 48
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 56
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 64
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 72
- .size: 8
- .value_kind: hidden_multigrid_sync_arg
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
- .kernarg_segment_size: 80
+ .kernarg_segment_size: 24
.language: OpenCL C
.language_version:
- 2
- 0
.max_flat_workgroup_size: 1024
- .name: _Z20vector_square_kernelIfEvPT_PKS0_x
+ .name: _Z20vector_square_kernelIfEvPT_PKS0_y
.private_segment_fixed_size: 0
.sgpr_count: 18
.sgpr_spill_count: 0
- .symbol: _Z20vector_square_kernelIfEvPT_PKS0_x.kd
+ .symbol: _Z20vector_square_kernelIfEvPT_PKS0_y.kd
.vgpr_count: 9
.vgpr_spill_count: 0
.wavefront_size: 64
diff --git a/HIP-Basic/assembly_to_executable/main_gfx906.s b/HIP-Basic/assembly_to_executable/main_gfx906.s
index 2447c87b0..ea02712e6 100644
--- a/HIP-Basic/assembly_to_executable/main_gfx906.s
+++ b/HIP-Basic/assembly_to_executable/main_gfx906.s
@@ -1,10 +1,11 @@
.text
.amdgcn_target "amdgcn-amd-amdhsa--gfx906"
- .protected _Z20vector_square_kernelIfEvPT_PKS0_x ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_x
- .globl _Z20vector_square_kernelIfEvPT_PKS0_x
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
+ .protected _Z20vector_square_kernelIfEvPT_PKS0_y ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_y
+ .globl _Z20vector_square_kernelIfEvPT_PKS0_y
.p2align 8
- .type _Z20vector_square_kernelIfEvPT_PKS0_x,@function
-_Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
+ .type _Z20vector_square_kernelIfEvPT_PKS0_y,@function
+_Z20vector_square_kernelIfEvPT_PKS0_y: ; @_Z20vector_square_kernelIfEvPT_PKS0_y
; %bb.0:
s_load_dword s0, s[4:5], 0x4
s_load_dwordx2 s[12:13], s[6:7], 0x10
@@ -15,7 +16,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
v_add_u32_e32 v0, s8, v0
v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
s_and_saveexec_b64 s[0:1], vcc
- s_cbranch_execz BB0_3
+ s_cbranch_execz .LBB0_3
; %bb.1:
s_load_dword s14, s[4:5], 0xc
s_load_dwordx4 s[8:11], s[6:7], 0x0
@@ -24,7 +25,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
s_mov_b64 s[6:7], 0
s_waitcnt lgkmcnt(0)
s_lshl_b64 s[4:5], s[14:15], 2
-BB0_2: ; =>This Inner Loop Header: Depth=1
+.LBB0_2: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v5, s11
v_add_co_u32_e32 v4, vcc, s10, v2
v_addc_co_u32_e32 v5, vcc, v5, v3, vcc
@@ -44,15 +45,16 @@ BB0_2: ; =>This Inner Loop Header: Depth=1
v_mul_f32_e32 v6, v6, v6
global_store_dword v[4:5], v6, off
s_andn2_b64 exec, exec, s[6:7]
- s_cbranch_execnz BB0_2
-BB0_3:
+ s_cbranch_execnz .LBB0_2
+.LBB0_3:
s_endpgm
.section .rodata,#alloc
.p2align 6
- .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_x
+ .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_y
.amdhsa_group_segment_fixed_size 0
.amdhsa_private_segment_fixed_size 0
- .amdhsa_kernarg_size 80
+ .amdhsa_kernarg_size 24
+ .amdhsa_user_sgpr_count 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 0
@@ -85,13 +87,13 @@ BB0_3:
.amdhsa_exception_fp_ieee_inexact 0
.amdhsa_exception_int_div_zero 0
.end_amdhsa_kernel
- .text
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
.Lfunc_end0:
- .size _Z20vector_square_kernelIfEvPT_PKS0_x, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_x
+ .size _Z20vector_square_kernelIfEvPT_PKS0_y, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_y
; -- End function
.section .AMDGPU.csdata
; Kernel info:
-; codeLenInByte = 200
+; codeLenInByte = 216
; NumSgprs: 18
; NumVgprs: 9
; ScratchSize: 0
@@ -144,7 +146,7 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.zero 1
.size _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, 1
- .ident "AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"
+ .ident "AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"
.section ".note.GNU-stack"
.addrsig
.addrsig_sym _ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE
@@ -166,44 +168,19 @@ amdhsa.kernels:
- .offset: 16
.size: 8
.value_kind: by_value
- - .offset: 24
- .size: 8
- .value_kind: hidden_global_offset_x
- - .offset: 32
- .size: 8
- .value_kind: hidden_global_offset_y
- - .offset: 40
- .size: 8
- .value_kind: hidden_global_offset_z
- - .address_space: global
- .offset: 48
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 56
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 64
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 72
- .size: 8
- .value_kind: hidden_multigrid_sync_arg
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
- .kernarg_segment_size: 80
+ .kernarg_segment_size: 24
.language: OpenCL C
.language_version:
- 2
- 0
.max_flat_workgroup_size: 1024
- .name: _Z20vector_square_kernelIfEvPT_PKS0_x
+ .name: _Z20vector_square_kernelIfEvPT_PKS0_y
.private_segment_fixed_size: 0
.sgpr_count: 18
.sgpr_spill_count: 0
- .symbol: _Z20vector_square_kernelIfEvPT_PKS0_x.kd
+ .symbol: _Z20vector_square_kernelIfEvPT_PKS0_y.kd
.vgpr_count: 9
.vgpr_spill_count: 0
.wavefront_size: 64
diff --git a/HIP-Basic/assembly_to_executable/main_gfx908.s b/HIP-Basic/assembly_to_executable/main_gfx908.s
index 851f0a894..aaf775782 100644
--- a/HIP-Basic/assembly_to_executable/main_gfx908.s
+++ b/HIP-Basic/assembly_to_executable/main_gfx908.s
@@ -1,10 +1,11 @@
.text
.amdgcn_target "amdgcn-amd-amdhsa--gfx908"
- .protected _Z20vector_square_kernelIfEvPT_PKS0_x ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_x
- .globl _Z20vector_square_kernelIfEvPT_PKS0_x
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
+ .protected _Z20vector_square_kernelIfEvPT_PKS0_y ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_y
+ .globl _Z20vector_square_kernelIfEvPT_PKS0_y
.p2align 8
- .type _Z20vector_square_kernelIfEvPT_PKS0_x,@function
-_Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
+ .type _Z20vector_square_kernelIfEvPT_PKS0_y,@function
+_Z20vector_square_kernelIfEvPT_PKS0_y: ; @_Z20vector_square_kernelIfEvPT_PKS0_y
; %bb.0:
s_load_dword s0, s[4:5], 0x4
s_load_dwordx2 s[12:13], s[6:7], 0x10
@@ -15,7 +16,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
v_add_u32_e32 v0, s8, v0
v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
s_and_saveexec_b64 s[0:1], vcc
- s_cbranch_execz BB0_3
+ s_cbranch_execz .LBB0_3
; %bb.1:
s_load_dword s14, s[4:5], 0xc
s_load_dwordx4 s[8:11], s[6:7], 0x0
@@ -24,7 +25,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
s_mov_b64 s[6:7], 0
s_waitcnt lgkmcnt(0)
s_lshl_b64 s[4:5], s[14:15], 2
-BB0_2: ; =>This Inner Loop Header: Depth=1
+.LBB0_2: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v5, s11
v_add_co_u32_e32 v4, vcc, s10, v2
v_addc_co_u32_e32 v5, vcc, v5, v3, vcc
@@ -44,15 +45,16 @@ BB0_2: ; =>This Inner Loop Header: Depth=1
v_mul_f32_e32 v6, v6, v6
global_store_dword v[4:5], v6, off
s_andn2_b64 exec, exec, s[6:7]
- s_cbranch_execnz BB0_2
-BB0_3:
+ s_cbranch_execnz .LBB0_2
+.LBB0_3:
s_endpgm
.section .rodata,#alloc
.p2align 6
- .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_x
+ .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_y
.amdhsa_group_segment_fixed_size 0
.amdhsa_private_segment_fixed_size 0
- .amdhsa_kernarg_size 80
+ .amdhsa_kernarg_size 24
+ .amdhsa_user_sgpr_count 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 0
@@ -85,13 +87,13 @@ BB0_3:
.amdhsa_exception_fp_ieee_inexact 0
.amdhsa_exception_int_div_zero 0
.end_amdhsa_kernel
- .text
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
.Lfunc_end0:
- .size _Z20vector_square_kernelIfEvPT_PKS0_x, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_x
+ .size _Z20vector_square_kernelIfEvPT_PKS0_y, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_y
; -- End function
.section .AMDGPU.csdata
; Kernel info:
-; codeLenInByte = 200
+; codeLenInByte = 216
; NumSgprs: 18
; NumVgprs: 9
; NumAgprs: 0
@@ -146,7 +148,7 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.zero 1
.size _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, 1
- .ident "AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"
+ .ident "AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"
.section ".note.GNU-stack"
.addrsig
.addrsig_sym _ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE
@@ -156,7 +158,8 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.amdgpu_metadata
---
amdhsa.kernels:
- - .args:
+ - .agpr_count: 0
+ .args:
- .address_space: global
.offset: 0
.size: 8
@@ -168,44 +171,19 @@ amdhsa.kernels:
- .offset: 16
.size: 8
.value_kind: by_value
- - .offset: 24
- .size: 8
- .value_kind: hidden_global_offset_x
- - .offset: 32
- .size: 8
- .value_kind: hidden_global_offset_y
- - .offset: 40
- .size: 8
- .value_kind: hidden_global_offset_z
- - .address_space: global
- .offset: 48
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 56
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 64
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 72
- .size: 8
- .value_kind: hidden_multigrid_sync_arg
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
- .kernarg_segment_size: 80
+ .kernarg_segment_size: 24
.language: OpenCL C
.language_version:
- 2
- 0
.max_flat_workgroup_size: 1024
- .name: _Z20vector_square_kernelIfEvPT_PKS0_x
+ .name: _Z20vector_square_kernelIfEvPT_PKS0_y
.private_segment_fixed_size: 0
.sgpr_count: 18
.sgpr_spill_count: 0
- .symbol: _Z20vector_square_kernelIfEvPT_PKS0_x.kd
+ .symbol: _Z20vector_square_kernelIfEvPT_PKS0_y.kd
.vgpr_count: 9
.vgpr_spill_count: 0
.wavefront_size: 64
diff --git a/HIP-Basic/assembly_to_executable/main_gfx90a.s b/HIP-Basic/assembly_to_executable/main_gfx90a.s
index 85575938a..691eb6ed2 100644
--- a/HIP-Basic/assembly_to_executable/main_gfx90a.s
+++ b/HIP-Basic/assembly_to_executable/main_gfx90a.s
@@ -1,10 +1,11 @@
.text
.amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
- .protected _Z20vector_square_kernelIfEvPT_PKS0_x ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_x
- .globl _Z20vector_square_kernelIfEvPT_PKS0_x
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
+ .protected _Z20vector_square_kernelIfEvPT_PKS0_y ; -- Begin function _Z20vector_square_kernelIfEvPT_PKS0_y
+ .globl _Z20vector_square_kernelIfEvPT_PKS0_y
.p2align 8
- .type _Z20vector_square_kernelIfEvPT_PKS0_x,@function
-_Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
+ .type _Z20vector_square_kernelIfEvPT_PKS0_y,@function
+_Z20vector_square_kernelIfEvPT_PKS0_y: ; @_Z20vector_square_kernelIfEvPT_PKS0_y
; %bb.0:
s_load_dword s0, s[4:5], 0x4
s_load_dwordx2 s[12:13], s[6:7], 0x10
@@ -15,7 +16,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
v_add_u32_e32 v0, s8, v0
v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
s_and_saveexec_b64 s[0:1], vcc
- s_cbranch_execz BB0_3
+ s_cbranch_execz .LBB0_3
; %bb.1:
s_load_dword s14, s[4:5], 0xc
s_load_dwordx4 s[8:11], s[6:7], 0x0
@@ -24,7 +25,7 @@ _Z20vector_square_kernelIfEvPT_PKS0_x: ; @_Z20vector_square_kernelIfEvPT_PKS0_x
s_mov_b64 s[6:7], 0
s_waitcnt lgkmcnt(0)
s_lshl_b64 s[4:5], s[14:15], 2
-BB0_2: ; =>This Inner Loop Header: Depth=1
+.LBB0_2: ; =>This Inner Loop Header: Depth=1
v_mov_b32_e32 v5, s11
v_add_co_u32_e32 v4, vcc, s10, v2
v_addc_co_u32_e32 v5, vcc, v5, v3, vcc
@@ -44,15 +45,16 @@ BB0_2: ; =>This Inner Loop Header: Depth=1
v_mul_f32_e32 v6, v6, v6
global_store_dword v[4:5], v6, off
s_andn2_b64 exec, exec, s[6:7]
- s_cbranch_execnz BB0_2
-BB0_3:
+ s_cbranch_execnz .LBB0_2
+.LBB0_3:
s_endpgm
.section .rodata,#alloc
.p2align 6
- .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_x
+ .amdhsa_kernel _Z20vector_square_kernelIfEvPT_PKS0_y
.amdhsa_group_segment_fixed_size 0
.amdhsa_private_segment_fixed_size 0
- .amdhsa_kernarg_size 80
+ .amdhsa_kernarg_size 24
+ .amdhsa_user_sgpr_count 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 0
@@ -87,13 +89,13 @@ BB0_3:
.amdhsa_exception_fp_ieee_inexact 0
.amdhsa_exception_int_div_zero 0
.end_amdhsa_kernel
- .text
+ .section .text._Z20vector_square_kernelIfEvPT_PKS0_y,#alloc,#execinstr
.Lfunc_end0:
- .size _Z20vector_square_kernelIfEvPT_PKS0_x, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_x
+ .size _Z20vector_square_kernelIfEvPT_PKS0_y, .Lfunc_end0-_Z20vector_square_kernelIfEvPT_PKS0_y
; -- End function
.section .AMDGPU.csdata
; Kernel info:
-; codeLenInByte = 200
+; codeLenInByte = 216
; NumSgprs: 18
; NumVgprs: 9
; NumAgprs: 0
@@ -154,7 +156,7 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.zero 1
.size _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, 1
- .ident "AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"
+ .ident "AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"
.section ".note.GNU-stack"
.addrsig
.addrsig_sym _ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE
@@ -164,7 +166,8 @@ _ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE:
.amdgpu_metadata
---
amdhsa.kernels:
- - .args:
+ - .agpr_count: 0
+ .args:
- .address_space: global
.offset: 0
.size: 8
@@ -176,44 +179,19 @@ amdhsa.kernels:
- .offset: 16
.size: 8
.value_kind: by_value
- - .offset: 24
- .size: 8
- .value_kind: hidden_global_offset_x
- - .offset: 32
- .size: 8
- .value_kind: hidden_global_offset_y
- - .offset: 40
- .size: 8
- .value_kind: hidden_global_offset_z
- - .address_space: global
- .offset: 48
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 56
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 64
- .size: 8
- .value_kind: hidden_none
- - .address_space: global
- .offset: 72
- .size: 8
- .value_kind: hidden_multigrid_sync_arg
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
- .kernarg_segment_size: 80
+ .kernarg_segment_size: 24
.language: OpenCL C
.language_version:
- 2
- 0
.max_flat_workgroup_size: 1024
- .name: _Z20vector_square_kernelIfEvPT_PKS0_x
+ .name: _Z20vector_square_kernelIfEvPT_PKS0_y
.private_segment_fixed_size: 0
.sgpr_count: 18
.sgpr_spill_count: 0
- .symbol: _Z20vector_square_kernelIfEvPT_PKS0_x.kd
+ .symbol: _Z20vector_square_kernelIfEvPT_PKS0_y.kd
.vgpr_count: 9
.vgpr_spill_count: 0
.wavefront_size: 64
diff --git a/HIP-Basic/bandwidth/Makefile b/HIP-Basic/bandwidth/Makefile
index 6c821f21f..606e8b453 100644
--- a/HIP-Basic/bandwidth/Makefile
+++ b/HIP-Basic/bandwidth/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
-$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
-$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp $(COMMON_INCLUDE_DIR)/cmdparser.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/bandwidth/bandwidth_vs2019.sln b/HIP-Basic/bandwidth/bandwidth_vs2019.sln
index 09016afe9..410a07aad 100644
--- a/HIP-Basic/bandwidth/bandwidth_vs2019.sln
+++ b/HIP-Basic/bandwidth/bandwidth_vs2019.sln
@@ -1,25 +1,25 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.32630.194
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidth_vs2019", "bandwidth_vs2019.vcxproj", "{16B11B54-CD72-43B6-B226-38C668B41A79}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|x64 = Debug|x64
- Release|x64 = Release|x64
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.ActiveCfg = Debug|x64
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.Build.0 = Debug|x64
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.ActiveCfg = Release|x64
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.Build.0 = Release|x64
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
- SolutionGuid = {1E2ACB7F-1706-491A-9E62-395C1BD8E637}
- EndGlobalSection
-EndGlobal
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidth_vs2019", "bandwidth_vs2019.vcxproj", "{16B11B54-CD72-43B6-B226-38C668B41A79}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.ActiveCfg = Debug|x64
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.Build.0 = Debug|x64
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.ActiveCfg = Release|x64
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {1E2ACB7F-1706-491A-9E62-395C1BD8E637}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj b/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj
index 3283ff1b5..3b3fde4cd 100644
--- a/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj
+++ b/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj
@@ -1,102 +1,102 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
-
-
-
- 15.0
- {16b11b54-cd72-43b6-b226-38c668b41a79}
- Win32Proj
- bandwidth_vs2019
- 10.0
-
-
-
- Application
- true
- HIP
- Unicode
-
-
- Application
- false
- HIP
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- hip_$(ProjectName)
-
-
- false
- hip_$(ProjectName)
-
-
- gfx1030
-
-
- gfx1030
-
-
-
- Level1
- __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- true
-
-
- Console
- true
-
-
-
-
- Level2
- true
- true
- __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- true
-
-
- Console
- true
- true
- true
-
-
-
-
-
-
-
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+ 15.0
+ {16b11b54-cd72-43b6-b226-38c668b41a79}
+ Win32Proj
+ bandwidth_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ true
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ true
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj.filters b/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj.filters
index 7dc35f685..bafcb9d6e 100644
--- a/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj.filters
+++ b/HIP-Basic/bandwidth/bandwidth_vs2019.vcxproj.filters
@@ -1,30 +1,30 @@
-
-
-
-
- {c71d9db2-bf13-49ee-b794-626d24391150}
- cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
-
-
- {520f4985-c9bd-4add-9485-049fafe0cdca}
- h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
-
-
- {006f799a-d711-49a7-93da-7f60d8872b02}
- rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
-
-
-
-
- Source Files
-
-
-
-
- Header Files
-
-
- Header Files
-
-
+
+
+
+
+ {c71d9db2-bf13-49ee-b794-626d24391150}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {520f4985-c9bd-4add-9485-049fafe0cdca}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {006f799a-d711-49a7-93da-7f60d8872b02}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+ Header Files
+
+
\ No newline at end of file
diff --git a/HIP-Basic/bit_extract/.gitignore b/HIP-Basic/bit_extract/.gitignore
new file mode 100644
index 000000000..ff556180d
--- /dev/null
+++ b/HIP-Basic/bit_extract/.gitignore
@@ -0,0 +1 @@
+hip_bit_extract
diff --git a/HIP-Basic/bit_extract/CMakeLists.txt b/HIP-Basic/bit_extract/CMakeLists.txt
new file mode 100644
index 000000000..81eec2d01
--- /dev/null
+++ b/HIP-Basic/bit_extract/CMakeLists.txt
@@ -0,0 +1,57 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name of the example; used for the CMake project, the executable target and the test.
+set(example_name hip_bit_extract)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+# Enable test registration. Without this call add_test() below generates no tests
+# when this directory is configured as a stand-alone project.
+enable_testing()
+
+# GPU language used to compile the example, selectable from the cache (e.g. -D GPU_RUNTIME=CUDA).
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+# Reject any value other than the supported runtimes before enabling the language.
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+    message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+# Enable the selected GPU language and require C++17 without compiler extensions for it.
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+# Use the ROCm installation as the package search prefix unless the user provided one.
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest. The NAME/COMMAND signature resolves the target name
+# to the location of the built executable.
+add_test(NAME ${example_name} COMMAND ${example_name})
+
+# The shared example headers are always needed; when building with the CUDA language the
+# ROCm include directory is added as well.
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA")
+    list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+# Compile main.hip with the selected GPU language.
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/bit_extract/Makefile b/HIP-Basic/bit_extract/Makefile
new file mode 100644
index 000000000..c5a710283
--- /dev/null
+++ b/HIP-Basic/bit_extract/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name of the executable to build, location of the shared example headers and the
+# GPU runtime to target. GPU_RUNTIME defaults to HIP; a value given on the make
+# command line (e.g. `make GPU_RUNTIME=CUDA`) takes precedence over this assignment.
+EXAMPLE := hip_bit_extract
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+# Compiler driver; only set here when HIPCXX is not already defined.
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+# The I-prefixed variables collect the flags this example itself requires. The
+# user-facing CXXFLAGS, CPPFLAGS, LDFLAGS and LDLIBS are appended to them further
+# below, so setting those on the command line does not drop the required flags.
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+# CUDA: compile the source as CUDA code and add the HIP headers as system headers.
+# HIP: enable warnings by default, unless CXXFLAGS was already set.
+# Any other GPU_RUNTIME value aborts with an error.
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+# Append the user-supplied flags after the required ones.
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+# Only main.hip ($<) is passed to the compiler; the header is listed as a
+# prerequisite so that changing it triggers a rebuild.
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+# Remove the built executable.
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/bit_extract/README.md b/HIP-Basic/bit_extract/README.md
new file mode 100644
index 000000000..214bfbb89
--- /dev/null
+++ b/HIP-Basic/bit_extract/README.md
@@ -0,0 +1,33 @@
+# HIP-Basic Bit Extract Example
+
+## Description
+A HIP-specific bit extract solution is presented in this example.
+
+### Application flow
+1. Allocate memory for host vectors.
+2. Fill the input host vector as an arithmetic sequence by the vector index.
+3. Allocate memory for device arrays.
+4. Copy the arithmetic sequence from the host to device memory.
+5. Apply the bit extract operator to the sequence element by element and store the results in the output array. When compiling for the AMD platform, the `__bitextract_u32()` device function is used; otherwise the standard mask and bit shift operators are used.
+6. Copy the result sequence from the device to the host memory.
+7. Compare the result sequence to the expected sequence, element by element, and count the mismatches. If any mismatch is found, the number of errors is printed and the program exits with an error code.
+8. Deallocate device and host memory.
+9. "Validation passed." is printed when the flow was successful.
+
+## Key APIs and Concepts
+- `hipLaunchKernelGGL(kernel_name, grid_dim, block_dim, dynamic_shared_memory_size, stream, kernel_arguments...)` is the HIP kernel launcher, where the grid and block dimensions, the dynamic shared memory size and the HIP stream are defined, followed by the arguments of the kernel. This example uses the default stream (`hipStreamDefault`).
+- `__bitextract_u32(source, bit_start, num_bits)` is the built-in AMD HIP bit extract operator, where we define a source scalar, a `bit_start` start bit and a `num_bits` number of extraction bits. The operator returns with a scalar value.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `threadIdx`, `blockIdx`, `blockDim`, `gridDim`
+- `__bitextract_u32`
+
+#### Host symbols
+- `hipMalloc`
+- `hipFree`
+- `hipMemcpy`
+- `hipMemcpyHostToDevice`
+- `hipMemcpyDeviceToHost`
+- `hipLaunchKernelGGL`
diff --git a/HIP-Basic/bit_extract/bit_extract_vs2019.sln b/HIP-Basic/bit_extract/bit_extract_vs2019.sln
new file mode 100644
index 000000000..37d475406
--- /dev/null
+++ b/HIP-Basic/bit_extract/bit_extract_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bit_extract_vs2019", "bit_extract_vs2019.vcxproj", "{63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Debug|x64.ActiveCfg = Debug|x64
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Debug|x64.Build.0 = Debug|x64
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Release|x64.ActiveCfg = Release|x64
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {D7C4B290-7C93-4D26-85D9-364F6A448EE0}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/bit_extract/bit_extract_vs2019.vcxproj b/HIP-Basic/bit_extract/bit_extract_vs2019.vcxproj
new file mode 100644
index 000000000..4098bdf60
--- /dev/null
+++ b/HIP-Basic/bit_extract/bit_extract_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}
+ Win32Proj
+ bit_extract_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/bit_extract/bit_extract_vs2019.vcxproj.filters b/HIP-Basic/bit_extract/bit_extract_vs2019.vcxproj.filters
new file mode 100644
index 000000000..591e9f2c6
--- /dev/null
+++ b/HIP-Basic/bit_extract/bit_extract_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {2932a426-602b-4926-887e-27c50ba7eab7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {ed043ec4-e8ac-4831-93f5-a58546ec7bea}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {0da954bd-e555-4454-b082-b68d10c753b9}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/bit_extract/main.hip b/HIP-Basic/bit_extract/main.hip
new file mode 100644
index 000000000..92851c300
--- /dev/null
+++ b/HIP-Basic/bit_extract/main.hip
@@ -0,0 +1,111 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+
+#include
+#include
+
+/// \brief Bit extract kernel: for each of the \p size unsigned 32-bit integers in \p d_input,
+/// writes the 4-bit field that starts at bit 8 (bits 8..11) to the same index of \p d_output.
+/// - When compiled for the AMD platform (__HIP_PLATFORM_AMD__), the __bitextract_u32() built-in is used.
+/// - Otherwise the equivalent mask-and-shift expression is used.
+
+__global__ void bit_extract_kernel(uint32_t* d_output, const uint32_t* d_input, size_t size)
+{
+ // Global index of this thread and the total number of threads in the grid.
+ const size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
+ const size_t stride = blockDim.x * gridDim.x;
+
+ // Each thread handles the elements offset, offset + stride, ... below size,
+ // so all elements are processed even when size exceeds the number of threads.
+ for(size_t i = offset; i < size; i += stride)
+ {
+#ifdef __HIP_PLATFORM_AMD__
+ d_output[i] = __bitextract_u32(d_input[i], 8, 4);
+#else /* defined __HIP_PLATFORM_NVIDIA__ or other path */
+ d_output[i] = ((d_input[i] & 0xf00) >> 8);
+#endif
+ }
+}
+
+/// \brief Fills a host vector with its own indices, extracts bits 8..11 of every element on the
+/// device and validates the device results against the same extraction computed on the host.
+/// \returns error_exit_code if any element mismatches; otherwise main ends normally.
+int main()
+{
+    constexpr size_t size          = 1000000;
+    constexpr size_t size_in_bytes = size * sizeof(uint32_t);
+
+    // Allocate host vectors. The element type must be spelled out: it cannot be deduced
+    // from a size-only constructor call.
+    std::vector<uint32_t> h_input(size);
+    std::vector<uint32_t> h_output(size);
+
+    // Set up input data: every element holds its own index (size fits in 32 bits).
+    for(size_t i = 0; i < size; i++)
+    {
+        h_input[i] = static_cast<uint32_t>(i);
+    }
+
+    // Allocate device memory for the input and output data
+    uint32_t *d_input, *d_output;
+    HIP_CHECK(hipMalloc(&d_input, size_in_bytes));
+    HIP_CHECK(hipMalloc(&d_output, size_in_bytes));
+
+    // Copy data from host to device
+    HIP_CHECK(hipMemcpy(d_input, h_input.data(), size_in_bytes, hipMemcpyHostToDevice));
+
+    // Launch bit_extract_kernel() on the default stream without dynamic shared memory.
+    // The launch uses fewer threads than elements; the kernel loops over the remainder.
+    constexpr unsigned int number_of_blocks  = 512;
+    constexpr unsigned int threads_per_block = 256;
+    hipLaunchKernelGGL(bit_extract_kernel,
+                       dim3(number_of_blocks),
+                       dim3(threads_per_block),
+                       0,
+                       hipStreamDefault,
+                       d_output,
+                       d_input,
+                       size);
+
+    // Check if the kernel launch was successful.
+    HIP_CHECK(hipGetLastError());
+
+    // Copy data from device to host
+    HIP_CHECK(hipMemcpy(h_output.data(), d_output, size_in_bytes, hipMemcpyDeviceToHost));
+
+    // Free device memory
+    HIP_CHECK(hipFree(d_input));
+    HIP_CHECK(hipFree(d_output));
+
+    // Check result validity: the host reference uses the same mask-and-shift as the
+    // non-AMD kernel path.
+    unsigned int errors{};
+    for(size_t i = 0; i < size; i++)
+    {
+        uint32_t reference_value = ((h_input[i] & 0xf00) >> 8);
+        if(h_output[i] != reference_value)
+        {
+            errors++;
+        }
+    }
+
+    if(errors != 0)
+    {
+        std::cout << "Validation failed. Errors: " << errors << std::endl;
+        return error_exit_code;
+    }
+    else
+    {
+        std::cout << "Validation passed." << std::endl;
+    }
+}
diff --git a/HIP-Basic/cooperative_groups/.gitignore b/HIP-Basic/cooperative_groups/.gitignore
new file mode 100644
index 000000000..e637ad9aa
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/.gitignore
@@ -0,0 +1 @@
+hip_cooperative_groups
diff --git a/HIP-Basic/cooperative_groups/CMakeLists.txt b/HIP-Basic/cooperative_groups/CMakeLists.txt
new file mode 100644
index 000000000..c7efc0b6c
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/CMakeLists.txt
@@ -0,0 +1,57 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name of the example; used for the CMake project, the executable target and the test.
+set(example_name hip_cooperative_groups)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+# Enable test registration. Without this call add_test() below generates no tests
+# when this directory is configured as a stand-alone project.
+enable_testing()
+
+# GPU language used to compile the example, selectable from the cache (e.g. -D GPU_RUNTIME=CUDA).
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+# Reject any value other than the supported runtimes before enabling the language.
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+    message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+# Enable the selected GPU language and require C++17 without compiler extensions for it.
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+# Use the ROCm installation as the package search prefix unless the user provided one.
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest. The NAME/COMMAND signature resolves the target name
+# to the location of the built executable.
+add_test(NAME ${example_name} COMMAND ${example_name})
+
+# The shared example headers are always needed; when building with the CUDA language the
+# ROCm include directory is added as well.
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA")
+    list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+# Compile main.hip with the selected GPU language.
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/cooperative_groups/Makefile b/HIP-Basic/cooperative_groups/Makefile
new file mode 100644
index 000000000..6c69326a4
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name of the executable to build, location of the shared example headers and the
+# GPU runtime to target. GPU_RUNTIME defaults to HIP; a value given on the make
+# command line (e.g. `make GPU_RUNTIME=CUDA`) takes precedence over this assignment.
+EXAMPLE := hip_cooperative_groups
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+# Compiler driver; only set here when HIPCXX is not already defined.
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+# The I-prefixed variables collect the flags this example itself requires. The
+# user-facing CXXFLAGS, CPPFLAGS, LDFLAGS and LDLIBS are appended to them further
+# below, so setting those on the command line does not drop the required flags.
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+# CUDA: compile the source as CUDA code and add the HIP headers as system headers.
+# HIP: enable warnings by default, unless CXXFLAGS was already set.
+# Any other GPU_RUNTIME value aborts with an error.
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+# Append the user-supplied flags after the required ones.
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+# Only main.hip ($<) is passed to the compiler; the header is listed as a
+# prerequisite so that changing it triggers a rebuild.
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+# Remove the built executable.
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/cooperative_groups/README.md b/HIP-Basic/cooperative_groups/README.md
new file mode 100644
index 000000000..a4d6f3b08
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/README.md
@@ -0,0 +1,41 @@
+# HIP-Basic Cooperative Groups Example
+
+## Description
+This program showcases the usage of Cooperative Groups inside a reduction kernel.
+
+Cooperative groups can be used to gain more control over synchronization.
+
+For more insights, you can read the following blog post:
+[Cooperative Groups: Flexible CUDA Thread Programming](https://developer.nvidia.com/blog/cooperative-groups/)
+
+### Application flow
+1. A number of variables are defined to control the problem details and the kernel launch parameters.
+2. Input vector is set up in host memory.
+3. The input is copied to the device.
+4. The GPU reduction kernel is launched with previously defined arguments.
+5. The kernel will perform two reductions: a reduction of the whole threadblock and a reduction of custom partitions.
+6. The result vectors are copied back to the host and all device memory is freed.
+7. The elements of the result vectors are compared with the expected result. The result of the comparison is printed to the standard output.
+
+## Key APIs and Concepts
+Usually, programmers can only synchronize on warp-level or block-level.
+Cooperative groups, however, allow the programmer to partition threads into groups and subsequently synchronize those groups.
+The partitioned threads can reside across multiple devices.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `thread_group`
+- `thread_block`
+- `tiled_partition()`
+- `thread_block_tile`
+- All above from the [`cooperative_groups` namespace](https://github.com/ROCm-Developer-Tools/hipamd/blob/develop/include/hip/amd_detail/amd_hip_cooperative_groups.h)
+#### Host symbols
+- `hipMalloc`
+- `hipMemcpy`
+- `hipLaunchCooperativeKernel`
+- `hipDeviceAttributeCooperativeLaunch`
+- `hipDeviceGetAttribute`
+- `HIP_KERNEL_NAME`
+- `hipGetLastError`
+- `hipFree`
diff --git a/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.sln b/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.sln
new file mode 100644
index 000000000..d0410cd72
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cooperative_groups_vs2019", "cooperative_groups_vs2019.vcxproj", "{7A25CE69-BACE-4410-BEB0-12A69890F212}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Debug|x64.ActiveCfg = Debug|x64
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Debug|x64.Build.0 = Debug|x64
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Release|x64.ActiveCfg = Release|x64
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {69A6C748-F535-4DEF-85D1-54825AB819B9}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.vcxproj b/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.vcxproj
new file mode 100644
index 000000000..800b8ec81
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {7a25ce69-bace-4410-beb0-12a69890f212}
+ Win32Proj
+ cooperative_groups_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.vcxproj.filters b/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.vcxproj.filters
new file mode 100644
index 000000000..0e76ed9cd
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/cooperative_groups_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {25db88ec-6f1f-49d0-bd14-b0b028a2f0b6}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {c165da41-0b12-43fe-afa0-eb1ce67ad002}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {5905baad-b4ce-4f30-b9a8-274cdfeea1e0}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/cooperative_groups/main.hip b/HIP-Basic/cooperative_groups/main.hip
new file mode 100644
index 000000000..87eb06f85
--- /dev/null
+++ b/HIP-Basic/cooperative_groups/main.hip
@@ -0,0 +1,249 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+using namespace cooperative_groups;
+
+/// \brief Sums the `val` contributions of every thread in group `g`, using `x` as scratch space.
+/// `x` is indexed by the thread's rank in the group; only the rank-0 thread returns the total.
+__device__ unsigned int reduce_sum(thread_group g, unsigned int* x, unsigned int val)
+{
+    // Position of the calling thread within its group
+    const unsigned int rank = g.thread_rank();
+
+    // Tree reduction: the set of active threads starts at half the group size
+    // and is halved after every round, until no active thread is left
+    unsigned int stride = g.size() / 2;
+    while(stride > 0)
+    {
+        // Publish this thread's running partial sum to the scratch array
+        x[rank] = val;
+
+        // Wait until every thread of the group has published its value
+        g.sync();
+
+        // Threads in the active (lower) half add the value of their partner in the upper half
+        if(rank < stride)
+        {
+            val += x[rank + stride];
+        }
+
+        // Make sure all reads are done before the scratch array is overwritten next round
+        g.sync();
+        stride /= 2;
+    }
+
+    // Only the first thread of the group holds the complete sum; all others return zero
+    return (rank == 0) ? val : 0;
+}
+
+/// \brief A vector reduction kernel showcasing the use of cooperative groups.
+/// - First we showcase the use of a `thread_block` group.
+/// - Second we showcase the use of `tiled_partition<>()`.
+/// \tparam PartitionSize The number of threads in a cooperative group's tiled_partition.
+template<unsigned int PartitionSize>
+__global__ void vector_reduce_kernel(const unsigned int* d_vector,
+                                     unsigned int*       d_block_reduced_vector,
+                                     unsigned int*       d_partition_reduced_vector)
+{
+    // thread_block_group consists of all threads in the block
+    thread_block thread_block_group = this_thread_block();
+
+    // Workspace array in shared memory required for reduction;
+    // every thread of the block uses the slot at its own rank in the block
+    __shared__ unsigned int workspace[2048];
+
+    unsigned int output;
+
+    // Input to reduce, indexed by the rank of the thread within its block
+    const unsigned int input = d_vector[thread_block_group.thread_rank()];
+
+    // Perform reduction
+    output = reduce_sum(thread_block_group, workspace, input);
+
+    // Only the first thread returns a valid value
+    if(thread_block_group.thread_rank() == 0)
+    {
+        d_block_reduced_vector[0] = output;
+    }
+
+    // Every custom_partition group consists of PartitionSize threads
+    thread_block_tile<PartitionSize> custom_partition
+        = tiled_partition<PartitionSize>(thread_block_group);
+
+    // To make sure every partition has its own piece of shared memory it can work with
+    const unsigned int group_offset
+        = thread_block_group.thread_rank() - custom_partition.thread_rank();
+
+    // Perform reduction
+    output = reduce_sum(custom_partition, &workspace[group_offset], input);
+
+    // Only the first thread in each partition returns a valid value
+    if(custom_partition.thread_rank() == 0)
+    {
+        const unsigned int partition_id = thread_block_group.thread_rank() / PartitionSize;
+        d_partition_reduced_vector[partition_id] = output;
+    }
+}
+
+/// \brief Host side function to perform the same reductions as executed on the GPU: sums every
+/// run of \p partition_size consecutive elements of \p input; an incomplete last run is ignored.
+std::vector<unsigned int> ref_reduced(const unsigned int               partition_size,
+                                      const std::vector<unsigned int>& input)
+{
+    const unsigned int result_size = static_cast<unsigned int>(input.size()) / partition_size;
+    std::vector<unsigned int> result(result_size);
+
+    for(unsigned int i = 0; i < result_size; i++)
+    {
+        unsigned int partition_result = 0;
+        for(unsigned int j = 0; j < partition_size; j++)
+        {
+            partition_result += input[partition_size * i + j];
+        }
+        result[i] = partition_result;
+    }
+
+    return result;
+}
+
+int main()
+{
+#ifdef __HIP_PLATFORM_AMD__
+    int device               = 0;
+    int supports_coop_launch = 0;
+    // Check whether the current device supports cooperative kernel launches
+    // Use hipDeviceAttributeCooperativeMultiDeviceLaunch when launching across multiple devices
+    HIP_CHECK(hipGetDevice(&device));
+    HIP_CHECK(
+        hipDeviceGetAttribute(&supports_coop_launch, hipDeviceAttributeCooperativeLaunch, device));
+    if(!supports_coop_launch)
+    {
+        std::cout << "Skipping, device " << device << " does not support cooperative groups"
+                  << std::endl;
+        return 0;
+    }
+#endif
+
+    // Number of blocks to launch.
+    constexpr unsigned int num_blocks = 1;
+
+    // Number of threads in each kernel block.
+    constexpr unsigned int threads_per_block = 64;
+
+    // Total element count of the input vector.
+    constexpr unsigned int size = num_blocks * threads_per_block;
+
+    // Total elements count of a tiled_partition.
+    constexpr unsigned int partition_size = 16;
+
+    // Total size (in bytes) of the input vector.
+    constexpr size_t size_bytes = sizeof(unsigned int) * size;
+
+    static_assert(threads_per_block % partition_size == 0,
+                  "threads_per_block must be a multiple of partition_size");
+
+    // Allocate host vectors.
+    std::vector<unsigned int> h_vector(size);
+    std::vector<unsigned int> h_block_reduced(num_blocks);
+    std::vector<unsigned int> h_partition_reduced(threads_per_block / partition_size);
+
+    // Set up input data.
+    for(unsigned int i = 0; i < size; i++)
+    {
+        h_vector[i] = i;
+    }
+
+    // Allocate device memory for the input and output vectors.
+    unsigned int* d_vector{};
+    unsigned int* d_block_reduced{};
+    unsigned int* d_partition_reduced{};
+    HIP_CHECK(hipMalloc(&d_vector, size_bytes));
+    HIP_CHECK(hipMalloc(&d_block_reduced, sizeof(unsigned int) * h_block_reduced.size()));
+    HIP_CHECK(hipMalloc(&d_partition_reduced, sizeof(unsigned int) * h_partition_reduced.size()));
+
+    // Transfer the input vector to the device memory.
+    HIP_CHECK(hipMemcpy(d_vector, h_vector.data(), size_bytes, hipMemcpyHostToDevice));
+
+    void* params[] = {&d_vector, &d_block_reduced, &d_partition_reduced};
+    // Launching kernel from host; the kernel arguments are passed as an array of pointers.
+    HIP_CHECK(hipLaunchCooperativeKernel(vector_reduce_kernel<partition_size>,
+                                         dim3(num_blocks),
+                                         dim3(threads_per_block),
+                                         params,
+                                         0,
+                                         hipStreamDefault));
+
+    // Check if the kernel launch was successful.
+    HIP_CHECK(hipGetLastError());
+
+    // Transfer the result back to the host.
+    HIP_CHECK(hipMemcpy(h_block_reduced.data(),
+                        d_block_reduced,
+                        sizeof(unsigned int) * h_block_reduced.size(),
+                        hipMemcpyDeviceToHost));
+
+    HIP_CHECK(hipMemcpy(h_partition_reduced.data(),
+                        d_partition_reduced,
+                        sizeof(unsigned int) * h_partition_reduced.size(),
+                        hipMemcpyDeviceToHost));
+
+    // Free the resources on the device.
+    HIP_CHECK(hipFree(d_vector));
+    HIP_CHECK(hipFree(d_block_reduced));
+    HIP_CHECK(hipFree(d_partition_reduced));
+
+    // Perform the reference (CPU) calculation.
+    std::vector<unsigned int> ref_block_reduced     = ref_reduced(threads_per_block, h_vector);
+    std::vector<unsigned int> ref_partition_reduced = ref_reduced(partition_size, h_vector);
+
+    // Check the results' validity.
+    unsigned int errors{};
+    for(unsigned int i = 0; i < h_block_reduced.size(); i++)
+    {
+        errors += (h_block_reduced[i] != ref_block_reduced[i]);
+    }
+    for(unsigned int i = 0; i < h_partition_reduced.size(); i++)
+    {
+        errors += (h_partition_reduced[i] != ref_partition_reduced[i]);
+    }
+
+    if(errors)
+    {
+        std::cout << "Validation failed. Errors: " << errors << std::endl;
+        return error_exit_code;
+    }
+    else
+    {
+        std::cout << "Validation passed." << std::endl;
+    }
+}
\ No newline at end of file
diff --git a/HIP-Basic/device_globals/.gitignore b/HIP-Basic/device_globals/.gitignore
new file mode 100644
index 000000000..e8f727034
--- /dev/null
+++ b/HIP-Basic/device_globals/.gitignore
@@ -0,0 +1 @@
+hip_device_globals
diff --git a/HIP-Basic/device_globals/CMakeLists.txt b/HIP-Basic/device_globals/CMakeLists.txt
new file mode 100644
index 000000000..5cc4ea8c9
--- /dev/null
+++ b/HIP-Basic/device_globals/CMakeLists.txt
@@ -0,0 +1,57 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_device_globals) # same binary name as the Makefile target and the .gitignore entry
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES) # reject anything other than HIP or CUDA
+    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+    message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME}) # the selected runtime doubles as the CMake language name
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH) # only default the prefix path when the user did not provide one
+    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA") # CUDA builds additionally get the ROCm include directory
+    list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME}) # compile main.hip with the selected runtime
diff --git a/HIP-Basic/device_globals/Makefile b/HIP-Basic/device_globals/Makefile
new file mode 100644
index 000000000..a84cebfcc
--- /dev/null
+++ b/HIP-Basic/device_globals/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_device_globals
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables: ROCm install location, its include directory, and the compiler driver (HIPCXX is only defaulted, not forced)
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags. The I-prefixed variables hold the required flags; the user's CXXFLAGS/CPPFLAGS/LDFLAGS/LDLIBS are appended to them further down
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+ifeq ($(GPU_RUNTIME), CUDA) # CUDA: pass "-x cu" and add the HIP include directory as a system include path
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP) # HIP: enable warnings unless CXXFLAGS is already set
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp # rebuilt when the source or the shared helper header changes; only main.hip ($<) is compiled
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+clean: # removes the built example binary
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/device_globals/README.md b/HIP-Basic/device_globals/README.md
new file mode 100644
index 000000000..2001f7b8a
--- /dev/null
+++ b/HIP-Basic/device_globals/README.md
@@ -0,0 +1,45 @@
+# HIP-Basic Device Globals Example
+
+## Description
+This program showcases a simple example that uses device global variables to perform a simple test kernel. Two such global variables are set using different methods: one is a single variable that is set by first obtaining a pointer to it and then using `hipMemcpy`, as would be done for a pointer to device memory obtained from `hipMalloc`. The other is an array that is initialized without first explicitly obtaining the pointer by using `hipMemcpyToSymbol`.
+
+### Application flow
+1. A number of constants are defined for the kernel launch parameters.
+2. The input and output vectors are initialized in host memory.
+3. The necessary amount of device memory for the input and output vectors is allocated and the input data is copied to the device.
+4. A pointer to the device global variable `global` is obtained via `hipGetSymbolAddress`.
+5. The pointee is initialized by copying a value from the host to it.
+6. The device global variable `global_array` is initialized by copying to it directly with `hipMemcpyToSymbol`.
+7. The GPU kernel is then launched with the previously defined arguments.
+8. The results are copied back to the host.
+9. Device memory backing the input and output vectors is freed.
+10. A reference computation is performed on the host and the results obtained from the device are compared with it. The result of the comparison is printed to standard output.
+## Key APIs and Concepts
+Apart from via kernel parameters, values can also be passed to the device via _device global variables_: global variables that have the `__device__` attribute. These can be used from device kernels, and need to be initialized from the host before they hold a valid value. Device global variables are persistent between kernel launches, so they can also be used to communicate values between launches without explicitly managing a buffer for them on the host.
+
+A device global variable cannot be used as a regular global variable from the host side. To manage them, a pointer to the device memory that they represent needs to be obtained first. This can be done using the function `hipGetSymbolAddress(dev_ptr, symbol)`; the size of the variable in bytes can be queried with `hipGetSymbolSize(size_ptr, symbol)`. A device global variable can be passed directly to these functions by using the `HIP_SYMBOL(symbol)` macro. The resulting device pointer can be used in the same ways as memory obtained from `hipMalloc`, and so the corresponding value can be set by using `hipMemcpy`.
+
+Device global variables may also be initialized directly by using the function `hipMemcpyToSymbol(symbol, host_source, size_bytes, offset = 0, kind = hipMemcpyHostToDevice)`. This method omits having to fetch the pointer to the device global variable explicitly. Similarly, `hipMemcpyFromSymbol(host_dest, symbol, size_bytes, offset = 0, kind = hipMemcpyDeviceToHost)` can be used to copy from a device global variable back to the host.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `__global__`
+- `__device__`
+- `threadIdx`
+- `blockDim`
+- `blockIdx`
+
+#### Host symbols
+- `hipFree`
+- `hipGetLastError`
+- `hipGetSymbolAddress`
+- `hipGetSymbolSize`
+- `hipLaunchKernelGGL`
+- `hipMalloc`
+- `hipMemcpy`
+- `hipMemcpyDeviceToHost`
+- `hipMemcpyHostToDevice`
+- `hipMemcpyToSymbol`
+- `hipStreamDefault`
+- `HIP_SYMBOL`
diff --git a/HIP-Basic/device_globals/device_globals_vs2019.sln b/HIP-Basic/device_globals/device_globals_vs2019.sln
new file mode 100644
index 000000000..5400f1703
--- /dev/null
+++ b/HIP-Basic/device_globals/device_globals_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_globals_vs2019", "device_globals_vs2019.vcxproj", "{F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Debug|x64.ActiveCfg = Debug|x64
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Debug|x64.Build.0 = Debug|x64
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Release|x64.ActiveCfg = Release|x64
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {2D151D72-1741-4B0B-99F9-50C182082CFC}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/device_globals/device_globals_vs2019.vcxproj b/HIP-Basic/device_globals/device_globals_vs2019.vcxproj
new file mode 100644
index 000000000..147e295e2
--- /dev/null
+++ b/HIP-Basic/device_globals/device_globals_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {f7dd9451-b0ca-4c76-ab92-0e01cbebdbbe}
+ Win32Proj
+ device_globals_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/device_globals/device_globals_vs2019.vcxproj.filters b/HIP-Basic/device_globals/device_globals_vs2019.vcxproj.filters
new file mode 100644
index 000000000..586b43e24
--- /dev/null
+++ b/HIP-Basic/device_globals/device_globals_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {b6be5f33-3a87-4cea-900e-720c76b2bdd7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {116f6a96-2d11-4004-974f-2d651b18763d}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {f1508573-344a-468c-93ba-fa8fccbff0bf}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+
diff --git a/HIP-Basic/device_globals/main.hip b/HIP-Basic/device_globals/main.hip
new file mode 100644
index 000000000..746028393
--- /dev/null
+++ b/HIP-Basic/device_globals/main.hip
@@ -0,0 +1,164 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+
+#include
+#include
+#include
+#include
+
+constexpr unsigned int device_array_size = 16;
+
+/// A test global variable of a single element, that will later be set from the host.
+__device__ float global;
+
+/// A test global variable of \p device_array_size elements that will be set from the host.
+__device__ float global_array[device_array_size];
+
+/// \brief A simple test kernel: for every index i below \p size, the sum of in[i], global
+/// and global_array[i % device_array_size] is written to out[i].
+__global__ void test_globals_kernel(float* out, const float* in, const size_t size)
+{
+    const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    // Threads whose index lies past the end of the vectors have nothing to do.
+    if(idx >= size)
+        return;
+    out[idx] = in[idx] + global + global_array[idx % device_array_size];
+}
+
+/// \brief Computes a reference result on the host, that is (if everything goes well)
+/// hopefully equal to the results from the \p test_globals_kernel kernel.
+/// \p global_array is indexed modulo its own size and therefore must not be empty.
+std::vector<float> test_globals_reference(const std::vector<float>& in,
+                                          const std::vector<float>& global_array,
+                                          const float               global)
+{
+    std::vector<float> out(in.size());
+    for(size_t i = 0; i < in.size(); ++i)
+    {
+        out[i] = in[i] + global + global_array[i % global_array.size()];
+    }
+    return out;
+}
+
+int main()
+{
+    // The size of the input and output vectors.
+    constexpr unsigned int size = 64;
+
+    // The total number of bytes in the input and output vectors.
+    constexpr size_t size_bytes = size * sizeof(float);
+
+    // Number of threads per kernel block.
+    constexpr unsigned int block_size = size;
+
+    // Number of blocks per kernel grid. The expression below calculates ceil(size/block_size).
+    constexpr unsigned int grid_size = (size + block_size - 1) / block_size;
+
+    // Allocate host vectors for the input and output.
+    std::vector<float> h_in(size);
+    std::vector<float> h_out(size);
+
+    // Fill the input with an increasing sequence (i.e. 1, 2, 3, 4...).
+    std::iota(h_in.begin(), h_in.end(), 1.f);
+
+    // Allocate and copy vectors to device memory.
+    float* d_in{};
+    float* d_out{};
+    HIP_CHECK(hipMalloc(&d_in, size_bytes));
+    HIP_CHECK(hipMalloc(&d_out, size_bytes));
+    HIP_CHECK(hipMemcpy(d_in, h_in.data(), size_bytes, hipMemcpyHostToDevice));
+
+    // Fetch a device pointer to the device variable "global". We can pass the relevant
+    // symbol directly to this function.
+    void*  d_global{};
+    size_t global_size_bytes{};
+    HIP_CHECK(hipGetSymbolAddress(&d_global, HIP_SYMBOL(global)));
+    HIP_CHECK(hipGetSymbolSize(&global_size_bytes, HIP_SYMBOL(global)));
+    assert(global_size_bytes == sizeof(float));
+
+    // This pointer is a regular device pointer, and so we may use it in the same ways
+    // as pointers allocated using `hipMalloc`.
+    constexpr float h_global = 42.f;
+    HIP_CHECK(hipMemcpy(d_global, &h_global, global_size_bytes, hipMemcpyHostToDevice));
+
+    // Set up the inputs for `global_array`.
+    std::vector<float> h_global_array(device_array_size);
+    for(size_t i = 0; i < h_global_array.size(); ++i)
+    {
+        h_global_array[i] = i * 1000.f;
+    }
+
+    // Initialize `global_array` by copying to it directly, omitting the need to fetch it first.
+    HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(global_array),
+                                h_global_array.data(),
+                                h_global_array.size() * sizeof(float)));
+
+    // Launch the kernel on the default stream: grid dimensions first, then block dimensions.
+    hipLaunchKernelGGL(test_globals_kernel,
+                       dim3(grid_size),
+                       dim3(block_size),
+                       0,
+                       hipStreamDefault,
+                       d_out,
+                       d_in,
+                       size);
+
+    // Check if the kernel launch was successful.
+    HIP_CHECK(hipGetLastError());
+
+    // Copy the results back to the host. This call blocks the host's execution until the copy is finished.
+    HIP_CHECK(hipMemcpy(h_out.data(), d_out, size_bytes, hipMemcpyDeviceToHost));
+
+    // Free device memory.
+    HIP_CHECK(hipFree(d_in));
+    HIP_CHECK(hipFree(d_out));
+
+    // Compute the expected values on the host.
+    const std::vector<float> reference = test_globals_reference(h_in, h_global_array, h_global);
+
+    // Check the results' validity.
+    constexpr float eps = 1.0E-6f;
+    unsigned int    errors{};
+    for(size_t i = 0; i < size; ++i)
+    {
+        if(std::fabs(h_out[i] - reference[i]) > eps)
+        {
+            ++errors;
+        }
+    }
+
+    if(errors != 0)
+    {
+        std::cout << "Validation failed. Errors: " << errors << std::endl;
+        return error_exit_code;
+    }
+    else
+    {
+        std::cout << "Validation passed." << std::endl;
+    }
+
+    return 0;
+}
diff --git a/HIP-Basic/device_query/Makefile b/HIP-Basic/device_query/Makefile
index bafe1076f..9afe6373e 100644
--- a/HIP-Basic/device_query/Makefile
+++ b/HIP-Basic/device_query/Makefile
@@ -31,25 +31,31 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -isystem $(HIP_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -isystem $(HIP_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
+ ICXXFLAGS += -x cu
else ifeq ($(GPU_RUNTIME), HIP)
- HIPCXX = $(CXX)
- CXXFLAGS += -D__HIP_PLATFORM_AMD__
- LDFLAGS += -L $(ROCM_INSTALL_DIR)/lib
- LDLIBS += -lamdhip64
+ CXXFLAGS ?= -Wall -Wextra
+ HIPCXX := $(CXX)
+ ICXXFLAGS += -D__HIP_PLATFORM_AMD__
+ ILDFLAGS += -L $(ROCM_INSTALL_DIR)/lib
+ ILDLIBS += -lamdhip64
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.cpp $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/device_query/device_query_vs2019.vcxproj b/HIP-Basic/device_query/device_query_vs2019.vcxproj
index 841fe5b51..c1d391770 100644
--- a/HIP-Basic/device_query/device_query_vs2019.vcxproj
+++ b/HIP-Basic/device_query/device_query_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/HIP-Basic/dynamic_shared/Makefile b/HIP-Basic/dynamic_shared/Makefile
index bea6b1a9d..30c54918d 100644
--- a/HIP-Basic/dynamic_shared/Makefile
+++ b/HIP-Basic/dynamic_shared/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/dynamic_shared/dynamic_shared_vs2019.vcxproj b/HIP-Basic/dynamic_shared/dynamic_shared_vs2019.vcxproj
index 659e43e1d..9e5337c58 100644
--- a/HIP-Basic/dynamic_shared/dynamic_shared_vs2019.vcxproj
+++ b/HIP-Basic/dynamic_shared/dynamic_shared_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/HIP-Basic/events/Makefile b/HIP-Basic/events/Makefile
index 3985a03da..b33a5ada3 100644
--- a/HIP-Basic/events/Makefile
+++ b/HIP-Basic/events/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/events/events_vs2019.vcxproj b/HIP-Basic/events/events_vs2019.vcxproj
index 0601aaf04..3edd8da0e 100644
--- a/HIP-Basic/events/events_vs2019.vcxproj
+++ b/HIP-Basic/events/events_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
@@ -96,4 +96,4 @@
-
\ No newline at end of file
+
diff --git a/HIP-Basic/gpu_arch/.gitignore b/HIP-Basic/gpu_arch/.gitignore
new file mode 100644
index 000000000..0b64f52d8
--- /dev/null
+++ b/HIP-Basic/gpu_arch/.gitignore
@@ -0,0 +1 @@
+hip_gpu_arch
diff --git a/HIP-Basic/gpu_arch/CMakeLists.txt b/HIP-Basic/gpu_arch/CMakeLists.txt
new file mode 100644
index 000000000..9811d48b0
--- /dev/null
+++ b/HIP-Basic/gpu_arch/CMakeLists.txt
@@ -0,0 +1,57 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_gpu_arch)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA")
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/gpu_arch/Makefile b/HIP-Basic/gpu_arch/Makefile
new file mode 100644
index 000000000..39207f522
--- /dev/null
+++ b/HIP-Basic/gpu_arch/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_gpu_arch
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/gpu_arch/README.md b/HIP-Basic/gpu_arch/README.md
new file mode 100644
index 000000000..2d5e3da09
--- /dev/null
+++ b/HIP-Basic/gpu_arch/README.md
@@ -0,0 +1,33 @@
+# HIP-Basic GPU Architecture-specific Code Example
+
+## Description
+This program showcases an implementation of a simple matrix transpose kernel, which uses a different codepath depending on the target architecture.
+
+### Application flow
+1. A number of constants are defined to control the problem details and the kernel launch parameters.
+2. Input matrix is set up in host memory.
+3. The necessary amount of device memory is allocated and input is copied to the device.
+4. The GPU transposition kernel is launched with previously defined arguments.
+5. The kernel will have two different codepaths for its data movement, depending on the target architecture.
+6. The transposed matrix is copied back to the host and all device memory is freed.
+7. The elements of the result matrix are compared with the expected result. The result of the comparison is printed to the standard output.
+
+## Key APIs and Concepts
+This example showcases two different codepaths inside a GPU kernel, depending on the target architecture.
+
+You may want to use architecture-specific inline assembly when compiling for a specific architecture, without losing compatibility with other architectures (see the [inline_assembly](/HIP-Basic/inline_assembly/main.hip) example).
+
+These architecture-specific compiler definitions only exist within GPU kernels. If you would like to have GPU architecture-specific host-side code, you could query the stream/device information at runtime.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `threadIdx`, `blockIdx`, `blockDim`
+- `__gfx1010__`, `__gfx1011__`, `__gfx1012__`, `__gfx1030__`, `__gfx1031__`, `__gfx1100__`, `__gfx1101__`, `__gfx1102__`
+#### Host symbols
+- `hipMalloc`
+- `hipMemcpy`
+- `hipLaunchKernelGGL`
+- `HIP_KERNEL_NAME`
+- `hipGetLastError`
+- `hipFree`
diff --git a/HIP-Basic/gpu_arch/gpu_arch_vs2019.sln b/HIP-Basic/gpu_arch/gpu_arch_vs2019.sln
new file mode 100644
index 000000000..19870669d
--- /dev/null
+++ b/HIP-Basic/gpu_arch/gpu_arch_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpu_arch_vs2019", "gpu_arch_vs2019.vcxproj", "{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A8}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A8}.Debug|x64.ActiveCfg = Debug|x64
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A8}.Debug|x64.Build.0 = Debug|x64
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A8}.Release|x64.ActiveCfg = Release|x64
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A8}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {50A6F0A7-FE4A-4B74-BE6E-1A354D8AD065}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/gpu_arch/gpu_arch_vs2019.vcxproj b/HIP-Basic/gpu_arch/gpu_arch_vs2019.vcxproj
new file mode 100644
index 000000000..109c99a52
--- /dev/null
+++ b/HIP-Basic/gpu_arch/gpu_arch_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {4e6b2034-d7ed-4cb4-98b2-7b2d2b71e0a8}
+ Win32Proj
+ gpu_arch_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/gpu_arch/gpu_arch_vs2019.vcxproj.filters b/HIP-Basic/gpu_arch/gpu_arch_vs2019.vcxproj.filters
new file mode 100644
index 000000000..c26cd7b7c
--- /dev/null
+++ b/HIP-Basic/gpu_arch/gpu_arch_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {91fb42b0-13d7-42c2-9f9f-edead539556a}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {dc73d4e0-b3d9-4216-9237-72e4a97ea387}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {ad5f5a22-1e00-4ee8-89fa-ec5047963ec0}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/gpu_arch/main.hip b/HIP-Basic/gpu_arch/main.hip
new file mode 100644
index 000000000..8939eb670
--- /dev/null
+++ b/HIP-Basic/gpu_arch/main.hip
@@ -0,0 +1,150 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+
+#include
+#include
+
+#include
+#include
+
+/// \brief A simple matrix transpose kernel that selects its code path based on the target GPU architecture.
+/// - The number of rows in the input and output matrices is equal, and given by the \p width parameter.
+/// - Each thread in the grid is responsible for one element of the input and output matrices.
+__global__ void matrix_transpose_kernel(float* out, const float* in, const unsigned int width)
+{
+ int x = blockDim.x * blockIdx.x + threadIdx.x;
+ int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+#if(__gfx1010__ || __gfx1011__ || __gfx1012__ || __gfx1030__ || __gfx1031__ || __gfx1100__ \
+ || __gfx1101__ || __gfx1102__)
+ // Codepath for one of the architectures listed above
+ out[y * width + x] = in[x * width + y];
+#else
+ // Codepath if we're not on one of those architectures
+ // Note: to check if we're on AMD or NVIDIA hardware, you could use the more generic:
+ // __HIP_PLATFORM_AMD__ and __HIP_PLATFORM_NVIDIA__
+ out[x * width + y] = in[y * width + x];
+#endif
+}
+
+// CPU implementation of matrix transpose
+std::vector matrix_transpose_reference(const std::vector& input,
+ const unsigned int width)
+{
+ std::vector output(width * width);
+ for(unsigned int j = 0; j < width; j++)
+ {
+ for(unsigned int i = 0; i < width; i++)
+ {
+ output[i * width + j] = input[j * width + i];
+ }
+ }
+ return output;
+}
+
+int main()
+{
+ // Number of rows and columns in the transposed square matrix.
+ constexpr unsigned int width = 1024;
+
+ // Number of threads in each kernel block along the X dimension.
+ constexpr unsigned int threads_per_block_x = 8;
+
+ // Number of threads in each kernel block along the Y dimension.
+ constexpr unsigned int threads_per_block_y = 8;
+
+ // Total element count of the transposed matrix.
+ constexpr unsigned int size = width * width;
+
+ // Total size (in bytes) of the transposed matrix.
+ constexpr size_t size_bytes = sizeof(float) * size;
+
+ // Allocate host vectors.
+ std::vector h_matrix(size);
+ std::vector h_transposed_matrix(size);
+
+ // Set up input data.
+ for(unsigned int i = 0; i < size; i++)
+ {
+ h_matrix[i] = i * 10.0f;
+ }
+
+ // Allocate device memory for the input and output matrices.
+ float* d_matrix{};
+ float* d_transposed_matrix{};
+ HIP_CHECK(hipMalloc(&d_matrix, size_bytes));
+ HIP_CHECK(hipMalloc(&d_transposed_matrix, size_bytes));
+
+ // Transfer the input matrix to the device memory.
+ HIP_CHECK(hipMemcpy(d_matrix, h_matrix.data(), size_bytes, hipMemcpyHostToDevice));
+
+    // Launching kernel from host.
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_transpose_kernel),
+ dim3(width / threads_per_block_x, width / threads_per_block_y),
+ dim3(threads_per_block_x, threads_per_block_y),
+ 0,
+ hipStreamDefault,
+ d_transposed_matrix,
+ d_matrix,
+ width);
+
+ // Check if the kernel launch was successful.
+ HIP_CHECK(hipGetLastError());
+
+ // Transfer the result back to the host.
+ HIP_CHECK(hipMemcpy(h_transposed_matrix.data(),
+ d_transposed_matrix,
+ size_bytes,
+ hipMemcpyDeviceToHost));
+
+ // Free the resources on the device.
+ HIP_CHECK(hipFree(d_matrix));
+ HIP_CHECK(hipFree(d_transposed_matrix));
+
+ // Perform the reference (CPU) calculation.
+ std::vector ref_transposed_matrix = matrix_transpose_reference(h_matrix, width);
+
+ // Check the results' validity.
+ constexpr float eps = 1.0E-6;
+ unsigned int errors{};
+ for(unsigned int i = 0; i < size; i++)
+ {
+ if(std::fabs(h_transposed_matrix[i] - ref_transposed_matrix[i]) > eps)
+ {
+ errors++;
+ }
+ }
+
+ if(errors != 0)
+ {
+ std::cout << "Validation failed. Errors: " << errors << std::endl;
+ return error_exit_code;
+ }
+ else
+ {
+ std::cout << "Validation passed." << std::endl;
+ }
+}
diff --git a/HIP-Basic/hello_world/Makefile b/HIP-Basic/hello_world/Makefile
index c328d22c2..09ee9b179 100644
--- a/HIP-Basic/hello_world/Makefile
+++ b/HIP-Basic/hello_world/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/hipify/Makefile b/HIP-Basic/hipify/Makefile
index 01d66cb93..ba68ee03b 100644
--- a/HIP-Basic/hipify/Makefile
+++ b/HIP-Basic/hipify/Makefile
@@ -30,27 +30,33 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS :=
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS :=
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
-$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
all: $(EXAMPLE)
# Step
main.hip: main.cu
$(ROCM_INSTALL_DIR)/bin/hipify-perl $< -o $@
$(EXAMPLE): main.hip
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE) main.hip *.o
diff --git a/HIP-Basic/inline_assembly/.gitignore b/HIP-Basic/inline_assembly/.gitignore
new file mode 100644
index 000000000..1fbb62d2d
--- /dev/null
+++ b/HIP-Basic/inline_assembly/.gitignore
@@ -0,0 +1 @@
+hip_inline_assembly
diff --git a/HIP-Basic/inline_assembly/CMakeLists.txt b/HIP-Basic/inline_assembly/CMakeLists.txt
new file mode 100644
index 000000000..ef2b541b0
--- /dev/null
+++ b/HIP-Basic/inline_assembly/CMakeLists.txt
@@ -0,0 +1,57 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_inline_assembly)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA")
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/inline_assembly/Makefile b/HIP-Basic/inline_assembly/Makefile
new file mode 100644
index 000000000..b61dba10c
--- /dev/null
+++ b/HIP-Basic/inline_assembly/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_inline_assembly
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/inline_assembly/README.md b/HIP-Basic/inline_assembly/README.md
new file mode 100644
index 000000000..4b87000f8
--- /dev/null
+++ b/HIP-Basic/inline_assembly/README.md
@@ -0,0 +1,48 @@
+# HIP-Basic Inline Assembly Example
+
+## Description
+This program showcases an implementation of a simple matrix transpose kernel, which uses inline assembly and works on both AMD and NVIDIA hardware.
+
+By using inline assembly in your kernels, you may be able to gain extra performance.
+It could also enable you to use special GPU hardware features which are not available through compiler intrinsics.
+
+
+For more insights, please read the following blogs by Ben Sander:
+[The Art of AMDGCN Assembly: How to Bend the Machine to Your Will](https://gpuopen.com/learn/amdgcn-assembly/) &
+[AMD GCN Assembly: Cross-Lane Operations](https://gpuopen.com/learn/amd-gcn-assembly-cross-lane-operations/)
+
+For more information:
+[AMD ISA documentation for current architectures](https://gpuopen.com/amd-isa-documentation/) &
+[User Guide for LLVM AMDGPU Back-end](https://llvm.org/docs/AMDGPUUsage.html)
+
+
+### Application flow
+1. A number of variables are defined to control the problem details and the kernel launch parameters.
+2. Input matrix is set up in host memory.
+3. The necessary amount of device memory is allocated and input is copied to the device.
+4. The GPU transposition kernel is launched with previously defined arguments.
+5. The kernel will use different inline assembly for its data movement, depending on the target platform.
+6. The transposed matrix is copied back to the host and all device memory is freed.
+7. The elements of the result matrix are compared with the expected result. The result of the comparison is printed to the standard output.
+
+## Key APIs and Concepts
+Using inline assembly in GPU kernels is somewhat similar to using inline assembly in host-side code. The `volatile` qualifier tells the compiler not to remove the assembly statement during optimization.
+
+```c++
+asm volatile("v_mov_b32_e32 %0, %1" : "=v"(variable_0) : "v"(variable_1));
+```
+
+However, since the instruction set differs between GPU architectures, you usually want to use the appropriate GPU architecture compiler defines to support multiple architectures (see the [gpu_arch](/HIP-Basic/gpu_arch/main.hip) example for more fine-grained architecture control).
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `threadIdx`, `blockIdx`, `blockDim`
+- `__HIP_PLATFORM_AMD__`, `__HIP_PLATFORM_NVIDIA__`
+#### Host symbols
+- `hipMalloc`
+- `hipMemcpy`
+- `hipLaunchKernelGGL`
+- `HIP_KERNEL_NAME`
+- `hipGetLastError`
+- `hipFree`
diff --git a/HIP-Basic/inline_assembly/inline_assembly_vs2019.sln b/HIP-Basic/inline_assembly/inline_assembly_vs2019.sln
new file mode 100644
index 000000000..9a6d691e0
--- /dev/null
+++ b/HIP-Basic/inline_assembly/inline_assembly_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "inline_assembly_vs2019", "inline_assembly_vs2019.vcxproj", "{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A7}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A7}.Debug|x64.ActiveCfg = Debug|x64
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A7}.Debug|x64.Build.0 = Debug|x64
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A7}.Release|x64.ActiveCfg = Release|x64
+		{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A7}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {50A6F0A7-FE4A-4B74-BE6E-1A354D8AD064}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/inline_assembly/inline_assembly_vs2019.vcxproj b/HIP-Basic/inline_assembly/inline_assembly_vs2019.vcxproj
new file mode 100644
index 000000000..29c841cd8
--- /dev/null
+++ b/HIP-Basic/inline_assembly/inline_assembly_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {4e6b2034-d7ed-4cb4-98b2-7b2d2b71e0a7}
+ Win32Proj
+ inline_assembly_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/inline_assembly/inline_assembly_vs2019.vcxproj.filters b/HIP-Basic/inline_assembly/inline_assembly_vs2019.vcxproj.filters
new file mode 100644
index 000000000..c26cd7b7c
--- /dev/null
+++ b/HIP-Basic/inline_assembly/inline_assembly_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {91fb42b0-13d7-42c2-9f9f-edead539556a}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {dc73d4e0-b3d9-4216-9237-72e4a97ea387}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {ad5f5a22-1e00-4ee8-89fa-ec5047963ec0}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/inline_assembly/main.hip b/HIP-Basic/inline_assembly/main.hip
new file mode 100644
index 000000000..5913f7019
--- /dev/null
+++ b/HIP-Basic/inline_assembly/main.hip
@@ -0,0 +1,145 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+
+#include
+#include
+
+#include
+#include
+
+/// \brief A simple matrix transpose kernel that's using inline assembly.
+/// - The number of rows in the input and output matrices is equal, and given by the \p width parameter.
+/// - Each thread in the grid is responsible for one element of the input and output matrices.
+__global__ void matrix_transpose_kernel(float* out, const float* in, const unsigned int width)
+{
+ int x = blockDim.x * blockIdx.x + threadIdx.x;
+ int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+#ifdef __HIP_PLATFORM_AMD__
+ asm volatile("v_mov_b32_e32 %0, %1" : "=v"(out[x * width + y]) : "v"(in[y * width + x]));
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+ asm volatile("mov.f32 %0, %1;" : "=f"(out[x * width + y]) : "f"(in[y * width + x]));
+#endif
+}
+
+// CPU implementation of matrix transpose
+std::vector matrix_transpose_reference(const std::vector& input,
+ const unsigned int width)
+{
+ std::vector output(width * width);
+ for(unsigned int j = 0; j < width; j++)
+ {
+ for(unsigned int i = 0; i < width; i++)
+ {
+ output[i * width + j] = input[j * width + i];
+ }
+ }
+ return output;
+}
+
+int main()
+{
+ // Number of rows and columns in the transposed square matrix.
+ constexpr unsigned int width = 1024;
+
+ // Number of threads in each kernel block along the X dimension.
+ constexpr unsigned int threads_per_block_x = 8;
+
+ // Number of threads in each kernel block along the Y dimension.
+ constexpr unsigned int threads_per_block_y = 8;
+
+ // Total element count of the transposed matrix.
+ constexpr unsigned int size = width * width;
+
+ // Total size (in bytes) of the transposed matrix.
+ constexpr size_t size_bytes = sizeof(float) * size;
+
+ // Allocate host vectors.
+ std::vector h_matrix(size);
+ std::vector h_transposed_matrix(size);
+
+ // Set up input data.
+ for(unsigned int i = 0; i < size; i++)
+ {
+ h_matrix[i] = i * 10.0f;
+ }
+
+ // Allocate device memory for the input and output matrices.
+ float* d_matrix{};
+ float* d_transposed_matrix{};
+ HIP_CHECK(hipMalloc(&d_matrix, size_bytes));
+ HIP_CHECK(hipMalloc(&d_transposed_matrix, size_bytes));
+
+ // Transfer the input matrix to the device memory.
+ HIP_CHECK(hipMemcpy(d_matrix, h_matrix.data(), size_bytes, hipMemcpyHostToDevice));
+
+    // Launching kernel from host.
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_transpose_kernel),
+ dim3(width / threads_per_block_x, width / threads_per_block_y),
+ dim3(threads_per_block_x, threads_per_block_y),
+ 0,
+ hipStreamDefault,
+ d_transposed_matrix,
+ d_matrix,
+ width);
+
+ // Check if the kernel launch was successful.
+ HIP_CHECK(hipGetLastError());
+
+ // Transfer the result back to the host.
+ HIP_CHECK(hipMemcpy(h_transposed_matrix.data(),
+ d_transposed_matrix,
+ size_bytes,
+ hipMemcpyDeviceToHost));
+
+ // Free the resources on the device.
+ HIP_CHECK(hipFree(d_matrix));
+ HIP_CHECK(hipFree(d_transposed_matrix));
+
+ // Perform the reference (CPU) calculation.
+ std::vector ref_transposed_matrix = matrix_transpose_reference(h_matrix, width);
+
+ // Check the results' validity.
+ constexpr float eps = 1.0E-6;
+ unsigned int errors{};
+ for(unsigned int i = 0; i < size; i++)
+ {
+ if(std::fabs(h_transposed_matrix[i] - ref_transposed_matrix[i]) > eps)
+ {
+ errors++;
+ }
+ }
+
+ if(errors != 0)
+ {
+ std::cout << "Validation failed. Errors: " << errors << std::endl;
+ return error_exit_code;
+ }
+ else
+ {
+ std::cout << "Validation passed." << std::endl;
+ }
+}
diff --git a/HIP-Basic/llvm_ir_to_executable/Makefile b/HIP-Basic/llvm_ir_to_executable/Makefile
index 5fb8b2d07..d345c0716 100644
--- a/HIP-Basic/llvm_ir_to_executable/Makefile
+++ b/HIP-Basic/llvm_ir_to_executable/Makefile
@@ -23,10 +23,9 @@ COMMON_INCLUDE_DIR := ../../Common
GPU_RUNTIME ?= HIP
ifneq ($(GPU_RUNTIME), HIP)
-$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
endif
-
# HIP variables
ROCM_INSTALL_DIR := /opt/rocm
HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
@@ -37,11 +36,11 @@ LLVM_MC ?= $(ROCM_INSTALL_DIR)/llvm/bin/llvm-mc
CLANG_OFFLOAD_BUNDLER ?= $(ROCM_INSTALL_DIR)/llvm/bin/clang-offload-bundler
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
# Compile for these GPU architectures
HIP_ARCHITECTURES ?= gfx803;gfx900;gfx906;gfx908;gfx90a;gfx1030
@@ -60,7 +59,7 @@ GPU_ARCH_TRIPLES := $(subst $(space),$(comma),$(GPU_ARCHS:%=hipv4-amdgcn-amd-amd
all: $(EXAMPLE)
$(EXAMPLE): main.o main_device.o
- $(HIPCXX) -o $@ $^
+ $(HIPCXX) $(ILDFLAGS) -o $@ $^ $(ILDLIBS)
main_device.o: hip_obj_gen.mcin offload_bundle.hipfb
$(LLVM_MC) -triple x86_64-unknown-linux-gnu -o $@ $< --filetype=obj
@@ -73,7 +72,7 @@ offload_bundle.hipfb: $(GPU_ARCHS:%=main_%.o)
-output=$@
main.o: main.hip
- $(HIPCXX) $(CXXFLAGS) $(CPPFLAGS) -c --cuda-host-only $<
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) -c --cuda-host-only $<
main_%.o: main_%.ll
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$* -o $@ $<
diff --git a/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.sln b/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.sln
index a53dc2ecf..1bcbe85a6 100644
--- a/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.sln
+++ b/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.sln
@@ -1,25 +1,25 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.32630.194
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "llvm_ir_to_executable_vs2019", "llvm_ir_to_executable_vs2019.vcxproj", "{DBB8DFE9-CB1B-473C-937C-2A8120E0D819}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|x64 = Debug|x64
- Release|x64 = Release|x64
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.ActiveCfg = Debug|x64
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.Build.0 = Debug|x64
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.ActiveCfg = Release|x64
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.Build.0 = Release|x64
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
- SolutionGuid = {0A13532C-E06B-4427-9847-54070C1E8622}
- EndGlobalSection
-EndGlobal
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "llvm_ir_to_executable_vs2019", "llvm_ir_to_executable_vs2019.vcxproj", "{DBB8DFE9-CB1B-473C-937C-2A8120E0D819}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.ActiveCfg = Debug|x64
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.Build.0 = Debug|x64
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.ActiveCfg = Release|x64
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {0A13532C-E06B-4427-9847-54070C1E8622}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj b/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj
index c0e820b41..b0404a13c 100644
--- a/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj
+++ b/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj
@@ -1,183 +1,183 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
- --cuda-host-only
- --cuda-host-only
-
-
-
-
-
-
-
- Document
- copy %(Identity) "$(IntDir)%(Identity)"
- Copying %(Identity)
- $(IntDir)%(Identity)
- copy %(Identity) "$(IntDir)%(Identity)"
- Copying %(Identity)
- $(IntDir)%(Identity)
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
-
-
- Document
- "$(ClangToolPath)clang+"+ -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
- "$(ClangToolPath)clang+"+ -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
-
-
- Document
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
-
-
-
- 15.0
- {dbb8dfe9-cb1b-473c-937c-2a8120e0d819}
- Win32Proj
- llvm_ir_to_executable_vs2019
- 10.0
-
-
-
- Application
- true
- HIP
- Unicode
-
-
- Application
- false
- HIP
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- hip_$(ProjectName)
- ClCompile
-
-
- false
- hip_$(ProjectName)
- ClCompile
-
-
- gfx1030
-
-
- gfx1030
-
-
-
- Level1
- __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
-
-
- Console
- true
- $(IntDir)main_device.obj;%(AdditionalDependencies)
-
-
- Compiling Device LLVM IR %(Identity)
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
- $(IntDir)%(FileName).o
-
-
- "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
-cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
-
-
- Generating Device Offload Object
-
-
- $(IntDIr)main_device.obj
-
-
- $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_objgen_win.mcin;%(Inputs)
-
-
-
-
- Level2
- true
- true
- __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
-
-
- Console
- true
- true
- true
- $(IntDir)main_device.obj;%(AdditionalDependencies)
-
-
- Compiling Device LLVM IR %(Identity)
- "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
- $(IntDir)%(FileName).o
-
-
- "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa-gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=NUL "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
-cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
-
-
- Generating Device Offload Object
-
-
- $(IntDIr)main_device.obj
-
-
- $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_objgen_win.mcin;%(Inputs)
-
-
-
-
-
-
-
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+ --cuda-host-only
+ --cuda-host-only
+
+
+
+
+
+
+
+ Document
+ copy %(Identity) "$(IntDir)%(Identity)"
+ Copying %(Identity)
+ $(IntDir)%(Identity)
+ copy %(Identity) "$(IntDir)%(Identity)"
+ Copying %(Identity)
+ $(IntDir)%(Identity)
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx1030
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx803
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx900
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx906
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx908
+
+
+ Document
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa -mcpu=gfx90a
+
+
+
+ 15.0
+ {dbb8dfe9-cb1b-473c-937c-2a8120e0d819}
+ Win32Proj
+ llvm_ir_to_executable_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+ ClCompile
+
+
+ false
+ hip_$(ProjectName)
+ ClCompile
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ $(IntDir)main_device.obj;%(AdditionalDependencies)
+
+
+ Compiling Device LLVM IR %(Identity)
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
+ $(IntDir)%(FileName).o
+
+
+ "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=nul "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
+cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
+
+
+ Generating Device Offload Object
+
+
+ $(IntDIr)main_device.obj
+
+
+ $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+ $(IntDir)main_device.obj;%(AdditionalDependencies)
+
+
+ Compiling Device LLVM IR %(Identity)
+ "$(ClangToolPath)clang++" -o "$(IntDir)%(FileName).o" "%(Identity)" -target amdgcn-amd-amdhsa
+ $(IntDir)%(FileName).o
+
+
+ "$(ClangToolPath)clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-pc-windows-msvc,hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a,hipv4-amdgcn-amd-amdhsa--gfx1030 -input=NUL "-input=$(IntDir)main_gfx803.o" "-input=$(IntDir)main_gfx900.o" "-input=$(IntDir)main_gfx906.o" "-input=$(IntDir)main_gfx908.o" "-input=$(IntDir)main_gfx90a.o" "-input=$(IntDir)main_gfx1030.o" "-output=$(IntDir)offload_bundle.hipfb"
+cd $(IntDir) && "$(ClangToolPath)llvm-mc" -triple host-x86_64-pc-windows-msvc "hip_obj_gen_win.mcin" -o "main_device.obj" --filetype=obj
+
+
+ Generating Device Offload Object
+
+
+ $(IntDIr)main_device.obj
+
+
+ $(IntDir)main_gfx803.o;$(IntDir)main_gfx900.o;$(IntDir)main_gfx906.o;$(IntDir)main_gfx908.o;$(IntDir)main_gfx90a.o;$(IntDir)main_gfx1030.o;$(IntDir)hip_obj_gen_win.mcin;%(Inputs)
+
+
+
+
+
+
+
diff --git a/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj.filters b/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj.filters
index 25c408b7e..c4e15e124 100644
--- a/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj.filters
+++ b/HIP-Basic/llvm_ir_to_executable/llvm_ir_to_executable_vs2019.vcxproj.filters
@@ -1,53 +1,53 @@
-
-
-
-
- {4f2a1544-a556-4afb-b630-36ba54c0ab4a}
- cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
-
-
- {b93521e0-9944-411a-9f6e-4071af6bc7ea}
- h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
-
-
- {972f07c3-b925-4516-bd65-2d5a3f626888}
- rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
-
-
-
-
- Source Files
-
-
-
-
- Header Files
-
-
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
- Source Files
-
-
-
+
+
+
+
+ {4f2a1544-a556-4afb-b630-36ba54c0ab4a}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {b93521e0-9944-411a-9f6e-4071af6bc7ea}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {972f07c3-b925-4516-bd65-2d5a3f626888}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+
diff --git a/HIP-Basic/llvm_ir_to_executable/main.hip b/HIP-Basic/llvm_ir_to_executable/main.hip
index 588fc0706..b09dd694a 100644
--- a/HIP-Basic/llvm_ir_to_executable/main.hip
+++ b/HIP-Basic/llvm_ir_to_executable/main.hip
@@ -31,7 +31,7 @@
/// \brief Device function to square each element
/// in the array `in` and write to array `out`.
template
-__global__ void vector_square_kernel(T* out, const T* in, const long long size)
+__global__ void vector_square_kernel(T* out, const T* in, const unsigned long long size)
{
// Get the unique global thread ID
const size_t offset = blockIdx.x * blockDim.x + threadIdx.x;
diff --git a/HIP-Basic/llvm_ir_to_executable/main_gfx1030.ll b/HIP-Basic/llvm_ir_to_executable/main_gfx1030.ll
index 31c713de3..bc353fdb7 100644
--- a/HIP-Basic/llvm_ir_to_executable/main_gfx1030.ll
+++ b/HIP-Basic/llvm_ir_to_executable/main_gfx1030.ll
@@ -8,6 +8,8 @@ target triple = "amdgcn-amd-amdhsa"
%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" = type { i8 }
%"struct.__HIP_Coordinates<__HIP_GridDim>::__X" = type { i8 }
+$_Z20vector_square_kernelIfEvPT_PKS0_y = comdat any
+
$_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE = comdat any
$_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE = comdat any
@@ -22,8 +24,8 @@ $_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = comdat any
@_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = weak protected addrspace(4) externally_initialized constant %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" undef, comdat, align 1
@llvm.compiler.used = appending addrspace(1) global [4 x i8*] [i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_GridDim>::__X", %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockDim>::__X", %"struct.__HIP_Coordinates<__HIP_BlockDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X", %"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X", %"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI15__HIP_ThreadIdxE1xE, i32 0, i32 0) to i8*)], section "llvm.metadata"
-; Function Attrs: mustprogress nofree norecurse nosync nounwind
-define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float addrspace(1)* nocapture %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 {
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind
+define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_y(float addrspace(1)* nocapture writeonly %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 comdat {
%4 = tail call i32 @llvm.amdgcn.workgroup.id.x() #2
%5 = tail call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #2
%6 = getelementptr inbounds i8, i8 addrspace(4)* %5, i64 12
@@ -57,15 +59,15 @@ define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
-attributes #0 = { mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
@@ -76,7 +78,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 2, i32 0}
-!3 = !{!"AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"}
+!3 = !{!"AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"}
!4 = !{!5, !9, i64 12}
!5 = !{!"hsa_kernel_dispatch_packet_s", !6, i64 0, !6, i64 2, !6, i64 4, !6, i64 6, !6, i64 8, !6, i64 10, !9, i64 12, !9, i64 16, !9, i64 20, !9, i64 24, !9, i64 28, !10, i64 32, !11, i64 40, !10, i64 48, !12, i64 56}
!6 = !{!"short", !7, i64 0}
diff --git a/HIP-Basic/llvm_ir_to_executable/main_gfx803.ll b/HIP-Basic/llvm_ir_to_executable/main_gfx803.ll
index a0d9f5880..8aa4e4883 100644
--- a/HIP-Basic/llvm_ir_to_executable/main_gfx803.ll
+++ b/HIP-Basic/llvm_ir_to_executable/main_gfx803.ll
@@ -8,6 +8,8 @@ target triple = "amdgcn-amd-amdhsa"
%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" = type { i8 }
%"struct.__HIP_Coordinates<__HIP_GridDim>::__X" = type { i8 }
+$_Z20vector_square_kernelIfEvPT_PKS0_y = comdat any
+
$_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE = comdat any
$_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE = comdat any
@@ -22,8 +24,8 @@ $_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = comdat any
@_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = weak protected addrspace(4) externally_initialized constant %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" undef, comdat, align 1
@llvm.compiler.used = appending addrspace(1) global [4 x i8*] [i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_GridDim>::__X", %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockDim>::__X", %"struct.__HIP_Coordinates<__HIP_BlockDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X", %"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X", %"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI15__HIP_ThreadIdxE1xE, i32 0, i32 0) to i8*)], section "llvm.metadata"
-; Function Attrs: mustprogress nofree norecurse nosync nounwind
-define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float addrspace(1)* nocapture %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 {
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind
+define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_y(float addrspace(1)* nocapture writeonly %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 comdat {
%4 = tail call i32 @llvm.amdgcn.workgroup.id.x() #2
%5 = tail call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #2
%6 = getelementptr inbounds i8, i8 addrspace(4)* %5, i64 12
@@ -57,15 +59,15 @@ define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
-attributes #0 = { mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-implicitarg-num-bytes"="56" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx803" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx803" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
@@ -76,7 +78,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 2, i32 0}
-!3 = !{!"AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"}
+!3 = !{!"AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"}
!4 = !{!5, !9, i64 12}
!5 = !{!"hsa_kernel_dispatch_packet_s", !6, i64 0, !6, i64 2, !6, i64 4, !6, i64 6, !6, i64 8, !6, i64 10, !9, i64 12, !9, i64 16, !9, i64 20, !9, i64 24, !9, i64 28, !10, i64 32, !11, i64 40, !10, i64 48, !12, i64 56}
!6 = !{!"short", !7, i64 0}
diff --git a/HIP-Basic/llvm_ir_to_executable/main_gfx900.ll b/HIP-Basic/llvm_ir_to_executable/main_gfx900.ll
index 67ff0a306..0ead8d026 100644
--- a/HIP-Basic/llvm_ir_to_executable/main_gfx900.ll
+++ b/HIP-Basic/llvm_ir_to_executable/main_gfx900.ll
@@ -8,6 +8,8 @@ target triple = "amdgcn-amd-amdhsa"
%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" = type { i8 }
%"struct.__HIP_Coordinates<__HIP_GridDim>::__X" = type { i8 }
+$_Z20vector_square_kernelIfEvPT_PKS0_y = comdat any
+
$_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE = comdat any
$_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE = comdat any
@@ -22,8 +24,8 @@ $_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = comdat any
@_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = weak protected addrspace(4) externally_initialized constant %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" undef, comdat, align 1
@llvm.compiler.used = appending addrspace(1) global [4 x i8*] [i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_GridDim>::__X", %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockDim>::__X", %"struct.__HIP_Coordinates<__HIP_BlockDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X", %"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X", %"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI15__HIP_ThreadIdxE1xE, i32 0, i32 0) to i8*)], section "llvm.metadata"
-; Function Attrs: mustprogress nofree norecurse nosync nounwind
-define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float addrspace(1)* nocapture %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 {
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind
+define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_y(float addrspace(1)* nocapture writeonly %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 comdat {
%4 = tail call i32 @llvm.amdgcn.workgroup.id.x() #2
%5 = tail call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #2
%6 = getelementptr inbounds i8, i8 addrspace(4)* %5, i64 12
@@ -57,15 +59,15 @@ define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
-attributes #0 = { mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
@@ -76,7 +78,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 2, i32 0}
-!3 = !{!"AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"}
+!3 = !{!"AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"}
!4 = !{!5, !9, i64 12}
!5 = !{!"hsa_kernel_dispatch_packet_s", !6, i64 0, !6, i64 2, !6, i64 4, !6, i64 6, !6, i64 8, !6, i64 10, !9, i64 12, !9, i64 16, !9, i64 20, !9, i64 24, !9, i64 28, !10, i64 32, !11, i64 40, !10, i64 48, !12, i64 56}
!6 = !{!"short", !7, i64 0}
diff --git a/HIP-Basic/llvm_ir_to_executable/main_gfx906.ll b/HIP-Basic/llvm_ir_to_executable/main_gfx906.ll
index 76819daf7..7e8fd0edd 100644
--- a/HIP-Basic/llvm_ir_to_executable/main_gfx906.ll
+++ b/HIP-Basic/llvm_ir_to_executable/main_gfx906.ll
@@ -8,6 +8,8 @@ target triple = "amdgcn-amd-amdhsa"
%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" = type { i8 }
%"struct.__HIP_Coordinates<__HIP_GridDim>::__X" = type { i8 }
+$_Z20vector_square_kernelIfEvPT_PKS0_y = comdat any
+
$_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE = comdat any
$_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE = comdat any
@@ -22,8 +24,8 @@ $_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = comdat any
@_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = weak protected addrspace(4) externally_initialized constant %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" undef, comdat, align 1
@llvm.compiler.used = appending addrspace(1) global [4 x i8*] [i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_GridDim>::__X", %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockDim>::__X", %"struct.__HIP_Coordinates<__HIP_BlockDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X", %"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X", %"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI15__HIP_ThreadIdxE1xE, i32 0, i32 0) to i8*)], section "llvm.metadata"
-; Function Attrs: mustprogress nofree norecurse nosync nounwind
-define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float addrspace(1)* nocapture %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 {
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind
+define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_y(float addrspace(1)* nocapture writeonly %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 comdat {
%4 = tail call i32 @llvm.amdgcn.workgroup.id.x() #2
%5 = tail call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #2
%6 = getelementptr inbounds i8, i8 addrspace(4)* %5, i64 12
@@ -57,15 +59,15 @@ define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
-attributes #0 = { mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
@@ -76,7 +78,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 2, i32 0}
-!3 = !{!"AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"}
+!3 = !{!"AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"}
!4 = !{!5, !9, i64 12}
!5 = !{!"hsa_kernel_dispatch_packet_s", !6, i64 0, !6, i64 2, !6, i64 4, !6, i64 6, !6, i64 8, !6, i64 10, !9, i64 12, !9, i64 16, !9, i64 20, !9, i64 24, !9, i64 28, !10, i64 32, !11, i64 40, !10, i64 48, !12, i64 56}
!6 = !{!"short", !7, i64 0}
diff --git a/HIP-Basic/llvm_ir_to_executable/main_gfx908.ll b/HIP-Basic/llvm_ir_to_executable/main_gfx908.ll
index 50a94f216..effcf4d99 100644
--- a/HIP-Basic/llvm_ir_to_executable/main_gfx908.ll
+++ b/HIP-Basic/llvm_ir_to_executable/main_gfx908.ll
@@ -8,6 +8,8 @@ target triple = "amdgcn-amd-amdhsa"
%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" = type { i8 }
%"struct.__HIP_Coordinates<__HIP_GridDim>::__X" = type { i8 }
+$_Z20vector_square_kernelIfEvPT_PKS0_y = comdat any
+
$_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE = comdat any
$_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE = comdat any
@@ -22,8 +24,8 @@ $_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = comdat any
@_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = weak protected addrspace(4) externally_initialized constant %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" undef, comdat, align 1
@llvm.compiler.used = appending addrspace(1) global [4 x i8*] [i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_GridDim>::__X", %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockDim>::__X", %"struct.__HIP_Coordinates<__HIP_BlockDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X", %"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X", %"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI15__HIP_ThreadIdxE1xE, i32 0, i32 0) to i8*)], section "llvm.metadata"
-; Function Attrs: mustprogress nofree norecurse nosync nounwind
-define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float addrspace(1)* nocapture %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 {
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind
+define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_y(float addrspace(1)* nocapture writeonly %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 comdat {
%4 = tail call i32 @llvm.amdgcn.workgroup.id.x() #2
%5 = tail call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #2
%6 = getelementptr inbounds i8, i8 addrspace(4)* %5, i64 12
@@ -57,15 +59,15 @@ define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
-attributes #0 = { mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
@@ -76,7 +78,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 2, i32 0}
-!3 = !{!"AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"}
+!3 = !{!"AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"}
!4 = !{!5, !9, i64 12}
!5 = !{!"hsa_kernel_dispatch_packet_s", !6, i64 0, !6, i64 2, !6, i64 4, !6, i64 6, !6, i64 8, !6, i64 10, !9, i64 12, !9, i64 16, !9, i64 20, !9, i64 24, !9, i64 28, !10, i64 32, !11, i64 40, !10, i64 48, !12, i64 56}
!6 = !{!"short", !7, i64 0}
diff --git a/HIP-Basic/llvm_ir_to_executable/main_gfx90a.ll b/HIP-Basic/llvm_ir_to_executable/main_gfx90a.ll
index dc293da38..b70d789fa 100644
--- a/HIP-Basic/llvm_ir_to_executable/main_gfx90a.ll
+++ b/HIP-Basic/llvm_ir_to_executable/main_gfx90a.ll
@@ -8,6 +8,8 @@ target triple = "amdgcn-amd-amdhsa"
%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" = type { i8 }
%"struct.__HIP_Coordinates<__HIP_GridDim>::__X" = type { i8 }
+$_Z20vector_square_kernelIfEvPT_PKS0_y = comdat any
+
$_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE = comdat any
$_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE = comdat any
@@ -22,8 +24,8 @@ $_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = comdat any
@_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE = weak protected addrspace(4) externally_initialized constant %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" undef, comdat, align 1
@llvm.compiler.used = appending addrspace(1) global [4 x i8*] [i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_GridDim>::__X", %"struct.__HIP_Coordinates<__HIP_GridDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI13__HIP_GridDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockDim>::__X", %"struct.__HIP_Coordinates<__HIP_BlockDim>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockDimE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X", %"struct.__HIP_Coordinates<__HIP_BlockIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI14__HIP_BlockIdxE1xE, i32 0, i32 0) to i8*), i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds (%"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X", %"struct.__HIP_Coordinates<__HIP_ThreadIdx>::__X" addrspace(4)* @_ZN17__HIP_CoordinatesI15__HIP_ThreadIdxE1xE, i32 0, i32 0) to i8*)], section "llvm.metadata"
-; Function Attrs: mustprogress nofree norecurse nosync nounwind
-define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float addrspace(1)* nocapture %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 {
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind
+define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_y(float addrspace(1)* nocapture writeonly %0, float addrspace(1)* nocapture readonly %1, i64 %2) local_unnamed_addr #0 comdat {
%4 = tail call i32 @llvm.amdgcn.workgroup.id.x() #2
%5 = tail call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #2
%6 = getelementptr inbounds i8, i8 addrspace(4)* %5, i64 12
@@ -57,15 +59,15 @@ define protected amdgpu_kernel void @_Z20vector_square_kernelIfEvPT_PKS0_x(float
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
-attributes #0 = { mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind "amdgpu-flat-work-group-size"="1,1024" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind }
@@ -76,7 +78,7 @@ attributes #2 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 2, i32 0}
-!3 = !{!"AMD clang version 14.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.0.0 22051 235b6880e2e515507478181ec11a20c1ec87945b)"}
+!3 = !{!"AMD clang version 15.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.3.0 22362 3cf23f77f8208174a2ee7c616f4be23674d7b081)"}
!4 = !{!5, !9, i64 12}
!5 = !{!"hsa_kernel_dispatch_packet_s", !6, i64 0, !6, i64 2, !6, i64 4, !6, i64 6, !6, i64 8, !6, i64 10, !9, i64 12, !9, i64 16, !9, i64 20, !9, i64 24, !9, i64 28, !10, i64 32, !11, i64 40, !10, i64 48, !12, i64 56}
!6 = !{!"short", !7, i64 0}
diff --git a/HIP-Basic/matrix_multiplication/Makefile b/HIP-Basic/matrix_multiplication/Makefile
index ba6d2aded..41539d627 100644
--- a/HIP-Basic/matrix_multiplication/Makefile
+++ b/HIP-Basic/matrix_multiplication/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
-$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
-$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/cmdparser.hpp $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/matrix_multiplication/matrix_multiplication_vs2019.vcxproj b/HIP-Basic/matrix_multiplication/matrix_multiplication_vs2019.vcxproj
index 81bac082a..90569f9b0 100644
--- a/HIP-Basic/matrix_multiplication/matrix_multiplication_vs2019.vcxproj
+++ b/HIP-Basic/matrix_multiplication/matrix_multiplication_vs2019.vcxproj
@@ -1,101 +1,101 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
-
-
- 15.0
- {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}
- Win32Proj
- matrix_multiplication_vs2019
- 10.0
-
-
-
- Application
- true
- HIP
- Unicode
-
-
- Application
- false
- HIP
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- hip_$(ProjectName)
-
-
- false
- hip_$(ProjectName)
-
-
- gfx1030
-
-
- gfx1030
-
-
-
- Level1
- __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- true
-
-
- Console
- true
-
-
-
-
- Level2
- true
- true
- __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- stdcpp17
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- true
-
-
- Console
- true
- true
- true
-
-
-
-
-
-
-
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}
+ Win32Proj
+ matrix_multiplication_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ true
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ true
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/module_api/.gitignore b/HIP-Basic/module_api/.gitignore
new file mode 100644
index 000000000..ff941c6a3
--- /dev/null
+++ b/HIP-Basic/module_api/.gitignore
@@ -0,0 +1,2 @@
+hip_module_api
+module.co
diff --git a/HIP-Basic/module_api/CMakeLists.txt b/HIP-Basic/module_api/CMakeLists.txt
new file mode 100644
index 000000000..f5fc951fb
--- /dev/null
+++ b/HIP-Basic/module_api/CMakeLists.txt
@@ -0,0 +1,76 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name shared by the CMake project and the example executable.
+set(example_name hip_module_api)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+# User-facing cache option; only the value "HIP" is accepted (checked below).
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+
+# Only supported on HIP (not CUDA)
+if(NOT "${GPU_RUNTIME}" STREQUAL "HIP")
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be HIP.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+# Enable the GPU language and require language standard 17 without compiler extensions.
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+# Use the ROCm installation as the package search prefix unless the user provided one.
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+# Turn every entry of CMAKE_HIP_ARCHITECTURES into an "--offload-arch=<arch>" compiler flag.
+set(offload_archs ${CMAKE_HIP_ARCHITECTURES})
+list(TRANSFORM offload_archs PREPEND "--offload-arch=")
+
+# Code object to generate (in the build directory) and the source file it is compiled from.
+set(module ${CMAKE_CURRENT_BINARY_DIR}/module.co)
+set(module_sources ${CMAKE_CURRENT_SOURCE_DIR}/module.hip)
+set(module_sources ${CMAKE_CURRENT_SOURCE_DIR}/module.hip)
+# Select the HIP compile flags for the active build type.
+# CMAKE_BUILD_TYPE is a string, so it must be compared with STREQUAL: EQUAL is a
+# numeric comparison that never matches these names, which left module_flags
+# empty for every build type.
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_DEBUG}")
+elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
+    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_RELEASE}")
+elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
+    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_MINSIZEREL}")
+elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+    set(module_flags "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_RELWITHDEBINFO}")
+else()
+    # No (or an unknown) build type: use only the configuration-independent flags.
+    set(module_flags "${CMAKE_HIP_FLAGS}")
+endif()
+# The flag variables are whitespace-separated strings. Split them into a list so
+# that each flag is passed to the compiler as its own argument by the custom
+# command below, instead of as one argument containing spaces.
+separate_arguments(module_flags NATIVE_COMMAND "${module_flags}")
+
+# Compile the module source into a device-only code object by invoking the HIP
+# compiler directly; the result is not linked into the example executable.
+add_custom_command(
+ OUTPUT ${module}
+ COMMAND ${CMAKE_HIP_COMPILER} ${module_flags} ${module_sources} ${offload_archs} --cuda-device-only -o ${module}
+ DEPENDS ${module_sources}
+ COMMENT "Compiling HIP code object module.co"
+)
+
+# Part of the default build (ALL), so the code object is generated together with the executable.
+add_custom_target(module ALL DEPENDS ${module})
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+# Directory with the headers shared between the examples.
+set(include_dirs "../../Common")
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+# Compile main.hip with the selected GPU language.
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/module_api/Makefile b/HIP-Basic/module_api/Makefile
new file mode 100644
index 000000000..b206412c4
--- /dev/null
+++ b/HIP-Basic/module_api/Makefile
@@ -0,0 +1,54 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Name of the executable built by this Makefile.
+EXAMPLE := hip_module_api
+# Directory with the headers shared between the examples.
+COMMON_INCLUDE_DIR := ../../Common
+# This example supports only the HIP runtime; any other value (e.g. given on the
+# command line) is rejected by the check below.
+GPU_RUNTIME := HIP
+
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+# Compiler driver; can be overridden from the environment or the command line.
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+# The I* variables hold the flags actually passed to the compiler: the flags the
+# example requires, followed by the user-supplied CXXFLAGS/CPPFLAGS/LDFLAGS/LDLIBS.
+# Overriding CXXFLAGS therefore replaces only the default warning flags.
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+# Build the host executable.
+# module.co is loaded from disk at runtime and is not linked into the executable,
+# so it is an order-only prerequisite: it is still (re)built whenever it is
+# missing or out of date, but a change to it does not force the executable to be
+# rebuilt.
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp | module.co
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+# Compile the kernels into a device-only code object (--genco).
+module.co: module.hip
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) --genco -o $@ $< $(ILDLIBS)
+
+# Remove everything this Makefile builds.
+clean:
+	$(RM) $(EXAMPLE) module.co
+
+.PHONY: clean
diff --git a/HIP-Basic/module_api/README.md b/HIP-Basic/module_api/README.md
new file mode 100644
index 000000000..89af8cc61
--- /dev/null
+++ b/HIP-Basic/module_api/README.md
@@ -0,0 +1,68 @@
+# HIP-Basic Module API Example
+
+## Description
+This example shows how to load and execute a HIP module at runtime without linking it to the rest of the code during compilation.
+
+### Application flow
+1. Set up the name of the compiled module code object file `(*.co)`, located in the same directory.
+2. Define kernel launch parameters.
+3. Initialize input and output vectors in host memory.
+4. Allocate device arrays for the input and output vectors and copy the input vector to device memory.
+5. Get the module path from the module file name.
+6. Load module by `hipModuleLoad()`.
+7. Fetch a reference to the kernel by `hipModuleGetFunction()`.
+8. Create and fill the array with kernel arguments.
+9. Launch the kernel on the default stream by `hipModuleLaunchKernel()`.
+10. Copy the result back to the host.
+11. Free input and output arrays on device memory.
+12. Compare input and output vectors. The result of the comparison is printed to standard output.
+
+## Building
+The kernel module needs to be compiled as a non-linked device code object file (`*.co`), in one of the following ways:
+ - `hipcc --genco --offload-arch=[TARGET GPU] [INPUT FILE] -o [OUTPUT FILE]`
+ - `clang++ --cuda-device-only --offload-arch=[TARGET GPU] [INPUT FILE] -o [OUTPUT FILE]`
+
+where the parameters are:
+ - `[TARGET GPU]`: GPU architecture (e.g. `gfx908` or `gfx90a:xnack-`).
+ - `[INPUT FILE]`: Name of the file containing kernels (e.g. `module.hip`).
+ - `[OUTPUT FILE]`: Name of the generated code object file (e.g. `module.co`).
+
+The `main.hip` example file is compiled similarly as in the other examples.
+
+## Key APIs and Concepts
+- The `hipModuleLoad(hipModule_t *module, const char *file_name)` will load a HIP module at runtime from the path that is given as an input parameter, or return an error.
+
+- The `hipModuleGetFunction(hipFunction_t *kernel_function, hipModule_t module, const char *kernel_name)` will fetch a reference to the `__global__` kernel function in the HIP module.
+
+- `hipModuleLaunchKernel` will launch a kernel function on the device. The input parameters are:
+ - `hipFunction_t kernel_function`: Kernel function.
+ - `unsigned int gridDimX`: Number of blocks in the dimension X.
+ - `unsigned int gridDimY`: Number of blocks in the dimension Y.
+ - `unsigned int gridDimZ`: Number of blocks in the dimension Z.
+ - `unsigned int blockDimX`: Number of threads in the dimension X in a block.
+ - `unsigned int blockDimY`: Number of threads in the dimension Y in a block.
+ - `unsigned int blockDimZ`: Number of threads in the dimension Z in a block.
+ - `unsigned int sharedMemBytes`: Amount of dynamic shared memory that will be available to each workgroup, in bytes. (Not used in this example.)
+ - `hipStream_t stream`: The device stream, on which the kernel should be dispatched. (`hipStreamDefault` in this example.)
+ - `void **kernelParams`: Pointer to the arguments needed by the kernel. Note that this parameter is not yet implemented, and thus the _extra_ parameter (the last one described in this list) should be used to pass arguments to the kernel. (Thereby `nullptr` is used in the example.)
+ - `void **extra`: Pointer to all extra arguments passed to the kernel. They must be in the memory layout and alignment expected by the kernel. The list of arguments must end with `HIP_LAUNCH_PARAM_END`.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `__global__`
+- `threadIdx`
+
+#### Host symbols
+- `hipGetLastError`
+- `hipGetSymbolAddress`
+- `hipGetSymbolSize`
+- `hipLaunchKernelGGL`
+- `hipMalloc`
+- `hipMemcpy`
+- `hipMemcpyHostToDevice`
+- `hipMemcpyDeviceToHost`
+- `hipFree`
+- `hipModuleLoad`
+- `hipModuleGetFunction`
+- `hipModuleLaunchKernel`
diff --git a/HIP-Basic/module_api/main.hip b/HIP-Basic/module_api/main.hip
new file mode 100644
index 000000000..5c4b0084a
--- /dev/null
+++ b/HIP-Basic/module_api/main.hip
@@ -0,0 +1,137 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include <hip/hip_runtime.h>
+
+#include <filesystem>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+/// \brief Loads the code object module.co from the directory of the executable,
+/// launches its kernel "test_module_api_kernel" through the HIP module API and
+/// checks that the output vector equals the input vector.
+/// \param argv argv[0] is used to locate the directory of the executable.
+/// \returns 0 when validation passes, error_exit_code otherwise.
+int main(int, char* argv[])
+{
+    // The module file that contains the kernel that we want to invoke. This
+    // file is expected to be in the same directory as the executable.
+    constexpr const char* module_file_name = "module.co";
+
+    // The size of the input and output vectors.
+    constexpr unsigned int size = 64;
+
+    // The total number of bytes in the input and output vectors.
+    constexpr size_t size_bytes = size * sizeof(float);
+
+    // Number of threads per kernel block.
+    constexpr unsigned int block_size = size;
+
+    // Number of blocks per kernel grid. The expression below calculates ceil(size/block_size).
+    constexpr unsigned int grid_size = (size + block_size - 1) / block_size;
+
+    // Allocate host vectors for the input and output.
+    std::vector<float> h_in(size);
+    std::vector<float> h_out(size);
+
+    // Fill the input with an increasing sequence (i.e. 1, 2, 3, 4...).
+    std::iota(h_in.begin(), h_in.end(), 1.f);
+
+    // Allocate the device vectors and copy the input to device memory.
+    float* d_in{};
+    float* d_out{};
+    HIP_CHECK(hipMalloc(&d_in, size_bytes));
+    HIP_CHECK(hipMalloc(&d_out, size_bytes));
+    HIP_CHECK(hipMemcpy(d_in, h_in.data(), size_bytes, hipMemcpyHostToDevice));
+
+    // Compute an absolute path to the module that we are going to load.
+    // To do that, find the directory where the example executable is placed in from the 0th argument.
+    // Note that this does not always work (the executable may be invoked with a completely different
+    // value for argv[0]), but works for the purposes of this example.
+    std::filesystem::path exe_dir
+        = std::filesystem::weakly_canonical(std::filesystem::path(argv[0])).parent_path();
+    std::filesystem::path module_path = exe_dir / module_file_name;
+
+    // Load the module from the path that we just constructed.
+    // If the module does not exist, this function will return an error.
+    hipModule_t module;
+    HIP_CHECK(hipModuleLoad(&module, module_path.u8string().c_str()));
+
+    // Fetch a reference to the kernel that we are going to invoke.
+    hipFunction_t kernel;
+    HIP_CHECK(hipModuleGetFunction(&kernel, module, "test_module_api_kernel"));
+
+    // Create and fill the buffer with the kernel arguments. The arguments are
+    // packed in the order of the kernel's parameters: first out, then in.
+    size_t offset    = 0;
+    char   args[256] = {};
+
+    *(reinterpret_cast<float**>(&args[offset])) = d_out;
+    offset += sizeof(d_out);
+    *(reinterpret_cast<float**>(&args[offset])) = d_in;
+    offset += sizeof(d_in);
+
+    // After packing, offset holds the number of bytes used in the argument buffer.
+    void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
+                      args,
+                      HIP_LAUNCH_PARAM_BUFFER_SIZE,
+                      &offset,
+                      HIP_LAUNCH_PARAM_END};
+
+    // Launch the kernel on the default stream and with the above configuration.
+    HIP_CHECK(hipModuleLaunchKernel(kernel,
+                                    grid_size,
+                                    1,
+                                    1,
+                                    block_size,
+                                    1,
+                                    1,
+                                    0,
+                                    hipStreamDefault,
+                                    nullptr,
+                                    config));
+
+    // Copy the results back to the host. This call blocks the host's execution until the copy is finished.
+    HIP_CHECK(hipMemcpy(h_out.data(), d_out, size_bytes, hipMemcpyDeviceToHost));
+
+    // The kernel is no longer needed at this point, so release the module.
+    HIP_CHECK(hipModuleUnload(module));
+
+    // Free device memory.
+    HIP_CHECK(hipFree(d_in));
+    HIP_CHECK(hipFree(d_out));
+
+    // Check the results' validity: the kernel copies in to out, so both vectors must be equal.
+    unsigned int errors{};
+    for(size_t i = 0; i < size; ++i)
+    {
+        if(h_out[i] != h_in[i])
+        {
+            ++errors;
+        }
+    }
+
+    if(errors != 0)
+    {
+        std::cout << "Validation failed. Errors: " << errors << std::endl;
+        return error_exit_code;
+    }
+    else
+    {
+        std::cout << "Validation passed." << std::endl;
+    }
+
+    return 0;
+}
diff --git a/HIP-Basic/module_api/module.hip b/HIP-Basic/module_api/module.hip
new file mode 100644
index 000000000..f309303a2
--- /dev/null
+++ b/HIP-Basic/module_api/module.hip
@@ -0,0 +1,34 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include
+
+// This file represents a HIP module. It will be compiled ahead of time together with
+// the rest of the example, but will not be linked to it. Instead, this module will be
+// loaded at runtime by the program, and executed that way.
+
+/// \brief A simple test kernel that copies element threadIdx.x of \p in to \p out.
+extern "C" __global__ void test_module_api_kernel(float* out, const float* in)
+{
+ const unsigned int tid = threadIdx.x;
+ out[tid] = in[tid];
+}
diff --git a/HIP-Basic/module_api/module_api_vs2019.sln b/HIP-Basic/module_api/module_api_vs2019.sln
new file mode 100644
index 000000000..42c550432
--- /dev/null
+++ b/HIP-Basic/module_api/module_api_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "module_api_vs2019", "module_api_vs2019.vcxproj", "{306EB993-653A-45F6-863A-5f43BC86DA79}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Debug|x64.ActiveCfg = Debug|x64
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Debug|x64.Build.0 = Debug|x64
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Release|x64.ActiveCfg = Release|x64
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {2D151D72-1741-4B0B-99F9-50C182082CFC}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/module_api/module_api_vs2019.vcxproj b/HIP-Basic/module_api/module_api_vs2019.vcxproj
new file mode 100644
index 000000000..a0995a224
--- /dev/null
+++ b/HIP-Basic/module_api/module_api_vs2019.vcxproj
@@ -0,0 +1,112 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+ Document
+ "$(ClangToolPath)$(ClangToolExe)" --cuda-device-only -o $(OutDir)module.co %(Identity) --offload-arch=gfx1030
+ Compiling HIP Code Object module.co
+ $(OutDir)module.co
+ "$(ClangToolPath)$(ClangToolExe)" --cuda-device-only -o $(OutDir)module.co %(Identity) --offload-arch=gfx1030
+ Compiling HIP Code Object module.co
+ $(OutDir)module.co
+ $(BinDir)module.co
+ $(BinDir)module.co
+
+
+
+
+
+
+ 15.0
+ {306eb993-653a-45f6-863a-5f43bc86da79}
+ Win32Proj
+ module_api_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+ ClCompile
+
+
+ false
+ hip_$(ProjectName)
+ ClCompile
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/module_api/module_api_vs2019.vcxproj.filters b/HIP-Basic/module_api/module_api_vs2019.vcxproj.filters
new file mode 100644
index 000000000..bb3e10eea
--- /dev/null
+++ b/HIP-Basic/module_api/module_api_vs2019.vcxproj.filters
@@ -0,0 +1,32 @@
+
+
+
+
+ {b6be5f33-3a87-4cea-900e-720c76b2bdd7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {116f6a96-2d11-4004-974f-2d651b18763d}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {f1508573-344a-468c-93ba-fa8fccbff0bf}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+
+
+ Source Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/moving_average/.gitignore b/HIP-Basic/moving_average/.gitignore
new file mode 100644
index 000000000..e0f33dd07
--- /dev/null
+++ b/HIP-Basic/moving_average/.gitignore
@@ -0,0 +1 @@
+hip_moving_average
diff --git a/HIP-Basic/moving_average/CMakeLists.txt b/HIP-Basic/moving_average/CMakeLists.txt
new file mode 100644
index 000000000..00ac50492
--- /dev/null
+++ b/HIP-Basic/moving_average/CMakeLists.txt
@@ -0,0 +1,57 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_moving_average)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA")
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/moving_average/Makefile b/HIP-Basic/moving_average/Makefile
new file mode 100644
index 000000000..d8c154b42
--- /dev/null
+++ b/HIP-Basic/moving_average/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_moving_average
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/moving_average/README.md b/HIP-Basic/moving_average/README.md
new file mode 100644
index 000000000..5084694da
--- /dev/null
+++ b/HIP-Basic/moving_average/README.md
@@ -0,0 +1,35 @@
+# HIP-Basic Moving Average Example
+
+## Description
+This example shows the use of a kernel that computes a moving average on one-dimensional data. In a sequential program, the moving average of a given input array is found by processing the elements one by one. The average of the previous $n$ elements is called the moving average, where $n$ is called the _window size_. In this example, a kernel is implemented to compute the moving average in parallel, using the shared memory as a cache.
+
+### Application flow
+1. Define constants to control the problem size and the kernel launch parameters.
+2. Allocate and initialize the input array. Element $i$ of this array is set to $i \bmod n$, which yields the repeating sequence $0, 1, \ldots, n - 1, 0, 1, \ldots$.
+3. Allocate the device array and copy the host array to it.
+4. Launch the kernel to compute the moving average.
+5. Copy the result back to the host and validate it. Because each average is computed over $n$ consecutive values of the input array, every window contains each of the values $0, 1, 2, \ldots, n - 1$ exactly once, so every average is equal to $(n-1)/2$.
+
+## Key APIs and Concepts
+Device memory is allocated with `hipMalloc`, deallocated with `hipFree`. Copies to and from the device are made with `hipMemcpy` with options `hipMemcpyHostToDevice` and `hipMemcpyDeviceToHost`, respectively. A kernel is launched with `hipLaunchKernelGGL`, which requires the use of `HIP_KERNEL_NAME` to pass kernel template arguments. Shared memory is allocated in the kernel with the `__shared__` memory space specifier.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `__shared__`
+- `__syncthreads`
+- `blockDim`
+- `blockIdx`
+- `threadIdx`
+
+#### Host symbols
+- `__global__`
+- `hipFree`
+- `hipGetLastError`
+- `hipLaunchKernelGGL`
+- `hipMalloc`
+- `hipMemcpy`
+- `hipMemcpyDeviceToHost`
+- `hipMemcpyHostToDevice`
+- `hipStreamDefault`
+- `HIP_KERNEL_NAME`
diff --git a/HIP-Basic/moving_average/main.hip b/HIP-Basic/moving_average/main.hip
new file mode 100644
index 000000000..c9223f79f
--- /dev/null
+++ b/HIP-Basic/moving_average/main.hip
@@ -0,0 +1,158 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+
+#include
+#include
+#include
+#include
+
+/// \brief Compute the moving average of \p input_size elements with a window size of \p WindowSize.
+/// Shared memory is used to cache the values needed by all threads in the block.
+/// Thread \p i computes the average of values [i, i + WindowSize).
+template
+__global__ void
+ moving_average(const unsigned int* input, unsigned int* output, const unsigned int input_size)
+{
+ // The offset of this block from the start of the grid.
+ const unsigned int block_offset = blockIdx.x * blockDim.x;
+ // The index of this thread in the grid.
+ const unsigned int thread_idx = block_offset + threadIdx.x;
+
+ // The number of values needed to compute BlockSize averages.
+ constexpr unsigned int buffer_size = BlockSize + WindowSize - 1;
+ __shared__ unsigned int buffer[buffer_size];
+
+ // Load values into shared memory.
+ // Note that threadIdx.x is the index in the block.
+ for(unsigned int i = threadIdx.x; i < buffer_size; i += BlockSize)
+ {
+ // Index in the input array.
+ const unsigned int index = block_offset + i;
+ // Prevent reading outside the input array.
+ if(index < input_size)
+ {
+ buffer[i] = input[index];
+ }
+ }
+
+ // Wait for all threads to load into shared memory.
+ __syncthreads();
+
+ // Compute the average using the cached data.
+ const unsigned int output_size = input_size - WindowSize + 1;
+ if(thread_idx < output_size)
+ {
+ unsigned int sum = 0;
+ for(unsigned int i = 0; i < WindowSize; i++)
+ {
+ sum += buffer[threadIdx.x + i];
+ }
+
+ sum /= WindowSize;
+
+ output[thread_idx] = sum;
+ }
+}
+
+int main()
+{
+ // The number of elements to process.
+ constexpr unsigned int input_size = 10000000;
+
+ // The number of elements to compute the average over.
+ constexpr unsigned int window_size = 97;
+ static_assert(input_size >= window_size);
+
+ // The number of moving average values produced.
+ constexpr unsigned int output_size = input_size - window_size + 1;
+
+ // The number of threads per kernel block.
+ constexpr unsigned int block_size = 256;
+
+ // Number of blocks per kernel grid.
+ constexpr unsigned int grid_size = (output_size + block_size - 1) / block_size;
+
+ // Allocate and initialize input data on the host.
+ std::vector h_input(input_size);
+ std::iota(h_input.begin(), h_input.end(), 0);
+ std::transform(h_input.begin(),
+ h_input.end(),
+ h_input.begin(),
+ [](unsigned int i) { return i % window_size; });
+
+ // Allocate device input data and copy host data to it.
+ unsigned int* d_input{};
+ constexpr size_t input_size_bytes = input_size * sizeof(unsigned int);
+ HIP_CHECK(hipMalloc(&d_input, input_size_bytes));
+ HIP_CHECK(hipMemcpy(d_input, h_input.data(), input_size_bytes, hipMemcpyHostToDevice));
+
+ // Allocate device output data.
+ unsigned int* d_output{};
+ constexpr size_t output_size_bytes = output_size * sizeof(unsigned int);
+ HIP_CHECK(hipMalloc(&d_output, output_size_bytes));
+
+ std::cout << "Calculating the moving average of " << input_size << " elements with window size "
+ << window_size << std::endl;
+
+ // Launch the kernel on the default stream.
+ hipLaunchKernelGGL(HIP_KERNEL_NAME(moving_average),
+ dim3(grid_size),
+ dim3(block_size),
+ 0,
+ hipStreamDefault,
+ d_input,
+ d_output,
+ input_size);
+
+ // Check if the kernel launch was successful.
+ HIP_CHECK(hipGetLastError());
+
+ // Copy the results back to the host. This call blocks the host's execution until the copy is finished.
+ std::vector h_output(output_size);
+ HIP_CHECK(hipMemcpy(h_output.data(), d_output, output_size_bytes, hipMemcpyDeviceToHost));
+
+ // Free device memory.
+ HIP_CHECK(hipFree(d_output));
+ HIP_CHECK(hipFree(d_input));
+
+ // Validate output.
+ unsigned int incorrect_count = 0;
+ for(unsigned int i = 0; i < output_size; i++)
+ {
+ // Average of [0, n) is given by (n - 1) / 2.
+ incorrect_count += (h_output[i] != (window_size - 1) / 2);
+ }
+
+ if(incorrect_count)
+ {
+ std::cout << "Validation failed. Errors: " << incorrect_count << std::endl;
+ return error_exit_code;
+ }
+ else
+ {
+ std::cout << "Validation passed." << std::endl;
+ }
+}
diff --git a/HIP-Basic/moving_average/moving_average_vs2019.sln b/HIP-Basic/moving_average/moving_average_vs2019.sln
new file mode 100644
index 000000000..a03478ad5
--- /dev/null
+++ b/HIP-Basic/moving_average/moving_average_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moving_average_vs2019", "moving_average_vs2019.vcxproj", "{B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Debug|x64.ActiveCfg = Debug|x64
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Debug|x64.Build.0 = Debug|x64
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Release|x64.ActiveCfg = Release|x64
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {D7C4B290-7C93-4D26-85D9-364F6A448EE0}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/moving_average/moving_average_vs2019.vcxproj b/HIP-Basic/moving_average/moving_average_vs2019.vcxproj
new file mode 100644
index 000000000..c0e2d9b87
--- /dev/null
+++ b/HIP-Basic/moving_average/moving_average_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {628390E3-DB62-4D52-9594-DE6BC15F9943}
+ Win32Proj
+ moving_average_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/moving_average/moving_average_vs2019.vcxproj.filters b/HIP-Basic/moving_average/moving_average_vs2019.vcxproj.filters
new file mode 100644
index 000000000..ba3f442c9
--- /dev/null
+++ b/HIP-Basic/moving_average/moving_average_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {2932a426-602b-4926-887e-27c50ba7eab7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {ed043ec4-e8ac-4831-93f5-a58546ec7bea}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {0da954bd-e555-4454-b082-b68d10c753b9}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/multi_gpu_data_transfer/.gitignore b/HIP-Basic/multi_gpu_data_transfer/.gitignore
new file mode 100644
index 000000000..a1dc97b57
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/.gitignore
@@ -0,0 +1 @@
+hip_multi_gpu_data_transfer
diff --git a/HIP-Basic/multi_gpu_data_transfer/CMakeLists.txt b/HIP-Basic/multi_gpu_data_transfer/CMakeLists.txt
new file mode 100644
index 000000000..6b93529a4
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/CMakeLists.txt
@@ -0,0 +1,58 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_multi_gpu_data_transfer)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+set(include_dirs "../../Common")
+# For examples targeting NVIDIA, include the HIP header directory.
+if(GPU_RUNTIME STREQUAL "CUDA")
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/multi_gpu_data_transfer/Makefile b/HIP-Basic/multi_gpu_data_transfer/Makefile
new file mode 100644
index 000000000..63bb0edfb
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_multi_gpu_data_transfer
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/multi_gpu_data_transfer/README.md b/HIP-Basic/multi_gpu_data_transfer/README.md
new file mode 100644
index 000000000..322194546
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/README.md
@@ -0,0 +1,61 @@
+# HIP-Basic Multi GPU Data Transfer Example
+
+## Description
+Peer-to-peer (P2P) communication allows direct communication over PCIe (or NVLINK, in some NVIDIA configurations) between devices. Given that it is not necessary to access the host in order to transfer data between devices, P2P communications provide a lower latency than traditional communications that do need to access the host.
+
+Because P2P communication is done over PCIe/NVLINK, the availability of this type of communication among devices depends mostly on the existing PCIe/NVLINK topology.
+
+In this example, the result of a matrix transpose kernel execution on one device is directly copied to the other one, showcasing how to carry out a P2P communication between two GPUs.
+
+### Application flow
+1. P2P communication support is checked among the available devices. In case two of these devices are found to have it between them, they are selected for the example. A trace message informs about the IDs of the devices selected.
+2. The input and output matrices are allocated and initialized in host memory.
+3. The first device selected is set as the current device, device memory for the input and output matrices is allocated on the current device and the input data is copied from the host.
+4. A matrix transpose kernel using static shared memory is then launched on the current device using the previously defined arguments. A synchronization function is used to wait for the kernel to finish before continuing the host execution.
+5. The second device selected is set as the current device and the necessary amount of device memory for the input and output matrices for the second kernel execution is allocated on the current device.
+6. Direct memory access is enabled from the second device to the first one. This allows memory to be copied from the first device to the second with the usual memory-copy functions.
+7. The input data is copied to the current device from the output matrix allocated on the first device and a matrix transpose kernel using dynamic shared memory is then launched on the current device. A synchronization function is used to wait for the kernel to finish before continuing the host execution.
+8. The output matrix from this second kernel execution is copied back to host memory.
+9. Direct memory access from the second device to the first one is disabled.
+10. Device memory is freed.
+11. Results are validated and printed to the standard output.
+
+## Key APIs and Concepts
+- `hipGetDeviceCount` gives the number of devices available. In this example it is used to check whether more than one device is available.
+- `hipDeviceCanAccessPeer` queries whether a certain device can directly access the memory of a given peer device. A P2P communication is supported between two devices if this function returns true for those two devices.
+- `hipSetDevice` sets the specified device as the default device for the subsequent API calls. Such a device is then known as _current device_.
+- Once a current device is selected, and if a P2P communication is possible with a certain peer device, `hipDeviceEnablePeerAccess` can be used to enable access from the current device to the peer device's memory. With `hipDeviceDisablePeerAccess` it can also be disabled (provided that `hipDeviceEnablePeerAccess` has already been called for the same current and peer devices).
+- `hipMalloc` allocates memory in the global memory of the device. When `hipSetDevice` is called to set a specific current device, the subsequent calls to `hipMalloc` will allocate memory in the current device's global memory. In the example it is showcased how to use these two functions to allocate memory on the two devices used.
+- With `hipMemcpy` data bytes can be transferred from host to device (using `hipMemcpyHostToDevice`), from device to host (using `hipMemcpyDeviceToHost`) or from device to device (using `hipMemcpyDeviceToDevice`). The latter will only work if P2P communication has been enabled from the destination to the source device.
+- `hipLaunchKernelGGL` queues the execution of a kernel on the current device and `hipDeviceSynchronize` makes the host wait on all active streams on the current device. In this example `hipDeviceSynchronize` is necessary because the second device needs the results obtained from the previous kernel execution on the first device.
+- `hipDeviceReset` discards the state of the current device and updates it to a fresh one. It also frees all the resources (e.g. streams, events, ...) associated with the current device.
+
+## Demonstrated API Calls
+
+### HIP runtime
+- `__global__`
+- `__shared__`
+
+#### Device symbols
+- `blockDim`
+- `blockIdx`
+- `threadIdx`
+- `__syncthreads`
+
+#### Host symbols
+- `hipDeviceCanAccessPeer`
+- `hipDeviceDisablePeerAccess`
+- `hipDeviceEnablePeerAccess`
+- `hipDeviceReset`
+- `hipDeviceSynchronize`
+- `hipFree`
+- `hipGetDeviceCount`
+- `hipLaunchKernelGGL`
+- `hipMalloc`
+- `hipMemcpy`
+- `hipMemcpyDeviceToDevice`
+- `hipMemcpyDeviceToHost`
+- `hipMemcpyHostToDevice`
+- `hipSetDevice`
+- `hipStreamDefault`
+- `hipSuccess`
diff --git a/HIP-Basic/multi_gpu_data_transfer/main.hip b/HIP-Basic/multi_gpu_data_transfer/main.hip
new file mode 100644
index 000000000..9a08aeb18
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/main.hip
@@ -0,0 +1,287 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include <hip/hip_runtime.h>
+
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+/// \brief Checks whether peer-to-peer memory access is supported among the available devices.
+/// \returns The IDs of the first pair of devices found with peer-to-peer memory access, if any.
+std::pair<int, int> check_peer_to_peer_support()
+{
+    // Get number of GPUs available.
+    int gpu_count = 0, can_access_peer = 0;
+    HIP_CHECK(hipGetDeviceCount(&gpu_count));
+
+    // If there are not enough devices (at least 2) peer-to-peer is not possible.
+    if(gpu_count < 2)
+    {
+        std::cout << "Peer-to-peer application requires at least 2 GPU devices." << std::endl;
+        exit(hipSuccess);
+    }
+
+    // Check accessibility for each device available.
+    for(int current_gpu = 0; current_gpu < gpu_count; current_gpu++)
+    {
+        // Check if current_gpu device can access the memory of the devices with lower ID.
+        for(int peer_gpu = 0; peer_gpu < current_gpu; peer_gpu++)
+        {
+            HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, current_gpu, peer_gpu));
+
+            // The first pair found with peer-to-peer memory access is returned.
+            if(can_access_peer)
+            {
+                return std::pair<int, int>(current_gpu, peer_gpu);
+            }
+        }
+    }
+    // No pair with peer-to-peer access was found: the process ends here with exit code hipSuccess.
+    std::cout << "Peer-to-peer application requires at least 2 GPU devices accesible between them."
+              << std::endl;
+    exit(hipSuccess);
+}
+
+/// \brief Enables (if possible) direct memory access from <tt>current_gpu</tt> to <tt>peer_gpu</tt>.
+void enable_peer_to_peer(const int current_gpu, const int peer_gpu)
+{
+ // Must be on a multi-gpu system.
+ assert(current_gpu != peer_gpu && "Current and peer devices must be different.");
+
+ // Set current GPU as default device for subsequent API calls.
+ HIP_CHECK(hipSetDevice(current_gpu));
+
+ // Enable direct memory access from current to peer device.
+ HIP_CHECK(hipDeviceEnablePeerAccess(peer_gpu, 0 /*flags*/));
+}
+
+/// \brief Disables (if possible) direct memory access from <tt>current_gpu</tt> to <tt>peer_gpu</tt>.
+void disable_peer_to_peer(const int current_gpu, const int peer_gpu)
+{
+    // Must be on a multi-gpu system.
+    assert(current_gpu != peer_gpu && "Current and peer devices must be different.");
+
+    // Set current GPU as default device for subsequent API calls.
+    HIP_CHECK(hipSetDevice(current_gpu));
+
+    // Disable direct memory access from current to peer device.
+    HIP_CHECK(hipDeviceDisablePeerAccess(peer_gpu));
+}
+
+/// \brief Simple matrix transpose kernel using static shared memory.
+/// \tparam Width Number of rows and columns of the square matrix (defaults to 32).
+template<unsigned int Width = 32>
+__global__ void static_shared_matrix_transpose_kernel(float* out, const float* in)
+{
+    // Allocate the necessary amount of shared memory to store the transpose of the matrix.
+    // Note that the amount of shared memory needed is known at compile time.
+    constexpr unsigned int size = Width * Width;
+    __shared__ float       shared_matrix_memory[size];
+
+    // Coordinates of the matrix element that this thread is going to process.
+    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+    // If not out of bounds, transpose element (x,y).
+    if(x < Width && y < Width)
+    {
+        // Store transposed element in shared memory.
+        shared_matrix_memory[y * Width + x] = in[x * Width + y];
+    }
+
+    // Synchronize threads so all writes are done before accessing shared memory again.
+    __syncthreads();
+
+    // If not out of bounds, write element (x,y) of the result.
+    if(x < Width && y < Width)
+    {
+        // Copy transposed element from shared memory to global memory.
+        out[y * Width + x] = shared_matrix_memory[y * Width + x];
+    }
+}
+
+/// \brief Matrix transpose kernel that stages its data in dynamically sized shared memory.
+__global__ void
+    dynamic_shared_matrix_transpose_kernel(float* out, const float* in, const unsigned int width)
+{
+    // The unsized extern array marks this buffer as dynamic shared memory: its size in bytes
+    // is not known at compile time and is instead passed as an argument of the kernel launch.
+    extern __shared__ float shared_matrix_memory[];
+
+    // Coordinates of the matrix element that this thread processes.
+    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+    // Threads that fall outside of the matrix skip both memory accesses below, but they
+    // still take part in the barrier.
+    const bool in_bounds = (x < width) && (y < width);
+    // Position of the element both in shared memory and in the output matrix.
+    const unsigned int target = y * width + x;
+
+    if(in_bounds)
+    {
+        // Read the input with swapped coordinates, so the stored element is transposed.
+        shared_matrix_memory[target] = in[x * width + y];
+    }
+
+    // Barrier: all shared memory writes must be done before shared memory is read again.
+    __syncthreads();
+
+    if(in_bounds)
+    {
+        out[target] = shared_matrix_memory[target];
+    }
+}
+
+int main()
+{
+    // Check peer-to-peer access for all devices and get the IDs of the first pair (if any)
+    // that supports peer-to-peer memory access.
+    std::pair<int, int> gpus = check_peer_to_peer_support();
+
+    std::cout << "Devices with IDs " << gpus.first << " and " << gpus.second << " selected."
+              << std::endl;
+
+    // Number of rows and columns, total number of elements and size in bytes of the matrix
+    // to be transposed.
+    constexpr unsigned int width      = 32;
+    constexpr unsigned int size       = width * width;
+    constexpr size_t       size_bytes = size * sizeof(float);
+
+    // Number of threads in each dimension of the kernel block.
+    constexpr unsigned int block_size = 4;
+
+    // Number of blocks in each dimension of the grid. Calculated as ceiling(width/block_size).
+    constexpr unsigned int grid_size = (width + block_size - 1) / block_size;
+
+    // Block and grid sizes in 2D.
+    constexpr dim3 block_dim(block_size, block_size);
+    constexpr dim3 grid_dim(grid_size, grid_size);
+
+    // Allocate host input matrix and initialize with increasing sequence 1, 2, 3, ....
+    std::vector<float> matrix(size);
+    std::iota(matrix.begin(), matrix.end(), 1.f);
+
+    // Allocate host matrix to store the results of the kernel execution on the second device.
+    std::vector<float> transposed_matrix(size, 0.f);
+
+    // Declare input and output matrices for the executions on both devices.
+    float* d_matrix[2]{};
+    float* d_transposed_matrix[2]{};
+
+    // Set first gpu as default device for subsequent API calls.
+    HIP_CHECK(hipSetDevice(gpus.first));
+
+    // Allocate input and output matrices on current device.
+    HIP_CHECK(hipMalloc(&d_transposed_matrix[0], size_bytes));
+    HIP_CHECK(hipMalloc(&d_matrix[0], size_bytes));
+
+    // Copy input matrix data from host to current device.
+    HIP_CHECK(hipMemcpy(d_matrix[0], matrix.data(), size_bytes, hipMemcpyHostToDevice));
+
+    std::cout << "Computing matrix transpose on device " << gpus.first << "." << std::endl;
+
+    // Launch kernel in current device. Note that, as this kernel uses static shared memory, no
+    // bytes of shared memory need to be allocated when launching the kernel.
+    hipLaunchKernelGGL(static_shared_matrix_transpose_kernel<width>,
+                       grid_dim,
+                       block_dim,
+                       0 /*shared_memory_bytes*/,
+                       hipStreamDefault,
+                       d_transposed_matrix[0],
+                       d_matrix[0]);
+
+    // Wait on all active streams on the current device.
+    HIP_CHECK(hipDeviceSynchronize());
+
+    // Set second gpu as default device for subsequent API calls.
+    HIP_CHECK(hipSetDevice(gpus.second));
+
+    // Allocate input and output matrices on current device.
+    HIP_CHECK(hipMalloc(&d_transposed_matrix[1], size_bytes));
+    HIP_CHECK(hipMalloc(&d_matrix[1], size_bytes));
+
+    std::cout << "Transferring results from device " << gpus.first << " to device " << gpus.second
+              << "." << std::endl;
+
+    // Enable (if possible) direct memory access from current (second) to peer (first) GPU.
+    enable_peer_to_peer(gpus.second, gpus.first);
+
+    // Copy output matrix from peer device to input matrix on current device. This copy is made
+    // directly between devices (no host needed) because direct memory access was previously
+    // enabled from second to first device.
+    HIP_CHECK(hipMemcpy(d_matrix[1], d_transposed_matrix[0], size_bytes, hipMemcpyDeviceToDevice));
+
+    std::cout << "Computing matrix transpose on device " << gpus.second << "." << std::endl;
+
+    // Launch kernel in current device. Note that size_bytes bytes of shared memory are required to launch
+    // this kernel because it uses dynamically allocated shared memory.
+    hipLaunchKernelGGL(dynamic_shared_matrix_transpose_kernel,
+                       grid_dim,
+                       block_dim,
+                       size_bytes /*shared_memory_bytes*/,
+                       hipStreamDefault,
+                       d_transposed_matrix[1],
+                       d_matrix[1],
+                       width);
+
+    // Wait on all active streams on the current device.
+    HIP_CHECK(hipDeviceSynchronize());
+
+    // Copy results from second device to host.
+    HIP_CHECK(hipMemcpy(transposed_matrix.data(),
+                        d_transposed_matrix[1],
+                        size_bytes,
+                        hipMemcpyDeviceToHost));
+
+    // Disable direct memory access.
+    disable_peer_to_peer(gpus.second, gpus.first);
+
+    // Free device memory.
+    for(unsigned int i = 0; i < 2; i++)
+    {
+        HIP_CHECK(hipFree(d_matrix[i]));
+        HIP_CHECK(hipFree(d_transposed_matrix[i]));
+    }
+
+    // Validate results. The matrix was transposed twice (once per device), so the output of the
+    // second device must be the same as the input matrix given to the first device.
+    unsigned int    errors = 0;
+    constexpr float eps    = 1.0E-6;
+    std::cout << "Validating peer-to-peer." << std::endl;
+    for(unsigned int i = 0; i < size; i++)
+    {
+        errors += (std::fabs(matrix[i] - transposed_matrix[i]) > eps);
+    }
+    if(errors)
+    {
+        std::cout << "Validation failed with " << errors << " errors." << std::endl;
+        return EXIT_FAILURE; // A failed validation must not be reported as success.
+    }
+    else
+    {
+        std::cout << "Validation passed." << std::endl;
+    }
+}
diff --git a/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.sln b/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.sln
new file mode 100644
index 000000000..a3fbecbb4
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_data_transfer_vs2019", "multi_gpu_data_transfer_vs2019.vcxproj", "{6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Debug|x64.ActiveCfg = Debug|x64
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Debug|x64.Build.0 = Debug|x64
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Release|x64.ActiveCfg = Release|x64
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {D7C4B290-7C93-4D26-85D9-364F6A448EE0}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.vcxproj b/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.vcxproj
new file mode 100644
index 000000000..c99b5c1a4
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}
+ Win32Proj
+ multi_gpu_data_transfer_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.vcxproj.filters b/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.vcxproj.filters
new file mode 100644
index 000000000..591e9f2c6
--- /dev/null
+++ b/HIP-Basic/multi_gpu_data_transfer/multi_gpu_data_transfer_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {2932a426-602b-4926-887e-27c50ba7eab7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {ed043ec4-e8ac-4831-93f5-a58546ec7bea}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {0da954bd-e555-4454-b082-b68d10c753b9}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/occupancy/Makefile b/HIP-Basic/occupancy/Makefile
index c801e4248..26fc620be 100644
--- a/HIP-Basic/occupancy/Makefile
+++ b/HIP-Basic/occupancy/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
-$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/occupancy/occupancy_vs2019.vcxproj b/HIP-Basic/occupancy/occupancy_vs2019.vcxproj
index a1c151faa..db2a86e75 100644
--- a/HIP-Basic/occupancy/occupancy_vs2019.vcxproj
+++ b/HIP-Basic/occupancy/occupancy_vs2019.vcxproj
@@ -1,99 +1,99 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
-
-
- 15.0
- {e5b2fc79-3928-47f6-b57b-33aaa3c5d9c5}
- Win32Proj
- occupancy_vs2019
- 10.0
-
-
-
- Application
- true
- HIP
- Unicode
-
-
- Application
- false
- HIP
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- hip_$(ProjectName)
-
-
- false
- hip_$(ProjectName)
-
-
- gfx1030
-
-
- gfx1030
-
-
-
- Level1
- __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- stdcpp17
-
-
- Console
- true
-
-
-
-
- Level2
- true
- true
- __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- stdcpp17
-
-
- Console
- true
- true
- true
-
-
-
-
-
-
-
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {e5b2fc79-3928-47f6-b57b-33aaa3c5d9c5}
+ Win32Proj
+ occupancy_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/opengl_interop/.gitignore b/HIP-Basic/opengl_interop/.gitignore
new file mode 100644
index 000000000..212153705
--- /dev/null
+++ b/HIP-Basic/opengl_interop/.gitignore
@@ -0,0 +1 @@
+hip_opengl_interop
diff --git a/HIP-Basic/opengl_interop/CMakeLists.txt b/HIP-Basic/opengl_interop/CMakeLists.txt
new file mode 100644
index 000000000..9a5462a4c
--- /dev/null
+++ b/HIP-Basic/opengl_interop/CMakeLists.txt
@@ -0,0 +1,64 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_opengl_interop)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+# The GPU runtime to compile for is a cache option that is restricted to HIP or CUDA.
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+# Fail at configure time when an unsupported runtime is requested.
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+    set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+    message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+# Enable the language of the selected runtime and require standard 17 without extensions for it.
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+# Search the ROCm installation for packages unless the user already supplied a prefix path.
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+    set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+# glad.cpp from the External directory is built together with the example's own source.
+add_executable(${example_name} main.hip ../../External/glad/glad.cpp)
+# When compiling for CUDA, the headers of the ROCm installation are added to the include path.
+set(include_dirs "../../Common" "../../External")
+if(GPU_RUNTIME STREQUAL "CUDA")
+    list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+# GLFW is a hard requirement: configuration stops if the package cannot be found.
+find_package(glfw3 REQUIRED)
+
+target_link_libraries(${example_name} PRIVATE glfw)
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+# On UNIX platforms the dynamic loading library (CMAKE_DL_LIBS) is linked as well.
+if(UNIX)
+    target_link_libraries(${example_name} PRIVATE ${CMAKE_DL_LIBS})
+endif()
+
+# main.hip is the only GPU source of this target; compile it with the selected runtime.
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/opengl_interop/Makefile b/HIP-Basic/opengl_interop/Makefile
new file mode 100644
index 000000000..50eec4993
--- /dev/null
+++ b/HIP-Basic/opengl_interop/Makefile
@@ -0,0 +1,67 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_opengl_interop
+COMMON_INCLUDE_DIR := ../../Common
+EXTERNAL_DIR := ../../External
+GPU_RUNTIME := HIP
+
+# HIP variables: location of the ROCm installation and of its headers.
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+# Compiler driver used to build the example; only set here if not already defined.
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags. The I-prefixed variables hold the flags this example always needs.
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -I $(EXTERNAL_DIR) $(shell pkg-config --cflags glfw3)
+ILDFLAGS :=
+ILDLIBS := $(shell pkg-config --libs glfw3) -ldl
+# Runtime-specific flags; any GPU_RUNTIME other than CUDA or HIP aborts the build.
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+# Append the conventional user-facing variables after the flags required by the example.
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+SRCS := main.hip $(EXTERNAL_DIR)/glad/glad.cpp
+# Build the example from all of its sources with a single compiler invocation.
+$(EXAMPLE): $(SRCS) $(COMMON_INCLUDE_DIR)/example_utils.hpp
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $(SRCS) $(ILDLIBS)
+# Recipe-less header dependencies: a newer glad or KHR header makes the example out of date.
+$(EXTERNAL_DIR)/glad/glad.h: $(EXTERNAL_DIR)/KHR/khrplatform.h
+
+$(EXTERNAL_DIR)/glad/glad.cpp: $(EXTERNAL_DIR)/glad/glad.h
+# Remove the executable produced by this Makefile.
+clean:
+	$(RM) $(EXAMPLE)
+
+.PHONY: clean
diff --git a/HIP-Basic/opengl_interop/README.md b/HIP-Basic/opengl_interop/README.md
new file mode 100644
index 000000000..06f5f2c8b
--- /dev/null
+++ b/HIP-Basic/opengl_interop/README.md
@@ -0,0 +1,67 @@
+# HIP-Basic OpenGL Interop Example
+
+## Description
+External device resources and other handles can be shared with HIP in order to provide interoperability between different GPU APIs. This example showcases a HIP program that interacts with OpenGL: a simple HIP kernel is used to simulate a sine wave over a grid of points, in a buffer that is shared with OpenGL. The resulting data is then rendered to a window as a grid of triangles using OpenGL.
+
+### Application flow
+#### Initialization
+1. A window is opened using the GLFW library
+2. OpenGL is initialized: the window's context is made active, function pointers are loaded, debug output is enabled if possible.
+3. A HIP device is picked that is OpenGL-interop capable with the current OpenGL context by using `hipGLGetDevices`.
+4. OpenGL resources are initialized: a Vertex Array Object is created, buffers are created and initialized, the GLSL shader used to render the triangles is compiled.
+5. A HIP stream is created on the device.
+6. An OpenGL buffer is imported to HIP using `hipGraphicsGLRegisterBuffer` and `hipGraphicsMapResources`. The device pointer to this buffer is obtained with `hipGraphicsResourceGetMappedPointer`.
+7. OpenGL rendering state is bound.
+
+#### Rendering
+1. The sinewave simulation kernel is launched in order to update the OpenGL shared buffer.
+2. The grid is drawn to the window's framebuffer.
+3. The window's framebuffer is presented to the screen.
+
+## Dependencies
+This example has additional library dependencies besides HIP:
+- [GLFW3](https://glfw.org). GLFW can be installed either through the package manager, or can be obtained from its home page. If using CMake, the `glfw3Config.cmake` file must be in a path that CMake searches by default or must be passed using `-DCMAKE_MODULE_PATH`.
+The official GLFW3 binaries do not ship this file on Windows, and so GLFW3 must be compiled manually. CMake will be able to find GLFW on Windows if it is installed in `C:\Program Files (x86)\glfw\`. Alternatively, GLFW can be obtained from [vcpkg](https://vcpkg.io/), which does ship the required CMake files. In this case, the vcpkg toolchain path should be passed to CMake using `-DCMAKE_TOOLCHAIN_FILE="/path/to/vcpkg/scripts/buildsystems/vcpkg.cmake"`.
+If using Visual Studio, the easiest way to obtain GLFW is by installing glfw3 from vcpkg. Alternatively, the appropriate path to the GLFW3 library and header directories can be set in Properties->Linker->General->Additional Library Directories and Properties->C/C++->General->Additional Include Directories. When using this method, the appropriate name for the glfw library should also be updated under Properties->C/C++->Linker->Input->Additional Dependencies.
+
+## Key APIs and Concepts
+- `hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList)` can be used to query which HIP devices can be used to share resources with the current OpenGL context. A device returned by this function must be selected using `hipSetDevice` or a stream must be created from such a device before OpenGL interop is possible.
+- `hipGraphicsGLRegisterBuffer(hipGraphicsResource_t* resource, GLuint buffer, unsigned int flags)` is used to import an OpenGL buffer into HIP. `flags` affects how the resource is used in HIP. For example:
+| flag | effect |
+| -------------------------------------- | ----------------------------------------------- |
+| `hipGraphicsRegisterFlagsNone` | HIP functions may read and write to the buffer. |
+| `hipGraphicsRegisterFlagsReadOnly` | HIP functions may only read from the buffer. |
+| `hipGraphicsRegisterFlagsWriteDiscard` | HIP functions may only write to the buffer. |
+- `hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, hipStream_t stream = 0)` is used to make imported OpenGL resources available to a HIP device, either the current device or a device used by a specific stream.
+- `hipGraphicsResourceGetMappedPointer(void** pointer, size_t* size, hipGraphicsResource_t resource)` is used to query the device pointer that represents the memory backing the OpenGL resource. The resulting pointer may be used as any other device pointer, like those obtained from `hipMalloc`.
+- `hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources, hipStream_t stream = 0)` is used to unmap imported resources from a HIP device or stream.
+- `hipGraphicsUnregisterResource(hipGraphicsResource_t resource)` is used to unregister a previously imported OpenGL resource, so that it is no longer shared with HIP.
+
+## Caveats
+### Multi-GPU systems
+When using OpenGL-HIP interop on multi-gpu systems, the OpenGL context must be created with the device that should be used for rendering. This is not done in this example for brevity, but is required in specific scenarios. For example, consider a multi-gpu machine with an AMD and an NVIDIA GPU: when this example is compiled for the HIP runtime, it must be launched such that the AMD GPU is used to render. A simple workaround is to launch the program from the monitor that is physically connected to the GPU to use. For multi-gpu laptops running Linux with an integrated AMD or Intel GPU and an NVIDIA dedicated gpu, the example must be launched with `__GLX_VENDOR_LIBRARY_NAME=nvidia` when compiling for NVIDIA.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `threadIdx`
+- `blockIdx`
+- `blockDim`
+- `__global__`
+
+#### Host symbols
+- `hipGetDeviceProperties`
+- `hipGetLastError`
+- `hipGLDeviceListAll`
+- `hipGLGetDevices`
+- `hipGraphicsGLRegisterBuffer`
+- `hipGraphicsMapResources`
+- `hipGraphicsRegisterFlagsWriteDiscard`
+- `hipGraphicsResourceGetMappedPointer`
+- `hipGraphicsUnmapResources`
+- `hipGraphicsUnregisterResource`
+- `hipLaunchKernelGGL`
+- `hipSetDevice`
+- `hipStreamCreate`
+- `hipStreamDestroy`
+- `hipStreamSynchronize`
diff --git a/HIP-Basic/opengl_interop/main.hip b/HIP-Basic/opengl_interop/main.hip
new file mode 100644
index 000000000..17566cfe1
--- /dev/null
+++ b/HIP-Basic/opengl_interop/main.hip
@@ -0,0 +1,617 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "nvidia_hip_fix.hpp"
+
+#include "example_utils.hpp"
+
+#include "glad/glad.h"
+
+#include <GLFW/glfw3.h>
+#include <hip/hip_runtime.h>
+
+#include <chrono>
+#include <cstring>
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+/// \brief The number of triangles that the example's grid is in width.
+constexpr uint32_t grid_width = 256;
+/// \brief The number of triangles that the example's grid is in height.
+constexpr uint32_t grid_height = 256;
+
+/// \brief The OpenGL vertex shader that is used to render the triangles in this example.
+/// The grid x- and y-positions are used directly as clip-space coordinates (z = 0, w = 1).
+/// The height value is passed on unchanged to the fragment shader as \p frag_height.
+constexpr const char* vertex_shader = R"(
+#version 330 core
+
+in float in_height;
+in vec2 in_xy;
+
+out float frag_height;
+
+void main()
+{
+ gl_Position = vec4(in_xy, 0, 1);
+ frag_height = in_height;
+}
+)";
+
+/// \brief The OpenGL fragment shader used to shade the triangles: the interpolated "height" is
+/// remapped to a grayscale color and written to an explicit output (gl_FragColor is deprecated).
+constexpr const char* fragment_shader = R"(
+#version 330 core
+
+in float frag_height;
+out vec4 frag_color;
+
+void main()
+{
+    frag_color = vec4(vec3(frag_height * 0.5 + 0.5), 1.0);
+}
+)";
+
+/// \brief Create the GLFW window, together with its OpenGL context, at the given initial size.
+/// \returns The created window. If creation fails, an error is printed and the program exits.
+GLFWwindow* create_window(const int initial_width, const int initial_height)
+{
+    // Request a debug-enabled OpenGL 3.3 core profile context.
+    glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GLFW_TRUE);
+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
+
+    constexpr const char* title = "OpenGL-HIP interop example";
+    GLFWwindow* const     handle
+        = glfwCreateWindow(initial_width, initial_height, title, nullptr, nullptr);
+    if(!handle)
+    {
+        std::cerr << "Failed to create GLFW window\n";
+        std::exit(error_exit_code);
+    }
+
+    return handle;
+}
+
+/// \brief Query a HIP device that is associated with the current OpenGL context.
+/// \returns The first HIP device-id that hipGLGetDevices reports. If no device is
+/// reported at all, an error is printed and the program is exited.
+int pick_hip_device()
+{
+    int          device_id;
+    unsigned int num_devices;
+    HIP_CHECK(hipGLGetDevices(&num_devices, &device_id, 1, hipGLDeviceListAll));
+
+    if(num_devices < 1)
+    {
+        std::cerr << "System has no OpenGL-capable HIP devices" << std::endl;
+        std::exit(error_exit_code);
+    }
+
+    return device_id;
+}
+
+/// \brief Utility function to compile shader source into an OpenGL shader.
+/// If the shader could not be compiled, this function prints the compile log
+/// and exits the program.
+/// \param type - The OpenGL shader type, for example \p GL_VERTEX_SHADER or \p GL_FRAGMENT_SHADER.
+/// \param source - The GLSL source code for the shader.
+/// \returns The handle of the successfully compiled shader.
+GLuint compile_shader(const GLenum type, const char* const source)
+{
+    const GLuint shader = glCreateShader(type);
+
+    const GLint length = static_cast<GLint>(std::strlen(source));
+    glShaderSource(shader, 1, &source, &length);
+    glCompileShader(shader);
+
+    GLint compile_status;
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status);
+
+    if(compile_status != GL_TRUE)
+    {
+        // Compiling failed: fetch the log into a buffer of exactly GL_INFO_LOG_LENGTH bytes.
+        GLint log_length;
+        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+        std::vector<char> log(log_length);
+        glGetShaderInfoLog(shader, log_length, nullptr, log.data());
+        std::cerr << "Failed to compile shader:\n";
+        std::cerr.write(log.data(), log.size()) << std::endl;
+        std::exit(error_exit_code);
+    }
+
+    return shader;
+}
+
+/// \brief Compile and link a vertex and a fragment shader into an OpenGL shader program.
+/// If compiling or linking fails, the corresponding log is printed and the program is exited.
+/// \param vert_src - The GLSL source code for the shader program's vertex shader.
+/// \param frag_src - The GLSL source code for the shader program's fragment shader.
+/// \returns The handle of the linked shader program.
+GLuint compile_shader_program(const char* const vert_src, const char* const frag_src)
+{
+    const GLuint program = glCreateProgram();
+
+    const GLuint vert = compile_shader(GL_VERTEX_SHADER, vert_src);
+    const GLuint frag = compile_shader(GL_FRAGMENT_SHADER, frag_src);
+
+    glAttachShader(program, frag);
+    glAttachShader(program, vert);
+
+    glLinkProgram(program);
+
+    GLint link_status;
+    glGetProgramiv(program, GL_LINK_STATUS, &link_status);
+    if(link_status != GL_TRUE)
+    {
+        // Linking failed, get the program link log and print it to the user.
+        GLint log_length;
+        glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
+        std::vector<char> log(log_length);
+        glGetProgramInfoLog(program, log_length, nullptr, log.data());
+        std::cerr << "Failed to link program:\n";
+        std::cerr.write(log.data(), log.size()) << std::endl;
+        std::exit(error_exit_code);
+    }
+
+    // The linked program no longer needs the individual shader objects.
+    glDetachShader(program, frag);
+    glDetachShader(program, vert);
+    glDeleteShader(frag);
+    glDeleteShader(vert);
+
+    return program;
+}
+
+/// \brief This structure contains the OpenGL handles that this example uses to render the
+/// triangle grid to the screen.
+///
+/// Three buffers are used to render the triangle grid, the color of which is determined by
+/// a HIP computation in \p simulator:
+/// - One buffer contains the height of each grid point (rendered as color).
+/// - One buffer holds the x- and y-coordinates for each of the corners of the triangles. Note: these
+///   coordinates are unique, as the triangles that are made up from these points are defined by the
+/// - Index buffer, that holds indices into the former two buffers to make up a list of triangles.
+struct renderer
+{
+    /// The total number of vertices for the triangles.
+    constexpr static size_t num_verts = grid_width * grid_height;
+    /// The number of bytes in the x- and y-coordinates buffer. Each x/y coordinate is encoded as
+    /// a pair of floats, which are stored in a packed array-of-structures format: | x | y | x | y | ... |.
+    constexpr static size_t grid_buffer_size = num_verts * sizeof(float) * 2;
+    /// The number of bytes in the height buffer. Each height is encoded as a floating point value.
+    /// This buffer will be shared with HIP, which is why these values are
+    /// stored in a separate buffer.
+    constexpr static size_t height_buffer_size = num_verts * sizeof(float);
+
+    /// The number of indices in the index buffer. Each triangle has 3 points, each square in the grid
+    /// is made up of 2 triangles. There are (width - 1) by (height - 1) squares in the grid.
+    constexpr static size_t num_indices = (grid_width - 1) * (grid_height - 1) * 3 * 2;
+    /// The number of bytes in the index buffer. Each index is encoded as a 32-bit int.
+    constexpr static size_t index_buffer_size = num_indices * sizeof(uint32_t);
+
+    /// An OpenGL handle to a Vertex Array Object, which has the grid and height buffers
+    /// bound to the corresponding attribute in the shader program (program) used for rendering.
+    GLuint vao;
+
+    /// Handle to the buffer that holds the indices for the triangles to render.
+    GLuint index_buffer;
+
+    /// Handle to the buffer that holds the x- and y-coordinates for each grid point.
+    GLuint grid_buffer;
+
+    /// Handle to the buffer that holds the height of each grid point. This buffer is shared with HIP.
+    GLuint height_buffer;
+
+    /// Handle to the OpenGL shader program that this example uses to render the triangles to the screen.
+    GLuint program;
+
+    /// Counters used to keep track of the rendering performance.
+    uint32_t                                       fps_frame = 0;
+    std::chrono::high_resolution_clock::time_point fps_start_time;
+
+    /// \brief Initialize OpenGL rendering resources.
+    renderer()
+    {
+        // Create a vertex array used to bind the attribute buffers.
+        glGenVertexArrays(1, &this->vao);
+
+        // Also generate the buffers in question.
+        GLuint buffers[3];
+        glGenBuffers(std::size(buffers), buffers);
+        this->index_buffer  = buffers[0];
+        this->grid_buffer   = buffers[1];
+        this->height_buffer = buffers[2];
+
+        // Compile the shader program used to render the triangles.
+        this->program = compile_shader_program(vertex_shader, fragment_shader);
+
+        // Upload the initial data to the buffers.
+        this->initialize_buffer_data();
+
+        // Set up the VAO by binding the height and grid buffers to the attribute locations
+        // in the shader program.
+        glBindVertexArray(this->vao);
+
+        // Note - keep variable "in_height" in sync with shader.
+        glBindBuffer(GL_ARRAY_BUFFER, this->height_buffer);
+        const GLuint height_attrib = glGetAttribLocation(this->program, "in_height");
+        glVertexAttribPointer(height_attrib, 1, GL_FLOAT, GL_FALSE, 0, nullptr);
+        glEnableVertexAttribArray(height_attrib);
+
+        // Note - keep variable "in_xy" in sync with shader.
+        const GLuint grid_attrib = glGetAttribLocation(this->program, "in_xy");
+        glBindBuffer(GL_ARRAY_BUFFER, this->grid_buffer);
+        glVertexAttribPointer(grid_attrib, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
+        glEnableVertexAttribArray(grid_attrib);
+
+        this->fps_start_time = std::chrono::high_resolution_clock::now();
+    }
+
+    renderer(const renderer&)            = delete;
+    renderer& operator=(const renderer&) = delete;
+
+    renderer(renderer&&)            = delete;
+    renderer& operator=(renderer&&) = delete;
+
+    ~renderer()
+    {
+        glDeleteProgram(this->program);
+        GLuint buffers[] = {this->index_buffer, this->grid_buffer, this->height_buffer};
+        glDeleteBuffers(std::size(buffers), buffers);
+        glDeleteVertexArrays(1, &this->vao);
+    }
+
+    /// \brief Upload the initial values for each buffer to OpenGL.
+    void initialize_buffer_data() const
+    {
+        // Initialize the height buffer.
+        glBindBuffer(GL_ARRAY_BUFFER, this->height_buffer);
+        // We do not need to fill it, as that is going to be done from HIP, but we
+        // do need to allocate it from OpenGL. This is done simply by passing `nullptr` as
+        // initial data pointer.
+        // GL_DYNAMIC_DRAW is passed because this buffer is going to be updated every frame,
+        // and is going to be used to hold vertex data for drawing - this may help the driver
+        // to render more efficiently.
+        glBufferData(GL_ARRAY_BUFFER, height_buffer_size, nullptr, GL_DYNAMIC_DRAW);
+
+        // Initialize the grid buffer.
+        {
+            glBindBuffer(GL_ARRAY_BUFFER, this->grid_buffer);
+            // Avoid having to allocate on host by allocating the buffer in OpenGL and then mapping it
+            // into host-memory to initialize it.
+            // This buffer is going to be initialized once and is going to be used for drawing,
+            // so pass GL_STATIC_DRAW as usage hint.
+            glBufferData(GL_ARRAY_BUFFER, grid_buffer_size, nullptr, GL_STATIC_DRAW);
+
+            float* grid = reinterpret_cast<float*>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
+            for(uint32_t y = 0; y < grid_height; ++y)
+            {
+                for(uint32_t x = 0; x < grid_width; ++x)
+                {
+                    *grid++ = (2.0f * x) / (grid_width - 1) - 1;
+                    *grid++ = (2.0f * y) / (grid_height - 1) - 1;
+                }
+            }
+
+            // Let OpenGL know that we are done with this buffer.
+            glUnmapBuffer(GL_ARRAY_BUFFER);
+        }
+
+        // Initialize the index buffer
+        {
+            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, this->index_buffer);
+            // Similar as the grid buffer, this buffer is going to be initialized once and is then used
+            // for drawing.
+            glBufferData(GL_ELEMENT_ARRAY_BUFFER, index_buffer_size, nullptr, GL_STATIC_DRAW);
+
+            uint32_t* indices
+                = reinterpret_cast<uint32_t*>(glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY));
+            for(uint32_t y = 0; y < grid_height - 1; ++y)
+            {
+                for(uint32_t x = 0; x < grid_width - 1; ++x)
+                {
+                    *indices++ = (y + 0) * grid_width + (x + 0);
+                    *indices++ = (y + 1) * grid_width + (x + 0);
+                    *indices++ = (y + 0) * grid_width + (x + 1);
+                    *indices++ = (y + 1) * grid_width + (x + 0);
+                    *indices++ = (y + 1) * grid_width + (x + 1);
+                    *indices++ = (y + 0) * grid_width + (x + 1);
+                }
+            }
+
+            glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
+        }
+    }
+
+    /// \brief Bind the OpenGL pipeline state for this renderer.
+    void bind() const
+    {
+        glBindVertexArray(this->vao);
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, this->index_buffer);
+        glUseProgram(this->program);
+    }
+
+    /// \brief Draw the next frame to the window. This requires the render state be bound using
+    /// bind.
+    void draw()
+    {
+        glDrawElements(GL_TRIANGLES, num_indices, GL_UNSIGNED_INT, nullptr);
+
+        // Output a naive performance measurement, averaged over windows of at least 5 seconds.
+        ++this->fps_frame;
+        const auto frame_time = std::chrono::high_resolution_clock::now();
+        const auto time_diff  = frame_time - this->fps_start_time;
+        if(time_diff > std::chrono::seconds{5})
+        {
+            const auto time_diff_sec
+                = std::chrono::duration_cast<std::chrono::duration<float>>(time_diff).count();
+            std::cout << "Average FPS (over " << std::fixed << std::setprecision(2) << time_diff_sec
+                      << " seconds): " << std::fixed << std::setprecision(2)
+                      << this->fps_frame / time_diff_sec << " (" << std::fixed
+                      << std::setprecision(2) << (time_diff_sec * 1000) / this->fps_frame
+                      << " ms per frame, " << this->fps_frame << " frames)" << std::endl;
+            this->fps_frame      = 0;
+            this->fps_start_time = frame_time;
+        }
+    }
+};
+
+/// \brief The main HIP kernel for this example - computes a simple sine wave over a
+/// 2-dimensional grid of points.
+/// \param height_map - the grid of points to compute a sine wave for. It is expected to be
+/// a \p grid_width by \p grid_height array packed into memory (x on the inner axis).
+/// \param time - The current time relative to the start of the program.
+__global__ void sinewave_kernel(float* height_map, const float time)
+{
+    const float        freq = 10.f;
+    const unsigned int x    = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned int y    = blockIdx.y * blockDim.y + threadIdx.y;
+    const float        u    = (2.f * x) / grid_width - 1.f;
+    const float        v    = (2.f * y) / grid_height - 1.f;
+    // Index as y * width + x: same order as the renderer's grid buffer, in bounds for any size.
+    if(x < grid_width && y < grid_height)
+    {
+        height_map[y * grid_width + x] = sinf(u * freq + time) * cosf(v * freq + time);
+    }
+}
+
+/// \brief This structure contains the HIP state and functionality used to advance the simulation.
+/// Initializing a \p simulator fetches the OpenGL height buffer from the corresponding renderer,
+/// and imports it as a HIP device pointer. This pointer is then passed to the simulation kernel
+/// (sinewave_kernel), which updates the values in it. When renderer::draw is called,
+/// the updated values are read from the buffer in OpenGL and used to render the triangle grid.
+struct simulator
+{
+    /// The HIP stream used to advance the simulation. This must be created from an OpenGL-interop
+    /// capable device, see pick_hip_device.
+    hipStream_t hip_stream;
+    /// A HIP graphics resource that is imported from the OpenGL height buffer to simulate.
+    hipGraphicsResource_t hip_height_buffer;
+    /// A device pointer to the height buffer, imported from the OpenGL height buffer.
+    float* hip_height_ptr;
+
+    /// The start time of the program, used for the simulation.
+    std::chrono::high_resolution_clock::time_point start_time;
+
+    /// \brief Initialize a simulator, that uses a particular HIP device.
+    /// \param renderer - The renderer that will be used to render the example. Its height buffer
+    /// is imported to HIP for use with this simulator.
+    explicit simulator(const int hip_device, const renderer& renderer)
+    {
+        // Create a HIP stream for the target device.
+        HIP_CHECK(hipSetDevice(hip_device));
+        HIP_CHECK(hipStreamCreate(&this->hip_stream));
+
+        // Import the OpenGL height buffer into a HIP graphics resource.
+        HIP_CHECK(hipGraphicsGLRegisterBuffer(
+            &this->hip_height_buffer,
+            renderer.height_buffer,
+            // We are going to write to this buffer from HIP,
+            // but we do not need to read from it.
+            // As an optimization we can pass hipGraphicsRegisterFlagsWriteDiscard,
+            // so that the driver knows that we do not need the old values of
+            // the buffer.
+            hipGraphicsRegisterFlagsWriteDiscard));
+
+        // After importing the OpenGL height buffer into HIP, map it into HIP memory so that we can use it.
+        HIP_CHECK(hipGraphicsMapResources(1, &this->hip_height_buffer, this->hip_stream));
+
+        // Fetch the device pointer that points to the OpenGL buffer's memory.
+        // This function also fetches the size of the buffer. We already know it, but we still need to pass
+        // a valid pointer to hipGraphicsResourceGetMappedPointer.
+        size_t size;
+        HIP_CHECK(
+            hipGraphicsResourceGetMappedPointer(reinterpret_cast<void**>(&this->hip_height_ptr),
+                                                &size,
+                                                this->hip_height_buffer));
+
+        this->start_time = std::chrono::high_resolution_clock::now();
+    }
+
+    simulator(const simulator&)            = delete;
+    simulator& operator=(const simulator&) = delete;
+
+    simulator(simulator&&)            = delete;
+    simulator& operator=(simulator&&) = delete;
+
+    ~simulator()
+    {
+        HIP_CHECK(hipStreamSynchronize(this->hip_stream));
+        HIP_CHECK(hipGraphicsUnmapResources(1, &this->hip_height_buffer, this->hip_stream));
+        HIP_CHECK(hipGraphicsUnregisterResource(this->hip_height_buffer));
+        HIP_CHECK(hipStreamDestroy(this->hip_stream));
+    }
+
+    /// \brief Advance the simulation one step.
+    void step()
+    {
+        const auto  now = std::chrono::high_resolution_clock::now();
+        const float time
+            = std::chrono::duration<float, std::chrono::seconds::period>(now - this->start_time)
+                  .count();
+
+        // The tile size to be used for each block of the computation. A tile is
+        // tile_size by tile_size threads in this case, since we are invoking the
+        // computation over a 2D-grid.
+        constexpr size_t tile_size = 8;
+
+        // Launch the HIP kernel to advance the simulation.
+        hipLaunchKernelGGL(sinewave_kernel,
+                           dim3((grid_width + tile_size - 1) / tile_size,
+                                (grid_height + tile_size - 1) / tile_size),
+                           dim3(tile_size, tile_size),
+                           0,
+                           this->hip_stream,
+                           this->hip_height_ptr,
+                           time);
+
+        // Check that no errors occurred while launching the kernel.
+        HIP_CHECK(hipGetLastError());
+    }
+};
+
+/// \brief GLFW framebuffer-size callback: whenever the window is resized, the OpenGL
+/// viewport is re-sized to span the new \p width by \p height.
+void resize_callback(GLFWwindow* const window, const int width, const int height)
+{
+    static_cast<void>(window); // Unused here, but part of the callback signature.
+    glViewport(0, 0, width, height);
+}
+
+/// \brief Program entry point.
+int main()
+{
+    // The initial width of the GLFW window when the example is first started.
+    constexpr int initial_window_width = 1280;
+    // The initial height of the GLFW window.
+    constexpr int initial_window_height = 800;
+
+    // Initialize GLFW.
+    glfwSetErrorCallback(
+        [](int code, const char* const message)
+        { std::cerr << "A glfw error encountered: " << message << "(" << code << ")\n"; });
+
+    if(glfwInit() != GLFW_TRUE)
+    {
+        std::cerr << "failed to initialize GLFW\n";
+        return error_exit_code;
+    }
+
+    // Initialize the GLFW window used to render the example.
+    GLFWwindow* const window = create_window(initial_window_width, initial_window_height);
+
+    // Ensure that we are using the OpenGL context associated to the Window.
+    glfwMakeContextCurrent(window);
+
+    // Load the OpenGL function pointers through glad, resolving them with GLFW - we need this for
+    // the more recent OpenGL functions, as these are not loaded by default on all platforms.
+    if(!gladLoadGLLoader(reinterpret_cast<GLADloadproc>(glfwGetProcAddress)))
+    {
+        std::cerr << "Failed to load OpenGL function pointers" << std::endl;
+        return error_exit_code;
+    }
+
+    // Disable vsync.
+    glfwSwapInterval(0);
+
+    // If the OpenGL GL_ARB_debug_output extension is present, set a callback that is called
+    // whenever an OpenGL error occurs. This saves us calling glGetError after every OpenGL function.
+    if(GLAD_GL_ARB_debug_output)
+    {
+        glDebugMessageCallbackARB(
+            [](GLenum,
+               GLenum,
+               GLuint,
+               GLenum        severity,
+               GLsizei       length,
+               const GLchar* message,
+               const void*)
+            {
+                std::cerr << "[OpenGL] ";
+                std::cerr.write(message, length) << std::endl;
+                if(severity == GL_DEBUG_SEVERITY_HIGH_ARB)
+                {
+                    std::exit(error_exit_code);
+                }
+            },
+            nullptr);
+        // We just want the errors: First disable all messaging, and then enable just the
+        // most severe ones.
+        glDebugMessageControlARB(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_FALSE);
+        glDebugMessageControlARB(GL_DONT_CARE,
+                                 GL_DONT_CARE,
+                                 GL_DEBUG_SEVERITY_HIGH_ARB,
+                                 0,
+                                 NULL,
+                                 GL_TRUE);
+        // Report errors synchronously instead of asynchronously.
+        glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
+    }
+
+    // Figure out which HIP device we need to use.
+    // This device needs to be interop-capable (see pick_hip_device).
+    const int hip_device = pick_hip_device();
+
+    // Let the user know which device we are using, on both the OpenGL and HIP sides.
+    hipDeviceProp_t hip_props;
+    HIP_CHECK(hipGetDeviceProperties(&hip_props, hip_device));
+    const GLubyte* const device_name = glGetString(GL_RENDERER);
+    std::cout << "Using device " << device_name << " (hip device " << hip_device
+              << ", compute capability " << hip_props.major << "." << hip_props.minor << ")\n";
+
+    // Sub-scope to call destructors before terminating GLFW.
+    {
+        renderer  renderer;
+        simulator simulator(hip_device, renderer);
+
+        // There are no other renderers, so we can bind the OpenGL state once.
+        renderer.bind();
+
+        glfwSetFramebufferSizeCallback(window, resize_callback);
+        glClearColor(0, 0, 0, 1);
+
+        // The main rendering loop.
+        // Repeat for as long as the window is not closed.
+        while(glfwWindowShouldClose(window) == GLFW_FALSE)
+        {
+            glClear(GL_COLOR_BUFFER_BIT);
+
+            // First step the simulation so that the height buffer is ready
+            // for the next frame.
+            simulator.step();
+
+            // Draw the example to the window's framebuffer.
+            renderer.draw();
+
+            // Present the framebuffer on screen.
+            glfwSwapBuffers(window);
+            glfwPollEvents();
+        }
+    }
+
+    // Clean up GLFW.
+    glfwDestroyWindow(window);
+    glfwTerminate();
+}
diff --git a/HIP-Basic/opengl_interop/nvidia_hip_fix.hpp b/HIP-Basic/opengl_interop/nvidia_hip_fix.hpp
new file mode 100644
index 000000000..e8cc7c6a6
--- /dev/null
+++ b/HIP-Basic/opengl_interop/nvidia_hip_fix.hpp
@@ -0,0 +1,63 @@
+#ifndef HIP_BASIC_OPENGL_INTEROP_NVIDIA_HIP_FIX_HPP
+#define HIP_BASIC_OPENGL_INTEROP_NVIDIA_HIP_FIX_HPP
+
+#include "glad/glad.h"
+
+#include <hip/hip_runtime.h>
+
+// TODO: Remove this once HIP supports these symbols.
+// See https://github.com/ROCm-Developer-Tools/hipamd/issues/49.
+#if defined(__HIP_PLATFORM_NVCC__) && !defined(hipGLDeviceListAll)
+
+    #include <cuda_gl_interop.h>
+
+    #define hipGLDeviceListAll cudaGLDeviceListAll
+    #define hipGLDeviceList cudaGLDeviceList
+    #define hipGraphicsResource_t cudaGraphicsResource_t
+    #define hipGraphicsRegisterFlagsWriteDiscard cudaGraphicsRegisterFlagsWriteDiscard
+// Each wrapper forwards to the cuda* function it names; `inline` allows multiple inclusion.
+inline hipError_t hipGLGetDevices(unsigned int* const   pHipDeviceCount,
+                                  int* const            pHipDevices,
+                                  const unsigned int    hipDeviceCount,
+                                  const hipGLDeviceList deviceList)
+{
+    return hipCUDAErrorTohipError(
+        cudaGLGetDevices(pHipDeviceCount, pHipDevices, hipDeviceCount, deviceList));
+}
+
+inline hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource_t* const resource,
+                                              const GLuint                 buffer,
+                                              const unsigned int           flags)
+{
+    return hipCUDAErrorTohipError(cudaGraphicsGLRegisterBuffer(resource, buffer, flags));
+}
+
+inline hipError_t hipGraphicsMapResources(const int                    count,
+                                          hipGraphicsResource_t* const resources,
+                                          const hipStream_t            stream = 0)
+{
+    return hipCUDAErrorTohipError(cudaGraphicsMapResources(count, resources, stream));
+}
+
+inline hipError_t hipGraphicsResourceGetMappedPointer(void** const                dev_ptr,
+                                                      size_t* const               size,
+                                                      const hipGraphicsResource_t resource)
+{
+    return hipCUDAErrorTohipError(cudaGraphicsResourceGetMappedPointer(dev_ptr, size, resource));
+}
+
+inline hipError_t hipGraphicsUnmapResources(const int                    count,
+                                            hipGraphicsResource_t* const resources,
+                                            const hipStream_t            stream = 0)
+{
+    return hipCUDAErrorTohipError(cudaGraphicsUnmapResources(count, resources, stream));
+}
+
+inline hipError_t hipGraphicsUnregisterResource(const hipGraphicsResource_t resource)
+{
+    return hipCUDAErrorTohipError(cudaGraphicsUnregisterResource(resource));
+}
+
+#endif // defined(__HIP_PLATFORM_NVCC__) && !defined(hipGLDeviceListAll)
+
+#endif // HIP_BASIC_OPENGL_INTEROP_NVIDIA_HIP_FIX_HPP
diff --git a/HIP-Basic/opengl_interop/opengl_interop_vs2019.sln b/HIP-Basic/opengl_interop/opengl_interop_vs2019.sln
new file mode 100644
index 000000000..ff158b1a1
--- /dev/null
+++ b/HIP-Basic/opengl_interop/opengl_interop_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "opengl_interop_vs2019", "opengl_interop_vs2019.vcxproj", "{96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Debug|x64.ActiveCfg = Debug|x64
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Debug|x64.Build.0 = Debug|x64
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Release|x64.ActiveCfg = Release|x64
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {2A3E6048-05BE-4A52-AFA6-AE575C6D96A8}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/opengl_interop/opengl_interop_vs2019.vcxproj b/HIP-Basic/opengl_interop/opengl_interop_vs2019.vcxproj
new file mode 100644
index 000000000..54c79d6af
--- /dev/null
+++ b/HIP-Basic/opengl_interop/opengl_interop_vs2019.vcxproj
@@ -0,0 +1,105 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+ 15.0
+ {96f8be41-5c64-4bf2-8a8e-474beaacaa5a}
+ Win32Proj
+ opengl_interop
+ 10.0
+ opengl_interop_vs2019
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(IntDir);$(MSBuildProjectDirectory)\..\..\Common;$(MSBuildProjectDirectory)\..\..\External;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+ glfw3dll.lib;%(AdditionalDependencies)
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(IntDir);$(MSBuildProjectDirectory)\..\..\Common;$(MSBuildProjectDirectory)\..\..\External;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+ true
+ true
+ glfw3dll.lib;%(AdditionalDependencies)
+
+
+
+
+
+
+
diff --git a/HIP-Basic/opengl_interop/opengl_interop_vs2019.vcxproj.filters b/HIP-Basic/opengl_interop/opengl_interop_vs2019.vcxproj.filters
new file mode 100644
index 000000000..ad0eb88b2
--- /dev/null
+++ b/HIP-Basic/opengl_interop/opengl_interop_vs2019.vcxproj.filters
@@ -0,0 +1,36 @@
+
+
+
+
+ {cf49f87a-a2e3-48b2-890e-73bf2c083001}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {bad110a0-7f33-44c8-acc4-f6bc0ddfb309}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {6ffa9c2c-4fb9-4147-906c-cfd103cd1895}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+ Header Files
+
+
+ Header Files
+
+
+
diff --git a/HIP-Basic/runtime_compilation/CMakeLists.txt b/HIP-Basic/runtime_compilation/CMakeLists.txt
index 47974f940..0ca404580 100644
--- a/HIP-Basic/runtime_compilation/CMakeLists.txt
+++ b/HIP-Basic/runtime_compilation/CMakeLists.txt
@@ -61,10 +61,10 @@ if(GPU_RUNTIME STREQUAL "CUDA")
# Include the HIP header directory.
list(APPEND include_dirs "${ROCM_ROOT}/include")
# In this example we also need to link nvrtc CUDA library
- find_package("CUDAToolkit" REQUIRED)
+ find_package(CUDAToolkit)
list(APPEND link_libs "CUDA::nvrtc")
endif()
-
+
target_link_libraries(${example_name} ${link_libs})
target_include_directories(${example_name} PRIVATE ${include_dirs})
set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/runtime_compilation/Makefile b/HIP-Basic/runtime_compilation/Makefile
index 65ba415d5..e8c6fa5c9 100644
--- a/HIP-Basic/runtime_compilation/Makefile
+++ b/HIP-Basic/runtime_compilation/Makefile
@@ -31,23 +31,29 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
- LDLIBS += -l nvrtc
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ILDLIBS += -l nvrtc
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/runtime_compilation/runtime_compilation_vs2019.vcxproj b/HIP-Basic/runtime_compilation/runtime_compilation_vs2019.vcxproj
index 5e0168bea..2b501b55f 100644
--- a/HIP-Basic/runtime_compilation/runtime_compilation_vs2019.vcxproj
+++ b/HIP-Basic/runtime_compilation/runtime_compilation_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
@@ -98,4 +98,4 @@
-
\ No newline at end of file
+
diff --git a/HIP-Basic/saxpy/Makefile b/HIP-Basic/saxpy/Makefile
index c75108155..4befc9281 100644
--- a/HIP-Basic/saxpy/Makefile
+++ b/HIP-Basic/saxpy/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/saxpy/saxpy_vs2019.vcxproj b/HIP-Basic/saxpy/saxpy_vs2019.vcxproj
index d96024919..560c3deac 100644
--- a/HIP-Basic/saxpy/saxpy_vs2019.vcxproj
+++ b/HIP-Basic/saxpy/saxpy_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/HIP-Basic/shared_memory/Makefile b/HIP-Basic/shared_memory/Makefile
index 36a7d2714..dbbb62a82 100644
--- a/HIP-Basic/shared_memory/Makefile
+++ b/HIP-Basic/shared_memory/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/shared_memory/shared_memory_vs2019.vcxproj b/HIP-Basic/shared_memory/shared_memory_vs2019.vcxproj
index 8f74a5949..523d2181d 100644
--- a/HIP-Basic/shared_memory/shared_memory_vs2019.vcxproj
+++ b/HIP-Basic/shared_memory/shared_memory_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/HIP-Basic/static_device_library/.gitignore b/HIP-Basic/static_device_library/.gitignore
new file mode 100644
index 000000000..b47dd905b
--- /dev/null
+++ b/HIP-Basic/static_device_library/.gitignore
@@ -0,0 +1,3 @@
+hip_static_device_library
+libhip_static_device.a
+library.o
diff --git a/HIP-Basic/static_device_library/Makefile b/HIP-Basic/static_device_library/Makefile
new file mode 100644
index 000000000..2219e8ff8
--- /dev/null
+++ b/HIP-Basic/static_device_library/Makefile
@@ -0,0 +1,69 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE            := hip_static_device_library
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME        := HIP
+
+# This example only accepts the HIP runtime; any other value is rejected.
+ifneq ($(GPU_RUNTIME), HIP)
+  $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR  := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+AR     ?= ar
+
+# Common variables and flags
+# CXXFLAGS, CPPFLAGS, LDFLAGS and LDLIBS may be set by the user; they are
+# appended to the flags this example always needs (the I* variables).
+CXX_STD   := c++17
+CXXFLAGS  ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -I library $(CPPFLAGS)
+ILDFLAGS  := $(LDFLAGS)
+ILDLIBS   := $(LDLIBS)
+
+EXAMPLE_LIB := hip_static_device
+LIBEXAMPLE  := lib$(EXAMPLE_LIB).a
+
+# Default goal: build the example executable.
+all: $(EXAMPLE)
+
+# Link the example. The static device library is placed before the source file
+# and -fgpu-rdc is passed, as described in the README of this example.
+$(EXAMPLE): main.hip $(LIBEXAMPLE) library/library.hpp $(COMMON_INCLUDE_DIR)/example_utils.hpp
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -fgpu-rdc -o $@ $(LIBEXAMPLE) $< $(ILDLIBS)
+
+# Bundle the library object file into a static archive.
+$(LIBEXAMPLE): library.o
+	$(AR) rcsD $@ $^
+
+# Compile the library source to an object file, also with -fgpu-rdc.
+library.o: library/library.hip library/library.hpp
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) -c -o $@ $< -fgpu-rdc
+
+clean:
+	$(RM) $(EXAMPLE) $(LIBEXAMPLE) library.o
+
+.PHONY: all clean
diff --git a/HIP-Basic/static_device_library/README.md b/HIP-Basic/static_device_library/README.md
new file mode 100644
index 000000000..434616341
--- /dev/null
+++ b/HIP-Basic/static_device_library/README.md
@@ -0,0 +1,46 @@
+# HIP-Basic Device Static Library Example
+
+## Description
+This example shows how to create a static library that exports device functions.
+
+### Application flow
+1. A number of constants for the example problem are initialized.
+2. A host vector is prepared with an increasing sequence of integers starting from 0.
+3. The necessary amount of device (GPU) memory is allocated and the elements of the input vectors are copied to the device memory.
+4. A simple square kernel is launched with the previously defined arguments.
+5. The kernel defined in `main.hip` fetches inputs from device memory, and calls `device_square`. The `device_square` function is imported from a device static library, which is created from `library/library.hip`.
+6. The result from calling `device_square` is written back to device memory.
+7. The results are copied back to the host.
+8. The previously allocated device memory is freed.
+9. The results from the device are compared with the expected results on the host. An error message is printed if the results were not as expected and the function returns with an error code.
+
+## Build Process
+Compiling a HIP static library that exports device functions must be done in two steps:
+1. First, the source files that make up the library must be compiled to object files. This is done similarly to how an object file is created for a regular source file (using the `-c` flag), except that the additional option `-fgpu-rdc` must be passed:
+ ```shell
+ hipcc -c -fgpu-rdc -Ilibrary library/library.hip -o library.o
+ ```
+2. After compiling all library sources into object files, they must be manually bundled into an archive that can act as a static library. `hipcc` cannot currently create this archive automatically, hence it must be created manually using `ar`:
+ ```shell
+ ar rcsD liblibrary.a library.o
+ ```
+After the static device library has been compiled, it can be linked with another HIP program or library. Linking with a static device library is done by placing it on the command line directly, and additionally requires `-fgpu-rdc`. The static library should be placed on the command line _before_ any source files. Source files that use the static library can also be compiled to object files first, in this case they also need to be compiled with `-fgpu-rdc`:
+```shell
+hipcc -fgpu-rdc liblibrary.a main.hip -o hip_static_device_library
+```
+**Note**: static device libraries _must_ be linked with `hipcc`. There is no support yet for linking such libraries with (ROCm-bundled) clang, using CMake, or using Visual Studio.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `blockDim`
+- `blockIdx`
+- `threadIdx`
+- `__device__`
+- `__global__`
+#### Host symbols
+- `hipMalloc`
+- `hipMemcpy`
+- `hipLaunchKernelGGL`
+- `hipGetLastError`
+- `hipFree`
diff --git a/HIP-Basic/static_device_library/library/library.hip b/HIP-Basic/static_device_library/library/library.hip
new file mode 100644
index 000000000..67f29b278
--- /dev/null
+++ b/HIP-Basic/static_device_library/library/library.hip
@@ -0,0 +1,31 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "library.hpp"
+
+/// \brief Device function that computes the square of an integer. It is exported from
+/// this library so that device code linked against the library can call it.
+__device__ int device_square(int x)
+{
+    const int squared = x * x;
+    return squared;
+}
diff --git a/HIP-Basic/static_device_library/library/library.hpp b/HIP-Basic/static_device_library/library/library.hpp
new file mode 100644
index 000000000..e8b668f23
--- /dev/null
+++ b/HIP-Basic/static_device_library/library/library.hpp
@@ -0,0 +1,30 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef HIP_BASIC_STATIC_DEVICE_LIBRARY_LIBRARY_HPP
+#define HIP_BASIC_STATIC_DEVICE_LIBRARY_LIBRARY_HPP
+
+#include <hip/hip_runtime.h>
+
+/// \brief Returns the square of \p x. Callable from device code; defined in library.hip.
+__device__ int device_square(int x);
+#endif // HIP_BASIC_STATIC_DEVICE_LIBRARY_LIBRARY_HPP
diff --git a/HIP-Basic/static_device_library/main.hip b/HIP-Basic/static_device_library/main.hip
new file mode 100644
index 000000000..2cd5b23f4
--- /dev/null
+++ b/HIP-Basic/static_device_library/main.hip
@@ -0,0 +1,112 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "library.hpp"
+
+#include "example_utils.hpp"
+
+#include
+#include
+
+#include
+#include
+#include
+
+/// \brief Squaring kernel: each thread squares one element of the input by calling
+/// into the device library and stores the result at the same index of the output.
+__global__ void square_kernel(int* out, const int* in, const unsigned int size)
+{
+    const unsigned int global_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(global_idx >= size)
+    {
+        return; // Threads past the end of the data have nothing to do.
+    }
+    // device_square is defined in the external library (see library/library.hip).
+    out[global_idx] = device_square(in[global_idx]);
+}
+
+/// \brief Squares a vector of integers on the device through the static device library
+/// and validates the result on the host.
+int main()
+{
+    // The number of elements in the input vector.
+    constexpr unsigned int size = 512;
+
+    // The number of bytes to allocate for the input- and output device vectors.
+    // The element type is int, matching the host vectors and the kernel arguments.
+    constexpr size_t size_bytes = size * sizeof(int);
+
+    // The number of threads per kernel block.
+    constexpr unsigned int block_size = 256;
+
+    // The number of blocks per kernel grid. The expression below calculates `ceil(size / block_size)`.
+    constexpr unsigned int grid_size = (size + block_size - 1) / block_size;
+
+    // Allocate host input vector and fill it with an increasing sequence (i.e. 0, 1, 2, ...).
+    std::vector<int> in(size);
+    std::iota(in.begin(), in.end(), 0);
+
+    // Allocate input and output device vector and copy the input data.
+    int* d_in{};
+    int* d_out{};
+    HIP_CHECK(hipMalloc(&d_in, size_bytes));
+    HIP_CHECK(hipMalloc(&d_out, size_bytes));
+    HIP_CHECK(hipMemcpy(d_in, in.data(), size_bytes, hipMemcpyHostToDevice));
+
+    // Launch the kernel on the default stream.
+    hipLaunchKernelGGL(square_kernel,
+                       dim3(grid_size),
+                       dim3(block_size),
+                       0,
+                       hipStreamDefault,
+                       d_out,
+                       d_in,
+                       size);
+
+    // Check if the kernel launch was successful.
+    HIP_CHECK(hipGetLastError());
+
+    // Copy the results back to the host. This call blocks the host's execution until the copy is finished.
+    std::vector<int> out(size);
+    HIP_CHECK(hipMemcpy(out.data(), d_out, size_bytes, hipMemcpyDeviceToHost));
+
+    // Free device memory.
+    HIP_CHECK(hipFree(d_in));
+    HIP_CHECK(hipFree(d_out));
+
+    // Check the results' validity.
+    size_t errors = 0;
+    for(size_t i = 0; i < size; ++i)
+    {
+        if(in[i] * in[i] != out[i])
+        {
+            ++errors;
+        }
+    }
+
+    if(errors != 0)
+    {
+        std::cout << "Validation failed. Errors: " << errors << std::endl;
+        return error_exit_code;
+    }
+    std::cout << "Validation passed." << std::endl;
+}
diff --git a/HIP-Basic/static_host_library/.gitignore b/HIP-Basic/static_host_library/.gitignore
new file mode 100644
index 000000000..c38c4b7c4
--- /dev/null
+++ b/HIP-Basic/static_host_library/.gitignore
@@ -0,0 +1,3 @@
+hip_static_host_library
+hip_static_host_library_cxx
+libhip_static_host.a
diff --git a/HIP-Basic/static_host_library/CMakeLists.txt b/HIP-Basic/static_host_library/CMakeLists.txt
new file mode 100644
index 000000000..45322aaec
--- /dev/null
+++ b/HIP-Basic/static_host_library/CMakeLists.txt
@@ -0,0 +1,91 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_static_host_library)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+set(library_name hip_static_host)
+set(example_name_cxx ${example_name}_cxx)
+
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA")
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+# Create the HIP static host library.
+add_library(${library_name} STATIC library/library.hip)
+target_include_directories(${library_name} PRIVATE ${include_dirs})
+target_include_directories(${library_name} PUBLIC library)
+set_target_properties(${library_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_source_files_properties(library/library.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
+
+# Create a driver executable using ROCm's bundled version of clang.
+add_executable(${example_name} main.cpp)
+# Link the static host library we have just created.
+target_link_libraries(${example_name} PRIVATE ${library_name})
+# We are creating a regular, non-HIP, executable, and so we don't need to pass
+# a list of devices for this target.
+set_target_properties(${example_name} PROPERTIES HIP_ARCHITECTURES FALSE)
+
+# Create a driver executable using the host c++ compiler.
+add_executable(${example_name_cxx} main.cpp)
+add_dependencies(${example_name_cxx} ${library_name})
+# Link the static host library we have just created.
+# Note, pass the linker commands manually to prevent cmake from
+# automatically deducing this as a HIP-compilation and using
+# the ROCm bundled clang.
+target_link_libraries(${example_name_cxx} PRIVATE -L$<TARGET_FILE_DIR:${library_name}> -l${library_name})
+target_include_directories(${example_name_cxx} PRIVATE library)
+
+# If linking with the host c++ compiler, we also need to link the runtime libraries from the respective language.
+if(GPU_RUNTIME STREQUAL "HIP")
+ find_package(HIP)
+ target_link_libraries(${example_name_cxx} PRIVATE hip::host)
+else()
+ find_package(CUDAToolkit)
+ target_link_libraries(${example_name_cxx} PRIVATE CUDA::cudart)
+endif()
+
+# Make examples runnable using ctest
+add_test(${example_name} ${example_name})
+add_test(${example_name_cxx} ${example_name_cxx})
diff --git a/HIP-Basic/static_host_library/Makefile b/HIP-Basic/static_host_library/Makefile
new file mode 100644
index 000000000..e228e4e7e
--- /dev/null
+++ b/HIP-Basic/static_host_library/Makefile
@@ -0,0 +1,89 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE            := hip_static_host_library
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME        := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+CUDA_INSTALL_DIR := /usr/local/cuda
+HIP_INCLUDE_DIR  := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+CXX    ?= g++
+
+# Common variables and flags
+# User-supplied CXXFLAGS, CPPFLAGS, LDFLAGS and LDLIBS are appended to the I* variables below.
+CXX_STD   := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -I library
+ILDFLAGS  :=
+ILDLIBS   :=
+# Flags passed to $(HIPCXX) when producing the static library.
+LIBFLAGS  :=
+# GPU runtime libraries added when linking with the host compiler $(CXX).
+HIPLIBS   :=
+# Compile flags for $(CXX); only filled from CXXFLAGS in the HIP case.
+HOSTFLAGS :=
+
+ifeq ($(GPU_RUNTIME), CUDA)
+  ICXXFLAGS += -x cu
+  ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+  LIBFLAGS  += -lib
+  HIPLIBS   += -L$(CUDA_INSTALL_DIR)/lib64 -lcudart
+else ifeq ($(GPU_RUNTIME), HIP)
+  CXXFLAGS  ?= -Wall -Wextra
+  LIBFLAGS  += --emit-static-lib -fPIC
+  HIPLIBS   += -L$(ROCM_INSTALL_DIR)/lib -lamdhip64
+  HOSTFLAGS += $(CXXFLAGS)
+else
+  $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS  += $(LDFLAGS)
+ILDLIBS   += $(LDLIBS)
+
+EXAMPLE_CXX := $(EXAMPLE)_cxx
+EXAMPLE_LIB := hip_static_host
+LIBEXAMPLE  := lib$(EXAMPLE_LIB).a
+
+all: $(EXAMPLE) $(EXAMPLE_CXX)
+
+# Driver executable built with $(HIPCXX); rebuilt when the library or its header changes.
+$(EXAMPLE): main.cpp $(LIBEXAMPLE) library/library.hpp
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) -L. $(ILDFLAGS) -o $@ $< -l$(EXAMPLE_LIB) $(ILDLIBS)
+
+# Driver executable built with the host compiler; the GPU runtime libraries are linked explicitly.
+$(EXAMPLE_CXX): main.cpp $(LIBEXAMPLE) library/library.hpp
+	$(CXX) $(HOSTFLAGS) $(ICPPFLAGS) -L. $(ILDFLAGS) -o $@ $< -l$(EXAMPLE_LIB) $(HIPLIBS) $(ILDLIBS)
+
+# The static host library itself, produced by $(HIPCXX) with $(LIBFLAGS).
+$(LIBEXAMPLE): library/library.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) $(LIBFLAGS) -o $@ $< $(ILDLIBS)
+
+clean:
+	$(RM) $(EXAMPLE_CXX) $(EXAMPLE) $(LIBEXAMPLE)
+
+.PHONY: all clean
diff --git a/HIP-Basic/static_host_library/README.md b/HIP-Basic/static_host_library/README.md
new file mode 100644
index 000000000..5f2c9aabf
--- /dev/null
+++ b/HIP-Basic/static_host_library/README.md
@@ -0,0 +1,65 @@
+# HIP-Basic Host Static Library Example
+
+## Description
+This example shows how to create a static library that exports host functions. The library may contain both `__global__` and `__device__` code as well, but in this example only `__host__` functions are exported. The resulting library may be linked with other libraries or programs, which do not necessarily need to be HIP libraries or programs. A static host library appears as a regular library, and is compatible with either hipcc or the native system's linker. When using the system linker, the libraries or applications using the static host library do need to be linked with `libamdhip64`.
+
+### Application flow
+1. The `main` function in `main.cpp` calls the library's sole exported function, `run_test`. This symbol is made visible by including the static library's header file.
+2. In `run_test` in `library/library.hip`, a number of constants for the example problem are initialized.
+3. A vector with input data is initialized in host memory. It is filled with an incrementing sequence starting from 0.
+4. The necessary amount of device (GPU) memory is allocated and the elements of the input vectors are copied to the device memory.
+5. A simple copy kernel is launched with the previously defined arguments.
+6. The results are copied back to the host.
+7. The previously allocated device memory is freed.
+8. The results from the device are compared with the expected results on the host. An error message is printed if the results were not as expected and the function returns with an error code. If the results were as expected, the function returns 0.
+9. Control flow returns to `main` in `main.cpp`, which exits the program with the value that was returned from `run_test`.
+
+## Build Process
+A HIP static host library is built the same as a regular application, except that the additional flag `--emit-static-lib` must be passed to `hipcc`. Additionally, the library should be compiled with position independent code enabled:
+```shell
+hipcc library/library.hip -o liblibrary.a --emit-static-lib -fPIC
+```
+Linking the static library with another library or object is done in the same way as a regular library:
+```shell
+hipcc main.cpp -Ilibrary -L. -llibrary -o hip_static_host_library
+```
+Note that when linking the library using the host compiler or linker, such as `g++` or `clang++`, the `amdhip64` library must additionally be linked:
+```shell
+g++ main.cpp -Ilibrary -L. -llibrary -L/opt/rocm/lib -lamdhip64 -o hip_static_host_library
+```
+
+### CMake
+Building a HIP static host library can be done using the CMake `add_library` command:
+```cmake
+add_library(library_name STATIC library/library.hip)
+target_include_directories(library_name PUBLIC library)
+```
+Note that while the required compilation flags to create a library are passed to the compiler automatically by CMake, position independent code must be turned on manually:
+```cmake
+set_target_properties(${library_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+```
+Linking with the static library is done in the same way as regular libraries. If used via `target_link_libraries`, this automatically adds the `amdhip64` dependency:
+```cmake
+add_executable(executable_name main.cpp)
+target_link_libraries(executable_name library_name)
+```
+
+### Visual Studio 2019
+When using Visual Studio 2019 to build a HIP static host library, a separate project can be used to build the static library. This can be set up from scratch by creating a new AMD HIP C++ project, and then converting it to a library by setting `[right click project] -> Properties -> Configuration Properties -> General -> Configuration Type` to `Library`.
+
+Linking with a HIP static host library can then be done simply by adding a reference to the corresponding project. This can be done under `[right click project] -> Add -> Reference` by checking the checkbox of the library project, and works both for AMD HIP C++ Visual Studio projects (demonstrated in [static_host_library_vs2019.vcxproj](./static_host_library_vs2019.vcxproj)) as well as regular Windows application Visual Studio projects (demonstrated in [static_host_library_msvc_vs2019.vcxproj](./static_host_library_msvc/static_host_library_msvc_vs2019.vcxproj)).
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `blockDim`
+- `blockIdx`
+- `threadIdx`
+- `__device__`
+- `__global__`
+#### Host symbols
+- `hipMalloc`
+- `hipMemcpy`
+- `hipLaunchKernelGGL`
+- `hipGetLastError`
+- `hipFree`
diff --git a/HIP-Basic/static_host_library/library/libhip_static_host_vs2019.vcxproj b/HIP-Basic/static_host_library/library/libhip_static_host_vs2019.vcxproj
new file mode 100644
index 000000000..938b68617
--- /dev/null
+++ b/HIP-Basic/static_host_library/library/libhip_static_host_vs2019.vcxproj
@@ -0,0 +1,98 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {6d3f8f78-225e-490e-abd3-762857ebf597}
+ Win32Proj
+ libstatic_host
+ 10.0
+ libstatic_host_vs2019
+
+
+
+ StaticLibrary
+ true
+ HIP
+ Unicode
+
+
+ StaticLibrary
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ false
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/static_host_library/library/libhip_static_host_vs2019.vcxproj.filters b/HIP-Basic/static_host_library/library/libhip_static_host_vs2019.vcxproj.filters
new file mode 100644
index 000000000..bebfe1d26
--- /dev/null
+++ b/HIP-Basic/static_host_library/library/libhip_static_host_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {f36b245a-ea37-47ce-a958-b37d104166e1}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {ebf9651c-ade6-4a45-a0c1-b0a733ec66ce}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {44967eea-7946-4c00-86ca-5a46dc4bc494}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/static_host_library/library/library.hip b/HIP-Basic/static_host_library/library/library.hip
new file mode 100644
index 000000000..35fcbd3a6
--- /dev/null
+++ b/HIP-Basic/static_host_library/library/library.hip
@@ -0,0 +1,117 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+
+#include
+#include
+#include
+
+/// \brief Element-wise copy kernel: thread i stores in[i] to out[i] for every i below size.
+__global__ void copy_kernel(uint32_t* out, const uint32_t* in, const unsigned int size)
+{
+    const unsigned int global_idx = threadIdx.x + blockDim.x * blockIdx.x;
+    if(size > global_idx)
+    {
+        out[global_idx] = in[global_idx];
+    }
+}
+
+/// \brief The main entry point of this static host library: This function is exported
+/// from this library, and can be called from other libraries or executables if this library
+/// is linked with it.
+///
+/// It performs a simple test invocation of a HIP kernel. If the test passes a message is printed
+/// and the function returns 0. Otherwise an error message is printed and the function returns
+/// `error_exit_code`.
+int run_test()
+{
+    // The number of elements in the input vector.
+    constexpr unsigned int size = 512;
+
+    // The number of bytes to allocate for the input- and output device vectors.
+    constexpr size_t size_bytes = size * sizeof(uint32_t);
+
+    // The number of threads per kernel block.
+    constexpr unsigned int block_size = 256;
+
+    // The number of blocks per kernel grid. The expression below calculates `ceil(size / block_size)`.
+    constexpr unsigned int grid_size = (size + block_size - 1) / block_size;
+
+    // Allocate host input vector and fill it with an increasing sequence (i.e. 0, 1, 2, ...).
+    std::vector<uint32_t> in(size);
+    std::iota(in.begin(), in.end(), 0);
+
+    // Allocate input and output device vector and copy the input data.
+    uint32_t* d_in{};
+    uint32_t* d_out{};
+    HIP_CHECK(hipMalloc(&d_in, size_bytes));
+    HIP_CHECK(hipMalloc(&d_out, size_bytes));
+    HIP_CHECK(hipMemcpy(d_in, in.data(), size_bytes, hipMemcpyHostToDevice));
+
+    // Launch the kernel on the default stream.
+    hipLaunchKernelGGL(copy_kernel,
+                       dim3(grid_size),
+                       dim3(block_size),
+                       0,
+                       hipStreamDefault,
+                       d_out,
+                       d_in,
+                       size);
+
+    // Check if the kernel launch was successful.
+    HIP_CHECK(hipGetLastError());
+
+    // Copy the results back to the host. This call blocks the host's execution until the copy is finished.
+    std::vector<uint32_t> out(size);
+    HIP_CHECK(hipMemcpy(out.data(), d_out, size_bytes, hipMemcpyDeviceToHost));
+
+    // Free device memory.
+    HIP_CHECK(hipFree(d_in));
+    HIP_CHECK(hipFree(d_out));
+
+    // Check the results' validity.
+    size_t errors = 0;
+    for(size_t i = 0; i < size; ++i)
+    {
+        if(in[i] != out[i])
+        {
+            ++errors;
+        }
+    }
+
+    if(errors != 0)
+    {
+        std::cout << "Validation failed. Errors: " << errors << std::endl;
+        // Return control flow to the main program.
+        return error_exit_code;
+    }
+    else
+    {
+        std::cout << "Validation passed." << std::endl;
+    }
+
+    // Return control flow to the main program.
+    return 0;
+}
diff --git a/HIP-Basic/static_host_library/library/library.hpp b/HIP-Basic/static_host_library/library/library.hpp
new file mode 100644
index 000000000..4133f2c6d
--- /dev/null
+++ b/HIP-Basic/static_host_library/library/library.hpp
@@ -0,0 +1,28 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef HIP_BASIC_STATIC_HOST_LIBRARY_LIBRARY_HPP
+#define HIP_BASIC_STATIC_HOST_LIBRARY_LIBRARY_HPP
+
+/// \brief Runs the library's HIP test; main() forwards the returned int as the exit code.
+int run_test();
+#endif // HIP_BASIC_STATIC_HOST_LIBRARY_LIBRARY_HPP
diff --git a/HIP-Basic/static_host_library/main.cpp b/HIP-Basic/static_host_library/main.cpp
new file mode 100644
index 000000000..f905c8497
--- /dev/null
+++ b/HIP-Basic/static_host_library/main.cpp
@@ -0,0 +1,32 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "library.hpp"
+
+/// \brief The main entry point of the application.
+/// This file is compiled as a regular program and therefore cannot contain
+/// HIP code. The value returned by run_test() is used as the exit code.
+int main()
+{
+ // Call into the library function, which does contain HIP code.
+ return run_test();
+}
diff --git a/HIP-Basic/static_host_library/static_host_library_msvc/static_host_library_msvc_vs2019.vcxproj b/HIP-Basic/static_host_library/static_host_library_msvc/static_host_library_msvc_vs2019.vcxproj
new file mode 100644
index 000000000..2be321815
--- /dev/null
+++ b/HIP-Basic/static_host_library/static_host_library_msvc/static_host_library_msvc_vs2019.vcxproj
@@ -0,0 +1,97 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ 16.0
+ Win32Proj
+ {547b99c2-cbe3-4e1f-a1d6-26e261d67a3e}
+ static_host_library_msvc_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ v142
+ Unicode
+
+
+ Application
+ false
+ v142
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+
+ Level3
+ true
+ _DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+ $(MSBuildProjectDirectory)\..;$(MSBuildProjectDirectory)\..\library;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+
+
+
+
+ Level3
+ true
+ true
+ true
+ NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+ $(MSBuildProjectDirectory)\..;$(MSBuildProjectDirectory)\..\library;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+ {6d3f8f78-225e-490e-abd3-762857ebf597}
+
+
+
+
+
+
+
+
+
diff --git a/HIP-Basic/static_host_library/static_host_library_msvc/static_host_library_msvc_vs2019.vcxproj.filters b/HIP-Basic/static_host_library/static_host_library_msvc/static_host_library_msvc_vs2019.vcxproj.filters
new file mode 100644
index 000000000..4f11a70cd
--- /dev/null
+++ b/HIP-Basic/static_host_library/static_host_library_msvc/static_host_library_msvc_vs2019.vcxproj.filters
@@ -0,0 +1,22 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
diff --git a/HIP-Basic/static_host_library/static_host_library_vs2019.sln b/HIP-Basic/static_host_library/static_host_library_vs2019.sln
new file mode 100644
index 000000000..a98d4f88c
--- /dev/null
+++ b/HIP-Basic/static_host_library/static_host_library_vs2019.sln
@@ -0,0 +1,37 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "static_host_library_vs2019", "static_host_library_vs2019.vcxproj", "{5F8A7FEE-3A79-4588-9244-8575748026F7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libstatic_host_vs2019", "library\libhip_static_host_vs2019.vcxproj", "{6D3F8F78-225E-490E-ABD3-762857EBF597}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "static_host_library_msvc_vs2019", "static_host_library_msvc\static_host_library_msvc_vs2019.vcxproj", "{547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Debug|x64.ActiveCfg = Debug|x64
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Debug|x64.Build.0 = Debug|x64
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Release|x64.ActiveCfg = Release|x64
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Release|x64.Build.0 = Release|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Debug|x64.ActiveCfg = Debug|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Debug|x64.Build.0 = Debug|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Release|x64.ActiveCfg = Release|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Release|x64.Build.0 = Release|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Debug|x64.ActiveCfg = Debug|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Debug|x64.Build.0 = Debug|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Release|x64.ActiveCfg = Release|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {3D47C778-E97A-461C-816D-262B25C88F3B}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/static_host_library/static_host_library_vs2019.vcxproj b/HIP-Basic/static_host_library/static_host_library_vs2019.vcxproj
new file mode 100644
index 000000000..a984a85f6
--- /dev/null
+++ b/HIP-Basic/static_host_library/static_host_library_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+ {6d3f8f78-225e-490e-abd3-762857ebf597}
+
+
+
+ 15.0
+ {5f8a7fee-3a79-4588-9244-8575748026f7}
+ Win32Proj
+ static_host_library_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ library/
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ library/
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/static_host_library/static_host_library_vs2019.vcxproj.filters b/HIP-Basic/static_host_library/static_host_library_vs2019.vcxproj.filters
new file mode 100644
index 000000000..3f96fef50
--- /dev/null
+++ b/HIP-Basic/static_host_library/static_host_library_vs2019.vcxproj.filters
@@ -0,0 +1,22 @@
+
+
+
+
+ {7c2c5589-25cf-46b5-b104-8d33b438c7e5}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {81c39452-a2c0-4bad-a016-b37808d239cb}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {2c9f24ff-7012-4ffe-a538-506eccc3a507}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/streams/Makefile b/HIP-Basic/streams/Makefile
index e7cbd72cf..080a2310f 100644
--- a/HIP-Basic/streams/Makefile
+++ b/HIP-Basic/streams/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
-$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/streams/main.hip b/HIP-Basic/streams/main.hip
index 49a78a35d..12c974527 100644
--- a/HIP-Basic/streams/main.hip
+++ b/HIP-Basic/streams/main.hip
@@ -190,7 +190,7 @@ int main()
HIP_CHECK(hipHostMalloc(&h_transpose_matrix[1], size_in_bytes));
// Initialize the host input matrix
- for(int i = 0; i < size; i++)
+ for(unsigned int i = 0; i < size; i++)
{
h_in[i] = static_cast(i);
}
@@ -199,7 +199,7 @@ int main()
// Free host memory
HIP_CHECK(hipHostFree(h_in));
- for(int i = 0; i < num_streams; i++)
+ for(unsigned int i = 0; i < num_streams; i++)
{
HIP_CHECK(hipHostFree(h_transpose_matrix[i]));
diff --git a/HIP-Basic/streams/streams_vs2019.sln b/HIP-Basic/streams/streams_vs2019.sln
index e4da9a436..465373e25 100644
--- a/HIP-Basic/streams/streams_vs2019.sln
+++ b/HIP-Basic/streams/streams_vs2019.sln
@@ -1,25 +1,25 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.32630.194
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "streams_vs2019", "streams_vs2019.vcxproj", "{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|x64 = Debug|x64
- Release|x64 = Release|x64
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.ActiveCfg = Debug|x64
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.Build.0 = Debug|x64
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.ActiveCfg = Release|x64
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.Build.0 = Release|x64
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
- SolutionGuid = {507FFFF0-D864-42BC-AC76-F75DF573ACDF}
- EndGlobalSection
-EndGlobal
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "streams_vs2019", "streams_vs2019.vcxproj", "{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.ActiveCfg = Debug|x64
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.Build.0 = Debug|x64
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.ActiveCfg = Release|x64
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {507FFFF0-D864-42BC-AC76-F75DF573ACDF}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/streams/streams_vs2019.vcxproj b/HIP-Basic/streams/streams_vs2019.vcxproj
index 50d5b2d3f..7eec4a48c 100644
--- a/HIP-Basic/streams/streams_vs2019.vcxproj
+++ b/HIP-Basic/streams/streams_vs2019.vcxproj
@@ -1,99 +1,99 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
-
-
- 15.0
- {4e6b2034-d7ed-4cb4-98b2-7b2d2b71e0a9}
- Win32Proj
- streams_vs2019
- 10.0
-
-
-
- Application
- true
- HIP
- Unicode
-
-
- Application
- false
- HIP
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- hip_$(ProjectName)
-
-
- false
- hip_$(ProjectName)
-
-
- gfx1030
-
-
- gfx1030
-
-
-
- Level1
- __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- stdcpp17
-
-
- Console
- true
-
-
-
-
- Level2
- true
- true
- __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
- stdcpp17
-
-
- Console
- true
- true
- true
-
-
-
-
-
-
-
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {4e6b2034-d7ed-4cb4-98b2-7b2d2b71e0a9}
+ Win32Proj
+ streams_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
diff --git a/HIP-Basic/streams/streams_vs2019.vcxproj.filters b/HIP-Basic/streams/streams_vs2019.vcxproj.filters
index acefe7d8a..c1a8ec302 100644
--- a/HIP-Basic/streams/streams_vs2019.vcxproj.filters
+++ b/HIP-Basic/streams/streams_vs2019.vcxproj.filters
@@ -1,27 +1,27 @@
-
-
-
-
- {3a545504-c23d-4602-8d7a-54aa20712fc7}
- cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
-
-
- {6c38bc79-9ffe-4e99-a9f8-501231c75594}
- h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
-
-
- {11c1a8cb-83e3-44fa-abed-2e966d1d568e}
- rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
-
-
-
-
- Source Files
-
-
-
-
- Header Files
-
-
+
+
+
+
+ {3a545504-c23d-4602-8d7a-54aa20712fc7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {6c38bc79-9ffe-4e99-a9f8-501231c75594}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {11c1a8cb-83e3-44fa-abed-2e966d1d568e}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
\ No newline at end of file
diff --git a/HIP-Basic/texture_management/.gitignore b/HIP-Basic/texture_management/.gitignore
new file mode 100644
index 000000000..2f7bf2053
--- /dev/null
+++ b/HIP-Basic/texture_management/.gitignore
@@ -0,0 +1 @@
+hip_texture_management
diff --git a/HIP-Basic/texture_management/CMakeLists.txt b/HIP-Basic/texture_management/CMakeLists.txt
new file mode 100644
index 000000000..2d0c80fc0
--- /dev/null
+++ b/HIP-Basic/texture_management/CMakeLists.txt
@@ -0,0 +1,61 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_texture_management)
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA")
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES})
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES)
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME})
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17)
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH)
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip)
+# Make example runnable using ctest
+add_test(${example_name} ${example_name})
+
+# Temporary workaround: a known bug prevents the example from executing successfully
+# if multiple GPUs are visible
+set_tests_properties(${example_name} PROPERTIES ENVIRONMENT "HIP_VISIBLE_DEVICES=0")
+
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA")
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+set_source_files_properties(main.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/texture_management/Makefile b/HIP-Basic/texture_management/Makefile
new file mode 100644
index 000000000..464f4b935
--- /dev/null
+++ b/HIP-Basic/texture_management/Makefile
@@ -0,0 +1,60 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_texture_management
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
+
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
+clean:
+ $(RM) $(EXAMPLE)
+
+.PHONY: clean
\ No newline at end of file
diff --git a/HIP-Basic/texture_management/README.md b/HIP-Basic/texture_management/README.md
new file mode 100644
index 000000000..0e1032c71
--- /dev/null
+++ b/HIP-Basic/texture_management/README.md
@@ -0,0 +1,60 @@
+# HIP-Basic Texture Management Example
+
+## Description
+This example demonstrates how a kernel may use texture memory through the texture object API. Using texture memory may be beneficial as the texture cache is optimized for 2D spatial locality and exposes features such as hardware filtering. In the example, a texture is created using a device array and is sampled in a kernel to create a histogram of its values.
+
+### Application flow
+1. Check whether texture functions are supported on the device.
+2. Initialize the texture data on the host side.
+3. Specify the channel description of the texture and allocate a device array based on the texture dimensions and channel descriptor.
+4. Copy the texture data from host to device.
+5. Specify the texture resource and its parameters, and create the texture object.
+6. Allocate a device-side histogram.
+7. Launch the histogram kernel, which creates a histogram of the texture on the device.
+8. Copy the histogram to host memory and print the results.
+9. Destroy the texture object and release resources.
+
+## Key APIs and Concepts
+- The memory for the texture may be a device array `hipArray_t`, which is allocated with `hipMallocArray`. The allocation call requires a channel descriptor `hipChannelFormatDesc` and the dimensions of the texture. The channel descriptor can be created using `hipCreateChannelDesc`. Host data can be transferred to the device array using `hipMemcpy2DToArray`.
+- The texture object `hipTextureObject_t` is created with `hipCreateTextureObject`, which requires a resource descriptor `hipResourceDesc` and a texture descriptor `hipTextureDesc`. The resource descriptor describes the resource used to create the texture, in this example a device array `hipResourceTypeArray`. The texture descriptor describes the properties of the texture, such as its addressing mode and whether it uses normalized coordinates.
+- The created texture object can be sampled in a kernel using `tex2D`.
+- The texture object is cleaned up by calling `hipDestroyTextureObject` and the device array is cleaned up by calling `hipFreeArray`.
+
+## Demonstrated API Calls
+### HIP runtime
+- `__global__`
+
+#### Device symbols
+- `atomicAdd`
+- `blockDim`
+- `blockIdx`
+- `tex2D`
+- `threadIdx`
+
+#### Host symbols
+- `hipArray_t`
+- `hipAddressModeWrap`
+- `hipChannelFormatDesc`
+- `hipChannelFormatKindUnsigned`
+- `hipCreateChannelDesc`
+- `hipCreateTextureObject`
+- `hipDestroyTextureObject`
+- `hipDeviceAttributeImageSupport`
+- `hipDeviceGetAttribute`
+- `hipFilterModePoint`
+- `hipFree`
+- `hipFreeArray`
+- `hipGetLastError`
+- `hipLaunchKernelGGL`
+- `hipMalloc`
+- `hipMallocArray`
+- `hipMemcpy`
+- `hipMemcpy2DToArray`
+- `hipMemcpyHostToDevice`
+- `hipMemset`
+- `hipReadModeElementType`
+- `hipResourceDesc`
+- `hipResourceTypeArray`
+- `hipStreamDefault`
+- `hipTextureDesc`
+- `hipTextureObject_t`
diff --git a/HIP-Basic/texture_management/main.hip b/HIP-Basic/texture_management/main.hip
new file mode 100644
index 000000000..732d965a1
--- /dev/null
+++ b/HIP-Basic/texture_management/main.hip
@@ -0,0 +1,177 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+
+#include
+
+#include
+#include
+
+/// \brief Builds a histogram with \p hist_bin_count bins of the texels sampled from \p tex_obj.
+__global__ void histogram_kernel(unsigned int*      histogram,
+                                 unsigned int       size_x,
+                                 unsigned int       size_y,
+                                 unsigned int       hist_bin_count,
+                                 hipTextureObject_t tex_obj)
+{
+    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+    if(x >= size_x || y >= size_y)
+    {
+        return;
+    }
+
+    // Offset by half a texel *before* normalizing, so that the middle of the texel is sampled.
+    const float u = (x + .5f) / static_cast<float>(size_x);
+    const float v = (y + .5f) / static_cast<float>(size_y);
+
+    // Read the value from the texture.
+    const unsigned char val = tex2D<unsigned char>(tex_obj, u, v);
+
+    // Determine the histogram bin and atomically increment its counter.
+    const unsigned int bin_range = ceiling_div(256, hist_bin_count);
+    const unsigned int bin_idx   = static_cast<unsigned int>(val) / bin_range;
+    atomicAdd(&histogram[bin_idx], 1);
+}
+
+/// \brief Returns \p true if texture functions are supported for device \p device_id, \p false otherwise.
+static bool is_image_supported(int device_id)
+{
+#ifdef __HIP_PLATFORM_AMD__
+ int image_support;
+ HIP_CHECK(hipDeviceGetAttribute(&image_support, hipDeviceAttributeImageSupport, device_id));
+ return image_support == 1;
+#else
+ (void)device_id; // Not queried on this path; the cast marks the parameter as intentionally unused.
+ // hipDeviceAttributeImageSupport is not supported for the NVIDIA platform; use the __HIP_NO_IMAGE_SUPPORT macro instead.
+ #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
+ return true;
+ #else
+ return false;
+ #endif
+#endif
+}
+
+int main()
+{
+ if(!is_image_supported(0))
+ {
+ std::cout << "Texture functions are not supported on device 0." << std::endl;
+ return 0;
+ }
+
+ constexpr unsigned int size_x = 1024;
+ constexpr unsigned int size_y = 1024;
+ constexpr unsigned int size = size_x * size_y;
+
+ // Allocate and set host data.
+ std::vector h_data(size);
+ for(unsigned int i = 0; i < size; i++)
+ {
+ h_data[i] = static_cast(i);
+ }
+
+ // Allocate hip array in device memory.
+ hipChannelFormatDesc channel_desc
+ = hipCreateChannelDesc(sizeof(unsigned char) * 8, 0, 0, 0, hipChannelFormatKindUnsigned);
+ hipArray_t d_array;
+ HIP_CHECK(hipMallocArray(&d_array, &channel_desc, size_x, size_y));
+
+ // The pitch of the source memory, which is the width in memory in bytes of the 2D array pointed
+ // to by h_data, including padding. We don't have any padding.
+ const size_t spitch = size_x * sizeof(unsigned char);
+ // Copy the data located at address h_data in host memory to device memory.
+ HIP_CHECK(hipMemcpy2DToArray(d_array,
+ 0,
+ 0,
+ h_data.data(),
+ spitch,
+ size_x * sizeof(unsigned char),
+ size_y,
+ hipMemcpyHostToDevice));
+
+ // Specify the texture resource.
+ hipResourceDesc res_desc{};
+ res_desc.resType = hipResourceTypeArray;
+ res_desc.res.array.array = d_array;
+
+ // Specify the texture object parameters.
+ hipTextureDesc tex_desc{};
+ tex_desc.addressMode[0] = hipAddressModeWrap;
+ tex_desc.addressMode[1] = hipAddressModeWrap;
+ tex_desc.filterMode = hipFilterModePoint;
+ tex_desc.readMode = hipReadModeElementType;
+ tex_desc.normalizedCoords = 1;
+
+ // Create the texture object.
+ hipTextureObject_t tex_obj{};
+ HIP_CHECK(hipCreateTextureObject(&tex_obj, &res_desc, &tex_desc, nullptr));
+
+ constexpr unsigned int hist_bin_count = 7;
+ constexpr size_t hist_bytes = hist_bin_count * sizeof(unsigned int);
+
+ // Allocate the histogram in device memory.
+ unsigned int* d_histogram{};
+ HIP_CHECK(hipMalloc(&d_histogram, hist_bytes));
+ HIP_CHECK(hipMemset(d_histogram, 0, hist_bytes));
+
+ // Invoke histogram kernel.
+ constexpr unsigned int block_dim = 16;
+ hipLaunchKernelGGL(histogram_kernel,
+ dim3(ceiling_div(size_x, block_dim), ceiling_div(size_y, block_dim)),
+ dim3(block_dim, block_dim),
+ 0,
+ hipStreamDefault,
+ d_histogram,
+ size_x,
+ size_y,
+ hist_bin_count,
+ tex_obj);
+
+ // Check if the kernel launch was successful.
+ HIP_CHECK(hipGetLastError());
+
+ // Copy data from device back to host.
+ unsigned int h_histogram[hist_bin_count];
+ HIP_CHECK(hipMemcpy(h_histogram, d_histogram, hist_bytes, hipMemcpyDeviceToHost));
+
+ // Print out results.
+ std::cout << "Equal-width histogram with " << hist_bin_count << " bins of values [0, " << size
+ << ") mod 256:\n";
+ unsigned int sum = 0;
+ for(unsigned int i = 0; i < hist_bin_count; i++)
+ {
+ std::cout << "bin[" << i << "] = " << h_histogram[i];
+ std::cout << (i + 1 < hist_bin_count ? ", " : "\n");
+ sum += h_histogram[i];
+ }
+ std::cout << "sum of bins: " << sum << std::endl;
+
+ // Destroy texture object.
+ HIP_CHECK(hipDestroyTextureObject(tex_obj));
+
+ // Free device memory.
+ HIP_CHECK(hipFreeArray(d_array));
+ HIP_CHECK(hipFree(d_histogram));
+}
diff --git a/HIP-Basic/texture_management/texture_management_vs2019.sln b/HIP-Basic/texture_management/texture_management_vs2019.sln
new file mode 100644
index 000000000..260791c08
--- /dev/null
+++ b/HIP-Basic/texture_management/texture_management_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "texture_management_vs2019", "texture_management_vs2019.vcxproj", "{40E56BFB-1A0C-4618-BB49-A9AA635127C1}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {40E56BFB-1A0C-4618-BB49-A9AA635127C1}.Debug|x64.ActiveCfg = Debug|x64
+ {40E56BFB-1A0C-4618-BB49-A9AA635127C1}.Debug|x64.Build.0 = Debug|x64
+ {40E56BFB-1A0C-4618-BB49-A9AA635127C1}.Release|x64.ActiveCfg = Release|x64
+ {40E56BFB-1A0C-4618-BB49-A9AA635127C1}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {D7C4B290-7C93-4D26-85D9-364F6A448EE0}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/texture_management/texture_management_vs2019.vcxproj b/HIP-Basic/texture_management/texture_management_vs2019.vcxproj
new file mode 100644
index 000000000..9a9adfc86
--- /dev/null
+++ b/HIP-Basic/texture_management/texture_management_vs2019.vcxproj
@@ -0,0 +1,99 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+ 15.0
+ {40E56BFB-1A0C-4618-BB49-A9AA635127C1}
+ Win32Proj
+ texture_management_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ hip_$(ProjectName)
+
+
+ false
+ hip_$(ProjectName)
+
+
+ gfx1030;gfx90c:xnack-
+
+
+ gfx1030;gfx90c:xnack-
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ stdcpp17
+ $(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/texture_management/texture_management_vs2019.vcxproj.filters b/HIP-Basic/texture_management/texture_management_vs2019.vcxproj.filters
new file mode 100644
index 000000000..591e9f2c6
--- /dev/null
+++ b/HIP-Basic/texture_management/texture_management_vs2019.vcxproj.filters
@@ -0,0 +1,27 @@
+
+
+
+
+ {2932a426-602b-4926-887e-27c50ba7eab7}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {ed043ec4-e8ac-4831-93f5-a58546ec7bea}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {0da954bd-e555-4454-b082-b68d10c753b9}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
+
+ Header Files
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/vulkan_interop/.gitignore b/HIP-Basic/vulkan_interop/.gitignore
new file mode 100644
index 000000000..72c215465
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/.gitignore
@@ -0,0 +1,2 @@
+hip_vulkan_interop
+*.spv.h
diff --git a/HIP-Basic/vulkan_interop/CMakeLists.txt b/HIP-Basic/vulkan_interop/CMakeLists.txt
new file mode 100644
index 000000000..e7aff455f
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/CMakeLists.txt
@@ -0,0 +1,83 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set(example_name hip_vulkan_interop) # Name of both the CMake project and the executable target.
+
+cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+project(${example_name} LANGUAGES CXX)
+
+set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA") # User-selectable backend, HIP by default.
+set(GPU_RUNTIMES "HIP" "CUDA")
+set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES}) # Advertise the valid choices on the cache entry.
+
+if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES) # Reject any value other than HIP or CUDA.
+ set(ERROR_MESSAGE "GPU_RUNTIME is set to \"${GPU_RUNTIME}\".\nGPU_RUNTIME must be either HIP or CUDA.")
+ message(FATAL_ERROR ${ERROR_MESSAGE})
+endif()
+
+enable_language(${GPU_RUNTIME}) # GPU_RUNTIME doubles as the name of the CMake language to enable.
+set(CMAKE_${GPU_RUNTIME}_STANDARD 17) # Require C++17 without compiler extensions for that language.
+set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF)
+set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON)
+
+set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
+if(NOT CMAKE_PREFIX_PATH) # Only default the search prefix when the user did not provide one.
+ set(CMAKE_PREFIX_PATH "${ROCM_ROOT}")
+endif()
+
+add_executable(${example_name} main.hip vulkan_utils.hip)
+
+set(include_dirs "../../Common")
+if(GPU_RUNTIME STREQUAL "CUDA") # The ROCm include directory is only added explicitly for CUDA builds.
+ list(APPEND include_dirs "${ROCM_ROOT}/include")
+endif()
+
+find_package(Vulkan REQUIRED COMPONENTS glslangValidator) # Vulkan headers plus the GLSL compiler.
+find_package(glfw3 REQUIRED)
+
+set(SHADER_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+list(APPEND include_dirs ${SHADER_BINARY_DIR}) # The generated *.spv.h headers are written to and included from here.
+
+# Compile the GLSL shaders into headers; --vn sets the name of the variable that holds the compiled shader.
+add_custom_command(
+    OUTPUT "${SHADER_BINARY_DIR}/sinewave.vert.spv.h"
+    COMMAND ${Vulkan_GLSLANG_VALIDATOR_EXECUTABLE} -V100 --vn sinewave_vert -o "${SHADER_BINARY_DIR}/sinewave.vert.spv.h" "${CMAKE_CURRENT_SOURCE_DIR}/sinewave.vert"
+    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/sinewave.vert"
+    COMMENT "Compiling vertex shader"
+    VERBATIM
+)
+
+add_custom_command(
+    OUTPUT "${SHADER_BINARY_DIR}/sinewave.frag.spv.h"
+    COMMAND ${Vulkan_GLSLANG_VALIDATOR_EXECUTABLE} -V100 --vn sinewave_frag -o "${SHADER_BINARY_DIR}/sinewave.frag.spv.h" "${CMAKE_CURRENT_SOURCE_DIR}/sinewave.frag"
+    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/sinewave.frag"
+    COMMENT "Compiling fragment shader"
+    VERBATIM
+)
+
+# Target names are global: prefix with the example name so that this cannot collide when built as part of a larger project.
+add_custom_target(${example_name}_shaders DEPENDS "${SHADER_BINARY_DIR}/sinewave.vert.spv.h" "${SHADER_BINARY_DIR}/sinewave.frag.spv.h")
+add_dependencies(${example_name} ${example_name}_shaders) # Generate the shader headers before compiling the example.
+target_link_libraries(${example_name} PRIVATE Vulkan::Headers glfw)
+target_include_directories(${example_name} PRIVATE ${include_dirs})
+
+set_source_files_properties(main.hip vulkan_utils.hip PROPERTIES LANGUAGE ${GPU_RUNTIME})
diff --git a/HIP-Basic/vulkan_interop/Makefile b/HIP-Basic/vulkan_interop/Makefile
new file mode 100644
index 000000000..a8bb6bab2
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/Makefile
@@ -0,0 +1,68 @@
+# MIT License
+#
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+EXAMPLE := hip_vulkan_interop
+COMMON_INCLUDE_DIR := ../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables: location of the ROCm installation, its include directory and the default HIP compiler driver.
+ROCM_INSTALL_DIR := /opt/rocm
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags. The I-prefixed variables are internal; the user-supplied CXXFLAGS, CPPFLAGS, LDFLAGS, LDLIBS and GLSLFLAGS are appended to them below.
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) $(shell pkg-config --cflags glfw3 vulkan)
+ILDFLAGS :=
+ILDLIBS := $(shell pkg-config --libs glfw3)
+IGLSLFLAGS := -V100
+
+ifeq ($(GPU_RUNTIME), CUDA)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+else
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
+endif
+
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+IGLSLFLAGS += $(GLSLFLAGS)
+
+# Build the example (default goal). Local headers and the generated shader headers are prerequisites so that changing them triggers a rebuild.
+$(EXAMPLE): main.hip vulkan_utils.hip $(wildcard *.hpp) $(COMMON_INCLUDE_DIR)/example_utils.hpp sinewave.frag.spv.h sinewave.vert.spv.h
+	$(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ main.hip vulkan_utils.hip $(ILDLIBS)
+
+# Compile a GLSL shader into a header. The stem is the shader file name; --vn gets it with dots replaced (sinewave.vert -> sinewave_vert).
+sinewave.vert.spv.h sinewave.frag.spv.h: %.spv.h: %
+	glslangValidator $< -o $@ $(IGLSLFLAGS) --vn $(subst .,_,$*)
+
+.PHONY: clean
+clean:
+	$(RM) $(EXAMPLE) sinewave.frag.spv.h sinewave.vert.spv.h
+
+.DELETE_ON_ERROR: # Remove a partially written target when its recipe fails.
diff --git a/HIP-Basic/vulkan_interop/README.md b/HIP-Basic/vulkan_interop/README.md
new file mode 100644
index 000000000..2b853ccd9
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/README.md
@@ -0,0 +1,99 @@
+# HIP-Basic Vulkan Interop Example
+
+## Description
+External device resources and other handles can be shared with HIP in order to provide interoperability between different GPU APIs. This example showcases a HIP program that interacts with the Vulkan API: A HIP kernel is used to simulate a sine wave over a grid of points, in a buffer that is shared with Vulkan. The resulting data is then rendered to a window using the Vulkan API. A set of shared semaphores is used to guarantee synchronous access to the device memory shared between HIP and Vulkan.
+
+### Application flow
+#### Initialization
+1. A window is opened using the GLFW library.
+2. The Vulkan API is initialized: Function pointers are loaded, the Vulkan instance is created.
+3. A physical device is picked to execute the example kernel on and to render the result to the window. This physical device must be the same for HIP and for Vulkan in order to be able to share the required resources. This is done by comparing the device's UUID, which can be queried from a HIP device by querying `hipDeviceGetUuid` and from a Vulkan physical device by passing `VkPhysicalDeviceIDProperties` to `vkGetPhysicalDeviceProperties2`. If the UUIDs from a particular HIP device and Vulkan device are the same, they represent the same physical or virtual device.
+4. A Vulkan logical device and related handles are initialized from the physical device handle.
+5. A HIP stream is created on the same physical device.
+6. A Vulkan swapchain and related handles are initialized for the window from the logical device and related handles.
+7. Additional Vulkan handles required for the rendering process are initialized: A render pass, the graphics pipeline, frame buffers and other frame resources.
+8. Three buffers are allocated using Vulkan: A buffer holding x- and y-coordinates for the triangle grid, a separate buffer holding a height value corresponding to each point in the triangle grid, and an index buffer that defines the triangles grid made up of the grid coordinates. The height buffer is going to be shared with HIP, and therefore it needs to be created in a way that allows it to be exported to a native memory handle. This requires passing `VkExternalMemoryBufferCreateInfoKHR` to `vkCreateBuffer` when creating a buffer, with `VkExternalMemoryBufferCreateInfoKHR::handleTypes` initialized to the appropriate type for the native platform. Additionally, this requires setting the same value on `VkExportMemoryAllocateInfoKHR`, which must be passed to `vkAllocateMemory` when allocating memory for a buffer that is to be exported.
+9. The x- and y-coordinates buffer and the index buffer are initialized with their contents. These buffers do not change during the duration of the program.
+10. A HIP external memory handle is created from the Vulkan height buffer memory handle. This is done by first exporting the Vulkan buffer to a platform-native handle using `vkGetMemoryFdKHR` or `vkGetMemoryWin32HandleKHR` depending on the platform, and then importing that handle to HIP using `hipImportExternalMemory`.
+11. A pointer to the device memory of the height buffer is obtained from the HIP external memory handle using `hipExternalMemoryGetMappedBuffer`.
+12. Two semaphores used to synchronize memory accesses between HIP and Vulkan are initialized: The first synchronizes the access from when the buffer was used to render the previous frame in Vulkan to when the HIP kernel is invoked, and the second synchronizes the access from when the HIP kernel is finished to when Vulkan can use the buffer to render the next frame. Similar to buffers, these must be created in a way that allows them to be exported to a platform-native semaphore handle, so that they may later be imported as HIP external semaphores. This is done by passing `VkExportSemaphoreCreateInfoKHR` to `vkCreateSemaphore`, of which `handleTypes` must again be initialized to the appropriate platform-dependent handle type.
+13. The Vulkan semaphores are converted to HIP external semaphores. This is done by first exporting a Vulkan semaphore handle to a native semaphore handle, either by `vkGetSemaphoreFdKHR` or `vkGetSemaphoreWin32HandleKHR` depending on the target platform. The resulting handle is passed to `hipImportExternalSemaphore` to obtain the HIP semaphore handle.
+
+#### Rendering
+A frame is rendered as follows:
+1. The frame resources for the current frame in the frame pipeline are fetched from memory.
+2. The next image index is acquired from the swapchain.
+3. The command pool associated to the current frame is reset and the associated command buffer is initialized.
+4. `hipWaitExternalSemaphoresAsync` is used to ensure that Vulkan has finished rendering the previous frame before the HIP kernel is invoked. Note that this function is not required on the first frame.
+5. The HIP kernel is invoked.
+6. `hipSignalExternalSemaphoresAsync` is used to signal Vulkan that HIP is now finished with the buffer and that Vulkan can proceed with rendering.
+7. The Vulkan rendering commands are recorded to the current frame's command buffer.
+8. The command buffer is submitted to the Vulkan graphics queue. The semaphore that synchronizes the HIP kernel invocation with the Vulkan rendering commands is passed to `VkSubmitInfo::pWaitSemaphores` to make Vulkan wait on the semaphore signal before proceeding with rendering. As a small optimization, the corresponding element in `VkSubmitInfo::pWaitDstStageMask` is set to `VK_PIPELINE_STAGE_VERTEX_INPUT_BIT`. The height buffer is only needed at the vertex input stage, and this way the prior stages can already be executed by Vulkan even if the semaphore is not signaled yet. The semaphore that synchronizes between rendering the previous frame and running the HIP kernel for the next frame is passed to `VkSubmitInfo::pSignalSemaphores`, so that Vulkan signals it when the frame is finished with rendering.
+9. The swapchain is asked to present the current frame to the screen.
+
+## Key APIs and Concepts
+To share memory allocated by Vulkan with HIP, the `VkDeviceMemory` must be created by passing the `VkExportMemoryAllocateInfoKHR` structure to `vkAllocateMemory`. This structure needs the appropriate `handleTypes` set to a type that can be shared with HIP for the current platform; `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR` for Linux and `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR` or `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR` for Windows. Any Vulkan buffer that is to be associated with this device memory must similarly be created by passing `VkExternalMemoryBufferCreateInfoKHR` to `vkCreateBuffer`, of which the `handleTypes` member must be initialized to the same value. The `VkDeviceMemory` handle can then be exported to a native file descriptor or `HANDLE` using `vkGetMemoryFdKHR` or `vkGetMemoryWin32HandleKHR` respectively on Linux and Windows. A `hipExternalMemory_t` can then be imported from a native handle through `hipImportExternalMemory`. This function must be passed an instance of `hipExternalMemoryHandleDesc`, of which `type` is initialized with a handle type compatible with the Vulkan `handleTypes`. This mapping is as follows:
+| Vulkan memory handle type | HIP memory handle type |
+| --------------------------------------------------------- | ------------------------------------------- |
+| `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR` | `hipExternalMemoryHandleTypeOpaqueFd` |
+| `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR` | `hipExternalMemoryHandleTypeOpaqueWin32` |
+| `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR` | `hipExternalMemoryHandleTypeOpaqueWin32Kmt` |
+
+To actually use this external memory handle in HIP the corresponding HIP device memory pointer should first be obtained. This can be done with the `hipExternalMemoryGetMappedBuffer` function.
+
+Sharing semaphores follows a similar process: The `VkSemaphore` must be created by passing `VkExportSemaphoreCreateInfoKHR`, of which `handleTypes` must be initialized to `VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR` for Linux, or `VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR` or `VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR` for Windows. The `VkSemaphore` handle can then be exported to a native Linux file descriptor or Windows `HANDLE` using `vkGetSemaphoreFdKHR` or `vkGetSemaphoreWin32HandleKHR` on Linux and Windows respectively. The `hipExternalSemaphore_t` can then be created using `hipImportExternalSemaphore`. It must be passed an instance of `hipExternalSemaphoreHandleDesc`, of which `type` is again initialized with a compatible HIP-version of the Vulkan `handleTypes`. This mapping is as follows:
+| Vulkan semaphore handle type | HIP semaphore handle type |
+| ------------------------------------------------------------ | ---------------------------------------------- |
+| `VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR` | `hipExternalSemaphoreHandleTypeOpaqueFd` |
+| `VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR` | `hipExternalSemaphoreHandleTypeOpaqueWin32` |
+| `VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR` | `hipExternalSemaphoreHandleTypeOpaqueWin32Kmt` |
+
+To wait on a shared semaphore in HIP, `hipWaitExternalSemaphoresAsync` should be used. This must be passed a number of `hipExternalSemaphoreWaitParams` structures, each corresponding to a semaphore with the same index. When using timeline semaphores, its `fence.value` member can be used to specify which timeline semaphore value to wait on.
+
+To signal a shared semaphore in HIP, the `hipSignalExternalSemaphoresAsync` function can be used. This must be passed a number of `hipExternalSemaphoreSignalParams` structures, each corresponding to a semaphore with the same index. When using timeline semaphores, its `fence.value` member should be set to specify the value to which the semaphore should be set.
+
+## Dependencies
+This example has additional library dependencies besides HIP:
+- [GLFW3](https://glfw.org). GLFW can be installed either through the package manager, or can be obtained from its home page. If using CMake, the `glfw3Config.cmake` file must be in a path that CMake searches by default, or its install prefix must be passed using `-DCMAKE_PREFIX_PATH` (alternatively, the directory containing the file can be passed using `-Dglfw3_DIR`).
+The official GLFW3 binaries do not ship this file on Windows, and so GLFW3 must be compiled manually there. CMake will be able to find GLFW on Windows if it is installed in `C:\Program Files (x86)\glfw\`. Alternatively, GLFW can be obtained from [vcpkg](https://vcpkg.io/), which does ship the required cmake files. In this case, the vcpkg toolchain path should be passed to CMake using `-DCMAKE_TOOLCHAIN_FILE="/path/to/vcpkg/scripts/buildsystems/vcpkg.cmake"`.
+If using Visual Studio, the easiest way to obtain GLFW is by installing glfw3 from vcpkg. Alternatively, the appropriate path to the GLFW3 library and header directories can be set in Properties->Linker->General->Additional Library Directories and Properties->C/C++->General->Additional Include Directories. When using this method, the appropriate name for the glfw library should also be updated under Properties->Linker->Input->Additional Dependencies.
+- Vulkan headers, validation layers, and `glslangValidator` are required. The easiest way to obtain this is by installing the [LunarG Vulkan SDK](https://vulkan.lunarg.com/). CMake will be able to find the SDK using the `VULKAN_SDK` environment variable, which is set by default using the SDK activation script on Linux. On Windows, this environment variable is not automatically provided, and so should be set to the appropriate path before invoking CMake. The Visual Studio projects will automatically pick up `VULKAN_SDK`. Alternatively, the required Vulkan components can be installed through the system package manager. Note that libvulkan is _not_ required, the example loads function pointers dynamically.
+
+## Demonstrated API Calls
+### HIP runtime
+#### Device symbols
+- `threadIdx`, `blockIdx`, `blockDim`
+
+#### Host symbols
+- `hipComputeModeProhibited`
+- `hipCUDAErrorTohipError`
+- `hipDestroyExternalMemory`
+- `hipDestroyExternalSemaphore`
+- `hipDeviceGetUuid`
+- `hipExternalMemoryBufferDesc`
+- `hipExternalMemoryGetMappedBuffer`
+- `hipExternalMemoryHandleDesc`
+- `hipExternalMemoryHandleType`
+- `hipExternalMemoryHandleTypeOpaqueFd`
+- `hipExternalMemoryHandleTypeOpaqueWin32`
+- `hipExternalMemoryHandleTypeOpaqueWin32Kmt`
+- `hipExternalSemaphoreHandleDesc`
+- `hipExternalSemaphoreHandleType`
+- `hipExternalSemaphoreHandleTypeOpaqueFd`
+- `hipExternalSemaphoreHandleTypeOpaqueWin32`
+- `hipExternalSemaphoreHandleTypeOpaqueWin32Kmt`
+- `hipExternalSemaphoreSignalParams`
+- `hipExternalSemaphoreWaitParams`
+- `hipGetDeviceCount`
+- `hipGetDeviceProperties`
+- `hipGetLastError`
+- `hipImportExternalMemory`
+- `hipImportExternalSemaphore`
+- `hipLaunchKernelGGL`
+- `hipSetDevice`
+- `hipSignalExternalSemaphoresAsync`
+- `hipStreamCreate`
+- `hipStreamDestroy`
+- `hipStreamSynchronize`
+- `hipWaitExternalSemaphoresAsync`
+- `HIP_KERNEL_NAME`
diff --git a/HIP-Basic/vulkan_interop/main.hip b/HIP-Basic/vulkan_interop/main.hip
new file mode 100644
index 000000000..25b6345be
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/main.hip
@@ -0,0 +1,1364 @@
+// MIT License
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "example_utils.hpp"
+#include "vulkan_utils.hpp"
+
+#include "nvidia_hip_fix.hpp"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "sinewave.frag.spv.h"
+#include "sinewave.vert.spv.h"
+
+// USE_EXTERNAL_SEMAPHORES: HIP external semaphores are currently not working under Linux when compiling
+// for the AMD platform, so it defaults to 0 there and to 1 elsewhere. Pre-define the macro to override.
+// TODO: Remove once this is implemented in hipamd.
+// See https://github.com/ROCm-Developer-Tools/hipamd/issues/48.
+#ifndef USE_EXTERNAL_SEMAPHORES
+ #if defined(__HIP_PLATFORM_AMD__) && !defined(_WIN64)
+ #define USE_EXTERNAL_SEMAPHORES 0
+ #else
+ #define USE_EXTERNAL_SEMAPHORES 1
+ #endif
+#endif
+
+// USE_SIGNAL_SEMAPHORE: waiting on an external semaphore that is signaled from HIP currently seems not to
+// work under Windows on the AMD platform, so it defaults to 0 there and to 1 elsewhere. Pre-define to override.
+#ifndef USE_SIGNAL_SEMAPHORE
+ #if defined(__HIP_PLATFORM_AMD__) && defined(_WIN64)
+ #define USE_SIGNAL_SEMAPHORE 0
+ #else
+ #define USE_SIGNAL_SEMAPHORE 1
+ #endif
+#endif
+
+/// \brief The maximum number of frames that can be rendered at the same time. By
+/// setting this value to more than one, we can allow the presentation engine to
+/// draw the rendered frame to the monitor while we already render the next frame
+/// in the background.
+constexpr size_t max_frames_in_flight = 2;
+
+/// \brief The maximum time (in nanoseconds) that we are willing to wait on the next
+/// image from the swapchain. This is the largest representable value, i.e. effectively no timeout.
+constexpr uint64_t frame_timeout = std::numeric_limits<uint64_t>::max();
+
+/// \brief The number of triangles that the example's grid is in width.
+constexpr uint32_t grid_width = 256;
+/// \brief The number of triangles that the example's grid is in height.
+constexpr uint32_t grid_height = 256;
+
+/// \brief Instance-level Vulkan extensions that this example needs in order to share
+/// HIP- and Vulkan types. Buffers are shared through \p VK_KHR_external_memory_capabilities
+/// and semaphores through \p VK_KHR_external_semaphore_capabilities. Both of these require
+/// \p VK_KHR_get_physical_device_properties2, which is additionally needed to query the
+/// device's UUID.
+constexpr const char* required_instance_extensions[] = {
+    VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
+};
+
+/// \brief Device-level Vulkan extensions that a device must support in order to run this example:
+/// \p VK_KHR_swapchain to draw to the example's window, and \p VK_KHR_external_memory and \p VK_KHR_external_semaphore
+/// (together with their platform-specific Win32 or file descriptor variants) to share memory and semaphores with HIP.
+constexpr const char* required_device_extensions[] = {
+    VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
+#ifdef _WIN64
+    VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,
+#else
+    VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
+#endif
+};
+
+/// \brief This structure represents a device UUID, obtained either from Vulkan or from HIP.
+struct uuid
+{
+    uint8_t bytes[VK_UUID_SIZE];
+
+    /// \brief Fetches a Vulkan-compatible device UUID from a HIP device.
+    /// \param device - The HIP device to fetch the UUID of.
+    /// \return The UUID that is meant to match the one Vulkan reports for the same device.
+    ///
+    /// The use of this function should actually be replaced by \p hipDeviceGetUuid. However,
+    /// on AMD it returns a device UUID that is not compatible with that returned by Vulkan, and
+    /// when compiling for NVIDIA it yields a linker error. For this reason we provide our own
+    /// implementation that is compatible with both the Mesa (RADV) and AMD (AMDVLK) implementations
+    /// of Vulkan on AMD, and call into the CUDA API directly when compiling for NVIDIA.
+    static uuid get_hip_device_uuid(hipDevice_t device)
+    {
+#if defined(__HIP_PLATFORM_AMD__)
+        // The value that hipDeviceGetUuid returns does not correspond with those returned
+        // by mesa (see https://gitlab.freedesktop.org/mesa/mesa/-/blob/5cd3e395037250946ba2519600836341df02c8ca/src/amd/common/ac_gpu_info.c#L1366-1382)
+        // and by xgl (see https://github.com/GPUOpen-Drivers/xgl/blob/4118707939c2f4783d28ce2a383184a3794ca477/icd/api/vk_physical_device.cpp#L4363-L4421)
+        // Those drivers _do_ align with each other, so we can create our own UUID here.
+        // \see https://github.com/ROCm-Developer-Tools/hipamd/issues/50.
+        hipDeviceProp_t props;
+        HIP_CHECK(hipGetDeviceProperties(&props, device));
+
+        struct uuid result = {};
+        // Copy with memcpy instead of writing through a casted pointer: avoids strict-aliasing and alignment issues.
+        const uint32_t pci_ids[] = {static_cast<uint32_t>(props.pciDomainID),
+                                    static_cast<uint32_t>(props.pciBusID),
+                                    static_cast<uint32_t>(props.pciDeviceID)};
+        static_assert(sizeof(pci_ids) <= sizeof(result.bytes), "PCI identifiers must fit in the UUID");
+        std::memcpy(result.bytes, pci_ids, sizeof(pci_ids)); // Note: function is 0 anyway.
+        return result;
+#elif defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_NVCC__)
+        // Work around a compile error related to hipDeviceGetUuid when compiling for NVIDIA:
+        // "undefined reference to `cuDeviceGetUuid'"
+        // \see https://github.com/ROCm-Developer-Tools/hipamd/issues/51.
+        cudaDeviceProp props;
+        HIP_CHECK(hipCUDAErrorTohipError(cudaGetDeviceProperties(&props, device)));
+
+        struct uuid result = {};
+        std::memcpy(result.bytes, props.uuid.bytes, VK_UUID_SIZE);
+        return result;
+#else
+    #error unsupported platform
+#endif
+    }
+};
+
+/// \brief \p std::ostream print operator overload for \p uuid. Prints the bytes as lowercase
+/// hexadecimal digits in the canonical 8-4-4-4-12 grouping. \see uuid.
+std::ostream& operator<<(std::ostream& os, const uuid uuid)
+{
+    for(size_t i = 0; i < VK_UUID_SIZE * 2; ++i)
+    {
+        // Extract the current nibble; the high nibble of each byte comes first.
+        const uint8_t c = (uuid.bytes[i / 2] >> (4 - (i % 2) * 4)) & 0xF;
+        os << static_cast<char>(c < 10 ? c + '0' : c + 'a' - 10);
+        if(i == 7 || i == 11 || i == 15 || i == 19) // A dash follows the 8th, 12th, 16th and 20th digit.
+        {
+            os << '-';
+        }
+    }
+    return os;
+}
+
+/// \brief This structure represents a candidate HIP-device that we can use
+/// for this example. It pairs a HIP device with the UUID used to match it to a Vulkan device.
+struct hip_device_candidate
+{
+ /// The HIP device index representing this device.
+ hipDevice_t device;
+ /// The Vulkan-compatible device UUID; it is compared against the \p deviceUUID reported by Vulkan.
+ uuid device_uuid;
+};
+
+/// \brief This structure represents a candidate device that we can use for this
+/// example. Its members are filled in by \p is_physical_device_suitable.
+struct physical_device_candidate
+{
+ /// The Vulkan physical device handle of the device to be used.
+ VkPhysicalDevice pdev;
+
+ /// The candidate device's Vulkan device properties.
+ VkPhysicalDeviceProperties props;
+
+ /// The HIP device candidate that this Vulkan device corresponds to, i.e. the one with the same UUID.
+ hip_device_candidate hip_candidate;
+
+ /// The queue allocation that contains details about which queues will be
+ /// used throughout this example.
+ queue_allocation queues;
+};
+
+/// \brief Checks if a particular Vulkan physical device is qualified to run this example:
+/// - It needs to support the Vulkan surface which we want to render to.
+/// - It needs to support the required generic and platform-specific Vulkan device extensions.
+/// - It needs to be a HIP-supported device. This is checked by fetching the device
+///   UUID from Vulkan, and checking if it appears in the device UUIDs fetched from HIP
+///   (passed through \p hip_devices).
+/// - It needs to support graphics- and present queues that can render to the surface.
+/// If all of these are satisfied, the \p candidate structure is filled with information
+/// about the physical device that is required later, and the function returns \p true.
+/// Otherwise, \p false is returned; in that case \p candidate may have been partially overwritten.
+///
+/// \param dispatch - Provides the Vulkan functions that are used to query the physical device.
+/// \param hip_devices - A vector of \p hipDevice_t and their corresponding Vulkan-compatible device UUID.
+/// \param pdev - The Vulkan physical device to check suitability of.
+/// \param surface - The Vulkan surface that the physical device needs to support.
+/// \param candidate - Receives the information about \p pdev that is required later.
+/// \return \p true if \p pdev can be used to run this example, \p false otherwise.
+bool is_physical_device_suitable(const instance_dispatch&                 dispatch,
+                                 const std::vector<hip_device_candidate>& hip_devices,
+                                 VkPhysicalDevice                         pdev,
+                                 VkSurfaceKHR                             surface,
+                                 physical_device_candidate&               candidate)
+{
+    // Check if HIP supports this device by checking if there is any device with the same UUID.
+    {
+        // Query the Vulkan device UUID using vkGetPhysicalDeviceProperties2.
+        VkPhysicalDeviceIDPropertiesKHR id_props = {};
+        id_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR;
+
+        VkPhysicalDeviceProperties2KHR props2 = {};
+        props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+        props2.pNext = &id_props;
+
+        dispatch.get_physical_device_properties2(pdev, &props2);
+
+        const auto cmp_device_uuid = [&](const hip_device_candidate& hip_candidate)
+        {
+            return std::equal(std::begin(hip_candidate.device_uuid.bytes),
+                              std::end(hip_candidate.device_uuid.bytes),
+                              std::begin(id_props.deviceUUID),
+                              std::end(id_props.deviceUUID));
+        };
+
+        // Try to find a HIP device UUID that matches the UUID reported by Vulkan - if any such exists,
+        // we know that the device supports both Vulkan and HIP, and we can use it to run this example.
+        const auto it = std::find_if(hip_devices.begin(), hip_devices.end(), cmp_device_uuid);
+        if(it == hip_devices.end())
+        {
+            // This device does not support HIP.
+            return false;
+        }
+
+        candidate.props         = props2.properties;
+        candidate.hip_candidate = *it;
+    }
+
+    // Check if the device supports our surface at all.
+    if(!check_surface_support(dispatch, pdev, surface))
+    {
+        return false;
+    }
+
+    // Check if the device supports the required extensions.
+    if(!check_device_extensions(dispatch, pdev, required_device_extensions,
+                                std::size(required_device_extensions)))
+    {
+        return false;
+    }
+
+    // Try to allocate device queues for the candidate device.
+    if(!allocate_device_queues(dispatch, pdev, surface, candidate.queues))
+    {
+        return false;
+    }
+
+    candidate.pdev = pdev;
+    return true;
+}
+
+/// \brief Try to find a physical device that can run this example. This is done by fetching
+/// all supported devices from HIP and from Vulkan, and checking each of these to see if the required
+/// features are supported. If no suitable device is found, the program exits with an error.
+/// \param candidate - Filled with the details of the first suitable physical device.
+/// To check whether a Vulkan and HIP device are the same, their UUIDs are compared.
+/// \see \p uuid::get_hip_device_uuid.
+/// \see \p is_physical_device_suitable.
+void find_physical_device(const instance_dispatch&   dispatch,
+                          VkInstance                 instance,
+                          VkSurfaceKHR               surface,
+                          physical_device_candidate& candidate)
+{
+    uint32_t physical_device_count = 0;
+    VK_CHECK(dispatch.enumerate_physical_devices(instance, &physical_device_count, nullptr));
+    std::vector<VkPhysicalDevice> physical_devices(physical_device_count);
+    VK_CHECK(dispatch.enumerate_physical_devices(instance,
+                                                 &physical_device_count,
+                                                 physical_devices.data()));
+
+    if(physical_device_count == 0)
+    {
+        std::cerr << "System has no physical devices\n";
+        std::exit(error_exit_code);
+    }
+
+    // Fetch the number of HIP devices that are currently present on the system.
+    // Note: This depends on the current HIP platform, and may report different
+    // devices depending on that.
+    int hip_device_count = 0;
+    HIP_CHECK(hipGetDeviceCount(&hip_device_count));
+
+    // For each HIP device, check to see if we can use it at all, and then query
+    // its Vulkan-compatible device UUID.
+    std::vector<hip_device_candidate> hip_devices;
+    for(hipDevice_t hip_device = 0; hip_device < hip_device_count; ++hip_device)
+    {
+        hipDeviceProp_t hip_properties;
+        HIP_CHECK(hipGetDeviceProperties(&hip_properties, hip_device));
+        if(hip_properties.computeMode == hipComputeModeProhibited)
+            continue;
+
+        const uuid device_uuid = uuid::get_hip_device_uuid(hip_device);
+        hip_devices.push_back({hip_device, device_uuid});
+    }
+
+    for(VkPhysicalDevice pdev : physical_devices)
+    {
+        if(is_physical_device_suitable(dispatch, hip_devices, pdev, surface, candidate))
+        {
+            return;
+        }
+    }
+
+    std::cerr << "No suitable device\n";
+    std::exit(error_exit_code);
+}
+
+/// \brief Allocate device memory that fits the requirements of a Vulkan buffer, and bind it
+/// to that buffer at offset 0.
+/// \param buffer - The buffer to allocate and bind memory for.
+/// \param properties - The memory properties that the allocated memory should have.
+/// \param external - Whether to allocate this memory such that it can be exported.
+VkDeviceMemory allocate_buffer_memory(const graphics_context&     ctx,
+                                      const VkBuffer              buffer,
+                                      const VkMemoryPropertyFlags properties,
+                                      const bool                  external = false)
+{
+    // Describes the native handle type that the memory may be exported as. Which handle
+    // type that is depends on the platform that we are currently compiling for.
+    VkExportMemoryAllocateInfoKHR export_info = {};
+    export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
+#ifdef _WIN64
+    export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR;
+#else
+    export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+#endif
+
+    // Ask Vulkan what the buffer requires, and select a matching memory type for it.
+    VkMemoryRequirements requirements;
+    ctx.vkd->get_buffer_memory_requirements(ctx.dev, buffer, &requirements);
+
+    VkMemoryAllocateInfo alloc_info = {};
+    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    // The export structure is only chained in when the memory is going to be exported.
+    alloc_info.pNext           = external ? &export_info : nullptr;
+    alloc_info.allocationSize  = requirements.size;
+    alloc_info.memoryTypeIndex = ctx.find_memory_type_index(requirements.memoryTypeBits, properties);
+
+    // Allocate the memory and attach it to the buffer.
+    VkDeviceMemory result;
+    VK_CHECK(ctx.vkd->allocate_memory(ctx.dev, &alloc_info, nullptr, &result));
+    VK_CHECK(ctx.vkd->bind_buffer_memory(ctx.dev, buffer, result, 0));
+    return result;
+}
+
+/// \brief Create a Vulkan buffer. This only creates the buffer object itself; memory is
+/// allocated for it and bound to it separately.
+/// \param size - The size of the buffer, in bytes.
+/// \param usage - The Vulkan usage flags that this buffer will be used with.
+/// \param external - If true, this buffer is created so that it can later be exported to a
+/// platform-native handle, that may be imported to HIP.
+/// \see allocate_buffer_memory for allocating and binding memory for the buffer.
+VkBuffer create_buffer(const graphics_context&  ctx,
+                       const VkDeviceSize       size,
+                       const VkBufferUsageFlags usage,
+                       const bool               external = false)
+{
+    // In order to be able to export the buffer, Vulkan needs to be told up front, through
+    // a VkExternalMemoryBufferCreateInfoKHR in the create info chain, which native handle
+    // type the buffer is going to be exported as. Which handle type that is depends on the
+    // platform that we are currently compiling for.
+    VkExternalMemoryBufferCreateInfoKHR external_info = {};
+    external_info.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR;
+#ifdef _WIN64
+    external_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR;
+#else
+    external_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+#endif
+
+    VkBufferCreateInfo buffer_info = {};
+    buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    // The external create information is only passed on to Vulkan when the caller asked
+    // for an exportable buffer.
+    buffer_info.pNext       = external ? &external_info : nullptr;
+    buffer_info.size        = size;
+    buffer_info.usage       = usage;
+    buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+
+    // Create the buffer object and hand it to the caller.
+    VkBuffer result;
+    VK_CHECK(ctx.vkd->create_buffer(ctx.dev, &buffer_info, nullptr, &result));
+    return result;
+}
+
+/// \brief Import the memory behind a Vulkan memory handle into HIP. The VkDeviceMemory passed
+/// to this function and the returned HIP external memory represent the same physical area of
+/// GPU memory, seen through the handles of each respective API: what is written through one
+/// API can be read through the other. Note that access to the memory should be synchronized
+/// between the APIs, for example using queue syncs or semaphores.
+/// \param memory - The Vulkan memory handle to convert. This memory needs to be created with
+/// the appropriate fields set in VkExportMemoryAllocateInfoKHR.
+/// \param size - The size, in bytes, that is reported to HIP for the imported memory.
+/// \see allocate_buffer_memory for allocating such a memory handle, and
+/// \see create_buffer for creating a Vulkan buffer that is compatible with that memory.
+hipExternalMemory_t
+    memory_to_hip(const graphics_context& ctx, const VkDeviceMemory memory, const VkDeviceSize size)
+{
+    // Prepare the HIP external memory descriptor. Its handle type is platform-specific
+    // and should correspond to the handleTypes field that was set in
+    // VkExportMemoryAllocateInfoKHR when the memory was allocated; both are selected
+    // with the same platform check.
+    hipExternalMemoryHandleDesc import_desc = {};
+    import_desc.size = size;
+
+    // Export the Vulkan memory to a platform-specific native handle, depending on the
+    // current platform: On Windows the memory is converted to a HANDLE, and on Linux to
+    // a file descriptor representing the driver's GPU handle to the memory.
+    // The native handle is written straight into the HIP external memory descriptor so
+    // that it may be imported.
+#ifdef _WIN64
+    // Windows: export the memory as an opaque KMT handle.
+    VkMemoryGetWin32HandleInfoKHR win32_info = {};
+    win32_info.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
+    win32_info.memory     = memory;
+    win32_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR;
+
+    import_desc.type = hipExternalMemoryHandleTypeOpaqueWin32Kmt;
+    VK_CHECK(
+        ctx.vkd->get_memory_win32_handle(ctx.dev, &win32_info, &import_desc.handle.win32.handle));
+#else
+    // Other platforms: export the memory as an opaque file descriptor.
+    VkMemoryGetFdInfoKHR fd_info = {};
+    fd_info.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
+    fd_info.memory     = memory;
+    fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+
+    import_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
+    VK_CHECK(ctx.vkd->get_memory_fd(ctx.dev, &fd_info, &import_desc.handle.fd));
+#endif
+
+    // Import the native memory handle to HIP to create an external memory.
+    hipExternalMemory_t imported;
+    HIP_CHECK(hipImportExternalMemory(&imported, &import_desc));
+    return imported;
+}
+
+/// \brief Utility function to create a Vulkan semaphore, which can optionally be made
+/// exportable.
+/// \param external - If true, this semaphore is created so that it can later be exported
+/// to a platform-native handle, which may be imported to HIP later.
+/// \returns The newly created semaphore.
+VkSemaphore create_semaphore(const graphics_context& ctx, const bool external = false)
+{
+    // Similar to buffers, an exportable semaphore has to announce the handle type that it
+    // will be exported as, through a VkExportSemaphoreCreateInfoKHR structure. The value
+    // of handleTypes depends on the platform that we are currently compiling for.
+    VkExportSemaphoreCreateInfoKHR export_info = {};
+    export_info.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR;
+#ifdef _WIN64
+    export_info.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR;
+#else
+    export_info.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+#endif
+
+    VkSemaphoreCreateInfo semaphore_info = {};
+    semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    // The export structure only becomes part of the create info chain when the caller
+    // asked for an exportable semaphore.
+    semaphore_info.pNext = external ? &export_info : nullptr;
+
+    // Create the semaphore and hand it to the caller.
+    VkSemaphore result;
+    VK_CHECK(ctx.vkd->create_semaphore(ctx.dev, &semaphore_info, nullptr, &result));
+    return result;
+}
+
+/// \brief Import a Vulkan semaphore into HIP. The passed semaphore and the returned HIP
+/// semaphore represent the same backing semaphore, through the handles of the respective
+/// API. Signaling the semaphore in one API allows the other API to wait on it, which is
+/// how we can guarantee synchronized access to resources in a cross-API manner.
+/// \param sema - The Vulkan semaphore to convert. This semaphore needs to be created with
+/// the appropriate fields set in VkExportSemaphoreCreateInfoKHR.
+/// \returns The HIP external semaphore created from the exported native handle.
+/// \see create_semaphore for creating such a semaphore.
+hipExternalSemaphore_t semaphore_to_hip(const graphics_context& ctx, const VkSemaphore sema)
+{
+    // Prepare the HIP external semaphore descriptor. Its handle type is platform-specific
+    // and should correspond to the handleTypes field set in the
+    // VkExportSemaphoreCreateInfoKHR structure that was passed to Vulkan when creating
+    // the semaphore; both are selected with the same platform check.
+    hipExternalSemaphoreHandleDesc import_desc = {};
+
+    // Export the Vulkan semaphore to a platform-specific handle depending on the current
+    // platform: On Windows, we convert the semaphore into a HANDLE, and on Linux it is
+    // converted to a file descriptor.
+    // The native handle is written straight into the HIP external semaphore descriptor
+    // so that it may be imported.
+#ifdef _WIN64
+    // Windows: export the semaphore as an opaque Win32 handle.
+    VkSemaphoreGetWin32HandleInfoKHR win32_info = {};
+    win32_info.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR;
+    win32_info.semaphore  = sema;
+    win32_info.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR;
+
+    import_desc.type = hipExternalSemaphoreHandleTypeOpaqueWin32;
+    VK_CHECK(
+        ctx.vkd->get_semaphore_win32_handle(ctx.dev, &win32_info, &import_desc.handle.win32.handle));
+#else
+    // Other platforms: export the semaphore as an opaque file descriptor.
+    VkSemaphoreGetFdInfoKHR fd_info = {};
+    fd_info.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR;
+    fd_info.semaphore  = sema;
+    fd_info.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+
+    import_desc.type = hipExternalSemaphoreHandleTypeOpaqueFd;
+    VK_CHECK(ctx.vkd->get_semaphore_fd(ctx.dev, &fd_info, &import_desc.handle.fd));
+#endif
+
+    // Import the native semaphore to HIP to create a HIP external semaphore.
+    hipExternalSemaphore_t imported;
+    HIP_CHECK(hipImportExternalSemaphore(&imported, &import_desc));
+    return imported;
+}
+
+/// \brief Map HIP external memory to a pointer. Memory that was exported from Vulkan and
+/// imported to HIP is not yet ready for use: it first has to be mapped to a pointer, which
+/// can then be passed to a kernel so that the memory can be read from and written to.
+void* map_hip_external_memory(const hipExternalMemory_t mem, const VkDeviceSize size)
+{
+    // Map the first `size` bytes of the external memory, without any special flags.
+    hipExternalMemoryBufferDesc buffer_desc = {};
+    buffer_desc.size   = size;
+    buffer_desc.offset = 0;
+    buffer_desc.flags  = 0;
+    void* mapped;
+    HIP_CHECK(hipExternalMemoryGetMappedBuffer(&mapped, mem, &buffer_desc));
+    return mapped;
+}
+
+/// \brief The main HIP kernel for this example - computes a simple sine wave over a
+/// 2-dimensional grid of points. Threads that fall outside of the grid do nothing.
+/// \param height_map - the grid of points to compute a sine wave for. It is expected to be a
+/// \p grid_width by \p grid_height array packed into memory, with y on the inner axis.
+/// \param time - The current time relative to the start of the program.
+__global__ void sinewave_kernel(float* height_map, const float time)
+{
+    const float        freq = 10.f;
+    const unsigned int x    = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned int y    = blockIdx.y * blockDim.y + threadIdx.y;
+    if(x >= grid_width || y >= grid_height)
+        return;
+
+    const float u = (2.f * x) / grid_width - 1.f;
+    const float v = (2.f * y) / grid_height - 1.f;
+    // With y on the inner axis, consecutive x values are grid_height elements apart.
+    height_map[x * grid_height + y] = sinf(u * freq + time) * cosf(v * freq + time);
+}
+
+/// \brief In order to increase efficiency, we pipeline the rendering process. This allows us to render
+/// the next frame already while another frame is being presented by Vulkan. The \p frame structure
+/// owns the Vulkan handles that are duplicated for each phase of the pipeline, and is move-only.
+struct frame
+{
+    const graphics_context& ctx;
+
+    /// The semaphore that guards the use of the swapchain image before it is ready.
+    VkSemaphore image_acquired;
+    /// The semaphore that guards the present before the image is rendered.
+    VkSemaphore render_finished;
+    /// A fence that lets the CPU wait until the GPU work submitted for this frame is done.
+    VkFence frame_fence;
+    /// The command pool that \p cmd_buf is allocated from; one pool per frame, reset as a whole.
+    VkCommandPool cmd_pool;
+    /// The main command buffer for this frame.
+    VkCommandBuffer cmd_buf;
+
+    /// \brief Create a new frame. The fence is created in the signaled state.
+    explicit frame(const graphics_context& ctx)
+        : ctx(ctx), image_acquired(create_semaphore(ctx)), render_finished(create_semaphore(ctx))
+    {
+        VkFenceCreateInfo fence_info = {};
+        fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+        fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
+        VK_CHECK(ctx.vkd->create_fence(ctx.dev, &fence_info, nullptr, &this->frame_fence));
+        VkCommandPoolCreateInfo pool_info = {};
+        pool_info.sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+        pool_info.queueFamilyIndex = ctx.graphics_queue.family;
+        VK_CHECK(ctx.vkd->create_command_pool(ctx.dev, &pool_info, nullptr, &this->cmd_pool));
+
+        VkCommandBufferAllocateInfo alloc_info = {};
+        alloc_info.sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+        alloc_info.commandPool        = this->cmd_pool;
+        alloc_info.level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+        alloc_info.commandBufferCount = 1;
+        VK_CHECK(ctx.vkd->allocate_command_buffers(ctx.dev, &alloc_info, &this->cmd_buf));
+    }
+
+    /// \brief Take over the handles of \p other; it keeps only null handles, safe to destroy.
+    frame(frame&& other) noexcept
+        : ctx(other.ctx), image_acquired(other.image_acquired), render_finished(other.render_finished),
+          frame_fence(other.frame_fence), cmd_pool(other.cmd_pool), cmd_buf(other.cmd_buf)
+    {
+        other.image_acquired = other.render_finished = VK_NULL_HANDLE;
+        other.frame_fence = VK_NULL_HANDLE;
+        other.cmd_pool = VK_NULL_HANDLE;
+        other.cmd_buf = VK_NULL_HANDLE;
+    }
+    /// Copying is disabled: two frames would otherwise destroy the same Vulkan handles.
+    frame(const frame&) = delete;
+
+    ~frame()
+    {
+        this->ctx.vkd->destroy_command_pool(this->ctx.dev, this->cmd_pool, nullptr);
+        this->ctx.vkd->destroy_fence(this->ctx.dev, this->frame_fence, nullptr);
+        this->ctx.vkd->destroy_semaphore(this->ctx.dev, this->image_acquired, nullptr);
+        this->ctx.vkd->destroy_semaphore(this->ctx.dev, this->render_finished, nullptr);
+    }
+
+    /// \brief Wait until the GPU-work for this frame has been completed, so we can render to it again.
+    void wait() const
+    {
+        VK_CHECK(ctx.vkd->wait_for_fences(ctx.dev, 1, &frame_fence, VK_TRUE, frame_timeout));
+    }
+
+    /// \brief Reset the fence that backs this frame.
+    void reset() const
+    {
+        VK_CHECK(this->ctx.vkd->reset_fences(this->ctx.dev, 1, &this->frame_fence));
+    }
+};
+
+/// \brief This structure contains all the rendering related information for this example.
+/// It differs from the \p graphics_context in that a typical Vulkan program usually has
+/// only one graphics_context-like structure, but may have multiple renderer-like
+/// structures. In this example though, there is only one.
+///
+/// This renderer renders a grid of triangles to the window, the color of which is determined by
+/// a HIP computation. Rendering is done using 3 buffers:
+/// - One buffer contains the height of each triangle (rendered as color).
+/// - One buffer holds the x- and y-coordinates for each of the corners of the triangle. Note: these
+/// coordinates are unique, as the triangles that are made up from these points are defined by the
+/// - Index buffer, that holds indices into the former two buffers to make up a list of triangles.
+struct renderer
+{
+    /// The total number of vertices for the triangles.
+    constexpr static size_t num_verts = grid_width * grid_height;
+    /// The number of bytes in the x- and y-coordinates buffer. Each x/y coordinate is encoded as
+    /// a pair of floats, which are stored in a packed array-of-structures format: | x | y | x | y | ... |.
+    constexpr static size_t grid_buffer_size = num_verts * sizeof(float) * 2;
+    /// The number of bytes in the height buffer. Each height is encoded as a floating point value.
+    /// This buffer will be shared with HIP, which is why these coordinates are
+    /// stored in a separate buffer.
+    constexpr static size_t height_buffer_size = num_verts * sizeof(float);
+
+    /// The number of indices in the index buffer. Each triangle has 3 points, each square in the grid
+    /// is made up of 2 triangles. There are (width - 1) by (height - 1) squares in the grid.
+    constexpr static size_t num_indices = (grid_width - 1) * (grid_height - 1) * 3 * 2;
+    /// The number of bytes in the index buffer. Each index is encoded as a 32-bit int.
+    constexpr static size_t index_buffer_size = num_indices * sizeof(uint32_t);
+
+    const graphics_context& ctx;
+    swapchain&              sc;
+
+    hipDevice_t hip_device;
+    hipStream_t hip_stream;
+
+    VkRenderPass render_pass;
+
+    /// The frames in the rendering pipeline.
+    std::vector<frame> frames;
+    /// The index of the frame we are currently rendering to.
+    uint32_t frame_index = 0;
+
+    /// The Vulkan frame buffers to render to - each corresponds to a swapchain
+    /// image with the same index in \p sc.
+    std::vector<VkFramebuffer> framebuffers;
+
+    /// The pipeline layout and pipeline of the rendering pipeline for the Vulkan part
+    /// of this example.
+    VkPipelineLayout pipeline_layout;
+    VkPipeline       pipeline;
+
+    /// Whether the swapchain is out-of-date and needs to be recreated.
+    bool swapchain_out_of_date = false;
+
+    /// The buffer and memory holding the grid coordinates.
+    VkBuffer       grid_buffer;
+    VkDeviceMemory grid_memory;
+    /// The buffer and memory holding the grid heights.
+    /// This buffer will be exported to HIP.
+    /// \see hip_height_memory.
+    /// \see hip_height_buffer.
+    VkBuffer       height_buffer;
+    VkDeviceMemory height_memory;
+    /// The buffer and memory holding the indices for the triangles to render.
+    VkBuffer       index_buffer;
+    VkDeviceMemory index_memory;
+
+    /// The HIP-imported version of \p height_memory.
+    hipExternalMemory_t hip_height_memory;
+    /// The HIP-imported version of \p height_buffer mapped into the program's memory.
+    float* hip_height_buffer;
+
+    /// The semaphore that guards between when the buffer has been rendered from the
+    /// Vulkan side and when we can simulate it again from the HIP side, and
+    /// its HIP-imported version.
+    VkSemaphore            buffer_ready;
+    hipExternalSemaphore_t hip_buffer_ready;
+
+    /// The semaphore that guards between when the simulation has finished from the HIP
+    /// side and when we can render it to the swapchain in the Vulkan side, and its HIP-
+    /// imported version.
+    VkSemaphore            simulation_finished;
+    hipExternalSemaphore_t hip_simulation_finished;
+
+    /// The time at which this example started.
+    std::chrono::high_resolution_clock::time_point start_time;
+
+    /// Counters used to keep track of the current performance.
+    uint32_t                                       fps_start_frame = 0;
+    std::chrono::high_resolution_clock::time_point fps_start_time;
+
+    /// \brief Initialize a new renderer: its HIP stream, its Vulkan resources and the shared buffer.
+    renderer(const graphics_context& ctx, swapchain& sc, const hipDevice_t hip_device)
+        : ctx(ctx), sc(sc), hip_device(hip_device)
+    {
+        // Create a HIP stream for the (hip) device that was selected, which compute commands will be scheduled to later.
+        HIP_CHECK(hipSetDevice(this->hip_device));
+        HIP_CHECK(hipStreamCreate(&this->hip_stream));
+
+        // Initialize the Vulkan resources related to this renderer.
+        this->render_pass     = sc.create_render_pass();
+        this->pipeline_layout = this->ctx.create_pipeline_layout();
+        this->create_pipeline();
+
+        this->frames.reserve(max_frames_in_flight);
+        for(size_t i = 0; i < max_frames_in_flight; ++i)
+        {
+            this->frames.emplace_back(ctx);
+        }
+
+        this->sc.recreate_framebuffers(this->render_pass, this->framebuffers);
+
+        // Create each of the buffers, and allocate memory for them.
+
+        this->grid_buffer
+            = create_buffer(ctx,
+                            grid_buffer_size,
+                            VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+
+        // This buffer is going to be exported to HIP, so we should create it as
+        // an external buffer.
+        this->height_buffer
+            = create_buffer(ctx,
+                            height_buffer_size,
+                            VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+                            true);
+
+        this->index_buffer
+            = create_buffer(ctx,
+                            index_buffer_size,
+                            VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
+
+        // Allocate the memory for each buffer.
+
+        this->grid_memory
+            = allocate_buffer_memory(ctx, this->grid_buffer, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+        // Allocate this memory in a way that supports exporting.
+        this->height_memory = allocate_buffer_memory(ctx,
+                                                     this->height_buffer,
+                                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                                                     true);
+        this->index_memory
+            = allocate_buffer_memory(ctx, this->index_buffer, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+
+        // Upload the initial data to the buffers.
+        this->initialize_buffer_data();
+
+        // Export the height buffer and import it in HIP.
+        this->hip_height_memory = memory_to_hip(this->ctx, this->height_memory, height_buffer_size);
+        // Map it into memory. The mapping is untyped, so view it as the floats the kernel writes.
+        this->hip_height_buffer = reinterpret_cast<float*>(
+            map_hip_external_memory(this->hip_height_memory, height_buffer_size));
+
+        // Create the Vulkan-HIP synchronization resources from Vulkan and import them in HIP.
+#if USE_EXTERNAL_SEMAPHORES == 1
+        this->buffer_ready     = create_semaphore(this->ctx, true);
+        this->hip_buffer_ready = semaphore_to_hip(this->ctx, this->buffer_ready);
+
+        this->simulation_finished     = create_semaphore(this->ctx, true);
+        this->hip_simulation_finished = semaphore_to_hip(this->ctx, this->simulation_finished);
+#endif
+
+        // Initialize performance counters.
+        this->start_time     = std::chrono::high_resolution_clock::now();
+        this->fps_start_time = this->start_time;
+    }
+
+    /// \brief Wait until rendering is finished, then release all HIP and Vulkan resources.
+    ~renderer()
+    {
+        wait_all_frames();
+
+        // All work on the stream has to be finished before the stream is destroyed.
+        HIP_CHECK(hipStreamSynchronize(hip_stream));
+        HIP_CHECK(hipStreamDestroy(hip_stream));
+
+        // Release the Vulkan-HIP synchronization resources: HIP side first, then Vulkan side.
+#if USE_EXTERNAL_SEMAPHORES == 1
+        HIP_CHECK(hipDestroyExternalSemaphore(hip_buffer_ready));
+        HIP_CHECK(hipDestroyExternalSemaphore(hip_simulation_finished));
+
+        ctx.vkd->destroy_semaphore(ctx.dev, buffer_ready, nullptr);
+        ctx.vkd->destroy_semaphore(ctx.dev, simulation_finished, nullptr);
+#endif
+
+        // Destroy the HIP external memory handle. We don't need to unmap it.
+        HIP_CHECK(hipDestroyExternalMemory(hip_height_memory));
+
+        // Release the Vulkan device memory first, and then the buffer handles.
+        ctx.vkd->free_memory(ctx.dev, index_memory, nullptr);
+        ctx.vkd->free_memory(ctx.dev, height_memory, nullptr);
+        ctx.vkd->free_memory(ctx.dev, grid_memory, nullptr);
+        ctx.vkd->destroy_buffer(ctx.dev, index_buffer, nullptr);
+        ctx.vkd->destroy_buffer(ctx.dev, height_buffer, nullptr);
+        ctx.vkd->destroy_buffer(ctx.dev, grid_buffer, nullptr);
+
+        ctx.vkd->destroy_pipeline_layout(ctx.dev, pipeline_layout, nullptr);
+        ctx.vkd->destroy_pipeline(ctx.dev, pipeline, nullptr);
+
+        for(size_t i = 0; i < framebuffers.size(); ++i)
+        {
+            ctx.vkd->destroy_framebuffer(ctx.dev, framebuffers[i], nullptr);
+        }
+
+        ctx.vkd->destroy_render_pass(ctx.dev, render_pass, nullptr);
+    }
+
+    // A renderer can neither be copied nor moved.
+    renderer(const renderer&)            = delete;
+    renderer& operator=(const renderer&) = delete;
+    renderer(renderer&&)                 = delete;
+    renderer& operator=(renderer&&)      = delete;
+
+    /// \brief Block until every frame in the rendering pipeline has finished rendering.
+    void wait_all_frames()
+    {
+        for(size_t i = 0; i < this->frames.size(); ++i)
+        {
+            this->frames[i].wait();
+        }
+    }
+
+    /// \brief Upload the initial values for each buffer to Vulkan, through a temporary staging buffer.
+    void initialize_buffer_data()
+    {
+        // Create a CPU-accessible "staging" buffer to upload data through. It is re-used for all
+        // three buffers, so it is created large enough to hold the largest of the three.
+        constexpr size_t staging_buffer_size
+            = std::max(std::max(grid_buffer_size, height_buffer_size), index_buffer_size);
+        VkBuffer staging_buffer
+            = create_buffer(ctx, staging_buffer_size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
+        VkDeviceMemory staging_memory = allocate_buffer_memory(
+            ctx,
+            staging_buffer,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+
+        // Map the staging buffer into host memory.
+        void* staging;
+        VK_CHECK(
+            this->ctx.vkd
+                ->map_memory(this->ctx.dev, staging_memory, 0, staging_buffer_size, 0, &staging));
+
+        // Initialize the height buffer: all heights start at zero.
+        {
+            std::memset(staging, 0, height_buffer_size);
+            this->ctx.copy_buffer(this->height_buffer, staging_buffer, height_buffer_size);
+        }
+
+        // Initialize the grid buffer: packed x/y pairs in [-1, 1], with x on the inner axis.
+        {
+            float* grid = reinterpret_cast<float*>(staging);
+            for(uint32_t y = 0; y < grid_height; ++y)
+            {
+                for(uint32_t x = 0; x < grid_width; ++x)
+                {
+                    *grid++ = (2.0f * x) / (grid_width - 1) - 1;
+                    *grid++ = (2.0f * y) / (grid_height - 1) - 1;
+                }
+            }
+
+            this->ctx.copy_buffer(this->grid_buffer, staging_buffer, grid_buffer_size);
+        }
+
+        // Initialize the index buffer: two triangles (six indices) for each square of the grid.
+        {
+            uint32_t* indices = reinterpret_cast<uint32_t*>(staging);
+            for(uint32_t y = 0; y < grid_height - 1; ++y)
+            {
+                for(uint32_t x = 0; x < grid_width - 1; ++x)
+                {
+                    *indices++ = (y + 0) * grid_width + (x + 0);
+                    *indices++ = (y + 1) * grid_width + (x + 0);
+                    *indices++ = (y + 0) * grid_width + (x + 1);
+                    *indices++ = (y + 1) * grid_width + (x + 0);
+                    *indices++ = (y + 1) * grid_width + (x + 1);
+                    *indices++ = (y + 0) * grid_width + (x + 1);
+                }
+            }
+
+            this->ctx.copy_buffer(this->index_buffer, staging_buffer, index_buffer_size);
+        }
+
+        // We are done with the staging buffer so clean it up.
+        this->ctx.vkd->unmap_memory(this->ctx.dev, staging_memory);
+        this->ctx.vkd->free_memory(this->ctx.dev, staging_memory, nullptr);
+        this->ctx.vkd->destroy_buffer(this->ctx.dev, staging_buffer, nullptr);
+    }
+
+    /// \brief Initialize the Vulkan graphics pipeline that this renderer draws with. The
+    /// result is stored in \p pipeline.
+    void create_pipeline()
+    {
+        // Keep in sync with shaders!
+        // Binding 0 advances one float per vertex, binding 1 two floats per vertex.
+        VkVertexInputBindingDescription bindings[2] = {};
+        bindings[0].binding   = 0;
+        bindings[0].stride    = sizeof(float);
+        bindings[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
+        bindings[1].binding   = 1;
+        bindings[1].stride    = sizeof(float) * 2;
+        bindings[1].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
+
+        // Keep in sync with shaders!
+        VkVertexInputAttributeDescription attribs[2] = {};
+        attribs[0].binding  = 0;
+        attribs[0].location = 0;
+        attribs[0].format   = VK_FORMAT_R32_SFLOAT;
+        attribs[1].binding  = 1;
+        attribs[1].location = 1;
+        attribs[1].format   = VK_FORMAT_R32G32_SFLOAT;
+
+        // Turn the vertex and fragment shader code into shader modules.
+        VkShaderModule vert_module
+            = create_shader_module(this->ctx, std::size(sinewave_vert), sinewave_vert);
+        VkShaderModule frag_module
+            = create_shader_module(this->ctx, std::size(sinewave_frag), sinewave_frag);
+
+        // Keep in sync with shaders!
+        VkPipelineShaderStageCreateInfo stages[2] = {};
+        stages[0].sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+        stages[0].stage  = VK_SHADER_STAGE_VERTEX_BIT;
+        stages[0].module = vert_module;
+        stages[0].pName  = "main";
+        stages[1].sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+        stages[1].stage  = VK_SHADER_STAGE_FRAGMENT_BIT;
+        stages[1].module = frag_module;
+        stages[1].pName  = "main";
+
+        this->pipeline = this->ctx.create_simple_pipeline(this->pipeline_layout,
+                                                          this->render_pass,
+                                                          stages, std::size(stages),
+                                                          bindings, std::size(bindings),
+                                                          attribs, std::size(attribs));
+
+        // Shader modules do not need to be kept around in memory.
+        this->ctx.vkd->destroy_shader_module(this->ctx.dev, vert_module, nullptr);
+        this->ctx.vkd->destroy_shader_module(this->ctx.dev, frag_module, nullptr);
+    }
+
+    /// \brief Re-create the backing swapchain and re-initialize frame buffers if the swapchain
+    /// has become outdated.
+    /// \returns false if the window's framebuffer has no area (nothing is re-created), else true.
+    bool recreate_swapchain(GLFWwindow* const window)
+    {
+        VK_CHECK(this->ctx.vkd->queue_wait_idle(this->ctx.present_queue.queue));
+        int width = 0, height = 0;
+        glfwGetFramebufferSize(window, &width, &height);
+        if(width == 0 || height == 0)
+        {
+            return false;
+        }
+
+        this->sc.recreate({static_cast<uint32_t>(width), static_cast<uint32_t>(height)});
+        this->sc.recreate_framebuffers(this->render_pass, this->framebuffers);
+        return true;
+    }
+
+    /// \brief Start rendering the next frame.
+    /// \returns whether the frame can be rendered at all. This may not be the case on
+    /// some operating systems, for example if the window is minimized and has a
+    /// surface extent of 0 by 0 pixels.
+    bool begin_frame(GLFWwindow* const window)
+    {
+        const frame& current = this->frames[this->frame_index % this->frames.size()];
+        // Wait until the previous instance of this frame is done rendering.
+        current.wait();
+
+        // Re-create the swapchain first if it has become outdated in the meantime. If that
+        // is not possible right now, the frame cannot be rendered.
+        if(this->swapchain_out_of_date && !this->recreate_swapchain(window))
+        {
+            return false;
+        }
+        this->swapchain_out_of_date = false;
+
+        // Acquire the next image index from the swapchain.
+        const swapchain::present_state acquired
+            = this->sc.acquire_next_image(current.image_acquired, frame_timeout);
+        if(acquired == swapchain::present_state::out_of_date)
+        {
+            // Need to re-create immediately.
+            this->swapchain_out_of_date = true;
+            return false;
+        }
+        if(acquired == swapchain::present_state::suboptimal)
+        {
+            // Sub-optimal, but semaphore is already signaled.
+            // Continue rendering this frame and re-create on the next.
+            this->swapchain_out_of_date = true;
+        }
+
+        // Reset the fence backing the frame now that we are creating work.
+        current.reset();
+
+        // Reset the command pool and initialize the command buffer so that we can start
+        // submitting draw commands to it.
+        VK_CHECK(this->ctx.vkd->reset_command_pool(this->ctx.dev, current.cmd_pool, 0));
+
+        VkCommandBufferBeginInfo begin_info = {};
+        begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+        begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+        VK_CHECK(this->ctx.vkd->begin_command_buffer(current.cmd_buf, &begin_info));
+
+        return true;
+    }
+
+ /// \brief End the current frame and submit it to the graphics queue for rendering and the
+ /// present queue for presenting.
+ void end_frame()
+ {
+ const frame& frame = frames[this->frame_index % this->frames.size()];
+
+ VK_CHECK(this->ctx.vkd->end_command_buffer(frame.cmd_buf));
+
+ // The semaphores that we need to wait on before this frame can be rendered completely:
+ // - The frame needs to wait until the image is completely acquired from Vulkan. In
+ // vkAcquireNextImageKHR the implementation may already know _which_ image is going to
+ // be rendered to next, but it may not be quite ready for it yet. This is why we need
+ // to wait on it here.
+ // - HIP needs to be finished with the height buffer, and so the frame also needs to wait
+ // on the semaphore that signals that it is ready.
+#if USE_EXTERNAL_SEMAPHORES == 1 && USE_SIGNAL_SEMAPHORE == 1
+ VkSemaphore wait_semaphores[] = {frame.image_acquired, this->simulation_finished};
+#else
+ VkSemaphore wait_semaphores[] = {frame.image_acquired};
+#endif
+
+ // The pipeline stage at which each of the corresponding \p wait_semaphores need to be
+ // waited upon. This allows Vulkan to start with some rendering processes even though
+ // the semaphores are not yet signaled:
+ // - We only need the swapchain image when we are actually going to draw to it, we can
+ // already perform the vertex shader for example and the fragment shader to some extent
+ // before the output is actually drawn to the swap image.
+ // - The buffer passed to HIP is used for vertex coordinates when drawing in Vulkan,
+ // so that buffer needs to be finished (and its associated \p simulation_finished semaphore
+ // needs to be signaled) before the vertex inputs are bound.
+ const VkPipelineStageFlags wait_dst_stage_masks[]
+ = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT};
+
+ // The semaphores that need to be signaled after this step is finished:
+ // - The \p render_finished semaphore allows us to guard the time between when the rendering
+ // commands are finished (and so when the result is on the swapchain image) and when it can
+ // be copied to the GLFW window.
+ // - The \p buffer_ready semaphore signals that the rendering process is finished, and that we
+ // can perform the next step of the simulation. This prevents HIP from modifying the
+ // buffer while Vulkan has not yet completely rendered it to the swapchain image.
+#if USE_EXTERNAL_SEMAPHORES == 1
+ VkSemaphore signal_semaphores[] = {frame.render_finished, this->buffer_ready};
+#else
+ VkSemaphore signal_semaphores[] = {frame.render_finished};
+#endif
+
+ // Submit the current frame's command buffer to the GPU.
+ VkSubmitInfo submit_info = {};
+ submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submit_info.waitSemaphoreCount = std::size(wait_semaphores);
+ submit_info.pWaitSemaphores = wait_semaphores;
+ submit_info.pWaitDstStageMask = wait_dst_stage_masks;
+ submit_info.signalSemaphoreCount = std::size(signal_semaphores);
+ submit_info.pSignalSemaphores = signal_semaphores;
+ submit_info.commandBufferCount = 1;
+ submit_info.pCommandBuffers = &frame.cmd_buf;
+ VK_CHECK(this->ctx.vkd->queue_submit(this->ctx.graphics_queue.queue,
+ 1,
+ &submit_info,
+ frame.frame_fence));
+
+ // Then finally ask the swapchain to draw the current image to the GLFW window, when rendering
+ // is finished.
+ const swapchain::present_state present_state = this->sc.present(frame.render_finished);
+ if(present_state != swapchain::present_state::optimal)
+ this->swapchain_out_of_date = true;
+
+ ++this->frame_index;
+ }
+
+ /// \brief This function updates the height buffer with new coordinates.
+ void step_simulation()
+ {
+     // Take care that we are not going to modify the buffer before it is ready.
+#if USE_EXTERNAL_SEMAPHORES == 1
+     // If semaphores are supported and used, we need to wait on it so that it is
+     // certain that Vulkan is no longer using the buffer.
+     // Note: This semaphore is not signaled in the first frame, so we don't need to wait
+     // on it then.
+     if(this->frame_index != 0)
+     {
+         hipExternalSemaphoreWaitParams wait_params = {};
+         HIP_CHECK(hipWaitExternalSemaphoresAsync(&this->hip_buffer_ready,
+                                                  &wait_params,
+                                                  1,
+                                                  this->hip_stream));
+     }
+#else
+     // If semaphores are not supported or not used, then we need to perform a full queue
+     // sync to be sure that Vulkan is not using the buffer anymore.
+     VK_CHECK(this->ctx.vkd->queue_wait_idle(this->ctx.graphics_queue.queue));
+#endif
+
+     // Elapsed time since start_time as floating-point seconds; this is passed to the kernel.
+     const auto now = std::chrono::high_resolution_clock::now();
+     const std::chrono::duration<float> elapsed = now - this->start_time;
+     const float time = elapsed.count();
+
+     // The tile size to be used for each block of the computation. A tile is
+     // tile_size by tile_size threads in this case, since we are invoking the
+     // computation over a 2D-grid.
+     constexpr size_t tile_size = 8;
+
+     // Launch the HIP kernel to advance the simulation.
+     hipLaunchKernelGGL(HIP_KERNEL_NAME(sinewave_kernel),
+                        dim3((grid_width + tile_size - 1) / tile_size,
+                             (grid_height + tile_size - 1) / tile_size),
+                        dim3(tile_size, tile_size),
+                        0,
+                        this->hip_stream,
+                        this->hip_height_buffer,
+                        time);
+     HIP_CHECK(hipGetLastError());
+
+     // Signal to Vulkan that we are done with the buffer and that it can proceed
+     // with rendering.
+#if USE_EXTERNAL_SEMAPHORES == 1 && USE_SIGNAL_SEMAPHORE == 1
+     // If semaphores are supported and used, signal the semaphore that indicates
+     // that the simulation has finished.
+     hipExternalSemaphoreSignalParams signal_params = {};
+     HIP_CHECK(hipSignalExternalSemaphoresAsync(&this->hip_simulation_finished,
+                                                &signal_params,
+                                                1,
+                                                this->hip_stream));
+#else
+     // If semaphores are not used or not supported, we need to again perform a full
+     // queue sync from the HIP side this time.
+     HIP_CHECK(hipStreamSynchronize(this->hip_stream));
+#endif
+ }
+
+ /// \brief Draw the next frame to the window.
+ void draw(GLFWwindow* const window)
+ {
+     if(!this->begin_frame(window))
+         return;
+
+     // Advance the simulation on the HIP side.
+     this->step_simulation();
+
+     // Render the grid to the screen from the Vulkan side.
+     const frame& frame = frames[this->frame_index % this->frames.size()];
+     const VkCommandBuffer cmd_buf = frame.cmd_buf;
+
+     // Initialize the rendering pass
+     VkClearValue clear_color = {};
+
+     VkViewport viewport = {};
+     viewport.width = this->sc.extent.width;
+     viewport.height = this->sc.extent.height;
+     viewport.minDepth = 0;
+     viewport.maxDepth = 1;
+
+     VkRect2D scissor = {};
+     scissor.extent = this->sc.extent;
+
+     const device_dispatch& vkd = *this->ctx.vkd;
+
+     vkd.cmd_set_viewport(cmd_buf, 0, 1, &viewport);
+     vkd.cmd_set_scissor(cmd_buf, 0, 1, &scissor);
+
+     VkRenderPassBeginInfo rp_begin_info = {};
+     rp_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+     rp_begin_info.renderPass = this->render_pass;
+     rp_begin_info.framebuffer = this->framebuffers[this->sc.image_index];
+     rp_begin_info.renderArea = scissor;
+     rp_begin_info.clearValueCount = 1;
+     rp_begin_info.pClearValues = &clear_color;
+     vkd.cmd_begin_render_pass(cmd_buf, &rp_begin_info, VK_SUBPASS_CONTENTS_INLINE);
+
+     // Bind the pipeline that we are using to render with.
+     vkd.cmd_bind_pipeline(cmd_buf, VK_PIPELINE_BIND_POINT_GRAPHICS, this->pipeline);
+
+     VkBuffer vertex_buffers[] = {this->height_buffer, this->grid_buffer};
+     VkDeviceSize offsets[] = {0, 0};
+     vkd.cmd_bind_vertex_buffers(cmd_buf, 0, std::size(vertex_buffers), vertex_buffers, offsets);
+     vkd.cmd_bind_index_buffer(cmd_buf, this->index_buffer, 0, VK_INDEX_TYPE_UINT32);
+
+     // Draw the triangles.
+     vkd.cmd_draw_indexed(cmd_buf, num_indices, 1, 0, 0, 0);
+
+     vkd.cmd_end_render_pass(cmd_buf);
+
+     this->end_frame();
+
+     // Output a naive performance measurement: average FPS over windows of at least 5 seconds.
+     const auto frame_time = std::chrono::high_resolution_clock::now();
+     const auto time_diff = frame_time - this->fps_start_time;
+     if(time_diff > std::chrono::seconds{5})
+     {
+         const auto time_diff_sec
+             = std::chrono::duration_cast<std::chrono::duration<float>>(time_diff).count();
+         const uint32_t num_frames = this->frame_index - this->fps_start_frame;
+         std::cout << "Average FPS (over " << std::fixed << std::setprecision(2) << time_diff_sec
+                   << " seconds): " << std::fixed << std::setprecision(2)
+                   << num_frames / time_diff_sec << " (" << std::fixed << std::setprecision(2)
+                   << (time_diff_sec * 1000) / num_frames << " ms per frame)" << std::endl;
+         this->fps_start_frame = this->frame_index;
+         this->fps_start_time = frame_time;
+     }
+ }
+};
+
+/// \brief GLFW framebuffer resize callback: flags the swapchain of the \p renderer stored in the
+/// window's user pointer as out of date, so that it is re-created on the next frame.
+void resize_callback(GLFWwindow* const window, const int, const int)
+{
+    renderer* r = static_cast<renderer*>(glfwGetWindowUserPointer(window));
+    r->swapchain_out_of_date = true;
+}
+
+/// \brief Program entry point.
+int main()
+{
+    // The initial size of the GLFW window when the example is first started.
+    constexpr VkExtent2D initial_window_extent = {1280, 800};
+
+    // Initialize GLFW.
+    glfwSetErrorCallback(
+        [](int code, const char* const message)
+        { std::cerr << "A glfw error encountered: " << message << "(" << code << ")\n"; });
+
+    if(glfwInit() != GLFW_TRUE)
+    {
+        std::cerr << "failed to initialize GLFW\n";
+        return error_exit_code;
+    }
+
+    // Describe the application to Vulkan and create the window.
+    VkApplicationInfo app_info = {};
+    app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+    app_info.pApplicationName = "HIP-Vulkan interop example";
+    app_info.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
+    app_info.pEngineName = "rocm-examples";
+    app_info.engineVersion = VK_MAKE_VERSION(1, 0, 0);
+    app_info.apiVersion = VK_MAKE_VERSION(1, 0, 0);
+
+    GLFWwindow* window = create_window(app_info, initial_window_extent);
+
+    // Create the base Vulkan types: Load base function pointers, create instance, load
+    // instance function pointers, create the surface.
+    const auto vkb = std::make_unique<base_dispatch>(glfwGetInstanceProcAddress);
+    const VkInstance instance = create_instance(*vkb,
+                                                app_info,
+                                                required_instance_extensions,
+                                                std::size(required_instance_extensions));
+    const auto vki = std::make_unique<instance_dispatch>(*vkb, instance);
+    const VkSurfaceKHR surface = create_surface(instance, window);
+
+    // Try to find a physical device that we can use for this example.
+    physical_device_candidate candidate;
+    find_physical_device(*vki, instance, surface, candidate);
+
+    const hipDevice_t hip_device = candidate.hip_candidate.device;
+
+    // Let the user know which device we are using, on both the Vulkan and HIP sides.
+    hipDeviceProp_t hip_props;
+    HIP_CHECK(hipGetDeviceProperties(&hip_props, hip_device));
+
+    std::cout << "Using device " << candidate.props.deviceName << " (hip device " << hip_device
+              << ", UUID " << candidate.hip_candidate.device_uuid << ", compute capability "
+              << hip_props.major << "." << hip_props.minor << ")\n";
+
+    {
+        // Initialize the rendering resources, both the Vulkan and HIP ones.
+        // These are defined in a sub-scope so that the destructors are
+        // invoked before we call `glfwDestroyWindow` and `glfwTerminate`.
+        graphics_context ctx(vki.get(),
+                             instance,
+                             surface,
+                             candidate.pdev,
+                             candidate.queues,
+                             required_device_extensions,
+                             std::size(required_device_extensions));
+
+        swapchain swapchain(ctx, initial_window_extent);
+        renderer renderer(ctx, swapchain, hip_device);
+
+        glfwSetWindowUserPointer(window, static_cast<void*>(&renderer));
+        glfwSetFramebufferSizeCallback(window, resize_callback);
+
+        // The main rendering loop.
+        // Repeat for as long as the window is not closed.
+        while(glfwWindowShouldClose(window) == GLFW_FALSE)
+        {
+            renderer.draw(window);
+            glfwPollEvents();
+        }
+
+        glfwSetFramebufferSizeCallback(window, nullptr);
+        glfwSetWindowUserPointer(window, nullptr);
+    }
+
+    // Destroy the surface and instance now that we are done with them.
+    vki->destroy_surface(instance, surface, nullptr);
+    vki->destroy_instance(instance, nullptr);
+
+    // Clean up GLFW.
+    glfwDestroyWindow(window);
+    glfwTerminate();
+
+    return 0;
+}
diff --git a/HIP-Basic/vulkan_interop/nvidia_hip_fix.hpp b/HIP-Basic/vulkan_interop/nvidia_hip_fix.hpp
new file mode 100644
index 000000000..5e967b72a
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/nvidia_hip_fix.hpp
@@ -0,0 +1,69 @@
+#ifndef _HIP_BASIC_VULKAN_INTEROP_NVIDIA_HIP_FIX_HPP
+#define _HIP_BASIC_VULKAN_INTEROP_NVIDIA_HIP_FIX_HPP
+
+#include <hip/hip_runtime.h>
+
+// These HIP symbols are missing when compiling for NVIDIA; the wrappers are `inline` for ODR safety.
+// TODO: Remove once HIP provides them, see https://github.com/ROCm-Developer-Tools/hipamd/issues/49.
+#if defined(__HIP_PLATFORM_NVCC__) && !defined(hipExternalMemoryHandleTypeOpaqueFd)
+    #define hipExternalMemoryHandleType cudaExternalMemoryHandleType
+    #define hipExternalMemoryHandleTypeOpaqueFd cudaExternalMemoryHandleTypeOpaqueFd
+    #define hipExternalSemaphoreHandleType cudaExternalSemaphoreHandleType
+    #define hipExternalSemaphoreHandleTypeOpaqueFd cudaExternalSemaphoreHandleTypeOpaqueFd
+    #define hipExternalMemory_t cudaExternalMemory_t
+    #define hipExternalMemoryHandleDesc cudaExternalMemoryHandleDesc
+    #define hipExternalSemaphoreHandleDesc cudaExternalSemaphoreHandleDesc
+    #define hipExternalMemoryBufferDesc cudaExternalMemoryBufferDesc
+    #define hipExternalSemaphore_t cudaExternalSemaphore_t
+    #define hipExternalSemaphoreSignalParams cudaExternalSemaphoreSignalParams
+    #define hipExternalSemaphoreWaitParams cudaExternalSemaphoreWaitParams
+
+inline hipError_t hipImportExternalMemory(hipExternalMemory_t*               extmem,
+                                          const hipExternalMemoryHandleDesc* desc)
+{
+    return hipCUDAErrorTohipError(cudaImportExternalMemory(extmem, desc));
+}
+
+inline hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t*               extmem,
+                                             const hipExternalSemaphoreHandleDesc* desc)
+{
+    return hipCUDAErrorTohipError(cudaImportExternalSemaphore(extmem, desc));
+}
+
+inline hipError_t hipDestroyExternalMemory(hipExternalMemory_t extmem)
+{
+    return hipCUDAErrorTohipError(cudaDestroyExternalMemory(extmem));
+}
+
+inline hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extmem)
+{
+    return hipCUDAErrorTohipError(cudaDestroyExternalSemaphore(extmem));
+}
+
+inline hipError_t hipExternalMemoryGetMappedBuffer(void**                             ptr,
+                                                   hipExternalMemory_t                extmem,
+                                                   const hipExternalMemoryBufferDesc* desc)
+{
+    return hipCUDAErrorTohipError(cudaExternalMemoryGetMappedBuffer(ptr, extmem, desc));
+}
+
+inline hipError_t hipSignalExternalSemaphoresAsync(const hipExternalSemaphore_t*           extsems,
+                                                   const hipExternalSemaphoreSignalParams* params,
+                                                   unsigned int                            num_sems,
+                                                   hipStream_t                             stream)
+{
+    return hipCUDAErrorTohipError(
+        cudaSignalExternalSemaphoresAsync(extsems, params, num_sems, stream));
+}
+
+inline hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t*         extsems,
+                                                 const hipExternalSemaphoreWaitParams* params,
+                                                 unsigned int                          num_sems,
+                                                 hipStream_t                           stream)
+{
+    return hipCUDAErrorTohipError(
+        cudaWaitExternalSemaphoresAsync(extsems, params, num_sems, stream));
+}
+#endif
+
+#endif
diff --git a/HIP-Basic/vulkan_interop/sinewave.frag b/HIP-Basic/vulkan_interop/sinewave.frag
new file mode 100644
index 000000000..3eaa2c5a2
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/sinewave.frag
@@ -0,0 +1,10 @@
+#version 450
+
+layout(location = 0) out vec4 out_color;
+
+layout(location = 0) in float frag_height;
+
+void main()
+{
+ out_color = vec4(vec3(frag_height * 0.5 + 0.5), 1.0);
+}
diff --git a/HIP-Basic/vulkan_interop/sinewave.vert b/HIP-Basic/vulkan_interop/sinewave.vert
new file mode 100644
index 000000000..25a406307
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/sinewave.vert
@@ -0,0 +1,12 @@
+#version 450
+
+layout(location = 0) in float height;
+layout(location = 1) in vec2 xy;
+
+layout(location = 0) out float frag_height;
+
+void main()
+{
+ gl_Position = vec4(xy, 0, 1);
+ frag_height = height;
+}
diff --git a/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.sln b/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.sln
new file mode 100644
index 000000000..29f7e915f
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vulkan_interop_vs2019", "vulkan_interop_vs2019.vcxproj", "{688433E2-B189-431D-A5F8-9AC82102B58C}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Debug|x64.ActiveCfg = Debug|x64
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Debug|x64.Build.0 = Debug|x64
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Release|x64.ActiveCfg = Release|x64
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {F896B114-6DA9-45D0-B06E-31D84F18F73B}
+ EndGlobalSection
+EndGlobal
diff --git a/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.vcxproj b/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.vcxproj
new file mode 100644
index 000000000..9363b26c9
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.vcxproj
@@ -0,0 +1,130 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Document
+ $(VULKAN_SDK)\Bin\glslangValidator.exe %(FullPath) -o %(IntDir)%(Identity).spv.h --vn sinewave_frag -V100
+ Compiling Fragment Shader
+ %(IntDir)%(Identity).spv.h
+ $(VULKAN_SDK)\Bin\glslangValidator.exe %(FullPath) -o %(IntDir)%(Identity).spv.h --vn sinewave_frag -V100
+ Compiling Fragment Shader
+ %(IntDir)%(Identity).spv.h
+
+
+ Document
+ $(VULKAN_SDK)\Bin\glslangValidator.exe %(FullPath) -o %(IntDir)%(Identity).spv.h --vn sinewave_vert -V100
+ Compiling Vertex Shader
+ %(IntDir)%(Identity).spv.h
+ $(VULKAN_SDK)\Bin\glslangValidator.exe %(FullPath) -o %(IntDir)%(Identity).spv.h --vn sinewave_vert -V100
+ Compiling Vertex Shader
+ %(IntDir)%(Identity).spv.h
+
+
+
+ 15.0
+ {688433e2-b189-431d-a5f8-9ac82102b58c}
+ Win32Proj
+ vulkan_interop
+ 10.0
+ vulkan_interop_vs2019
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ ClCompile
+ hip_$(ProjectName)
+
+
+ false
+ ClCompile
+ hip_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(IntDir);$(MSBuildProjectDirectory)\..\..\Common;$(VULKAN_SDK)\Include;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+ glfw3dll.lib;%(AdditionalDependencies)
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(IntDir);$(MSBuildProjectDirectory)\..\..\Common;$(VULKAN_SDK)\Include;%(AdditionalIncludeDirectories)
+ stdcpp17
+
+
+ Console
+ true
+ true
+ true
+ glfw3dll.lib;%(AdditionalDependencies)
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.vcxproj.filters b/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.vcxproj.filters
new file mode 100644
index 000000000..5da63f266
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/vulkan_interop_vs2019.vcxproj.filters
@@ -0,0 +1,41 @@
+
+
+
+
+ {d14ca4e1-beab-46bc-9c78-437de98683c9}
+ cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu
+
+
+ {95061464-7a10-4067-827c-2f727e3d64c3}
+ h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh
+
+
+ {c9ca99c5-f62b-4480-b631-3cc751fa0fd6}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Header Files
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+
diff --git a/HIP-Basic/vulkan_interop/vulkan_utils.hip b/HIP-Basic/vulkan_interop/vulkan_utils.hip
new file mode 100644
index 000000000..008ade384
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/vulkan_utils.hip
@@ -0,0 +1,827 @@
+#include "vulkan_utils.hpp"
+
+namespace
+{
+
+/// \brief The validation layers that we want to be active by default.
+/// The \p VK_LAYER_KHRONOS_validation layer performs general checks on Vulkan
+/// calls.
+static constexpr const char* validation_layers[] = {"VK_LAYER_KHRONOS_validation"};
+
+/// \brief A utility function that helps to load a function pointer from Vulkan. If the specified
+/// function pointer is not available in the implementation, this function prints an error message
+/// and exits the program.
+///
+/// By passing the target variable as a parameter rather than returning the function
+/// pointer, we can perform the cast in this function, and so save a bit of repetitive
+/// typing that way.
+template<typename FuncType, typename Loader, typename HandleType>
+void load_vulkan_function(FuncType& fptr, Loader loader, HandleType handle, const char* const name)
+{
+    fptr = reinterpret_cast<FuncType>(loader(handle, name));
+    if(fptr == nullptr)
+    {
+        std::cerr << "Failed to load vulkan function pointer " << name << std::endl;
+        std::exit(error_exit_code);
+    }
+}
+
+} // namespace
+
+base_dispatch::base_dispatch(PFN_vkGetInstanceProcAddr loader)
+{
+ const auto load
+ = [&](auto& fptr, const char* name) { load_vulkan_function(fptr, loader, nullptr, name); };
+
+ this->get_instance_proc_addr = loader;
+ load(this->enumerate_instance_extension_properties, "vkEnumerateInstanceExtensionProperties");
+ load(this->create_instance, "vkCreateInstance");
+}
+
+instance_dispatch::instance_dispatch(const base_dispatch& dispatch, VkInstance instance)
+{
+ const auto load = [&](auto& fptr, const char* name)
+ { load_vulkan_function(fptr, dispatch.get_instance_proc_addr, instance, name); };
+
+ load(this->destroy_instance, "vkDestroyInstance");
+ load(this->destroy_surface, "vkDestroySurfaceKHR");
+ load(this->enumerate_physical_devices, "vkEnumeratePhysicalDevices");
+ load(this->get_physical_device_properties2, "vkGetPhysicalDeviceProperties2KHR");
+ load(this->get_physical_device_memory_properties, "vkGetPhysicalDeviceMemoryProperties");
+ load(this->get_physical_device_surface_formats, "vkGetPhysicalDeviceSurfaceFormatsKHR");
+ load(this->get_physical_device_surface_present_modes,
+ "vkGetPhysicalDeviceSurfacePresentModesKHR");
+ load(this->enumerate_device_extension_properties, "vkEnumerateDeviceExtensionProperties");
+ load(this->get_physical_device_queue_family_properties,
+ "vkGetPhysicalDeviceQueueFamilyProperties");
+ load(this->get_physical_device_surface_support, "vkGetPhysicalDeviceSurfaceSupportKHR");
+ load(this->create_device, "vkCreateDevice");
+ load(this->get_device_proc_addr, "vkGetDeviceProcAddr");
+ load(this->get_physical_device_surface_capabilities,
+ "vkGetPhysicalDeviceSurfaceCapabilitiesKHR");
+}
+
+device_dispatch::device_dispatch(const instance_dispatch& dispatch, VkDevice device)
+{
+ const auto load = [&](auto& fptr, const char* name)
+ { load_vulkan_function(fptr, dispatch.get_device_proc_addr, device, name); };
+
+ load(this->destroy_device, "vkDestroyDevice");
+ load(this->get_device_queue, "vkGetDeviceQueue");
+ load(this->create_swapchain, "vkCreateSwapchainKHR");
+ load(this->destroy_swapchain, "vkDestroySwapchainKHR");
+ load(this->get_swapchain_images, "vkGetSwapchainImagesKHR");
+ load(this->create_image_view, "vkCreateImageView");
+ load(this->destroy_image_view, "vkDestroyImageView");
+ load(this->create_semaphore, "vkCreateSemaphore");
+ load(this->destroy_semaphore, "vkDestroySemaphore");
+ load(this->create_fence, "vkCreateFence");
+ load(this->destroy_fence, "vkDestroyFence");
+ load(this->create_command_pool, "vkCreateCommandPool");
+ load(this->destroy_command_pool, "vkDestroyCommandPool");
+ load(this->allocate_command_buffers, "vkAllocateCommandBuffers");
+ load(this->wait_for_fences, "vkWaitForFences");
+ load(this->reset_fences, "vkResetFences");
+ load(this->acquire_next_image, "vkAcquireNextImageKHR");
+ load(this->queue_present, "vkQueuePresentKHR");
+ load(this->reset_command_pool, "vkResetCommandPool");
+ load(this->begin_command_buffer, "vkBeginCommandBuffer");
+ load(this->end_command_buffer, "vkEndCommandBuffer");
+ load(this->queue_submit, "vkQueueSubmit");
+ load(this->create_render_pass, "vkCreateRenderPass");
+ load(this->destroy_render_pass, "vkDestroyRenderPass");
+ load(this->create_framebuffer, "vkCreateFramebuffer");
+ load(this->destroy_framebuffer, "vkDestroyFramebuffer");
+ load(this->create_shader_module, "vkCreateShaderModule");
+ load(this->destroy_shader_module, "vkDestroyShaderModule");
+ load(this->create_graphics_pipelines, "vkCreateGraphicsPipelines");
+ load(this->destroy_pipeline, "vkDestroyPipeline");
+ load(this->create_pipeline_layout, "vkCreatePipelineLayout");
+ load(this->destroy_pipeline_layout, "vkDestroyPipelineLayout");
+ load(this->queue_wait_idle, "vkQueueWaitIdle");
+ load(this->cmd_set_viewport, "vkCmdSetViewport");
+ load(this->cmd_set_scissor, "vkCmdSetScissor");
+ load(this->cmd_begin_render_pass, "vkCmdBeginRenderPass");
+ load(this->cmd_bind_pipeline, "vkCmdBindPipeline");
+ load(this->cmd_end_render_pass, "vkCmdEndRenderPass");
+ load(this->cmd_draw_indexed, "vkCmdDrawIndexed");
+ load(this->create_buffer, "vkCreateBuffer");
+ load(this->destroy_buffer, "vkDestroyBuffer");
+ load(this->allocate_memory, "vkAllocateMemory");
+ load(this->free_memory, "vkFreeMemory");
+ load(this->get_buffer_memory_requirements, "vkGetBufferMemoryRequirements");
+ load(this->bind_buffer_memory, "vkBindBufferMemory");
+ load(this->cmd_copy_buffer, "vkCmdCopyBuffer");
+ load(this->map_memory, "vkMapMemory");
+ load(this->unmap_memory, "vkUnmapMemory");
+ load(this->cmd_bind_vertex_buffers, "vkCmdBindVertexBuffers");
+ load(this->cmd_bind_index_buffer, "vkCmdBindIndexBuffer");
+#ifdef _WIN64
+ load(this->get_memory_win32_handle, "vkGetMemoryWin32HandleKHR");
+ load(this->get_semaphore_win32_handle, "vkGetSemaphoreWin32HandleKHR");
+#else
+ load(this->get_memory_fd, "vkGetMemoryFdKHR");
+ load(this->get_semaphore_fd, "vkGetSemaphoreFdKHR");
+#endif
+}
+
+GLFWwindow* create_window(const VkApplicationInfo& app_info, const VkExtent2D extent)
+{
+ glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
+
+ GLFWwindow* window = glfwCreateWindow(extent.width,
+ extent.height,
+ app_info.pApplicationName,
+ nullptr,
+ nullptr);
+ if(window == nullptr)
+ {
+ std::cerr << "Failed to create GLFW window\n";
+ std::exit(error_exit_code);
+ }
+
+ return window;
+}
+
+VkInstance create_instance(const base_dispatch& dispatch,
+                           const VkApplicationInfo& app_info,
+                           const char* const* const required_extensions,
+                           const size_t num_required_extensions,
+                           const bool with_validation)
+{
+    uint32_t glfw_extension_count;
+    const char* const* const glfw_extensions
+        = glfwGetRequiredInstanceExtensions(&glfw_extension_count);
+
+    std::vector<const char*> all_required_extensions;
+    all_required_extensions.insert(all_required_extensions.end(),
+                                   glfw_extensions,
+                                   glfw_extensions + glfw_extension_count);
+    all_required_extensions.insert(all_required_extensions.end(),
+                                   required_extensions,
+                                   required_extensions + num_required_extensions);
+
+    uint32_t supported_extension_count;
+    VK_CHECK(dispatch.enumerate_instance_extension_properties(nullptr,
+                                                              &supported_extension_count,
+                                                              nullptr));
+    std::vector<VkExtensionProperties> supported_extensions(supported_extension_count);
+    VK_CHECK(dispatch.enumerate_instance_extension_properties(nullptr,
+                                                              &supported_extension_count,
+                                                              supported_extensions.data()));
+    if(!extensions_supported(supported_extensions,
+                             all_required_extensions.begin(),
+                             all_required_extensions.end()))
+    {
+        std::cerr << "Required instance extensions are not supported\n";
+        std::exit(error_exit_code);
+    }
+
+    VkInstanceCreateInfo create_info = {};
+    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+    create_info.pApplicationInfo = &app_info;
+    create_info.enabledExtensionCount = all_required_extensions.size();
+    create_info.ppEnabledExtensionNames = all_required_extensions.data();
+    if(with_validation)
+    {
+        create_info.enabledLayerCount = std::size(validation_layers);
+        create_info.ppEnabledLayerNames = validation_layers;
+    }
+
+    VkInstance instance;
+    VK_CHECK(dispatch.create_instance(&create_info, nullptr, &instance));
+    return instance;
+}
+
+VkSurfaceKHR create_surface(const VkInstance instance, GLFWwindow* window)
+{
+ VkSurfaceKHR surface;
+ VK_CHECK(glfwCreateWindowSurface(instance, window, nullptr, &surface));
+ return surface;
+}
+
+bool check_surface_support(const instance_dispatch& dispatch,
+ const VkPhysicalDevice pdev,
+ const VkSurfaceKHR surface)
+{
+ uint32_t format_count;
+ VK_CHECK(dispatch.get_physical_device_surface_formats(pdev, surface, &format_count, nullptr));
+
+ uint32_t present_mode_count;
+ VK_CHECK(dispatch.get_physical_device_surface_present_modes(pdev,
+ surface,
+ &present_mode_count,
+ nullptr));
+
+ return format_count > 0 && present_mode_count > 0;
+}
+
+bool check_device_extensions(const instance_dispatch& dispatch,
+                             const VkPhysicalDevice pdev,
+                             const char* const* const required_extensions,
+                             const size_t num_required_extensions)
+{
+    uint32_t supported_extension_count;
+    VK_CHECK(dispatch.enumerate_device_extension_properties(pdev,
+                                                            nullptr,
+                                                            &supported_extension_count,
+                                                            nullptr));
+    std::vector<VkExtensionProperties> supported_extensions_properties(supported_extension_count);
+    VK_CHECK(
+        dispatch.enumerate_device_extension_properties(pdev,
+                                                       nullptr,
+                                                       &supported_extension_count,
+                                                       supported_extensions_properties.data()));
+
+    return extensions_supported(supported_extensions_properties,
+                                required_extensions,
+                                required_extensions + num_required_extensions);
+}
+
+bool allocate_device_queues(const instance_dispatch& dispatch,
+                            const VkPhysicalDevice pdev,
+                            const VkSurfaceKHR surface,
+                            queue_allocation& queues)
+{
+    uint32_t family_count;
+    dispatch.get_physical_device_queue_family_properties(pdev, &family_count, nullptr);
+    std::vector<VkQueueFamilyProperties> families(family_count);
+    dispatch.get_physical_device_queue_family_properties(pdev, &family_count, families.data());
+
+    int64_t graphics_family = -1;
+    int64_t present_family = -1;
+    for(uint32_t i = 0; i < family_count; ++i)
+    {
+        if(graphics_family < 0 && families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)
+            graphics_family = i;
+
+        VkBool32 present_support = VK_FALSE;
+        VK_CHECK(dispatch.get_physical_device_surface_support(pdev, i, surface, &present_support));
+        if(present_family < 0 && present_support == VK_TRUE)
+            present_family = i;
+    }
+
+    if(graphics_family < 0 || present_family < 0)
+    {
+        return false;
+    }
+
+    queues.graphics_family = graphics_family;
+    queues.graphics_family_properties = families[graphics_family];
+    queues.present_family = present_family;
+    queues.present_family_properties = families[present_family];
+
+    return true;
+}
+
+VkDevice create_device(const instance_dispatch& dispatch,
+                       const VkPhysicalDevice pdev,
+                       const queue_allocation& queues,
+                       const char* const* const required_extensions,
+                       const size_t num_required_extensions)
+{
+    float priorities[] = {1.f, 1.f};
+    uint32_t num_queues;
+    VkDeviceQueueCreateInfo queue_create_infos[2] = {};
+
+    // Set up the queue create info for each queue family. We need to consider
+    // - The queues may be the same if the queue family supports both graphics and presenting.
+    //   In this case we can try to allocate two separate queues if the device has enough queues
+    //   for this family. If not, we just use the same queue for both of these operations.
+    // - Otherwise we need to allocate two separate queues, one from each family.
+    if(queues.graphics_family == queues.present_family)
+    {
+        // Queues are the same. Try to allocate 2 if possible, otherwise use the same queue index.
+        num_queues = 1;
+        queue_create_infos[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+        queue_create_infos[0].queueFamilyIndex = queues.graphics_family;
+        queue_create_infos[0].queueCount
+            = std::min<uint32_t>(2, queues.graphics_family_properties.queueCount);
+        queue_create_infos[0].pQueuePriorities = priorities;
+    }
+    else
+    {
+        // Different families, so we can allocate them separately.
+        num_queues = 2;
+        queue_create_infos[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+        queue_create_infos[0].queueFamilyIndex = queues.graphics_family;
+        queue_create_infos[0].queueCount = 1;
+        queue_create_infos[0].pQueuePriorities = priorities;
+
+        queue_create_infos[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+        queue_create_infos[1].queueFamilyIndex = queues.present_family;
+        queue_create_infos[1].queueCount = 1;
+        queue_create_infos[1].pQueuePriorities = priorities;
+    }
+
+    VkDeviceCreateInfo device_create_info = {};
+    device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+    device_create_info.queueCreateInfoCount = num_queues;
+    device_create_info.pQueueCreateInfos = queue_create_infos;
+    device_create_info.ppEnabledExtensionNames = required_extensions;
+    device_create_info.enabledExtensionCount = num_required_extensions;
+
+    VkDevice dev;
+    VK_CHECK(dispatch.create_device(pdev, &device_create_info, nullptr, &dev));
+
+    return dev;
+}
+
+void create_device_queues(const device_dispatch&  dispatch,
+                          const VkDevice          device,
+                          const queue_allocation& queues,
+                          queue&                  graphics_queue,
+                          queue&                  present_queue)
+{
+    // Mirror the decision made when the logical device was created: the graphics queue is
+    // always queue 0 of its family. The present queue is queue 1 only when both roles share
+    // a family that exposes at least two queues; in every other case it is queue 0 as well.
+    const bool     shared_family   = queues.graphics_family == queues.present_family;
+    const bool     has_spare_queue = queues.graphics_family_properties.queueCount >= 2;
+    const uint32_t graphics_slot   = 0;
+    const uint32_t present_slot    = (shared_family && has_spare_queue) ? 1u : 0u;
+
+    graphics_queue.family = queues.graphics_family;
+    present_queue.family  = queues.present_family;
+
+    dispatch.get_device_queue(device,
+                              queues.graphics_family,
+                              graphics_slot,
+                              &graphics_queue.queue);
+    dispatch.get_device_queue(device,
+                              queues.present_family,
+                              present_slot,
+                              &present_queue.queue);
+}
+
+graphics_context::graphics_context(const instance_dispatch* vki,
+                                   const VkInstance         instance,
+                                   const VkSurfaceKHR       surface,
+                                   const VkPhysicalDevice   pdev,
+                                   const queue_allocation&  queues,
+                                   const char* const* const required_device_extensions,
+                                   const size_t             num_required_device_extensions)
+    : vki(vki), instance(instance), surface(surface), pdev(pdev)
+{
+    // Cache the memory properties of the device; find_memory_type_index() reads them later.
+    this->vki->get_physical_device_memory_properties(this->pdev, &this->mem_props);
+
+    // Create a Vulkan logical device from the physical device candidate, load the device
+    // function pointers into a device_dispatch table, and then fetch the queue handles.
+    this->dev = create_device(*this->vki,
+                              this->pdev,
+                              queues,
+                              required_device_extensions,
+                              num_required_device_extensions);
+    this->vkd = std::make_unique<device_dispatch>(*this->vki, this->dev);
+    create_device_queues(*this->vkd, this->dev, queues, this->graphics_queue, this->present_queue);
+
+    // Create a Vulkan command pool on the graphics family that we can use for one-shot command
+    // submissions, like uploading buffers to the device via Vulkan.
+    VkCommandPoolCreateInfo cmd_pool_create_info = {};
+    cmd_pool_create_info.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    cmd_pool_create_info.queueFamilyIndex        = this->graphics_queue.family;
+    VK_CHECK(this->vkd->create_command_pool(this->dev,
+                                            &cmd_pool_create_info,
+                                            nullptr,
+                                            &this->one_time_submit_pool));
+}
+
+graphics_context::~graphics_context()
+{
+    vkd->destroy_command_pool(dev, one_time_submit_pool, nullptr); // pool was created on dev
+    vkd->destroy_device(dev, nullptr); // the device goes last
+}
+
+VkSurfaceFormatKHR graphics_context::find_surface_format() const
+{
+    // Returns the preferred B8G8R8A8_UNORM / sRGB-nonlinear pair when the surface offers it,
+    // and the first format reported for the surface otherwise.
+    // TODO: Check that the format has the required format features?
+    constexpr VkFormat        preferred_format      = VK_FORMAT_B8G8R8A8_UNORM;
+    constexpr VkColorSpaceKHR preferred_color_space = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
+
+    uint32_t format_count;
+    VK_CHECK(this->vki->get_physical_device_surface_formats(this->pdev,
+                                                            this->surface,
+                                                            &format_count,
+                                                            nullptr));
+    std::vector<VkSurfaceFormatKHR> formats(format_count);
+    VK_CHECK(this->vki->get_physical_device_surface_formats(this->pdev,
+                                                            this->surface,
+                                                            &format_count,
+                                                            formats.data()));
+
+    for(const VkSurfaceFormatKHR format : formats)
+    {
+        if(format.format == preferred_format && format.colorSpace == preferred_color_space)
+            return format;
+    }
+
+    return formats[0]; // fallback; assumes the surface reports at least one format
+}
+
+VkPresentModeKHR graphics_context::find_present_mode() const
+{
+    // Returns the first mode reported for the surface that is MAILBOX or IMMEDIATE, else FIFO.
+    uint32_t present_mode_count;
+    VK_CHECK(this->vki->get_physical_device_surface_present_modes(this->pdev,
+                                                                  this->surface,
+                                                                  &present_mode_count,
+                                                                  nullptr));
+    std::vector<VkPresentModeKHR> modes(present_mode_count);
+    VK_CHECK(this->vki->get_physical_device_surface_present_modes(this->pdev,
+                                                                  this->surface,
+                                                                  &present_mode_count,
+                                                                  modes.data()));
+
+    constexpr VkPresentModeKHR preferred[]
+        = {VK_PRESENT_MODE_MAILBOX_KHR, VK_PRESENT_MODE_IMMEDIATE_KHR};
+
+    for(const VkPresentModeKHR mode : modes)
+    {
+        if(std::find(std::begin(preferred), std::end(preferred), mode) != std::end(preferred))
+            return mode;
+    }
+
+    return VK_PRESENT_MODE_FIFO_KHR; // always supported
+}
+
+uint32_t graphics_context::find_memory_type_index(const uint32_t              memory_type_bits,
+                                                  const VkMemoryPropertyFlags properties) const
+{
+    // Return the lowest index that is allowed by the mask and has every requested property.
+    for(uint32_t type = 0; type < this->mem_props.memoryTypeCount; ++type)
+    {
+        const bool                  allowed = ((memory_type_bits >> type) & 1U) != 0;
+        const VkMemoryPropertyFlags flags   = this->mem_props.memoryTypes[type].propertyFlags;
+        if(allowed && (flags & properties) == properties)
+            return type;
+    }
+    std::cerr << "failed to find a suitable memory type\n";
+    std::exit(error_exit_code);
+}
+
+VkPipelineLayout graphics_context::create_pipeline_layout() const
+{
+    // Apart from sType, every field is left zero-initialized.
+    VkPipelineLayoutCreateInfo layout_info = {};
+    layout_info.sType                      = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+
+    VkPipelineLayout layout;
+    VK_CHECK(this->vkd->create_pipeline_layout(this->dev, &layout_info, nullptr, &layout));
+    return layout;
+}
+
+VkPipeline
+ graphics_context::create_simple_pipeline(const VkPipelineLayout layout,
+ const VkRenderPass render_pass,
+ const VkPipelineShaderStageCreateInfo* shaders,
+ const unsigned int num_shaders,
+ const VkVertexInputBindingDescription* bindings,
+ const unsigned int num_bindings,
+ const VkVertexInputAttributeDescription* attribs,
+ const unsigned int num_attribs) const
+{ // Builds one graphics pipeline for subpass 0 of render_pass; exits through VK_CHECK on failure.
+ VkPipelineVertexInputStateCreateInfo pvisci = {}; // vertex input: layout supplied by the caller
+ pvisci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
+ pvisci.vertexBindingDescriptionCount = num_bindings;
+ pvisci.pVertexBindingDescriptions = bindings;
+ pvisci.vertexAttributeDescriptionCount = num_attribs;
+ pvisci.pVertexAttributeDescriptions = attribs;
+
+ VkPipelineInputAssemblyStateCreateInfo piasci = {}; // input assembly: plain triangle list
+ piasci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
+ piasci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
+
+ VkPipelineViewportStateCreateInfo pvsci = {}; // only the counts are given; no rectangles
+ pvsci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
+ pvsci.viewportCount = 1; // one viewport; its value is set with cmdSetViewport
+ pvsci.scissorCount = 1; // one scissor; its value is set with cmdSetScissor
+
+ VkDynamicState dynstate[] = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR};
+ VkPipelineDynamicStateCreateInfo pdsci = {}; // marks viewport and scissor as dynamic state
+ pdsci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
+ pdsci.dynamicStateCount = std::size(dynstate);
+ pdsci.pDynamicStates = dynstate;
+
+ VkPipelineRasterizationStateCreateInfo prsci = {}; // rasterizer: filled, back faces culled
+ prsci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
+ prsci.polygonMode = VK_POLYGON_MODE_FILL;
+ prsci.cullMode = VK_CULL_MODE_BACK_BIT;
+ prsci.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
+ prsci.lineWidth = 1.f;
+
+ VkPipelineMultisampleStateCreateInfo pmsci = {}; // multisampling: one sample per pixel
+ pmsci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
+ pmsci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
+ pmsci.minSampleShading = 1;
+
+ VkPipelineColorBlendAttachmentState pcbas = {}; // blendEnable is left zero-initialized
+ pcbas.srcColorBlendFactor = VK_BLEND_FACTOR_ONE;
+ pcbas.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
+ pcbas.colorBlendOp = VK_BLEND_OP_ADD;
+ pcbas.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE;
+ pcbas.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
+ pcbas.alphaBlendOp = VK_BLEND_OP_ADD;
+ pcbas.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT
+ | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
+
+ VkPipelineColorBlendStateCreateInfo pcbsci = {}; // blend state for the single color attachment
+ pcbsci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
+ pcbsci.logicOp = VK_LOGIC_OP_COPY; // logicOpEnable is left zero-initialized
+ pcbsci.attachmentCount = 1;
+ pcbsci.pAttachments = &pcbas;
+
+ VkGraphicsPipelineCreateInfo create_info = {}; // ties the state blocks above together
+ create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+ create_info.stageCount = num_shaders;
+ create_info.pStages = shaders;
+ create_info.pVertexInputState = &pvisci;
+ create_info.pInputAssemblyState = &piasci;
+ create_info.pViewportState = &pvsci;
+ create_info.pRasterizationState = &prsci;
+ create_info.pMultisampleState = &pmsci;
+ create_info.pColorBlendState = &pcbsci;
+ create_info.pDynamicState = &pdsci;
+ create_info.layout = layout;
+ create_info.renderPass = render_pass;
+ create_info.subpass = 0; // pDepthStencilState is left null: no depth/stencil state is given
+
+ VkPipeline pipeline;
+ VK_CHECK(this->vkd->create_graphics_pipelines(this->dev,
+ VK_NULL_HANDLE,
+ 1,
+ &create_info,
+ nullptr,
+ &pipeline));
+
+ return pipeline;
+}
+
+void graphics_context::copy_buffer(const VkBuffer     dst,
+                                   const VkBuffer     src,
+                                   const VkDeviceSize size) const
+{
+    // Copies the first `size` bytes of src into dst through a one-time command buffer.
+    // one_time_submit() is documented to block until that command buffer has finished.
+    this->one_time_submit(
+        [&](const VkCommandBuffer cmd_buf)
+        {
+            VkBufferCopy region = {}; // srcOffset and dstOffset stay 0
+            region.size         = size;
+            this->vkd->cmd_copy_buffer(cmd_buf, src, dst, 1, &region);
+        });
+}
+
+VkShaderModule create_shader_module(const graphics_context& ctx,
+                                    const size_t            shader_len,
+                                    const uint32_t*         shader)
+{
+    // shader_len counts 32-bit words, whereas codeSize is expressed in bytes.
+    VkShaderModuleCreateInfo info = {};
+    info.sType                    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    info.pCode                    = shader;
+    info.codeSize                 = shader_len * sizeof(uint32_t);
+    VkShaderModule shader_module;
+    VK_CHECK(ctx.vkd->create_shader_module(ctx.dev, &info, nullptr, &shader_module));
+    return shader_module;
+}
+
+swapchain::swapchain(const graphics_context& ctx, VkExtent2D desired_extent)
+ : ctx(ctx), handle(VK_NULL_HANDLE) // null handle: recreate() then has no old swapchain to destroy
+{
+ this->recreate(desired_extent); // creates the swapchain, fetches its images, creates the views
+}
+
+swapchain::~swapchain()
+{
+    const graphics_context& gc = this->ctx;
+    // Release every image view first, and the swapchain handle after them.
+    for(const VkImageView& image_view : this->views)
+        gc.vkd->destroy_image_view(gc.dev, image_view, nullptr);
+
+    gc.vkd->destroy_swapchain(gc.dev, this->handle, nullptr);
+}
+
+VkExtent2D swapchain::find_actual_extent(const VkSurfaceCapabilitiesKHR& caps,
+                                         const VkExtent2D                desired_extent)
+{
+    // Any current width other than 0xFFFF'FFFF is returned as-is together with its height.
+    // Otherwise the desired extent is clamped into the [min, max] image extent of the surface.
+    const VkExtent2D& lo = caps.minImageExtent;
+    const VkExtent2D& hi = caps.maxImageExtent;
+    return caps.currentExtent.width != 0xFFFF'FFFF
+               ? caps.currentExtent
+               : VkExtent2D{std::clamp(desired_extent.width, lo.width, hi.width),
+                            std::clamp(desired_extent.height, lo.height, hi.height)};
+}
+
+void swapchain::recreate(VkExtent2D desired_extent)
+{ // (Re)creates the swapchain, then refreshes this->images and this->views to match it.
+ this->surface_format = this->ctx.find_surface_format();
+ const VkPresentModeKHR present_mode = this->ctx.find_present_mode();
+
+ VkSurfaceCapabilitiesKHR surface_caps;
+ VK_CHECK(this->ctx.vki->get_physical_device_surface_capabilities(this->ctx.pdev,
+ this->ctx.surface,
+ &surface_caps));
+
+ this->extent = find_actual_extent(surface_caps, desired_extent);
+
+ if((surface_caps.supportedUsageFlags & swapchain_image_usage) != swapchain_image_usage)
+ {
+ std::cerr << "Surface does not support intended usage flags\n";
+ std::exit(error_exit_code);
+ }
+
+ uint32_t image_count = surface_caps.minImageCount + 1; // ask for one more than the minimum
+ if(surface_caps.maxImageCount > 0) // a maximum of 0 is treated as "no upper limit" here
+ {
+ image_count = std::min(image_count, surface_caps.maxImageCount);
+ }
+
+ const uint32_t queue_families[]
+ = {this->ctx.graphics_queue.family, this->ctx.present_queue.family};
+
+ const VkSwapchainKHR old_swapchain = this->handle; // saved: this->handle is overwritten below
+
+ VkSwapchainCreateInfoKHR create_info = {};
+ create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
+ create_info.surface = this->ctx.surface;
+ create_info.minImageCount = image_count;
+ create_info.imageFormat = this->surface_format.format;
+ create_info.imageColorSpace = this->surface_format.colorSpace;
+ create_info.imageExtent = this->extent;
+ create_info.imageArrayLayers = 1;
+ create_info.imageUsage = swapchain_image_usage;
+ create_info.imageSharingMode = this->ctx.graphics_queue_is_present_queue()
+ ? VK_SHARING_MODE_EXCLUSIVE
+ : VK_SHARING_MODE_CONCURRENT; // concurrent only when the two queue families differ
+ create_info.queueFamilyIndexCount = this->ctx.graphics_queue_is_present_queue() ? 1 : 2;
+ create_info.pQueueFamilyIndices = queue_families;
+ create_info.preTransform = surface_caps.currentTransform;
+ create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+ create_info.presentMode = present_mode;
+ create_info.clipped = VK_TRUE;
+ create_info.oldSwapchain = old_swapchain; // VK_NULL_HANDLE on the first call from the constructor
+
+ VK_CHECK(this->ctx.vkd->create_swapchain(this->ctx.dev, &create_info, nullptr, &this->handle));
+
+ // Note: it may be better to wait a few frames with destroying the swapchain to give it the
+ // time to finalize rendering. We are lazy though and destroy it now.
+ if(old_swapchain != VK_NULL_HANDLE)
+ {
+ this->ctx.vkd->destroy_swapchain(this->ctx.dev, old_swapchain, nullptr);
+ }
+
+ this->fetch_swap_images(); // must precede create_views(), which sizes views from this->images
+ this->create_views(); // also destroys the views that belonged to the previous swapchain
+}
+
+void swapchain::fetch_swap_images()
+{
+    const graphics_context& gc = this->ctx;
+    // First call: query only the number of images. Second call: fill this->images.
+    uint32_t image_count;
+    VK_CHECK(gc.vkd->get_swapchain_images(gc.dev, this->handle, &image_count, nullptr));
+    // Note: Old images do not need to be manually destroyed.
+    this->images.resize(image_count);
+    VK_CHECK(
+        gc.vkd->get_swapchain_images(gc.dev, this->handle, &image_count, this->images.data()));
+}
+
+void swapchain::create_views()
+{
+    const graphics_context& gc = this->ctx;
+
+    // When the swapchain is being recreated, the views of the previous one are destroyed here.
+    for(const VkImageView stale_view : this->views)
+        gc.vkd->destroy_image_view(gc.dev, stale_view, nullptr);
+
+    // One plain 2D color view (single mip level, single array layer) per swapchain image.
+    const size_t view_count = this->images.size();
+    this->views.resize(view_count);
+    for(size_t idx = 0; idx != view_count; ++idx)
+    {
+        VkImageSubresourceRange color_range = {};
+        color_range.aspectMask              = VK_IMAGE_ASPECT_COLOR_BIT;
+        color_range.baseMipLevel            = 0;
+        color_range.levelCount              = 1;
+        color_range.baseArrayLayer          = 0;
+        color_range.layerCount              = 1;
+
+        VkImageViewCreateInfo view_info = {};
+        view_info.sType                 = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+        view_info.image                 = this->images[idx];
+        view_info.viewType              = VK_IMAGE_VIEW_TYPE_2D;
+        view_info.format                = this->surface_format.format;
+        view_info.components.r = view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+        view_info.components.b = view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+        view_info.subresourceRange = color_range;
+
+        VK_CHECK(gc.vkd->create_image_view(gc.dev, &view_info, nullptr, &this->views[idx]));
+    }
+}
+
+swapchain::present_state swapchain::acquire_next_image(const VkSemaphore image_acquired,
+                                                       const uint64_t    frame_timeout)
+{
+    // Ask for the next swapchain image; its index is written to this->image_index.
+    const VkResult status = this->ctx.vkd->acquire_next_image(this->ctx.dev,
+                                                              this->handle,
+                                                              frame_timeout,
+                                                              image_acquired,
+                                                              VK_NULL_HANDLE,
+                                                              &this->image_index);
+    if(status == VK_SUCCESS)
+        return present_state::optimal;
+    if(status == VK_SUBOPTIMAL_KHR)
+        return present_state::suboptimal;
+    if(status != VK_ERROR_OUT_OF_DATE_KHR)
+        VK_CHECK(status); // VK_TIMEOUT and every other code: report the error and exit
+    return present_state::out_of_date;
+}
+
+swapchain::present_state swapchain::present(const VkSemaphore wait_sema) const
+{
+    VkPresentInfoKHR info   = {}; // present the image last acquired, after wait_sema signals
+    info.sType              = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
+    info.swapchainCount     = 1;
+    info.pSwapchains        = &this->handle;
+    info.pImageIndices      = &this->image_index;
+    info.waitSemaphoreCount = 1;
+    info.pWaitSemaphores    = &wait_sema;
+
+    const VkResult status
+        = this->ctx.vkd->queue_present(this->ctx.present_queue.queue, &info);
+    if(status == VK_SUCCESS)
+        return present_state::optimal;
+    if(status == VK_SUBOPTIMAL_KHR)
+        return present_state::suboptimal;
+    if(status != VK_ERROR_OUT_OF_DATE_KHR)
+        VK_CHECK(status); // any other code: report the error and exit
+    return present_state::out_of_date;
+}
+
+/// \brief Utility function to create a Vulkan render pass with one subpass and a single
+/// color attachment whose format is the surface format of this swapchain.
+VkRenderPass swapchain::create_render_pass() const
+{
+    VkAttachmentDescription attachment = {};
+    attachment.format                  = this->surface_format.format;
+    attachment.samples                 = VK_SAMPLE_COUNT_1_BIT;
+    attachment.initialLayout           = VK_IMAGE_LAYOUT_UNDEFINED;
+    attachment.finalLayout             = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+    attachment.loadOp                  = VK_ATTACHMENT_LOAD_OP_CLEAR;
+    attachment.storeOp                 = VK_ATTACHMENT_STORE_OP_STORE;
+    attachment.stencilLoadOp           = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+    attachment.stencilStoreOp          = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+
+    VkAttachmentReference color_ref = {};
+    color_ref.attachment            = 0; // index of `attachment` in pass_info.pAttachments
+    color_ref.layout                = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
+    VkSubpassDescription color_subpass = {};
+    color_subpass.pipelineBindPoint    = VK_PIPELINE_BIND_POINT_GRAPHICS;
+    color_subpass.colorAttachmentCount = 1;
+    color_subpass.pColorAttachments    = &color_ref;
+
+    VkRenderPassCreateInfo pass_info = {};
+    pass_info.sType                  = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
+    pass_info.attachmentCount        = 1;
+    pass_info.pAttachments           = &attachment;
+    pass_info.subpassCount           = 1;
+    pass_info.pSubpasses             = &color_subpass;
+    VkRenderPass render_pass;
+    VK_CHECK(ctx.vkd->create_render_pass(ctx.dev, &pass_info, nullptr, &render_pass));
+    return render_pass;
+}
+
+void swapchain::recreate_framebuffers(const VkRenderPass          render_pass,
+                                      std::vector<VkFramebuffer>& framebuffers)
+{
+    // Replaces the contents of `framebuffers` with one framebuffer per swapchain image view,
+    // sized to the current swapchain extent and compatible with render_pass. Every handle
+    // that is already in the vector is destroyed first.
+    for(const VkFramebuffer fb : framebuffers)
+        this->ctx.vkd->destroy_framebuffer(this->ctx.dev, fb, nullptr);
+
+    framebuffers.resize(this->images.size());
+    for(uint32_t i = 0; i < this->images.size(); ++i)
+    {
+        VkFramebufferCreateInfo create_info = {};
+        create_info.sType                   = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
+        create_info.renderPass              = render_pass;
+        create_info.attachmentCount         = 1;
+        create_info.pAttachments            = &this->views[i];
+        create_info.width                   = this->extent.width;
+        create_info.height                  = this->extent.height;
+        create_info.layers                  = 1;
+
+        VK_CHECK(this->ctx.vkd->create_framebuffer(this->ctx.dev,
+                                                   &create_info,
+                                                   nullptr,
+                                                   &framebuffers[i]));
+    }
+}
diff --git a/HIP-Basic/vulkan_interop/vulkan_utils.hpp b/HIP-Basic/vulkan_interop/vulkan_utils.hpp
new file mode 100644
index 000000000..dccd511a8
--- /dev/null
+++ b/HIP-Basic/vulkan_interop/vulkan_utils.hpp
@@ -0,0 +1,505 @@
+#ifndef _HIP_BASIC_VULKAN_INTEROP_VULKAN_UTILS_HPP
+#define _HIP_BASIC_VULKAN_INTEROP_VULKAN_UTILS_HPP
+
+#include "example_utils.hpp"
+
+#include <vulkan/vulkan.h>
+
+#ifdef _WIN64
+    #define NOMINMAX
+    #include <windows.h>
+
+    #include <vulkan/vulkan_win32.h>
+#endif
+
+#include <GLFW/glfw3.h>
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <vector>
+
+/// \brief Checks if the provided Vulkan error code is \p VK_SUCCESS. If not, prints an
+/// error message to the standard error output and terminates the program with an error code.
+#define VK_CHECK(condition) \
+ { \
+ const VkResult error = condition; \
+ if(error != VK_SUCCESS) \
+ { \
+ std::cerr << "A vulkan error encountered: " << error << " at " << __FILE__ << ':' \
+ << __LINE__ << std::endl; \
+ std::exit(error_exit_code); \
+ } \
+ }
+
+// Older versions of the vulkan headers define this macro incorrectly to 0, which would give
+// compile errors. NOTE(review): nullptr presumably requires pointer-typed handles; confirm.
+#undef VK_NULL_HANDLE
+#define VK_NULL_HANDLE nullptr
+
+/// \brief This structure contains the basis function pointers of Vulkan.
+///
+/// There are two main ways to call the functions of the Vulkan API: Either
+/// through the static library (libvulkan-1), or by loading the function pointers
+/// manually from the library. When interfacing with other libraries that use
+/// vulkan, like GLFW, it is important that all Vulkan functions are invoked from the
+/// same library version. In practice, GLFW loads the Vulkan library dynamically by
+/// default (and would need to be re-compiled to use the static functions). This means
+/// that if, for example, the Vulkan SDK is installed (which contains its own version of
+/// libvulkan-1), and GLFW finds that, it might load a different Vulkan library than the
+/// statically linked version! The correct approach is thus to ask GLFW for the
+/// \p vkGetInstanceProcAddr function that it loaded from the dynamic vulkan library,
+/// which is glfwGetInstanceProcAddress, and use that to load the other
+/// Vulkan functions manually.
+///
+/// \see https://www.glfw.org/docs/latest/vulkan_guide.html#vulkan_loader.
+struct base_dispatch
+{
+ PFN_vkGetInstanceProcAddr get_instance_proc_addr; ///< Pointer to vkGetInstanceProcAddr.
+ PFN_vkEnumerateInstanceExtensionProperties enumerate_instance_extension_properties;
+ PFN_vkCreateInstance create_instance; ///< Pointer to vkCreateInstance.
+
+ /// \brief Initialize a \p base_dispatch by fetching all required base functions from
+ /// Vulkan.
+ ///
+ /// \param loader - The \p vkGetInstanceProcAddr function to load the other function
+ /// pointers with. This can for example be \p glfwGetInstanceProcAddress.
+ base_dispatch(PFN_vkGetInstanceProcAddr loader);
+};
+
+/// \brief This structure contains the function pointers related to the Vulkan instance.
+/// \see base_dispatch
+struct instance_dispatch
+{
+ PFN_vkDestroyInstance destroy_instance;
+ PFN_vkDestroySurfaceKHR destroy_surface;
+ PFN_vkEnumeratePhysicalDevices enumerate_physical_devices;
+ PFN_vkGetPhysicalDeviceProperties2 get_physical_device_properties2;
+ PFN_vkGetPhysicalDeviceMemoryProperties get_physical_device_memory_properties;
+ PFN_vkGetPhysicalDeviceSurfaceFormatsKHR get_physical_device_surface_formats;
+ PFN_vkGetPhysicalDeviceSurfacePresentModesKHR get_physical_device_surface_present_modes;
+ PFN_vkEnumerateDeviceExtensionProperties enumerate_device_extension_properties;
+ PFN_vkGetPhysicalDeviceQueueFamilyProperties get_physical_device_queue_family_properties;
+ PFN_vkGetPhysicalDeviceSurfaceSupportKHR get_physical_device_surface_support;
+ PFN_vkCreateDevice create_device;
+ PFN_vkGetDeviceProcAddr get_device_proc_addr; ///< Pointer to vkGetDeviceProcAddr.
+ PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR get_physical_device_surface_capabilities;
+
+ /// \brief Initialize an \p instance_dispatch by fetching all required instance-level
+ /// functions from Vulkan.
+ /// \param dispatch - The base dispatch table; presumably its loader is used (confirm in .cpp).
+ /// \param instance - The instance to load function pointers for.
+ instance_dispatch(const base_dispatch& dispatch, VkInstance instance);
+};
+
+/// \brief This structure contains the device-specific Vulkan function pointers for one
+/// particular device.
+/// \see base_dispatch
+struct device_dispatch
+{
+ PFN_vkDestroyDevice destroy_device;
+ PFN_vkGetDeviceQueue get_device_queue;
+ PFN_vkCreateSwapchainKHR create_swapchain;
+ PFN_vkDestroySwapchainKHR destroy_swapchain;
+ PFN_vkGetSwapchainImagesKHR get_swapchain_images;
+ PFN_vkCreateImageView create_image_view;
+ PFN_vkDestroyImageView destroy_image_view;
+ PFN_vkCreateSemaphore create_semaphore;
+ PFN_vkDestroySemaphore destroy_semaphore;
+ PFN_vkCreateFence create_fence;
+ PFN_vkDestroyFence destroy_fence;
+ PFN_vkCreateCommandPool create_command_pool;
+ PFN_vkDestroyCommandPool destroy_command_pool;
+ PFN_vkAllocateCommandBuffers allocate_command_buffers;
+ PFN_vkWaitForFences wait_for_fences;
+ PFN_vkResetFences reset_fences;
+ PFN_vkAcquireNextImageKHR acquire_next_image;
+ PFN_vkQueuePresentKHR queue_present;
+ PFN_vkResetCommandPool reset_command_pool;
+ PFN_vkBeginCommandBuffer begin_command_buffer;
+ PFN_vkEndCommandBuffer end_command_buffer;
+ PFN_vkQueueSubmit queue_submit;
+ PFN_vkCreateRenderPass create_render_pass;
+ PFN_vkDestroyRenderPass destroy_render_pass;
+ PFN_vkCreateFramebuffer create_framebuffer;
+ PFN_vkDestroyFramebuffer destroy_framebuffer;
+ PFN_vkCreateShaderModule create_shader_module;
+ PFN_vkDestroyShaderModule destroy_shader_module;
+ PFN_vkCreateGraphicsPipelines create_graphics_pipelines;
+ PFN_vkDestroyPipeline destroy_pipeline;
+ PFN_vkCreatePipelineLayout create_pipeline_layout;
+ PFN_vkDestroyPipelineLayout destroy_pipeline_layout;
+ PFN_vkQueueWaitIdle queue_wait_idle;
+ PFN_vkCmdSetViewport cmd_set_viewport;
+ PFN_vkCmdSetScissor cmd_set_scissor;
+ PFN_vkCmdBeginRenderPass cmd_begin_render_pass;
+ PFN_vkCmdBindPipeline cmd_bind_pipeline;
+ PFN_vkCmdEndRenderPass cmd_end_render_pass;
+ PFN_vkCmdDrawIndexed cmd_draw_indexed;
+ PFN_vkCreateBuffer create_buffer;
+ PFN_vkDestroyBuffer destroy_buffer;
+ PFN_vkAllocateMemory allocate_memory;
+ PFN_vkFreeMemory free_memory;
+ PFN_vkGetBufferMemoryRequirements get_buffer_memory_requirements;
+ PFN_vkBindBufferMemory bind_buffer_memory;
+ PFN_vkCmdCopyBuffer cmd_copy_buffer;
+ PFN_vkMapMemory map_memory;
+ PFN_vkUnmapMemory unmap_memory;
+ PFN_vkCmdBindVertexBuffers cmd_bind_vertex_buffers;
+ PFN_vkCmdBindIndexBuffer cmd_bind_index_buffer;
+#ifdef _WIN64 // handle-export entry points differ by platform: Win32 handles here, fds otherwise
+ PFN_vkGetMemoryWin32HandleKHR get_memory_win32_handle;
+ PFN_vkGetSemaphoreWin32HandleKHR get_semaphore_win32_handle;
+#else
+ PFN_vkGetMemoryFdKHR get_memory_fd;
+ PFN_vkGetSemaphoreFdKHR get_semaphore_fd;
+#endif
+
+ /// \brief Initialize a \p device_dispatch by fetching all required device-level functions
+ /// from Vulkan.
+ /// \param dispatch - The instance dispatch table; presumably its loader is used (confirm in .cpp).
+ /// \param device - The device to load function pointers for.
+ device_dispatch(const instance_dispatch& dispatch, VkDevice device);
+};
+
+/// \brief Initialize a GLFW window with a particular extent. The window name is set from
+/// the \p app_info.
+GLFWwindow* create_window(const VkApplicationInfo& app_info, const VkExtent2D extent);
+
+/// \brief A utility function to check whether all required extensions are supported by Vulkan.
+/// Returns \p true if all required extensions are supported.
+///
+/// \param supported_extensions_properties - The supported extensions as reported by Vulkan, for example
+/// by \p vkEnumerateInstanceExtensionProperties or by \p vkEnumerateDeviceExtensionProperties.
+/// \param required_extensions_begin - Beginning of the iterator that indicates the extensions we need
+/// to be supported. This should be an iterator over const char*.
+/// \param required_extensions_end - End of the iterator that indicates the extensions that need
+/// to be supported.
+template<class IteratorT>
+bool extensions_supported(const std::vector<VkExtensionProperties>& supported_extensions_properties,
+                          const IteratorT required_extensions_begin,
+                          const IteratorT required_extensions_end)
+{
+    IteratorT it = required_extensions_begin;
+    for(; it != required_extensions_end; ++it)
+    {
+        // Look for a reported extension whose name equals the required name exactly.
+        const auto supported_it
+            = std::find_if(supported_extensions_properties.begin(),
+                           supported_extensions_properties.end(),
+                           [&](const VkExtensionProperties& props)
+                           { return std::strcmp(*it, props.extensionName) == 0; });
+        if(supported_it == supported_extensions_properties.end())
+            return false; // one missing extension is enough to fail the check
+    }
+
+    return true;
+}
+
+/// \brief Create a new Vulkan instance.
+/// \param app_info - The application info used to construct the Vulkan instance.
+/// \param required_extensions - The extensions to initialize this Vulkan instance with.
+/// The required GLFW extensions are added to this value.
+/// \param num_required_extensions - The number of extensions in \p required_extensions.
+/// \param with_validation - Enable the VK_LAYER_KHRONOS_validation standard validation layer.
+VkInstance create_instance(const base_dispatch& dispatch,
+ const VkApplicationInfo& app_info,
+ const char* const* const required_extensions,
+ const size_t num_required_extensions,
+ const bool with_validation = true);
+
+/// \brief Create a Vulkan surface from a GLFW window handle.
+VkSurfaceKHR create_surface(const VkInstance instance, GLFWwindow* window);
+
+/// \brief Checks whether the physical device supports a surface at all.
+/// Returns \p true if the surface is supported.
+bool check_surface_support(const instance_dispatch& dispatch,
+ const VkPhysicalDevice pdev,
+ const VkSurfaceKHR surface);
+
+/// \brief Check whether a physical device supports the required extensions.
+/// Returns \p true if the extensions are supported.
+bool check_device_extensions(const instance_dispatch& dispatch,
+ const VkPhysicalDevice pdev,
+ const char* const* const required_extensions,
+ const size_t num_required_extensions);
+
+/// \brief This structure represents an assignment of device queues
+/// that will be used to render, present, and perform transfers on.
+struct queue_allocation
+{
+ /// The Vulkan graphics queue family that will be used to render the example.
+ uint32_t graphics_family;
+ /// The Vulkan properties of the graphics queue family.
+ VkQueueFamilyProperties graphics_family_properties;
+
+ /// The Vulkan present queue family that will be used to draw the example to
+ /// the monitor. May be the same as the \p graphics_family.
+ uint32_t present_family;
+ /// The Vulkan properties of the present queue family.
+ VkQueueFamilyProperties present_family_properties;
+};
+
+/// \brief Try to allocate device queues for a physical device.
+/// This function tries to find a graphics and present queue family index.
+/// If there is no such queue available, returns false. Otherwise fills \p qa
+/// with details surrounding the queues that should be used.
+bool allocate_device_queues(const instance_dispatch& dispatch,
+ const VkPhysicalDevice pdev,
+ const VkSurfaceKHR surface,
+ queue_allocation& qa);
+
+/// \brief This function is used to create a Vulkan logical device from a physical device
+/// and a queue allocation.
+VkDevice create_device(const instance_dispatch& dispatch,
+ const VkPhysicalDevice pdev,
+ const queue_allocation& queues,
+ const char* const* const required_extensions,
+ const size_t num_required_extensions);
+
+/// \brief A utility structure that groups a Vulkan queue handle and the queue family index
+/// that it was created from.
+struct queue
+{
+ VkQueue queue; ///< The queue handle.
+ uint32_t family; ///< Index of the queue family that \p queue belongs to.
+};
+
+/// \brief Create Vulkan device queues from a \p queue_allocation, after the logical device
+/// has been created.
+void create_device_queues(const device_dispatch& dispatch,
+ const VkDevice device,
+ const queue_allocation& queues,
+ queue& graphics_queue,
+ queue& present_queue);
+
+/// \brief This structure is used to group all basic Vulkan-related stuff together: The Vulkan instance,
+/// device, queues, properties, etc. It also provides some utility functions that use those types.
+struct graphics_context
+{
+ const instance_dispatch* vki;
+ std::unique_ptr vkd;
+
+ VkInstance instance;
+ VkSurfaceKHR surface;
+ VkPhysicalDevice pdev;
+ VkPhysicalDeviceMemoryProperties mem_props;
+
+ VkDevice dev;
+ queue graphics_queue;
+ queue present_queue;
+
+ VkCommandPool one_time_submit_pool;
+
+ /// \brief Initialize a \p graphics_context. This initializes Vulkan, fetches function pointers, creates
+ /// Vulkan logical devices and various related handles.
+ graphics_context(const instance_dispatch* vki,
+ const VkInstance instance,
+ const VkSurfaceKHR surface,
+ const VkPhysicalDevice pdev,
+ const queue_allocation& queues,
+ const char* const* const required_device_extensions,
+ const size_t num_required_device_extensions);
+
+ ~graphics_context();
+
+ graphics_context(const graphics_context&) = delete;
+ graphics_context& operator=(const graphics_context&) = delete;
+
+ graphics_context(graphics_context&&) = delete;
+ graphics_context& operator=(graphics_context&&) = delete;
+
+ /// \brief Utility function to find a Vulkan surface format suitable for rendering to the GLFW window.
+ VkSurfaceFormatKHR find_surface_format() const;
+
+ /// \brief Utility function to find a Vulkan present mode suitable for rendering to the GLFW window.
+ VkPresentModeKHR find_present_mode() const;
+
+ /// \brief Utility function that returns \p true if the Vulkan queues used for graphics and present
+ /// were created from the same family.
+ inline bool graphics_queue_is_present_queue() const
+ {
+ return this->graphics_queue.family == this->present_queue.family;
+ }
+
+ /// \brief Utility function that helps us find a Vulkan memory type that satisfy the required
+ /// properties and Vulkan memory type bits. Returns the memory type if any such was found,
+ /// or exits the program otherwise.
+ uint32_t find_memory_type_index(const uint32_t memory_type_bits,
+ const VkMemoryPropertyFlags properties) const;
+
+ /// \brief Initialize a Vulkan pipeline layout.
+ VkPipelineLayout create_pipeline_layout() const;
+
+ /// \brief Create a Vulkan pipeline. This is a relatively standard pipeline that renders
+ /// a triangle list, with culling enabled, no blending, and dynamic state for viewport and scissor.
+ VkPipeline create_simple_pipeline(const VkPipelineLayout layout,
+ const VkRenderPass render_pass,
+ const VkPipelineShaderStageCreateInfo* shaders,
+ const unsigned int num_shaders,
+ const VkVertexInputBindingDescription* bindings,
+ const unsigned int num_bindings,
+ const VkVertexInputAttributeDescription* attribs,
+ const unsigned int num_attribs) const;
+
+ /// \brief A utility function to quickly submit a single-time command buffer to Vulkan.
+ /// This can be used for example to upload buffers to the GPU, or other low-frequency
+ /// GPU operations. Operations that happen on a per-frame basis should be handled through
+ /// the per-frame command buffer in \p frame.
+ /// The command buffer is submitted to a graphics queue, meaning that it is only suitable for
+ /// commands that can be submitted to such a queue. Note that a graphics queue in Vulkan is also
+ /// always usable as a transfer queue, and so the command buffer can be used to perform memory
+ /// transfers.
+ /// This function blocks until the command buffer has finished executing.
+ /// \param f - A callable of type \p F, invocable as void(VkCommandBuffer), that records the
+ /// commands to be submitted. \p vkBeginCommandBuffer and \p vkEndCommandBuffer should not be
+ /// called on this buffer, as this function already brackets the callback with both.
+ /// \see frame
+ template<typename F>
+ void one_time_submit(F f) const
+ {
+ // Reset the command pool and allocate a new buffer that we will use to submit commands to.
+ VK_CHECK(this->vkd->reset_command_pool(this->dev, this->one_time_submit_pool, 0));
+
+ VkCommandBufferAllocateInfo cmd_buf_allocate_info = {};
+ cmd_buf_allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+ cmd_buf_allocate_info.commandPool = this->one_time_submit_pool;
+ cmd_buf_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+ cmd_buf_allocate_info.commandBufferCount = 1;
+
+ VkCommandBuffer cmd_buf;
+ VK_CHECK(this->vkd->allocate_command_buffers(this->dev, &cmd_buf_allocate_info, &cmd_buf));
+
+ // Begin recording the command buffer.
+ VkCommandBufferBeginInfo begin_info = {};
+ begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+ VK_CHECK(this->vkd->begin_command_buffer(cmd_buf, &begin_info));
+
+ // Record the commands that we want to dispatch.
+ f(cmd_buf);
+
+ // Finalize the command buffer.
+ VK_CHECK(this->vkd->end_command_buffer(cmd_buf));
+
+ // Submit the command buffer to the graphics queue.
+ VkSubmitInfo submit_info = {};
+ submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submit_info.commandBufferCount = 1;
+ submit_info.pCommandBuffers = &cmd_buf;
+ VK_CHECK(
+ this->vkd->queue_submit(this->graphics_queue.queue, 1, &submit_info, VK_NULL_HANDLE));
+
+ // Synchronize with the GPU so that we can be sure after this function that the work is finished.
+ VK_CHECK(this->vkd->queue_wait_idle(this->graphics_queue.queue));
+ }
+
+ /// \brief Utility function to copy data from one Vulkan buffer (\p src) to another (\p dst).
+ void copy_buffer(const VkBuffer dst, const VkBuffer src, const VkDeviceSize size) const;
+};
+
+/// \brief Utility function to create a Vulkan shader module from SPIR-V shader byte code.
+VkShaderModule create_shader_module(const graphics_context& ctx,
+ const size_t shader_len,
+ const uint32_t* shader);
+
+/// \brief This structure represents a Vulkan swapchain and all its associated resources. This
+/// type is required to let us draw to the GLFW window.
+struct swapchain
+{
+ /// \brief The required usage flags for images owned by the swapchain: We are going to draw to
+ /// the swapchain, and so the swapchain images need to be able to act as frame buffer color attachment.
+ static constexpr VkImageUsageFlags swapchain_image_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+
+ /// \brief This enumeration represents the current state of the swapchain.
+ enum class present_state
+ {
+ /// Everything is A-OK.
+ optimal,
+ /// Everything is fine, but the current configuration is not
+ /// optimal. This can for example happen if the window is dragged
+ /// to a different monitor that has a different native color format.
+ suboptimal,
+ /// The swapchain has become outdated with the window's configuration
+ /// and should be re-created. This can for example happen if the window
+ /// is resized.
+ out_of_date,
+ };
+
+ /// The graphics context that this swapchain uses.
+ const graphics_context& ctx;
+ /// The Vulkan handle for the current swapchain.
+ VkSwapchainKHR handle;
+ /// The format of the surface that we are rendering to.
+ VkSurfaceFormatKHR surface_format;
+ /// The width and height of the swapchain. Usually corresponds to the dimensions
+ /// of the window.
+ VkExtent2D extent;
+ /// The swapchain's images that we can render to.
+ std::vector images;
+ /// The Vulkan image views, each corresponding to the image with the same index in \p images.
+ std::vector views;
+ /// The current index of the image that we should draw to. This is an index into
+ /// \p images and \p views.
+ /// Note: Not valid until acquire_next_image() is called!
+ uint32_t image_index;
+
+ /// \brief Create a new Swapchain for a particular surface.
+ swapchain(const graphics_context& ctx, VkExtent2D desired_extent);
+
+ ~swapchain();
+
+ swapchain(const swapchain&) = delete;
+ swapchain& operator=(const swapchain&) = delete;
+
+ swapchain(swapchain&&) = delete;
+ swapchain& operator=(swapchain&&) = delete;
+
+ /// \brief The window's extent may not correspond to the actual extent of the swapchain's images.
+ /// This function finds the actual resolution that we should render to.
+ static VkExtent2D find_actual_extent(const VkSurfaceCapabilitiesKHR& caps,
+ const VkExtent2D desired_extent);
+
+ /// \brief Re-create the swapchain after it has become out-of-date or sub-optimal.
+ void recreate(VkExtent2D desired_extent);
+
+ /// \brief Fetch the swapchain images that are associated to the current swapchain.
+ /// Note: Swapchain images are owned by the swapchain - we do not need to manage their lifetime,
+ /// and they should not be destroyed.
+ void fetch_swap_images();
+
+ /// \brief Create a Vulkan image view for each swapchain image.
+ /// Note: Image views are _not_ owned by the swapchain: we must take care to destroy old views.
+ /// This function should be called after fetch_swap_images, as it creates a view
+ /// for each current image.
+ void create_views();
+
+ /// \brief Acquire the next image from the swapchain. This may block until the swapchain has
+ /// finished presenting a previous image. This function updates swapchain::image_index,
+ /// which should be used when rendering.
+ /// \returns the current presenting state of the swapchain.
+ present_state acquire_next_image(const VkSemaphore image_acquired,
+ const uint64_t frame_timeout);
+
+ /// \brief Present the image's contents to the GLFW window.
+ /// \param wait_sema - A Vulkan semaphore that the presenting process should wait on before
+ /// continuing with the presenting. This semaphore should be signaled by the Vulkan queue
+ /// submission that renders to the current swapchain image.
+ /// \returns the current state of the swapchain.
+ present_state present(const VkSemaphore wait_sema) const;
+
+ /// \brief Utility function to create a Vulkan render pass that is compatible
+ /// with this swapchain.
+ VkRenderPass create_render_pass() const;
+
+ /// \brief (Re-)initialize a vector of framebuffers, each of which is associated to the
+ /// swap image with the same index.
+ /// \param render_pass - The render pass that each framebuffer is to be associated with.
+ /// \param framebuffers - Vector of framebuffers to reinitialize. Elements of this parameter
+ /// will be de-initialized before new elements are inserted.
+ void recreate_framebuffers(const VkRenderPass render_pass,
+ std::vector& framebuffers);
+};
+
+#endif
diff --git a/HIP-Basic/warp_shuffle/Makefile b/HIP-Basic/warp_shuffle/Makefile
index 1143e9c45..0aa4f135b 100644
--- a/HIP-Basic/warp_shuffle/Makefile
+++ b/HIP-Basic/warp_shuffle/Makefile
@@ -31,22 +31,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/HIP-Basic/warp_shuffle/main.hip b/HIP-Basic/warp_shuffle/main.hip
index 1174d924c..2a28af14a 100644
--- a/HIP-Basic/warp_shuffle/main.hip
+++ b/HIP-Basic/warp_shuffle/main.hip
@@ -82,7 +82,7 @@ int main()
// To guarantee the correct behaviour of the program, keep total number of matrix elements
// below (or equal to) warp size.
- assert(size <= props.warpSize
+ assert(size <= static_cast(props.warpSize)
&& "Matrix has more elements than architecture's warp size value.");
// Block (2D) and grid sizes. Note that in this example we have only 1 block (and 1 warp).
diff --git a/HIP-Basic/warp_shuffle/warp_shuffle_vs2019.vcxproj b/HIP-Basic/warp_shuffle/warp_shuffle_vs2019.vcxproj
index c56ab5392..1170fe041 100644
--- a/HIP-Basic/warp_shuffle/warp_shuffle_vs2019.vcxproj
+++ b/HIP-Basic/warp_shuffle/warp_shuffle_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/Libraries/CMakeLists.txt b/Libraries/CMakeLists.txt
index c275c6d12..da0858d22 100644
--- a/Libraries/CMakeLists.txt
+++ b/Libraries/CMakeLists.txt
@@ -21,7 +21,8 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(Libraries)
+project(Libraries LANGUAGES NONE)
+
add_subdirectory(exampleLibraryTemplate)
add_subdirectory(hipCUB)
add_subdirectory(rocPRIM)
diff --git a/Libraries/exampleLibraryTemplate/CMakeLists.txt b/Libraries/exampleLibraryTemplate/CMakeLists.txt
index 8ce1dc5c7..e65544290 100644
--- a/Libraries/exampleLibraryTemplate/CMakeLists.txt
+++ b/Libraries/exampleLibraryTemplate/CMakeLists.txt
@@ -24,7 +24,7 @@
# DELETE ME: Change the project name and subdirectory names to the appropriate names.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(exampleLibraryTemplate)
+project(exampleLibraryTemplate LANGUAGES NONE)
# DELETE ME: The order of the directory names should be sorted to reduce merge conflicts
add_subdirectory(example_template)
diff --git a/Libraries/exampleLibraryTemplate/README.md b/Libraries/exampleLibraryTemplate/README.md
index d95b6e479..d93d37cbd 100644
--- a/Libraries/exampleLibraryTemplate/README.md
+++ b/Libraries/exampleLibraryTemplate/README.md
@@ -17,10 +17,10 @@ The examples in this subdirectory showcase the functionality of the [`example-li
### Windows
*Most common prerequisites from the other examples:*
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
-- [CMake](https://cmake.org/download/) (at least version 3.21)
- ROCm toolchain for Windows (No public release yet)
- The Visual Studio ROCm extension needs to be installed to build with the solution files.
-
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
## Building
*List the instructions to build the examples in the subdirectories. Ideally, each example builds similarly, and here the common steps can be described for both Windows and Linux. If there are example-specific build instructions, list those in the example-specific README file.*
@@ -32,7 +32,7 @@ The variable `GPU_RUNTIME` can be used to set the targeted runtime. Use `HIP` to
Make sure that the dependencies are installed, or use the [provided Dockerfile](../../Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile) to build and run the examples in a containerized environment that has all prerequisites installed.
#### Using CMake
-All examples in the `example-library-name` subdirectory can be built by a single CMake project.
+All examples in the `example-library-name` subdirectory can either be built by a single CMake project or be built independently.
- `$ cd Libraries/`
- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA)
@@ -41,7 +41,8 @@ All examples in the `example-library-name` subdirectory can be built by a single
#### Using Make
*Only if applicable! Not all examples have to support Make*
-All examples can be built by a single invocation to Make.
+All examples can be built by a single invocation to Make or be built independently.
+
- `$ cd Libraries/`
- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA)
@@ -50,3 +51,6 @@ All examples can be built by a single invocation to Make.
Visual Studio solution files are available for the individual examples. To build all examples for `example-library-name` open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for `example-library-name`.
For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio).
+
+#### CMake
+All examples in the `example-library-name` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2).
diff --git a/Libraries/exampleLibraryTemplate/example_template/CMakeLists.txt b/Libraries/exampleLibraryTemplate/example_template/CMakeLists.txt
index f94c3690e..35824769d 100644
--- a/Libraries/exampleLibraryTemplate/example_template/CMakeLists.txt
+++ b/Libraries/exampleLibraryTemplate/example_template/CMakeLists.txt
@@ -53,7 +53,7 @@ endif()
# DELETE ME: Remove if no required libraries (such as HIP examples without dependencies).
# List required packages
-find_package(${library_name} REQUIRED CONFIG)
+# find_package(${library_name} REQUIRED)
add_executable(${example_name} main.hip)
# Make example runnable using ctest
@@ -61,7 +61,7 @@ add_test(${example_name} ${example_name})
# DELETE ME: Update path depending on directory nesting.
set(include_dirs "../../../Common")
-# DELETE ME: Adding the HIP header directory is only required for examples that directly
+# DELETE ME: Adding the HIP header directory is only required for examples that directly
# depend on HIP without linking to a HIP-based library
# For examples targeting NVIDIA, include the HIP header directory.
if(GPU_RUNTIME STREQUAL "CUDA")
diff --git a/Libraries/exampleLibraryTemplate/example_template/Makefile b/Libraries/exampleLibraryTemplate/example_template/Makefile
index 87b3ea4e7..a603ad7b4 100644
--- a/Libraries/exampleLibraryTemplate/example_template/Makefile
+++ b/Libraries/exampleLibraryTemplate/example_template/Makefile
@@ -34,22 +34,28 @@ HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
# Common variables and flags
-CXX_STD := c++17
-CXXFLAGS := -std=$(CXX_STD)
-CPPFLAGS := -I $(COMMON_INCLUDE_DIR)
-LDFLAGS :=
-LDLIBS :=
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
ifeq ($(GPU_RUNTIME), CUDA)
- CXXFLAGS += -x cu
- CPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR)
else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
else
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
$(RM) $(EXAMPLE)
diff --git a/Libraries/exampleLibraryTemplate/example_template/example_template_vs2019.vcxproj b/Libraries/exampleLibraryTemplate/example_template/example_template_vs2019.vcxproj
index 25cd7210b..af2fc284e 100644
--- a/Libraries/exampleLibraryTemplate/example_template/example_template_vs2019.vcxproj
+++ b/Libraries/exampleLibraryTemplate/example_template/example_template_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/Libraries/hipCUB/CMakeLists.txt b/Libraries/hipCUB/CMakeLists.txt
index 19c0e80ff..44fc66a9d 100644
--- a/Libraries/hipCUB/CMakeLists.txt
+++ b/Libraries/hipCUB/CMakeLists.txt
@@ -21,7 +21,13 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(hipCUB_examples)
+project(hipCUB_examples LANGUAGES CXX)
+
+find_package(hipcub)
+if(NOT hipcub_FOUND)
+ message(STATUS "hipCUB could not be found, not building hipCUB examples")
+ return()
+endif()
add_subdirectory(device_radix_sort)
add_subdirectory(device_sum)
diff --git a/Libraries/hipCUB/README.md b/Libraries/hipCUB/README.md
index 4222fcb02..e06285ec1 100644
--- a/Libraries/hipCUB/README.md
+++ b/Libraries/hipCUB/README.md
@@ -15,27 +15,29 @@ The examples in this subdirectory showcase the functionality of the [hipCUB](htt
### Windows
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
-- [CMake](https://cmake.org/download/) (at least version 3.21)
- ROCm toolchain for Windows (No public release yet)
- The Visual Studio ROCm extension needs to be installed to build with the solution files.
- [hipCUB](https://github.com/ROCmSoftwarePlatform/hipCUB)
- ROCm platform: Installed as part of the ROCm SDK on Windows for ROCm platform.
- CUDA platform: Install hipCUB from source: [instructions](https://github.com/ROCmSoftwarePlatform/hipCUB#build-and-install).
- [CUB](https://github.com/NVIDIA/cub) is a dependency of hipCUB for NVIDIA platforms. CUB is part of the NVIDIA CUDA Toolkit.
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
## Building
### Linux
Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment.
#### Using CMake
-All examples in the `hipCUB` subdirectory can be built by a single CMake project.
+All examples in the `hipCUB` subdirectory can either be built by a single CMake project or be built independently.
- `$ cd Libraries/hipCUB`
- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA)
- `$ cmake --build build`
#### Using Make
-All examples can be built by a single invocation to Make.
+All examples can be built by a single invocation to Make or be built independently.
+
- `$ cd Libraries/hipCUB`
- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA)
@@ -44,3 +46,6 @@ All examples can be built by a single invocation to Make.
Visual Studio solution files are available for the individual examples. To build all examples for hipCUB open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for hipCUB.
For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio).
+
+#### CMake
+All examples in the `hipCUB` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2).
diff --git a/Libraries/hipCUB/device_radix_sort/CMakeLists.txt b/Libraries/hipCUB/device_radix_sort/CMakeLists.txt
index a453f3328..08a45f991 100644
--- a/Libraries/hipCUB/device_radix_sort/CMakeLists.txt
+++ b/Libraries/hipCUB/device_radix_sort/CMakeLists.txt
@@ -40,7 +40,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(hipcub REQUIRED CONFIG)
+find_package(hipcub REQUIRED)
add_executable(hipcub_device_radix_sort main.hip)
add_test(hipcub_device_radix_sort hipcub_device_radix_sort)
diff --git a/Libraries/hipCUB/device_radix_sort/Makefile b/Libraries/hipCUB/device_radix_sort/Makefile
index 862181113..abd8bf414 100644
--- a/Libraries/hipCUB/device_radix_sort/Makefile
+++ b/Libraries/hipCUB/device_radix_sort/Makefile
@@ -20,24 +20,48 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-CUDA_INSTALL_DIR = /usr/local/cuda
-CUDACXX = $(CUDA_INSTALL_DIR)/bin/nvcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := hipcub_device_radix_sort
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-HIP_INCLUDE_DIR = $(ROCM_INSTALL_DIR)/include/
-HIPCUB_INCLUDE_DIR = $(ROCM_INSTALL_DIR)/include/
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+CUDA_INSTALL_DIR := /usr/local/cuda
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+HIPCUB_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+CUDACXX ?= $(CUDA_INSTALL_DIR)/bin/nvcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -isystem $(HIPCUB_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
-hipcub_device_radix_sort: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
ifeq ($(GPU_RUNTIME), CUDA)
- $(CUDACXX) $< -std=$(CXX_STD) -isystem $(HIPCUB_INCLUDE_DIR) -isystem $(HIP_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -o $@ -D__HIP_PLATFORM_NVIDIA__ -x cu
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR) -D__HIP_PLATFORM_NVIDIA__
+ COMPILER := $(CUDACXX)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+ ICPPFLAGS += -D__HIP_PLATFORM_AMD__
+ COMPILER := $(HIPCXX)
else
- $(HIPCXX) $< -std=$(CXX_STD) -isystem $(HIPCUB_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -o $@ -D__HIP_PLATFORM_AMD__
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
clean:
- rm -f hipcub_device_radix_sort
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/hipCUB/device_radix_sort/device_radix_sort_vs2019.vcxproj b/Libraries/hipCUB/device_radix_sort/device_radix_sort_vs2019.vcxproj
index fa7af4091..01f825838 100644
--- a/Libraries/hipCUB/device_radix_sort/device_radix_sort_vs2019.vcxproj
+++ b/Libraries/hipCUB/device_radix_sort/device_radix_sort_vs2019.vcxproj
@@ -67,7 +67,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
stdcpp17
@@ -97,4 +97,4 @@
-
\ No newline at end of file
+
diff --git a/Libraries/hipCUB/device_sum/CMakeLists.txt b/Libraries/hipCUB/device_sum/CMakeLists.txt
index 9844b9525..6c09ffae6 100644
--- a/Libraries/hipCUB/device_sum/CMakeLists.txt
+++ b/Libraries/hipCUB/device_sum/CMakeLists.txt
@@ -40,7 +40,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(hipcub REQUIRED CONFIG)
+find_package(hipcub REQUIRED)
add_executable(hipcub_device_sum main.hip)
add_test(hipcub_device_sum hipcub_device_sum)
diff --git a/Libraries/hipCUB/device_sum/Makefile b/Libraries/hipCUB/device_sum/Makefile
index 2da3412cb..0c9e95461 100644
--- a/Libraries/hipCUB/device_sum/Makefile
+++ b/Libraries/hipCUB/device_sum/Makefile
@@ -20,24 +20,48 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-CUDA_INSTALL_DIR = /usr/local/cuda
-CUDACXX = $(CUDA_INSTALL_DIR)/bin/nvcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := hipcub_device_sum
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-HIP_INCLUDE_DIR = $(ROCM_INSTALL_DIR)/include/
-HIPCUB_INCLUDE_DIR = $(ROCM_INSTALL_DIR)/include/
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+CUDA_INSTALL_DIR := /usr/local/cuda
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+HIPCUB_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+CUDACXX ?= $(CUDA_INSTALL_DIR)/bin/nvcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -isystem $(HIPCUB_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS :=
+ILDLIBS :=
-hipcub_device_sum: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
ifeq ($(GPU_RUNTIME), CUDA)
- $(CUDACXX) $< -std=$(CXX_STD) -isystem $(HIPCUB_INCLUDE_DIR) -isystem $(HIP_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -o $@ -D__HIP_PLATFORM_NVIDIA__ -x cu
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR) -D__HIP_PLATFORM_NVIDIA__
+ COMPILER := $(CUDACXX)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+ ICPPFLAGS += -D__HIP_PLATFORM_AMD__
+ COMPILER := $(HIPCXX)
else
- $(HIPCXX) $< -std=$(CXX_STD) -isystem $(HIPCUB_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -o $@ -D__HIP_PLATFORM_AMD__
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
clean:
- rm -f hipcub_device_sum
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/hipCUB/device_sum/device_sum_vs2019.vcxproj b/Libraries/hipCUB/device_sum/device_sum_vs2019.vcxproj
index 6b0385fa0..c165d8414 100644
--- a/Libraries/hipCUB/device_sum/device_sum_vs2019.vcxproj
+++ b/Libraries/hipCUB/device_sum/device_sum_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
stdcpp17
@@ -96,4 +96,4 @@
-
\ No newline at end of file
+
diff --git a/Libraries/rocPRIM/CMakeLists.txt b/Libraries/rocPRIM/CMakeLists.txt
index 99077a6b7..7d4106384 100644
--- a/Libraries/rocPRIM/CMakeLists.txt
+++ b/Libraries/rocPRIM/CMakeLists.txt
@@ -21,12 +21,18 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(rocPRIM_examples)
+project(rocPRIM_examples LANGUAGES CXX)
if(GPU_RUNTIME STREQUAL "CUDA")
message(STATUS "rocPRIM examples do not support the CUDA runtime")
return()
endif()
+find_package(rocprim)
+if(NOT rocprim_FOUND)
+ message(STATUS "rocPRIM could not be found, not building rocPRIM examples")
+ return()
+endif()
+
add_subdirectory(block_sum)
add_subdirectory(device_sum)
diff --git a/Libraries/rocPRIM/README.md b/Libraries/rocPRIM/README.md
index fe20702f6..9223a72d2 100644
--- a/Libraries/rocPRIM/README.md
+++ b/Libraries/rocPRIM/README.md
@@ -13,26 +13,27 @@ The examples in this subdirectory showcase the functionality of the [rocPRIM](ht
### Windows
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
-- [CMake](https://cmake.org/download/) (at least version 3.21)
- ROCm toolchain for Windows (No public release yet)
- The Visual Studio ROCm extension needs to be installed to build with the solution files.
- [rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM)
- Installed as part of the ROCm SDK on Windows for ROCm platform.
-
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
## Building
### Linux
Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment.
#### Using CMake
-All examples in the `rocPRIM` subdirectory can be built by a single CMake project.
+All examples in the `rocPRIM` subdirectory can either be built by a single CMake project or be built independently.
- `$ cd Libraries/rocPRIM`
- `$ cmake -S . -B build`
- `$ cmake --build build`
#### Using Make
-All examples can be built by a single invocation to Make.
+All examples can be built by a single invocation to Make or be built independently.
+
- `$ cd Libraries/rocPRIM`
- `$ make`
@@ -41,3 +42,6 @@ All examples can be built by a single invocation to Make.
Visual Studio solution files are available for the individual examples. To build all examples for rocPRIM open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for rocPRIM.
For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio).
+
+#### CMake
+All examples in the `rocPRIM` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2).
diff --git a/Libraries/rocPRIM/block_sum/CMakeLists.txt b/Libraries/rocPRIM/block_sum/CMakeLists.txt
index f6f49e723..4624c0f09 100644
--- a/Libraries/rocPRIM/block_sum/CMakeLists.txt
+++ b/Libraries/rocPRIM/block_sum/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocprim REQUIRED CONFIG)
+find_package(rocprim REQUIRED)
add_executable(rocprim_block_sum main.hip)
add_test(rocprim_block_sum rocprim_block_sum)
diff --git a/Libraries/rocPRIM/block_sum/Makefile b/Libraries/rocPRIM/block_sum/Makefile
index 524cc50a6..5468ff3c1 100644
--- a/Libraries/rocPRIM/block_sum/Makefile
+++ b/Libraries/rocPRIM/block_sum/Makefile
@@ -20,16 +20,34 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
-ROCPRIM_INCLUDE_DIR = $(ROCM_INSTALL_DIR)/include
+EXAMPLE := rocprim_block_sum
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocprim_block_sum: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -isystem $(ROCPRIM_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -o $@ -D__HIP_PLATFORM_AMD__
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCPRIM_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCPRIM_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocprim_block_sum
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocPRIM/block_sum/block_sum_vs2019.vcxproj b/Libraries/rocPRIM/block_sum/block_sum_vs2019.vcxproj
index d3a64fb78..e42ec8869 100644
--- a/Libraries/rocPRIM/block_sum/block_sum_vs2019.vcxproj
+++ b/Libraries/rocPRIM/block_sum/block_sum_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/Libraries/rocPRIM/block_sum/main.hip b/Libraries/rocPRIM/block_sum/main.hip
index fb9a63939..627f0bca3 100644
--- a/Libraries/rocPRIM/block_sum/main.hip
+++ b/Libraries/rocPRIM/block_sum/main.hip
@@ -36,7 +36,7 @@ std::vector reduce_sum_host(const std::vector& data,
const unsigned int grid_size)
{
std::vector sum_per_block(grid_size);
- for(int i = 0; i < grid_size; i++)
+ for(unsigned int i = 0; i < grid_size; i++)
{
const auto begin = data.begin() + i * run_size;
const auto end = data.begin() + std::min(data.size(), (size_t(1) + i) * run_size);
diff --git a/Libraries/rocPRIM/device_sum/CMakeLists.txt b/Libraries/rocPRIM/device_sum/CMakeLists.txt
index fcdfc816b..2c8a1e41f 100644
--- a/Libraries/rocPRIM/device_sum/CMakeLists.txt
+++ b/Libraries/rocPRIM/device_sum/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocprim REQUIRED CONFIG)
+find_package(rocprim REQUIRED)
add_executable(rocprim_device_sum main.hip)
add_test(rocprim_device_sum rocprim_device_sum)
diff --git a/Libraries/rocPRIM/device_sum/Makefile b/Libraries/rocPRIM/device_sum/Makefile
index ab438a9ff..2b7be2845 100644
--- a/Libraries/rocPRIM/device_sum/Makefile
+++ b/Libraries/rocPRIM/device_sum/Makefile
@@ -20,16 +20,33 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
-ROCPRIM_INCLUDE_DIR = $(ROCM_INSTALL_DIR)/include
+EXAMPLE := rocprim_device_sum
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocprim_device_sum: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -isystem $(ROCPRIM_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -o $@ -D__HIP_PLATFORM_AMD__
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCPRIM_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCPRIM_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocprim_device_sum
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocPRIM/device_sum/device_sum_vs2019.vcxproj b/Libraries/rocPRIM/device_sum/device_sum_vs2019.vcxproj
index 584b7d7c3..af3cfb3a3 100644
--- a/Libraries/rocPRIM/device_sum/device_sum_vs2019.vcxproj
+++ b/Libraries/rocPRIM/device_sum/device_sum_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/Libraries/rocRAND/CMakeLists.txt b/Libraries/rocRAND/CMakeLists.txt
index 9d5522d7e..3bcd58256 100644
--- a/Libraries/rocRAND/CMakeLists.txt
+++ b/Libraries/rocRAND/CMakeLists.txt
@@ -21,5 +21,12 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(rocRAND_examples)
+project(rocRAND_examples LANGUAGES CXX)
+
+find_package(rocrand)
+if(NOT rocrand_FOUND)
+ message(STATUS "rocRAND could not be found, not building rocRAND examples")
+ return()
+endif()
+
add_subdirectory(simple_distributions_cpp)
diff --git a/Libraries/rocRAND/README.md b/Libraries/rocRAND/README.md
index c166fa281..eb573c5ad 100644
--- a/Libraries/rocRAND/README.md
+++ b/Libraries/rocRAND/README.md
@@ -14,26 +14,28 @@ The examples in this subdirectory showcase the functionality of the [rocRAND](ht
### Windows
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
-- [CMake](https://cmake.org/download/) (at least version 3.21)
- ROCm toolchain for Windows (No public release yet)
- The Visual Studio ROCm extension needs to be installed to build with the solution files.
- [rocRAND](https://github.com/rocmSoftwarePlatform/rocRAND)
- ROCm platform: Installed as part of the ROCm SDK on Windows.
- CUDA platform: Install rocRAND from source: [instructions](https://github.com/rocmSoftwarePlatform/rocRAND#build-and-install).
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
## Building
### Linux
Make sure that the dependencies are installed, or use the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment set up specifically for the example suite.
#### Using CMake
-All examples in the `rocRAND` subdirectory can be built by a single CMake project.
+All examples in the `rocRAND` subdirectory can either be built by a single CMake project or be built independently.
- `$ cd Libraries/rocRAND`
- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA)
- `$ cmake --build build`
#### Using Make
-All examples in the `rocRAND` subdirectory can be built by a single invocation of Make.
+All examples can be built by a single invocation of Make or be built independently.
+
- `$ cd Libraries/rocRAND`
- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA)
@@ -42,3 +44,6 @@ All examples in the `rocRAND` subdirectory can be built by a single invocation o
Visual Studio solution files are available for the individual examples. To build all examples for rocRAND open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for rocRAND.
For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio).
+
+#### CMake
+All examples in the `rocRAND` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2).
diff --git a/Libraries/rocRAND/simple_distributions_cpp/Makefile b/Libraries/rocRAND/simple_distributions_cpp/Makefile
index f573aea40..0d0682e0f 100644
--- a/Libraries/rocRAND/simple_distributions_cpp/Makefile
+++ b/Libraries/rocRAND/simple_distributions_cpp/Makefile
@@ -20,20 +20,51 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-CUDA_INSTALL_DIR = /usr/local/cuda
-CUDACXX = $(CUDA_INSTALL_DIR)/bin/nvcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := rocrand_simple_distributions_cpp
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+CUDA_INSTALL_DIR := /usr/local/cuda
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCRAND_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+CXX ?= g++
+CUDACXX ?= $(CUDA_INSTALL_DIR)/bin/nvcc
+
+# Common variables and flags
+CXX_STD := c++17
+ICXXFLAGS := -std=$(CXX_STD)
+ICPPFLAGS := -isystem $(ROCRAND_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR)
+ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib
+ILDLIBS := -lrocrand
-rocrand_simple_distributions_cpp: main.cpp $(COMMON_INCLUDE_DIR)/cmdparser.hpp $(COMMON_INCLUDE_DIR)/example_utils.hpp
ifeq ($(GPU_RUNTIME), CUDA)
- $(CUDACXX) $< -std=$(CXX_STD) -isystem $(ROCM_INSTALL_DIR)/include -isystem $(CUDA_INSTALL_DIR)/include -I $(COMMON_INCLUDE_DIR) -L $(ROCM_INSTALL_DIR)/lib -L $(CUDA_INSTALL_DIR)/lib64 -lrocrand -lcudart -o $@ -D__HIP_PLATFORM_NVIDIA__ -x cu
+ ICXXFLAGS += -x cu
+ ICPPFLAGS += -D__HIP_PLATFORM_NVIDIA__ -isystem $(HIP_INCLUDE_DIR)
+ ILDFLAGS += -L $(CUDA_INSTALL_DIR)/lib64
+ ILDLIBS += -lcudart
+ COMPILER := $(CUDACXX)
+else ifeq ($(GPU_RUNTIME), HIP)
+ CXXFLAGS ?= -Wall -Wextra
+ ICPPFLAGS += -D__HIP_PLATFORM_AMD__
+ ILDLIBS += -lamdhip64
+ COMPILER := $(CXX)
else
- $(CXX) $< -std=$(CXX_STD) -isystem $(ROCM_INSTALL_DIR)/include -I $(COMMON_INCLUDE_DIR) -L $(ROCM_INSTALL_DIR)/lib -lrocrand -lamdhip64 -o $@ -D__HIP_PLATFORM_AMD__
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endif
+ICXXFLAGS += $(CXXFLAGS)
+ICPPFLAGS += $(CPPFLAGS)
+ILDFLAGS += $(LDFLAGS)
+ILDLIBS += $(LDLIBS)
+
+$(EXAMPLE): main.cpp $(COMMON_INCLUDE_DIR)/cmdparser.hpp $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
+
clean:
- rm -f rocrand_simple_distributions_cpp
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocRAND/simple_distributions_cpp/main.cpp b/Libraries/rocRAND/simple_distributions_cpp/main.cpp
index 0864c765d..dfdf32166 100644
--- a/Libraries/rocRAND/simple_distributions_cpp/main.cpp
+++ b/Libraries/rocRAND/simple_distributions_cpp/main.cpp
@@ -29,6 +29,7 @@
#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP__) && (defined(WIN32) || defined(_WIN32))
#include
#endif
+
#include
#include
diff --git a/Libraries/rocRAND/simple_distributions_cpp/simple_distributions_cpp_vs2019.vcxproj b/Libraries/rocRAND/simple_distributions_cpp/simple_distributions_cpp_vs2019.vcxproj
index a76642327..f4efc4266 100644
--- a/Libraries/rocRAND/simple_distributions_cpp/simple_distributions_cpp_vs2019.vcxproj
+++ b/Libraries/rocRAND/simple_distributions_cpp/simple_distributions_cpp_vs2019.vcxproj
@@ -1,104 +1,106 @@
-
-
-
-
- Debug
- x64
-
-
- Release
- x64
-
-
-
-
-
-
-
-
-
-
- 15.0
- {13bb009a-0679-49c0-a763-3f0a388ea78f}
- Win32Proj
- simple_distributions_cpp_vs2019
- 10.0
-
-
-
- Application
- true
- HIP
- Unicode
-
-
- Application
- false
- HIP
- true
- Unicode
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- rocrand_$(ProjectName)
-
-
- false
- rocrand_$(ProjectName)
-
-
- gfx1030
-
-
- gfx1030
-
-
-
- Level1
- __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
- stdcpp17
- true
-
-
- Console
- true
- rocrand.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
-
-
- Level2
- true
- true
- __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
- $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
- stdcpp17
- true
-
-
- Console
- true
- true
- true
- rocrand.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
-
-
-
-
-
-
-
\ No newline at end of file
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+ 15.0
+ {13bb009a-0679-49c0-a763-3f0a388ea78f}
+ Win32Proj
+ simple_distributions_cpp_vs2019
+ 10.0
+
+
+
+ Application
+ true
+ HIP
+ Unicode
+
+
+ Application
+ false
+ HIP
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ rocrand_$(ProjectName)
+
+
+ false
+ rocrand_$(ProjectName)
+
+
+ gfx1030
+
+
+ gfx1030
+
+
+
+ Level2
+ __HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
+ stdcpp17
+ true
+ -Wno-#warnings
+
+
+ Console
+ true
+ rocrand.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+
+
+ Level2
+ true
+ true
+ __HIP_ROCclr__;__clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
+ stdcpp17
+ true
+ -Wno-#warnings
+
+
+ Console
+ true
+ true
+ true
+ rocrand.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+
+
+
+
+
diff --git a/Libraries/rocThrust/CMakeLists.txt b/Libraries/rocThrust/CMakeLists.txt
index 658cf420e..fa7d95c9d 100644
--- a/Libraries/rocThrust/CMakeLists.txt
+++ b/Libraries/rocThrust/CMakeLists.txt
@@ -21,13 +21,19 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
-project(rocThrust_examples)
+project(rocThrust_examples LANGUAGES CXX)
if(GPU_RUNTIME STREQUAL "CUDA")
message(STATUS "rocThrust examples do not support the CUDA runtime")
return()
endif()
+find_package(rocthrust)
+if(NOT rocthrust_FOUND)
+ message(STATUS "rocThrust could not be found, not building rocThrust examples")
+ return()
+endif()
+
add_subdirectory(device_ptr)
add_subdirectory(norm)
add_subdirectory(reduce_sum)
diff --git a/Libraries/rocThrust/README.md b/Libraries/rocThrust/README.md
index c59309a58..ba2ae1d4d 100644
--- a/Libraries/rocThrust/README.md
+++ b/Libraries/rocThrust/README.md
@@ -12,25 +12,26 @@ The examples in this subdirectory showcase the functionality of the [rocThrust](
### Windows
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
-- [CMake](https://cmake.org/download/) (at least version 3.21)
- ROCm toolchain for Windows (No public release yet)
- The Visual Studio ROCm extension needs to be installed to build with the solution files.
- [rocThrust](https://github.com/rocmSoftwarePlatform/rocThrust): installed as part of the ROCm SDK on Windows
-
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
## Building
### Linux
Make sure that the dependencies are installed, or use the [provided Dockerfile](../../Dockerfiles/hip-libraries-rocm-ubuntu.Dockerfile) to build and run the examples in a containerized environment that has all prerequisites installed.
#### Using CMake
-All examples in the `rocThrust` subdirectory can be built by a single CMake project.
+All examples in the `rocThrust` subdirectory can either be built by a single CMake project or be built independently.
- `$ cd Libraries/rocThrust`
- `$ cmake -S . -B build`
- `$ cmake --build build`
#### Using Make
-All examples can be built by a single invocation to Make.
+All examples can be built by a single invocation of Make or be built independently.
+
- `$ cd Libraries/rocThrust`
- `$ make`
@@ -39,3 +40,6 @@ All examples can be built by a single invocation to Make.
Visual Studio solution files are available for the individual examples. To build all examples for rocThrust open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for rocThrust.
For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio).
+
+#### CMake
+All examples in the `rocThrust` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2).
diff --git a/Libraries/rocThrust/device_ptr/CMakeLists.txt b/Libraries/rocThrust/device_ptr/CMakeLists.txt
index f90bb585c..a88fb9c20 100644
--- a/Libraries/rocThrust/device_ptr/CMakeLists.txt
+++ b/Libraries/rocThrust/device_ptr/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocthrust REQUIRED CONFIG)
+find_package(rocthrust REQUIRED)
add_executable(rocthrust_device_ptr main.hip)
target_link_libraries(rocthrust_device_ptr PRIVATE roc::rocthrust)
add_test(rocthrust_device_ptr rocthrust_device_ptr)
diff --git a/Libraries/rocThrust/device_ptr/Makefile b/Libraries/rocThrust/device_ptr/Makefile
index 54adb7b7d..5fc116cdd 100644
--- a/Libraries/rocThrust/device_ptr/Makefile
+++ b/Libraries/rocThrust/device_ptr/Makefile
@@ -20,15 +20,35 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := rocthrust_device_ptr
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocthrust_device_ptr: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -I $(COMMON_INCLUDE_DIR) -o $@
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCTHRUST_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCTHRUST_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocthrust_device_ptr
+ $(RM) $(EXAMPLE)
.PHONY: clean
+
diff --git a/Libraries/rocThrust/device_ptr/device_ptr_vs2019.vcxproj b/Libraries/rocThrust/device_ptr/device_ptr_vs2019.vcxproj
index af83c5c0b..0d1557070 100644
--- a/Libraries/rocThrust/device_ptr/device_ptr_vs2019.vcxproj
+++ b/Libraries/rocThrust/device_ptr/device_ptr_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
stdcpp17
@@ -96,4 +96,4 @@
-
\ No newline at end of file
+
diff --git a/Libraries/rocThrust/norm/CMakeLists.txt b/Libraries/rocThrust/norm/CMakeLists.txt
index 7fa2a583b..9f13eb544 100644
--- a/Libraries/rocThrust/norm/CMakeLists.txt
+++ b/Libraries/rocThrust/norm/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocthrust REQUIRED CONFIG)
+find_package(rocthrust REQUIRED)
add_executable(rocthrust_norm main.hip)
target_link_libraries(rocthrust_norm PRIVATE roc::rocthrust)
add_test(rocthrust_norm rocthrust_norm)
diff --git a/Libraries/rocThrust/norm/Makefile b/Libraries/rocThrust/norm/Makefile
index dcf88e391..5c08004b9 100644
--- a/Libraries/rocThrust/norm/Makefile
+++ b/Libraries/rocThrust/norm/Makefile
@@ -20,15 +20,34 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := rocthrust_norm
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocthrust_norm: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -I $(COMMON_INCLUDE_DIR) -o $@
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCTHRUST_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCTHRUST_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocthrust_norm
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocThrust/norm/norm_vs2019.vcxproj b/Libraries/rocThrust/norm/norm_vs2019.vcxproj
index 810848257..24c46ba85 100644
--- a/Libraries/rocThrust/norm/norm_vs2019.vcxproj
+++ b/Libraries/rocThrust/norm/norm_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
stdcpp17
@@ -96,4 +96,4 @@
-
\ No newline at end of file
+
diff --git a/Libraries/rocThrust/reduce_sum/CMakeLists.txt b/Libraries/rocThrust/reduce_sum/CMakeLists.txt
index c23c930bc..8348fcb6f 100644
--- a/Libraries/rocThrust/reduce_sum/CMakeLists.txt
+++ b/Libraries/rocThrust/reduce_sum/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocthrust REQUIRED CONFIG)
+find_package(rocthrust REQUIRED)
add_executable(rocthrust_reduce_sum main.hip)
target_link_libraries(rocthrust_reduce_sum PRIVATE roc::rocthrust)
add_test(rocthrust_reduce_sum rocthrust_reduce_sum)
diff --git a/Libraries/rocThrust/reduce_sum/Makefile b/Libraries/rocThrust/reduce_sum/Makefile
index 5d69e3c3f..c61fb4534 100644
--- a/Libraries/rocThrust/reduce_sum/Makefile
+++ b/Libraries/rocThrust/reduce_sum/Makefile
@@ -20,15 +20,34 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := rocthrust_reduce_sum
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocthrust_reduce_sum: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -I $(COMMON_INCLUDE_DIR) -o $@
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCTHRUST_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCTHRUST_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocthrust_reduce_sum
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocThrust/reduce_sum/reduce_sum_vs2019.vcxproj b/Libraries/rocThrust/reduce_sum/reduce_sum_vs2019.vcxproj
index 65defd994..36fdedab8 100644
--- a/Libraries/rocThrust/reduce_sum/reduce_sum_vs2019.vcxproj
+++ b/Libraries/rocThrust/reduce_sum/reduce_sum_vs2019.vcxproj
@@ -63,7 +63,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
stdcpp17
@@ -93,4 +93,4 @@
-
\ No newline at end of file
+
diff --git a/Libraries/rocThrust/remove_points/CMakeLists.txt b/Libraries/rocThrust/remove_points/CMakeLists.txt
index 8710a3671..6b1a5f717 100644
--- a/Libraries/rocThrust/remove_points/CMakeLists.txt
+++ b/Libraries/rocThrust/remove_points/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocthrust REQUIRED CONFIG)
+find_package(rocthrust REQUIRED)
add_executable(rocthrust_remove_points main.hip)
target_link_libraries(rocthrust_remove_points PRIVATE roc::rocthrust)
diff --git a/Libraries/rocThrust/remove_points/Makefile b/Libraries/rocThrust/remove_points/Makefile
index 068b60aeb..50b34fdf5 100644
--- a/Libraries/rocThrust/remove_points/Makefile
+++ b/Libraries/rocThrust/remove_points/Makefile
@@ -20,15 +20,34 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := rocthrust_remove_points
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocthrust_remove_points: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -I $(COMMON_INCLUDE_DIR) -o $@
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCTHRUST_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCTHRUST_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocthrust_remove_points
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocThrust/remove_points/remove_points_vs2019.vcxproj b/Libraries/rocThrust/remove_points/remove_points_vs2019.vcxproj
index 732544aa8..6dd24701b 100644
--- a/Libraries/rocThrust/remove_points/remove_points_vs2019.vcxproj
+++ b/Libraries/rocThrust/remove_points/remove_points_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
stdcpp17
@@ -96,4 +96,4 @@
-
\ No newline at end of file
+
diff --git a/Libraries/rocThrust/saxpy/CMakeLists.txt b/Libraries/rocThrust/saxpy/CMakeLists.txt
index 6d73f052a..4945236dd 100644
--- a/Libraries/rocThrust/saxpy/CMakeLists.txt
+++ b/Libraries/rocThrust/saxpy/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocthrust REQUIRED CONFIG)
+find_package(rocthrust REQUIRED)
add_executable(rocthrust_saxpy main.hip)
target_link_libraries(rocthrust_saxpy PRIVATE roc::rocthrust)
add_test(rocthrust_saxpy rocthrust_saxpy)
diff --git a/Libraries/rocThrust/saxpy/Makefile b/Libraries/rocThrust/saxpy/Makefile
index 1587cdcb0..9f6ecaee6 100644
--- a/Libraries/rocThrust/saxpy/Makefile
+++ b/Libraries/rocThrust/saxpy/Makefile
@@ -20,15 +20,34 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := rocthrust_saxpy
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocthrust_saxpy: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -I $(COMMON_INCLUDE_DIR) -o $@
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCTHRUST_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCTHRUST_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocthrust_saxpy
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocThrust/saxpy/saxpy_vs2019.vcxproj b/Libraries/rocThrust/saxpy/saxpy_vs2019.vcxproj
index c12faddc9..bf11ed199 100644
--- a/Libraries/rocThrust/saxpy/saxpy_vs2019.vcxproj
+++ b/Libraries/rocThrust/saxpy/saxpy_vs2019.vcxproj
@@ -66,7 +66,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
stdcpp17
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
diff --git a/Libraries/rocThrust/vectors/CMakeLists.txt b/Libraries/rocThrust/vectors/CMakeLists.txt
index 8b02f18b9..efd467c32 100644
--- a/Libraries/rocThrust/vectors/CMakeLists.txt
+++ b/Libraries/rocThrust/vectors/CMakeLists.txt
@@ -31,7 +31,7 @@ if(NOT CMAKE_PREFIX_PATH)
set(CMAKE_PREFIX_PATH "/opt/rocm")
endif()
-find_package(rocthrust REQUIRED CONFIG)
+find_package(rocthrust REQUIRED)
add_executable(rocthrust_vectors main.hip)
target_link_libraries(rocthrust_vectors PRIVATE roc::rocthrust)
add_test(rocthrust_vectors rocthrust_vectors)
diff --git a/Libraries/rocThrust/vectors/Makefile b/Libraries/rocThrust/vectors/Makefile
index cfc50037b..bbcc489b5 100644
--- a/Libraries/rocThrust/vectors/Makefile
+++ b/Libraries/rocThrust/vectors/Makefile
@@ -20,15 +20,34 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-ROCM_INSTALL_DIR = /opt/rocm
-HIPCXX = $(ROCM_INSTALL_DIR)/bin/hipcc
-CXX_STD = c++17
-COMMON_INCLUDE_DIR = ../../../Common
+EXAMPLE := rocthrust_vectors
+COMMON_INCLUDE_DIR := ../../../Common
+GPU_RUNTIME := HIP
-rocthrust_vectors: main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
- $(HIPCXX) $< -std=$(CXX_STD) -I $(COMMON_INCLUDE_DIR) -o $@
+ifneq ($(GPU_RUNTIME), HIP)
+ $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
+endif
+
+# HIP variables
+ROCM_INSTALL_DIR := /opt/rocm
+
+HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include
+ROCTHRUST_INCLUDE_DIR := $(HIP_INCLUDE_DIR)
+
+HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
+
+# Common variables and flags
+CXX_STD := c++17
+CXXFLAGS ?= -Wall -Wextra
+ICXXFLAGS := -std=$(CXX_STD) $(CXXFLAGS)
+ICPPFLAGS := -I $(COMMON_INCLUDE_DIR) -isystem $(ROCTHRUST_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__ $(CPPFLAGS)
+ILDFLAGS := $(LDFLAGS)
+ILDLIBS := $(LDLIBS)
+
+$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/example_utils.hpp
+ $(HIPCXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)
clean:
- rm -f rocthrust_vectors
+ $(RM) $(EXAMPLE)
.PHONY: clean
diff --git a/Libraries/rocThrust/vectors/vectors_vs2019.vcxproj b/Libraries/rocThrust/vectors/vectors_vs2019.vcxproj
index 7c4941832..df3cb55af 100644
--- a/Libraries/rocThrust/vectors/vectors_vs2019.vcxproj
+++ b/Libraries/rocThrust/vectors/vectors_vs2019.vcxproj
@@ -63,7 +63,7 @@
- Level1
+ Level2
__HIP_ROCclr__;__clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
$(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories)
stdcpp17
@@ -93,4 +93,4 @@
-
\ No newline at end of file
+
diff --git a/Makefile b/Makefile
index 1e2ea48dd..637937e01 100644
--- a/Makefile
+++ b/Makefile
@@ -21,6 +21,7 @@
# SOFTWARE.
SUB_PROJECTS := \
+ Applications \
HIP-Basic \
Libraries
diff --git a/README.md b/README.md
index 55d7a3a18..4268b6f2f 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,36 @@
# ROCm Examples
This project is currently unsupported and in an early testing stage. Feedback on the contents of this repository is appreciated.
## Repository Contents
+- [Applications](/Applications/) groups a number of examples.
+ - [floyd_warshall](/Applications/floyd_warshall/): Showcases a GPU implementation of the Floyd-Warshall algorithm for finding shortest paths in certain types of graphs.
- [Common](/Common/) contains common utility functionality shared between the examples.
- [HIP-Basic](/HIP-Basic/) hosts self-contained recipes showcasing HIP runtime functionality.
- [assembly_to_executable](/HIP-Basic/assembly_to_executable): Program and accompanying build systems that show how to manually compile and link a HIP application from host and device code.
- [bandwidth](/HIP-Basic/bandwidth): Program that measures memory bandwidth from host to device, device to host, and device to device.
+ - [bit_extract](/HIP-Basic/bit_extract): Program that showcases how to use HIP built-in bit extract.
+ - [device_globals](/HIP-Basic/device_globals): Showcases how to set global variables on the device from the host.
- [device_query](/HIP-Basic/device_query): Program that showcases how properties from the device may be queried.
- [dynamic_shared](/HIP-Basic/dynamic_shared): Program that showcases how to use dynamic shared memory with the help of a simple matrix transpose kernel.
- [events](/HIP-Basic/events/): Measuring execution time and synchronizing with HIP events.
+ - [gpu_arch](/HIP-Basic/gpu_arch/): Program that showcases how to implement GPU architecture-specific code.
- [hello_world](/HIP-Basic/hello_world): Simple program that showcases launching kernels and printing from the device.
- [hipify](/HIP-Basic/hipify): Simple program and build definitions that showcase automatically converting a CUDA `.cu` source into portable HIP `.hip` source.
- [llvm_ir_to_executable](/HIP-Basic/llvm_ir_to_executable): Shows how to create a HIP executable from LLVM IR.
+ - [inline_assembly](/HIP-Basic/inline_assembly/): Program that showcases how to use inline assembly in a portable manner.
- [matrix_multiplication](/HIP-Basic/matrix_multiplication/): Multiply two dynamically sized matrices utilizing shared memory.
+ - [module_api](/HIP-Basic/module_api/): Shows how to load and execute a HIP module at runtime.
+ - [moving_average](/HIP-Basic/moving_average/): Simple program that demonstrates parallel computation of a moving average of one-dimensional data.
+ - [multi_gpu_data_transfer](/HIP-Basic/multi_gpu_data_transfer/): Performs two matrix transposes on two different devices (one on each) to showcase how to use peer-to-peer communication among devices.
- [occupancy](/HIP-Basic/occupancy/): Shows how to find optimal configuation parameters for a kernel launch with maximum occupancy.
+ - [opengl_interop](/HIP-Basic/opengl_interop): Showcases how to share resources and computation between HIP and OpenGL.
- [runtime_compilation](/HIP-Basic/runtime_compilation/): Simple program that showcases how to use HIP runtime compilation (hipRTC) to compile a kernel and launch it on a device.
- [saxpy](/HIP-Basic/saxpy/): Implements the $Y_i=aX_i+Y_i$ kernel and explains basic HIP functionality.
- [shared_memory](/HIP-Basic/shared_memory/): Showcases how to use static shared memory by implementing a simple matrix transpose kernel.
+ - [static_device_library](/HIP-Basic/static_device_library): Shows how to create a static library containing device functions, and how to link it with an executable.
+ - [static_host_library](/HIP-Basic/static_host_library): Shows how to create a static library containing HIP host functions, and how to link it with an executable.
- [streams](/HIP-Basic/streams/): Program that showcases usage of multiple streams each with their own tasks.
+ - [texture_management](/HIP-Basic/texture_management/): Shows the usage of texture memory.
+ - [vulkan_interop](/HIP-Basic/vulkan_interop): Showcases how to share resources and computation between HIP and Vulkan.
- [warp_shuffle](/HIP-Basic/warp_shuffle/): Uses a simple matrix transpose kernel to showcase how to use warp shuffle operations.
- [Dockerfiles](/Dockerfiles/) hosts Dockerfiles with ready-to-use environments for the various samples. See [Dockerfiles/README.md](/Dockerfiles/README.md) for details.
- [Docs](/Docs/)
@@ -47,15 +61,17 @@ This project is currently unsupported and in an early testing stage. Feedback on
### Windows
- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload
-- [CMake](https://cmake.org/download/) (at least version 3.21)
- ROCm toolchain for Windows (No public release yet)
- The Visual Studio ROCm extension needs to be installed to build with the solution files.
+- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21)
+- [Ninja](https://ninja-build.org/) (optional, to build with CMake)
## Building the example suite
### Linux
These instructions assume that the prerequisites for every example are installed on the system.
#### CMake
+See [CMake build options](#cmake-build-options) for an overview of build options.
- `$ git clone https://github.com/amd/rocm-examples.git`
- `$ cd rocm-examples`
- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA)
@@ -93,3 +109,37 @@ The repository has Visual Studio project files for all examples and individually
- To build in Release mode pass the `/p:Configuration=Release` option to MSBuild.
- The exutables will be created in a subfolder named "Debug" or "Release" inside the project folder.
- The HIP specific project settings like the GPU architectures targeted can be set on the `General [AMD HIP C++]` tab of project properties.
+
+#### CMake
+First, clone the repository and go to the source directory.
+
+```shell
+git clone https://github.com/amd/rocm-examples.git
+cd rocm-examples
+```
+
+There are two ways to build the project using CMake: with the Visual Studio Developer Command Prompt (recommended) or with a standard Command Prompt. See [CMake build options](#cmake-build-options) for an overview of build options.
+
+##### Visual Studio Developer Command Prompt
+Select Start, search for "x64 Native Tools Command Prompt for VS 2019", and open the resulting Command Prompt. Ninja must be selected as the generator, and Clang as the C++ compiler.
+
+```shell
+cmake -S . -B build -G Ninja -D CMAKE_CXX_COMPILER=clang
+cmake --build build
+```
+
+##### Standard Command Prompt
+Run the standard Command Prompt. When using the standard Command Prompt to build the project, the Resource Compiler (RC) path must be specified. The RC is a tool used to build Windows-based applications; its default path is `C:/Program Files (x86)/Windows Kits/10/bin/<Windows SDK version>/x64/rc.exe`. Finally, the generator must be set to Ninja.
+
+```shell
+cmake -S . -B build -G Ninja -D CMAKE_RC_COMPILER="<path to rc.exe>"
+cmake --build build
+```
+
+### CMake build options
+The following options are available when building with CMake.
+| Option | Relevant to | Default value | Description |
+|:---------------------------|:------------|:-----------------|:--------------------------------------------------------------------------------------------------------|
+| `GPU_RUNTIME` | HIP / CUDA | `"HIP"` | GPU runtime to compile for. Set to `"CUDA"` to compile for NVIDIA GPUs and to `"HIP"` for AMD GPUs. |
+| `CMAKE_HIP_ARCHITECTURES` | HIP | Compiler default | HIP device architectures to target, e.g. `"gfx908;gfx1030"` to target architectures gfx908 and gfx1030. |
+| `CMAKE_CUDA_ARCHITECTURES` | CUDA | Compiler default | CUDA architectures to compile for, e.g. `"50;72"` to target compute capability 50 and 72. |
diff --git a/ROCm-Examples-VS2019.sln b/ROCm-Examples-VS2019.sln
index 675964e0e..f245c9749 100644
--- a/ROCm-Examples-VS2019.sln
+++ b/ROCm-Examples-VS2019.sln
@@ -1,214 +1,314 @@
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 16
-VisualStudioVersion = 16.0.32630.194
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Libraries", "Libraries", "{052412EF-7CEB-4E32-96F9-AADBC70945D7}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocThrust", "rocThrust", "{481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_ptr_vs2019", "Libraries\rocThrust\device_ptr\device_ptr_vs2019.vcxproj", "{FD1402C4-336F-4AEF-A5F6-1DD7903A965C}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "norm_vs2019", "Libraries\rocThrust\norm\norm_vs2019.vcxproj", "{8683C739-F470-44A6-A187-9A5929AE9DF9}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reduce_sum_vs2019", "Libraries\rocThrust\reduce_sum\reduce_sum_vs2019.vcxproj", "{C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vectors_vs2019", "Libraries\rocThrust\vectors\vectors_vs2019.vcxproj", "{8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hipCUB", "hipCUB", "{DCEAB7B6-0784-4186-B79F-5C7C947F9077}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_radix_sort_vs2019", "Libraries\hipCUB\device_radix_sort\device_radix_sort_vs2019.vcxproj", "{BE670E16-8A40-46E0-9CF2-93352ED685B0}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_sum_vs2019", "Libraries\hipCUB\device_sum\device_sum_vs2019.vcxproj", "{EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocRAND", "rocRAND", "{B8AE36C3-BE07-48B0-B375-5BAAE9355A45}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simple_distributions_cpp_vs2019", "Libraries\rocRAND\simple_distributions_cpp\simple_distributions_cpp_vs2019.vcxproj", "{13BB009A-0679-49C0-A763-3F0A388EA78F}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "remove_points_vs2019", "Libraries\rocThrust\remove_points\remove_points_vs2019.vcxproj", "{631C61AA-52BA-4818-BD39-FA9CF47076C7}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "saxpy_vs2019", "Libraries\rocThrust\saxpy\saxpy_vs2019.vcxproj", "{E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "exampleLibraryTemplate", "exampleLibraryTemplate", "{0A489EDA-4BAD-4966-B439-37260D37D969}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_template_vs2019", "Libraries\exampleLibraryTemplate\example_template\example_template_vs2019.vcxproj", "{B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocPRIM", "rocPRIM", "{82BF226F-956B-4E2E-B295-71C17F33A5FB}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_sum_vs2019", "Libraries\rocPRIM\device_sum\device_sum_vs2019.vcxproj", "{E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "block_sum_vs2019", "Libraries\rocPRIM\block_sum\block_sum_vs2019.vcxproj", "{65B21869-2BE2-4DA5-BEC5-28D1F910731C}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "HIP-Basic", "HIP-Basic", "{6EB7144D-2707-489E-A043-D59B7BE006D1}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_query_vs2019", "HIP-Basic\device_query\device_query_vs2019.vcxproj", "{C2C6E811-57E3-44C5-9AB9-195D60A1638C}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "saxpy_vs2019", "HIP-Basic\saxpy\saxpy_vs2019.vcxproj", "{D6334F08-D560-439A-A704-ADA0349D72B7}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix_multiplication_vs2019", "HIP-Basic\matrix_multiplication\matrix_multiplication_vs2019.vcxproj", "{ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "occupancy_vs2019", "HIP-Basic\occupancy\occupancy_vs2019.vcxproj", "{E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "runtime_compilation_vs2019", "HIP-Basic\runtime_compilation\runtime_compilation_vs2019.vcxproj", "{E03790B7-B203-4504-BEF5-F4F061183642}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dynamic_shared_vs2019", "HIP-Basic\dynamic_shared\dynamic_shared_vs2019.vcxproj", "{7B7D1745-7635-40DA-B6AF-B8F728A31124}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "shared_memory_vs2019", "HIP-Basic\shared_memory\shared_memory_vs2019.vcxproj", "{C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "streams_vs2019", "HIP-Basic\streams\streams_vs2019.vcxproj", "{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "events_vs2019", "HIP-Basic\events\events_vs2019.vcxproj", "{5B822836-110B-44D8-8E02-2A9B2CB83D14}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidth_vs2019", "HIP-Basic\bandwidth\bandwidth_vs2019.vcxproj", "{16B11B54-CD72-43B6-B226-38C668B41A79}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "warp_shuffle_vs2019", "HIP-Basic\warp_shuffle\warp_shuffle_vs2019.vcxproj", "{5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "assembly_to_executable_vs2019", "HIP-Basic\assembly_to_executable\assembly_to_executable_vs2019.vcxproj", "{60B4ADE0-8286-46AE-B884-5DA51B541DED}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "llvm_ir_to_executable_vs2019", "HIP-Basic\llvm_ir_to_executable\llvm_ir_to_executable_vs2019.vcxproj", "{DBB8DFE9-CB1B-473C-937C-2A8120E0D819}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|x64 = Debug|x64
- Release|x64 = Release|x64
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Debug|x64.ActiveCfg = Debug|x64
- {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Debug|x64.Build.0 = Debug|x64
- {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Release|x64.ActiveCfg = Release|x64
- {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Release|x64.Build.0 = Release|x64
- {8683C739-F470-44A6-A187-9A5929AE9DF9}.Debug|x64.ActiveCfg = Debug|x64
- {8683C739-F470-44A6-A187-9A5929AE9DF9}.Debug|x64.Build.0 = Debug|x64
- {8683C739-F470-44A6-A187-9A5929AE9DF9}.Release|x64.ActiveCfg = Release|x64
- {8683C739-F470-44A6-A187-9A5929AE9DF9}.Release|x64.Build.0 = Release|x64
- {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Debug|x64.ActiveCfg = Debug|x64
- {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Debug|x64.Build.0 = Debug|x64
- {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Release|x64.ActiveCfg = Release|x64
- {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Release|x64.Build.0 = Release|x64
- {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Debug|x64.ActiveCfg = Debug|x64
- {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Debug|x64.Build.0 = Debug|x64
- {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Release|x64.ActiveCfg = Release|x64
- {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Release|x64.Build.0 = Release|x64
- {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Debug|x64.ActiveCfg = Debug|x64
- {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Debug|x64.Build.0 = Debug|x64
- {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Release|x64.ActiveCfg = Release|x64
- {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Release|x64.Build.0 = Release|x64
- {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Debug|x64.ActiveCfg = Debug|x64
- {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Debug|x64.Build.0 = Debug|x64
- {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Release|x64.ActiveCfg = Release|x64
- {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Release|x64.Build.0 = Release|x64
- {13BB009A-0679-49C0-A763-3F0A388EA78F}.Debug|x64.ActiveCfg = Debug|x64
- {13BB009A-0679-49C0-A763-3F0A388EA78F}.Debug|x64.Build.0 = Debug|x64
- {13BB009A-0679-49C0-A763-3F0A388EA78F}.Release|x64.ActiveCfg = Release|x64
- {13BB009A-0679-49C0-A763-3F0A388EA78F}.Release|x64.Build.0 = Release|x64
- {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Debug|x64.ActiveCfg = Debug|x64
- {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Debug|x64.Build.0 = Debug|x64
- {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Release|x64.ActiveCfg = Release|x64
- {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Release|x64.Build.0 = Release|x64
- {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Debug|x64.ActiveCfg = Debug|x64
- {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Debug|x64.Build.0 = Debug|x64
- {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Release|x64.ActiveCfg = Release|x64
- {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Release|x64.Build.0 = Release|x64
- {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Debug|x64.ActiveCfg = Debug|x64
- {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Debug|x64.Build.0 = Debug|x64
- {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Release|x64.ActiveCfg = Release|x64
- {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Release|x64.Build.0 = Release|x64
- {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Debug|x64.ActiveCfg = Debug|x64
- {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Debug|x64.Build.0 = Debug|x64
- {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Release|x64.ActiveCfg = Release|x64
- {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Release|x64.Build.0 = Release|x64
- {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Debug|x64.ActiveCfg = Debug|x64
- {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Debug|x64.Build.0 = Debug|x64
- {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Release|x64.ActiveCfg = Release|x64
- {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Release|x64.Build.0 = Release|x64
- {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Debug|x64.ActiveCfg = Debug|x64
- {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Debug|x64.Build.0 = Debug|x64
- {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Release|x64.ActiveCfg = Release|x64
- {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Release|x64.Build.0 = Release|x64
- {D6334F08-D560-439A-A704-ADA0349D72B7}.Debug|x64.ActiveCfg = Debug|x64
- {D6334F08-D560-439A-A704-ADA0349D72B7}.Debug|x64.Build.0 = Debug|x64
- {D6334F08-D560-439A-A704-ADA0349D72B7}.Release|x64.ActiveCfg = Release|x64
- {D6334F08-D560-439A-A704-ADA0349D72B7}.Release|x64.Build.0 = Release|x64
- {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Debug|x64.ActiveCfg = Debug|x64
- {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Debug|x64.Build.0 = Debug|x64
- {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Release|x64.ActiveCfg = Release|x64
- {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Release|x64.Build.0 = Release|x64
- {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Debug|x64.ActiveCfg = Debug|x64
- {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Debug|x64.Build.0 = Debug|x64
- {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Release|x64.ActiveCfg = Release|x64
- {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Release|x64.Build.0 = Release|x64
- {E03790B7-B203-4504-BEF5-F4F061183642}.Debug|x64.ActiveCfg = Debug|x64
- {E03790B7-B203-4504-BEF5-F4F061183642}.Debug|x64.Build.0 = Debug|x64
- {E03790B7-B203-4504-BEF5-F4F061183642}.Release|x64.ActiveCfg = Release|x64
- {E03790B7-B203-4504-BEF5-F4F061183642}.Release|x64.Build.0 = Release|x64
- {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Debug|x64.ActiveCfg = Debug|x64
- {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Debug|x64.Build.0 = Debug|x64
- {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Release|x64.ActiveCfg = Release|x64
- {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Release|x64.Build.0 = Release|x64
- {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Debug|x64.ActiveCfg = Debug|x64
- {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Debug|x64.Build.0 = Debug|x64
- {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Release|x64.ActiveCfg = Release|x64
- {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Release|x64.Build.0 = Release|x64
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.ActiveCfg = Debug|x64
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.Build.0 = Debug|x64
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.ActiveCfg = Release|x64
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.Build.0 = Release|x64
- {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Debug|x64.ActiveCfg = Debug|x64
- {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Debug|x64.Build.0 = Debug|x64
- {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Release|x64.ActiveCfg = Release|x64
- {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Release|x64.Build.0 = Release|x64
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.ActiveCfg = Debug|x64
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.Build.0 = Debug|x64
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.ActiveCfg = Release|x64
- {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.Build.0 = Release|x64
- {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Debug|x64.ActiveCfg = Debug|x64
- {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Debug|x64.Build.0 = Debug|x64
- {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Release|x64.ActiveCfg = Release|x64
- {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Release|x64.Build.0 = Release|x64
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.ActiveCfg = Debug|x64
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.Build.0 = Debug|x64
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.ActiveCfg = Release|x64
- {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.Build.0 = Release|x64
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.ActiveCfg = Debug|x64
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.Build.0 = Debug|x64
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.ActiveCfg = Release|x64
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.Build.0 = Release|x64
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
- GlobalSection(NestedProjects) = preSolution
- {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
- {FD1402C4-336F-4AEF-A5F6-1DD7903A965C} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
- {8683C739-F470-44A6-A187-9A5929AE9DF9} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
- {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
- {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
- {DCEAB7B6-0784-4186-B79F-5C7C947F9077} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
- {BE670E16-8A40-46E0-9CF2-93352ED685B0} = {DCEAB7B6-0784-4186-B79F-5C7C947F9077}
- {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4} = {DCEAB7B6-0784-4186-B79F-5C7C947F9077}
- {B8AE36C3-BE07-48B0-B375-5BAAE9355A45} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
- {13BB009A-0679-49C0-A763-3F0A388EA78F} = {B8AE36C3-BE07-48B0-B375-5BAAE9355A45}
- {631C61AA-52BA-4818-BD39-FA9CF47076C7} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
- {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
- {0A489EDA-4BAD-4966-B439-37260D37D969} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
- {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {82BF226F-956B-4E2E-B295-71C17F33A5FB} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
- {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E} = {82BF226F-956B-4E2E-B295-71C17F33A5FB}
- {65B21869-2BE2-4DA5-BEC5-28D1F910731C} = {82BF226F-956B-4E2E-B295-71C17F33A5FB}
- {C2C6E811-57E3-44C5-9AB9-195D60A1638C} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {D6334F08-D560-439A-A704-ADA0349D72B7} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {E03790B7-B203-4504-BEF5-F4F061183642} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {7B7D1745-7635-40DA-B6AF-B8F728A31124} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {5B822836-110B-44D8-8E02-2A9B2CB83D14} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {16B11B54-CD72-43B6-B226-38C668B41A79} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {60B4ADE0-8286-46AE-B884-5DA51B541DED} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- {DBB8DFE9-CB1B-473C-937C-2A8120E0D819} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
- EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
- SolutionGuid = {90580497-38BF-428E-A951-6EC6CFC68193}
- EndGlobalSection
-EndGlobal
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.32630.194
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Applications", "Applications", "{9254BAD9-FDFC-4645-B2C8-EEB42F1F069D}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "floyd_warshall_vs2019", "Applications\floyd_warshall\floyd_warshall_vs2019.vcxproj", "{FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Libraries", "Libraries", "{052412EF-7CEB-4E32-96F9-AADBC70945D7}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocThrust", "rocThrust", "{481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_ptr_vs2019", "Libraries\rocThrust\device_ptr\device_ptr_vs2019.vcxproj", "{FD1402C4-336F-4AEF-A5F6-1DD7903A965C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "norm_vs2019", "Libraries\rocThrust\norm\norm_vs2019.vcxproj", "{8683C739-F470-44A6-A187-9A5929AE9DF9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reduce_sum_vs2019", "Libraries\rocThrust\reduce_sum\reduce_sum_vs2019.vcxproj", "{C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vectors_vs2019", "Libraries\rocThrust\vectors\vectors_vs2019.vcxproj", "{8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hipCUB", "hipCUB", "{DCEAB7B6-0784-4186-B79F-5C7C947F9077}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_radix_sort_vs2019", "Libraries\hipCUB\device_radix_sort\device_radix_sort_vs2019.vcxproj", "{BE670E16-8A40-46E0-9CF2-93352ED685B0}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_sum_vs2019", "Libraries\hipCUB\device_sum\device_sum_vs2019.vcxproj", "{EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocRAND", "rocRAND", "{B8AE36C3-BE07-48B0-B375-5BAAE9355A45}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simple_distributions_cpp_vs2019", "Libraries\rocRAND\simple_distributions_cpp\simple_distributions_cpp_vs2019.vcxproj", "{13BB009A-0679-49C0-A763-3F0A388EA78F}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "remove_points_vs2019", "Libraries\rocThrust\remove_points\remove_points_vs2019.vcxproj", "{631C61AA-52BA-4818-BD39-FA9CF47076C7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "saxpy_vs2019", "Libraries\rocThrust\saxpy\saxpy_vs2019.vcxproj", "{E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "exampleLibraryTemplate", "exampleLibraryTemplate", "{0A489EDA-4BAD-4966-B439-37260D37D969}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_template_vs2019", "Libraries\exampleLibraryTemplate\example_template\example_template_vs2019.vcxproj", "{B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocPRIM", "rocPRIM", "{82BF226F-956B-4E2E-B295-71C17F33A5FB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_sum_vs2019", "Libraries\rocPRIM\device_sum\device_sum_vs2019.vcxproj", "{E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "block_sum_vs2019", "Libraries\rocPRIM\block_sum\block_sum_vs2019.vcxproj", "{65B21869-2BE2-4DA5-BEC5-28D1F910731C}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "HIP-Basic", "HIP-Basic", "{6EB7144D-2707-489E-A043-D59B7BE006D1}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_query_vs2019", "HIP-Basic\device_query\device_query_vs2019.vcxproj", "{C2C6E811-57E3-44C5-9AB9-195D60A1638C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "saxpy_vs2019", "HIP-Basic\saxpy\saxpy_vs2019.vcxproj", "{D6334F08-D560-439A-A704-ADA0349D72B7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrix_multiplication_vs2019", "HIP-Basic\matrix_multiplication\matrix_multiplication_vs2019.vcxproj", "{ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_data_transfer_vs2019", "HIP-Basic\multi_gpu_data_transfer\multi_gpu_data_transfer_vs2019.vcxproj", "{6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "occupancy_vs2019", "HIP-Basic\occupancy\occupancy_vs2019.vcxproj", "{E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "runtime_compilation_vs2019", "HIP-Basic\runtime_compilation\runtime_compilation_vs2019.vcxproj", "{E03790B7-B203-4504-BEF5-F4F061183642}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "inline_assembly_vs2019", "HIP-Basic\inline_assembly\inline_assembly_vs2019.vcxproj", "{7B7D1745-7635-40DA-B6AF-B8F728A31122}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpu_arch_vs2019", "HIP-Basic\gpu_arch\gpu_arch_vs2019.vcxproj", "{7B7D1745-7635-40DA-B6AF-B8F728A31123}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dynamic_shared_vs2019", "HIP-Basic\dynamic_shared\dynamic_shared_vs2019.vcxproj", "{7B7D1745-7635-40DA-B6AF-B8F728A31124}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "shared_memory_vs2019", "HIP-Basic\shared_memory\shared_memory_vs2019.vcxproj", "{C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "streams_vs2019", "HIP-Basic\streams\streams_vs2019.vcxproj", "{4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "events_vs2019", "HIP-Basic\events\events_vs2019.vcxproj", "{5B822836-110B-44D8-8E02-2A9B2CB83D14}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidth_vs2019", "HIP-Basic\bandwidth\bandwidth_vs2019.vcxproj", "{16B11B54-CD72-43B6-B226-38C668B41A79}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "warp_shuffle_vs2019", "HIP-Basic\warp_shuffle\warp_shuffle_vs2019.vcxproj", "{5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "assembly_to_executable_vs2019", "HIP-Basic\assembly_to_executable\assembly_to_executable_vs2019.vcxproj", "{60B4ADE0-8286-46AE-B884-5DA51B541DED}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "llvm_ir_to_executable_vs2019", "HIP-Basic\llvm_ir_to_executable\llvm_ir_to_executable_vs2019.vcxproj", "{DBB8DFE9-CB1B-473C-937C-2A8120E0D819}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bit_extract_vs2019", "HIP-Basic\bit_extract\bit_extract_vs2019.vcxproj", "{63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vulkan_interop_vs2019", "HIP-Basic\vulkan_interop\vulkan_interop_vs2019.vcxproj", "{688433E2-B189-431D-A5F8-9AC82102B58C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "module_api_vs2019", "HIP-Basic\module_api\module_api_vs2019.vcxproj", "{306EB993-653A-45F6-863A-5f43BC86DA79}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "device_globals_vs2019", "HIP-Basic\device_globals\device_globals_vs2019.vcxproj", "{F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "moving_average_vs2019", "HIP-Basic\moving_average\moving_average_vs2019.vcxproj", "{628390E3-DB62-4D52-9594-DE6BC15F9943}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "opengl_interop_vs2019", "HIP-Basic\opengl_interop\opengl_interop_vs2019.vcxproj", "{96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "static_host_library_vs2019", "HIP-Basic\static_host_library\static_host_library_vs2019.vcxproj", "{5F8A7FEE-3A79-4588-9244-8575748026F7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libstatic_host_vs2019", "HIP-Basic\static_host_library\library\libhip_static_host_vs2019.vcxproj", "{6D3F8F78-225E-490E-ABD3-762857EBF597}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "static_host_library_msvc_vs2019", "HIP-Basic\static_host_library\static_host_library_msvc\static_host_library_msvc_vs2019.vcxproj", "{547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cooperative_groups_vs2019", "HIP-Basic\cooperative_groups\cooperative_groups_vs2019.vcxproj", "{7A25CE69-BACE-4410-BEB0-12A69890F212}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Debug|x64.ActiveCfg = Debug|x64
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Debug|x64.Build.0 = Debug|x64
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Release|x64.ActiveCfg = Release|x64
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103}.Release|x64.Build.0 = Release|x64
+ {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Debug|x64.ActiveCfg = Debug|x64
+ {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Debug|x64.Build.0 = Debug|x64
+ {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Release|x64.ActiveCfg = Release|x64
+ {FD1402C4-336F-4AEF-A5F6-1DD7903A965C}.Release|x64.Build.0 = Release|x64
+ {8683C739-F470-44A6-A187-9A5929AE9DF9}.Debug|x64.ActiveCfg = Debug|x64
+ {8683C739-F470-44A6-A187-9A5929AE9DF9}.Debug|x64.Build.0 = Debug|x64
+ {8683C739-F470-44A6-A187-9A5929AE9DF9}.Release|x64.ActiveCfg = Release|x64
+ {8683C739-F470-44A6-A187-9A5929AE9DF9}.Release|x64.Build.0 = Release|x64
+ {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Debug|x64.ActiveCfg = Debug|x64
+ {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Debug|x64.Build.0 = Debug|x64
+ {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Release|x64.ActiveCfg = Release|x64
+ {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C}.Release|x64.Build.0 = Release|x64
+ {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Debug|x64.ActiveCfg = Debug|x64
+ {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Debug|x64.Build.0 = Debug|x64
+ {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Release|x64.ActiveCfg = Release|x64
+ {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013}.Release|x64.Build.0 = Release|x64
+ {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Debug|x64.ActiveCfg = Debug|x64
+ {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Debug|x64.Build.0 = Debug|x64
+ {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Release|x64.ActiveCfg = Release|x64
+ {BE670E16-8A40-46E0-9CF2-93352ED685B0}.Release|x64.Build.0 = Release|x64
+ {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Debug|x64.ActiveCfg = Debug|x64
+ {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Debug|x64.Build.0 = Debug|x64
+ {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Release|x64.ActiveCfg = Release|x64
+ {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4}.Release|x64.Build.0 = Release|x64
+ {13BB009A-0679-49C0-A763-3F0A388EA78F}.Debug|x64.ActiveCfg = Debug|x64
+ {13BB009A-0679-49C0-A763-3F0A388EA78F}.Debug|x64.Build.0 = Debug|x64
+ {13BB009A-0679-49C0-A763-3F0A388EA78F}.Release|x64.ActiveCfg = Release|x64
+ {13BB009A-0679-49C0-A763-3F0A388EA78F}.Release|x64.Build.0 = Release|x64
+ {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Debug|x64.ActiveCfg = Debug|x64
+ {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Debug|x64.Build.0 = Debug|x64
+ {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Release|x64.ActiveCfg = Release|x64
+ {631C61AA-52BA-4818-BD39-FA9CF47076C7}.Release|x64.Build.0 = Release|x64
+ {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Debug|x64.ActiveCfg = Debug|x64
+ {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Debug|x64.Build.0 = Debug|x64
+ {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Release|x64.ActiveCfg = Release|x64
+ {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E}.Release|x64.Build.0 = Release|x64
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Debug|x64.ActiveCfg = Debug|x64
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Debug|x64.Build.0 = Debug|x64
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Release|x64.ActiveCfg = Release|x64
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D}.Release|x64.Build.0 = Release|x64
+ {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Debug|x64.ActiveCfg = Debug|x64
+ {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Debug|x64.Build.0 = Debug|x64
+ {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Release|x64.ActiveCfg = Release|x64
+ {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E}.Release|x64.Build.0 = Release|x64
+ {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Debug|x64.ActiveCfg = Debug|x64
+ {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Debug|x64.Build.0 = Debug|x64
+ {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Release|x64.ActiveCfg = Release|x64
+ {65B21869-2BE2-4DA5-BEC5-28D1F910731C}.Release|x64.Build.0 = Release|x64
+ {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Debug|x64.ActiveCfg = Debug|x64
+ {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Debug|x64.Build.0 = Debug|x64
+ {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Release|x64.ActiveCfg = Release|x64
+ {C2C6E811-57E3-44C5-9AB9-195D60A1638C}.Release|x64.Build.0 = Release|x64
+ {D6334F08-D560-439A-A704-ADA0349D72B7}.Debug|x64.ActiveCfg = Debug|x64
+ {D6334F08-D560-439A-A704-ADA0349D72B7}.Debug|x64.Build.0 = Debug|x64
+ {D6334F08-D560-439A-A704-ADA0349D72B7}.Release|x64.ActiveCfg = Release|x64
+ {D6334F08-D560-439A-A704-ADA0349D72B7}.Release|x64.Build.0 = Release|x64
+ {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Debug|x64.ActiveCfg = Debug|x64
+ {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Debug|x64.Build.0 = Debug|x64
+ {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Release|x64.ActiveCfg = Release|x64
+ {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E}.Release|x64.Build.0 = Release|x64
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Debug|x64.ActiveCfg = Debug|x64
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Debug|x64.Build.0 = Debug|x64
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Release|x64.ActiveCfg = Release|x64
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB}.Release|x64.Build.0 = Release|x64
+ {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Debug|x64.ActiveCfg = Debug|x64
+ {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Debug|x64.Build.0 = Debug|x64
+ {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Release|x64.ActiveCfg = Release|x64
+ {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5}.Release|x64.Build.0 = Release|x64
+ {E03790B7-B203-4504-BEF5-F4F061183642}.Debug|x64.ActiveCfg = Debug|x64
+ {E03790B7-B203-4504-BEF5-F4F061183642}.Debug|x64.Build.0 = Debug|x64
+ {E03790B7-B203-4504-BEF5-F4F061183642}.Release|x64.ActiveCfg = Release|x64
+ {E03790B7-B203-4504-BEF5-F4F061183642}.Release|x64.Build.0 = Release|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31122}.Debug|x64.ActiveCfg = Debug|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31122}.Debug|x64.Build.0 = Debug|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31122}.Release|x64.ActiveCfg = Release|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31122}.Release|x64.Build.0 = Release|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31123}.Debug|x64.ActiveCfg = Debug|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31123}.Debug|x64.Build.0 = Debug|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31123}.Release|x64.ActiveCfg = Release|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31123}.Release|x64.Build.0 = Release|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Debug|x64.ActiveCfg = Debug|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Debug|x64.Build.0 = Debug|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Release|x64.ActiveCfg = Release|x64
+ {7B7D1745-7635-40DA-B6AF-B8F728A31124}.Release|x64.Build.0 = Release|x64
+ {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Debug|x64.ActiveCfg = Debug|x64
+ {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Debug|x64.Build.0 = Debug|x64
+ {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Release|x64.ActiveCfg = Release|x64
+ {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D}.Release|x64.Build.0 = Release|x64
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.ActiveCfg = Debug|x64
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Debug|x64.Build.0 = Debug|x64
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.ActiveCfg = Release|x64
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9}.Release|x64.Build.0 = Release|x64
+ {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Debug|x64.ActiveCfg = Debug|x64
+ {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Debug|x64.Build.0 = Debug|x64
+ {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Release|x64.ActiveCfg = Release|x64
+ {5B822836-110B-44D8-8E02-2A9B2CB83D14}.Release|x64.Build.0 = Release|x64
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.ActiveCfg = Debug|x64
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Debug|x64.Build.0 = Debug|x64
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.ActiveCfg = Release|x64
+ {16B11B54-CD72-43B6-B226-38C668B41A79}.Release|x64.Build.0 = Release|x64
+ {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Debug|x64.ActiveCfg = Debug|x64
+ {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Debug|x64.Build.0 = Debug|x64
+ {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Release|x64.ActiveCfg = Release|x64
+ {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045}.Release|x64.Build.0 = Release|x64
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.ActiveCfg = Debug|x64
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Debug|x64.Build.0 = Debug|x64
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.ActiveCfg = Release|x64
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED}.Release|x64.Build.0 = Release|x64
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.ActiveCfg = Debug|x64
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Debug|x64.Build.0 = Debug|x64
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.ActiveCfg = Release|x64
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819}.Release|x64.Build.0 = Release|x64
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Debug|x64.ActiveCfg = Debug|x64
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Debug|x64.Build.0 = Debug|x64
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Release|x64.ActiveCfg = Release|x64
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2}.Release|x64.Build.0 = Release|x64
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Debug|x64.ActiveCfg = Debug|x64
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Debug|x64.Build.0 = Debug|x64
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Release|x64.ActiveCfg = Release|x64
+ {688433E2-B189-431D-A5F8-9AC82102B58C}.Release|x64.Build.0 = Release|x64
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Debug|x64.ActiveCfg = Debug|x64
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Debug|x64.Build.0 = Debug|x64
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Release|x64.ActiveCfg = Release|x64
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE}.Release|x64.Build.0 = Release|x64
+ {628390E3-DB62-4D52-9594-DE6BC15F9943}.Debug|x64.ActiveCfg = Debug|x64
+ {628390E3-DB62-4D52-9594-DE6BC15F9943}.Debug|x64.Build.0 = Debug|x64
+ {628390E3-DB62-4D52-9594-DE6BC15F9943}.Release|x64.ActiveCfg = Release|x64
+ {628390E3-DB62-4D52-9594-DE6BC15F9943}.Release|x64.Build.0 = Release|x64
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Debug|x64.ActiveCfg = Debug|x64
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Debug|x64.Build.0 = Debug|x64
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Release|x64.ActiveCfg = Release|x64
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A}.Release|x64.Build.0 = Release|x64
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Debug|x64.ActiveCfg = Debug|x64
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Debug|x64.Build.0 = Debug|x64
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Release|x64.ActiveCfg = Release|x64
+ {306EB993-653A-45F6-863A-5f43BC86DA79}.Release|x64.Build.0 = Release|x64
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Debug|x64.ActiveCfg = Debug|x64
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Debug|x64.Build.0 = Debug|x64
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Release|x64.ActiveCfg = Release|x64
+ {5F8A7FEE-3A79-4588-9244-8575748026F7}.Release|x64.Build.0 = Release|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Debug|x64.ActiveCfg = Debug|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Debug|x64.Build.0 = Debug|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Release|x64.ActiveCfg = Release|x64
+ {6D3F8F78-225E-490E-ABD3-762857EBF597}.Release|x64.Build.0 = Release|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Debug|x64.ActiveCfg = Debug|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Debug|x64.Build.0 = Debug|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Release|x64.ActiveCfg = Release|x64
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E}.Release|x64.Build.0 = Release|x64
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Debug|x64.ActiveCfg = Debug|x64
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Debug|x64.Build.0 = Debug|x64
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Release|x64.ActiveCfg = Release|x64
+ {7A25CE69-BACE-4410-BEB0-12A69890F212}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(NestedProjects) = preSolution
+ {FB6B7014-2BC9-475C-B3CC-FEE6B4C5B103} = {9254BAD9-FDFC-4645-B2C8-EEB42F1F069D}
+ {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
+ {FD1402C4-336F-4AEF-A5F6-1DD7903A965C} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
+ {8683C739-F470-44A6-A187-9A5929AE9DF9} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
+ {C0405FFB-7AA2-49C2-9AB5-AF336A54B41C} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
+ {8DEA1F0F-8BF3-422C-9BCD-99F69F43D013} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
+ {DCEAB7B6-0784-4186-B79F-5C7C947F9077} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
+ {BE670E16-8A40-46E0-9CF2-93352ED685B0} = {DCEAB7B6-0784-4186-B79F-5C7C947F9077}
+ {EF1E1A7E-2803-4606-BD9A-DA8FA981ABA4} = {DCEAB7B6-0784-4186-B79F-5C7C947F9077}
+ {B8AE36C3-BE07-48B0-B375-5BAAE9355A45} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
+ {13BB009A-0679-49C0-A763-3F0A388EA78F} = {B8AE36C3-BE07-48B0-B375-5BAAE9355A45}
+ {631C61AA-52BA-4818-BD39-FA9CF47076C7} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
+ {E1D552CF-3FE3-427A-95E1-8CFFB60BBF8E} = {481D0AFC-64BC-436C-9FF5-7C07F9F8E4BD}
+ {0A489EDA-4BAD-4966-B439-37260D37D969} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
+ {B885EF49-EDAA-4474-8D31-E0EF71D2BB3D} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {82BF226F-956B-4E2E-B295-71C17F33A5FB} = {052412EF-7CEB-4E32-96F9-AADBC70945D7}
+ {E71DB5FB-A1C4-4BB4-8B46-0037C32C885E} = {82BF226F-956B-4E2E-B295-71C17F33A5FB}
+ {65B21869-2BE2-4DA5-BEC5-28D1F910731C} = {82BF226F-956B-4E2E-B295-71C17F33A5FB}
+ {C2C6E811-57E3-44C5-9AB9-195D60A1638C} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {D6334F08-D560-439A-A704-ADA0349D72B7} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {ACC2A1E7-5865-4FAE-9016-E6EF73F8FA9E} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {6A0FFF7E-9C0A-4BF5-BBA5-745CB4253EFB} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {E5B2FC79-3928-47F6-B57B-33AAA3C5D9C5} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {E03790B7-B203-4504-BEF5-F4F061183642} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {7B7D1745-7635-40DA-B6AF-B8F728A31122} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {7B7D1745-7635-40DA-B6AF-B8F728A31123} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {7B7D1745-7635-40DA-B6AF-B8F728A31124} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {C370ACB7-AE52-4AD8-8C3D-4C32567FFE7D} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {4E6B2034-D7ED-4CB4-98B2-7B2D2B71E0A9} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {5B822836-110B-44D8-8E02-2A9B2CB83D14} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {16B11B54-CD72-43B6-B226-38C668B41A79} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {5852BE0E-BDA5-4BD9-8A16-30E8E40F4045} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {60B4ADE0-8286-46AE-B884-5DA51B541DED} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {DBB8DFE9-CB1B-473C-937C-2A8120E0D819} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {63823DD0-787C-42AE-B6E7-C03CF4CF5CE2} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {688433E2-B189-431D-A5F8-9AC82102B58C} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {F7DD9451-B0CA-4C76-AB92-0E01CBEBDBBE} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {628390E3-DB62-4D52-9594-DE6BC15F9943} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {96F8BE41-5C64-4BF2-8A8E-474BEAACAA5A} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {306EB993-653A-45F6-863A-5f43BC86DA79} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {5F8A7FEE-3A79-4588-9244-8575748026F7} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {6D3F8F78-225E-490E-ABD3-762857EBF597} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {547B99C2-CBE3-4E1F-A1D6-26E261D67A3E} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ {7A25CE69-BACE-4410-BEB0-12A69890F212} = {6EB7144D-2707-489E-A043-D59B7BE006D1}
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {90580497-38BF-428E-A951-6EC6CFC68193}
+ EndGlobalSection
+EndGlobal
diff --git a/Scripts/CodeFormat/check_format.sh b/Scripts/CodeFormat/check_format.sh
index e34d4591f..24c616620 100755
--- a/Scripts/CodeFormat/check_format.sh
+++ b/Scripts/CodeFormat/check_format.sh
@@ -39,7 +39,7 @@ finish () {
trap finish EXIT
GIT_CLANG_FORMAT="${GIT_CLANG_FORMAT:-git-clang-format}"
-"$GIT_CLANG_FORMAT" --style=file --extensions=cc,cp,cpp,c++,cxx,cu,cuh,hh,hpp,hxx,hip --diff "$@" "$SOURCE_COMMIT" > "$scratch"
+"$GIT_CLANG_FORMAT" --style=file --extensions=cc,cp,cpp,c++,cxx,cu,cuh,hh,hpp,hxx,hip,vert,frag --diff "$@" "$SOURCE_COMMIT" > "$scratch"
# Check for no-ops
grep '^no modified files to format$\|^clang-format did not modify any files$' \