-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rocSPARSE general functions (part III) (#114)
* Resolve "rocSPARSE preconditioner gtsv example" * Add rocSPARSE Level 2 CSR Iterative Triangular Matrix-Vector Multiplication * Add SpITSV example * Add rocSPARSE preconditioner GPSV example * Move gtsv from 5.5 to 5.7 * Move csritsv from 5.5 to 5.7 * Move spitsv from 5.5 to 5.7 * Move gpsv from 5.5 to 5.7 * Update copyright * Fix markdown linting --------- Co-authored-by: Mátyás Aradi <[email protected]>
- Loading branch information
1 parent
b725fd7
commit 9910763
Showing
66 changed files
with
3,946 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
rocsparse_csritsv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# MIT License | ||
# | ||
# Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in all | ||
# copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
# SOFTWARE. | ||
|
||
# Build script for the rocSPARSE csritsv example. It can be configured on its
# own or pulled in from a parent project via add_subdirectory().
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

# Single name shared by the project, the executable target and the CTest entry.
set(example_name rocsparse_csritsv)
project(${example_name} LANGUAGES CXX)

# rocSPARSE has no CUDA backend, so skip this example for CUDA builds.
# The expansion is quoted so an unset GPU_RUNTIME compares as an empty string.
if("${GPU_RUNTIME}" STREQUAL "CUDA")
  message(STATUS "rocSPARSE examples do not support the CUDA runtime")
  return()
endif()

# This example does not contain device code, thereby it can be compiled with
# any conforming C++ compiler.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default location of the ROCm installation; override with -DROCM_ROOT=<path>.
# On Windows the default is read from the HIP_PATH environment variable at
# configure time.
if(WIN32)
  set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation")
else()
  set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
endif()

# Let find_package() search the ROCm installation.
list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}")

find_package(rocsparse REQUIRED)

add_executable(${example_name} main.cpp)

# Make example runnable using ctest.
# enable_testing() is needed for add_test() to register anything when this
# directory is configured standalone; it is harmless if a parent already
# enabled testing. The NAME/COMMAND signature resolves the target name to the
# built executable for every generator and configuration.
enable_testing()
add_test(NAME ${example_name} COMMAND ${example_name})

# Link to example library.
# NOTE(review): hip::host is not looked up explicitly here; it is presumably
# made available by the rocsparse package configuration - confirm.
target_link_libraries(${example_name} PRIVATE roc::rocsparse hip::host)

# Shared example headers live in the repository-level Common directory.
target_include_directories(
  ${example_name}
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../../../Common"
)

install(TARGETS ${example_name})

# On Windows, install the rocSPARSE runtime library alongside the example.
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
  install(IMPORTED_RUNTIME_ARTIFACTS roc::rocsparse)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# MIT License | ||
# | ||
# Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in all | ||
# copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
# SOFTWARE. | ||
|
||
# Name of the produced executable; its rule is the first one in this file and
# therefore the default goal.
EXAMPLE            := rocsparse_csritsv
# Directory holding the headers shared between the examples.
COMMON_INCLUDE_DIR := ../../../../Common
GPU_RUNTIME        := HIP

# Only the HIP runtime is accepted; stop early for any other value.
ifneq ($(GPU_RUNTIME), HIP)
$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.)
endif

# Root of the ROCm installation (override on the command line if needed).
ROCM_INSTALL_DIR := /opt/rocm

# The rocSPARSE headers are taken from the same include directory as HIP.
HIP_INCLUDE_DIR       := $(ROCM_INSTALL_DIR)/include
ROCSPARSE_INCLUDE_DIR := $(HIP_INCLUDE_DIR)

CXX ?= g++

# Common variables and flags
# The I-prefixed variables hold the flags required by the example itself.
CXX_STD   := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -isystem $(ROCSPARSE_INCLUDE_DIR) -isystem $(HIP_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -D__HIP_PLATFORM_AMD__
ILDFLAGS  := -L $(ROCM_INSTALL_DIR)/lib
ILDLIBS   := -lrocsparse -lamdhip64

# Default warning level, used only when the caller did not set CXXFLAGS.
CXXFLAGS ?= -Wall -Wextra

# Append the user-supplied flags after the required ones.
ICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS  += $(LDFLAGS)
ILDLIBS   += $(LDLIBS)

# Compile and link in one step. Only the first prerequisite (main.cpp) is
# passed to the compiler; the headers are listed so that edits trigger a rebuild.
$(EXAMPLE): main.cpp \
            $(COMMON_INCLUDE_DIR)/example_utils.hpp \
            $(COMMON_INCLUDE_DIR)/rocsparse_utils.hpp
	$(CXX) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)

# Remove the built executable.
clean:
	$(RM) $(EXAMPLE)

.PHONY: clean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
# rocSPARSE Level 2 CSR Iterative Triangular Solver | ||
|
||
## Description | ||
|
||
This example illustrates the use of the `rocSPARSE` level 2 iterative triangular solver using the CSR storage format. | ||
|
||
This triangular solver is used to find an iterative solution, using the Jacobi method, for a linear system of the form | ||
|
||
$$ | ||
A' y \approx \alpha x, | ||
$$ | ||
|
||
with a given `tolerance` and a maximum number of iterations `max_iter`, where | ||
|
||
- $A$ is a sparse triangular matrix of order $n$ whose elements are the coefficients of the equations, | ||
- $A'$ is one of the following: | ||
- $A' = A$ (identity) | ||
- $A' = A^T$ (transpose $A$: $A_{ij}^T = A_{ji}$) | ||
- $A' = A^H$ (conjugate transpose/Hermitian $A$: $A_{ij}^H = \bar A_{ji}$), | ||
- $\alpha$ is a scalar, | ||
- $x$ is a dense vector of size $n$ containing the constant terms of the equations, and | ||
- $y$ is a dense vector of size $n$ which contains the unknowns of the system. | ||
|
||
Obtaining a solution for such a system consists of finding concrete values of all the unknowns such that the above equality holds. | ||
|
||
### Application flow | ||
|
||
1. Setup input data. | ||
2. Allocate device memory and offload input data to device. | ||
3. Initialize rocSPARSE by creating a handle. | ||
4. Prepare utility variables for rocSPARSE csritsv invocation. | ||
5. Perform analysis step. | ||
6. Perform triangular solve $A' y = \alpha x$. | ||
7. Check results obtained. | ||
8. Copy solution vector $y$ from device to host and compare with expected result. | ||
9. Free rocSPARSE resources and device memory. | ||
10. Print validation result. | ||
|
||
## Key APIs and Concepts | ||
|
||
### CSR Matrix Storage Format | ||
|
||
The [Compressed Sparse Row (CSR) storage format](https://rocsparse.readthedocs.io/en/latest/usermanual.html#csr-storage-format) describes an $m \times n$ sparse matrix with three arrays. | ||
|
||
Defining | ||
|
||
- `m`: number of rows | ||
- `n`: number of columns | ||
- `nnz`: number of non-zero elements | ||
|
||
we can describe a sparse matrix using the following arrays: | ||
|
||
- `csr_val`: array storing the non-zero elements of the matrix. | ||
- `csr_row_ptr`: given $i \in [0, m]$ | ||
- if $` 0 \leq i < m `$, `csr_row_ptr[i]` stores the index of the first non-zero element in row $i$ of the matrix | ||
- if $i = m$, `csr_row_ptr[i]` stores `nnz`. | ||
|
||
This way, row $j \in [0, m)$ contains the non-zero elements of indices from `csr_row_ptr[j]` to `csr_row_ptr[j+1]-1`. Therefore, the corresponding values in `csr_val` can be accessed from `csr_row_ptr[j]` to `csr_row_ptr[j+1]-1`. | ||
- `csr_col_ind`: given $i \in [0, nnz-1]$, `csr_col_ind[i]` stores the column of the $i^{th}$ non-zero element in the matrix. | ||
|
||
The CSR matrix is sorted by column indices in the same row, and each pair of indices appears only once. | ||
|
||
For instance, consider a sparse matrix as | ||
|
||
$$ | ||
A= | ||
\left( | ||
\begin{array}{ccccc} | ||
1 & 2 & 0 & 3 & 0 \\ | ||
0 & 4 & 5 & 0 & 0 \\ | ||
6 & 0 & 0 & 7 & 8 | ||
\end{array} | ||
\right) | ||
$$ | ||
|
||
Therefore, the CSR representation of $A$ is: | ||
|
||
```c++ | ||
m = 3 | ||
|
||
n = 5 | ||
|
||
nnz = 8 | ||
|
||
csr_val = { 1, 2, 3, 4, 5, 6, 7, 8 } | ||
|
||
csr_row_ptr = { 0, 3, 5, 8 } | ||
|
||
csr_col_ind = { 0, 1, 3, 1, 2, 0, 3, 4 } | ||
``` | ||
### rocSPARSE | ||
- rocSPARSE is initialized by calling `rocsparse_create_handle(rocsparse_handle*)` and is terminated by calling `rocsparse_destroy_handle(rocsparse_handle)`. | ||
- `rocsparse_pointer_mode` controls whether scalar parameters must be allocated on the host (`rocsparse_pointer_mode_host`) or on the device (`rocsparse_pointer_mode_device`). It is controlled by `rocsparse_set_pointer_mode`. | ||
- `rocsparse_operation trans`: matrix operation applied to the given input matrix. The following values are accepted: | ||
- `rocsparse_operation_none`: identity operation $A' = A$. | ||
- `rocsparse_operation_transpose`: transpose operation $A' = A^\mathrm{T}$. | ||
- `rocsparse_operation_conjugate_transpose`: conjugate transpose operation (Hermitian matrix) $A' = A^\mathrm{H}$. This operation is not yet supported. | ||
- `rocsparse_mat_descr descr`: holds all properties of a matrix. The properties set in this example are the following: | ||
- `rocsparse_diag_type`: indicates whether the diagonal entries of a matrix are unit elements (`rocsparse_diag_type_unit`) or not (`rocsparse_diag_type_non_unit`). | ||
- `rocsparse_fill_mode`: indicates whether a (triangular) matrix is lower (`rocsparse_fill_mode_lower`) or upper (`rocsparse_fill_mode_upper`) triangular. | ||
- `rocsparse_[sdcz]csritsv_buffer_size` returns the size (in bytes) of the temporary storage buffer required for the `rocsparse_[sdcz]csritsv_analysis` and `rocsparse_[sdcz]csritsv_solve` functions. The character matched in `[sdcz]` coincides with the one matched in any of the mentioned functions. | ||
- `rocsparse_solve_policy policy`: specifies the policy to follow for triangular solvers and factorizations. The only value accepted is `rocsparse_solve_policy_auto`. | ||
- `rocsparse_[sdcz]csritsv_solve` solves a sparse triangular linear system $A' y = \alpha x$. The correct function signature should be chosen based on the datatype of the input matrix: | ||
- `s` single-precision real (`float`) | ||
- `d` double-precision real (`double`) | ||
- `c` single-precision complex (`rocsparse_float_complex`) | ||
- `z` double-precision complex (`rocsparse_double_complex`) | ||
- `rocsparse_analysis_policy analysis`: specifies the policy to follow for analysis data. The following values are accepted: | ||
- `rocsparse_analysis_policy_reuse`: the analysis data gathered is re-used. | ||
- `rocsparse_analysis_policy_force`: the analysis data will be re-built. | ||
- `rocsparse_[sdcz]csritsv_analysis` performs the analysis step for `rocsparse_[sdcz]csritsv_solve`. The character matched in `[sdcz]` coincides with the one matched in `rocsparse_[sdcz]csritsv_solve`. | ||
- `rocsparse_csritsv_zero_pivot(rocsparse_handle, rocsparse_mat_info, rocsparse_int *position)` returns `rocsparse_status_zero_pivot` if either a structural or numerical zero has been found during the execution of `rocsparse_[sdcz]csritsv_solve(....)` and stores in `position` the index $i$ of the first zero pivot $A_{ii}$ found. If no zero pivot is found it returns `rocsparse_status_success`. | ||
## Demonstrated API Calls | ||
### rocSPARSE | ||
- `rocsparse_analysis_policy` | ||
- `rocsparse_analysis_policy_reuse` | ||
- `rocsparse_create_handle` | ||
- `rocsparse_create_mat_descr` | ||
- `rocsparse_create_mat_info` | ||
- `rocsparse_csritsv_zero_pivot` | ||
- `rocsparse_dcsritsv_analysis` | ||
- `rocsparse_dcsritsv_buffer_size` | ||
- `rocsparse_dcsritsv_solve` | ||
- `rocsparse_destroy_handle` | ||
- `rocsparse_destroy_mat_descr` | ||
- `rocsparse_destroy_mat_info` | ||
- `rocsparse_diag_type_non_unit` | ||
- `rocsparse_fill_mode_lower` | ||
- `rocsparse_handle` | ||
- `rocsparse_int` | ||
- `rocsparse_mat_descr` | ||
- `rocsparse_mat_info` | ||
- `rocsparse_operation` | ||
- `rocsparse_operation_none` | ||
- `rocsparse_pointer_mode_host` | ||
- `rocsparse_set_mat_diag_type` | ||
- `rocsparse_set_mat_fill_mode` | ||
- `rocsparse_set_pointer_mode` | ||
- `rocsparse_solve_policy` | ||
- `rocsparse_solve_policy_auto` | ||
- `rocsparse_status` | ||
- `rocsparse_status_zero_pivot` | ||
### HIP runtime | ||
- `hipDeviceSynchronize` | ||
- `hipFree` | ||
- `hipMalloc` | ||
- `hipMemcpy` | ||
- `hipMemcpyDeviceToHost` | ||
- `hipMemcpyHostToDevice` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio 15 | ||
VisualStudioVersion = 15.0.33026.149 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csritsv_vs2017", "csritsv_vs2017.vcxproj", "{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|x64 = Debug|x64 | ||
Release|x64 = Release|x64 | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Debug|x64.ActiveCfg = Debug|x64 | ||
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Debug|x64.Build.0 = Debug|x64 | ||
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Release|x64.ActiveCfg = Release|x64 | ||
{F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7}.Release|x64.Build.0 = Release|x64 | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {E11DC4C1-CA8A-46CA-93BB-3CB480169DA5} | ||
EndGlobalSection | ||
EndGlobal |
Oops, something went wrong.