Develop Stream 2024-03-21 general fixes (part I) (#97)

* bump the required cmake version to 3.21.3 * Fix device_globals example name * Fix hip_streams timeout on AMD windows debug build type * Update templates * Update cuda container to ROCm 5.4 * Change std::bind into lambda * HIP 5.5 fixes * fix tests not being executed * Make the reference to the identity and transpose op uniform * Fix NVCC CI * Resolve "Increase timeout for CI" * Update fixed size arrays to C++ standards * Add missing include in hip_texture_management * Remove void** cast from hipMalloc * Fix hip-libraries-cuda-ubuntu Dockerfile * Make the windows builds less verbose * Rework Windows CI * Skip failing rocsparse tests * Fix cooperative groups example * ci: Make skipped examples more prominent in windows VS test runner * Enable rocsparse examples in CI * Update .gitlab/issue_templates/example.md Fix small typo --------- Co-authored-by: Balint Soproni <[email protected]> Co-authored-by: Robin Voetter <[email protected]> Co-authored-by: Nara Prasetya <[email protected]> Co-authored-by: Nol Moonen <[email protected]> Co-authored-by: Mátyás Aradi <[email protected]> Co-authored-by: Gergely Mészáros <[email protected]> Co-authored-by: Sam Wu <[email protected]>
ROCm · Apr 29, 2024 · 95687ef · 95687ef
1 parent 259508d
commit 95687ef
Show file tree

Hide file tree

Showing 30 changed files with 463 additions and 325 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
diff --git a/.gitlab/issue_templates/example.md b/.gitlab/issue_templates/example.md
@@ -1,22 +1,12 @@
 # Example checklist
 
 - Elaboration
-	- [ ] Example concept is described and agreed on
+    - [ ] Example concept is described and agreed upon
 - Implementation
-	- [ ] Example is implemented
-	- CMake support is added
-		- [ ] Linux
-		- [ ] Windows
-	- [ ] GNU Make support is added (Linux)
-	- [ ] Visual Studio project is added (Windows)
-		- [ ] Project is added to the root solution
-	- [ ] Inline code documentation is added
-	- [ ] README is added according to template
-		- [ ] Related READMEs, ToC are updated
-	- [ ] Internal CI passes
+    - [ ] Example is implemented
 - Internal review
-	- [ ] Internal code review is done
+    - [ ] Internal code review is done
 - External review
-	- [ ] Upstreaming PR is opened, external code review is done
+    - [ ] Upstreaming PR is opened, external review is done
 - Done
-	- [ ] Example merged to upstream
+    - [ ] Example merged to upstream
diff --git a/.gitlab/merge_request_templates/example.md b/.gitlab/merge_request_templates/example.md
@@ -0,0 +1,16 @@
+## Notes for the reviewer
+_The reviewer should acknowledge all these topics._
+<insert notes>
+
+## Checklist before merge
+- [ ] CMake support is added
+    - [ ] Dependencies are copied via `IMPORTED_RUNTIME_ARTIFACTS` if applicable
+- [ ] GNU Make support is added (Linux)
+- [ ] Visual Studio project is added for VS2017, 2019, 2022 (Windows) (use [the script](https://projects.streamhpc.com/departments/knowledge/employee-handbook/-/wikis/Projects/AMD/Libraries/examples/Adding-Visual-Studio-Projects-to-new-examples#scripts))
+    - [ ] DLL dependencies are copied via `<Content Include`
+    - [ ] Visual Studio project is added to `ROCm-Examples-vs*.sln` (ROCm)
+    - [ ] Visual Studio project is added to `ROCm-Examples-Portable-vs*.sln` (ROCm/CUDA) if applicable
+- [ ] Inline code documentation is added
+- [ ] README is added according to template
+    - [ ] Related READMEs, ToC are updated
+- [ ] The CI passes for Linux/ROCm, Linux/CUDA, Windows/ROCm, Windows/CUDA.
diff --git a/Applications/floyd_warshall/main.hip b/Applications/floyd_warshall/main.hip
@@ -198,8 +198,8 @@ int main(int argc, char* argv[])
     // Allocate device memory
     unsigned int* d_adjacency_matrix;
     unsigned int* d_next_matrix;
-    HIP_CHECK(hipMalloc((void**)&d_adjacency_matrix, size_bytes));
-    HIP_CHECK(hipMalloc((void**)&d_next_matrix, size_bytes));
+    HIP_CHECK(hipMalloc(&d_adjacency_matrix, size_bytes));
+    HIP_CHECK(hipMalloc(&d_next_matrix, size_bytes));
 
     // Create events to measure the execution time of the kernels.
     hipEvent_t start, stop;

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,6 +1,6 @@
 # MIT License
 #
-# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.21.3 FATAL_ERROR)
 project(ROCMm-SDK-Examples LANGUAGES CXX)
 enable_testing()
 

diff --git a/Common/example_utils.hpp b/Common/example_utils.hpp
@@ -1,6 +1,6 @@
 // MIT License
 //
-// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
@@ -23,6 +23,18 @@
 #ifndef COMMON_EXAMPLE_UTILS_HPP
 #define COMMON_EXAMPLE_UTILS_HPP
 
+// Compiling HIP on Windows includes windows.h, and this triggers many silly warnings.
+#if defined(_WIN32) && defined(__NVCC__)
+    #pragma nv_diag_suppress 108 // signed bit field of length 1
+    #pragma nv_diag_suppress 174 // expression has no effect
+    #pragma nv_diag_suppress 1835 // attribute "dllimport" does not apply here
+#endif
+
+// rocPRIM adds a #warning about printf on NAVI.
+#ifdef __clang__
+    #pragma clang diagnostic ignored "-W#warnings"
+#endif
+
 #include <cassert>
 #include <chrono>
 #include <iostream>

diff --git a/Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile b/Dockerfiles/hip-libraries-cuda-ubuntu.Dockerfile
@@ -27,7 +27,7 @@ RUN export DEBIAN_FRONTEND=noninteractive; \
 # Install HIP using the installer script
 RUN export DEBIAN_FRONTEND=noninteractive; \
     wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \
-    && echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ ubuntu main' > /etc/apt/sources.list.d/rocm.list \
+    && echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.4/ ubuntu main' > /etc/apt/sources.list.d/rocm.list \
     && apt-get update -qq \
     && apt-get install -y hip-base hipify-clang \
     && apt-get download hip-runtime-nvidia hip-dev \
@@ -45,64 +45,71 @@ RUN echo "/opt/rocm/lib" >> /etc/ld.so.conf.d/rocm.conf \
     && ldconfig
 
 # Install rocRAND
-RUN wget https://github.com/ROCmSoftwarePlatform/rocRAND/archive/refs/tags/rocm-5.3.0.tar.gz \
-    && tar -xf ./rocm-5.3.0.tar.gz \
-    && rm ./rocm-5.3.0.tar.gz \
-    && cmake -S ./rocRAND-rocm-5.3.0 -B ./rocRAND-rocm-5.3.0/build \
+RUN wget https://github.com/ROCmSoftwarePlatform/rocRAND/archive/refs/tags/rocm-5.4.0.tar.gz \
+    && tar -xf ./rocm-5.4.0.tar.gz \
+    && rm ./rocm-5.4.0.tar.gz \
+    && cmake -S ./rocRAND-rocm-5.4.0 -B ./rocRAND-rocm-5.4.0/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
         -D BUILD_HIPRAND=OFF \
         -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-    && cmake --build ./rocRAND-rocm-5.3.0/build --target install \
-    && rm -rf ./rocRAND-rocm-5.3.0
+    && cmake --build ./rocRAND-rocm-5.4.0/build --target install \
+    && rm -rf ./rocRAND-rocm-5.4.0
 
 # Install hipCUB
-RUN wget https://github.com/ROCmSoftwarePlatform/hipCUB/archive/refs/tags/rocm-5.3.0.tar.gz \
-    && tar -xf ./rocm-5.3.0.tar.gz \
-    && rm ./rocm-5.3.0.tar.gz \
-    && cmake -S ./hipCUB-rocm-5.3.0 -B ./hipCUB-rocm-5.3.0/build \
+RUN wget https://github.com/ROCmSoftwarePlatform/hipCUB/archive/refs/tags/rocm-5.4.0.tar.gz \
+    && tar -xf ./rocm-5.4.0.tar.gz \
+    && rm ./rocm-5.4.0.tar.gz \
+    && cmake -S ./hipCUB-rocm-5.4.0 -B ./hipCUB-rocm-5.4.0/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
         -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-    && cmake --build ./hipCUB-rocm-5.3.0/build --target install \
-    && rm -rf ./hipCUB-rocm-5.3.0
+    && cmake --build ./hipCUB-rocm-5.4.0/build --target install \
+    && rm -rf ./hipCUB-rocm-5.4.0
 
 # Install hipBLAS
-RUN wget https://github.com/ROCmSoftwarePlatform/hipBLAS/archive/refs/tags/rocm-5.3.0.tar.gz \
-    && tar -xf ./rocm-5.3.0.tar.gz \
-    && rm ./rocm-5.3.0.tar.gz \
-    && cmake -S ./hipBLAS-rocm-5.3.0 -B ./hipBLAS-rocm-5.3.0/build \
+RUN wget https://github.com/ROCmSoftwarePlatform/hipBLAS/archive/refs/tags/rocm-5.4.0.tar.gz \
+    && tar -xf ./rocm-5.4.0.tar.gz \
+    && rm ./rocm-5.4.0.tar.gz \
+    && cmake -S ./hipBLAS-rocm-5.4.0 -B ./hipBLAS-rocm-5.4.0/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
         -D CMAKE_INSTALL_PREFIX=/opt/rocm \
         -D USE_CUDA=ON \
-    && cmake --build ./hipBLAS-rocm-5.3.0/build --target install \
-    && rm -rf ./hipBLAS-rocm-5.3.0
+    && cmake --build ./hipBLAS-rocm-5.4.0/build --target install \
+    && rm -rf ./hipBLAS-rocm-5.4.0
 
 # Install hipSOLVER
-RUN wget https://github.com/ROCmSoftwarePlatform/hipSOLVER/archive/refs/tags/rocm-5.3.0.tar.gz \
-    && tar -xf ./rocm-5.3.0.tar.gz \
-    && rm ./rocm-5.3.0.tar.gz \
-    && cmake -S ./hipSOLVER-rocm-5.3.0 -B ./hipSOLVER-rocm-5.3.0/build \
+RUN wget https://github.com/ROCmSoftwarePlatform/hipSOLVER/archive/refs/tags/rocm-5.4.0.tar.gz \
+    && tar -xf ./rocm-5.4.0.tar.gz \
+    && rm ./rocm-5.4.0.tar.gz \
+    && cmake -S ./hipSOLVER-rocm-5.4.0 -B ./hipSOLVER-rocm-5.4.0/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
         -D CMAKE_INSTALL_PREFIX=/opt/rocm \
         -D USE_CUDA=ON \
-    && cmake --build ./hipSOLVER-rocm-5.3.0/build --target install \
-    && rm -rf ./hipSOLVER-rocm-5.3.0
+    && cmake --build ./hipSOLVER-rocm-5.4.0/build --target install \
+    && rm -rf ./hipSOLVER-rocm-5.4.0
 
 # Install hipRAND
-RUN wget https://github.com/ROCmSoftwarePlatform/hipRAND/archive/refs/tags/rocm-5.3.0.tar.gz \
-    && tar -xf ./rocm-5.3.0.tar.gz \
-    && rm ./rocm-5.3.0.tar.gz \
-    && cmake -S ./hipRAND-rocm-5.3.0 -B ./hipRAND-rocm-5.3.0/build \
+RUN wget https://github.com/ROCmSoftwarePlatform/hipRAND/archive/refs/tags/rocm-5.4.0.tar.gz \
+    && tar -xf ./rocm-5.4.0.tar.gz \
+    && rm ./rocm-5.4.0.tar.gz \
+    && cmake -S ./hipRAND-rocm-5.4.0 -B ./hipRAND-rocm-5.4.0/build \
         -D CMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
         -D CMAKE_INSTALL_PREFIX=/opt/rocm \
-    && cmake --build ./hipRAND-rocm-5.3.0/build --target install \
-    && rm -rf ./hipRAND-rocm-5.3.0
+        -D BUILD_WITH_LIB=CUDA \
+    && cmake --build ./hipRAND-rocm-5.4.0/build --target install \
+    && rm -rf ./hipRAND-rocm-5.4.0
 
 # Use render group as an argument from user
 ARG GID=109
 
-# Add the render group and a user with sudo permissions for the container
-RUN groupadd --system --gid ${GID} render \
-    && useradd -Um -G sudo,video,render developer \
+# Add the render group, or change its id if it already exists (getent's exit status tells which case applies)
+RUN if getent group render > /dev/null; then \
+        groupmod --gid ${GID} render; \
+    else \
+        groupadd --system --gid ${GID} render; \
+    fi
+
+# Add a user with sudo permissions for the container
+RUN useradd -Um -G sudo,video,render developer \
     && echo developer ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/developer \
     && chmod 0440 /etc/sudoers.d/developer
 

diff --git a/HIP-Basic/cooperative_groups/CMakeLists.txt b/HIP-Basic/cooperative_groups/CMakeLists.txt
@@ -54,6 +54,11 @@ add_test(${example_name} ${example_name})
 set(include_dirs "../../Common")
 if(GPU_RUNTIME STREQUAL "CUDA")
     list(APPEND include_dirs "${ROCM_ROOT}/include")
+else()
+    # Define NDEBUG for 5.5 <= HIP < 6.0 (known cooperative groups header bug); quoting keeps if() well-formed when the version is unset
+    if("${hip-lang_VERSION}" VERSION_GREATER_EQUAL 5.5 AND "${hip-lang_VERSION}" VERSION_LESS 6)
+        add_compile_definitions(NDEBUG)
+    endif()
 endif()
 
 target_include_directories(${example_name} PRIVATE ${include_dirs})

diff --git a/HIP-Basic/device_globals/CMakeLists.txt b/HIP-Basic/device_globals/CMakeLists.txt
@@ -20,7 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-set(example_name device_globals)
+set(example_name hip_device_globals)
 
 cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
 project(${example_name} LANGUAGES CXX)

diff --git a/HIP-Basic/device_query/main.cpp b/HIP-Basic/device_query/main.cpp
@@ -1,6 +1,6 @@
 // MIT License
 //
-// Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
@@ -20,13 +20,13 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
 
+#include "example_utils.hpp"
+
 #include <iomanip>
 #include <iostream>
 
 #include <hip/hip_runtime.h>
 
-#include "example_utils.hpp"
-
 namespace
 {
 /// Number of characters in the first column.

diff --git a/HIP-Basic/occupancy/main.hip b/HIP-Basic/occupancy/main.hip
@@ -165,8 +165,8 @@ int main()
     // Initialize the input data
     for(int i = 0; i < size; i++)
     {
-        h_A[i] = (float)i;
-        h_B[i] = (float)i;
+        h_A[i] = static_cast<float>(i);
+        h_B[i] = static_cast<float>(i);
     }
 
     float* d_A = nullptr;

diff --git a/HIP-Basic/texture_management/main.hip b/HIP-Basic/texture_management/main.hip
@@ -24,6 +24,7 @@
 
 #include <hip/hip_runtime.h>
 
+#include <array>
 #include <iostream>
 #include <vector>
 
@@ -147,8 +148,8 @@ int main()
     HIP_CHECK(hipGetLastError());
 
     // Copy data from device back to host.
-    unsigned int h_histogram[hist_bin_count];
-    HIP_CHECK(hipMemcpy(h_histogram, d_histogram, hist_bytes, hipMemcpyDeviceToHost));
+    std::array<unsigned int, hist_bin_count> h_histogram;
+    HIP_CHECK(hipMemcpy(h_histogram.data(), d_histogram, hist_bytes, hipMemcpyDeviceToHost));
 
     // Print out results.
     std::cout << "Equal-width histogram with " << hist_bin_count << " bins of values [0, " << size

diff --git a/Libraries/hipBLAS/gemm_strided_batched/README.md b/Libraries/hipBLAS/gemm_strided_batched/README.md
@@ -3,20 +3,21 @@
 ## Description
 This example illustrates the use of the hipBLAS Level 3 Strided Batched General Matrix Multiplication. The hipBLAS GEMM STRIDED BATCHED performs a matrix--matrix operation for a _batch_ of matrices as:
 
-$C[i] = \alpha \cdot f(A[i]) \cdot f(B[i]) + \beta \cdot (C[i])$
+$C[i] = \alpha \cdot A[i]' \cdot B[i]' + \beta \cdot (C[i])$
 
-for each $i \in [0, batch - 1]$, where $X[i] = X + i \cdot strideX$ is the $i$-th element of the correspondent batch and $f(X)$ is one of the following:
-- $f(X) = X$ or
-- $f(X) = X^T$ (transpose $X$: $X_{ij}^T = X_{ji}$) or
-- $f(X) = X^H$ (Hermitian $X$: $X_{ij}^H = \bar X_{ji} $).
+for each $i \in [0, batch - 1]$, where $X[i] = X + i \cdot strideX$ is the $i$-th element of the corresponding batch and $X'$ is one of the following:
+- $X' = X$ or
+- $X' = X^T$ (transpose $X$: $X_{ij}^T = X_{ji}$) or
+- $X' = X^H$ (Hermitian $X$: $X_{ij}^H = \bar X_{ji} $).
+In this example the identity is used.
 
 $\alpha$ and $\beta$ are scalars, and $A$, $B$ and $C$ are the batches of matrices. For each $i$, $A[i]$, $B[i]$ and $C[i]$ are matrices such that
-$f(A[i])$ is an $m \times k$ matrix, $f(B[i])$ a $k \times n$ matrix and $C[i]$ an $m \times n$ matrix.
+$A[i]'$ is an $m \times k$ matrix, $B[i]'$ a $k \times n$ matrix and $C[i]$ an $m \times n$ matrix.
 
 
 ### Application flow
 1. Read in command-line parameters.
-2. Set $f$ operation, set sizes of matrices and get batch count.
+2. Set dimension variables of the matrices and get the batch count.
 3. Allocate and initialize the host matrices. Set up $B$ matrix as an identity matrix.
 4. Initialize gold standard matrix.
 5. Compute CPU reference result with strided batched subvectors.
@@ -33,19 +34,19 @@ The application provides the following optional command line arguments:
 - `-a` or `--alpha`. The scalar value $\alpha$ used in the GEMM operation. Its default value is 1.
 - `-b` or `--beta`. The scalar value $\beta$ used in the GEMM operation. Its default value is 1.
 - `-c` or `--count`. Batch count. Its default value is 3.
-- `-m` or `--m`. The number of rows of matrices $f(A)$ and $C$, which must be greater than 0. Its default value is 5.
-- `-n` or `--n`. The number of columns of matrices $f(B)$ and $C$, which must be greater than 0. Its default value is 5.
-- `-k` or `--k`. The number of columns of matrix $f(A)$ and rows of matrix $f(B)$, which must be greater than 0. Its default value is 5.
+- `-m` or `--m`. The number of rows of matrices $A$ and $C$, which must be greater than 0. Its default value is 5.
+- `-n` or `--n`. The number of columns of matrices $B$ and $C$, which must be greater than 0. Its default value is 5.
+- `-k` or `--k`. The number of columns of matrix $A$ and rows of matrix $B$, which must be greater than 0. Its default value is 5.
 
 ## Key APIs and Concepts
 - The performance of a numerical multi-linear algebra code can be greatly improved by using tensor contractions [ [Y. Shi et al., HiPC, pp 193, 2016.](https://doi.org/10.1109/HiPC.2016.031) ]; therefore most of the hipBLAS functions have a `_batched` and a `_strided_batched` [ [C. Jhurani and P. Mullowney, JPDC Vol 75, pp 133, 2015.](https://doi.org/10.1016/j.jpdc.2014.09.003) ] extension.<br/>
 We can apply the same multiplication operator for several matrices if we combine them into batched matrices. Batched matrix multiplication has a performance improvement for a large number of small matrices. For a constant stride between matrices, further acceleration is available by strided batched GEMM.
 - hipBLAS is initialized by calling `hipblasCreate(hipblasHandle*)` and it is terminated by calling `hipblasDestroy(hipblasHandle)`.
 - The _pointer mode_ controls whether scalar parameters must be allocated on the host (`HIPBLAS_POINTER_MODE_HOST`) or on the device (`HIPBLAS_POINTER_MODE_DEVICE`). It is controlled by `hipblasSetPointerMode`.
-- The $f$ operator -- defined in Description section -- can be
-    - `HIPBLAS_OP_N`: identity operator ($f(X) = X$),
-    - `HIPBLAS_OP_T`: transpose operator ($f(X) = X^T$) or
-    - `HIPBLAS_OP_C`: Hermitian (conjugate transpose) operator ($f(X) = X^H$).
+- The symbol $X'$ denotes the following operations, as defined in the Description section:
+    - `HIPBLAS_OP_N`: identity operator ($X' = X$),
+    - `HIPBLAS_OP_T`: transpose operator ($X' = X^T$) or
+    - `HIPBLAS_OP_C`: Hermitian (conjugate transpose) operator ($X' = X^H$).
 - `hipblasStride` strides between matrices or vectors in strided_batched functions.
 - `hipblas[HSDCZ]gemmStridedBatched`
 
@@ -60,9 +61,9 @@ We can apply the same multiplication operator for several matrices if we combine
     - `hipblasHandle_t handle`
     - `hipblasOperation_t trans_a`: transformation operator on each $A_i$ matrix
     - `hipblasOperation_t trans_b`: transformation operator on each $B_i$ matrix
-    - `int m`: number of rows in each $f(A_i)$ and $C$ matrices
-    - `int n`: number of columns in each $f(B_i)$ and $C$ matrices
-    - `int k`: number of columns in each $f(A_i)$ matrix and number of rows in each $f(B_i)$ matrix
+    - `int m`: number of rows in each $A_i'$ and $C$ matrices
+    - `int n`: number of columns in each $B_i'$ and $C$ matrices
+    - `int k`: number of columns in each $A_i'$ matrix and number of rows in each $B_i'$ matrix
     - `const float *alpha`: scalar multiplier of each $C_i$ matrix addition
     - `const float  *A`: pointer to each $A_i$ matrix
     - `int lda`: leading dimension of each $A_i$ matrix

diff --git a/Libraries/hipBLAS/gemm_strided_batched/main.hip b/Libraries/hipBLAS/gemm_strided_batched/main.hip
@@ -1,6 +1,6 @@
 // MIT License
 //
-// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
@@ -42,9 +42,9 @@ int main(const int argc, const char** argv)
     parser.set_optional<float>("a", "alpha", 1.f, "Alpha scalar");
     parser.set_optional<float>("b", "beta", 1.f, "Beta scalar");
     parser.set_optional<int>("c", "count", 3, "Batch count");
-    parser.set_optional<int>("m", "m", 5, "Number of rows of matrices f(A_i) and C_i");
-    parser.set_optional<int>("n", "n", 5, "Number of columns of matrices f(B_i) and C_i");
-    parser.set_optional<int>("k", "k", 5, "Number of columns of matrix f(A_i) and rows of f(B_i)");
+    parser.set_optional<int>("m", "m", 5, "Number of rows of matrices A_i and C_i");
+    parser.set_optional<int>("n", "n", 5, "Number of columns of matrices B_i and C_i");
+    parser.set_optional<int>("k", "k", 5, "Number of columns of matrix A_i and rows of B_i");
     parser.run_and_exit_if_error();
 
     // Set sizes of matrices.
@@ -84,7 +84,7 @@ int main(const int argc, const char** argv)
     const float h_alpha = parser.get<float>("a");
     const float h_beta  = parser.get<float>("b");
 
-    // Set GEMM operation as identity operation: $f(X) = X$
+    // Set GEMM operation as identity operation: $X' = X$
     const hipblasOperation_t trans_a = HIPBLAS_OP_N;
     const hipblasOperation_t trans_b = HIPBLAS_OP_N;