diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 3b2a174b97..8486266704 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -29,13 +29,12 @@ gcc_10_3_1: SPEC: " ~shared +openmp +omptask ~vectorization +tests %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -# Known issue currently under investigation +# custom variant # https://github.com/LLNL/RAJA/pull/1712#issuecomment-2292006843 intel_2023_2_1: variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" + SPEC: "${PROJECT_POODLE_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle - allow_failure: true ############ # Extra jobs diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 2258878c3e..c745ca4a6c 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -29,13 +29,12 @@ gcc_10_3_1: SPEC: " ~shared +openmp +omptask ~vectorization +tests %gcc@=10.3.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby -# Known issue currently under investigation +# custom variant # https://github.com/LLNL/RAJA/pull/1712#issuecomment-2292006843 intel_2023_2_1: variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" + SPEC: "${PROJECT_RUBY_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby - allow_failure: true ############ # Extra jobs diff --git a/docs/sphinx/user_guide/tutorial/view_layout.rst b/docs/sphinx/user_guide/tutorial/view_layout.rst index 252a9fe8b1..4d778ad941 100644 --- a/docs/sphinx/user_guide/tutorial/view_layout.rst +++ b/docs/sphinx/user_guide/tutorial/view_layout.rst @@ -22,8 +22,8 @@ from the build directory. 
Key RAJA features shown in this section are: - * ``RAJA::View`` - * ``RAJA::Layout`` and ``RAJA::OffsetLayout`` constructs + * ``RAJA::View`` + * ``RAJA::Layout`` and ``RAJA::OffsetLayout`` constructs * Layout permutations The examples in this section illustrate RAJA View and Layout concepts @@ -40,11 +40,11 @@ operation, using :math:`N \times N` matrices: :end-before: _cstyle_matmult_end :language: C++ -As is commonly done for efficiency in C and C++, we have allocated the data -for the matrices as one-dimensional arrays. Thus, we need to manually compute +As is commonly done for efficiency in C and C++, we have allocated the data +for the matrices as one-dimensional arrays. Thus, we need to manually compute the data pointer offsets for the row and column indices in the kernel. Here, we use the array ``Cref`` to hold a reference solution matrix that -we use to compare with results generated by the examples below. +we use to compare with results generated by the examples below. To simplify the multi-dimensional indexing, we can use ``RAJA::View`` objects, which we define as: @@ -55,20 +55,31 @@ which we define as: :language: C++ Here we define three ``RAJA::View`` objects, 'Aview', 'Bview', and 'Cview', -that *wrap* the array data pointers, 'A', 'B', and 'C', respectively. We -pass a data pointer as the first argument to each view constructor and then +that *wrap* the array data pointers, 'A', 'B', and 'C', respectively. We +pass a data pointer as the first argument to each view constructor and then the extent of each matrix dimension as the second and third arguments. There are two extent arguments since we indicate in the ``RAJA::Layout`` template -parameter list. The matrices are square and each extent is 'N'. Here, the -template parameters to ``RAJA::View`` are the array data type 'double' and +parameter list. The matrices are square and each extent is 'N'. 
Here, the +template parameters to ``RAJA::View`` are the array data type 'double' and a ``RAJA::Layout`` type. Specifically:: RAJA::Layout<2, int> -means that each View represents a two-dimensional default data layout, and -that we will use values of type 'int' to index into the arrays. +means that each View represents a two-dimensional default data layout, and +that we will use values of type 'int' to index into the arrays. -Using the ``RAJA::View`` objects, we can access the data entries for the rows +.. note:: A third argument in the Layout type can be used to specify the index + with unit stride:: + + RAJA::Layout<2, int, 1> + + In the example above index 1 will be marked to have unit stride making + multi-dimensional indexing more efficient by avoiding multiplication by + `1` when it is unnecessary. + + + +Using the ``RAJA::View`` objects, we can access the data entries for the rows and columns using a more natural, less error-prone syntax: .. literalinclude:: ../../../../exercises/view-layout_solution.cpp @@ -79,9 +90,9 @@ and columns using a more natural, less error-prone syntax: Default Layouts Use Row-major Ordering ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The default data layout ordering in RAJA is *row-major*, which is the -convention for multi-dimensional array indexing in C and C++. This means that -the rightmost index will be stride-1, the index to the left of the rightmost +The default data layout ordering in RAJA is *row-major*, which is the +convention for multi-dimensional array indexing in C and C++. This means that +the rightmost index will be stride-1, the index to the left of the rightmost index will have stride equal to the extent of the rightmost dimension, and so on. @@ -90,8 +101,8 @@ so on. see :ref:`feat-view-label` for more details. 
To illustrate the default data layout striding, we next show simple -one-, two-, and three-dimensional examples where the for-loop ordering -for the different dimensions is such that all data access is stride-1. We +one-, two-, and three-dimensional examples where the for-loop ordering +for the different dimensions is such that all data access is stride-1. We begin by defining some dimensions, allocate and initialize arrays: .. literalinclude:: ../../../../exercises/view-layout_solution.cpp @@ -99,7 +110,7 @@ begin by defining some dimensions, allocate and initialize arrays: :end-before: _default_views_init_end :language: C++ -The version of the array initialization kernel using a one-dimensional +The version of the array initialization kernel using a one-dimensional ``RAJA::View`` is: .. literalinclude:: ../../../../exercises/view-layout_solution.cpp @@ -107,7 +118,7 @@ The version of the array initialization kernel using a one-dimensional :end-before: _default_view1D_end :language: C++ -The version of the array initialization using a two-dimensional +The version of the array initialization using a two-dimensional ``RAJA::View`` is: .. literalinclude:: ../../../../exercises/view-layout_solution.cpp @@ -115,7 +126,7 @@ The version of the array initialization using a two-dimensional :end-before: _default_view2D_end :language: C++ -The three-dimensional version is: +The three-dimensional version is: .. literalinclude:: ../../../../exercises/view-layout_solution.cpp :start-after: _default_view3D_start @@ -126,16 +137,16 @@ It's worth repeating that the data array access in all three variants shown here using ``RAJA::View`` objects is stride-1 since we order the for-loops in the loop nests to match the row-major ordering. -RAJA Layout types support other data access patterns with different striding -orders, offsets, and permutations. To this point, we have used the default -Layout constructor. RAJA provides methods to generate Layouts for different -indexing patterns. 
We describe these in the next several sections. Next, we +RAJA Layout types support other data access patterns with different striding +orders, offsets, and permutations. To this point, we have used the default +Layout constructor. RAJA provides methods to generate Layouts for different +indexing patterns. We describe these in the next several sections. Next, we show how to permute the data striding order using permuted Layouts. Permuted Layouts Change Data Striding Order ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Every ``RAJA::Layout`` object has a permutation. When a permutation is not +Every ``RAJA::Layout`` object has a permutation. When a permutation is not specified at creation, a Layout will use the identity permutation. Here are examples where the identity permutation is explicitly provided. First, in two dimensions: @@ -153,10 +164,10 @@ Then, in three dimensions: :language: C++ These two examples access the data with stride-1 ordering, the same as in -the earlier examples, which is shown by the nested loop ordering. +the earlier examples, which is shown by the nested loop ordering. The identity permutation in two dimensions is '{0, 1}' and is '{0, 1, 2}' -for three dimensions. The method ``RAJA::make_permuted_layout`` is used to -create a ``RAJA::Layout`` object with a permutation. The method takes two +for three dimensions. The method ``RAJA::make_permuted_layout`` is used to +create a ``RAJA::Layout`` object with a permutation. The method takes two arguments, the extents of each dimension and the permutation. .. note:: If a permuted Layout is created with the *identity permutation* @@ -170,9 +181,9 @@ Next, we permute the striding order for the two-dimensional example: :language: C++ Read from right to left, the permutation '{1, 0}' specifies that the first -(zero) index 'i' is stride-1 and the second index (one) 'j' has stride equal -to the extent of the first Layout dimension 'Nx'. This is evident in the -for-loop ordering. 
+(zero) index 'i' is stride-1, additionally captured in the ``RAJA::Layout``, +and the second index (one) 'j' has stride equal to the extent of the first +Layout dimension 'Nx'. This is evident in the for-loop ordering. Here is the three-dimensional case, where we have reversed the striding order using the permutation '{2, 1, 0}': @@ -182,7 +193,16 @@ using the permutation '{2, 1, 0}': :end-before: _perma_view3D_end :language: C++ -The data access remains stride-1 due to the for-loop reordering. For fun, +.. note:: As the unit stride is now held by index 0, we adjust the Layout template + argument accordingly:: + + RAJA::Layout<3, int, 0> + + As before index 0 will be marked to have unit stride making + multi-dimensional indexing more efficient by avoiding multiplication by + `1` when it is unnecessary. + +The data access remains stride-1 due to the for-loop reordering. For fun, here is another three-dimensional permutation: .. literalinclude:: ../../../../exercises/view-layout_solution.cpp @@ -197,8 +217,8 @@ Multi-dimensional Indices and Linear Indices ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``RAJA::Layout`` types provide methods to convert between linear indices and -multi-dimensional indices and vice versa. Recall the Layout 'perm3a_layout' -from above that was created with the permutation '{2, 1, 0}'. To get the +multi-dimensional indices and vice versa. Recall the Layout 'perm3a_layout' +from above that was created with the permutation '{2, 1, 0}'. To get the linear index corresponding to the index triple '(1, 2, 0)', you can do this:: @@ -210,12 +230,12 @@ for linear index 7, you can do:: int i, j, k; perm3a_layout.toIndices(7, i, j, k); -This sets 'i' to 1, 'j' to 2, and 'k' to 0. +This sets 'i' to 1, 'j' to 2, and 'k' to 0. 
-Similarly for the Layout 'permb_layout', which was created with the +Similarly for the Layout 'perm3b_layout', which was created with the permutation '{1, 2, 0}':: - lin = perm3b_layout(1, 2, 0); + lin = perm3b_layout(1, 2, 0); sets 'lin' to 13 = 1 + 0 * Nx + 2 * Nx * Nz and:: @@ -223,23 +243,23 @@ sets 'lin' to 13 = 1 + 0 * Nx + 2 * Nx * Nz and:: sets 'i' to 1, 'j' to 2, and 'k' to 0. -There are more examples in the exercise file associated with this section. +There are more examples in the exercise file associated with this section. Feel free to experiment with them. One important item to note is that, by default, there is no bounds checking on indices passed to a ``RAJA::View`` data access method or ``RAJA::Layout`` -index computation methods. Therefore, it is the responsibility of a user -to ensure that indices passed to ``RAJA::View`` and ``RAJA::Layoout`` -methods are in bounds to avoid accessing data outside -of the View or computing invalid indices. +index computation methods. Therefore, it is the responsibility of a user +to ensure that indices passed to ``RAJA::View`` and ``RAJA::Layout`` +methods are in bounds to avoid accessing data outside +of the View or computing invalid indices. -.. note:: RAJA provides a CMake variable ``RAJA_ENABLE_BOUNDS_CHECK`` to +.. note:: RAJA provides a CMake variable ``RAJA_ENABLE_BOUNDS_CHECK`` to turn run time bounds checking on or off when the code is compiled. Enabling bounds checking is useful for debugging and to ensure your code is correct. However, when enabled, bounds checking adds noticeable run time overhead. So it should not be enabled for - a production build of your code. - + a production build of your code. + Offset Layouts Apply Offsets to Indices ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -251,9 +271,9 @@ We first illustrate the concept of an offset with a C-style for-loop: :end-before: _cstyle_offlayout1D_end :language: C++ -Here, the for-loop runs from 'imin' to 'imax-1' (i.e., -5 to 5). 
To avoid -out-of-bounds negative indexing, we subtract 'imin' (i.e., -5) from the loop -index 'i'. +Here, the for-loop runs from 'imin' to 'imax-1' (i.e., -5 to 5). To avoid +out-of-bounds negative indexing, we subtract 'imin' (i.e., -5) from the loop +index 'i'. To do the same thing with RAJA, we create a ``RAJA::OffsetLayout`` object and use it to index into the array: @@ -264,7 +284,7 @@ and use it to index into the array: :language: C++ ``RAJA::OffsetLayout`` is a different type than ``RAJA::Layout`` because -it contains offset information. The arguments to the +it contains offset information. The arguments to the ``RAJA::make_offset_layout`` method are the index bounds. As expected, the two dimensional case is similar. First, a C-style loop: @@ -284,7 +304,7 @@ and then the same operation using a ``RAJA::OffsetLayout`` object: Note that the first argument passed to ``RAJA::make_offset_layout`` contains the lower bounds for 'i' and 'j' and the second argument contains the upper bounds. Also, the 'j' index is stride-1 by default since we did not pass -a permutation to the ``RAJA::make_offset_layout`` method, which is the same +a permutation to the ``RAJA::make_offset_layout`` method, which is the same as the non-offset Layout usage. Just like ``RAJA::Layout`` has a permutation, so does ``RAJA::OffsetLayout``. @@ -293,11 +313,10 @@ Here is an example where we permute the (i, j) index stride ordering: .. literalinclude:: ../../../../exercises/view-layout_solution.cpp :start-after: _raja_permofflayout2D_start :end-before: _raja_permofflayout2D_end - :language: C++ + :language: C++ -The permutation '{1, 0}' is passed as the third argument to -``RAJA::make_offset_layout``. From the ordering of the for-loops, we can see -that the 'i' index is stride-1 and the 'j' index has stride equal to the -extent of the 'i' dimension so the for-loop nest strides through +The permutation '{1, 0}' is passed as the third argument to +``RAJA::make_offset_layout``. 
From the ordering of the for-loops, we can see +that the 'i' index is stride-1 and the 'j' index has stride equal to the +extent of the 'i' dimension so the for-loop nest strides through the data with unit stride. - diff --git a/exercises/view-layout.cpp b/exercises/view-layout.cpp index 35c63726ee..6714fe66fb 100644 --- a/exercises/view-layout.cpp +++ b/exercises/view-layout.cpp @@ -22,9 +22,9 @@ * RAJA features shown: * - RAJA::View * - RAJA::Layout - * - Layout permutations + * - Layout permutations * - OffsetLayout - * - OffsetLayout permutations + * - OffsetLayout permutations * * NOTE: no RAJA kernel execution methods are used in these examples. */ @@ -38,16 +38,16 @@ void checkResult(T* C, T* Cref, int N); template void printValues(T* C, int N); -int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) { std::cout << "\n\nRAJA view & layout exercises...\n"; - //----------------------------------------------------------------------------// - // - // Matrix-matrix multiplication: default layout - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Matrix-matrix multiplication: default layout +// +//----------------------------------------------------------------------------// // _matmult_init_start // @@ -58,103 +58,95 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // Allocate storage for matrices and initialize matrix entries // - double* A = new double[N * N]; - double* B = new double[N * N]; - double* C = new double[N * N]; - double* Cref = new double[N * N]; - - for (int row = 0; row < N; ++row) - { - for (int col = 0; col < N; ++col) - { - A[col + N * row] = row + 1; - B[col + N * row] = col + 1; - C[col + N * row] = 0.0; - Cref[col + N * row] = 0.0; + double *A = new double[ N * N ]; + double *B = new double[ N * N ]; + double 
*C = new double[ N * N ]; + double *Cref = new double[ N * N ]; + + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + A[ col + N*row ] = row + 1; + B[ col + N*row ] = col + 1; + C[ col + N*row ] = 0.0; + Cref[ col + N*row ] = 0.0; } } // _matmult_init_end - // printValues(A, N*N); - // printValues(B, N*N); - // printValues(C, N*N); - // printValues(Cref, N*N); +//printValues(A, N*N); +//printValues(B, N*N); +//printValues(C, N*N); +//printValues(Cref, N*N); - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// std::cout << "\n Running matrix multiplication reference solution...\n"; // _cstyle_matmult_start - for (int row = 0; row < N; ++row) - { - for (int col = 0; col < N; ++col) - { - for (int k = 0; k < N; ++k) - { - Cref[col + N * row] += A[k + N * row] * B[col + N * k]; + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + for (int k = 0; k < N; ++k) { + Cref[col + N*row] += A[k + N*row] * B[col + N*k]; } } } // _cstyle_matmult_end - // printValues(Cref, N*N); +//printValues(Cref, N*N); - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// std::cout << "\n Running matrix multiplication w/Views...\n"; - // + // // Define RAJA View objects to simplify access to the matrix entries. 
- // - // Note: we use default Layout + // + // Note: we use default Layout // // _matmult_views_start - RAJA::View> Aview(A, N, N); - RAJA::View> Bview(B, N, N); - RAJA::View> Cview(C, N, N); + RAJA::View< double, RAJA::Layout<2, int, 1> > Aview(A, N, N); + RAJA::View< double, RAJA::Layout<2, int, 1> > Bview(B, N, N); + RAJA::View< double, RAJA::Layout<2, int, 1> > Cview(C, N, N); // _matmult_views_end // _cstyle_matmult_views_start - for (int row = 0; row < N; ++row) - { - for (int col = 0; col < N; ++col) - { - for (int k = 0; k < N; ++k) - { + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + for (int k = 0; k < N; ++k) { Cview(row, col) += Aview(row, k) * Bview(k, col); } } } // _cstyle_matmult_views_end - checkResult(C, Cref, N * N); - // printValues(C, N*N); + checkResult(C, Cref, N*N); +//printValues(C, N*N); - // - // Clean up. - // - delete[] A; - delete[] B; - delete[] C; - delete[] Cref; +// +// Clean up. +// + delete [] A; + delete [] B; + delete [] C; + delete [] Cref; - //----------------------------------------------------------------------------// - // - // Default layouts use row-major data ordering - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Default layouts use row-major data ordering +// +//----------------------------------------------------------------------------// // // Define dimensions and allocate arrays // // _default_views_init_start - constexpr int Nx = 3; - constexpr int Ny = 5; - constexpr int Nz = 2; - constexpr int Ntot = Nx * Ny * Nz; - int* a = new int[Ntot]; - int* aref = new int[Ntot]; + constexpr int Nx = 3; + constexpr int Ny = 5; + constexpr int Nz = 2; + constexpr int Ntot = Nx*Ny*Nz; + int* a = new int[ Ntot ]; + int* aref = new int[ Ntot ]; for (int i = 0; i < Ntot; ++i) { @@ -162,52 +154,49 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) } 
// _default_views_init_end - // printValues(ref, Ntot); +//printValues(ref, Ntot); - //----------------------------------------// +//----------------------------------------// std::cout << "\n Running default layout view cases...\n"; std::cout << "\n\t Running 1D view case...\n"; - + std::memset(a, 0, Ntot * sizeof(int)); + + // _default_view1D_start + RAJA::View< int, RAJA::Layout<1, int, 0> > view_1D(a, Ntot); - // _default_view1D_start - RAJA::View> view_1D(a, Ntot); - - for (int i = 0; i < Ntot; ++i) - { + for (int i = 0; i < Ntot; ++i) { view_1D(i) = i; } - // _default_view1D_end + // _default_view1D_end checkResult(a, aref, Ntot); - // printValues(a, Ntot); +//printValues(a, Ntot); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D default layout view case...\n"; std::memset(a, 0, Ntot * sizeof(int)); - + // _default_view2D_start - RAJA::View> view_2D(a, Nx, Ny); + RAJA::View< int, RAJA::Layout<2, int, 1> > view_2D(a, Nx, Ny); - int iter {0}; - for (int i = 0; i < Nx; ++i) - { - for (int j = 0; j < Ny; ++j) - { + int iter{0}; + for (int i = 0; i < Nx; ++i) { + for (int j = 0; j < Ny; ++j) { view_2D(i, j) = iter; ++iter; } } // _default_view2D_end - checkResult(a, aref, Nx * Ny); - // printValues(a, Nx*Ny); + checkResult(a, aref, Nx*Ny); +//printValues(a, Nx*Ny); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D default layout view case...\n"; @@ -216,23 +205,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) /// /// TODO... /// - /// EXERCISE: Implement a triple loop nest using a RAJA::View and + /// EXERCISE: Implement a triple loop nest using a RAJA::View and /// three-dimensional RAJA::Layout that iterates over the /// data array 'a' with unit stride. 
/// - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); +//printValues(a, Nx*Ny*Nz); - //----------------------------------------------------------------------------// - // - // Permuted layouts change the data striding order - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Permuted layouts change the data striding order +// +//----------------------------------------------------------------------------// std::cout << "\n Running permuted layout cases...\n"; - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D default permutation view case...\n"; @@ -240,25 +229,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _default_perm_view2D_start std::array defperm2 {{0, 1}}; - RAJA::Layout<2, int> defperm2_layout = - RAJA::make_permuted_layout({{Nx, Ny}}, defperm2); - RAJA::View> defperm_view_2D(a, defperm2_layout); + RAJA::Layout< 2, int> defperm2_layout = + RAJA::make_permuted_layout( {{Nx, Ny}}, defperm2); + RAJA::View< int, RAJA::Layout<2, int, 1> > defperm_view_2D(a, defperm2_layout); iter = 0; - for (int i = 0; i < Nx; ++i) - { - for (int j = 0; j < Ny; ++j) - { + for (int i = 0; i < Nx; ++i) { + for (int j = 0; j < Ny; ++j) { defperm_view_2D(i, j) = iter; ++iter; } } // _default_perm_view2D_end - checkResult(a, aref, Nx * Ny); - // printValues(a, Nx*Ny); + checkResult(a, aref, Nx*Ny); +//printValues(a, Nx*Ny); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D default permutation view case...\n"; @@ -271,11 +258,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) /// three-dimensional RAJA::Layout with the identity permutation. 
/// - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); +//printValues(a, Nx*Ny*Nz); - //----------------------------------------// - //----------------------------------------// +//----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D permuted layout view case...\n"; @@ -283,25 +270,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _perm_2D_start std::array perm2 {{1, 0}}; - RAJA::Layout<2, int> perm2_layout = - RAJA::make_permuted_layout({{Nx, Ny}}, perm2); - RAJA::View> perm_view_2D(a, perm2_layout); + RAJA::Layout< 2, int > perm2_layout = + RAJA::make_permuted_layout( {{Nx, Ny}}, perm2); + RAJA::View< int, RAJA::Layout<2, int, 0> > perm_view_2D(a, perm2_layout); iter = 0; - for (int j = 0; j < Ny; ++j) - { - for (int i = 0; i < Nx; ++i) - { + for (int j = 0; j < Ny; ++j) { + for (int i = 0; i < Nx; ++i) { perm_view_2D(i, j) = iter; ++iter; } } // _perm_2D_end - checkResult(a, aref, Nx * Ny); - // printValues(a, Nx*Ny); + checkResult(a, aref, Nx*Ny); +//printValues(a, Nx*Ny); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D perma layout view case...\n"; @@ -312,7 +297,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) /// /// EXERCISE: Implement a triple loop nest using a RAJA::View and /// three-dimensional RAJA::Layout with the permutation - /// {2, 1, 0}. + /// {2, 1, 0}. /// /// Name the Layout object 'perm3a_layout' so it can be used /// with the index conversion methods in the section below. @@ -320,10 +305,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) /// Layout object you create here. 
/// - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); +//printValues(a, Nx*Ny*Nz); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D permb layout view case...\n"; @@ -331,17 +316,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _permb_view3D_start std::array perm3b {{1, 2, 0}}; - RAJA::Layout<3, int> perm3b_layout = - RAJA::make_permuted_layout({{Nx, Ny, Nz}}, perm3b); - RAJA::View> perm3b_view_3D(a, perm3b_layout); + RAJA::Layout< 3, int > perm3b_layout = + RAJA::make_permuted_layout( {{Nx, Ny, Nz}}, perm3b); + RAJA::View< int, RAJA::Layout<3, int, 0> > perm3b_view_3D(a, perm3b_layout); iter = 0; - for (int j = 0; j < Ny; ++j) - { - for (int k = 0; k < Nz; ++k) - { - for (int i = 0; i < Nx; ++i) - { + for (int j = 0; j < Ny; ++j) { + for (int k = 0; k < Nz; ++k) { + for (int i = 0; i < Nx; ++i) { perm3b_view_3D(i, j, k) = iter; ++iter; } @@ -349,29 +331,29 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) } // _permb_view3D_end - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); +//printValues(a, Nx*Ny*Nz); - // - // Clean up. - // - delete[] a; - delete[] aref; +// +// Clean up. +// + delete [] a; + delete [] aref; - //----------------------------------------------------------------------------// - // - // Layouts: multi-dimensional indices vs. linear indicies - // - // RAJA::Layout type has methods that can be used to convert between - // multi-dimensional and linear indices. We show these below using the - // three-dimensional layouts in the examples above. 
Recall the Nx, Ny, Nz - // sizes defined earlier: - // - // constexpr int Nx = 3; - // constexpr int Ny = 5; - // constexpr int Nz = 2; - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Layouts: multi-dimensional indices vs. linear indicies +// +// RAJA::Layout type has methods that can be used to convert between +// multi-dimensional and linear indices. We show these below using the +// three-dimensional layouts in the examples above. Recall the Nx, Ny, Nz +// sizes defined earlier: +// +// constexpr int Nx = 3; +// constexpr int Ny = 5; +// constexpr int Nz = 2; +// +//----------------------------------------------------------------------------// std::cout << "\n Multi-dimensional indices to linear indices...\n"; @@ -379,44 +361,44 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::cout << "\nperm3a_layout...\n" << std::endl; int lin = -1; - int i = -1; - int j = -1; - int k = -1; + int i = -1; + int j = -1; + int k = -1; - /* - // _perm3d_layout_start - lin = perm3a_layout(1, 2, 0); - std::cout << "\tperm3a_layout(1, 2, 0) = " << lin << std::endl; - std::cout << "\t Should be 7 = 1 + 2 * Nx + 0 * Nx * Ny " - << "(since perm is {2, 1, 0})" << std::endl; +/* + // _perm3d_layout_start + lin = perm3a_layout(1, 2, 0); + std::cout << "\tperm3a_layout(1, 2, 0) = " << lin << std::endl; + std::cout << "\t Should be 7 = 1 + 2 * Nx + 0 * Nx * Ny " + << "(since perm is {2, 1, 0})" << std::endl; - perm3a_layout.toIndices(7, i, j, k); - std::cout << "\tperm3a_layout.toIndices(7, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; - // _perm3d_layout_end + perm3a_layout.toIndices(7, i, j, k); + std::cout << "\tperm3a_layout.toIndices(7, i, j, k) --> (i, j, k) = " + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; + // _perm3d_layout_end - lin = perm3a_layout(2, 3, 1); - 
std::cout << "\tperm3a_layout(2, 3, 1) = " << lin << std::endl; - std::cout << "\t Should be 26 = 2 + 3 * Nx + 1 * Nx * Ny " - << "(since perm is {2, 1, 0})" << std::endl; + lin = perm3a_layout(2, 3, 1); + std::cout << "\tperm3a_layout(2, 3, 1) = " << lin << std::endl; + std::cout << "\t Should be 26 = 2 + 3 * Nx + 1 * Nx * Ny " + << "(since perm is {2, 1, 0})" << std::endl; - perm3a_layout.toIndices(26, i, j, k); - std::cout << "\tperm3a_layout.toIndices(26, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; + perm3a_layout.toIndices(26, i, j, k); + std::cout << "\tperm3a_layout.toIndices(26, i, j, k) --> (i, j, k) = " + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; - lin = perm3a_layout(0, 2, 1); - std::cout << "\tperm3a_layout(0, 2, 1) = " << lin << std::endl; - std::cout << "\t Should be 21 = 0 + 2 * Nx + 1 * Nx * Ny " - << "(since perm is {2, 1, 0})" << std::endl; + lin = perm3a_layout(0, 2, 1); + std::cout << "\tperm3a_layout(0, 2, 1) = " << lin << std::endl; + std::cout << "\t Should be 21 = 0 + 2 * Nx + 1 * Nx * Ny " + << "(since perm is {2, 1, 0})" << std::endl; - perm3a_layout.toIndices(21, i, j, k); - std::cout << "\tperm3a_layout.toIndices(21, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; - */ + perm3a_layout.toIndices(21, i, j, k); + std::cout << "\tperm3a_layout.toIndices(21, i, j, k) --> (i, j, k) = " + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; +*/ - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// std::cout << "\nperm3b_layout...\n" << std::endl; @@ -427,8 +409,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3b_layout.toIndices(13, i, j, k); std::cout << "\tperm3b_layout.toIndices(13, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", 
" << j << ", " << k << ")\n" << std::endl; lin = perm3b_layout(2, 3, 1); @@ -438,8 +419,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3b_layout.toIndices(23, i, j, k); std::cout << "\tperm3b_layout.toIndices(23, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; lin = perm3b_layout(0, 2, 1); @@ -448,8 +428,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) << "(since perm is {1, 2, 0})" << std::endl; perm3b_layout.toIndices(15, i, j, k); std::cout << "\tperm3b_layout.toIndices(15, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; /// /// TODO... @@ -459,11 +438,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) /// data array 'a' with unit stride. /// - //----------------------------------------------------------------------------// - // - // Offset layouts apply offsets to indices - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Offset layouts apply offsets to indices +// +//----------------------------------------------------------------------------// std::cout << "\n Running offset layout cases...\n"; @@ -471,10 +450,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // Define some dimensions, and allocate arrays // constexpr int Ntot_ao = 40; - int* ao = new int[Ntot_ao]; - int* ao_ref = new int[Ntot_ao]; + int* ao = new int[ Ntot_ao ]; + int* ao_ref = new int[ Ntot_ao ]; - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 1D offset layout case...\n"; @@ -488,39 +467,33 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) int imin = -5; int imax = 
6; - for (int i = imin; i < imax; ++i) - { - ao_ref[i - imin] = i; + for (int i = imin; i < imax; ++i) { + ao_ref[ i-imin ] = i; } // _cstyle_offlayout1D_end - // printValues(ao_ref, imax-imin); +//printValues(ao_ref, imax-imin); - //----------------------------------------// +//----------------------------------------// std::memset(ao, 0, Ntot_ao * sizeof(int)); // _raja_offlayout1D_start - // clang-format off RAJA::OffsetLayout<1, int> offlayout_1D = - RAJA::make_offset_layout<1, int>( {{imin}}, {{imax}} ); + RAJA::make_offset_layout<1, int>( {{imin}}, {{imax}} ); - // clang-format on - // clang-format off RAJA::View< int, RAJA::OffsetLayout<1, int> > aoview_1Doff(ao, offlayout_1D); - // clang-format on - for (int i = imin; i < imax; ++i) - { + for (int i = imin; i < imax; ++i) { aoview_1Doff(i) = i; } // _raja_offlayout1D_end - checkResult(ao, ao_ref, imax - imin); - // printValues(ao, 11); + checkResult(ao, ao_ref, imax-imin); +//printValues(ao, 11); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D offset layout case...\n"; @@ -531,25 +504,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(ao_ref, 0, Ntot_ao * sizeof(int)); // _cstyle_offlayout2D_start - imin = -1; - imax = 2; + imin = -1; + imax = 2; int jmin = -5; int jmax = 5; iter = 0; - for (int i = imin; i < imax; ++i) - { - for (int j = jmin; j < jmax; ++j) - { - ao_ref[(j - jmin) + (i - imin) * (jmax - jmin)] = iter; + for (int i = imin; i < imax; ++i) { + for (int j = jmin; j < jmax; ++j) { + ao_ref[ (j-jmin) + (i-imin) * (jmax-jmin) ] = iter; iter++; } } // _cstyle_offlayout2D_end - // printValues(ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao_ref, (imax-imin)*(jmax-jmin)); - //----------------------------------------// +//----------------------------------------// std::memset(ao, 0, Ntot_ao * sizeof(int)); @@ -561,10 +532,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** 
RAJA_UNUSED_ARG(argv[])) /// same operations as the C-style example above. /// - checkResult(ao, ao_ref, (imax - imin) * (jmax - jmin)); - // printValues(ao, (imax-imin)*(jmax-jmin)); + checkResult(ao, ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao, (imax-imin)*(jmax-jmin)); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D permuted offset layout case...\n"; @@ -576,58 +547,50 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _cstyle_permofflayout2D_start iter = 0; - for (int j = jmin; j < jmax; ++j) - { - for (int i = imin; i < imax; ++i) - { - ao_ref[(i - imin) + (j - jmin) * (imax - imin)] = iter; + for (int j = jmin; j < jmax; ++j) { + for (int i = imin; i < imax; ++i) { + ao_ref[ (i-imin) + (j-jmin) * (imax-imin) ] = iter; iter++; } } // _cstyle_permofflayout2D_end - // printValues(ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao_ref, (imax-imin)*(jmax-jmin)); - //----------------------------------------// +//----------------------------------------// std::memset(ao, 0, Ntot_ao * sizeof(int)); // _raja_permofflayout2D_start std::array perm1D {{1, 0}}; - // clang-format off RAJA::OffsetLayout<2> permofflayout_2D = RAJA::make_permuted_offset_layout<2>( {{imin, jmin}}, {{imax, jmax}}, perm1D ); - // clang-format on - // clang-format off RAJA::View< int, RAJA::OffsetLayout<2> > aoview_2Dpermoff(ao, permofflayout_2D); - // clang-format on iter = 0; - for (int j = jmin; j < jmax; ++j) - { - for (int i = imin; i < imax; ++i) - { + for (int j = jmin; j < jmax; ++j) { + for (int i = imin; i < imax; ++i) { aoview_2Dpermoff(i, j) = iter; iter++; } } // _raja_permofflayout2D_end - checkResult(ao, ao_ref, (imax - imin) * (jmax - jmin)); - // printValues(ao, (imax-imin)*(jmax-jmin)); + checkResult(ao, ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao, (imax-imin)*(jmax-jmin)); - // - // Clean up. - // - delete[] ao; - delete[] ao_ref; +// +// Clean up. 
+// + delete [] ao; + delete [] ao_ref; - //----------------------------------------------------------------------------// - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// std::cout << "\n DONE!...\n"; @@ -637,7 +600,6 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // Function to check result and report P/F. // -// clang-format off template void checkResult(T* C, T* Cref, int N) { @@ -654,8 +616,6 @@ void checkResult(T* C, T* Cref, int N) } }; -// clang-format on -// clang-format off template void printValues(T* C, int N) { diff --git a/exercises/view-layout_solution.cpp b/exercises/view-layout_solution.cpp index 54db5c52d8..e6a2788b6c 100644 --- a/exercises/view-layout_solution.cpp +++ b/exercises/view-layout_solution.cpp @@ -22,9 +22,9 @@ * RAJA features shown: * - RAJA::View * - RAJA::Layout - * - Layout permutations + * - Layout permutations * - OffsetLayout - * - OffsetLayout permutations + * - OffsetLayout permutations * * NOTE: no RAJA kernel execution methods are used in these examples. 
*/ @@ -38,16 +38,16 @@ void checkResult(T* C, T* Cref, int N); template void printValues(T* C, int N); -int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) { std::cout << "\n\nRAJA view & layout exercises...\n"; - //----------------------------------------------------------------------------// - // - // Matrix-matrix multiplication: default layout - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Matrix-matrix multiplication: default layout +// +//----------------------------------------------------------------------------// // _matmult_init_start // @@ -58,103 +58,95 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // Allocate storage for matrices and initialize matrix entries // - double* A = new double[N * N]; - double* B = new double[N * N]; - double* C = new double[N * N]; - double* Cref = new double[N * N]; - - for (int row = 0; row < N; ++row) - { - for (int col = 0; col < N; ++col) - { - A[col + N * row] = row + 1; - B[col + N * row] = col + 1; - C[col + N * row] = 0.0; - Cref[col + N * row] = 0.0; + double *A = new double[ N * N ]; + double *B = new double[ N * N ]; + double *C = new double[ N * N ]; + double *Cref = new double[ N * N ]; + + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + A[ col + N*row ] = row + 1; + B[ col + N*row ] = col + 1; + C[ col + N*row ] = 0.0; + Cref[ col + N*row ] = 0.0; } } // _matmult_init_end - // printValues(A, N*N); - // printValues(B, N*N); - // printValues(C, N*N); - // printValues(Cref, N*N); +//printValues(A, N*N); +//printValues(B, N*N); +//printValues(C, N*N); +//printValues(Cref, N*N); - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// 
std::cout << "\n Running matrix multiplication reference solution...\n"; // _cstyle_matmult_start - for (int row = 0; row < N; ++row) - { - for (int col = 0; col < N; ++col) - { - for (int k = 0; k < N; ++k) - { - Cref[col + N * row] += A[k + N * row] * B[col + N * k]; + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + for (int k = 0; k < N; ++k) { + Cref[col + N*row] += A[k + N*row] * B[col + N*k]; } } } // _cstyle_matmult_end - // printValues(Cref, N*N); +//printValues(Cref, N*N); - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// std::cout << "\n Running matrix multiplication w/Views...\n"; - // + // // Define RAJA View objects to simplify access to the matrix entries. - // - // Note: we use default Layout + // + // Note: we use default Layout and specify unit stride // // _matmult_views_start - RAJA::View> Aview(A, N, N); - RAJA::View> Bview(B, N, N); - RAJA::View> Cview(C, N, N); + RAJA::View< double, RAJA::Layout<2, int, 1> > Aview(A, N, N); + RAJA::View< double, RAJA::Layout<2, int, 1> > Bview(B, N, N); + RAJA::View< double, RAJA::Layout<2, int, 1> > Cview(C, N, N); // _matmult_views_end // _cstyle_matmult_views_start - for (int row = 0; row < N; ++row) - { - for (int col = 0; col < N; ++col) - { - for (int k = 0; k < N; ++k) - { + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + for (int k = 0; k < N; ++k) { Cview(row, col) += Aview(row, k) * Bview(k, col); } } } // _cstyle_matmult_views_end - checkResult(C, Cref, N * N); - // printValues(C, N*N); + checkResult(C, Cref, N*N); +//printValues(C, N*N); - // - // Clean up. - // - delete[] A; - delete[] B; - delete[] C; - delete[] Cref; +// +// Clean up. 
+// + delete [] A; + delete [] B; + delete [] C; + delete [] Cref; - //----------------------------------------------------------------------------// - // - // Default layouts use row-major data ordering - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Default layouts use row-major data ordering +// +//----------------------------------------------------------------------------// // // Define dimensions and allocate arrays // // _default_views_init_start - constexpr int Nx = 3; - constexpr int Ny = 5; - constexpr int Nz = 2; - constexpr int Ntot = Nx * Ny * Nz; - int* a = new int[Ntot]; - int* aref = new int[Ntot]; + constexpr int Nx = 3; + constexpr int Ny = 5; + constexpr int Nz = 2; + constexpr int Ntot = Nx*Ny*Nz; + int* a = new int[ Ntot ]; + int* aref = new int[ Ntot ]; for (int i = 0; i < Ntot; ++i) { @@ -162,67 +154,61 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) } // _default_views_init_end - // printValues(ref, Ntot); +//printValues(ref, Ntot); - //----------------------------------------// +//----------------------------------------// std::cout << "\n Running default layout view cases...\n"; std::cout << "\n\t Running 1D view case...\n"; - + std::memset(a, 0, Ntot * sizeof(int)); + + // _default_view1D_start + RAJA::View< int, RAJA::Layout<1, int, 0> > view_1D(a, Ntot); - // _default_view1D_start - RAJA::View> view_1D(a, Ntot); - - for (int i = 0; i < Ntot; ++i) - { + for (int i = 0; i < Ntot; ++i) { view_1D(i) = i; } - // _default_view1D_end + // _default_view1D_end checkResult(a, aref, Ntot); - // printValues(a, Ntot); +//printValues(a, Ntot); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D default layout view case...\n"; std::memset(a, 0, Ntot * sizeof(int)); - + // _default_view2D_start - RAJA::View> view_2D(a, Nx, Ny); + 
RAJA::View< int, RAJA::Layout<2, int, 1> > view_2D(a, Nx, Ny); - int iter {0}; - for (int i = 0; i < Nx; ++i) - { - for (int j = 0; j < Ny; ++j) - { + int iter{0}; + for (int i = 0; i < Nx; ++i) { + for (int j = 0; j < Ny; ++j) { view_2D(i, j) = iter; ++iter; } } // _default_view2D_end - checkResult(a, aref, Nx * Ny); - // printValues(a, Nx*Ny); + checkResult(a, aref, Nx*Ny); +//printValues(a, Nx*Ny); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D default layout view case...\n"; std::memset(a, 0, Ntot * sizeof(int)); - // _default_view3D_start - RAJA::View> view_3D(a, Nx, Ny, Nz); + // _default_view3D_start + RAJA::View< int, RAJA::Layout<3, int, 2> > view_3D(a, Nx, Ny, Nz); iter = 0; - for (int i = 0; i < Nx; ++i) - { - for (int j = 0; j < Ny; ++j) - { - for (int k = 0; k < Nz; ++k) - { + for (int i = 0; i < Nx; ++i) { + for (int j = 0; j < Ny; ++j) { + for (int k = 0; k < Nz; ++k) { view_3D(i, j, k) = iter; ++iter; } @@ -230,18 +216,18 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) } // _default_view3D_end - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); +//printValues(a, Nx*Ny*Nz); - //----------------------------------------------------------------------------// - // - // Permuted layouts change the data striding order - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Permuted layouts change the data striding order +// +//----------------------------------------------------------------------------// std::cout << "\n Running permuted layout cases...\n"; - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D default permutation view case...\n"; @@ -249,25 +235,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** 
RAJA_UNUSED_ARG(argv[])) // _default_perm_view2D_start std::array defperm2 {{0, 1}}; - RAJA::Layout<2, int> defperm2_layout = - RAJA::make_permuted_layout({{Nx, Ny}}, defperm2); - RAJA::View> defperm_view_2D(a, defperm2_layout); + RAJA::Layout< 2, int> defperm2_layout = + RAJA::make_permuted_layout( {{Nx, Ny}}, defperm2); + RAJA::View< int, RAJA::Layout<2, int, 1> > defperm_view_2D(a, defperm2_layout); iter = 0; - for (int i = 0; i < Nx; ++i) - { - for (int j = 0; j < Ny; ++j) - { + for (int i = 0; i < Nx; ++i) { + for (int j = 0; j < Ny; ++j) { defperm_view_2D(i, j) = iter; ++iter; } } // _default_perm_view2D_end - checkResult(a, aref, Nx * Ny); - // printValues(a, Nx*Ny); + checkResult(a, aref, Nx*Ny); +//printValues(a, Nx*Ny); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D default permutation view case...\n"; @@ -275,17 +259,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _default_perm_view3D_start std::array defperm3 {{0, 1, 2}}; - RAJA::Layout<3, int> defperm3_layout = - RAJA::make_permuted_layout({{Nx, Ny, Nz}}, defperm3); - RAJA::View> defperm_view_3D(a, defperm3_layout); + RAJA::Layout< 3, int > defperm3_layout = + RAJA::make_permuted_layout( {{Nx, Ny, Nz}}, defperm3); + RAJA::View< int, RAJA::Layout<3, int, 2> > defperm_view_3D(a, defperm3_layout); iter = 0; - for (int i = 0; i < Nx; ++i) - { - for (int j = 0; j < Ny; ++j) - { - for (int k = 0; k < Nz; ++k) - { + for (int i = 0; i < Nx; ++i) { + for (int j = 0; j < Ny; ++j) { + for (int k = 0; k < Nz; ++k) { defperm_view_3D(i, j, k) = iter; ++iter; } @@ -293,11 +274,11 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) } // _default_perm_view3D_end - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); +//printValues(a, Nx*Ny*Nz); - //----------------------------------------// - //----------------------------------------// 
+//----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D permuted layout view case...\n"; @@ -305,25 +286,23 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _perm_2D_start std::array perm2 {{1, 0}}; - RAJA::Layout<2, int> perm2_layout = - RAJA::make_permuted_layout({{Nx, Ny}}, perm2); - RAJA::View> perm_view_2D(a, perm2_layout); + RAJA::Layout< 2, int> perm2_layout = + RAJA::make_permuted_layout( {{Nx, Ny}}, perm2); + RAJA::View< int, RAJA::Layout<2, int, 0> > perm_view_2D(a, perm2_layout); iter = 0; - for (int j = 0; j < Ny; ++j) - { - for (int i = 0; i < Nx; ++i) - { + for (int j = 0; j < Ny; ++j) { + for (int i = 0; i < Nx; ++i) { perm_view_2D(i, j) = iter; ++iter; } } // _perm_2D_end - checkResult(a, aref, Nx * Ny); - // printValues(a, Nx*Ny); + checkResult(a, aref, Nx*Ny); +//printValues(a, Nx*Ny); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D perma layout view case...\n"; @@ -331,17 +310,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _perma_view3D_start std::array perm3a {{2, 1, 0}}; - RAJA::Layout<3, int> perm3a_layout = - RAJA::make_permuted_layout({{Nx, Ny, Nz}}, perm3a); - RAJA::View> perm3a_view_3D(a, perm3a_layout); + RAJA::Layout< 3, int> perm3a_layout = + RAJA::make_permuted_layout( {{Nx, Ny, Nz}}, perm3a); + RAJA::View< int, RAJA::Layout<3, int, 0> > perm3a_view_3D(a, perm3a_layout); iter = 0; - for (int k = 0; k < Nz; ++k) - { - for (int j = 0; j < Ny; ++j) - { - for (int i = 0; i < Nx; ++i) - { + for (int k = 0; k < Nz; ++k) { + for (int j = 0; j < Ny; ++j) { + for (int i = 0; i < Nx; ++i) { perm3a_view_3D(i, j, k) = iter; ++iter; } @@ -349,10 +325,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) } // _perma_view3D_end - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); 
+//printValues(a, Nx*Ny*Nz); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 3D permb layout view case...\n"; @@ -360,17 +336,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _permb_view3D_start std::array perm3b {{1, 2, 0}}; - RAJA::Layout<3, int> perm3b_layout = - RAJA::make_permuted_layout({{Nx, Ny, Nz}}, perm3b); - RAJA::View> perm3b_view_3D(a, perm3b_layout); + RAJA::Layout< 3, int > perm3b_layout = + RAJA::make_permuted_layout( {{Nx, Ny, Nz}}, perm3b); + RAJA::View< int, RAJA::Layout<3, int, 0> > perm3b_view_3D(a, perm3b_layout); iter = 0; - for (int j = 0; j < Ny; ++j) - { - for (int k = 0; k < Nz; ++k) - { - for (int i = 0; i < Nx; ++i) - { + for (int j = 0; j < Ny; ++j) { + for (int k = 0; k < Nz; ++k) { + for (int i = 0; i < Nx; ++i) { perm3b_view_3D(i, j, k) = iter; ++iter; } @@ -378,29 +351,29 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) } // _permb_view3D_end - checkResult(a, aref, Nx * Ny * Nz); - // printValues(a, Nx*Ny*Nz); + checkResult(a, aref, Nx*Ny*Nz); +//printValues(a, Nx*Ny*Nz); - // - // Clean up. - // - delete[] a; - delete[] aref; +// +// Clean up. +// + delete [] a; + delete [] aref; - //----------------------------------------------------------------------------// - // - // Layouts: multi-dimensional indices vs. linear indicies - // - // RAJA::Layout type has methods that can be used to convert between - // multi-dimensional and linear indices. We show these below using the - // three-dimensional layouts in the examples above. Recall the Nx, Ny, Nz - // sizes defined earlier: - // - // constexpr int Nx = 3; - // constexpr int Ny = 5; - // constexpr int Nz = 2; - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Layouts: multi-dimensional indices vs. 
linear indicies +// +// RAJA::Layout type has methods that can be used to convert between +// multi-dimensional and linear indices. We show these below using the +// three-dimensional layouts in the examples above. Recall the Nx, Ny, Nz +// sizes defined earlier: +// +// constexpr int Nx = 3; +// constexpr int Ny = 5; +// constexpr int Nz = 2; +// +//----------------------------------------------------------------------------// std::cout << "\n Multi-dimensional indices to linear indices...\n"; @@ -408,9 +381,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::cout << "\nperm3a_layout...\n" << std::endl; int lin = -1; - int i = -1; - int j = -1; - int k = -1; + int i = -1; + int j = -1; + int k = -1; // _perm3d_layout_start lin = perm3a_layout(1, 2, 0); @@ -420,8 +393,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3a_layout.toIndices(7, i, j, k); std::cout << "\tperm3a_layout.toIndices(7, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; // _perm3d_layout_end @@ -432,8 +404,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3a_layout.toIndices(26, i, j, k); std::cout << "\tperm3a_layout.toIndices(26, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; lin = perm3a_layout(0, 2, 1); @@ -443,10 +414,9 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3a_layout.toIndices(21, i, j, k); std::cout << "\tperm3a_layout.toIndices(21, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// std::cout << 
"\nperm3b_layout...\n" << std::endl; @@ -457,8 +427,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3b_layout.toIndices(13, i, j, k); std::cout << "\tperm3b_layout.toIndices(13, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; lin = perm3b_layout(2, 3, 1); @@ -468,8 +437,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3b_layout.toIndices(23, i, j, k); std::cout << "\tperm3b_layout.toIndices(23, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; lin = perm3b_layout(0, 2, 1); @@ -479,14 +447,13 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) perm3b_layout.toIndices(15, i, j, k); std::cout << "\tperm3b_layout.toIndices(15, i, j, k) --> (i, j, k) = " - << "(" << i << ", " << j << ", " << k << ")\n" - << std::endl; + << "(" << i << ", " << j << ", " << k << ")\n" << std::endl; - //----------------------------------------------------------------------------// - // - // Offset layouts apply offsets to indices - // - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +// +// Offset layouts apply offsets to indices +// +//----------------------------------------------------------------------------// std::cout << "\n Running offset layout cases...\n"; @@ -494,10 +461,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // Define some dimensions, and allocate arrays // constexpr int Ntot_ao = 40; - int* ao = new int[Ntot_ao]; - int* ao_ref = new int[Ntot_ao]; + int* ao = new int[ Ntot_ao ]; + int* ao_ref = new int[ Ntot_ao ]; - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 1D offset layout 
case...\n"; @@ -511,39 +478,33 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) int imin = -5; int imax = 6; - for (int i = imin; i < imax; ++i) - { - ao_ref[i - imin] = i; + for (int i = imin; i < imax; ++i) { + ao_ref[ i-imin ] = i; } // _cstyle_offlayout1D_end - // printValues(ao_ref, imax-imin); +//printValues(ao_ref, imax-imin); - //----------------------------------------// +//----------------------------------------// std::memset(ao, 0, Ntot_ao * sizeof(int)); // _raja_offlayout1D_start - // clang-format off RAJA::OffsetLayout<1, int> offlayout_1D = - RAJA::make_offset_layout<1, int>( {{imin}}, {{imax}} ); + RAJA::make_offset_layout<1, int>( {{imin}}, {{imax}} ); - // clang-format on - // clang-format off RAJA::View< int, RAJA::OffsetLayout<1, int> > aoview_1Doff(ao, offlayout_1D); - // clang-format on - for (int i = imin; i < imax; ++i) - { + for (int i = imin; i < imax; ++i) { aoview_1Doff(i) = i; } // _raja_offlayout1D_end - checkResult(ao, ao_ref, imax - imin); - // printValues(ao, 11); + checkResult(ao, ao_ref, imax-imin); +//printValues(ao, 11); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D offset layout case...\n"; @@ -554,53 +515,45 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) std::memset(ao_ref, 0, Ntot_ao * sizeof(int)); // _cstyle_offlayout2D_start - imin = -1; - imax = 2; + imin = -1; + imax = 2; int jmin = -5; int jmax = 5; iter = 0; - for (int i = imin; i < imax; ++i) - { - for (int j = jmin; j < jmax; ++j) - { - ao_ref[(j - jmin) + (i - imin) * (jmax - jmin)] = iter; + for (int i = imin; i < imax; ++i) { + for (int j = jmin; j < jmax; ++j) { + ao_ref[ (j-jmin) + (i-imin) * (jmax-jmin) ] = iter; iter++; } } // _cstyle_offlayout2D_end - // printValues(ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao_ref, (imax-imin)*(jmax-jmin)); - //----------------------------------------// 
+//----------------------------------------// std::memset(ao, 0, Ntot_ao * sizeof(int)); // _raja_offlayout2D_start - // clang-format off RAJA::OffsetLayout<2, int> offlayout_2D = RAJA::make_offset_layout<2, int>( {{imin, jmin}}, {{imax, jmax}} ); - // clang-format on - // clang-format off RAJA::View< int, RAJA::OffsetLayout<2, int> > aoview_2Doff(ao, offlayout_2D); iter = 0; - // clang-format on - for (int i = imin; i < imax; ++i) - { - for (int j = jmin; j < jmax; ++j) - { + for (int i = imin; i < imax; ++i) { + for (int j = jmin; j < jmax; ++j) { aoview_2Doff(i, j) = iter; iter++; } } // _raja_offlayout2D_end - checkResult(ao, ao_ref, (imax - imin) * (jmax - jmin)); - // printValues(ao, (imax-imin)*(jmax-jmin)); + checkResult(ao, ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao, (imax-imin)*(jmax-jmin)); - //----------------------------------------// +//----------------------------------------// std::cout << "\n\t Running 2D permuted offset layout case...\n"; @@ -612,58 +565,50 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // _cstyle_permofflayout2D_start iter = 0; - for (int j = jmin; j < jmax; ++j) - { - for (int i = imin; i < imax; ++i) - { - ao_ref[(i - imin) + (j - jmin) * (imax - imin)] = iter; + for (int j = jmin; j < jmax; ++j) { + for (int i = imin; i < imax; ++i) { + ao_ref[ (i-imin) + (j-jmin) * (imax-imin) ] = iter; iter++; } } // _cstyle_permofflayout2D_end - // printValues(ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao_ref, (imax-imin)*(jmax-jmin)); - //----------------------------------------// +//----------------------------------------// std::memset(ao, 0, Ntot_ao * sizeof(int)); // _raja_permofflayout2D_start std::array perm1D {{1, 0}}; - // clang-format off RAJA::OffsetLayout<2> permofflayout_2D = RAJA::make_permuted_offset_layout<2>( {{imin, jmin}}, {{imax, jmax}}, perm1D ); - // clang-format on - // clang-format off RAJA::View< int, RAJA::OffsetLayout<2> > aoview_2Dpermoff(ao, permofflayout_2D); - // 
clang-format on iter = 0; - for (int j = jmin; j < jmax; ++j) - { - for (int i = imin; i < imax; ++i) - { + for (int j = jmin; j < jmax; ++j) { + for (int i = imin; i < imax; ++i) { aoview_2Dpermoff(i, j) = iter; iter++; } } // _raja_permofflayout2D_end - checkResult(ao, ao_ref, (imax - imin) * (jmax - jmin)); - // printValues(ao, (imax-imin)*(jmax-jmin)); + checkResult(ao, ao_ref, (imax-imin)*(jmax-jmin)); +//printValues(ao, (imax-imin)*(jmax-jmin)); - // - // Clean up. - // - delete[] ao; - delete[] ao_ref; +// +// Clean up. +// + delete [] ao; + delete [] ao_ref; - //----------------------------------------------------------------------------// - //----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// +//----------------------------------------------------------------------------// std::cout << "\n DONE!...\n"; @@ -673,7 +618,6 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) // // Function to check result and report P/F. 
// -// clang-format off template void checkResult(T* C, T* Cref, int N) { @@ -690,8 +634,6 @@ void checkResult(T* C, T* Cref, int N) } }; -// clang-format on -// clang-format off template void printValues(T* C, int N) { diff --git a/host-configs/lc-builds/toss4/icpx_X.cmake b/host-configs/lc-builds/toss4/icpx_X.cmake index 2f5301bd22..a1499ce08d 100755 --- a/host-configs/lc-builds/toss4/icpx_X.cmake +++ b/host-configs/lc-builds/toss4/icpx_X.cmake @@ -8,8 +8,8 @@ set(RAJA_COMPILER "RAJA_COMPILER_ICC" CACHE STRING "") ##set(COMMON_FLAGS "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1") -##set(COMMON_OPT_FLAGS "-march=native -finline-functions -fp-model=precise") -set(COMMON_OPT_FLAGS "-march=native -finline-functions") +set(COMMON_OPT_FLAGS "-march=native -finline-functions -fp-model=precise") +#set(COMMON_OPT_FLAGS "-march=native -finline-functions") ##set(CMAKE_CXX_FLAGS_RELEASE "${COMMON_FLAGS} -O3 ${COMMON_OPT_FLAGS}" CACHE STRING "") ##set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_FLAGS} -O3 -g ${COMMON_OPT_FLAGS}" CACHE STRING "") diff --git a/include/RAJA/policy/atomic_builtin.hpp b/include/RAJA/policy/atomic_builtin.hpp index 34755fa49d..e43bd71386 100644 --- a/include/RAJA/policy/atomic_builtin.hpp +++ b/include/RAJA/policy/atomic_builtin.hpp @@ -22,7 +22,7 @@ #include -#if defined(RAJA_COMPILER_MSVC) || (defined(_WIN32) && defined(__INTEL_COMPILER)) +#if defined(RAJA_COMPILER_MSVC) || ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER)) #include #endif @@ -48,7 +48,7 @@ struct builtin_atomic { namespace detail { -#if defined(RAJA_COMPILER_MSVC) || (defined(_WIN32) && defined(__INTEL_COMPILER)) +#if defined(RAJA_COMPILER_MSVC) || ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER)) /*! 
@@ -120,11 +120,14 @@ RAJA_INLINE long builtin_atomicOr(long *acc, long value) return _InterlockedOr(acc, value); } +#if defined(_WIN64) + RAJA_INLINE long long builtin_atomicOr(long long *acc, long long value) { return _InterlockedOr64(acc, value); } +#endif /*! * Atomic load using atomic or @@ -155,11 +158,15 @@ RAJA_INLINE long builtin_atomicExchange(long *acc, long value) return _InterlockedExchange(acc, value); } +#if defined(_WIN64) + RAJA_INLINE long long builtin_atomicExchange(long long *acc, long long value) { return _InterlockedExchange64(acc, value); } +#endif + /*! * Atomic store using atomic exchange @@ -190,11 +197,15 @@ RAJA_INLINE long builtin_atomicCAS(long *acc, long compare, long value) return _InterlockedCompareExchange(acc, value, compare); } +#if defined(_WIN64) + RAJA_INLINE long long builtin_atomicCAS(long long *acc, long long compare, long long value) { return _InterlockedCompareExchange64(acc, value, compare); } +#endif + /*! * Atomic addition using intrinsics @@ -214,11 +225,15 @@ RAJA_INLINE long builtin_atomicAdd(long *acc, long value) return _InterlockedExchangeAdd(acc, value); } +#if defined(_WIN64) + RAJA_INLINE long long builtin_atomicAdd(long long *acc, long long value) { return _InterlockedExchangeAdd64(acc, value); } +#endif + /*! * Atomic subtraction using intrinsics @@ -238,11 +253,15 @@ RAJA_INLINE long builtin_atomicSub(long *acc, long value) return _InterlockedExchangeAdd(acc, -value); } +#if defined(_WIN64) + RAJA_INLINE long long builtin_atomicSub(long long *acc, long long value) { return _InterlockedExchangeAdd64(acc, -value); } +#endif + /*! * Atomic and using intrinsics @@ -262,11 +281,15 @@ RAJA_INLINE long builtin_atomicAnd(long *acc, long value) return _InterlockedAnd(acc, value); } +#if defined(_WIN64) + RAJA_INLINE long long builtin_atomicAnd(long long *acc, long long value) { return _InterlockedAnd64(acc, value); } +#endif + /*! 
* Atomic xor using intrinsics @@ -286,11 +309,15 @@ RAJA_INLINE long builtin_atomicXor(long *acc, long value) return _InterlockedXor(acc, value); } +#if defined(_WIN64) + RAJA_INLINE long long builtin_atomicXor(long long *acc, long long value) { return _InterlockedXor64(acc, value); } +#endif + #else // RAJA_COMPILER_MSVC diff --git a/include/RAJA/policy/openmp_target/params/reduce.hpp b/include/RAJA/policy/openmp_target/params/reduce.hpp index 6127eef226..34c23fb5db 100644 --- a/include/RAJA/policy/openmp_target/params/reduce.hpp +++ b/include/RAJA/policy/openmp_target/params/reduce.hpp @@ -26,7 +26,7 @@ namespace detail { // Resolve template camp::concepts::enable_if< type_traits::is_target_openmp_policy > - resolve(Reducer& red) { + resolve(Reducer& red) { red.combineTarget(red.m_valop.val); } diff --git a/scripts/lc-builds/toss4_icpx.sh b/scripts/lc-builds/toss4_icpx.sh index 88cc43d824..d7d7c7dd85 100755 --- a/scripts/lc-builds/toss4_icpx.sh +++ b/scripts/lc-builds/toss4_icpx.sh @@ -35,12 +35,17 @@ module load cmake/3.23.1 # times at a potential cost of slower 'forall' execution. ## -source /usr/tce/packages/intel/intel-${COMP_VER}/setvars.sh +if [[ ${COMP_VER} == 2024.2.1 ]] +then + source /collab/usr/global/tools/intel/toss_4_x86_64_ib/oneapi-2024.2.1/setvars.sh +else + source /usr/tce/packages/intel/intel-${COMP_VER}/setvars.sh +fi cmake \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icpx \ - -DCMAKE_C_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icx \ + -DCMAKE_CXX_COMPILER=icpx \ + -DCMAKE_C_COMPILER=icx \ -DBLT_CXX_STD=c++14 \ -C ../host-configs/lc-builds/toss4/icpx_X.cmake \ -DRAJA_ENABLE_FORCEINLINE_RECURSIVE=Off \ @@ -49,3 +54,20 @@ cmake \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. 
+ +if [[ ${COMP_VER} == 2024.2.1 ]] +then + +echo +echo "***********************************************************************" +echo +echo "cd into directory build_${BUILD_SUFFIX} and run make to build RAJA" +echo +echo "To successfully build and run all tests, you may need to run the" +echo "command to make sure your environment is set up properly:" +echo +echo " source /collab/usr/global/tools/intel/toss_4_x86_64_ib/oneapi-2024.2.1/setvars.sh" +echo +echo "***********************************************************************" + +fi diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 00c06c2d02..9634711c8b 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 00c06c2d0258802fbf4a57ff987314d4acd9f629 +Subproject commit 9634711c8bc0e8cbc6a4ae4c4fe81161d48d5d12 diff --git a/test/unit/multi_reducer/CMakeLists.txt b/test/unit/multi_reducer/CMakeLists.txt index 6453fa66cb..94bbbc68d9 100644 --- a/test/unit/multi_reducer/CMakeLists.txt +++ b/test/unit/multi_reducer/CMakeLists.txt @@ -37,9 +37,10 @@ if(RAJA_ENABLE_OPENMP) list(APPEND BACKENDS OpenMP) endif() -if(RAJA_ENABLE_TARGET_OPENMP) - list(APPEND BACKENDS OpenMPTarget) -endif() +# Add this back in when OpenMP Target implementation exists for multi-reducer +#if(RAJA_ENABLE_TARGET_OPENMP) +# list(APPEND BACKENDS OpenMPTarget) +#endif() if(RAJA_ENABLE_CUDA) list(APPEND BACKENDS Cuda)