Skip to content

Commit

Permalink
Merge pull request #92 from LLNL/v0.9.0-rc
Browse files Browse the repository at this point in the history
V0.9.0 rc
  • Loading branch information
rhornung67 authored Nov 4, 2020
2 parents 94c65b2 + 26aaff6 commit 064dd17
Show file tree
Hide file tree
Showing 108 changed files with 6,616 additions and 81 deletions.
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ if (ENABLE_OPENMP)
endif ()

set(RAJA_PERFSUITE_VERSION_MAJOR 0)
set(RAJA_PERFSUITE_VERSION_MINOR 8)
set(RAJA_PERFSUITE_VERSION_MINOR 9)
set(RAJA_PERFSUITE_VERSION_PATCHLEVEL 0)

set(RAJA_PERFSUITE_DEPENDS RAJA)
Expand All @@ -81,6 +81,9 @@ endif()
if (ENABLE_CUDA)
list(APPEND RAJA_PERFSUITE_DEPENDS cuda)
endif()
if (ENABLE_HIP)
list(APPEND RAJA_PERFSUITE_DEPENDS hip)
endif()

set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE})
set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME})
Expand All @@ -92,6 +95,10 @@ if (ENABLE_CUDA)
set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}")
list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER})
set(RAJAPERF_COMPILER_OPTIONS "${CUDA_NVCC_FLAGS}")
elseif (ENABLE_HIP)
set(RAJAPERF_COMPILER "${HIP_HIPCC_EXECUTABLE}")
list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER})
set(RAJAPERF_COMPILER_OPTIONS "${HIP_HIPCC_FLAGS}")
else()
set(RAJAPERF_COMPILER "${CMAKE_CXX_COMPILER}")
string(TOUPPER ${CMAKE_BUILD_TYPE} RAJAPERF_BUILD_TYPE)
Expand All @@ -104,6 +111,7 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in

# Make sure RAJA flag propagate
set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS})
set (HIP_HIPCC_FLAGS ${RAJA_HIPCC_FLAGS})

#
# Each directory in the perf suite has its own CMakeLists.txt file.
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,15 @@ is used to assess, monitor, and compare runtime performance of kernels
implemented using RAJA and variants implemented using standard or
vendor-supported parallel programming models directly. Each kernel in the
suite appears in multiple RAJA and non-RAJA (i.e., baseline) variants using
parallel programming models such as OpenMP and CUDA.
various parallel programming models supported as RAJA back-ends. Current kernel
variants include:

* Sequential
* OpenMP CPU multithreading
* CUDA (NVIDIA GPUs)
* HIP (AMD GPUs)
* OpenMP target offload


The kernels originate from various HPC benchmark suites and applications.
Kernels are partitioned into "groups" -- each group
Expand Down
31 changes: 31 additions & 0 deletions scripts/lc-builds/toss3_hipcc3.6.0.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash

###############################################################################
# Copyright (c) 2016-20, Lawrence Livermore National Security, LLC
# and RAJA project contributors. See the RAJA/COPYRIGHT file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

BUILD_SUFFIX=lc_toss3-hipcc-3.6.0
RAJA_HOSTCONFIG=../tpl/RAJA/host-configs/lc-builds/toss3/hip.cmake

rm -rf build_${BUILD_SUFFIX} >/dev/null
mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX}


module load cmake/3.14.5

cmake \
-DCMAKE_BUILD_TYPE=Release \
-DHIP_ROOT_DIR="/opt/rocm-3.6.0/hip" \
-DHIP_CLANG_PATH=/opt/rocm-3.6.0/llvm/bin \
-DCMAKE_C_COMPILER=/opt/rocm-3.6.0/llvm/bin/clang \
-DCMAKE_CXX_COMPILER=/opt/rocm-3.6.0/llvm/bin/clang++ \
-C ${RAJA_HOSTCONFIG} \
-DENABLE_HIP=ON \
-DENABLE_OPENMP=OFF \
-DENABLE_CUDA=OFF \
-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
"$@" \
..
29 changes: 29 additions & 0 deletions scripts/lc-builds/toss3_hipcc3.8.0.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash

###############################################################################
# Copyright (c) 2016-20, Lawrence Livermore National Security, LLC
# and RAJA project contributors. See the RAJA/COPYRIGHT file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

BUILD_SUFFIX=lc_toss3-hipcc-3.8.0
RAJA_HOSTCONFIG=../tpl/RAJA/host-configs/lc-builds/toss3/hip.cmake

rm -rf build_${BUILD_SUFFIX} >/dev/null
mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX}


module load cmake/3.14.5

cmake \
-DCMAKE_BUILD_TYPE=Release \
-DRAJA_HIPCC_FLAGS="--amdgpu-target=gfx906" \
-DHIP_ROOT_DIR=/opt/rocm-3.8.0/hip \
-C ${RAJA_HOSTCONFIG} \
-DENABLE_HIP=ON \
-DENABLE_OPENMP=OFF \
-DENABLE_CUDA=OFF \
-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
"$@" \
..
9 changes: 8 additions & 1 deletion src/apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,43 @@ blt_add_library(
SOURCES AppsData.cpp
DEL_DOT_VEC_2D.cpp
DEL_DOT_VEC_2D-Seq.cpp
DEL_DOT_VEC_2D-Hip.cpp
DEL_DOT_VEC_2D-Cuda.cpp
DEL_DOT_VEC_2D-OMP.cpp
DEL_DOT_VEC_2D-OMPTarget.cpp
ENERGY.cpp
ENERGY-Seq.cpp
ENERGY-Hip.cpp
ENERGY-Cuda.cpp
ENERGY-OMP.cpp
ENERGY-OMPTarget.cpp
FIR.cpp
FIR-Seq.cpp
FIR-Hip.cpp
FIR-Cuda.cpp
FIR-OMP.cpp
FIR-OMPTarget.cpp
LTIMES.cpp
LTIMES-Seq.cpp
LTIMES-Hip.cpp
LTIMES-Cuda.cpp
LTIMES-OMP.cpp
LTIMES-OMPTarget.cpp
LTIMES_NOVIEW.cpp
LTIMES_NOVIEW-Seq.cpp
LTIMES_NOVIEW-Hip.cpp
LTIMES_NOVIEW-Cuda.cpp
LTIMES_NOVIEW-OMP.cpp
LTIMES_NOVIEW-OMPTarget.cpp
PRESSURE.cpp
PRESSURE-Seq.cpp
PRESSURE-Hip.cpp
PRESSURE-Cuda.cpp
PRESSURE-OMP.cpp
PRESSURE-OMP.cpp
PRESSURE-OMPTarget.cpp
VOL3D.cpp
VOL3D-Seq.cpp
VOL3D-Hip.cpp
VOL3D-Cuda.cpp
VOL3D-OMP.cpp
VOL3D-OMPTarget.cpp
Expand Down
142 changes: 142 additions & 0 deletions src/apps/DEL_DOT_VEC_2D-Hip.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/COPYRIGHT file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "DEL_DOT_VEC_2D.hpp"

#include "RAJA/RAJA.hpp"

#if defined(RAJA_ENABLE_HIP)

#include "common/HipDataUtils.hpp"

#include "AppsData.hpp"

#include "camp/resource.hpp"

#include <iostream>

namespace rajaperf
{
namespace apps
{

//
// Define thread block size for HIP execution
//
const size_t block_size = 256;


#define DEL_DOT_VEC_2D_DATA_SETUP_HIP \
allocAndInitHipDeviceData(x, m_x, m_array_length); \
allocAndInitHipDeviceData(y, m_y, m_array_length); \
allocAndInitHipDeviceData(xdot, m_xdot, m_array_length); \
allocAndInitHipDeviceData(ydot, m_ydot, m_array_length); \
allocAndInitHipDeviceData(div, m_div, m_array_length); \
allocAndInitHipDeviceData(real_zones, m_domain->real_zones, iend);

#define DEL_DOT_VEC_2D_DATA_TEARDOWN_HIP \
getHipDeviceData(m_div, div, m_array_length); \
deallocHipDeviceData(x); \
deallocHipDeviceData(y); \
deallocHipDeviceData(xdot); \
deallocHipDeviceData(ydot); \
deallocHipDeviceData(div); \
deallocHipDeviceData(real_zones);

__global__ void deldotvec2d(Real_ptr div,
const Real_ptr x1, const Real_ptr x2,
const Real_ptr x3, const Real_ptr x4,
const Real_ptr y1, const Real_ptr y2,
const Real_ptr y3, const Real_ptr y4,
const Real_ptr fx1, const Real_ptr fx2,
const Real_ptr fx3, const Real_ptr fx4,
const Real_ptr fy1, const Real_ptr fy2,
const Real_ptr fy3, const Real_ptr fy4,
const Index_ptr real_zones,
const Real_type half, const Real_type ptiny,
Index_type iend)
{
Index_type ii = blockIdx.x * blockDim.x + threadIdx.x;
if (ii < iend) {
DEL_DOT_VEC_2D_BODY_INDEX;
DEL_DOT_VEC_2D_BODY;
}
}


void DEL_DOT_VEC_2D::runHipVariant(VariantID vid)
{
const Index_type run_reps = getRunReps();
const Index_type iend = m_domain->n_real_zones;

DEL_DOT_VEC_2D_DATA_SETUP;

if ( vid == Base_HIP ) {

DEL_DOT_VEC_2D_DATA_SETUP_HIP;

NDSET2D(m_domain->jp, x,x1,x2,x3,x4) ;
NDSET2D(m_domain->jp, y,y1,y2,y3,y4) ;
NDSET2D(m_domain->jp, xdot,fx1,fx2,fx3,fx4) ;
NDSET2D(m_domain->jp, ydot,fy1,fy2,fy3,fy4) ;

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size);

hipLaunchKernelGGL((deldotvec2d), dim3(grid_size), dim3(block_size), 0, 0, div,
x1, x2, x3, x4,
y1, y2, y3, y4,
fx1, fx2, fx3, fx4,
fy1, fy2, fy3, fy4,
real_zones,
half, ptiny,
iend);

}
stopTimer();

DEL_DOT_VEC_2D_DATA_TEARDOWN_HIP;

} else if ( vid == RAJA_HIP ) {

DEL_DOT_VEC_2D_DATA_SETUP_HIP;

NDSET2D(m_domain->jp, x,x1,x2,x3,x4) ;
NDSET2D(m_domain->jp, y,y1,y2,y3,y4) ;
NDSET2D(m_domain->jp, xdot,fx1,fx2,fx3,fx4) ;
NDSET2D(m_domain->jp, ydot,fy1,fy2,fy3,fy4) ;

camp::resources::Resource working_res{camp::resources::Hip()};
RAJA::TypedListSegment<Index_type> zones(m_domain->real_zones,
m_domain->n_real_zones,
working_res);

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall< RAJA::hip_exec<block_size, true /*async*/> >(
zones, [=] __device__ (Index_type i) {
DEL_DOT_VEC_2D_BODY;
});

}
stopTimer();

DEL_DOT_VEC_2D_DATA_TEARDOWN_HIP;

} else {
std::cout << "\n DEL_DOT_VEC_2D : Unknown Hip variant id = " << vid << std::endl;
}
}

} // end namespace apps
} // end namespace rajaperf

#endif // RAJA_ENABLE_HIP
1 change: 1 addition & 0 deletions src/apps/DEL_DOT_VEC_2D.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class DEL_DOT_VEC_2D : public KernelBase
void runSeqVariant(VariantID vid);
void runOpenMPVariant(VariantID vid);
void runCudaVariant(VariantID vid);
void runHipVariant(VariantID vid);
void runOpenMPTargetVariant(VariantID vid);

private:
Expand Down
Loading

0 comments on commit 064dd17

Please sign in to comment.