Skip to content

Commit

Permalink
Quad precision (double-double) support in VkFFT
Browse files Browse the repository at this point in the history
-Added double-double support in VkFFT. Requires cpu initialization in full quad precision, so only supports gcc for now. Potentially possible to add full FP128 support or some other FP128 library (like mpir) in the future.
-Data has to be stored in double-double format before VkFFT kernel calls (no fp128<->double-double conversion on the GPU yet).
-Full 1e-32 precision, but same range as FP64. See Library for Double-Double and Quad-Double Arithmetic by Y Hida for more information on double-double.
-Requires FMA contraction to be disabled (due to ab-cd contraction rounding mismatch). Doesn't work on Vulkan as I haven't found how to do that yet.
-Fixed warnings (#138)
-Added proper check for app to be zero before initializeVkFFT call and zeroing on deletion (#134)
-Added an option to provide staging buffer in application and VkGPU handle (#129)
-Added guards for build type (#128)
-Fixed missing deallocation calls for the inverse Bluestein axes. Fixed the buffer layout size in Vulkan in some cases.
-Refactored the code generator and container struct layout for better handling complex numbers (-5k loc).
-Added more precision tests and benchmarks.
-Will be merged in the main branch after more testing and update to the documentation.
  • Loading branch information
DTolm committed Sep 25, 2023
1 parent b4ae141 commit ae5a334
Show file tree
Hide file tree
Showing 57 changed files with 7,602 additions and 9,725 deletions.
49 changes: 39 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
cmake_minimum_required(VERSION 3.11)
project(VkFFT_TestSuite)

# Fall back to an optimized build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  # FORCE overwrites any previously cached value so the Release default sticks.
  set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
  set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "" FORCE)
endif()

if (NOT DEFINED GLSLANG_GIT_TAG)
set(GLSLANG_GIT_TAG "origin/main")
set(GLSLANG_GIT_TAG "12.3.1")
endif()

include(FetchContent)
set(VKFFT_BACKEND 0 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal")

Expand All @@ -22,6 +27,7 @@ endif()

option(build_VkFFT_FFTW_precision "Build VkFFT FFTW precision comparison" OFF)
option(VkFFT_use_FP128_Bluestein_RaderFFT "Use FP128 for Bluestein and Rader FFT kernel calculations. Currently requires FP128 FFT library, like FFTWl" OFF)
option(VkFFT_use_FP128_double_double "Build VkFFT quad double-double" OFF)

if (MSVC)
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})
Expand All @@ -40,6 +46,7 @@ if(build_VkFFT_FFTW_precision)
benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
benchmark_scripts/vkFFT_scripts/src/sample_9_benchmark_VkFFT_quadDoubleDouble.cpp
benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
Expand All @@ -49,14 +56,17 @@ if(build_VkFFT_FFTW_precision)
benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
benchmark_scripts/vkFFT_scripts/src/sample_19_precision_VkFFT_quadDoubleDouble_nonPow2.cpp
benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1000_benchmark_VkFFT_single_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp)
benchmark_scripts/vkFFT_scripts/src/sample_1002_benchmark_VkFFT_half_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp)
else()
add_executable(${PROJECT_NAME} VkFFT_TestSuite.cpp
benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
Expand All @@ -70,15 +80,18 @@ else()
benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
benchmark_scripts/vkFFT_scripts/src/sample_9_benchmark_VkFFT_quadDoubleDouble.cpp
benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1000_benchmark_VkFFT_single_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp)
benchmark_scripts/vkFFT_scripts/src/sample_1002_benchmark_VkFFT_half_2_4096.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
benchmark_scripts/vkFFT_scripts/src/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp)
endif()
target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_11)
add_definitions(-DVKFFT_BACKEND=${VKFFT_BACKEND})
Expand Down Expand Up @@ -138,7 +151,12 @@ target_compile_definitions(${PROJECT_NAME} PUBLIC -DVK_API_VERSION=11)#10 - Vulk
if(VkFFT_use_FP128_Bluestein_RaderFFT)
target_compile_definitions(${PROJECT_NAME} PUBLIC -DVkFFT_use_FP128_Bluestein_RaderFFT)
endif()
# Optional quad (double-double) precision support.
if(VkFFT_use_FP128_double_double)
  # Link libquadmath for the test suite target.
  target_link_libraries(${PROJECT_NAME} PUBLIC quadmath)
  # CMake strips a leading -D from definitions, so the bare macro name is equivalent.
  target_compile_definitions(${PROJECT_NAME} PUBLIC VKFFT_USE_DOUBLEDOUBLE_FP128)
endif()
if(${VKFFT_BACKEND} EQUAL 0)
set(ENABLE_OPT 0)
FetchContent_Declare(
glslang-main
GIT_TAG ${GLSLANG_GIT_TAG}
Expand All @@ -153,12 +171,12 @@ if(${VKFFT_BACKEND} EQUAL 0)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/glslang-main)
endif()

target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vkFFT/)
add_library(VkFFT INTERFACE)
target_include_directories(VkFFT INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/vkFFT/)
target_compile_definitions(VkFFT INTERFACE -DVKFFT_BACKEND=${VKFFT_BACKEND})

target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/half_lib/)
add_library(half INTERFACE)
target_include_directories(half INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/half_lib/)

target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_scripts/vkFFT_scripts/include/)

Expand All @@ -168,6 +186,7 @@ elseif(${VKFFT_BACKEND} EQUAL 1)
find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64)
add_definitions(-DCUDA_TOOLKIT_ROOT_DIR="${CUDA_TOOLKIT_ROOT_DIR}")
target_link_libraries(${PROJECT_NAME} PUBLIC ${CUDA_LIBRARIES} cuda ${CUDA_NVRTC_LIB} VkFFT half)
target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS})
elseif(${VKFFT_BACKEND} EQUAL 2)
target_link_libraries(${PROJECT_NAME} PUBLIC hip::host VkFFT half)
elseif(${VKFFT_BACKEND} EQUAL 3)
Expand Down Expand Up @@ -197,6 +216,16 @@ if(build_VkFFT_FFTW_precision OR VkFFT_use_FP128_Bluestein_RaderFFT)
NO_DEFAULT_PATH
)
target_include_directories(${PROJECT_NAME} PUBLIC ${FFTW_INCLUDES})
# Locate and link the quad-precision FFTW library (libfftw3q) when
# double-double support is enabled.
if(VkFFT_use_FP128_double_double)
  # Only FFTW3_LIB_DIR (plus its lib/lib64 sub-directories) is searched;
  # NO_DEFAULT_PATH disables the system search locations.
  find_library(
    FFTWQ_LIB
    NAMES "libfftw3q" "fftw3q"
    PATHS ${FFTW3_LIB_DIR}
    PATH_SUFFIXES "lib" "lib64"
    NO_DEFAULT_PATH
  )
  # Fail early with an actionable message instead of relying on CMake's
  # generic "set to NOTFOUND" error for the linked variable.
  if(NOT FFTWQ_LIB)
    message(FATAL_ERROR "VkFFT_use_FP128_double_double is ON but libfftw3q was not found under FFTW3_LIB_DIR='${FFTW3_LIB_DIR}'. Set FFTW3_LIB_DIR to the directory containing the quad-precision FFTW library.")
  endif()
  target_link_libraries(${PROJECT_NAME} PUBLIC ${FFTWQ_LIB})
endif()
if(VkFFT_use_FP128_Bluestein_RaderFFT)
find_library(
FFTWL_LIB
Expand All @@ -205,10 +234,9 @@ if(VkFFT_use_FP128_Bluestein_RaderFFT)
PATH_SUFFIXES "lib" "lib64"
NO_DEFAULT_PATH
)
target_link_libraries (${PROJECT_NAME} PUBLIC ${FFTW_LIB} ${FFTWL_LIB})
else()
target_link_libraries (${PROJECT_NAME} PUBLIC ${FFTW_LIB})
target_link_libraries (${PROJECT_NAME} PUBLIC ${FFTWL_LIB})
endif()
target_link_libraries (${PROJECT_NAME} PUBLIC ${FFTW_LIB})
endif()

if(build_VkFFT_cuFFT_benchmark)
Expand Down Expand Up @@ -256,6 +284,7 @@ if(build_VkFFT_cuFFT_benchmark)
-gencode arch=compute_80,code=compute_80
-gencode arch=compute_86,code=compute_86>")
target_include_directories(cuFFT_scripts PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_scripts/cuFFT_scripts/include)
target_include_directories(cuFFT_scripts PUBLIC ${CUDA_INCLUDE_DIRS})
set_target_properties(cuFFT_scripts PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(cuFFT_scripts PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_libraries(${PROJECT_NAME} PUBLIC cuFFT_scripts)
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ VkFFT is an efficient GPU-accelerated multidimensional Fast Fourier Transform li
- Radix-2/3/4/5/7/8/11/13 FFT. Sequences using radix 3, 5, 7, 11 and 13 have comparable performance to that of powers of 2.
- Rader's FFT algorithm for primes from 17 up to max shared memory length (~10000). Inlined and done without additional memory transfers.
- Bluestein's FFT algorithm for all other sequences. Full coverage of C2C range, single upload (2^12, 2^12, 2^12) for R2C/C2R/R2R. Optimized to have as few memory transfers as possible by using zero padding and merged convolution support of VkFFT.
- Single, double and half precision support. Double precision uses CPU-generated LUT tables. Half precision still does all computations in single and only uses half precision to store data.
- Single, double, half and quad (double-double) precision support. Double and quad precision use CPU-generated LUT tables. Half precision still does all computations in single and only uses half precision to store data.
- All transformations are performed in-place with no performance loss. Out-of-place transforms are supported by selecting different input/output buffers.
- No additional transposition uploads. Note: Data can be reshuffled after the Four Step FFT algorithm with an additional buffer (for big sequences). Doesn't matter for convolutions - they return to the input ordering (saves memory).
- Complex to complex (C2C), real to complex (R2C), complex to real (C2R) transformations and real to real (R2R) Discrete Cosine Transformations of types I, II, III and IV. R2R, R2C and C2R are optimized to run up to 2x times faster than C2C and take 2x less memory.
Expand All @@ -33,19 +33,19 @@ Include the vkFFT.h file and glslang compiler. Provide the library with correctl
For single and double precision, Vulkan 1.0 is required. For half precision, Vulkan 1.1 is required.

CUDA/HIP:
Include the vkFFT.h file and make sure your system has NVRTC/HIPRTC built. Provide the library with correctly chosen VKFFT_BACKEND definition. Only single/double precision for now.\
Include the vkFFT.h file and make sure your system has NVRTC/HIPRTC built. Provide the library with correctly chosen VKFFT_BACKEND definition.\
To build CUDA/HIP version of the benchmark, replace VKFFT_BACKEND in CMakeLists (line 5) with the correct one and optionally enable FFTW. VKFFT_BACKEND=1 for CUDA, VKFFT_BACKEND=2 for HIP.

OpenCL:
Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition. Only single/double precision for now.\
Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition.\
To build OpenCL version of the benchmark, replace VKFFT_BACKEND in CMakeLists (line 5) with the value 3 and optionally enable FFTW.

Level Zero:
Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition. Clang and llvm-spirv must be valid system calls. Only single/double precision for now.\
Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition. Clang and llvm-spirv must be valid system calls.\
To build Level Zero version of the benchmark, replace VKFFT_BACKEND in CMakeLists (line 5) with the value 4 and optionally enable FFTW.

Metal:
Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition. VkFFT uses metal-cpp as a C++ bindings to Apple's libraries - Foundation.hpp, QuartzCore.hpp and Metal.hpp. Only single precision.\
Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition. VkFFT uses metal-cpp as C++ bindings to Apple's libraries - Foundation.hpp, QuartzCore.hpp and Metal.hpp.\
To build Metal version of the benchmark, replace VKFFT_BACKEND in CMakeLists (line 5) with the value 5 and optionally enable FFTW.

## Command-line interface
Expand Down
63 changes: 57 additions & 6 deletions VkFFT_TestSuite.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include <vector>
#include <vector>
#include <memory>
#include <string.h>
#include <chrono>
Expand Down Expand Up @@ -62,6 +62,9 @@
#include "sample_6_benchmark_VkFFT_single_r2c.h"
#include "sample_7_benchmark_VkFFT_single_Bluestein.h"
#include "sample_8_benchmark_VkFFT_double_Bluestein.h"
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
#include "sample_9_benchmark_VkFFT_quadDoubleDouble.h"
#endif
#include "sample_10_benchmark_VkFFT_single_multipleBuffers.h"
#ifdef USE_FFTW
#include "sample_11_precision_VkFFT_single.h"
Expand All @@ -72,17 +75,23 @@
#include "sample_16_precision_VkFFT_single_dct.h"
#include "sample_17_precision_VkFFT_double_dct.h"
#include "sample_18_precision_VkFFT_double_nonPow2.h"
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
#include "sample_19_precision_VkFFT_quadDoubleDouble_nonPow2.h"
#endif
#endif
#include "sample_50_convolution_VkFFT_single_1d_matrix.h"
#include "sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.h"
#include "sample_52_convolution_VkFFT_single_2d_batched_r2c.h"

#include "sample_100_benchmark_VkFFT_single_nd_dct.h"
#include "sample_101_benchmark_VkFFT_double_nd_dct.h"
#include "sample_1000_VkFFT_single_2_4096.h"
#include "sample_1000_benchmark_VkFFT_single_2_4096.h"
#include "sample_1001_benchmark_VkFFT_double_2_4096.h"
#include "sample_1002_benchmark_VkFFT_half_2_4096.h"
#include "sample_1003_benchmark_VkFFT_single_3d_2_512.h"

#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
#include "sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.h"
#endif
#ifdef USE_cuFFT
#include "user_benchmark_cuFFT.h"
#include "sample_0_benchmark_cuFFT_single.h"
Expand Down Expand Up @@ -337,6 +346,13 @@ VkFFTResult launchVkFFT(VkGPU* vkGPU, uint64_t sample_id, bool file_output, FILE
resFFT = sample_8_benchmark_VkFFT_double_Bluestein(vkGPU, file_output, output, isCompilerInitialized);
break;
}
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
case 9:
{
resFFT = sample_9_benchmark_VkFFT_quadDoubleDouble(vkGPU, file_output, output, isCompilerInitialized);
break;
}
#endif
#if(VKFFT_BACKEND==0)
case 10:
{
Expand Down Expand Up @@ -385,6 +401,13 @@ VkFFTResult launchVkFFT(VkGPU* vkGPU, uint64_t sample_id, bool file_output, FILE
resFFT = sample_18_precision_VkFFT_double_nonPow2(vkGPU, file_output, output, isCompilerInitialized);
break;
}
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
case 19:
{
resFFT = sample_19_precision_VkFFT_quadDoubleDouble_nonPow2(vkGPU, file_output, output, isCompilerInitialized);
break;
}
#endif
#endif
case 50:
{
Expand Down Expand Up @@ -453,19 +476,31 @@ VkFFTResult launchVkFFT(VkGPU* vkGPU, uint64_t sample_id, bool file_output, FILE
}
case 1000:
{
resFFT = sample_1000_VkFFT_single_2_4096(vkGPU, file_output, output, isCompilerInitialized);
resFFT = sample_1000_benchmark_VkFFT_single_2_4096(vkGPU, file_output, output, isCompilerInitialized);
break;
}
case 1001:
{
resFFT = sample_1001_benchmark_VkFFT_double_2_4096(vkGPU, file_output, output, isCompilerInitialized);
break;
}
case 1002:
{
resFFT = sample_1002_benchmark_VkFFT_half_2_4096(vkGPU, file_output, output, isCompilerInitialized);
break;
}
case 1003:
{
resFFT = sample_1003_benchmark_VkFFT_single_3d_2_512(vkGPU, file_output, output, isCompilerInitialized);
break;
}
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
case 1004:
{
resFFT = sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096(vkGPU, file_output, output, isCompilerInitialized);
break;
}
#endif
}
#if(VKFFT_BACKEND==0)
vkDestroyFence(vkGPU->device, vkGPU->fence, NULL);
Expand Down Expand Up @@ -525,7 +560,7 @@ int main(int argc, char* argv[])
version_decomposed[0] = version / 10000;
version_decomposed[1] = (version - version_decomposed[0] * 10000) / 100;
version_decomposed[2] = (version - version_decomposed[0] * 10000 - version_decomposed[1] * 100);
printf("VkFFT v%d.%d.%d (01-08-2023). Author: Tolmachev Dmitrii\n", version_decomposed[0], version_decomposed[1], version_decomposed[2]);
printf("VkFFT v%d.%d.%d (25-09-2023). Author: Tolmachev Dmitrii\n", version_decomposed[0], version_decomposed[1], version_decomposed[2]);
#if (VKFFT_BACKEND==0)
printf("Vulkan backend\n");
#elif (VKFFT_BACKEND==1)
Expand Down Expand Up @@ -553,6 +588,9 @@ int main(int argc, char* argv[])
printf(" 6 - FFT + iFFT R2C / C2R benchmark\n");
printf(" 7 - FFT + iFFT C2C Bluestein benchmark in single precision\n");
printf(" 8 - FFT + iFFT C2C Bluestein benchmark in double precision\n");
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
printf(" 9 - FFT + iFFT C2C benchmark 1D batched in quad double-double precision LUT\n");
#endif
#if (VKFFT_BACKEND==0)
printf(" 10 - multiple buffer(4 by default) split version of benchmark 0\n");
#endif
Expand All @@ -566,6 +604,9 @@ int main(int argc, char* argv[])
printf(" 16 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in single precision\n");
printf(" 17 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in double precision\n");
printf(" 18 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in double precision\n");
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
printf(" 19 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in quad double-double precision\n");
#endif
#elif USE_rocFFT
printf(" 11 - VkFFT / rocFFT / FFTW C2C precision test in single precision\n");
printf(" 12 - VkFFT / rocFFT / FFTW C2C precision test in double precision\n");
Expand All @@ -575,6 +616,9 @@ int main(int argc, char* argv[])
printf(" 16 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in single precision\n");
printf(" 17 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in double precision\n");
printf(" 18 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in double precision\n");
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
printf(" 19 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in quad double-double precision\n");
#endif
#else
printf(" 11 - VkFFT / FFTW C2C precision test in single precision\n");
printf(" 12 - VkFFT / FFTW C2C precision test in double precision\n");
Expand All @@ -584,6 +628,9 @@ int main(int argc, char* argv[])
printf(" 16 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in single precision\n");
printf(" 17 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in double precision\n");
printf(" 18 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in double precision\n");
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
printf(" 19 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in quad double-double precision\n");
#endif
#endif
#endif
printf(" 50 - convolution example with identity kernel\n");
Expand All @@ -600,11 +647,15 @@ int main(int argc, char* argv[])

printf(" 1000 - FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096\n");
printf(" 1001 - FFT + iFFT C2C benchmark 1D batched in double precision: all supported systems from 2 to 4096\n");
printf(" 1002 - FFT + iFFT C2C benchmark 1D batched in half precision: all supported systems from 2 to 4096\n");
printf(" 1003 - FFT + iFFT C2C multidimensional benchmark in single precision: all supported cubes from 2 to 512\n");
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
printf(" 1004 - FFT + iFFT C2C benchmark 1D batched in quad double-double precision: all supported systems from 2 to 4096\n");
#endif
printf(" -benchmark_vkfft: run VkFFT benchmark on a user-defined system:\n\
-X uint, -Y uint, -Z uint - FFT dimensions (default Y and Z are 1)\n");
printf("\
-P uint - precision (0 - single, 1 - double, 2 - half) (default 0)\n");
-P uint - precision (0 - single, 1 - double, 2 - half, 3 - quad double-double) (default 0)\n");
printf("\
-B uint - number of batched systems (default 1)\n\
-N uint - number of consecutive FFT+iFFT iterations (default 1)\n\
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include "vkFFT.h"
#include "utils_VkFFT.h"

// Entry point for sample 1000 of the test suite. The suite's help text describes it as
// "FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096".
// launchVkFFT calls it with the GPU handle, the file-output flag, the output FILE* and the
// compiler-initialized flag, and stores the returned VkFFTResult as the sample's result.
// NOTE(review): file_output is declared uint64_t here while the caller holds a bool - confirm this is intended.
VkFFTResult sample_1000_benchmark_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output, FILE* output, uint64_t isCompilerInitialized);
Loading

0 comments on commit ae5a334

Please sign in to comment.