diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dd04311..481b2a9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -39,6 +39,7 @@ package_sample: - build_dx12 - build_vk script: + - xcopy .\ffx-dnsr\docs\*.pdf .\docs /sy - echo "Packaging build" - echo cd .\sample\bin\ > %SampleName%_DX12.bat - echo start %SampleName%_DX12.exe >> %SampleName%_DX12.bat diff --git a/.gitmodules b/.gitmodules index 0c66215..5547136 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "sample/libs/cauldron"] path = sample/libs/cauldron url = ../../GPUOpen-LibrariesAndSDKs/Cauldron.git +[submodule "ffx-dnsr"] + path = ffx-dnsr + url = ../../GPUOpen-Effects/FidelityFX-Denoiser.git diff --git a/docs/FFX_SSSR_GUI.pdf b/docs/FFX_SSSR_GUI.pdf index fbc0435..5f67404 100644 Binary files a/docs/FFX_SSSR_GUI.pdf and b/docs/FFX_SSSR_GUI.pdf differ diff --git a/docs/FFX_SSSR_Technology.pdf b/docs/FFX_SSSR_Technology.pdf index aed2649..cf53fc9 100644 Binary files a/docs/FFX_SSSR_Technology.pdf and b/docs/FFX_SSSR_Technology.pdf differ diff --git a/ffx-dnsr b/ffx-dnsr new file mode 160000 index 0000000..670c76e --- /dev/null +++ b/ffx-dnsr @@ -0,0 +1 @@ +Subproject commit 670c76e340b80b17dd9018f320d328821825cf80 diff --git a/sample/src/DX12/Shaders/ffx_a.h b/ffx-spd/ffx_a.h similarity index 100% rename from sample/src/DX12/Shaders/ffx_a.h rename to ffx-spd/ffx_a.h diff --git a/sample/src/DX12/Shaders/ffx_spd.h b/ffx-spd/ffx_spd.h similarity index 100% rename from sample/src/DX12/Shaders/ffx_spd.h rename to ffx-spd/ffx_spd.h diff --git a/ffx-sssr/.gitignore b/ffx-sssr/.gitignore deleted file mode 100644 index 62f5258..0000000 --- a/ffx-sssr/.gitignore +++ /dev/null @@ -1 +0,0 @@ -shaders/*.h diff --git a/ffx-sssr/CMakeLists.txt b/ffx-sssr/CMakeLists.txt deleted file mode 100644 index 93d9090..0000000 --- a/ffx-sssr/CMakeLists.txt +++ /dev/null @@ -1,134 +0,0 @@ -cmake_minimum_required(VERSION 3.10.0) - -project(stochastic-screen-space-reflections) - -find_package(PythonInterp 
3.6 REQUIRED) - -# ensure that only one option is enabled -if(FFX_SSSR_VK AND FFX_SSSR_D3D12) - message(FATAL_ERROR "FFX_SSSR_VK and FFX_SSSR_D3D12 are enabled. Please make sure to enable only one at a time.") -endif() - -if(FFX_SSSR_VK) - find_package(Vulkan REQUIRED) -endif() - -set_property(GLOBAL PROPERTY USE_FOLDERS ON) - -if (MSVC) - add_compile_options(/W3 /WX) -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall -Werror -std=c++17") -endif() - -file(GLOB FFX_SSSR_HEADER_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/inc/ffx_sssr.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/*.h) -file(GLOB FFX_SSSR_INLINE_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/src/*.inl) -file(GLOB FFX_SSSR_SOURCE_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) -file(GLOB FFX_SSSR_SHADER_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/shaders/*.hlsl) - -if(FFX_SSSR_D3D12) - file(GLOB FFX_SSSR_HEADER_FILES_D3D12 - ${CMAKE_CURRENT_SOURCE_DIR}/inc/ffx_sssr_d3d12.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/d3d12/*.h) - file(GLOB FFX_SSSR_INLINE_FILES_D3D12 - ${CMAKE_CURRENT_SOURCE_DIR}/src/d3d12/*.inl) - file(GLOB FFX_SSSR_SOURCE_FILES_D3D12 - ${CMAKE_CURRENT_SOURCE_DIR}/src/d3d12/*.cpp) -endif() - -if(FFX_SSSR_VK) - file(GLOB FFX_SSSR_HEADER_FILES_VK - ${CMAKE_CURRENT_SOURCE_DIR}/inc/ffx_sssr_vk.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/vk/*.h) - file(GLOB FFX_SSSR_INLINE_FILES_VK - ${CMAKE_CURRENT_SOURCE_DIR}/src/vk/*.inl) - file(GLOB FFX_SSSR_SOURCE_FILES_VK - ${CMAKE_CURRENT_SOURCE_DIR}/src/vk/*.cpp) -endif() - -set(FFX_SSSR_HEADER_FILES_SHADERS) - -foreach(shaderfile classify_tiles - common - intersect - prepare_indirect_args - resolve_eaw - resolve_spatial - resolve_temporal) - - add_custom_command( - OUTPUT - ${CMAKE_CURRENT_SOURCE_DIR}/shaders/shader_${shaderfile}.h - DEPENDS - ${CMAKE_CURRENT_SOURCE_DIR}/shaders/${shaderfile}.hlsl - COMMAND - ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/sourceToHeader.py ${CMAKE_CURRENT_SOURCE_DIR}/shaders/${shaderfile}.hlsl ${shaderfile} > 
${CMAKE_CURRENT_SOURCE_DIR}/shaders/shader_${shaderfile}.h - COMMENT - "Generate shader header shader_${shaderfile}.h for ${shaderfile}.hlsl" - USES_TERMINAL) - - list(APPEND FFX_SSSR_HEADER_FILES_SHADERS "${CMAKE_CURRENT_SOURCE_DIR}/shaders/shader_${shaderfile}.h") - -endforeach() - -add_library(FFX_SSSR - ${FFX_SSSR_SOURCE_FILES} - ${FFX_SSSR_SOURCE_FILES_D3D12} - ${FFX_SSSR_SOURCE_FILES_VK} - ${FFX_SSSR_HEADER_FILES_SHADERS}) - -target_include_directories(FFX_SSSR PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/inc) -target_include_directories(FFX_SSSR PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) -target_include_directories(FFX_SSSR PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/shaders) - -target_include_directories(FFX_SSSR PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/externals) -target_include_directories(FFX_SSSR PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/externals/dxc) -target_include_directories(FFX_SSSR PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/externals/samplerCPP) - -if(FFX_SSSR_D3D12) - target_compile_definitions(FFX_SSSR PRIVATE FFX_SSSR_D3D12) -endif() - -if(FFX_SSSR_VK) - target_compile_definitions(FFX_SSSR PRIVATE FFX_SSSR_VK) -endif() - -target_sources(FFX_SSSR PRIVATE - ${FFX_SSSR_HEADER_FILES} - ${FFX_SSSR_INLINE_FILES} - ${FFX_SSSR_SHADER_FILES} - ${FFX_SSSR_HEADER_FILES_D3D12} - ${FFX_SSSR_INLINE_FILES_D3D12} - ${FFX_SSSR_HEADER_FILES_VK} - ${FFX_SSSR_INLINE_FILES_VK}) - -source_group("Header Files\\Shaders" FILES ${FFX_SSSR_HEADER_FILES_SHADERS}) - -source_group("Inline Files" FILES ${FFX_SSSR_INLINE_FILES}) -source_group("Media Files\\Shaders" FILES ${FFX_SSSR_SHADER_FILES}) - - -source_group("Header Files\\D3D12" FILES ${FFX_SSSR_HEADER_FILES_D3D12}) -source_group("Inline Files\\D3D12" FILES ${FFX_SSSR_INLINE_FILES_D3D12}) -source_group("Source Files\\D3D12" FILES ${FFX_SSSR_SOURCE_FILES_D3D12}) - -source_group("Header Files\\VK" FILES ${FFX_SSSR_HEADER_FILES_VK}) -source_group("Inline Files\\VK" FILES ${FFX_SSSR_INLINE_FILES_VK}) -source_group("Source Files\\VK" FILES 
${FFX_SSSR_SOURCE_FILES_VK}) - -if(MSVC) - set_source_files_properties(${FFX_SSSR_SHADER_FILES} - PROPERTIES - VS_TOOL_OVERRIDE - "None") -endif() - - -if(FFX_SSSR_VK) - target_link_libraries (FFX_SSSR Vulkan::Vulkan) -endif() diff --git a/ffx-sssr/README.md b/ffx-sssr/README.md deleted file mode 100644 index 3e9bf7e..0000000 --- a/ffx-sssr/README.md +++ /dev/null @@ -1,278 +0,0 @@ -# FidelityFX SSSR - -The **FidelityFX SSSR** library provides the means to render stochastic screen space reflections for the use in real-time applications. -A full sample running the library can be found on the [FidelityFX SSSR GitHub page](https://github.com/GPUOpen-Effects/FidelityFX-SSSR.git). - -The library supports D3D12 and Vulkan. - -## Prerequisits - -The library relies on [dxcompiler.dll](https://github.com/microsoft/DirectXShaderCompiler) to generate DXIL/SPIRV from HLSL at runtime. -Use the version built for SPIRV from the [DirectXShaderCompiler GitHub repository](https://github.com/microsoft/DirectXShaderCompiler) or the one that comes with the [Vulkan SDK 1.2.141.2 (or later)](https://www.lunarg.com/vulkan-sdk/) if you are planning to use the Vulkan version of **FidelityFX SSSR**. - -## Device Creation - -Vulkan version only: The library relies on [VK_EXT_subgroup_size_control](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_EXT_subgroup_size_control.html) for optimal performance on RDNA. Make sure the extension is enabled at device creation by adding `VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME` to `ppEnabledExtensionNames` if it is available. -Also enable `subgroupSizeControl` in `VkPhysicalDeviceSubgroupSizeControlFeaturesEXT` and chain it into the `pNext` chain of `VkDeviceCreateInfo` if the extension name is available. It is fine to run **FidelityFX SSSR** if the extension is not supported. - -## Context - Initialization and Shutdown - -First the header files must be included. 
This is `ffx_sssr.h` for Graphics API independent definitions and `ffx_sssr_d3d12.h` for D3D12 specific definitions: - -```C++ -#include "ffx_sssr.h" -#include "ffx_sssr_d3d12.h" -``` - -or `ffx_sssr_vk.h` for Vulkan specific definitions: - -```C++ -#include "ffx_sssr.h" -#include "ffx_sssr_vk.h" -``` - -Then a context must be created. This usually is done only once per device. Depending on the preferred API, populate either `FfxSssrD3D12CreateContextInfo` or `FfxSssrVkCreateContextInfo`. - -```C++ -FfxSssrD3D12CreateContextInfo d3d12ContextInfo = {}; -d3d12ContextInfo.pDevice = myDevice; -d3d12ContextInfo.pUploadCommandList = myCommandList; - -FfxSssrVkCreateContextInfo vkContextInfo = {}; -vkContextInfo.device = myDeviceHandle; -vkContextInfo.physicalDevice = myPhysicalDeviceHandle; -vkContextInfo.uploadCommandBuffer = myUploadCommandBufferHandle; - -FfxSssrLoggingCallbacks loggingCallbacks = {}; -loggingCallbacks.pUserData = myUserData; -loggingCallbacks.pfnLogging = myLoggingFunction; - -FfxSssrCreateContextInfo contextInfo = {}; -contextInfo.apiVersion = FFX_SSSR_API_VERSION; -contextInfo.maxReflectionViewCount = myMaxViewCount; -contextInfo.frameCountBeforeMemoryReuse = myMaxFrameCountInFlight; -contextInfo.uploadBufferSize = 8 * 1024 * 1024; -contextInfo.pLoggingCallbacks = &loggingCallbacks; -contextInfo.pD3D12CreateContextInfo = &d3d12ContextInfo; -contextInfo.pVkCreateContextInfo = &vkContextInfo; -``` - -The library requires certain input textures from the application to create a reflection view. -Thus, the context requires user specified unpack functions (HLSL SM 6.0) to access the individual attributes. It is recommended to keep these snippets as small as possible to achieve good performance. -The function headers have to match in order for the shaders to compile. The `FFX_SSSR_*_TEXTURE_FORMAT` macros hold the definitions provided in the `p*TextureFormat` members of `FfxSssrCreateContextInfo`. 
The snippets provided below shall serve as a starting point: - -```C++ -contextInfo.pRoughnessTextureFormat = L"float4"; -contextInfo.pUnpackRoughnessSnippet = L"float FfxSssrUnpackRoughness(FFX_SSSR_ROUGHNESS_TEXTURE_FORMAT packed) { return packed.w; }"; -contextInfo.pNormalsTextureFormat = L"float4"; -contextInfo.pUnpackNormalsSnippet = L"float3 FfxSssrUnpackNormals(FFX_SSSR_NORMALS_TEXTURE_FORMAT packed) { return 2 * packed.xyz - 1; }"; -contextInfo.pSceneTextureFormat = L"float4"; -contextInfo.pUnpackSceneRadianceSnippet = L"float3 FfxSssrUnpackSceneRadiance(FFX_SSSR_SCENE_TEXTURE_FORMAT packed) { return packed.xyz; }"; -contextInfo.pDepthTextureFormat = L"float"; -contextInfo.pUnpackDepthSnippet = L"float FfxSssrUnpackDepth(FFX_SSSR_DEPTH_TEXTURE_FORMAT packed) { return packed.x; }"; -contextInfo.pMotionVectorFormat = L"float2"; -contextInfo.pUnpackMotionVectorsSnippet = L"float2 FfxSssrUnpackMotionVectors(FFX_SSSR_MOTION_VECTOR_TEXTURE_FORMAT packed) { return packed.xy; }"; -``` - -After that the context can be created: - -```C++ -FfxSssrContext myContext; -FfxSssrStatus status = ffxSssrCreateContext(&contextInfo, &myContext); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -Finally, submit the command list provided to the `pUploadCommandList` member of `FfxSssrCreateContextInfoD3D12` to the queue of your choice to upload the internal resources to the GPU. The same is required on Vulkan for the `uploadCommandBuffer` member of `FfxSssrVkCreateContextInfo`. - -Once there is no need to render reflections anymore the context should be destroyed to free internal resources: - -```C++ -FfxSssrStatus status = ffxSssrDestroyContext(myContext); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -## Reflection View - Creation and Update - -Reflection views represent the abstraction for the first bounce of indirect light from reflective surfaces as seen from a given camera. - -`FfxSssrReflectionView` resources can be created as such. 
Depending on the API fill either `FfxSssrD3D12CreateReflectionViewInfo` or `FfxSssrVkCreateReflectionViewInfo`: - -```C++ -FfxSssrD3D12CreateReflectionViewInfo d3d12ReflectionViewInfo = {}; -d3d12ReflectionViewInfo.depthBufferHierarchySRV; -d3d12ReflectionViewInfo.motionBufferSRV; -d3d12ReflectionViewInfo.normalBufferSRV; -d3d12ReflectionViewInfo.roughnessBufferSRV; -d3d12ReflectionViewInfo.normalHistoryBufferSRV; -d3d12ReflectionViewInfo.roughnessHistoryBufferSRV; -d3d12ReflectionViewInfo.outputBufferUAV; -d3d12ReflectionViewInfo.sceneFormat; -d3d12ReflectionViewInfo.sceneSRV; -d3d12ReflectionViewInfo.environmentMapSRV; -d3d12ReflectionViewInfo.pEnvironmentMapSamplerDesc; - -FfxSssrVkCreateReflectionViewInfo vkReflectionViewInfo = {}; -vkReflectionViewInfo.depthBufferHierarchySRV; -vkReflectionViewInfo.motionBufferSRV; -vkReflectionViewInfo.normalBufferSRV; -vkReflectionViewInfo.roughnessBufferSRV; -vkReflectionViewInfo.normalHistoryBufferSRV; -vkReflectionViewInfo.roughnessHistoryBufferSRV; -vkReflectionViewInfo.reflectionViewUAV; -vkReflectionViewInfo.sceneFormat; -vkReflectionViewInfo.sceneSRV; -vkReflectionViewInfo.environmentMapSRV; -vkReflectionViewInfo.environmentMapSampler; -vkReflectionViewInfo.uploadCommandBuffer; - -FfxSssrCreateReflectionViewInfo reflectionViewInfo = {}; -reflectionViewInfo.flags = FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS | FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_NORMAL_BUFFERS | FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_ROUGHNESS_BUFFERS; -reflectionViewInfo.outputWidth = width; -reflectionViewInfo.outputHeight = height; -reflectionViewInfo.pD3D12CreateReflectionViewInfo = &d3d12ReflectionViewInfo; -reflectionViewInfo.pVkCreateReflectionViewInfo = &vkReflectionViewInfo; - -FfxSssrReflectionView myReflectionView; -FfxSssrStatus status = ffxSssrCreateReflectionView(myContext, &reflectionViewInfo, &myReflectionView); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -On D3D12 all SRVs 
and UAVs must be allocated from a CPU accessible descriptor heap as they are copied into the descriptor tables of the library. `FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS` can be used if the application intends to query for timings later. The `FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_*` flags should be set if the normal or roughness surfaces are written in an alternating fashion. Don't set the flags if the surfaces are copied each frame. - -The reflection view depends on the screen size. It is recommended to destroy the reflection view on resize and create a new one: - -```C++ -FfxSssrStatus status = ffxSssrDestroyReflectionView(myContext, myReflectionView); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -Finally, the camera properties can be specified via the view and projection matrices. Each matrix is defined in row-major layout (i.e. the last 4 values in the float array of the view matrix are expected to be `(0, 0, 0, 1)` == the last row of the matrix): - -```C++ -FfxSssrStatus status = ffxSssrReflectionViewSetCameraParameters(myContext, myReflectionView, &myViewMatrix, &myProjectionMatrix); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -## Reflection View - Resolve - -Calling `ffxSssrEncodeResolveReflectionView` dispatches the actual shaders that perform the hierarchical tracing through the depth buffer and optionally also dispatches the denoising passes if the `FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE` flag is set. 
Depending on the API populate either `FfxSssrD3D12CommandEncodeInfo` or `FfxSssrVkCommandEncodeInfo`: - -```C++ -FfxSssrD3D12CommandEncodeInfo d3d12EncodeInfo = {}; -d3d12EncodeInfo.pCommandList = myCommandList; - -FfxSssrVkCommandEncodeInfo vkEncodeInfo = {}; -vkEncodeInfo.commandBuffer = myCommandBufferHandle; - -FfxSssrResolveReflectionViewInfo resolveInfo = {}; -resolveInfo.flags = FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE | FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING; -resolveInfo.temporalStabilityScale = 0.99f; -resolveInfo.maxTraversalIterations = 128; -resolveInfo.mostDetailedDepthHierarchyMipLevel = 1; -resolveInfo.depthBufferThickness = 0.015f; -resolveInfo.minTraversalOccupancy = 4; -resolveInfo.samplesPerQuad = FFX_SSSR_RAY_SAMPLES_PER_QUAD_1; -resolveInfo.roughnessThreshold = 0.2f; -resolveInfo.pD3D12CommandEncodeInfo = &d3d12EncodeInfo; -resolveInfo.pVkCommandEncodeInfo = &vkEncodeInfo; -FfxSssrStatus status = ffxSssrEncodeResolveReflectionView(myContext, myReflectionView, &resolveInfo); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` -* Enabling `FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE` runs the libraries denoisers. Omit that flag if denoising is not required. -* Enabling `FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING` counteracts temporal instabilities by shooting more rays in temporally unstable regions. -* `resolveInfo.temporalStabilityScale` serves as a mean to trade noise with temporal stability (implies more ghosting). -* `resolveInfo.maxTraversalIterations` limits the maximum number of intersections with the depth buffer hierarchy -* `resolveInfo.mostDetailedDepthHierarchyMipLevel` limits the most detailed mipmap for depth buffer lookups when tracing non-mirror reflection rays. -* `resolveInfo.depthBufferThickness` configures the accepted hit distance behind the depth buffer in view space. 
-* `resolveInfo.minTraversalOccupancy` limits the number of threads in the depth traversal loop. If less than that number of threads remain present they exit the intersection loop early even if they did not find a depth buffer intersection yet. This only affects non-mirror reflection rays. -* `resolveInfo.samplesPerQuad` serves as a starting point how many rays are spawned in glossy regions. The only supported values are `FFX_SSSR_RAY_SAMPLES_PER_QUAD_1`, `FFX_SSSR_RAY_SAMPLES_PER_QUAD_2` and `FFX_SSSR_RAY_SAMPLES_PER_QUAD_4`. The use of `FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING` dynamically bumps this up to a maximum of `4` to enforce convergence on a per pixel basis. -* `resolveInfo.roughnessThreshold` determines the roughness value below which reflection rays are spawned. Any roughness values higher are considered not reflective and the reflection view will contain `(0, 0, 0, 0)`. - -When resolving a reflection view, the following operations take place: - -- Reflect the view rays at the surface normal and spawn reflection rays from the depth buffer. -- Glossy reflections are supported by randomly jittering the ray based on surface roughness. -- The resulting radiance information is denoised using spatio-temporal filtering. -- The shading values are written out to the output buffer supplied at creation time. - -Note that the application is responsible for issuing the required barrier to synchronize the writes to the output buffer. - -## Reflection View - Performance Profiling - -It is possible to query profiling information by enabling the `FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS` flag when creating a reflection view: - -``` -d3d12ReflectionViewInfo.flags |= FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS; -``` - -This enables the scheduling of GPU timestamp queries to track the amount of time spent in the individual passes (these are tile classification, intersection and denoising). 
- -Note that these flags add additional runtime overhead and should be used for debugging/profiling purposes only. Set the flag to `0` to disable any timestamp queries. - -The profiling information can then be queried as below for the tile classification pass: - -```C++ -uint64_t tileClassificationTime; -FfxSssrStatus status = ffxSssrReflectionViewGetTileClassificationElapsedTime(myContext, myReflectionView, &tileClassificationTime); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -For the intersection pass: - -```C++ -uint64_t intersectionTime; -FfxSssrStatus status = ffxSssrReflectionViewGetIntersectionElapsedTime(myContext, myReflectionView, &intersectionTime); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -And the same for the denoising passes: - -```C++ -uint64_t denoisingTime; -FfxSssrStatus status = ffxSssrReflectionViewGetDenoisingElapsedTime(myContext, myReflectionView, &denoisingTime); -if (status != FFX_SSSR_STATUS_OK) { - // Error handling -} -``` - -The retrieved times are expressed in GPU ticks and can be converted using the timestamp frequency of the queue used to execute the encoded command list on D3D12 (`GetTimestampFrequency`). On Vulkan the `timestampPeriod` member of `VkPhysicalDeviceLimits` can be used to convert the times from GPU ticks to nanoseconds. - -## Frame management - -The **FidelityFX SSSR** library manages its own upload buffer internally that is used as a ring to transfer constant buffer data from the CPU to the GPU. 
The user must specify the number of frames the library should wait for before it can safely start re-using memory blocks: - -``` -contextInfo.frameCountBeforeMemoryReuse = myMaxFrameCountInFlight; -``` - -Finally, frame boundaries must be signalled to the library as such: - -```C++ -FfxSssrStatus status = ffxSssrAdvanceToNextFrame(myContext); -if (status != FFX_SSSR_STATUS_OK) -{ - // Error handling -} -``` - -Note that `ffxSssrAdvanceToNextFrame()` can be called either at the beginning or the end of a frame, but should not be called in the middle of performing work for a given frame. - -## Limitations - -The library assumes that the depth buffer values range from `0 --> 1` with 0 being at the near plane and 1 being at the far plane. This implies that the depth buffer hierarchy is built with the `minimum` operator. \ No newline at end of file diff --git a/ffx-sssr/externals/dxc/dxcapi.h b/ffx-sssr/externals/dxc/dxcapi.h deleted file mode 100644 index fd9a996..0000000 --- a/ffx-sssr/externals/dxc/dxcapi.h +++ /dev/null @@ -1,384 +0,0 @@ - -/////////////////////////////////////////////////////////////////////////////// -// // -// dxcapi.h // -// Copyright (C) Microsoft Corporation. All rights reserved. // -// This file is distributed under the University of Illinois Open Source // -// License. See LICENSE.TXT for details. // -// // -// Provides declarations for the DirectX Compiler API entry point. // -// // -/////////////////////////////////////////////////////////////////////////////// - -#ifndef __DXC_API__ -#define __DXC_API__ - -#ifndef DXC_API_IMPORT -#define DXC_API_IMPORT __declspec(dllimport) -#endif - -struct IMalloc; -struct IDxcIncludeHandler; - -/// -/// Creates a single uninitialized object of the class associated with a specified CLSID. -/// -/// -/// The CLSID associated with the data and code that will be used to create the object. -/// -/// -/// A reference to the identifier of the interface to be used to communicate -/// with the object. 
-/// -/// -/// Address of pointer variable that receives the interface pointer requested -/// in riid. Upon successful return, *ppv contains the requested interface -/// pointer. Upon failure, *ppv contains NULL. -/// -/// While this function is similar to CoCreateInstance, there is no COM involvement. -/// -typedef HRESULT (__stdcall *DxcCreateInstanceProc)( - _In_ REFCLSID rclsid, - _In_ REFIID riid, - _Out_ LPVOID* ppv -); - -typedef HRESULT(__stdcall *DxcCreateInstance2Proc)( - _In_ IMalloc *pMalloc, - _In_ REFCLSID rclsid, - _In_ REFIID riid, - _Out_ LPVOID* ppv - ); - -/// -/// Creates a single uninitialized object of the class associated with a specified CLSID. -/// -/// -/// The CLSID associated with the data and code that will be used to create the object. -/// -/// -/// A reference to the identifier of the interface to be used to communicate -/// with the object. -/// -/// -/// Address of pointer variable that receives the interface pointer requested -/// in riid. Upon successful return, *ppv contains the requested interface -/// pointer. Upon failure, *ppv contains NULL. -/// -/// While this function is similar to CoCreateInstance, there is no COM involvement. 
-/// -DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance( - _In_ REFCLSID rclsid, - _In_ REFIID riid, - _Out_ LPVOID* ppv - ); - -DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2( - _In_ IMalloc *pMalloc, - _In_ REFCLSID rclsid, - _In_ REFIID riid, - _Out_ LPVOID* ppv -); - - -// IDxcBlob is an alias of ID3D10Blob and ID3DBlob -struct __declspec(uuid("8BA5FB08-5195-40e2-AC58-0D989C3A0102")) -IDxcBlob : public IUnknown { -public: - virtual LPVOID STDMETHODCALLTYPE GetBufferPointer(void) = 0; - virtual SIZE_T STDMETHODCALLTYPE GetBufferSize(void) = 0; -}; - -struct __declspec(uuid("7241d424-2646-4191-97c0-98e96e42fc68")) -IDxcBlobEncoding : public IDxcBlob { -public: - virtual HRESULT STDMETHODCALLTYPE GetEncoding(_Out_ BOOL *pKnown, - _Out_ UINT32 *pCodePage) = 0; -}; - -struct __declspec(uuid("e5204dc7-d18c-4c3c-bdfb-851673980fe7")) -IDxcLibrary : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE SetMalloc(_In_opt_ IMalloc *pMalloc) = 0; - virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob( - _In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0; - virtual HRESULT STDMETHODCALLTYPE CreateBlobFromFile( - LPCWSTR pFileName, _In_opt_ UINT32* codePage, - _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; - virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingFromPinned( - LPBYTE pText, UINT32 size, UINT32 codePage, - _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; - virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnHeapCopy( - _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage, - _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; - virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnMalloc( - _In_bytecount_(size) LPCVOID pText, IMalloc *pIMalloc, UINT32 size, UINT32 codePage, - _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; - virtual HRESULT STDMETHODCALLTYPE CreateIncludeHandler( - _COM_Outptr_ IDxcIncludeHandler **ppResult) = 0; - virtual HRESULT STDMETHODCALLTYPE 
CreateStreamFromBlobReadOnly( - _In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0; - virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8( - _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; - virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16( - _In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0; -}; - -struct __declspec(uuid("CEDB484A-D4E9-445A-B991-CA21CA157DC2")) -IDxcOperationResult : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) = 0; - virtual HRESULT STDMETHODCALLTYPE GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **pResult) = 0; - virtual HRESULT STDMETHODCALLTYPE GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **pErrors) = 0; -}; - -struct __declspec(uuid("7f61fc7d-950d-467f-b3e3-3c02fb49187c")) -IDxcIncludeHandler : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE LoadSource( - _In_ LPCWSTR pFilename, // Candidate filename. - _COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource // Resultant source object for included file, nullptr if not found. - ) = 0; -}; - -struct DxcDefine { - LPCWSTR Name; - _Maybenull_ LPCWSTR Value; -}; - -struct __declspec(uuid("8c210bf3-011f-4422-8d70-6f9acb8db617")) -IDxcCompiler : public IUnknown { - // Compile a single entry point to the target shader model - virtual HRESULT STDMETHODCALLTYPE Compile( - _In_ IDxcBlob *pSource, // Source text to compile - _In_opt_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers. 
- _In_ LPCWSTR pEntryPoint, // entry point name - _In_ LPCWSTR pTargetProfile, // shader profile to compile - _In_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments - _In_ UINT32 argCount, // Number of arguments - _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines - _In_ UINT32 defineCount, // Number of defines - _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional) - _COM_Outptr_ IDxcOperationResult **ppResult // Compiler output status, buffer, and errors - ) = 0; - - // Preprocess source text - virtual HRESULT STDMETHODCALLTYPE Preprocess( - _In_ IDxcBlob *pSource, // Source text to preprocess - _In_opt_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers. - _In_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments - _In_ UINT32 argCount, // Number of arguments - _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines - _In_ UINT32 defineCount, // Number of defines - _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional) - _COM_Outptr_ IDxcOperationResult **ppResult // Preprocessor output status, buffer, and errors - ) = 0; - - // Disassemble a program. - virtual HRESULT STDMETHODCALLTYPE Disassemble( - _In_ IDxcBlob *pSource, // Program to disassemble. - _COM_Outptr_ IDxcBlobEncoding **ppDisassembly // Disassembly text. - ) = 0; -}; - -struct __declspec(uuid("A005A9D9-B8BB-4594-B5C9-0E633BEC4D37")) -IDxcCompiler2 : public IDxcCompiler { - // Compile a single entry point to the target shader model with debug information. - virtual HRESULT STDMETHODCALLTYPE CompileWithDebug( - _In_ IDxcBlob *pSource, // Source text to compile - _In_opt_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers. 
- _In_ LPCWSTR pEntryPoint, // Entry point name - _In_ LPCWSTR pTargetProfile, // Shader profile to compile - _In_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments - _In_ UINT32 argCount, // Number of arguments - _In_count_(defineCount) const DxcDefine *pDefines, // Array of defines - _In_ UINT32 defineCount, // Number of defines - _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional) - _COM_Outptr_ IDxcOperationResult **ppResult, // Compiler output status, buffer, and errors - _Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob. - _COM_Outptr_opt_ IDxcBlob **ppDebugBlob // Debug blob - ) = 0; -}; - -struct __declspec(uuid("F1B5BE2A-62DD-4327-A1C2-42AC1E1E78E6")) -IDxcLinker : public IUnknown { -public: - // Register a library with name to ref it later. - virtual HRESULT RegisterLibrary( - _In_opt_ LPCWSTR pLibName, // Name of the library. - _In_ IDxcBlob *pLib // Library blob. - ) = 0; - - // Links the shader and produces a shader blob that the Direct3D runtime can - // use. - virtual HRESULT STDMETHODCALLTYPE Link( - _In_opt_ LPCWSTR pEntryName, // Entry point name - _In_ LPCWSTR pTargetProfile, // shader profile to link - _In_count_(libCount) - const LPCWSTR *pLibNames, // Array of library names to link - UINT32 libCount, // Number of libraries to link - _In_count_(argCount) - const LPCWSTR *pArguments, // Array of pointers to arguments - _In_ UINT32 argCount, // Number of arguments - _COM_Outptr_ IDxcOperationResult * - *ppResult // Linker output status, buffer, and errors - ) = 0; - // Links the shader with export and produces a shader blob that the Direct3D - // runtime can use. 
- virtual HRESULT STDMETHODCALLTYPE LinkWithExports( - _In_opt_ LPCWSTR pEntryName, // Entry point name - _In_ LPCWSTR pTargetProfile, // shader profile to link - _In_count_(libCount) - const LPCWSTR *pLibNames, // Array of library names to link - UINT32 libCount, // Number of libraries to link - _In_count_(argCount) - const LPCWSTR *pArguments, // Array of pointers to arguments - _In_ UINT32 argCount, // Number of arguments - _In_count_(exportCount) const DxcDefine *pExports, // Array of exports - _In_ UINT32 exportCount, // Number of exports - _COM_Outptr_ IDxcOperationResult * - *ppResult // Linker output status, buffer, and errors - ) = 0; -}; - -static const UINT32 DxcValidatorFlags_Default = 0; -static const UINT32 DxcValidatorFlags_InPlaceEdit = 1; // Validator is allowed to update shader blob in-place. -static const UINT32 DxcValidatorFlags_RootSignatureOnly = 2; -static const UINT32 DxcValidatorFlags_ModuleOnly = 4; -static const UINT32 DxcValidatorFlags_ValidMask = 0x7; - -struct __declspec(uuid("A6E82BD2-1FD7-4826-9811-2857E797F49A")) -IDxcValidator : public IUnknown { - // Validate a shader. - virtual HRESULT STDMETHODCALLTYPE Validate( - _In_ IDxcBlob *pShader, // Shader to validate. - _In_ UINT32 Flags, // Validation flags. 
- _COM_Outptr_ IDxcOperationResult **ppResult // Validation output status, buffer, and errors - ) = 0; -}; - -struct __declspec(uuid("334b1f50-2292-4b35-99a1-25588d8c17fe")) -IDxcContainerBuilder : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0; // Loads DxilContainer to the builder - virtual HRESULT STDMETHODCALLTYPE AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource) = 0; // Part to add to the container - virtual HRESULT STDMETHODCALLTYPE RemovePart(_In_ UINT32 fourCC) = 0; // Remove the part with fourCC - virtual HRESULT STDMETHODCALLTYPE SerializeContainer(_Out_ IDxcOperationResult **ppResult) = 0; // Builds a container of the given container builder state -}; - -struct __declspec(uuid("091f7a26-1c1f-4948-904b-e6e3a8a771d5")) -IDxcAssembler : public IUnknown { - // Assemble dxil in ll or llvm bitcode to DXIL container. - virtual HRESULT STDMETHODCALLTYPE AssembleToContainer( - _In_ IDxcBlob *pShader, // Shader to assemble. - _COM_Outptr_ IDxcOperationResult **ppResult // Assembly output status, buffer, and errors - ) = 0; -}; - -struct __declspec(uuid("d2c21b26-8350-4bdc-976a-331ce6f4c54c")) -IDxcContainerReflection : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pContainer) = 0; // Container to load. 
- virtual HRESULT STDMETHODCALLTYPE GetPartCount(_Out_ UINT32 *pResult) = 0; - virtual HRESULT STDMETHODCALLTYPE GetPartKind(UINT32 idx, _Out_ UINT32 *pResult) = 0; - virtual HRESULT STDMETHODCALLTYPE GetPartContent(UINT32 idx, _COM_Outptr_ IDxcBlob **ppResult) = 0; - virtual HRESULT STDMETHODCALLTYPE FindFirstPartKind(UINT32 kind, _Out_ UINT32 *pResult) = 0; - virtual HRESULT STDMETHODCALLTYPE GetPartReflection(UINT32 idx, REFIID iid, void **ppvObject) = 0; -}; - -struct __declspec(uuid("AE2CD79F-CC22-453F-9B6B-B124E7A5204C")) -IDxcOptimizerPass : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE GetOptionName(_COM_Outptr_ LPWSTR *ppResult) = 0; - virtual HRESULT STDMETHODCALLTYPE GetDescription(_COM_Outptr_ LPWSTR *ppResult) = 0; - virtual HRESULT STDMETHODCALLTYPE GetOptionArgCount(_Out_ UINT32 *pCount) = 0; - virtual HRESULT STDMETHODCALLTYPE GetOptionArgName(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0; - virtual HRESULT STDMETHODCALLTYPE GetOptionArgDescription(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0; -}; - -struct __declspec(uuid("25740E2E-9CBA-401B-9119-4FB42F39F270")) -IDxcOptimizer : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE GetAvailablePassCount(_Out_ UINT32 *pCount) = 0; - virtual HRESULT STDMETHODCALLTYPE GetAvailablePass(UINT32 index, _COM_Outptr_ IDxcOptimizerPass** ppResult) = 0; - virtual HRESULT STDMETHODCALLTYPE RunOptimizer(IDxcBlob *pBlob, - _In_count_(optionCount) LPCWSTR *ppOptions, UINT32 optionCount, - _COM_Outptr_ IDxcBlob **pOutputModule, - _COM_Outptr_opt_ IDxcBlobEncoding **ppOutputText) = 0; -}; - -static const UINT32 DxcVersionInfoFlags_None = 0; -static const UINT32 DxcVersionInfoFlags_Debug = 1; // Matches VS_FF_DEBUG -static const UINT32 DxcVersionInfoFlags_Internal = 2; // Internal Validator (non-signing) - -struct __declspec(uuid("b04f5b50-2059-4f12-a8ff-a1e0cde1cc7e")) -IDxcVersionInfo : public IUnknown { - virtual HRESULT STDMETHODCALLTYPE GetVersion(_Out_ UINT32 *pMajor, _Out_ UINT32 *pMinor) 
= 0; - virtual HRESULT STDMETHODCALLTYPE GetFlags(_Out_ UINT32 *pFlags) = 0; -}; - -// {73e22d93-e6ce-47f3-b5bf-f0664f39c1b0} -__declspec(selectany) extern const CLSID CLSID_DxcCompiler = { - 0x73e22d93, - 0xe6ce, - 0x47f3, - { 0xb5, 0xbf, 0xf0, 0x66, 0x4f, 0x39, 0xc1, 0xb0 } -}; - -// {EF6A8087-B0EA-4D56-9E45-D07E1A8B7806} -__declspec(selectany) extern const GUID CLSID_DxcLinker = { - 0xef6a8087, - 0xb0ea, - 0x4d56, - {0x9e, 0x45, 0xd0, 0x7e, 0x1a, 0x8b, 0x78, 0x6} -}; - -// {CD1F6B73-2AB0-484D-8EDC-EBE7A43CA09F} -__declspec(selectany) extern const CLSID CLSID_DxcDiaDataSource = { - 0xcd1f6b73, - 0x2ab0, - 0x484d, - { 0x8e, 0xdc, 0xeb, 0xe7, 0xa4, 0x3c, 0xa0, 0x9f } -}; - -// {6245D6AF-66E0-48FD-80B4-4D271796748C} -__declspec(selectany) extern const GUID CLSID_DxcLibrary = { - 0x6245d6af, - 0x66e0, - 0x48fd, - { 0x80, 0xb4, 0x4d, 0x27, 0x17, 0x96, 0x74, 0x8c } -}; - -// {8CA3E215-F728-4CF3-8CDD-88AF917587A1} -__declspec(selectany) extern const GUID CLSID_DxcValidator = { - 0x8ca3e215, - 0xf728, - 0x4cf3, - { 0x8c, 0xdd, 0x88, 0xaf, 0x91, 0x75, 0x87, 0xa1 } -}; - -// {D728DB68-F903-4F80-94CD-DCCF76EC7151} -__declspec(selectany) extern const GUID CLSID_DxcAssembler = { - 0xd728db68, - 0xf903, - 0x4f80, - { 0x94, 0xcd, 0xdc, 0xcf, 0x76, 0xec, 0x71, 0x51 } -}; - -// {b9f54489-55b8-400c-ba3a-1675e4728b91} -__declspec(selectany) extern const GUID CLSID_DxcContainerReflection = { - 0xb9f54489, - 0x55b8, - 0x400c, - { 0xba, 0x3a, 0x16, 0x75, 0xe4, 0x72, 0x8b, 0x91 } -}; - -// {AE2CD79F-CC22-453F-9B6B-B124E7A5204C} -__declspec(selectany) extern const GUID CLSID_DxcOptimizer = { - 0xae2cd79f, - 0xcc22, - 0x453f, - {0x9b, 0x6b, 0xb1, 0x24, 0xe7, 0xa5, 0x20, 0x4c} -}; - -// {94134294-411f-4574-b4d0-8741e25240d2} -__declspec(selectany) extern const GUID CLSID_DxcContainerBuilder = { - 0x94134294, - 0x411f, - 0x4574, - { 0xb4, 0xd0, 0x87, 0x41, 0xe2, 0x52, 0x40, 0xd2 } -}; -#endif diff --git a/ffx-sssr/externals/dxc/dxcapi.use.h b/ffx-sssr/externals/dxc/dxcapi.use.h deleted 
file mode 100644 index d502de2..0000000 --- a/ffx-sssr/externals/dxc/dxcapi.use.h +++ /dev/null @@ -1,162 +0,0 @@ -//********************************************************* -// -// Copyright (c) Microsoft. All rights reserved. -// This code is licensed under the MIT License (MIT). -// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF -// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY -// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR -// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. -// -//********************************************************* -////////////////////////////////////////////////////////////////////////////// -// // -// dxcapi.use.h // -// Copyright (C) Microsoft Corporation. All rights reserved. // -// This file is distributed under the University of Illinois Open Source // -// License. See LICENSE.TXT for details. // -// // -// Provides support for DXC API users. // -// // -/////////////////////////////////////////////////////////////////////////////// - -#ifndef __DXCAPI_USE_H__ -#define __DXCAPI_USE_H__ - -#include "dxc/dxcapi.h" - -namespace dxc { - -// Helper class to dynamically load the dxcompiler or a compatible libraries. -class DxcDllSupport { -protected: - HMODULE m_dll; - DxcCreateInstanceProc m_createFn; - DxcCreateInstance2Proc m_createFn2; - - HRESULT InitializeInternal(LPCWSTR dllName, LPCSTR fnName) { - if (m_dll != nullptr) return S_OK; - m_dll = LoadLibraryW(dllName); - - if (m_dll == nullptr) return HRESULT_FROM_WIN32(GetLastError()); - m_createFn = (DxcCreateInstanceProc)GetProcAddress(m_dll, fnName); - - if (m_createFn == nullptr) { - HRESULT hr = HRESULT_FROM_WIN32(GetLastError()); - FreeLibrary(m_dll); - m_dll = nullptr; - return hr; - } - - // Only basic functions used to avoid requiring additional headers. 
- m_createFn2 = nullptr; - char fnName2[128]; - size_t s = strlen(fnName); - if (s < sizeof(fnName2) - 2) { - memcpy(fnName2, fnName, s); - fnName2[s] = '2'; - fnName2[s + 1] = '\0'; - m_createFn2 = (DxcCreateInstance2Proc)GetProcAddress(m_dll, fnName2); - } - - return S_OK; - } - -public: - DxcDllSupport() : m_dll(nullptr), m_createFn(nullptr), m_createFn2(nullptr) { - } - - DxcDllSupport(DxcDllSupport&& other) { - m_dll = other.m_dll; other.m_dll = nullptr; - m_createFn = other.m_createFn; other.m_createFn = nullptr; - m_createFn2 = other.m_createFn2; other.m_createFn2 = nullptr; - } - - ~DxcDllSupport() { - Cleanup(); - } - - HRESULT Initialize() { - return InitializeInternal(L"dxcompiler.dll", "DxcCreateInstance"); - } - - HRESULT InitializeForDll(_In_z_ const wchar_t* dll, _In_z_ const char* entryPoint) { - return InitializeInternal(dll, entryPoint); - } - - template - HRESULT CreateInstance(REFCLSID clsid, _Outptr_ TInterface** pResult) { - return CreateInstance(clsid, __uuidof(TInterface), (IUnknown**)pResult); - } - - HRESULT CreateInstance(REFCLSID clsid, REFIID riid, _Outptr_ IUnknown **pResult) { - if (pResult == nullptr) return E_POINTER; - if (m_dll == nullptr) return E_FAIL; - HRESULT hr = m_createFn(clsid, riid, (LPVOID*)pResult); - return hr; - } - - template - HRESULT CreateInstance2(IMalloc *pMalloc, REFCLSID clsid, _Outptr_ TInterface** pResult) { - return CreateInstance2(pMalloc, clsid, __uuidof(TInterface), (IUnknown**)pResult); - } - - HRESULT CreateInstance2(IMalloc *pMalloc, REFCLSID clsid, REFIID riid, _Outptr_ IUnknown **pResult) { - if (pResult == nullptr) return E_POINTER; - if (m_dll == nullptr) return E_FAIL; - if (m_createFn2 == nullptr) return E_FAIL; - HRESULT hr = m_createFn2(pMalloc, clsid, riid, (LPVOID*)pResult); - return hr; - } - - bool HasCreateWithMalloc() const { - return m_createFn2 != nullptr; - } - - bool IsEnabled() const { - return m_dll != nullptr; - } - - void Cleanup() { - if (m_dll != nullptr) { - m_createFn = 
nullptr; - m_createFn2 = nullptr; - FreeLibrary(m_dll); - m_dll = nullptr; - } - } - - HMODULE Detach() { - HMODULE module = m_dll; - m_dll = nullptr; - return module; - } -}; - -inline DxcDefine GetDefine(_In_ LPCWSTR name, LPCWSTR value) { - DxcDefine result; - result.Name = name; - result.Value = value; - return result; -} - -// Checks an HRESULT and formats an error message with the appended data. -void IFT_Data(HRESULT hr, _In_opt_ LPCWSTR data); - -void EnsureEnabled(DxcDllSupport &dxcSupport); -void ReadFileIntoBlob(DxcDllSupport &dxcSupport, _In_ LPCWSTR pFileName, - _Outptr_ IDxcBlobEncoding **ppBlobEncoding); -void WriteBlobToConsole(_In_opt_ IDxcBlob *pBlob, DWORD streamType = STD_OUTPUT_HANDLE); -void WriteBlobToFile(_In_opt_ IDxcBlob *pBlob, _In_ LPCWSTR pFileName); -void WriteBlobToHandle(_In_opt_ IDxcBlob *pBlob, HANDLE hFile, _In_opt_ LPCWSTR pFileName); -void WriteUtf8ToConsole(_In_opt_count_(charCount) const char *pText, - int charCount, DWORD streamType = STD_OUTPUT_HANDLE); -void WriteUtf8ToConsoleSizeT(_In_opt_count_(charCount) const char *pText, - size_t charCount, DWORD streamType = STD_OUTPUT_HANDLE); -void WriteOperationErrorsToConsole(_In_ IDxcOperationResult *pResult, - bool outputWarnings); -void WriteOperationResultToConsole(_In_ IDxcOperationResult *pRewriteResult, - bool outputWarnings); - -} // namespace dxc - -#endif diff --git a/ffx-sssr/ffx_sssr.h b/ffx-sssr/ffx_sssr.h new file mode 100644 index 0000000..37ade0e --- /dev/null +++ b/ffx-sssr/ffx_sssr.h @@ -0,0 +1,173 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef FFX_SSSR +#define FFX_SSSR +#define FFX_SSSR_FLOAT_MAX 3.402823466e+38 + +void FFX_SSSR_InitialAdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_resolution, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, out float3 position, out float current_t) { /* Moves the ray from origin to the nearest xy cell boundary of the current mip; writes the resulting position and ray parameter current_t. */ + float2 current_mip_position = current_mip_resolution * origin.xy; + + // Intersect ray with the half box that is pointing away from the ray origin.
+ float2 xy_plane = floor(current_mip_position) + floor_offset; + xy_plane = xy_plane * current_mip_resolution_inv + uv_offset; + + // o + d * t = p' => t = (p' - o) / d + float2 t = (xy_plane - origin.xy) * inv_direction.xy; + current_t = min(t.x, t.y); + position = origin + current_t * direction; +} + +bool FFX_SSSR_AdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_position, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, float surface_z, inout float3 position, inout float current_t) { /* Advances the ray to the nearest of the xy boundary planes of the current cell or the surface_z plane, but only while the ray is still above the surface; returns true when the ray was above the surface and the nearest plane was not the z plane (tile skipped). */ + // Create boundary planes + float2 xy_plane = floor(current_mip_position) + floor_offset; + xy_plane = xy_plane * current_mip_resolution_inv + uv_offset; + float3 boundary_planes = float3(xy_plane, surface_z); + + // Intersect ray with the half box that is pointing away from the ray origin. + // o + d * t = p' => t = (p' - o) / d + float3 t = (boundary_planes - origin) * inv_direction; + + // Prevent using z plane when shooting out of the depth buffer. +#ifdef FFX_SSSR_INVERTED_DEPTH_RANGE + t.z = direction.z < 0 ? t.z : FFX_SSSR_FLOAT_MAX; +#else + t.z = direction.z > 0 ? t.z : FFX_SSSR_FLOAT_MAX; +#endif + + // Choose nearest intersection with a boundary. + float t_min = min(min(t.x, t.y), t.z); + +#ifdef FFX_SSSR_INVERTED_DEPTH_RANGE + // Larger z means closer to the camera. + bool above_surface = surface_z < position.z; +#else + // Smaller z means closer to the camera. + bool above_surface = surface_z > position.z; +#endif + + // Decide whether we are able to advance the ray until we hit the xy boundaries or if we had to clamp it at the surface. + bool skipped_tile = t_min != t.z && above_surface; + + // Make sure to only advance the ray if we're still above the surface. + current_t = above_surface ?
t_min : current_t; + + // Advance ray + position = origin + current_t * direction; + + return skipped_tile; +} + +float2 FFX_SSSR_GetMipResolution(float2 screen_dimensions, int mip_level) { /* Resolution of the given mip: screen_dimensions scaled by 0.5^mip_level. */ + return screen_dimensions * pow(0.5, mip_level); +} + +// Marches the ray through the depth hierarchy (one mip coarser after a skipped tile, one mip finer otherwise) and returns the final ray position. Requires origin and direction of the ray to be in screen space [0, 1] x [0, 1] +float3 FFX_SSSR_HierarchicalRaymarch(float3 origin, float3 direction, bool is_mirror, float2 screen_size, int most_detailed_mip, uint min_traversal_occupancy, uint max_traversal_intersections, out bool valid_hit) { + const float3 inv_direction = direction != 0 ? 1.0 / direction : FFX_SSSR_FLOAT_MAX; + + // Start on mip with highest detail. + int current_mip = most_detailed_mip; + + // Could recompute these every iteration, but it's faster to hoist them out and update them. + float2 current_mip_resolution = FFX_SSSR_GetMipResolution(screen_size, current_mip); + float2 current_mip_resolution_inv = rcp(current_mip_resolution); + + // Offset to the bounding boxes uv space to intersect the ray with the center of the next pixel. + // This means we ever so slightly overshoot into the next region. + float2 uv_offset = 0.005 * exp2(most_detailed_mip) / screen_size; + uv_offset = direction.xy < 0 ? -uv_offset : uv_offset; + + // Offset applied depending on current mip resolution to move the boundary to the left/right upper/lower border depending on ray direction. + float2 floor_offset = direction.xy < 0 ? 0 : 1; + + // Initially advance ray to avoid immediate self intersections.
+ float current_t; + float3 position; + FFX_SSSR_InitialAdvanceRay(origin, direction, inv_direction, current_mip_resolution, current_mip_resolution_inv, floor_offset, uv_offset, position, current_t); + + bool exit_due_to_low_occupancy = false; + int i = 0; + while (i < max_traversal_intersections && current_mip >= most_detailed_mip && !exit_due_to_low_occupancy) { + float2 current_mip_position = current_mip_resolution * position.xy; + float surface_z = FFX_SSSR_LoadDepth(current_mip_position, current_mip); + bool skipped_tile = FFX_SSSR_AdvanceRay(origin, direction, inv_direction, current_mip_position, current_mip_resolution_inv, floor_offset, uv_offset, surface_z, position, current_t); + current_mip += skipped_tile ? 1 : -1; + current_mip_resolution *= skipped_tile ? 0.5 : 2; + current_mip_resolution_inv *= skipped_tile ? 2 : 0.5; + ++i; + + exit_due_to_low_occupancy = !is_mirror && WaveActiveCountBits(true) <= min_traversal_occupancy; + } + + valid_hit = (i <= max_traversal_intersections); /* NOTE(review): the loop guard is i < max_traversal_intersections with a single ++i per iteration, so i never exceeds the limit here and this comparison looks always true - confirm the intended test. */ + + return position; +} + +float FFX_SSSR_ValidateHit(float3 hit, float2 uv, float3 world_space_ray_direction, float2 screen_size, float depth_buffer_thickness) { /* Returns a confidence weight for the hit: 0 when one of the rejection checks below fires, otherwise vignette * confidence. */ + // Reject hits outside the view frustum + if (any(hit.xy < 0) || any(hit.xy > 1)) { + return 0; + } + + // Reject the hit if we didn't advance the ray significantly to avoid immediate self reflection + float2 manhattan_dist = abs(hit.xy - uv); + if(all(manhattan_dist < (2 / screen_size))) { + return 0; + } + + // Don't look up radiance from the background. + int2 texel_coords = int2(screen_size * hit.xy); + float surface_z = FFX_SSSR_LoadDepth(texel_coords / 2, 1); +#ifdef FFX_SSSR_INVERTED_DEPTH_RANGE + if (surface_z == 0.0) { +#else + if (surface_z == 1.0) { +#endif + return 0; + } + + // We check if we hit the surface from the back, these should be rejected.
+ float3 hit_normal = FFX_SSSR_LoadNormal(texel_coords); + if (dot(hit_normal, world_space_ray_direction) > 0) { + return 0; + } + + float3 view_space_surface = FFX_SSSR_ScreenSpaceToViewSpace(float3(hit.xy, surface_z)); + float3 view_space_hit = FFX_SSSR_ScreenSpaceToViewSpace(hit); + float distance = length(view_space_surface - view_space_hit); + + // Fade out hits near the screen borders + float2 fov = 0.05 * float2(screen_size.y / screen_size.x, 1); + float2 border = smoothstep(0, fov, hit.xy) * (1 - smoothstep(1 - fov, 1, hit.xy)); + float vignette = border.x * border.y; + + // We accept all hits that are within a reasonable minimum distance below the surface. + // Add constant in linear space to avoid growing of the reflections toward the reflected objects. + float confidence = 1 - smoothstep(0, depth_buffer_thickness, distance); + confidence *= confidence; + + return vignette * confidence; +} + +#endif //FFX_SSSR diff --git a/ffx-sssr/inc/ffx_sssr.h b/ffx-sssr/inc/ffx_sssr.h deleted file mode 100644 index 1d3d572..0000000 --- a/ffx-sssr/inc/ffx_sssr.h +++ /dev/null @@ -1,319 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software.
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include - -#define FFX_SSSR_MAKE_VERSION(a,b,c) (((a) << 22) | ((b) << 12) | (c)) - -#define FFX_SSSR_API_VERSION FFX_SSSR_MAKE_VERSION(1, 1, 0) - -#define FFX_SSSR_STATIC_LIBRARY - -#ifndef FFX_SSSR_STATIC_LIBRARY -#ifdef WIN32 -#ifdef EXPORT_API -#define FFX_SSSR_API __declspec(dllexport) -#else -#define FFX_SSSR_API __declspec(dllimport) -#endif -#elif defined(__GNUC__) -#ifdef EXPORT_API -#define FFX_SSSR_API __attribute__((visibility ("default"))) -#else -#define FFX_SSSR_API -#endif -#endif -#else -#define FFX_SSSR_API -#endif - -typedef uint32_t FfxSssrFlags; - -#define FFX_SSSR_DEFINE_HANDLE(object) typedef struct object##_T* object; - -FFX_SSSR_DEFINE_HANDLE(FfxSssrContext) -FFX_SSSR_DEFINE_HANDLE(FfxSssrReflectionView) - -/*! - Forward declarations. -*/ -typedef struct FfxSssrD3D12CreateContextInfo FfxSssrD3D12CreateContextInfo; -typedef struct FfxSssrD3D12CreateReflectionViewInfo FfxSssrD3D12CreateReflectionViewInfo; -typedef struct FfxSssrD3D12CommandEncodeInfo FfxSssrD3D12CommandEncodeInfo; -typedef struct FfxSssrVkCreateContextInfo FfxSssrVkCreateContextInfo; -typedef struct FfxSssrVkCreateReflectionViewInfo FfxSssrVkCreateReflectionViewInfo; -typedef struct FfxSssrVkCommandEncodeInfo FfxSssrVkCommandEncodeInfo; - -/** - The return codes for the API functions. 
-*/ -enum FfxSssrStatus -{ - FFX_SSSR_STATUS_OK = 0, - - FFX_SSSR_STATUS_INVALID_VALUE = -1, - FFX_SSSR_STATUS_INVALID_OPERATION = -2, - FFX_SSSR_STATUS_OUT_OF_MEMORY = -3, - FFX_SSSR_STATUS_INCOMPATIBLE_API = -4, - FFX_SSSR_STATUS_INTERNAL_ERROR = -5 -}; - -/** - The minimum number of ray samples per quad for variable rate tracing. -*/ -enum FfxSssrRaySamplesPerQuad -{ - FFX_SSSR_RAY_SAMPLES_PER_QUAD_1, - FFX_SSSR_RAY_SAMPLES_PER_QUAD_2, - FFX_SSSR_RAY_SAMPLES_PER_QUAD_4 -}; - -/** - The available flags for creating a reflection view. -*/ -enum FfxSssrCreateReflectionViewFlagBits -{ - FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS = 1 << 0, ///< Set this flag if the application wishes to retrieve timing results. Don't set this flag in release builds. - FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_NORMAL_BUFFERS = 1 << 1, ///< Set this flag if the application writes to alternate surfaces. Don't set this flag to signal that the application copies the provided normal surfaces each frame. - FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_ROUGHNESS_BUFFERS = 1 << 2 ///< Set this flag if the application writes to alternate surfaces. Don't set this flag to signal that the application copies the provided roughness surfaces each frame. -}; -typedef FfxSssrFlags FfxSssrCreateReflectionViewFlags; - -/** - The available flags for resolving a reflection view. -*/ -enum FfxSssrResolveReflectionViewFlagBits -{ - FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE = 1 << 0, ///< Run denoiser passes on intersection results. - FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING = 1 << 1, ///< Enforces shooting a ray for temporally unstable pixels. -}; -typedef FfxSssrFlags FfxSssrResolveReflectionViewFlags; - -/** - The callback function for logging. - - \param pMessage The message to be logged. -*/ -typedef void (*PFN_ffxSssrLoggingFunction)(const char* pMessage, void* pUserData); - -/** - The callback information for logging. 
-*/ -typedef struct FfxSssrLoggingCallbacks -{ - void* pUserData; - PFN_ffxSssrLoggingFunction pfnLogging; -} FfxSssrLoggingCallbacks; - -/** - The parameters for creating a context. -*/ -typedef struct FfxSssrCreateContextInfo -{ - uint32_t apiVersion; - uint32_t maxReflectionViewCount; - uint32_t frameCountBeforeMemoryReuse; - size_t uploadBufferSize; - const FfxSssrLoggingCallbacks* pLoggingCallbacks; ///< Can be null. - const wchar_t* pRoughnessTextureFormat; ///< Used in the HLSL files to define the format of the resource containing surface roughness. - const wchar_t* pUnpackRoughnessSnippet; ///< Used in the HLSL files to unpack the roughness from the provided resource. - const wchar_t* pNormalsTextureFormat; ///< Used in the HLSL files to define the format of the resource containing the normals. - const wchar_t* pUnpackNormalsSnippet; ///< Used in the HLSL files to unpack the normals from the provided resource. - const wchar_t* pSceneTextureFormat; ///< Used in the HLSL files to define the format of the resource containing the rendered scene. - const wchar_t* pUnpackSceneRadianceSnippet; ///< Used in the HLSL files to unpack the rendered scene from the provided resource. - const wchar_t* pDepthTextureFormat; ///< Used in the HLSL files to define the format of the resource containing depth. - const wchar_t* pUnpackDepthSnippet; ///< Used in the HLSL files to unpack the depth values from the provided resource. - const wchar_t* pMotionVectorFormat; ///< Used in the HLSL files to define the format of the resource containing the motion vectors. - const wchar_t* pUnpackMotionVectorsSnippet; ///< Used in the HLSL files to unpack the motion vectors from the provided resource. - union - { - const FfxSssrD3D12CreateContextInfo* pD3D12CreateContextInfo; - const FfxSssrVkCreateContextInfo* pVkCreateContextInfo; - }; -} FfxSssrCreateContextInfo; - -/** - The parameters for creating a reflection view. 
-*/ -typedef struct FfxSssrCreateReflectionViewInfo -{ - FfxSssrCreateReflectionViewFlags flags; - uint32_t outputWidth; - uint32_t outputHeight; - union - { - const FfxSssrD3D12CreateReflectionViewInfo* pD3D12CreateReflectionViewInfo; - const FfxSssrVkCreateReflectionViewInfo* pVkCreateReflectionViewInfo; - }; -} FfxSssrCreateReflectionViewInfo; - -/** - The parameters for resolving a reflection view. -*/ -typedef struct FfxSssrResolveReflectionViewInfo -{ - FfxSssrResolveReflectionViewFlags flags; - float temporalStabilityScale; ///< Value between 0 and 1. High values prioritize temporal stability wheras low values avoid ghosting. - uint32_t maxTraversalIterations; ///< Maximum number of iterations to find the intersection with the depth buffer. - uint32_t mostDetailedDepthHierarchyMipLevel; ///< Applies only to non-mirror reflections. Mirror reflections always use 0 as most detailed mip. - uint32_t minTraversalOccupancy; ///< Minimum number of threads per wave to keep the intersection kernel running. - float depthBufferThickness; ///< Unit in view space. Any intersections further behind the depth buffer are rejected as invalid hits. - FfxSssrRaySamplesPerQuad samplesPerQuad; ///< Number of samples per 4 pixels in denoised regions. Mirror reflections are not affected by this. - float roughnessThreshold; ///< Shoot reflection rays for roughness values that are lower than this threshold. - union - { - const FfxSssrD3D12CommandEncodeInfo* pD3D12CommandEncodeInfo; ///< A pointer to the Direct3D12 command encoding parameters. - const FfxSssrVkCommandEncodeInfo* pVkCommandEncodeInfo; ///< A pointer to the Vulkan command encoding parameters. - }; -} FfxSssrResolveReflectionViewInfo; - -// API functions -#ifdef __cplusplus -extern "C" -{ -#endif - /** - Creates a new context. - - \param pCreateContextInfo The context creation information. - \param outContext The context. - \return The corresponding error code. 
- */ - FFX_SSSR_API FfxSssrStatus ffxSssrCreateContext(const FfxSssrCreateContextInfo* pCreateContextInfo, FfxSssrContext* outContext); - - /** - Destroys the context. - - \param context The context to be destroyed. - \return The corresponding error code. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrDestroyContext(FfxSssrContext context); - - /** - Creates a new reflection view. - - \param context The context to be used. - \param pCreateReflectionViewInfo The reflection view creation information. - \param outReflectionView The reflection view resource. - \return The corresponding error code. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrCreateReflectionView(FfxSssrContext context, const FfxSssrCreateReflectionViewInfo* pCreateReflectionViewInfo, FfxSssrReflectionView* outReflectionView); - - /** - Destroys the reflection view. - - \param context The context to be used. - \param reflectionView The reflection view resource. - \return The corresponding error code. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrDestroyReflectionView(FfxSssrContext context, FfxSssrReflectionView reflectionView); - - /** - Encodes the command(s) for resolving the given reflection view. - - \param context The context to be used. - \param reflectionView The resource for the reflection view. - \param pResolveReflectionViewInfo The reflection view information. - \return The corresponding error code. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrEncodeResolveReflectionView(FfxSssrContext context, FfxSssrReflectionView reflectionView, const FfxSssrResolveReflectionViewInfo* pResolveReflectionViewInfo); - - /** - Advances the frame index. - - \param context The context to be used. - \return The corresponding error code. - - \note Please call this once a frame so the library is able to safely re-use memory blocks after frameCountBeforeMemoryReuse frames have passed. 
- */ - FFX_SSSR_API FfxSssrStatus ffxSssrAdvanceToNextFrame(FfxSssrContext context); - - /** - Gets the number of GPU ticks spent in the tile classification pass. - - \param context The context to be used. - \param reflectionView The resource for the reflection view. - \param outTileClassificationElapsedTime The number of GPU ticks spent in the tile classification pass. - \return The corresponding error code. - - \note This method will only function if the reflection view was created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag. - Also, note that it will actually return the time that was spent in the tile classification pass frameCountBeforeMemoryReuse frames ago. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrReflectionViewGetTileClassificationElapsedTime(FfxSssrContext context, FfxSssrReflectionView reflectionView, uint64_t* outTileClassificationElapsedTime); - - - /** - Gets the number of GPU ticks spent intersecting reflection rays with the depth buffer. - - \param context The context to be used. - \param reflectionView The resource for the reflection view. - \param outIntersectionElapsedTime The number of GPU ticks spent intersecting reflection rays with the depth buffer. - \return The corresponding error code. - - \note This method will only function if the reflection view was created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag. - Also, note that it will actually return the time that was spent resolving frameCountBeforeMemoryReuse frames ago. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrReflectionViewGetIntersectionElapsedTime(FfxSssrContext context, FfxSssrReflectionView reflectionView, uint64_t* outIntersectionElapsedTime); - - /** - Gets the number of GPU ticks spent denoising. - - \param context The context to be used. - \param reflectionView The resource for the reflection view. - \param outDenoisingElapsedTime The number of GPU ticks spent denoising. - \return The corresponding error code. 
- - \note This method will only function if the reflection view was created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag. - Also, note that it will actually return the time that was spent denoising frameCountBeforeMemoryReuse frames ago. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrReflectionViewGetDenoisingElapsedTime(FfxSssrContext context, FfxSssrReflectionView reflectionView, uint64_t* outDenoisingElapsedTime); - - /** - Gets the view and projection matrices for the reflection view. - - \param context The context to be used. - \param reflectionView The resource for the reflection view. - \param outViewMatrix The output value for the view matrix. - \param outProjectionMatrix The output value for the projection matrix. - \return The corresponding error code. - - \note The output matrices will be 4x4 row-major matrices. - */ - FFX_SSSR_API FfxSssrStatus ffxSssrReflectionViewGetCameraParameters(FfxSssrContext context, FfxSssrReflectionView reflectionView, float* outViewMatrix, float* outProjectionMatrix); - - /** - Sets the view and projection matrices for the reflection view. - - \param context The context to be used. - \param reflectionView The resource for the reflection view. - \param pViewMatrix The input value for the view matrix. - \param pProjectionMatrix The input value for the projection matrix. - \return The corresponding error code. - - \note The input matrices are expected to be 4x4 row-major matrices. 
- */ - FFX_SSSR_API FfxSssrStatus ffxSssrReflectionViewSetCameraParameters(FfxSssrContext context, FfxSssrReflectionView reflectionView, const float* pViewMatrix, const float* pProjectionMatrix); - -#ifdef __cplusplus -} -#endif diff --git a/ffx-sssr/inc/ffx_sssr_d3d12.h b/ffx-sssr/inc/ffx_sssr_d3d12.h deleted file mode 100644 index a3a938b..0000000 --- a/ffx-sssr/inc/ffx_sssr_d3d12.h +++ /dev/null @@ -1,60 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include - -/** - The parameters for creating a Direct3D12 context. -*/ -typedef struct FfxSssrD3D12CreateContextInfo -{ - ID3D12Device* pDevice; - ID3D12GraphicsCommandList* pUploadCommandList; ///< Command list to upload static resources. 
The application has to synchronize to make sure the uploads are done. -} FfxSssrD3D12CreateContextInfo; - -/** - The parameters for creating a Direct3D12 reflection view. -*/ -typedef struct FfxSssrD3D12CreateReflectionViewInfo -{ - DXGI_FORMAT sceneFormat; ///< The format of the sceneSRV to allow creating matching internal resources. - D3D12_CPU_DESCRIPTOR_HANDLE sceneSRV; ///< The rendered scene without reflections. The descriptor handle must be allocated on a heap allowing CPU reads. - D3D12_CPU_DESCRIPTOR_HANDLE depthBufferHierarchySRV; ///< Full downsampled depth buffer. Each lower detail mip containing the minimum values of the higher detailed mip. The descriptor handle must be allocated on a heap allowing CPU reads. - D3D12_CPU_DESCRIPTOR_HANDLE motionBufferSRV; ///< The per pixel motion vectors. The descriptor handle must be allocated on a heap allowing CPU reads. - D3D12_CPU_DESCRIPTOR_HANDLE normalBufferSRV; ///< The surface normals in world space. Each channel mapped to [0, 1]. The descriptor handle must be allocated on a heap allowing CPU reads. - D3D12_CPU_DESCRIPTOR_HANDLE roughnessBufferSRV; ///< Perceptual roughness squared per pixel. The descriptor handle must be allocated on a heap allowing CPU reads. - D3D12_CPU_DESCRIPTOR_HANDLE normalHistoryBufferSRV; ///< Last frames normalBufferSRV. The descriptor handle must be allocated on a heap allowing CPU reads. - D3D12_CPU_DESCRIPTOR_HANDLE roughnessHistoryBufferSRV; ///< Last frames roughnessHistoryBufferSRV. The descriptor handle must be allocated on a heap allowing CPU reads. - D3D12_CPU_DESCRIPTOR_HANDLE environmentMapSRV; ///< Environment cube map serving as a fallback for ray misses. The descriptor handle must be allocated on a heap allowing CPU reads. - const D3D12_SAMPLER_DESC * pEnvironmentMapSamplerDesc; ///< Description for the environment map sampler. - D3D12_CPU_DESCRIPTOR_HANDLE reflectionViewUAV; ///< The fully resolved reflection view. Make sure to synchronize for UAV writes. 
The descriptor handle must be allocated on a heap allowing CPU reads. -} FfxSssrD3D12CreateReflectionViewInfo; - -/** - \brief The parameters for encoding Direct3D12 device commands. -*/ -typedef struct FfxSssrD3D12CommandEncodeInfo -{ - ID3D12GraphicsCommandList* pCommandList; ///< The Direct3D12 command list to be used for command encoding. -} FfxSssrD3D12CommandEncodeInfo; diff --git a/ffx-sssr/inc/ffx_sssr_vk.h b/ffx-sssr/inc/ffx_sssr_vk.h deleted file mode 100644 index e0ba2ed..0000000 --- a/ffx-sssr/inc/ffx_sssr_vk.h +++ /dev/null @@ -1,61 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include - -/** - The parameters for creating a Vulkan context. 
-*/ -typedef struct FfxSssrVkCreateContextInfo -{ - VkDevice device; - VkPhysicalDevice physicalDevice; - VkCommandBuffer uploadCommandBuffer; ///< Vulkan command buffer to upload static resources. The application has to begin the command buffer and has to handle synchronization to make sure the uploads are done. -} FfxSssrVkCreateContextInfo; - -/** - The parameters for creating a Vulkan reflection view. -*/ -typedef struct FfxSssrVkCreateReflectionViewInfo -{ - VkFormat sceneFormat; ///< The format of the sceneSRV to allow creating matching internal resources. - VkImageView sceneSRV; ///< The rendered scene without reflections. - VkImageView depthBufferHierarchySRV; ///< Full downsampled depth buffer. Each lower detail mip containing the minimum values of the higher detailed mip. - VkImageView motionBufferSRV; ///< The per pixel motion vectors. - VkImageView normalBufferSRV; ///< The surface normals in world space. Each channel mapped to [0, 1]. - VkImageView roughnessBufferSRV; ///< Perceptual roughness squared per pixel. - VkImageView normalHistoryBufferSRV; ///< Last frames normalBufferSRV. - VkImageView roughnessHistoryBufferSRV; ///< Last frames roughnessHistoryBufferSRV. - VkSampler environmentMapSampler; ///< Environment map sampler used when looking up the fallback for ray misses. - VkImageView environmentMapSRV; ///< Environment map serving as a fallback for ray misses. - VkImageView reflectionViewUAV; ///< The fully resolved reflection view. Make sure to synchronize for UAV writes. - VkCommandBuffer uploadCommandBuffer; ///< Vulkan command buffer to upload static resources. The application has to begin the command buffer and has to handle synchronization to make sure the uploads are done. -} FfxSssrVkCreateReflectionViewInfo; - -/** - \brief The parameters for encoding Vulkan device commands. -*/ -typedef struct FfxSssrVkCommandEncodeInfo -{ - VkCommandBuffer commandBuffer; ///< The Vulkan command buffer to be used for command encoding. 
-} FfxSssrVkCommandEncodeInfo; diff --git a/ffx-sssr/shaders/classify_tiles.hlsl b/ffx-sssr/shaders/classify_tiles.hlsl deleted file mode 100644 index 7202541..0000000 --- a/ffx-sssr/shaders/classify_tiles.hlsl +++ /dev/null @@ -1,131 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ - -#ifndef FFX_SSSR_CLASSIFY_TILES -#define FFX_SSSR_CLASSIFY_TILES - -// In: -[[vk::binding(0, 1)]] Texture2D g_roughness : register(t0); - -// Out: -[[vk::binding(1, 1)]] RWBuffer g_tile_list : register(u0); -[[vk::binding(2, 1)]] RWBuffer g_ray_list : register(u1); -[[vk::binding(3, 1)]] globallycoherent RWBuffer g_tile_counter : register(u2); -[[vk::binding(4, 1)]] globallycoherent RWBuffer g_ray_counter : register(u3); -[[vk::binding(5, 1)]] RWTexture2D g_temporally_denoised_reflections : register(u4); -[[vk::binding(6, 1)]] RWTexture2D g_temporally_denoised_reflections_history : register(u5); -[[vk::binding(7, 1)]] RWTexture2D g_ray_lengths : register(u6); -[[vk::binding(8, 1)]] RWTexture2D g_temporal_variance : register(u7); -[[vk::binding(9, 1)]] RWTexture2D g_denoised_reflections : register(u8); - -groupshared uint g_ray_count; -groupshared uint g_ray_base_index; -groupshared uint g_denoise_count; - -[numthreads(8, 8, 1)] -void main(uint2 did : SV_DispatchThreadID, uint group_index : SV_GroupIndex) -{ - bool is_first_lane_of_wave = WaveIsFirstLane(); - bool is_first_lane_of_threadgroup = group_index == 0; - - // First we figure out on a per thread basis if we need to shoot a reflection ray. - uint2 screen_size; - g_roughness.GetDimensions(screen_size.x, screen_size.y); - - // Disable offscreen pixels - bool needs_ray = !(did.x >= screen_size.x || did.y >= screen_size.y); - - // Dont shoot a ray on very rough surfaces. - float roughness = FfxSssrUnpackRoughness(g_roughness.Load(int3(did, 0))); - needs_ray = needs_ray && IsGlossy(roughness); - - // Also we dont need to run the denoiser on mirror reflections. - bool needs_denoiser = needs_ray && !IsMirrorReflection(roughness); - - // Decide which ray to keep - bool is_base_ray = IsBaseRay(did, g_samples_per_quad); - needs_ray = needs_ray && (!needs_denoiser || is_base_ray); // Make sure to not deactivate mirror reflection rays. 
- - if (g_temporal_variance_guided_tracing_enabled && needs_denoiser && !needs_ray) - { - float temporal_variance = g_temporal_variance.Load(did); - bool has_temporal_variance = temporal_variance != 0.0; - - // If temporal variance is too high, we enforce a ray anyway. - needs_ray = needs_ray || has_temporal_variance; - } - - // Now we know for each thread if it needs to shoot a ray and wether or not a denoiser pass has to run on this pixel. - // Thus, we need to compact the rays and append them all at once to the ray list. - // Also, if there is at least one pixel in that tile that needs a denoiser, we have to append that tile to the tile list. - - if (is_first_lane_of_threadgroup) - { - g_ray_count = 0; - g_denoise_count = 0; - } - GroupMemoryBarrierWithGroupSync(); // Wait for reset to finish - - uint local_ray_index_in_wave = WavePrefixCountBits(needs_ray); - uint wave_ray_count = WaveActiveCountBits(needs_ray); - bool wave_needs_denoiser = WaveActiveAnyTrue(needs_denoiser); - uint wave_count = wave_needs_denoiser ? 1 : 0; - - uint local_ray_index_of_wave; - if (is_first_lane_of_wave) - { - InterlockedAdd(g_ray_count, wave_ray_count, local_ray_index_of_wave); - InterlockedAdd(g_denoise_count, wave_count); - } - local_ray_index_of_wave = WaveReadLaneFirst(local_ray_index_of_wave); - - GroupMemoryBarrierWithGroupSync(); // Wait for ray compaction to finish - - if (is_first_lane_of_threadgroup) - { - bool must_denoise = g_denoise_count > 0; - uint denoise_count = must_denoise ? 1 : 0; - uint ray_count = g_ray_count; - - uint tile_index; - uint ray_base_index = 0; - - InterlockedAdd(g_tile_counter[0], denoise_count, tile_index); - InterlockedAdd(g_ray_counter[0], ray_count, ray_base_index); - - int cleaned_index = must_denoise ? 
tile_index : -1; - g_tile_list[cleaned_index] = Pack(did); // Write out pixel coords of upper left pixel - g_ray_base_index = ray_base_index; - } - GroupMemoryBarrierWithGroupSync(); // Wait for ray base index to become available - - int2 target = needs_ray ? int2(-1, -1) : did; - int ray_index = needs_ray ? g_ray_base_index + local_ray_index_of_wave + local_ray_index_in_wave : -1; - - g_ray_list[ray_index] = Pack(did); // Write out pixel to trace - // Clear intersection targets as there wont be any ray that overwrites them - g_temporally_denoised_reflections[target] = 0; - g_ray_lengths[target] = 0; - g_temporal_variance[did] = needs_ray ? (1 - g_skip_denoiser) : 0; // Re-purpose g_temporal_variance to hold the information for the spatial pass if a ray has been shot. Always write 0 if no denoiser is running. -} - -#endif // FFX_SSSR_CLASSIFY_TILES \ No newline at end of file diff --git a/ffx-sssr/shaders/common.hlsl b/ffx-sssr/shaders/common.hlsl deleted file mode 100644 index c882630..0000000 --- a/ffx-sssr/shaders/common.hlsl +++ /dev/null @@ -1,243 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#ifndef FFX_SSSR_COMMON -#define FFX_SSSR_COMMON - -#define FFX_SSSR_PI 3.14159265358979f -#define FFX_SSSR_GOLDEN_RATIO 1.61803398875f - -#define FFX_SSSR_FLOAT_MAX 3.402823466e+38 - -#define FFX_SSSR_FALSE 0 -#define FFX_SSSR_TRUE 1 - -#define FFX_SSSR_USE_ROUGHNESS_OVERRIDE FFX_SSSR_FALSE -#define FFX_SSSR_ROUGHNESS_OVERRIDE 0.1 - -#define FFX_SSSR_TEMPORAL_VARIANCE_THRESHOLD 0.0005 - -#if FFX_SSSR_USE_ROUGHNESS_OVERRIDE -float FfxSssrUnpackRoughness(FFX_SSSR_ROUGHNESS_TEXTURE_FORMAT packed) { return FFX_SSSR_ROUGHNESS_OVERRIDE; } -#else -FFX_SSSR_ROUGHNESS_UNPACK_FUNCTION -#endif - -FFX_SSSR_NORMALS_UNPACK_FUNCTION -FFX_SSSR_MOTION_VECTOR_UNPACK_FUNCTION -FFX_SSSR_DEPTH_UNPACK_FUNCTION -FFX_SSSR_SCENE_RADIANCE_UNPACK_FUNCTION - -// Common constants -[[vk::binding(0, 0)]] cbuffer Constants : register(b0) -{ - float4x4 g_inv_view_proj; - float4x4 g_proj; - float4x4 g_inv_proj; - float4x4 g_view; - float4x4 g_inv_view; - float4x4 g_prev_view_proj; - - uint g_frame_index; - uint g_max_traversal_intersections; - uint g_min_traversal_occupancy; - uint g_most_detailed_mip; - float g_temporal_stability_factor; - float g_depth_buffer_thickness; - uint g_samples_per_quad; - uint g_temporal_variance_guided_tracing_enabled; - float g_roughness_threshold; - uint g_skip_denoiser; -}; - -// Mat must be able to transform origin from its current space into screen space. 
-float3 ProjectPosition(float3 origin, float4x4 mat) -{ - float4 projected = mul(float4(origin, 1), mat); - projected.xyz /= projected.w; - projected.xy = 0.5 * projected.xy + 0.5; - projected.y = (1 - projected.y); - return projected.xyz; -} - -// Mat must be able to transform origin from screen space to a linear space. -float3 InvProjectPosition(float3 origin, float4x4 mat) -{ - origin.y = (1 - origin.y); - origin.xy = 2 * origin.xy - 1; - float4 projected = mul(float4(origin, 1), mat); - projected.xyz /= projected.w; - return projected.xyz; -} - -// Origin and direction must be in the same space and mat must be able to transform from that space into screen space. -float3 ProjectDirection(float3 origin, float3 direction, float3 screen_space_origin, float4x4 mat) -{ - float3 offsetted = ProjectPosition(origin + direction, mat); - return offsetted - screen_space_origin; -} - -struct Ray -{ - float3 origin; - float3 direction; -}; - -// Create a ray that originates at the depth buffer surface and points away from the camera. 
-Ray CreateViewSpaceRay(float3 screen_space_pos) -{ - float3 view_space_pos = InvProjectPosition(screen_space_pos, g_inv_proj); - Ray view_space_ray; - view_space_ray.origin = view_space_pos; - view_space_ray.direction = view_space_pos; - return view_space_ray; -} - -float3 LoadNormal(int2 index, Texture2D tex) -{ - return FfxSssrUnpackNormals(tex.Load(int3(index, 0))); -} - -float LoadRoughness(int2 index, Texture2D tex) -{ - return FfxSssrUnpackRoughness(tex.Load(int3(index, 0))); -} - -bool IsGlossy(float roughness) -{ - return roughness < g_roughness_threshold; -} - -bool IsMirrorReflection(float roughness) -{ - return roughness < 0.0001; -} - -float GetEdgeStoppingNormalWeight(float3 normal_p, float3 normal_q, float sigma) -{ - return pow(max(dot(normal_p, normal_q), 0.0), sigma); -} - -float GetEdgeStoppingRoughnessWeight(float roughness_p, float roughness_q, float sigma_min, float sigma_max) -{ - return 1.0 - smoothstep(sigma_min, sigma_max, abs(roughness_p - roughness_q)); -} - -min16float GetEdgeStoppingRoughnessWeightFP16(min16float roughness_p, min16float roughness_q, min16float sigma_min, min16float sigma_max) -{ - return 1.0 - smoothstep(sigma_min, sigma_max, abs(roughness_p - roughness_q)); -} - -// Roughness weight to prevent ghosting on pure mirror reflections -float GetRoughnessAccumulationWeight(float roughness) -{ - float near_singular_roughness = 0.00001; - return smoothstep(0.0, near_singular_roughness, roughness); -} - -float Gaussian(float x, float m, float sigma) -{ - float a = length(x - m) / sigma; - a *= a; - return exp(-0.5 * a); -} - -float Luminance(float3 clr) -{ - return max(dot(clr, float3(0.299, 0.587, 0.114)), 0.00001); -} - -uint Pack(uint2 coord) -{ - return (coord.x & 0xFFFF) | (coord.y & 0xFFFF) << 16; -} - -uint2 Unpack(uint packed) -{ - return uint2(packed & 0xFFFF, packed >> 16); -} - -bool IsBaseRay(uint2 did, uint samples_per_quad) -{ - switch (samples_per_quad) - { - case 1: - return ((did.x & 1) | (did.y & 1)) == 0; // 
Deactivates 3 out of 4 rays - case 2: - return (did.x & 1) == (did.y & 1); // Deactivates 2 out of 4 rays. Keeps diagonal. - default: // case 4: - return true; - } -} - -// Has to match the calculation in IsBaseRay. Assumes lane is in Z order. -// i.e. 4 consecutive lanes 0 1 2 3 -// are remapped to -// 0 1 -// 2 3 -uint GetBaseLane(uint lane, uint samples_per_quad) -{ - switch (samples_per_quad) - { - case 1: - return lane & (~0b11); // Map to upper left - case 2: - return lane ^ 0b1; // Toggle horizontal - default: // case 4: - return lane; // Identity - } -} - -uint PackFloat16(min16float2 v) -{ - uint2 p = f32tof16(float2(v)); - return p.x | (p.y << 16); -} - -min16float2 UnpackFloat16(uint a) -{ - float2 tmp = f16tof32( - uint2(a & 0xFFFF, a >> 16)); - return min16float2(tmp); -} - - -// From ffx_a.h - - - -uint BitfieldExtract(uint src, uint off, uint bits) { uint mask = (1 << bits) - 1; return (src >> off) & mask; } // ABfe -uint BitfieldInsert(uint src, uint ins, uint bits) { uint mask = (1 << bits) - 1; return (ins & mask) | (src & (~mask)); } // ABfiM - -// LANE TO 8x8 MAPPING -// =================== -// 00 01 08 09 10 11 18 19 -// 02 03 0a 0b 12 13 1a 1b -// 04 05 0c 0d 14 15 1c 1d -// 06 07 0e 0f 16 17 1e 1f -// 20 21 28 29 30 31 38 39 -// 22 23 2a 2b 32 33 3a 3b -// 24 25 2c 2d 34 35 3c 3d -// 26 27 2e 2f 36 37 3e 3f -uint2 RemapLane8x8(uint lane) { return uint2(BitfieldInsert(BitfieldExtract(lane, 2u, 3u), lane, 1u), BitfieldInsert(BitfieldExtract(lane, 3u, 3u), BitfieldExtract(lane, 1u, 2u), 2u)); } // ARmpRed8x8 - -#endif // FFX_SSSR_COMMON \ No newline at end of file diff --git a/ffx-sssr/shaders/intersect.hlsl b/ffx-sssr/shaders/intersect.hlsl deleted file mode 100644 index 92a1a5a..0000000 --- a/ffx-sssr/shaders/intersect.hlsl +++ /dev/null @@ -1,365 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ - -#ifndef FFX_SSSR_INTERSECT -#define FFX_SSSR_INTERSECT - -// In: -[[vk::binding(0, 1)]] Texture2D g_lit_scene : register(t0); // scene rendered with lighting and shadows -[[vk::binding(1, 1)]] Texture2D g_depth_buffer_hierarchy : register(t1); -[[vk::binding(2, 1)]] Texture2D g_normal : register(t2); -[[vk::binding(3, 1)]] Texture2D g_roughness : register(t3); -[[vk::binding(4, 1)]] TextureCube g_environment_map : register(t4); -[[vk::binding(5, 1)]] Buffer g_sobol_buffer : register(t5); -[[vk::binding(6, 1)]] Buffer g_ranking_tile_buffer : register(t6); -[[vk::binding(7, 1)]] Buffer g_scrambling_tile_buffer : register(t7); -[[vk::binding(8, 1)]] Buffer g_ray_list : register(t8); - -// Samplers: -[[vk::binding(9, 1)]] SamplerState g_linear_sampler : register(s0); -[[vk::binding(10, 1)]] SamplerState g_environment_map_sampler : register(s1); - -// Out: -[[vk::binding(11, 1)]] RWTexture2D g_intersection_result : register(u0); // reflection colors at the end of the intersect pass. -[[vk::binding(12, 1)]] RWTexture2D g_ray_lengths : register(u1); -[[vk::binding(13, 1)]] RWTexture2D g_denoised_reflections : register(u2); // Mirror reflections don't need to be denoised, the intersection pass can just write them to the final target. - -// Blue Noise Sampler by Eric Heitz. Returns a value in the range [0, 1]. 
-float SampleRandomNumber(in uint pixel_i, in uint pixel_j, in uint sample_index, in uint sample_dimension) -{ - // Wrap arguments - pixel_i = pixel_i & 127u; - pixel_j = pixel_j & 127u; - sample_index = sample_index & 255u; - sample_dimension = sample_dimension & 255u; - - // xor index based on optimized ranking - const uint ranked_sample_index = sample_index ^ g_ranking_tile_buffer[sample_dimension + (pixel_i + pixel_j * 128u) * 8u]; - - // Fetch value in sequence - uint value = g_sobol_buffer[sample_dimension + ranked_sample_index * 256u]; - - // If the dimension is optimized, xor sequence value based on optimized scrambling - value = value ^ g_scrambling_tile_buffer[(sample_dimension % 8u) + (pixel_i + pixel_j * 128u) * 8u]; - - // Convert to float and return - return (value + 0.5f) / 256.0f; -} - -float2 SampleRandomVector2(uint2 pixel) -{ - const uint sample_index = 0; - float2 u = float2( - fmod(SampleRandomNumber(pixel.x, pixel.y, sample_index, 0u) + (g_frame_index & 0xFFu) * FFX_SSSR_GOLDEN_RATIO, 1.0f), - fmod(SampleRandomNumber(pixel.x, pixel.y, sample_index, 1u) + (g_frame_index & 0xFFu) * FFX_SSSR_GOLDEN_RATIO, 1.0f)); - return u; -} - -#define M_PI FFX_SSSR_PI - -// http://jcgt.org/published/0007/04/01/paper.pdf by Eric Heitz -// Input Ve: view direction -// Input alpha_x, alpha_y: roughness parameters -// Input U1, U2: uniform random numbers -// Output Ne: normal sampled with PDF D_Ve(Ne) = G1(Ve) * max(0, dot(Ve, Ne)) * D(Ne) / Ve.z -float3 sampleGGXVNDF(float3 Ve, float alpha_x, float alpha_y, float U1, float U2) -{ - // Section 3.2: transforming the view direction to the hemisphere configuration - float3 Vh = normalize(float3(alpha_x * Ve.x, alpha_y * Ve.y, Ve.z)); - // Section 4.1: orthonormal basis (with special case if cross product is zero) - float lensq = Vh.x * Vh.x + Vh.y * Vh.y; - float3 T1 = lensq > 0 ? 
float3(-Vh.y, Vh.x, 0) * rsqrt(lensq) : float3(1, 0, 0); - float3 T2 = cross(Vh, T1); - // Section 4.2: parameterization of the projected area - float r = sqrt(U1); - float phi = 2.0 * M_PI * U2; - float t1 = r * cos(phi); - float t2 = r * sin(phi); - float s = 0.5 * (1.0 + Vh.z); - t2 = (1.0 - s) * sqrt(1.0 - t1 * t1) + s * t2; - // Section 4.3: reprojection onto hemisphere - float3 Nh = t1 * T1 + t2 * T2 + sqrt(max(0.0, 1.0 - t1 * t1 - t2 * t2)) * Vh; - // Section 3.4: transforming the normal back to the ellipsoid configuration - float3 Ne = normalize(float3(alpha_x * Nh.x, alpha_y * Nh.y, max(0.0, Nh.z))); - return Ne; -} - -float3 Sample_GGX_VNDF_Ellipsoid(float3 Ve, float alpha_x, float alpha_y, float U1, float U2) -{ - return sampleGGXVNDF(Ve, alpha_x, alpha_y, U1, U2); -} - -float3 Sample_GGX_VNDF_Hemisphere(float3 Ve, float alpha, float U1, float U2) -{ - return Sample_GGX_VNDF_Ellipsoid(Ve, alpha, alpha, U1, U2); -} - -float3x3 CreateTBN(float3 N) -{ - float3 U; - if (abs(N.z) > 0.0) - { - float k = sqrt(N.y * N.y + N.z * N.z); - U.x = 0.0; U.y = -N.z / k; U.z = N.y / k; - } - else - { - float k = sqrt(N.x * N.x + N.y * N.y); - U.x = N.y / k; U.y = -N.x / k; U.z = 0.0; - } - - float3x3 TBN; - TBN[0] = U; - TBN[1] = cross(N, U); - TBN[2] = N; - return transpose(TBN); -} - -float3 SampleReflectionVector(float3 view_direction, float3 normal, float roughness, int2 did) -{ - float3x3 tbn_transform = CreateTBN(normal); - float3 view_direction_tbn = mul(-view_direction, tbn_transform); - - float2 u = SampleRandomVector2(did); - - float3 sampled_normal_tbn = Sample_GGX_VNDF_Hemisphere(view_direction_tbn, roughness, u.x, u.y); - // sampled_normal_tbn = float3(0, 0, 1); // Overwrite normal sample to produce perfect reflection. - - float3 reflected_direction_tbn = reflect(-view_direction_tbn, sampled_normal_tbn); - - // Transform reflected_direction back to the initial space. 
- float3x3 inv_tbn_transform = transpose(tbn_transform); - return mul(reflected_direction_tbn, inv_tbn_transform); -} - -float2 GetMipResolution(float2 screen_dimensions, int mip_level) -{ - return screen_dimensions * pow(0.5, mip_level); -} - -float LoadDepth(float2 idx, int mip) -{ - return FfxSssrUnpackDepth(g_depth_buffer_hierarchy.Load(int3(idx, mip))); -} - -void InitialAdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_resolution, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, out float3 position, out float current_t) -{ - float2 current_mip_position = current_mip_resolution * origin.xy; - - // Intersect ray with the half box that is pointing away from the ray origin. - float2 xy_plane = floor(current_mip_position) + floor_offset; - xy_plane = xy_plane * current_mip_resolution_inv + uv_offset; - - // o + d * t = p' => t = (p' - o) / d - float2 t = (xy_plane - origin.xy) * inv_direction.xy; - current_t = min(t.x, t.y); - position = origin + current_t * direction; -} - - -bool AdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_position, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, float surface_z, inout float3 position, inout float current_t) -{ - // Create boundary planes - float2 xy_plane = floor(current_mip_position) + floor_offset; - xy_plane = xy_plane * current_mip_resolution_inv + uv_offset; - float3 boundary_planes = float3(xy_plane, surface_z); - - // Intersect ray with the half box that is pointing away from the ray origin. - // o + d * t = p' => t = (p' - o) / d - float3 t = (boundary_planes - origin) * inv_direction; - - // Prevent using z plane when shooting out of the depth buffer. - t.z = direction.z > 0 ? t.z : FFX_SSSR_FLOAT_MAX; - - // Choose nearest intersection with a boundary. - float t_min = min(min(t.x, t.y), t.z); - - // Smaller z means closer to the camera. 
- bool above_surface = surface_z > position.z; - - // Decide whether we are able to advance the ray until we hit the xy boundaries or if we had to clamp it at the surface. - bool skipped_tile = t_min != t.z && above_surface; - - // Make sure to only advance the ray if we're still above the surface. - current_t = above_surface ? t_min : current_t; - - // Advance ray - position = origin + current_t * direction; - - return skipped_tile; -} - -// Requires origin and direction of the ray to be in screen space [0, 1] x [0, 1] -float3 HierarchicalRaymarch(float3 origin, float3 direction, bool is_mirror, float2 screen_size, out bool valid_hit) -{ - int most_detailed_mip = is_mirror ? 0 : g_most_detailed_mip; - - const float3 inv_direction = direction != 0 ? 1.0 / direction : FFX_SSSR_FLOAT_MAX; - - // Start on mip with highest detail. - int current_mip = most_detailed_mip; - - // Could recompute these every iteration, but it's faster to hoist them out and update them. - float2 current_mip_resolution = GetMipResolution(screen_size, current_mip); - float2 current_mip_resolution_inv = rcp(current_mip_resolution); - - // Offset to the bounding boxes in uv space to intersect the ray with the center of the next pixel. - // This means we ever so slightly over shoot into the next region. - float2 uv_offset = 0.005 * exp2(most_detailed_mip) / screen_size; - uv_offset = direction.xy < 0 ? -uv_offset : uv_offset; - - // Offset applied depending on current mip resolution to move the boundary to the left/right upper/lower border depending on ray direction. - float2 floor_offset = direction.xy < 0 ? 0 : 1; - - // Initially advance ray to avoid immediate self intersections. 
- float current_t; - float3 position; - InitialAdvanceRay(origin, direction, inv_direction, current_mip_resolution, current_mip_resolution_inv, floor_offset, uv_offset, position, current_t); - - const uint min_traversal_occupancy = g_min_traversal_occupancy; - const uint max_traversal_intersections = g_max_traversal_intersections; - - bool exit_due_to_low_occupancy = false; - int i = 0; - while (i < max_traversal_intersections && current_mip >= most_detailed_mip && !exit_due_to_low_occupancy) - { - float2 current_mip_position = current_mip_resolution * position.xy; - float surface_z = LoadDepth(current_mip_position, current_mip); - bool skipped_tile = AdvanceRay(origin, direction, inv_direction, current_mip_position, current_mip_resolution_inv, floor_offset, uv_offset, surface_z, position, current_t); - current_mip += skipped_tile ? 1 : -1; - current_mip_resolution *= skipped_tile ? 0.5 : 2; - current_mip_resolution_inv *= skipped_tile ? 2 : 0.5; - ++i; - - exit_due_to_low_occupancy = !is_mirror && WaveActiveCountBits(true) <= min_traversal_occupancy; - } - - valid_hit = (i < max_traversal_intersections); - - return position; -} - -float ValidateHit(float3 hit, Ray reflected_ray, float3 world_space_ray_direction, float2 screen_size) -{ - // Reject hits outside the view frustum - if (any(hit.xy < 0) || any(hit.xy > 1)) - { - return 0; - } - - // Don't lookup radiance from the background. - int2 texel_coords = int2(screen_size * hit.xy); - float surface_z = LoadDepth(texel_coords / 2, 1); - if (surface_z == 1.0) - { - return 0; - } - - // We check if we hit the surface from the back, these should be rejected. 
- float3 hit_normal = LoadNormal(texel_coords, g_normal); - if (dot(hit_normal, world_space_ray_direction) > 0) - { - return 0; - } - - float3 view_space_surface = CreateViewSpaceRay(float3(hit.xy, surface_z)).origin; - float3 view_space_hit = CreateViewSpaceRay(hit).origin; - float distance = length(view_space_surface - view_space_hit); - - // Fade out hits near the screen borders - float2 fov = 0.05 * float2(screen_size.y / screen_size.x, 1); - float2 border = smoothstep(0, fov, hit.xy) * (1 - smoothstep(1 - fov, 1, hit.xy)); - float vignette = border.x * border.y; - - // We accept all hits that are within a reasonable minimum distance below the surface. - // Add constant in linear space to avoid growing of the reflections toward the reflected objects. - float confidence = 1 - smoothstep(0, g_depth_buffer_thickness, distance); - confidence *= confidence; - - return vignette * confidence; -} - -void Intersect(int2 did) -{ - uint2 screen_size; - g_intersection_result.GetDimensions(screen_size.x, screen_size.y); - - const uint skip_denoiser = g_skip_denoiser; - - float2 uv = (did + 0.5) / screen_size; - float3 world_space_normal = LoadNormal(did, g_normal); - float roughness = LoadRoughness(did, g_roughness); - bool is_mirror = IsMirrorReflection(roughness); - - int most_detailed_mip = is_mirror ? 
0 : g_most_detailed_mip; - float2 mip_resolution = GetMipResolution(screen_size, most_detailed_mip); - float z = LoadDepth(uv * mip_resolution, most_detailed_mip); - - Ray screen_space_ray; - screen_space_ray.origin = float3(uv, z); - - Ray view_space_ray = CreateViewSpaceRay(screen_space_ray.origin); - - float3 view_space_surface_normal = mul(float4(normalize(world_space_normal), 0), g_view).xyz; - float3 view_space_reflected_direction = SampleReflectionVector(view_space_ray.direction, view_space_surface_normal, roughness, did); - screen_space_ray.direction = ProjectDirection(view_space_ray.origin, view_space_reflected_direction, screen_space_ray.origin, g_proj); - - bool valid_hit; - float3 hit = HierarchicalRaymarch(screen_space_ray.origin, screen_space_ray.direction, is_mirror, screen_size, valid_hit); - float3 world_space_reflected_direction = mul(float4(view_space_reflected_direction, 0), g_inv_view).xyz; - float confidence = valid_hit ? ValidateHit(hit, screen_space_ray, world_space_reflected_direction, screen_size) : 0; - - float3 world_space_origin = InvProjectPosition(screen_space_ray.origin, g_inv_view_proj); - float3 world_space_hit = InvProjectPosition(hit, g_inv_view_proj); - float3 world_space_ray = world_space_hit - world_space_origin.xyz; - - float3 reflection_radiance = 0; - if (confidence > 0) - { - // Found an intersection with the depth buffer -> We can lookup the color from lit scene. - reflection_radiance = FfxSssrUnpackSceneRadiance(g_lit_scene.Load(int3(screen_size * hit.xy, 0))); - } - - // Sample environment map. 
- float3 environment_lookup = g_environment_map.SampleLevel(g_environment_map_sampler, world_space_reflected_direction, 0).xyz; - reflection_radiance = confidence * reflection_radiance + (1 - confidence) * environment_lookup; - - g_intersection_result[did] = float4(reflection_radiance, 1); - g_ray_lengths[did] = length(world_space_ray); - - // The denoisers won't copy the value of a mirror reflection, so we write it out to the final target - int2 idx = (is_mirror || skip_denoiser) ? did : int2(-1, -1); - g_denoised_reflections[idx] = float4(reflection_radiance.xyz, 1); -} - -[numthreads(8, 8, 1)] -void main(uint group_index : SV_GroupIndex, uint group_id : SV_GroupID) -{ - // We can encounter some remainders here. - // Worst case is that they are tracing a few more rays than necessary but they can't produce artifacts. - uint ray_index = group_id * 64 + group_index; - uint packed_coords = g_ray_list[ray_index]; - uint2 coords = Unpack(packed_coords); - Intersect((int2)coords); -} - -#endif // FFX_SSSR_INTERSECT \ No newline at end of file diff --git a/ffx-sssr/shaders/prepare_indirect_args.hlsl b/ffx-sssr/shaders/prepare_indirect_args.hlsl deleted file mode 100644 index e6afa0e..0000000 --- a/ffx-sssr/shaders/prepare_indirect_args.hlsl +++ /dev/null @@ -1,52 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ - -#ifndef FFX_SSSR_INDIRECT_ARGS -#define FFX_SSSR_INDIRECT_ARGS - -// In/Out: -[[vk::binding(0, 1)]] RWBuffer g_tile_counter : register(u0); -[[vk::binding(1, 1)]] RWBuffer g_ray_counter : register(u1); - -// Out: -[[vk::binding(2, 1)]] RWBuffer g_intersect_args : register(u2); -[[vk::binding(3, 1)]] RWBuffer g_denoiser_args : register(u3); - -[numthreads(1, 1, 1)] -void main() -{ - uint tile_counter = g_tile_counter[0]; - uint ray_counter = g_ray_counter[0]; - - g_tile_counter[0] = 0; - g_ray_counter[0] = 0; - - g_intersect_args[0] = (ray_counter + 63) / 64; - g_intersect_args[1] = 1; - g_intersect_args[2] = 1; - - g_denoiser_args[0] = tile_counter; - g_denoiser_args[1] = 1; - g_denoiser_args[2] = 1; -} - -#endif // FFX_SSSR_INDIRECT_ARGS \ No newline at end of file diff --git a/ffx-sssr/shaders/resolve_eaw.hlsl b/ffx-sssr/shaders/resolve_eaw.hlsl deleted file mode 100644 index 20a4e44..0000000 --- a/ffx-sssr/shaders/resolve_eaw.hlsl +++ /dev/null @@ -1,192 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#ifndef FFX_SSSR_EAW_RESOLVE -#define FFX_SSSR_EAW_RESOLVE - -// In: -[[vk::binding(0, 1)]] Texture2D g_normal : register(t0); -[[vk::binding(1, 1)]] Texture2D g_roughness : register(t1); -[[vk::binding(2, 1)]] Texture2D g_depth_buffer : register(t2); -[[vk::binding(3, 1)]] Buffer g_tile_list : register(t3); - -// Out: -[[vk::binding(4, 1)]] RWTexture2D g_temporally_denoised_reflections : register(u0); -[[vk::binding(5, 1)]] RWTexture2D g_denoised_reflections : register(u1); // will hold the reflection colors at the end of the resolve pass. - -groupshared uint g_shared_0[12][12]; -groupshared uint g_shared_1[12][12]; - -void LoadFromGroupSharedMemory(int2 idx, out min16float3 radiance, out min16float roughness) -{ - uint2 tmp; - tmp.x = g_shared_0[idx.x][idx.y]; - tmp.y = g_shared_1[idx.x][idx.y]; - - min16float4 min16tmp = min16float4(UnpackFloat16(tmp.x), UnpackFloat16(tmp.y)); - radiance = min16tmp.xyz; - roughness = min16tmp.w; -} - -void StoreInGroupSharedMemory(int2 idx, min16float3 radiance, min16float roughness) -{ - min16float4 tmp = min16float4(radiance, roughness); - g_shared_0[idx.x][idx.y] = PackFloat16(tmp.xy); - g_shared_1[idx.x][idx.y] = PackFloat16(tmp.zw); -} - -min16float3 LoadRadiance(int2 idx) -{ - return g_temporally_denoised_reflections.Load(int3(idx, 0)).xyz; -} - -min16float LoadRoughnessValue(int2 idx) -{ - return FfxSssrUnpackRoughness(g_roughness.Load(int3(idx, 0))); -} - -min16float4 ResolveScreenspaceReflections(int2 gtid, min16float center_roughness) -{ - const min16float roughness_sigma_min = 0.001; - const min16float roughness_sigma_max = 0.01; - - min16float3 sum = 0.0; - min16float total_weight = 
0.0; - - const int radius = 2; - for (int dy = -radius; dy <= radius; ++dy) - { - for (int dx = -radius; dx <= radius; ++dx) - { - int2 texel_coords = gtid + int2(dx, dy); - - min16float3 radiance; - min16float roughness; - LoadFromGroupSharedMemory(texel_coords, radiance, roughness); - - min16float weight = GetEdgeStoppingRoughnessWeightFP16(center_roughness, roughness, roughness_sigma_min, roughness_sigma_max); - sum += weight * radiance; - total_weight += weight; - } - } - - sum /= max(total_weight, 0.0001); - return min16float4(sum, 1); -} - -void LoadWithOffset(int2 did, int2 offset, out min16float3 radiance, out min16float roughness) -{ - did += offset; - radiance = LoadRadiance(did); - roughness = LoadRoughnessValue(did); -} - -void StoreWithOffset(int2 gtid, int2 offset, min16float3 radiance, min16float roughness) -{ - gtid += offset; - StoreInGroupSharedMemory(gtid, radiance, roughness); -} - -void InitializeGroupSharedMemory(int2 did, int2 gtid) -{ - int2 offset_0 = 0; - if (gtid.x < 4) - { - offset_0 = int2(8, 0); - } - else if (gtid.y >= 4) - { - offset_0 = int2(4, 4); - } - else - { - offset_0 = -gtid; // map all threads to the same memory location to guarantee cache hits. - } - - int2 offset_1 = 0; - if (gtid.y < 4) - { - offset_1 = int2(0, 8); - } - else - { - offset_1 = -gtid; // map all threads to the same memory location to guarantee cache hits. 
- } - - min16float3 radiance_0; - min16float roughness_0; - - min16float3 radiance_1; - min16float roughness_1; - - min16float3 radiance_2; - min16float roughness_2; - - /// XXA - /// XXA - /// BBC - - did -= 2; - LoadWithOffset(did, int2(0, 0), radiance_0, roughness_0); // X - LoadWithOffset(did, offset_0, radiance_1, roughness_1); // A & C - LoadWithOffset(did, offset_1, radiance_2, roughness_2); // B - - StoreWithOffset(gtid, int2(0, 0), radiance_0, roughness_0); // X - if (gtid.x < 4 || gtid.y >= 4) - { - StoreWithOffset(gtid, offset_0, radiance_1, roughness_1); // A & C - } - if (gtid.y < 4) - { - StoreWithOffset(gtid, offset_1, radiance_2, roughness_2); // B - } -} - -void Resolve(int2 did, int2 gtid) -{ - InitializeGroupSharedMemory(did, gtid); - GroupMemoryBarrierWithGroupSync(); - - gtid += 2; // Center threads in groupshared memory - - min16float3 center_radiance; - min16float center_roughness; - LoadFromGroupSharedMemory(gtid, center_radiance, center_roughness); - - if (!IsGlossy(center_roughness) || IsMirrorReflection(center_roughness)) - { - return; - } - - g_denoised_reflections[did.xy] = ResolveScreenspaceReflections(gtid, center_roughness); -} - -[numthreads(8, 8, 1)] -void main(uint2 group_thread_id : SV_GroupThreadID, uint group_id : SV_GroupID) -{ - uint packed_base_coords = g_tile_list[group_id]; - uint2 base_coords = Unpack(packed_base_coords); - uint2 coords = base_coords + group_thread_id; - Resolve((int2)coords, (int2)group_thread_id); -} - -#endif // FFX_SSSR_EAW_RESOLVE \ No newline at end of file diff --git a/ffx-sssr/shaders/resolve_spatial.hlsl b/ffx-sssr/shaders/resolve_spatial.hlsl deleted file mode 100644 index d40f823..0000000 --- a/ffx-sssr/shaders/resolve_spatial.hlsl +++ /dev/null @@ -1,265 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#ifndef FFX_SSSR_SPATIAL_RESOLVE -#define FFX_SSSR_SPATIAL_RESOLVE - -// In: -[[vk::binding(0, 1)]] Texture2D g_depth_buffer : register(t0); -[[vk::binding(1, 1)]] Texture2D g_normal : register(t1); -[[vk::binding(2, 1)]] Texture2D g_roughness : register(t2); -[[vk::binding(3, 1)]] Texture2D g_intersection_result : register(t3); // reflection colors at the end of the intersect pass. -[[vk::binding(4, 1)]] Texture2D g_has_ray : register(t4); -[[vk::binding(5, 1)]] Buffer g_tile_list : register(t5); - -// Out: -[[vk::binding(6, 1)]] RWTexture2D g_spatially_denoised_reflections : register(u0); -[[vk::binding(7, 1)]] RWTexture2D g_ray_lengths : register(u1); - -// Only really need 16x16 but 17x17 avoids bank conflicts. 
-groupshared uint g_shared_0[17][17]; -groupshared uint g_shared_1[17][17]; -groupshared uint g_shared_2[17][17]; -groupshared uint g_shared_3[17][17]; -groupshared float g_shared_depth[17][17]; - -min16float4 LoadRadianceFromGroupSharedMemory(int2 idx) -{ - uint2 tmp; - tmp.x = g_shared_0[idx.x][idx.y]; - tmp.y = g_shared_1[idx.x][idx.y]; - return min16float4(UnpackFloat16(tmp.x), UnpackFloat16(tmp.y)); -} - -min16float3 LoadNormalFromGroupSharedMemory(int2 idx) -{ - uint2 tmp; - tmp.x = g_shared_2[idx.x][idx.y]; - tmp.y = g_shared_3[idx.x][idx.y]; - return min16float4(UnpackFloat16(tmp.x), UnpackFloat16(tmp.y)).xyz; -} - -float LoadDepthFromGroupSharedMemory(int2 idx) -{ - return g_shared_depth[idx.x][idx.y]; -} - -void StoreInGroupSharedMemory(int2 idx, min16float4 radiance, min16float3 normal, float depth) -{ - g_shared_0[idx.x][idx.y] = PackFloat16(radiance.xy); - g_shared_1[idx.x][idx.y] = PackFloat16(radiance.zw); - g_shared_2[idx.x][idx.y] = PackFloat16(normal.xy); - g_shared_3[idx.x][idx.y] = PackFloat16(min16float2(normal.z, 0)); - g_shared_depth[idx.x][idx.y] = depth; -} - -min16float LoadRayLengthFP16(int2 idx) -{ - return g_ray_lengths.Load(idx); -} - -min16float3 LoadRadianceFP16(int2 idx) -{ - return g_intersection_result.Load(int3(idx, 0)).xyz; -} - -min16float3 LoadNormalFP16(int2 idx) -{ - return (min16float3) FfxSssrUnpackNormals(g_normal.Load(int3(idx, 0))); -} - -float LoadDepth(int2 idx) -{ - return FfxSssrUnpackDepth(g_depth_buffer.Load(int3(idx, 0))); -} - -bool LoadHasRay(int2 idx) -{ - return g_has_ray.Load(int3(idx, 0)); -} - -void LoadWithOffset(int2 did, int2 offset, out min16float ray_length, out min16float3 radiance, out min16float3 normal, out float depth, out bool has_ray) -{ - did += offset; - ray_length = LoadRayLengthFP16(did); - radiance = LoadRadianceFP16(did); - normal = LoadNormalFP16(did); - depth = LoadDepth(did); - has_ray = LoadHasRay(did); -} - -void StoreWithOffset(int2 gtid, int2 offset, min16float ray_length, 
min16float3 radiance, min16float3 normal, float depth) -{ - gtid += offset; - StoreInGroupSharedMemory(gtid, min16float4(radiance, ray_length), normal, depth); // Pack ray length and radiance together -} - -void InitializeGroupSharedMemory(int2 did, int2 gtid) -{ - const uint samples_per_quad = g_samples_per_quad; - - // First pass, load (1 + 3 + 8 + 3 + 1) = (16x16) region into shared memory. - // That is a guard band of 3, the inner region of 8 plus one additional band to catch base pixels if we didn't shoot rays for the respective edges/corners of the loaded region. - - int2 offset_0 = 0; - int2 offset_1 = int2(8, 0); - int2 offset_2 = int2(0, 8); - int2 offset_3 = int2(8, 8); - - min16float ray_length_0; - min16float3 radiance_0; - min16float3 normal_0; - float depth_0; - bool has_ray_0; - - min16float ray_length_1; - min16float3 radiance_1; - min16float3 normal_1; - float depth_1; - bool has_ray_1; - - min16float ray_length_2; - min16float3 radiance_2; - min16float3 normal_2; - float depth_2; - bool has_ray_2; - - min16float ray_length_3; - min16float3 radiance_3; - min16float3 normal_3; - float depth_3; - bool has_ray_3; - - /// XA - /// BC - - did -= 4; // 1 + 3 => additional band + left band - LoadWithOffset(did, offset_0, ray_length_0, radiance_0, normal_0, depth_0, has_ray_0); // X - LoadWithOffset(did, offset_1, ray_length_1, radiance_1, normal_1, depth_1, has_ray_1); // A - LoadWithOffset(did, offset_2, ray_length_2, radiance_2, normal_2, depth_2, has_ray_2); // B - LoadWithOffset(did, offset_3, ray_length_3, radiance_3, normal_3, depth_3, has_ray_3); // C - - // If own values are invalid, because no ray created them, lookup the values from the neighboring threads - const int lane_index = WaveGetLaneIndex(); - const int base_lane_index = GetBaseLane(lane_index, samples_per_quad); // As offsets are multiples of 8, we always get the same base lane index no matter the offset. 
- const bool is_base_ray = base_lane_index == lane_index; - - const int lane_index_0 = (has_ray_0 || is_base_ray) ? lane_index : base_lane_index; - const int lane_index_1 = (has_ray_1 || is_base_ray) ? lane_index : base_lane_index; - const int lane_index_2 = (has_ray_2 || is_base_ray) ? lane_index : base_lane_index; - const int lane_index_3 = (has_ray_3 || is_base_ray) ? lane_index : base_lane_index; - - radiance_0 = WaveReadLaneAt(radiance_0, lane_index_0); - radiance_1 = WaveReadLaneAt(radiance_1, lane_index_1); - radiance_2 = WaveReadLaneAt(radiance_2, lane_index_2); - radiance_3 = WaveReadLaneAt(radiance_3, lane_index_3); - - ray_length_0 = WaveReadLaneAt(ray_length_0, lane_index_0); - ray_length_1 = WaveReadLaneAt(ray_length_1, lane_index_1); - ray_length_2 = WaveReadLaneAt(ray_length_2, lane_index_2); - ray_length_3 = WaveReadLaneAt(ray_length_3, lane_index_3); - - StoreWithOffset(gtid, offset_0, ray_length_0, radiance_0, normal_0, depth_0); // X - StoreWithOffset(gtid, offset_1, ray_length_1, radiance_1, normal_1, depth_1); // A - StoreWithOffset(gtid, offset_2, ray_length_2, radiance_2, normal_2, depth_2); // B - StoreWithOffset(gtid, offset_3, ray_length_3, radiance_3, normal_3, depth_3); // C -} - -min16float3 ResolveScreenspaceReflections(int2 gtid, min16float3 center_radiance, min16float3 center_normal, float center_depth) -{ - float3 accumulated_radiance = center_radiance; - float accumulated_weight = 1; - - const float normal_sigma = 64.0; - const float depth_sigma = 0.02; - - // First 15 numbers of Halton(2,3) streteched to [-3,3] - const int2 reuse_offsets[] = { - 0, 1, - -2, 1, - 2, -3, - -3, 0, - 1, 2, - -1, -2, - 3, 0, - -3, 3, - 0, -3, - -1, -1, - 2, 1, - -2, -2, - 1, 0, - 0, 2, - 3, -1 - }; - const uint sample_count = 15; - - for (int i = 0; i < sample_count; ++i) - { - int2 new_idx = gtid + reuse_offsets[i]; - min16float3 normal = LoadNormalFromGroupSharedMemory(new_idx); - float depth = LoadDepthFromGroupSharedMemory(new_idx); - min16float4 
radiance = LoadRadianceFromGroupSharedMemory(new_idx); - float weight = 1 - * GetEdgeStoppingNormalWeight((float3)center_normal, (float3)normal, normal_sigma) - * Gaussian(center_depth, depth, depth_sigma) - ; - - // Accumulate all contributions. - accumulated_weight += weight; - accumulated_radiance += weight * radiance.xyz; - } - - accumulated_radiance /= max(accumulated_weight, 0.00001); - return accumulated_radiance; -} - -void Resolve(int2 did, int2 gtid) -{ - float center_roughness = LoadRoughness(did, g_roughness); - InitializeGroupSharedMemory(did, gtid); - GroupMemoryBarrierWithGroupSync(); - - if (!IsGlossy(center_roughness) || IsMirrorReflection(center_roughness)) - { - return; - } - - gtid += 4; // Center threads in groupshared memory - - min16float4 center_radiance = LoadRadianceFromGroupSharedMemory(gtid); - min16float3 center_normal = LoadNormalFromGroupSharedMemory(gtid); - float center_depth = LoadDepthFromGroupSharedMemory(gtid); - g_spatially_denoised_reflections[did.xy] = min16float4(ResolveScreenspaceReflections(gtid, center_radiance.xyz, center_normal, center_depth), 1); - g_ray_lengths[did.xy] = center_radiance.w; // ray_length -} - -[numthreads(64, 1, 1)] -void main(uint group_thread_id_linear : SV_GroupThreadID, uint group_id : SV_GroupID) -{ - uint packed_base_coords = g_tile_list[group_id]; - uint2 base_coords = Unpack(packed_base_coords); - uint2 group_thread_id_2d = RemapLane8x8(group_thread_id_linear); - uint2 coords = base_coords + group_thread_id_2d; - Resolve((int2)coords, (int2)group_thread_id_2d); -} - -#endif // FFX_SSSR_SPATIAL_RESOLVE \ No newline at end of file diff --git a/ffx-sssr/shaders/resolve_temporal.hlsl b/ffx-sssr/shaders/resolve_temporal.hlsl deleted file mode 100644 index 9ea6137..0000000 --- a/ffx-sssr/shaders/resolve_temporal.hlsl +++ /dev/null @@ -1,244 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#ifndef FFX_SSSR_TEMPORAL_RESOLVE -#define FFX_SSSR_TEMPORAL_RESOLVE - -// In: -[[vk::binding(0, 1)]] Texture2D g_normal : register(t0); -[[vk::binding(1, 1)]] Texture2D g_roughness : register(t1); -[[vk::binding(2, 1)]] Texture2D g_normal_history : register(t2); -[[vk::binding(3, 1)]] Texture2D g_roughness_history : register(t3); -[[vk::binding(4, 1)]] Texture2D g_depth_buffer : register(t4); -[[vk::binding(5, 1)]] Texture2D g_motion_vectors : register(t5); -[[vk::binding(6, 1)]] Texture2D g_temporally_denoised_reflections_history : register(t6); // reflection colors at the end of the temporal resolve pass of the previous frame. 
-[[vk::binding(7, 1)]] Texture2D g_ray_lengths : register(t7); -[[vk::binding(8, 1)]] Buffer g_tile_list : register(t8); - -// Out: -[[vk::binding(9, 1)]] RWTexture2D g_temporally_denoised_reflections : register(u0); -[[vk::binding(10, 1)]] RWTexture2D g_spatially_denoised_reflections : register(u1); // Technically still an input, but we have to keep it as UAV -[[vk::binding(11, 1)]] RWTexture2D g_temporal_variance : register(u2); - -// From "Temporal Reprojection Anti-Aliasing" -// https://github.com/playdeadgames/temporal -/********************************************************************** -Copyright (c) [2015] [Playdead] - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-********************************************************************/ -float3 ClipAABB(float3 aabb_min, float3 aabb_max, float3 prev_sample) -{ - // Main idea behind clipping - it prevents clustering when neighbor color space - // is distant from history sample - - // Here we find intersection between color vector and aabb color box - - // Note: only clips towards aabb center - float3 aabb_center = 0.5 * (aabb_max + aabb_min); - float3 extent_clip = 0.5 * (aabb_max - aabb_min) + 0.001; - - // Find color vector - float3 color_vector = prev_sample - aabb_center; - // Transform into clip space - float3 color_vector_clip = color_vector / extent_clip; - // Find max absolute component - color_vector_clip = abs(color_vector_clip); - float max_abs_unit = max(max(color_vector_clip.x, color_vector_clip.y), color_vector_clip.z); - - if (max_abs_unit > 1.0) - { - return aabb_center + color_vector / max_abs_unit; // clip towards color vector - } - else - { - return prev_sample; // point is inside aabb - } -} - -// Estimates spatial reflection radiance standard deviation -float3 EstimateStdDeviation(int2 did, RWTexture2D tex) -{ - float3 color_sum = 0.0; - float3 color_sum_squared = 0.0; - - int radius = 1; - float weight = (radius * 2.0 + 1.0) * (radius * 2.0 + 1.0); - - for (int dx = -radius; dx <= radius; dx++) - { - for (int dy = -radius; dy <= radius; dy++) - { - int2 texel_coords = did + int2(dx, dy); - float3 value = tex.Load(texel_coords).xyz; - color_sum += value; - color_sum_squared += value * value; - } - } - - float3 color_std = (color_sum_squared - color_sum * color_sum / weight) / (weight - 1.0); - return sqrt(max(color_std, 0.0)); -} - -float3 SampleRadiance(int2 texel_coords, Texture2D tex) -{ - return tex.Load(int3(texel_coords, 0)).xyz; -} - -float2 GetSurfaceReprojection(int2 did, float2 uv, float2 motion_vector) -{ - // Reflector position reprojection - float2 history_uv = uv - motion_vector; - return history_uv; -} - -float2 GetHitPositionReprojection(int2 
did, float2 uv, float reflected_ray_length) -{ - float z = FfxSssrUnpackDepth(g_depth_buffer.Load(int3(did, 0))); - float3 view_space_ray = CreateViewSpaceRay(float3(uv, z)).direction; - - // We start out with reconstructing the ray length in view space. - // This includes the portion from the camera to the reflecting surface as well as the portion from the surface to the hit position. - float surface_depth = length(view_space_ray); - float ray_length = surface_depth + reflected_ray_length; - - // We then perform a parallax correction by shooting a ray - // of the same length "straight through" the reflecting surface - // and reprojecting the tip of that ray to the previous frame. - view_space_ray /= surface_depth; // == normalize(view_space_ray) - view_space_ray *= ray_length; - float3 world_hit_position = mul(float4(view_space_ray, 1), g_inv_view).xyz; // This is the "fake" hit position if we would follow the ray straight through the surface. - float3 prev_hit_position = ProjectPosition(world_hit_position, g_prev_view_proj); - float2 history_uv = prev_hit_position.xy; - return history_uv; -} - -float SampleHistory(float2 uv, uint2 image_size, float3 normal, float roughness, float3 radiance_min, float3 radiance_max, out float3 radiance) -{ - int2 texel_coords = int2(image_size * uv); - radiance = SampleRadiance(texel_coords, g_temporally_denoised_reflections_history); - radiance = ClipAABB(radiance_min, radiance_max, radiance); - - float3 history_normal = LoadNormal(texel_coords, g_normal_history); - float history_roughness = LoadRoughness(texel_coords, g_roughness_history); - - const float normal_sigma = 8.0; - const float roughness_sigma_min = 0.01; - const float roughness_sigma_max = 0.1; - const float main_accumulation_factor = 0.90 + 0.1 * g_temporal_stability_factor; - - float accumulation_speed = main_accumulation_factor - * GetEdgeStoppingNormalWeight(normal, history_normal, normal_sigma) - * GetEdgeStoppingRoughnessWeight(roughness, history_roughness, 
roughness_sigma_min, roughness_sigma_max) - * GetRoughnessAccumulationWeight(roughness) - ; - - return saturate(accumulation_speed); -} - -float ComputeTemporalVariance(float3 history_radiance, float3 radiance) -{ - // Check temporal variance. - float history_luminance = Luminance(history_radiance); - float luminance = Luminance(radiance); - return abs(history_luminance - luminance) / max(max(history_luminance, luminance), 0.00001); -} - -float4 ResolveScreenspaceReflections(int2 did, float2 uv, uint2 image_size, float roughness) -{ - float3 normal = LoadNormal(did, g_normal); - float3 radiance = g_spatially_denoised_reflections.Load(did).xyz; - float3 radiance_history = g_temporally_denoised_reflections_history.Load(int3(did, 0)).xyz; - float ray_length = g_ray_lengths.Load(int3(did, 0)); - - // And clip it to the local neighborhood - float2 motion_vector = FfxSssrUnpackMotionVectors(g_motion_vectors.Load(int3(did, 0))); - float3 color_std = EstimateStdDeviation(did, g_spatially_denoised_reflections); - color_std *= (dot(motion_vector, motion_vector) == 0) ? 8 : 2.2; // Allow more accumulation if the surface did not move. - - float3 radiance_min = radiance.xyz - color_std; - float3 radiance_max = radiance + color_std; - - // Reproject point on the reflecting surface - float2 surface_reprojection_uv = GetSurfaceReprojection(did, uv, motion_vector); - - // Reproject hit point - float2 hit_reprojection_uv = GetHitPositionReprojection(did, uv, ray_length); - - float2 reprojection_uv; - reprojection_uv = (roughness < 0.05) ? 
hit_reprojection_uv : surface_reprojection_uv; - - float3 reprojection = 0; - float weight = 0; - if (all(reprojection_uv > 0.0) && all(reprojection_uv < 1.0)) - { - weight = SampleHistory(reprojection_uv, image_size, normal, roughness, radiance_min, radiance_max, reprojection); - } - - radiance = lerp(radiance, reprojection, weight); - float temporal_variance = ComputeTemporalVariance(radiance_history, radiance) > FFX_SSSR_TEMPORAL_VARIANCE_THRESHOLD ? 1 : 0; - return float4(radiance.xyz, temporal_variance); -} - -void Resolve(int2 did) -{ - float roughness = LoadRoughness(did, g_roughness); - if (!IsGlossy(roughness) || IsMirrorReflection(roughness)) - { - return; - } - - uint2 image_size; - g_temporally_denoised_reflections.GetDimensions(image_size.x, image_size.y); - float2 uv = float2(did.x + 0.5, did.y + 0.5) / image_size; - - float4 resolve = ResolveScreenspaceReflections(did.xy, uv, image_size, roughness); - g_temporally_denoised_reflections[did.xy] = float4(resolve.xyz, 1); - g_temporal_variance[did.xy] = resolve.w; -} - -[numthreads(8, 8, 1)] -void main(uint2 group_thread_id : SV_GroupThreadID, uint group_id : SV_GroupID) -{ - uint packed_base_coords = g_tile_list[group_id]; - uint2 base_coords = Unpack(packed_base_coords); - uint2 coords = base_coords + group_thread_id; - Resolve((int2)coords); -} - -#endif // FFX_SSSR_TEMPORAL_RESOLVE \ No newline at end of file diff --git a/ffx-sssr/sourceToHeader.py b/ffx-sssr/sourceToHeader.py deleted file mode 100644 index 30c79e5..0000000 --- a/ffx-sssr/sourceToHeader.py +++ /dev/null @@ -1,12 +0,0 @@ -import sys - -def escape(line): - line = line.replace ('"', '\\"') - line = line.replace ('\n', '\\n') - return line - -if __name__=='__main__': - print ('const char {}[] = '.format (sys.argv[2])) - for l in open(sys.argv[1], 'r').readlines(): - print ('"{}"'.format (escape (l))) - print (';') \ No newline at end of file diff --git a/ffx-sssr/src/context.cpp b/ffx-sssr/src/context.cpp deleted file mode 100644 index 
d0f2c53..0000000 --- a/ffx-sssr/src/context.cpp +++ /dev/null @@ -1,257 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "context.h" - -#ifdef FFX_SSSR_D3D12 - #include "ffx_sssr_d3d12.h" - #include "d3d12/context_d3d12.h" -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - #include "ffx_sssr_vk.h" - #include "vk/context_vk.h" -#endif // FFX_SSSR_VK - -namespace ffx_sssr -{ - /** - The constructor for the Context class. - - \param create_context_info The context creation information. - */ - Context::Context(FfxSssrCreateContextInfo const& create_context_info) - : frame_index_(0u) - , frame_count_before_reuse_(create_context_info.frameCountBeforeMemoryReuse) - , logging_function_(create_context_info.pLoggingCallbacks ? 
create_context_info.pLoggingCallbacks->pfnLogging : nullptr) - , logging_function_user_data_(create_context_info.pLoggingCallbacks ? create_context_info.pLoggingCallbacks->pUserData : nullptr) - , api_call_("ffxSssrCreateContext") - , reflection_view_id_dispenser_(create_context_info.maxReflectionViewCount) - , reflection_view_view_matrices_(create_context_info.maxReflectionViewCount) - , reflection_view_projection_matrices_(create_context_info.maxReflectionViewCount) - { - // Create platform-specific context(s) -#ifdef FFX_SSSR_D3D12 - if (create_context_info.pD3D12CreateContextInfo) - { - if (!create_context_info.pD3D12CreateContextInfo->pDevice) - { - throw reflection_error(*this, FFX_SSSR_STATUS_INVALID_VALUE, "pDevice must not be nullptr, cannot create Direct3D12 context"); - } - - context_d3d12_ = std::make_unique(*this, create_context_info); - } -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - if (create_context_info.pVkCreateContextInfo) - { - if (!create_context_info.pVkCreateContextInfo->device) - { - throw reflection_error(*this, FFX_SSSR_STATUS_INVALID_VALUE, "device must not be VK_NULL_HANDLE, cannot create Vulkan context"); - } - - context_vk_ = std::make_unique(*this, create_context_info); - } -#endif // FFX_SSSR_VK - } - - /** - The destructor for the Context class. - */ - Context::~Context() - { - } - - /** - Destroys the object. - - \param object_id The identifier of the object to be destroyed. 
- */ - void Context::DestroyObject(std::uint64_t object_id) - { - if (!IsObjectValid(object_id)) - { - return; // object was already destroyed - } - - auto const resource_type = GetResourceType(object_id); - - switch (resource_type) - { - case kResourceType_ReflectionView: - { - reflection_view_view_matrices_.Erase(ID(object_id)); - reflection_view_projection_matrices_.Erase(ID(object_id)); - -#ifdef FFX_SSSR_D3D12 - if (context_d3d12_) - context_d3d12_->reflection_views_.Erase(ID(object_id)); -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - if (context_vk_) - context_vk_->reflection_views_.Erase(ID(object_id)); -#endif // FFX_SSSR_VK - - reflection_view_id_dispenser_.FreeId(object_id); - } - break; - default: - { - FFX_SSSR_ASSERT(0); // should never happen - } - break; - } - } - - /** - Checks whether the object is valid. - - \param object_id The identifier of the object to be checked. - \return true if the object is still valid, false otherwise. - */ - bool Context::IsObjectValid(std::uint64_t object_id) const - { - auto const resource_type = GetResourceType(object_id); - - switch (resource_type) - { - case kResourceType_ReflectionView: - { - if (reflection_view_id_dispenser_.IsValid(object_id)) - { - return true; - } - } - break; - default: - { - FFX_SSSR_ASSERT(0); // should never happen - } - break; - } - - return false; - } - - /** - Creates the reflection view. - - \param reflection_view_id The identifier of the reflection view object. - \param create_reflection_view_info The reflection view creation information. 
- */ - void Context::CreateReflectionView(std::uint64_t reflection_view_id, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info) - { -#ifdef FFX_SSSR_D3D12 - if (context_d3d12_ && create_reflection_view_info.pD3D12CreateReflectionViewInfo) - context_d3d12_->CreateReflectionView(reflection_view_id, create_reflection_view_info); -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - if (context_vk_ && create_reflection_view_info.pVkCreateReflectionViewInfo) - context_vk_->CreateReflectionView(reflection_view_id, create_reflection_view_info); -#endif // FFX_SSSR_VK - } - - /** - Resolves the reflection view. - - \param reflection_view_id The identifier of the reflection view object. - \param resolve_reflection_view_info The reflection view resolve information. - */ - void Context::ResolveReflectionView(std::uint64_t reflection_view_id, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info) - { - FFX_SSSR_ASSERT(reflection_view_view_matrices_.At(ID(reflection_view_id))); // not created properly? - FFX_SSSR_ASSERT(reflection_view_projection_matrices_.At(ID(reflection_view_id))); - -#ifdef FFX_SSSR_D3D12 - context_d3d12_->ResolveReflectionView(reflection_view_id, resolve_reflection_view_info); -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - context_vk_->ResolveReflectionView(reflection_view_id, resolve_reflection_view_info); -#endif // FFX_SSSR_VK - } - - /** - Gets the number of GPU ticks spent in the tile classification pass when resolving the reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent in the tile classification pass when resolving the view. 
- */ - void Context::GetReflectionViewTileClassificationElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(IsOfType(reflection_view_id) && IsObjectValid(reflection_view_id)); - -#ifdef FFX_SSSR_D3D12 - if (context_d3d12_) - context_d3d12_->GetReflectionViewTileClassificationElapsedTime(reflection_view_id, elapsed_time); -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - if (context_vk_) - context_vk_->GetReflectionViewTileClassificationElapsedTime(reflection_view_id, elapsed_time); -#endif // FFX_SSSR_VK - } - - /** - Gets the number of GPU ticks spent resolving the reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent resolving the view. - */ - void Context::GetReflectionViewIntersectionElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(IsOfType(reflection_view_id) && IsObjectValid(reflection_view_id)); - -#ifdef FFX_SSSR_D3D12 - if (context_d3d12_) - context_d3d12_->GetReflectionViewIntersectionElapsedTime(reflection_view_id, elapsed_time); -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - if (context_vk_) - context_vk_->GetReflectionViewIntersectionElapsedTime(reflection_view_id, elapsed_time); -#endif // FFX_SSSR_VK - } - - /** - Gets the number of GPU ticks spent denoising for the reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent denoising. 
- */ - void Context::GetReflectionViewDenoisingElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(IsOfType(reflection_view_id) && IsObjectValid(reflection_view_id)); - -#ifdef FFX_SSSR_D3D12 - if (context_d3d12_) - context_d3d12_->GetReflectionViewDenoisingElapsedTime(reflection_view_id, elapsed_time); -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - if (context_vk_) - context_vk_->GetReflectionViewDenoisingElapsedTime(reflection_view_id, elapsed_time); -#endif // FFX_SSSR_VK - } -} diff --git a/ffx-sssr/src/context.h b/ffx-sssr/src/context.h deleted file mode 100644 index d5b7454..0000000 --- a/ffx-sssr/src/context.h +++ /dev/null @@ -1,137 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#pragma once - -#include -#include - -#include "float3.h" -#include "memory.h" -#include "matrix4.h" -#include "reflection_error.h" -#include "resources.h" - -namespace ffx_sssr -{ - class ContextD3D12; - class ContextVK; - - /** - The Context class encapsulates the data for a single execution context. - - \note An object identifier possesses the following structure: - - top 16 bits: resource identifier (see kResourceType_Xxx). - - next 16 bits: generational identifier (so deleting twice does not crash). - - bottom 32 bits: object index (for looking up attached components). - */ - class Context - { - FFX_SSSR_NON_COPYABLE(Context); - - public: - Context(FfxSssrCreateContextInfo const& create_context_info); - ~Context(); - - inline std::uint32_t& GetFrameIndex(); - inline std::uint32_t GetFrameIndex() const; - inline std::uint32_t GetFrameCountBeforeReuse() const; - - template - void CreateObject(std::uint64_t& object_id); - void DestroyObject(std::uint64_t object_id); - - template - bool IsOfType(std::uint64_t object_id) const; - bool IsObjectValid(std::uint64_t object_id) const; - - template - inline std::uint32_t GetObjectCount() const; - template - inline std::uint32_t GetMaxObjectCount() const; - - inline ContextD3D12* GetContextD3D12(); - inline ContextD3D12 const* GetContextD3D12() const; - - inline ContextVK* GetContextVK(); - inline ContextVK const* GetContextVK() const; - - void CreateReflectionView(std::uint64_t reflection_view_id, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info); - void ResolveReflectionView(std::uint64_t reflection_view_id, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info); - - inline char const* GetAPICall() const; - inline void SetAPICall(char const* api_call); - - inline static char const* GetErrorName(FfxSssrStatus error); - inline void Error(FfxSssrStatus error, char const* format, ...) 
const; - inline void Error(FfxSssrStatus error, char const* format, va_list args) const; - inline void AdvanceToNextFrame(); - - void GetReflectionViewTileClassificationElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - void GetReflectionViewIntersectionElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - void GetReflectionViewDenoisingElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - - inline void GetReflectionViewViewMatrix(std::uint64_t reflection_view_id, matrix4& view_matrix) const; - inline void SetReflectionViewViewMatrix(std::uint64_t reflection_view_id, matrix4 const& view_matrix); - inline void GetReflectionViewProjectionMatrix(std::uint64_t reflection_view_id, matrix4& projection_matrix) const; - inline void SetReflectionViewProjectionMatrix(std::uint64_t reflection_view_id, matrix4 const& projection_matrix); - - protected: - friend class ContextD3D12; - friend class ContextVK; - - static inline ResourceType GetResourceType(std::uint64_t object_id); - static inline void SetResourceType(std::uint64_t& object_id, ResourceType resource_type); - - inline bool CreateObject(std::uint64_t& object_id, ResourceType resource_type, IdDispenser& id_dispenser); - - // The index of the current frame. - std::uint32_t frame_index_; - // The number of frames before memory can be re-used. - std::uint32_t const frame_count_before_reuse_; - // The logging function to be used to print out messages. - PFN_ffxSssrLoggingFunction logging_function_; - // The user data to be supplied to the logging function. - void* logging_function_user_data_; - // The API call that is currently being executed. - char const* api_call_; - -#ifdef FFX_SSSR_D3D12 - // The Direct3D12 context object. - std::unique_ptr context_d3d12_; -#endif // FFX_SSSR_D3D12 - -#ifdef FFX_SSSR_VK - // The Direct3D12 context object. 
- std::unique_ptr context_vk_; -#endif // FFX_SSSR_VK - - // The list of reflection view identifiers. - IdDispenser reflection_view_id_dispenser_; - - // The array of per reflection view view matrices. - SparseArray reflection_view_view_matrices_; - // The array of per reflection view projection matrices. - SparseArray reflection_view_projection_matrices_; - }; -} - -#include "context.inl" diff --git a/ffx-sssr/src/context.inl b/ffx-sssr/src/context.inl deleted file mode 100644 index 5150f6f..0000000 --- a/ffx-sssr/src/context.inl +++ /dev/null @@ -1,399 +0,0 @@ -#include "context.h" -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - Creates a new reflection view. - - \param object_id The identifier of the new reflection view. 
- */ - template<> - inline void Context::CreateObject(std::uint64_t& object_id) - { - if (!CreateObject(object_id, kResourceType_ReflectionView, reflection_view_id_dispenser_)) - { - throw reflection_error(*this, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Unable to create a new reflection view resource"); - } - - // Populate the default reflection view properties - matrix4 const identity_matrix; - SetReflectionViewViewMatrix(object_id, identity_matrix); - SetReflectionViewProjectionMatrix(object_id, identity_matrix); - } - - /** - Creates a new object. - - \param object_id The identifier of the new object. - */ - template - void Context::CreateObject(std::uint64_t& object_id) - { - (void)object_id; - - static_assert(0, "An unsupported resource type was supplied"); - } - - /** - Gets the index of the current frame. - - \return The index of the current frame. - */ - std::uint32_t& Context::GetFrameIndex() - { - return frame_index_; - } - - /** - Gets the index of the current frame. - - \return The index of the current frame. - */ - std::uint32_t Context::GetFrameIndex() const - { - return frame_index_; - } - - /** - Gets the number of frames before memory can be re-used. - - \return The number of frames before memory can be re-used. - */ - std::uint32_t Context::GetFrameCountBeforeReuse() const - { - return frame_count_before_reuse_; - } - - /** - Checks whether the object is of the given type. - - \param object_id The identifier of the object to be checked. - \return true if the object is of the given type, false otherwise. - */ - template - bool Context::IsOfType(std::uint64_t object_id) const - { - return (GetResourceType(object_id) == RESOURCE_TYPE ? true : false); - } - - /** - Gets the number of objects for the given type. - - \return The number of created objects. 
- */ - template - std::uint32_t Context::GetObjectCount() const - { - switch (RESOURCE_TYPE) - { - case kResourceType_ReflectionView: - return reflection_view_id_dispenser_.GetIdCount(); - default: - { - FFX_SSSR_ASSERT(0); // should never happen - } - break; - } - - return 0u; - } - - /** - Gets the maximum number of objects for the given type. - - \return The maximum number of objects. - */ - template - std::uint32_t Context::GetMaxObjectCount() const - { - switch (RESOURCE_TYPE) - { - case kResourceType_ReflectionView: - return reflection_view_id_dispenser_.GetMaxIdCount(); - default: - { - FFX_SSSR_ASSERT(0); // should never happen - } - break; - } - - return 0u; - } - - /** - Gets the Direct3D12 context. - - \return The Direct3D12 context. - */ - ContextD3D12* Context::GetContextD3D12() - { -#ifdef FFX_SSSR_D3D12 - return context_d3d12_.get(); -#endif // FFX_SSSR_D3D12 - - return nullptr; - } - - /** - Gets the Direct3D12 context. - - \return The Direct3D12 context. - */ - ContextD3D12 const* Context::GetContextD3D12() const - { -#ifdef FFX_SSSR_D3D12 - return context_d3d12_.get(); -#endif // FFX_SSSR_D3D12 - - return nullptr; - } - - /** - Gets the Vulkan context. - - \return The Vulkan context. - */ - inline ContextVK * Context::GetContextVK() - { -#ifdef FFX_SSSR_VK - return context_vk_.get(); -#endif // FFX_SSSR_VK - - return nullptr; - } - - /** - Gets the Vulkan context. - - \return The Vulkan context. - */ - inline ContextVK const * Context::GetContextVK() const - { -#ifdef FFX_SSSR_VK - return context_vk_.get(); -#endif // FFX_SSSR_VK - - return nullptr; - } - - /** - Gets the current API call. - - \return The current API call. - */ - char const* Context::GetAPICall() const - { - return (api_call_ ? api_call_ : ""); - } - - /** - Sets the current API call. - - \param api_call The current API call. - */ - void Context::SetAPICall(char const* api_call) - { - api_call_ = api_call; - } - - /** - Gets the error name. 
- - \param error The error code to be queried. - \return The name corresponding to the error code. - */ - char const* Context::GetErrorName(FfxSssrStatus error) - { - switch (error) - { - case FFX_SSSR_STATUS_OK: - return "OK"; - case FFX_SSSR_STATUS_INVALID_VALUE: - return "Invalid value"; - case FFX_SSSR_STATUS_INVALID_OPERATION: - return "Invalid operation"; - case FFX_SSSR_STATUS_OUT_OF_MEMORY: - return "Out of memory"; - case FFX_SSSR_STATUS_INCOMPATIBLE_API: - return "Incompatible API"; - case FFX_SSSR_STATUS_INTERNAL_ERROR: - return "Internal error"; - default: - break; - } - - return ""; - } - - /** - Signals the error. - - \param error The error to be signalled. - \param format The format for the error message. - \param ... The content of the error message. - */ - void Context::Error(FfxSssrStatus error, char const* format, ...) const - { - va_list args; - va_start(args, format); - Error(error, format, args); - va_end(args); - } - - /** - Signals the error. - - \param error The error to be signalled. - \param format The format for the error message. - \param args The content of the error message. - */ - void Context::Error(FfxSssrStatus error, char const* format, va_list args) const - { - char buffer[2048], message[2048]; - - if (logging_function_) - { - snprintf(buffer, sizeof(buffer), "%s: %s (%d: %s)", GetAPICall(), format, static_cast(error), GetErrorName(error)); - vsnprintf(message, sizeof(message), buffer, args); - logging_function_(message, logging_function_user_data_); - } - } - - /** - Advances the frame index. - */ - void Context::AdvanceToNextFrame() - { - ++frame_index_; - } - - /** - Gets the view matrix for the reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param view_matrix The output value for the view matrix. 
- */ - void Context::GetReflectionViewViewMatrix(std::uint64_t reflection_view_id, matrix4& view_matrix) const - { - FFX_SSSR_ASSERT(IsOfType(reflection_view_id) && IsObjectValid(reflection_view_id)); - - auto const reflection_view_view_matrix = reflection_view_view_matrices_.At(ID(reflection_view_id)); - - FFX_SSSR_ASSERT(reflection_view_view_matrix); // should never happen - - view_matrix = *reflection_view_view_matrix; - } - - /** - Sets the view matrix for the reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param view_matrix The input value for the view matrix. - */ - void Context::SetReflectionViewViewMatrix(std::uint64_t reflection_view_id, matrix4 const& view_matrix) - { - FFX_SSSR_ASSERT(IsOfType(reflection_view_id) && IsObjectValid(reflection_view_id)); - - reflection_view_view_matrices_.Insert(ID(reflection_view_id), view_matrix); - } - - /** - Gets the projection matrix for the reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param projection_matrix The output value for the projection matrix. - */ - void Context::GetReflectionViewProjectionMatrix(std::uint64_t reflection_view_id, matrix4& projection_matrix) const - { - FFX_SSSR_ASSERT(IsOfType(reflection_view_id) && IsObjectValid(reflection_view_id)); - - auto const reflection_view_projection_matrix = reflection_view_projection_matrices_.At(ID(reflection_view_id)); - - FFX_SSSR_ASSERT(reflection_view_projection_matrix); // should never happen - - projection_matrix = *reflection_view_projection_matrix; - } - - /** - Sets the projection matrix for the reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param projection_matrix The input value for the projection matrix. 
- */ - void Context::SetReflectionViewProjectionMatrix(std::uint64_t reflection_view_id, matrix4 const& projection_matrix) - { - FFX_SSSR_ASSERT(IsOfType(reflection_view_id) && IsObjectValid(reflection_view_id)); - - reflection_view_projection_matrices_.Insert(ID(reflection_view_id), projection_matrix); - } - - /** - Decodes the resource type from the object identifier. - - \param object_id The object identifier to be decoded. - \return The resource type corresponding to the object. - */ - ResourceType Context::GetResourceType(std::uint64_t object_id) - { - auto const resource_type = static_cast(object_id >> 48); - - return static_cast(std::min(resource_type - 1u, static_cast(kResourceType_Count))); - } - - /** - Encodes the resource type into the object identifier. - - \param object_id The object identifier to be encoded. - \param resource_type The resource type for the object. - */ - void Context::SetResourceType(std::uint64_t& object_id, ResourceType resource_type) - { - FFX_SSSR_ASSERT(resource_type < kResourceType_Count); - - object_id |= ((static_cast(resource_type) + 1ull) << 48); - } - - /** - Creates a new object. - - \param object_id The identifier of the new object. - \param resource_type The resource type of the new object. - \param id_dispenser The dispenser for allocating the object identifier. - \return true if the object was created properly, false otherwise. 
- */ - bool Context::CreateObject(std::uint64_t& object_id, ResourceType resource_type, IdDispenser& id_dispenser) - { - FFX_SSSR_ASSERT(resource_type < kResourceType_Count); - - if (!id_dispenser.AllocateId(object_id)) - { - return false; - } - - SetResourceType(object_id, resource_type); - - return true; - } -} diff --git a/ffx-sssr/src/d3d12/context_d3d12.cpp b/ffx-sssr/src/d3d12/context_d3d12.cpp deleted file mode 100644 index 1c550f1..0000000 --- a/ffx-sssr/src/d3d12/context_d3d12.cpp +++ /dev/null @@ -1,856 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#include "context_d3d12.h" - -#include -#include - -#include "utils.h" -#include "context.h" -#include "reflection_view.h" -#include "ffx_sssr_d3d12.h" - -#include "shader_common.h" -#include "shader_classify_tiles.h" -#include "shader_intersect.h" -#include "shader_prepare_indirect_args.h" -#include "shader_resolve_eaw.h" -#include "shader_resolve_spatial.h" -#include "shader_resolve_temporal.h" - -namespace -{ - auto constexpr D3D12_VENDOR_ID_AMD = 0x1002u; - auto constexpr D3D12_VENDOR_ID_INTEL = 0x8086u; - auto constexpr D3D12_VENDOR_ID_NVIDIA = 0x10DEu; - - - namespace _1 - { - #include "samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp" - } - - namespace _2 - { - #include "samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_2spp.cpp" - } - - /** - The available blue noise samplers for various sampling modes. - */ - struct - { - std::int32_t const (&sobol_buffer_)[256 * 256]; - std::int32_t const (&ranking_tile_buffer_)[128 * 128 * 8]; - std::int32_t const (&scrambling_tile_buffer_)[128 * 128 * 8]; - } - const g_sampler_states[] = - { - { _1::sobol_256spp_256d, _1::rankingTile, _1::scramblingTile }, - { _2::sobol_256spp_256d, _2::rankingTile, _2::scramblingTile }, - }; - - /** - Initializes the descriptor range. - - \param range_type The type of the descriptor range. - \param num_descriptors The number of descriptors in the range. - \param base_shader_register The base descriptor for the range in shader code. - \return The resulting descriptor range. 
- */ - inline D3D12_DESCRIPTOR_RANGE InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE range_type, std::uint32_t num_descriptors, std::uint32_t base_shader_register) - { - D3D12_DESCRIPTOR_RANGE descriptor_range = {}; - descriptor_range.RangeType = range_type; - descriptor_range.NumDescriptors = num_descriptors; - descriptor_range.BaseShaderRegister = base_shader_register; - descriptor_range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - return descriptor_range; - } - - /** - Initializes the root parameter as descriptor table. - - \param num_descriptor_ranges The number of descriptor ranges for this parameter. - \param descriptor_ranges The array of descriptor ranges for this parameter. - \return The resulting root parameter. - */ - inline D3D12_ROOT_PARAMETER InitAsDescriptorTable(std::uint32_t num_descriptor_ranges, D3D12_DESCRIPTOR_RANGE const* descriptor_ranges) - { - D3D12_ROOT_PARAMETER root_parameter = {}; - root_parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - root_parameter.DescriptorTable.NumDescriptorRanges = num_descriptor_ranges; - root_parameter.DescriptorTable.pDescriptorRanges = descriptor_ranges; - root_parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // CS - return root_parameter; - } - - /** - Initializes the root parameter as constant buffer view. - - \param shader_register The slot of this constant buffer view. - \return The resulting root parameter. - */ - inline D3D12_ROOT_PARAMETER InitAsConstantBufferView(std::uint32_t shader_register) - { - D3D12_ROOT_PARAMETER root_parameter = {}; - root_parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; - root_parameter.Descriptor.RegisterSpace = 0; - root_parameter.Descriptor.ShaderRegister = shader_register; - root_parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // CS - return root_parameter; - } - - /** - Initializes a linear sampler for a static sampler description. - - \param shader_register The slot of this sampler. 
- \return The resulting sampler description. - */ - inline D3D12_STATIC_SAMPLER_DESC InitLinearSampler(std::uint32_t shader_register) - { - D3D12_STATIC_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - samplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - samplerDesc.MinLOD = 0.0f; - samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; - samplerDesc.MipLODBias = 0; - samplerDesc.MaxAnisotropy = 1; - samplerDesc.ShaderRegister = shader_register; - samplerDesc.RegisterSpace = 0; - samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // Compute - return samplerDesc; - } -} - -namespace ffx_sssr -{ - /** - The constructor for the ContextD3D12 class. - - \param context The execution context. - \param create_context_info The context creation information. 
- */ - ContextD3D12::ContextD3D12(Context& context, FfxSssrCreateContextInfo const& create_context_info) : - context_(context) - , device_(GetValidDevice(context, create_context_info.pD3D12CreateContextInfo->pDevice)) - , shader_compiler_(context) - , samplers_were_populated_(false) - , upload_buffer_(*this, create_context_info.uploadBufferSize) - , tile_classification_pass_() - , indirect_args_pass_() - , intersection_pass_() - , spatial_denoising_pass_() - , temporal_denoising_pass_() - , eaw_denoising_pass_() - , indirect_dispatch_command_signature_(nullptr) - , reflection_views_(create_context_info.maxReflectionViewCount) - { - FFX_SSSR_ASSERT(device_ != nullptr); - CompileShaders(create_context_info); - CreateRootSignatures(); - CreatePipelineStates(); - - // Create command signature for indirect arguments - { - D3D12_INDIRECT_ARGUMENT_DESC dispatch = {}; - dispatch.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; - - D3D12_COMMAND_SIGNATURE_DESC desc = {}; - desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS); - desc.NodeMask = 0; - desc.NumArgumentDescs = 1; - desc.pArgumentDescs = &dispatch; - - HRESULT hr; - hr = device_->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(&indirect_dispatch_command_signature_)); - if (!SUCCEEDED(hr)) - { - throw reflection_error(context, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create command signature for indirect dispatch."); - } - } - - // Create our blue noise samplers - BlueNoiseSamplerD3D12* blue_noise_samplers[] = { &blue_noise_sampler_1spp_, &blue_noise_sampler_2spp_ }; - static_assert(FFX_SSSR_ARRAY_SIZE(blue_noise_samplers) == FFX_SSSR_ARRAY_SIZE(g_sampler_states), "Sampler arrays don't match."); - for (auto i = 0u; i < FFX_SSSR_ARRAY_SIZE(g_sampler_states); ++i) - { - auto const& sampler_state = g_sampler_states[i]; - BlueNoiseSamplerD3D12* sampler = blue_noise_samplers[i]; - - if (!AllocateSRVBuffer(sizeof(sampler_state.sobol_buffer_), - &sampler->sobol_buffer_, - D3D12_RESOURCE_STATE_COPY_DEST, - L"SSSR Sobol 
Buffer") || - !AllocateSRVBuffer(sizeof(sampler_state.ranking_tile_buffer_), - &sampler->ranking_tile_buffer_, - D3D12_RESOURCE_STATE_COPY_DEST, - L"SSSR Ranking Tile Buffer") || - !AllocateSRVBuffer(sizeof(sampler_state.scrambling_tile_buffer_), - &sampler->scrambling_tile_buffer_, - D3D12_RESOURCE_STATE_COPY_DEST, - L"SSSR Scrambling Tile Buffer")) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Unable to create SRV buffer(s) for sampler."); - } - } - - ID3D12GraphicsCommandList * command_list = create_context_info.pD3D12CreateContextInfo->pUploadCommandList; - if (!samplers_were_populated_) - { - std::int32_t* upload_buffer; - - // Upload the relevant data to the various samplers - for (auto i = 0u; i < FFX_SSSR_ARRAY_SIZE(g_sampler_states); ++i) - { - auto const& sampler_state = g_sampler_states[i]; - BlueNoiseSamplerD3D12* sampler = blue_noise_samplers[i]; - - FFX_SSSR_ASSERT(sampler->sobol_buffer_); - FFX_SSSR_ASSERT(sampler->ranking_tile_buffer_); - FFX_SSSR_ASSERT(sampler->scrambling_tile_buffer_); - - if (!upload_buffer_.AllocateBuffer(sizeof(sampler_state.sobol_buffer_), upload_buffer)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %llukiB of upload memory, consider increasing uploadBufferSize", RoundedDivide(sizeof(sampler_state.sobol_buffer_), 1024ull)); - } - memcpy(upload_buffer, sampler_state.sobol_buffer_, sizeof(sampler_state.sobol_buffer_)); - - command_list->CopyBufferRegion(sampler->sobol_buffer_, - 0ull, - upload_buffer_.GetResource(), - static_cast(upload_buffer_.GetOffset(upload_buffer)), - sizeof(sampler_state.sobol_buffer_)); - - if (!upload_buffer_.AllocateBuffer(sizeof(sampler_state.ranking_tile_buffer_), upload_buffer)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %llukiB of upload memory, consider increasing uploadBufferSize", RoundedDivide(sizeof(sampler_state.ranking_tile_buffer_), 1024ull)); - } - memcpy(upload_buffer, 
sampler_state.ranking_tile_buffer_, sizeof(sampler_state.ranking_tile_buffer_)); - - command_list->CopyBufferRegion(sampler->ranking_tile_buffer_, - 0ull, - upload_buffer_.GetResource(), - static_cast(upload_buffer_.GetOffset(upload_buffer)), - sizeof(sampler_state.ranking_tile_buffer_)); - - if (!upload_buffer_.AllocateBuffer(sizeof(sampler_state.scrambling_tile_buffer_), upload_buffer)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %llukiB of upload memory, consider increasing uploadBufferSize", RoundedDivide(sizeof(sampler_state.scrambling_tile_buffer_), 1024ull)); - } - memcpy(upload_buffer, sampler_state.scrambling_tile_buffer_, sizeof(sampler_state.scrambling_tile_buffer_)); - - command_list->CopyBufferRegion(sampler->scrambling_tile_buffer_, - 0ull, - upload_buffer_.GetResource(), - static_cast(upload_buffer_.GetOffset(upload_buffer)), - sizeof(sampler_state.scrambling_tile_buffer_)); - } - - // Transition the resources for usage - D3D12_RESOURCE_BARRIER resource_barriers[3 * FFX_SSSR_ARRAY_SIZE(g_sampler_states)]; - memset(resource_barriers, 0, sizeof(resource_barriers)); - - for (auto i = 0u; i < FFX_SSSR_ARRAY_SIZE(g_sampler_states); ++i) - { - BlueNoiseSamplerD3D12* sampler = blue_noise_samplers[i]; - - auto& sobol_buffer_resource_barrier = resource_barriers[3u * i + 0u]; - sobol_buffer_resource_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - sobol_buffer_resource_barrier.Transition.pResource = sampler->sobol_buffer_; - sobol_buffer_resource_barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - sobol_buffer_resource_barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - sobol_buffer_resource_barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - - auto& ranking_tile_buffer_resource_barrier = resource_barriers[3u * i + 1u]; - ranking_tile_buffer_resource_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - 
ranking_tile_buffer_resource_barrier.Transition.pResource = sampler->ranking_tile_buffer_; - ranking_tile_buffer_resource_barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - ranking_tile_buffer_resource_barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - ranking_tile_buffer_resource_barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - - auto& scrambling_tile_buffer_resource_barrier = resource_barriers[3u * i + 2u]; - scrambling_tile_buffer_resource_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - scrambling_tile_buffer_resource_barrier.Transition.pResource = sampler->scrambling_tile_buffer_; - scrambling_tile_buffer_resource_barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - scrambling_tile_buffer_resource_barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - scrambling_tile_buffer_resource_barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - } - - command_list->ResourceBarrier(FFX_SSSR_ARRAY_SIZE(resource_barriers), - resource_barriers); - - // Flag that the samplers are now ready to use - samplers_were_populated_ = true; - } - } - - /** - The destructor for the ContextD3D12 class. - */ - ContextD3D12::~ContextD3D12() - { - if (indirect_dispatch_command_signature_) - indirect_dispatch_command_signature_->Release(); - indirect_dispatch_command_signature_ = nullptr; - } - - /** - Gets the number of GPU ticks spent in the tile classification pass. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent in the tile classification pass. - */ - void ContextD3D12::GetReflectionViewTileClassificationElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? 
- FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - auto const& reflection_view = reflection_views_[ID(reflection_view_id)]; - - if (!((reflection_view.flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Cannot query the tile classification elapsed time of a reflection view that was not created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag"); - } - - elapsed_time = reflection_view.tile_classification_elapsed_time_; - } - - /** - Gets the number of GPU ticks spent intersecting the depth buffer. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent intersecting the depth buffer. - */ - void ContextD3D12::GetReflectionViewIntersectionElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? - FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - auto const& reflection_view = reflection_views_[ID(reflection_view_id)]; - - if (!((reflection_view.flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Cannot query the intersection elapsed time of a reflection view that was not created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag"); - } - - elapsed_time = reflection_view.intersection_elapsed_time_; - } - - /** - Gets the number of GPU ticks spent denoising the Direct3D12 reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent denoising. 
- */ - void ContextD3D12::GetReflectionViewDenoisingElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? - FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - auto const& reflection_view = reflection_views_[ID(reflection_view_id)]; - - if (!((reflection_view.flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Cannot query the denoising elapsed time of a reflection view that was not created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag"); - } - - elapsed_time = reflection_view.denoising_elapsed_time_; - } - - /** - Creates the Direct3D12 reflection view. - - \param reflection_view_id The identifier of the reflection view object. - \param create_reflection_view_info The reflection view creation information. 
- */ - void ContextD3D12::CreateReflectionView(std::uint64_t reflection_view_id, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info) - { - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo); - FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - // Check user arguments - if (!create_reflection_view_info.outputWidth || !create_reflection_view_info.outputHeight) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The outputWidth and outputHeight parameters are required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->depthBufferHierarchySRV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The depthBufferHierarchySRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->motionBufferSRV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The motionBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->normalBufferSRV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The normalBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->roughnessBufferSRV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The roughnessBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->normalHistoryBufferSRV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The normalHistoryBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->roughnessHistoryBufferSRV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The 
roughnessHistoryBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->reflectionViewUAV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The reflectionViewUAV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->environmentMapSRV.ptr) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The environmentMapSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pD3D12CreateReflectionViewInfo->pEnvironmentMapSamplerDesc) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The pEnvironmentMapSamplerDesc parameter is required when creating a reflection view"); - if(create_reflection_view_info.pD3D12CreateReflectionViewInfo->sceneFormat == DXGI_FORMAT_UNKNOWN) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The sceneFormat parameter is required when creating a reflection view"); - - // Create the reflection view - auto& reflection_view = reflection_views_.Insert(ID(reflection_view_id)); - reflection_view.Create(context_, create_reflection_view_info); - } - - /** - Resolves the Direct3D12 reflection view. - - \param reflection_view_id The identifier of the reflection view object. - \param resolve_reflection_view_info The reflection view resolve information. - */ - void ContextD3D12::ResolveReflectionView(std::uint64_t reflection_view_id, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info) - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? 
- FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - FFX_SSSR_ASSERT(context_.reflection_view_view_matrices_.At(ID(reflection_view_id))); - FFX_SSSR_ASSERT(context_.reflection_view_projection_matrices_.At(ID(reflection_view_id))); - - ReflectionView reflection_view; - reflection_view.view_matrix_ = context_.reflection_view_view_matrices_[ID(reflection_view_id)]; - reflection_view.projection_matrix_ = context_.reflection_view_projection_matrices_[ID(reflection_view_id)]; - - reflection_views_[ID(reflection_view_id)].Resolve(context_, reflection_view, resolve_reflection_view_info); - } - - void ContextD3D12::CompileShaders(FfxSssrCreateContextInfo const& create_context_info) - { - struct - { - char const* shader_name_ = nullptr; - char const* content_ = nullptr; - char const* profile_ = nullptr; - } - const shader_source[] = - { - { "prepare_indirect_args", prepare_indirect_args, "cs_6_0"}, - { "classify_tiles", classify_tiles, "cs_6_0"}, - { "intersect", intersect, "cs_6_0"}, - { "resolve_spatial", resolve_spatial, "cs_6_0"}, - { "resolve_temporal", resolve_temporal, "cs_6_0"}, - { "resolve_eaw", resolve_eaw, "cs_6_0"}, - }; - - auto const common_include = std::string(common); - - DxcDefine defines[10]; - defines[0].Name = L"FFX_SSSR_ROUGHNESS_TEXTURE_FORMAT"; - defines[0].Value = create_context_info.pRoughnessTextureFormat; - defines[1].Name = L"FFX_SSSR_ROUGHNESS_UNPACK_FUNCTION"; - defines[1].Value = create_context_info.pUnpackRoughnessSnippet; - defines[2].Name = L"FFX_SSSR_NORMALS_TEXTURE_FORMAT"; - defines[2].Value = create_context_info.pNormalsTextureFormat; - defines[3].Name = L"FFX_SSSR_NORMALS_UNPACK_FUNCTION"; - defines[3].Value = create_context_info.pUnpackNormalsSnippet; - defines[4].Name = L"FFX_SSSR_MOTION_VECTOR_TEXTURE_FORMAT"; - defines[4].Value = create_context_info.pMotionVectorFormat; - defines[5].Name = L"FFX_SSSR_MOTION_VECTOR_UNPACK_FUNCTION"; - defines[5].Value = 
create_context_info.pUnpackMotionVectorsSnippet; - defines[6].Name = L"FFX_SSSR_DEPTH_TEXTURE_FORMAT"; - defines[6].Value = create_context_info.pDepthTextureFormat; - defines[7].Name = L"FFX_SSSR_DEPTH_UNPACK_FUNCTION"; - defines[7].Value = create_context_info.pUnpackDepthSnippet; - defines[8].Name = L"FFX_SSSR_SCENE_TEXTURE_FORMAT"; - defines[8].Value = create_context_info.pSceneTextureFormat; - defines[9].Name = L"FFX_SSSR_SCENE_RADIANCE_UNPACK_FUNCTION"; - defines[9].Value = create_context_info.pUnpackSceneRadianceSnippet; - - static_assert(FFX_SSSR_ARRAY_SIZE(shader_source) == kShader_Count, "'kShader_Count' filenames must be provided for building the various shaders"); - std::stringstream shader_content; - for (auto i = 0u; i < kShader_Count; ++i) - { - // Append common includes - shader_content.str(std::string()); - shader_content.clear(); - shader_content << common << std::endl << shader_source[i].content_; - - shaders_[i] = shader_compiler_.CompileShaderString( - shader_content.str().c_str(), - static_cast(shader_content.str().size()), - shader_source[i].shader_name_, - shader_source[i].profile_, - nullptr, 0, - defines, FFX_SSSR_ARRAY_SIZE(defines)); - } - } - - void ContextD3D12::CreateRootSignatures() - { - auto CreateRootSignature = [this]( - ShaderPass& pass - , const LPCWSTR name - , std::uint32_t num_descriptor_ranges - , D3D12_DESCRIPTOR_RANGE const* descriptor_ranges - ) { - - D3D12_DESCRIPTOR_RANGE environment_map_sampler_range = {}; - environment_map_sampler_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - environment_map_sampler_range.NumDescriptors = 1; - environment_map_sampler_range.BaseShaderRegister = 1; - environment_map_sampler_range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - - D3D12_ROOT_PARAMETER root[] = { - InitAsDescriptorTable(num_descriptor_ranges, descriptor_ranges), - InitAsConstantBufferView(0), - InitAsDescriptorTable(1, &environment_map_sampler_range), // g_environment_map_sampler - }; 
- - D3D12_STATIC_SAMPLER_DESC sampler_descs[] = { InitLinearSampler(0) }; // g_linear_sampler - - D3D12_ROOT_SIGNATURE_DESC rs_desc = {}; - rs_desc.NumParameters = FFX_SSSR_ARRAY_SIZE(root); - rs_desc.pParameters = root; - rs_desc.NumStaticSamplers = FFX_SSSR_ARRAY_SIZE(sampler_descs); - rs_desc.pStaticSamplers = sampler_descs; - - HRESULT hr; - ID3DBlob* rs, * rsError; - hr = D3D12SerializeRootSignature(&rs_desc, D3D_ROOT_SIGNATURE_VERSION_1, &rs, &rsError); - if (FAILED(hr)) - { - if (rsError) - { - std::string const error_message(static_cast(rsError->GetBufferPointer())); - rsError->Release(); - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to serialize root signature:\r\n> %s", error_message.c_str()); - } - else - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to serialize root signature"); - } - } - - hr = GetDevice()->CreateRootSignature(0, rs->GetBufferPointer(), rs->GetBufferSize(), IID_PPV_ARGS(&pass.root_signature_)); - rs->Release(); - if (FAILED(hr)) - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create root signature."); - } - - pass.root_signature_->SetName(name); - pass.descriptor_count_ = num_descriptor_ranges; - }; - - // Assemble the shader pass for tile classification - { - D3D12_DESCRIPTOR_RANGE ranges[] = { - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0), // g_roughness - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0), // g_tile_list - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1), // g_ray_list - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 2), // g_tile_counter - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 3), // g_ray_counter - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 4), // g_temporally_denoised_reflections - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 5), // g_temporally_denoised_reflections_history - 
InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 6), // g_ray_lengths - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 7), // g_temporal_variance - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 8), // g_denoised_reflections - }; - CreateRootSignature(tile_classification_pass_, L"SSSR Tile Classification Root Signature", FFX_SSSR_ARRAY_SIZE(ranges), ranges); - } - - // Assemble the shader pass that prepares the indirect arguments - { - D3D12_DESCRIPTOR_RANGE ranges[] = { - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0), // g_tile_counter - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1), // g_ray_counter - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 2), // g_intersect_args - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 3), // g_denoiser_args - }; - CreateRootSignature(indirect_args_pass_, L"SSSR Indirect Arguments Pass Root Signature", FFX_SSSR_ARRAY_SIZE(ranges), ranges); - } - - // Assemble the shader pass for intersecting reflection rays with the depth buffer - { - D3D12_DESCRIPTOR_RANGE ranges[] = { - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0), // g_lit_scene - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1), // g_depth_buffer_hierarchy - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 2), // g_normal - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 3), // g_roughness - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 4), // g_environment_map - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 5), // g_sobol_buffer - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 6), // g_ranking_tile_buffer - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 7), // g_scrambling_tile_buffer - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 8), // g_ray_list - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0), // g_intersection_result - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1), // g_ray_lengths - 
InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 2), // g_denoised_reflections - - }; - CreateRootSignature(intersection_pass_, L"SSSR Depth Buffer Intersection Root Signature", FFX_SSSR_ARRAY_SIZE(ranges), ranges); - } - - // Assemble the shader pass for spatial resolve - { - D3D12_DESCRIPTOR_RANGE ranges[] = { - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0), // g_depth_buffer - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1), // g_normal - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 2), // g_roughness - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 3), // g_intersection_result - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 4), // g_has_ray - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 5), // g_tile_list - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0), // g_spatially_denoised_reflections - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1), // g_ray_lengths - }; - CreateRootSignature(spatial_denoising_pass_, L"SSSR Spatial Resolve Root Signature", FFX_SSSR_ARRAY_SIZE(ranges), ranges); - } - - // Assemble the shader pass for temporal resolve - { - D3D12_DESCRIPTOR_RANGE ranges[] = { - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0), // g_normal - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1), // g_roughness - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 2), // g_normal_history - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 3), // g_roughness_history - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 4), // g_depth_buffer - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 5), // g_motion_vectors - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 6), // g_temporally_denoised_reflections_history - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 7), // g_ray_lengths - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 8), // g_tile_list - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 
0), // g_temporally_denoised_reflections - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1), // g_spatially_denoised_reflections - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 2), // g_temporal_variance - }; - CreateRootSignature(temporal_denoising_pass_, L"SSSR Temporal Resolve Root Signature", FFX_SSSR_ARRAY_SIZE(ranges), ranges); - } - - // Assemble the shader pass for EAW resolve - { - D3D12_DESCRIPTOR_RANGE ranges[] = { - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0), // g_normal - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1), // g_roughness - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 2), // g_depth_buffer - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 3), // g_tile_list - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0), // g_temporally_denoised_reflections - InitDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1), // g_denoised_reflections - }; - CreateRootSignature(eaw_denoising_pass_, L"SSSR EAW Resolve Root Signature", FFX_SSSR_ARRAY_SIZE(ranges), ranges); - } - } - - void ContextD3D12::CreatePipelineStates() - { - auto Compile = [this](ShaderPass& pass, ContextD3D12::Shader shader, const LPCWSTR name) { - FFX_SSSR_ASSERT(pass.root_signature_ != nullptr); - - // Create the pipeline state object - D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_state_desc = {}; - pipeline_state_desc.pRootSignature = pass.root_signature_; - pipeline_state_desc.CS = GetShader(shader); - - HRESULT hr = GetDevice()->CreateComputePipelineState(&pipeline_state_desc, - IID_PPV_ARGS(&pass.pipeline_state_)); - if (!SUCCEEDED(hr)) - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create compute pipeline state"); - } - - pass.pipeline_state_->SetName(name); - }; - - Compile(tile_classification_pass_, ContextD3D12::kShader_TileClassification, L"SSSR Tile Classification Pipeline"); - Compile(indirect_args_pass_, ContextD3D12::kShader_IndirectArguments, L"SSSR Indirect 
Arguments Pipeline"); - Compile(intersection_pass_, ContextD3D12::kShader_Intersection, L"SSSR Intersect Pipeline"); - Compile(spatial_denoising_pass_, ContextD3D12::kShader_SpatialResolve, L"SSSR Spatial Resolve Pipeline"); - Compile(temporal_denoising_pass_, ContextD3D12::kShader_TemporalResolve, L"SSSR Temporal Resolve Pipeline"); - Compile(eaw_denoising_pass_, ContextD3D12::kShader_EAWResolve, L"SSSR EAW Resolve Pipeline"); - } - - const ContextD3D12::ShaderPass& ContextD3D12::GetTileClassificationPass() const - { - return tile_classification_pass_; - } - - const ContextD3D12::ShaderPass& ContextD3D12::GetIndirectArgsPass() const - { - return indirect_args_pass_; - } - - const ContextD3D12::ShaderPass& ContextD3D12::GetIntersectionPass() const - { - return intersection_pass_; - } - - const ContextD3D12::ShaderPass& ContextD3D12::GetSpatialDenoisingPass() const - { - return spatial_denoising_pass_; - } - - const ContextD3D12::ShaderPass& ContextD3D12::GetTemporalDenoisingPass() const - { - return temporal_denoising_pass_; - } - - const ContextD3D12::ShaderPass& ContextD3D12::GetEawDenoisingPass() const - { - return eaw_denoising_pass_; - } - - ID3D12CommandSignature* ContextD3D12::GetIndirectDispatchCommandSignature() - { - return indirect_dispatch_command_signature_; - } - - /** - Allocate a buffer resource to use as a shader resource view. - - \param buffer_size The size of the buffer (in bytes). - \param resource The created SRV buffer resource. - \param initial_resource_state The initial resource state. - \param resource_name An optional name for the resource. - \return true if the resource was allocated successfully. 
- */ - bool ContextD3D12::AllocateSRVBuffer(std::size_t buffer_size, ID3D12Resource** resource, D3D12_RESOURCE_STATES initial_resource_state, wchar_t const* resource_name) const - { - FFX_SSSR_ASSERT(resource != nullptr); - - D3D12_HEAP_PROPERTIES heap_properties = {}; - heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_properties.CreationNodeMask = 1u; - heap_properties.VisibleNodeMask = 1u; - - D3D12_RESOURCE_DESC resource_desc = {}; - resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resource_desc.Width = static_cast(buffer_size); - resource_desc.Height = 1u; - resource_desc.DepthOrArraySize = 1u; - resource_desc.MipLevels = 1u; - resource_desc.SampleDesc.Count = 1u; - resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - if (!SUCCEEDED(device_->CreateCommittedResource(&heap_properties, - D3D12_HEAP_FLAG_NONE, - &resource_desc, - initial_resource_state, - nullptr, - IID_PPV_ARGS(resource)))) - { - return false; // failed to create committed resource - } - - if (resource_name) - { - (*resource)->SetName(resource_name); - } - - return true; - } - - /** - Allocate a buffer resource to use as an unordered access view. - - \param buffer_size The size of the buffer (in bytes). - \param resource The created UAV buffer resource. - \param initial_resource_state The initial resource state. - \param resource_name An optional name for the resource. - \return true if the resource was allocated successfully. 
- */ - bool ContextD3D12::AllocateUAVBuffer(std::size_t buffer_size, ID3D12Resource** resource, D3D12_RESOURCE_STATES initial_resource_state, wchar_t const* resource_name) const - { - FFX_SSSR_ASSERT(resource != nullptr); - - D3D12_HEAP_PROPERTIES heap_properties = {}; - heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_properties.CreationNodeMask = 1u; - heap_properties.VisibleNodeMask = 1u; - - D3D12_RESOURCE_DESC resource_desc = {}; - resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resource_desc.Width = static_cast(buffer_size); - resource_desc.Height = 1u; - resource_desc.DepthOrArraySize = 1u; - resource_desc.MipLevels = 1u; - resource_desc.SampleDesc.Count = 1u; - resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - - if (!SUCCEEDED(device_->CreateCommittedResource(&heap_properties, - D3D12_HEAP_FLAG_NONE, - &resource_desc, - initial_resource_state, - nullptr, - IID_PPV_ARGS(resource)))) - { - return false; // failed to create committed resource - } - - if (resource_name) - { - (*resource)->SetName(resource_name); - } - - return true; - } - - /** - Allocate a buffer resource to use as a readback resource. - - \param buffer_size The size of the buffer (in bytes). - \param resource The created readback buffer resource. - \param initial_resource_state The initial resource state. - \param resource_name An optional name for the resource. - \return true if the resource was allocated successfully. 
- */ - bool ContextD3D12::AllocateReadbackBuffer(std::size_t buffer_size, ID3D12Resource** resource, D3D12_RESOURCE_STATES initial_resource_state, wchar_t const* resource_name) const - { - FFX_SSSR_ASSERT(resource != nullptr); - - D3D12_HEAP_PROPERTIES heap_properties = {}; - heap_properties.Type = D3D12_HEAP_TYPE_READBACK; - heap_properties.CreationNodeMask = 1u; - heap_properties.VisibleNodeMask = 1u; - - D3D12_RESOURCE_DESC resource_desc = {}; - resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resource_desc.Width = static_cast(buffer_size); - resource_desc.Height = 1u; - resource_desc.DepthOrArraySize = 1u; - resource_desc.MipLevels = 1u; - resource_desc.SampleDesc.Count = 1u; - resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - if (!SUCCEEDED(device_->CreateCommittedResource(&heap_properties, - D3D12_HEAP_FLAG_NONE, - &resource_desc, - initial_resource_state, - nullptr, - IID_PPV_ARGS(resource)))) - { - return false; // failed to create committed resource - } - - if (resource_name) - { - (*resource)->SetName(resource_name); - } - - return true; - } -} diff --git a/ffx-sssr/src/d3d12/context_d3d12.h b/ffx-sssr/src/d3d12/context_d3d12.h deleted file mode 100644 index 49fc0e7..0000000 --- a/ffx-sssr/src/d3d12/context_d3d12.h +++ /dev/null @@ -1,167 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include -#include - -#include "sampler_d3d12.h" -#include "reflection_view_d3d12.h" -#include "upload_buffer_d3d12.h" -#include "shader_compiler_d3d12.h" - -namespace ffx_sssr -{ - class Context; - class ReflectionViewD3D12; - - /** - The ContextD3D12 class encapsulates the data for a single Direct3D12 stochastic screen space reflections execution context. - */ - class ContextD3D12 - { - FFX_SSSR_NON_COPYABLE(ContextD3D12); - - public: - /** - The available shaders. 
- */ - enum Shader - { - kShader_IndirectArguments, - kShader_TileClassification, - kShader_Intersection, - kShader_SpatialResolve, - kShader_TemporalResolve, - kShader_EAWResolve, - - kShader_Count - }; - - ContextD3D12(Context& context, FfxSssrCreateContextInfo const& create_context_info); - ~ContextD3D12(); - - inline Context& GetContext(); - inline ID3D12Device* GetDevice() const; - inline Context const& GetContext() const; - inline UploadBufferD3D12& GetUploadBuffer(); - - inline ShaderD3D12 const& GetShader(Shader shader) const; - inline BlueNoiseSamplerD3D12 const& GetSampler1SPP() const; - inline BlueNoiseSamplerD3D12 const& GetSampler2SPP() const; - - void GetReflectionViewTileClassificationElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - void GetReflectionViewIntersectionElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - void GetReflectionViewDenoisingElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - - void CreateReflectionView(std::uint64_t reflection_view_id, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info); - void ResolveReflectionView(std::uint64_t reflection_view_id, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info); - - static inline ID3D12Device* GetValidDevice(Context& context, ID3D12Device* device); - static inline ID3D12GraphicsCommandList* GetCommandList(Context& context, ID3D12GraphicsCommandList* command_list); - - protected: - friend class Context; - friend class ReflectionViewD3D12; - - /** - The ShaderPass class holds the data for an individual shader pass. - */ - class ShaderPass - { - FFX_SSSR_NON_COPYABLE(ShaderPass); - - public: - inline ShaderPass(); - inline ~ShaderPass(); - - inline operator bool() const; - - inline ShaderPass(ShaderPass&& other) noexcept; - inline ShaderPass& operator =(ShaderPass&& other) noexcept; - - inline void SafeRelease(); - - // The pipeline state object. 
- ID3D12PipelineState* pipeline_state_; - // The root signature to be used. - ID3D12RootSignature* root_signature_; - // The number of descriptors in the root signature. - std::uint32_t descriptor_count_; - }; - - void CompileShaders(FfxSssrCreateContextInfo const& create_context_info); - void CreateRootSignatures(); - void CreatePipelineStates(); - - const ShaderPass& GetTileClassificationPass() const; - const ShaderPass& GetIndirectArgsPass() const; - const ShaderPass& GetIntersectionPass() const; - const ShaderPass& GetSpatialDenoisingPass() const; - const ShaderPass& GetTemporalDenoisingPass() const; - const ShaderPass& GetEawDenoisingPass() const; - - ID3D12CommandSignature* GetIndirectDispatchCommandSignature(); - - bool AllocateSRVBuffer(std::size_t buffer_size, ID3D12Resource** resource, D3D12_RESOURCE_STATES initial_resource_state, wchar_t const* resource_name = nullptr) const; - bool AllocateUAVBuffer(std::size_t buffer_size, ID3D12Resource** resource, D3D12_RESOURCE_STATES initial_resource_state, wchar_t const* resource_name = nullptr) const; - bool AllocateReadbackBuffer(std::size_t buffer_size, ID3D12Resource** resource, D3D12_RESOURCE_STATES initial_resource_state, wchar_t const* resource_name = nullptr) const; - - // The execution context. - Context& context_; - // The device to be used. - ID3D12Device* device_; - // The compiled reflections shaders. - std::array shaders_; - // The compiler to be used for building the Direct3D12 shaders. - ShaderCompilerD3D12 shader_compiler_; - // The Blue Noise sampler optimized for 1 sample per pixel. - BlueNoiseSamplerD3D12 blue_noise_sampler_1spp_; - // The Blue Noise sampler optimized for 2 samples per pixel. - BlueNoiseSamplerD3D12 blue_noise_sampler_2spp_; - // The flag for whether the samplers were populated. - bool samplers_were_populated_; - // The buffer to be used for uploading memory from the CPU to the GPU. - UploadBufferD3D12 upload_buffer_; - // The array of reflection views to be resolved. 
- SparseArray reflection_views_; - - // The shader pass that classifies tiles. - ShaderPass tile_classification_pass_; - // The shader pass that prepares the indirect arguments. - ShaderPass indirect_args_pass_; - // The shader pass intersecting reflection rays with the depth buffer. - ShaderPass intersection_pass_; - // The shader pass that does spatial denoising. - ShaderPass spatial_denoising_pass_; - // The shader pass that does temporal denoising. - ShaderPass temporal_denoising_pass_; - // The shader pass that does the second spatial denoising. - ShaderPass eaw_denoising_pass_; - - // The command signature for the indirect dispatches. - ID3D12CommandSignature* indirect_dispatch_command_signature_; - }; -} - -#include "context_d3d12.inl" diff --git a/ffx-sssr/src/d3d12/context_d3d12.inl b/ffx-sssr/src/d3d12/context_d3d12.inl deleted file mode 100644 index a88bb65..0000000 --- a/ffx-sssr/src/d3d12/context_d3d12.inl +++ /dev/null @@ -1,225 +0,0 @@ -#include "context_d3d12.h" -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - - /** - Gets the context. - - \return The context. - */ - Context& ContextD3D12::GetContext() - { - return context_; - } - - /** - Gets the Direct3D12 device. - - \return The Direct3D12 device. - */ - ID3D12Device* ContextD3D12::GetDevice() const - { - return device_; - } - - /** - Gets the context. - - \return The context. - */ - Context const& ContextD3D12::GetContext() const - { - return context_; - } - - /** - Gets hold of the upload buffer. - - \return The upload buffer. - */ - UploadBufferD3D12& ContextD3D12::GetUploadBuffer() - { - return upload_buffer_; - } - - /** - Gets the shader. - - \param shader The shader to be retrieved. - \return The requested shader. - */ - ShaderD3D12 const& ContextD3D12::GetShader(Shader shader) const - { - FFX_SSSR_ASSERT(shader < kShader_Count); - return shaders_[shader]; - } - - /** - Gets a blue noise sampler with 1 sample per pixel. - - \return The requested sampler. - */ - inline BlueNoiseSamplerD3D12 const & ContextD3D12::GetSampler1SPP() const - { - FFX_SSSR_ASSERT(blue_noise_sampler_1spp_.sobol_buffer_); - FFX_SSSR_ASSERT(blue_noise_sampler_1spp_.ranking_tile_buffer_); - FFX_SSSR_ASSERT(blue_noise_sampler_1spp_.scrambling_tile_buffer_); - return blue_noise_sampler_1spp_; - } - - /** - Gets a blue noise sampler with 2 samples per pixel. - - \return The requested sampler. 
- */ - inline BlueNoiseSamplerD3D12 const & ContextD3D12::GetSampler2SPP() const - { - FFX_SSSR_ASSERT(blue_noise_sampler_2spp_.sobol_buffer_); - FFX_SSSR_ASSERT(blue_noise_sampler_2spp_.ranking_tile_buffer_); - FFX_SSSR_ASSERT(blue_noise_sampler_2spp_.scrambling_tile_buffer_); - return blue_noise_sampler_2spp_; - } - - /** - Gets a valid device. - - \param context The context to be used. - \param device The Direct3D12 device. - \return The device. - */ - ID3D12Device* ContextD3D12::GetValidDevice(Context& context, ID3D12Device* device) - { - if (!device) - throw reflection_error(context, FFX_SSSR_STATUS_INVALID_VALUE, "No device was supplied."); - - D3D12_FEATURE_DATA_SHADER_MODEL supportedShaderModel = {}; - supportedShaderModel.HighestShaderModel = D3D_SHADER_MODEL_6_2; - HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &supportedShaderModel, sizeof(D3D12_FEATURE_DATA_SHADER_MODEL)); - if(!SUCCEEDED(hr)) - throw reflection_error(context, FFX_SSSR_STATUS_INVALID_VALUE, "Unable to check for shader model support on provided device."); - - if(supportedShaderModel.HighestShaderModel < D3D_SHADER_MODEL_6_2) - throw reflection_error(context, FFX_SSSR_STATUS_INVALID_VALUE, "Device does not support shader model 6.2."); - - return device; - } - - /** - Gets the command list. - - \param context The context to be used. - \param command_list The Direct3D12 command list. - \return The command list. - */ - ID3D12GraphicsCommandList* ContextD3D12::GetCommandList(Context& context, ID3D12GraphicsCommandList* command_list) - { - if (!command_list) - throw reflection_error(context, FFX_SSSR_STATUS_INVALID_VALUE, "No command list was supplied, cannot encode device commands"); - return command_list; - } - - - /** - The constructor for the ShaderPass class. - */ - ContextD3D12::ShaderPass::ShaderPass() - : pipeline_state_(nullptr) - , root_signature_(nullptr) - , descriptor_count_(0) - { - } - - /** - The constructor for the ShaderPass class. 
- - \param other The shader pass to be moved. - */ - ContextD3D12::ShaderPass::ShaderPass(ShaderPass&& other) noexcept - : pipeline_state_(other.pipeline_state_) - , root_signature_(other.root_signature_) - , descriptor_count_(other.descriptor_count_) - { - other.pipeline_state_ = nullptr; - other.root_signature_ = nullptr; - other.descriptor_count_ = 0; - } - - /** - The destructor for the ShaderPass class. - */ - ContextD3D12::ShaderPass::~ShaderPass() - { - SafeRelease(); - } - - /** - Assigns the shader pass. - - \param other The shader pass to be moved. - \return The assigned shader pass. - */ - ContextD3D12::ShaderPass& ContextD3D12::ShaderPass::operator =(ShaderPass&& other) noexcept - { - if (this != &other) - { - pipeline_state_ = other.pipeline_state_; - root_signature_ = other.root_signature_; - descriptor_count_ = other.descriptor_count_; - - other.pipeline_state_ = nullptr; - other.root_signature_ = nullptr; - descriptor_count_ = 0; - } - - return *this; - } - - /** - Releases the shader pass. - */ - inline void ContextD3D12::ShaderPass::SafeRelease() - { - if (pipeline_state_) - pipeline_state_->Release(); - pipeline_state_ = nullptr; - - if (root_signature_) - root_signature_->Release(); - root_signature_ = nullptr; - - descriptor_count_ = 0; - } - - /** - Checks whether the shader pass is valid. - - \return true if the shader pass is valid, false otherwise. - */ - ContextD3D12::ShaderPass::operator bool() const - { - return (pipeline_state_ && root_signature_); - } -} diff --git a/ffx-sssr/src/d3d12/descriptor_heap_d3d12.cpp b/ffx-sssr/src/d3d12/descriptor_heap_d3d12.cpp deleted file mode 100644 index 9fd625d..0000000 --- a/ffx-sssr/src/d3d12/descriptor_heap_d3d12.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "descriptor_heap_d3d12.h" - -#include "reflection_error.h" -#include "context_d3d12.h" - -namespace ffx_sssr -{ - /** - The constructor for the DescriptorHeapD3D12 class. - - \param context The context to be used. - */ - DescriptorHeapD3D12::DescriptorHeapD3D12(Context& context) - : context_(context) - , descriptor_heap_(nullptr) - , descriptor_handle_size_(0u) - , static_descriptor_heap_size_(0u) - , static_descriptor_heap_cursor_(0u) - , dynamic_descriptor_heap_size_(0u) - , dynamic_descriptor_heap_cursor_(0u) - { - } - - /** - The destructor for the DescriptorHeapD3D12 class. - */ - DescriptorHeapD3D12::~DescriptorHeapD3D12() - { - Destroy(); - } - - /** - Creates the Direct3D12 descriptor heap. - - \param descriptor_heap_type The type of descriptor heap to be created. - \param static_descriptor_count The number of static descriptors to be allocated. 
- \param dynamic_descriptor_count The number of dynamic descriptors to be allocated. - */ - void DescriptorHeapD3D12::Create(D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type, std::uint32_t static_descriptor_count, std::uint32_t dynamic_descriptor_count) - { - HRESULT result; - - // Populate the allocation ranges - auto const static_descriptor_heap_size = static_descriptor_count; - auto const dynamic_descriptor_heap_size = dynamic_descriptor_count * context_.GetFrameCountBeforeReuse(); - - // Create the descriptor heap - auto const descriptor_count = static_descriptor_heap_size + dynamic_descriptor_heap_size; - auto const descriptor_handle_size = context_.GetContextD3D12()->GetDevice()->GetDescriptorHandleIncrementSize(descriptor_heap_type); - - D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {}; - descriptor_heap_desc.Type = descriptor_heap_type; - descriptor_heap_desc.NumDescriptors = descriptor_count; - descriptor_heap_desc.Flags = (descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_RTV || descriptor_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_DSV ? D3D12_DESCRIPTOR_HEAP_FLAG_NONE : D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); - - ID3D12DescriptorHeap* descriptor_heap; - result = context_.GetContextD3D12()->GetDevice()->CreateDescriptorHeap(&descriptor_heap_desc, IID_PPV_ARGS(&descriptor_heap)); - if (!SUCCEEDED(result)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to create descriptor heap"); - } - descriptor_heap->SetName(L"SSSR Descriptor Heap"); - - // Assign the base members - if (descriptor_heap_) - descriptor_heap_->Release(); - descriptor_heap_ = descriptor_heap; - descriptor_handle_size_ = descriptor_handle_size; - static_descriptor_heap_size_ = static_descriptor_heap_size; - static_descriptor_heap_cursor_ = 0u; - dynamic_descriptor_heap_size_ = dynamic_descriptor_heap_size; - dynamic_descriptor_heap_cursor_ = 0u; - } - - /** - Destroys the Direct3D12 descriptor heap. 
- */ - void DescriptorHeapD3D12::Destroy() - { - if (descriptor_heap_) - descriptor_heap_->Release(); - descriptor_heap_ = nullptr; - } -} diff --git a/ffx-sssr/src/d3d12/descriptor_heap_d3d12.h b/ffx-sssr/src/d3d12/descriptor_heap_d3d12.h deleted file mode 100644 index bb18d14..0000000 --- a/ffx-sssr/src/d3d12/descriptor_heap_d3d12.h +++ /dev/null @@ -1,115 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include - -#include "context.h" - -namespace ffx_sssr -{ - class DescriptorHeapD3D12; - - /** - The DescriptorD3D12 class represents an individual Direct3D12 descriptor handle. 
- */ - class DescriptorD3D12 - { - public: - inline DescriptorD3D12(); - - inline D3D12_CPU_DESCRIPTOR_HANDLE GetCPUDescriptor(std::uint32_t descriptor_index = 0u) const; - inline D3D12_GPU_DESCRIPTOR_HANDLE GetGPUDescriptor(std::uint32_t descriptor_index = 0u) const; - - protected: - friend class DescriptorHeapD3D12; - - // The number of descriptors available. - std::uint32_t descriptor_count_; - // The size of an individual descriptor handle. - std::uint32_t descriptor_handle_size_; - // The CPU-side descriptor handle. - D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor_handle_; - // The GPU-side descriptor handle. - D3D12_GPU_DESCRIPTOR_HANDLE gpu_descriptor_handle_; - }; - - /** - The DescriptorHeapD3D12 class represents a Direct3D12 heap for allocating descriptors of a given type. - */ - class DescriptorHeapD3D12 - { - FFX_SSSR_NON_COPYABLE(DescriptorHeapD3D12); - - public: - DescriptorHeapD3D12(Context& context); - ~DescriptorHeapD3D12(); - - inline ID3D12DescriptorHeap* const& GetDescriptorHeap() const; - - inline bool AllocateStaticDescriptor(DescriptorD3D12& descriptor, std::uint32_t descriptor_count = 1u); - inline bool AllocateDynamicDescriptor(DescriptorD3D12& descriptor, std::uint32_t descriptor_count = 1u); - - void Create(D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type, std::uint32_t static_descriptor_count, std::uint32_t dynamic_descriptor_count); - void Destroy(); - - protected: - /** - The Range class describes an allocated range within a descriptor heap. - */ - class Range - { - public: - inline Range(); - inline Range(std::uint32_t range_start, std::uint32_t range_size); - - inline bool Overlap(Range const& other) const; - - // The index of the allocation frame for this range. - std::uint32_t frame_index_; - // The start of the range in the heap. - std::uint32_t range_start_; - // The size of the allocation range. - std::uint32_t range_size_; - }; - - // The context to be used. - Context& context_; - // The Direct3D12 descriptor heap. 
- ID3D12DescriptorHeap* descriptor_heap_; - // The size of an individual descriptor handle. - std::uint32_t descriptor_handle_size_; - // The size of the heap for allocating static descriptors. - std::uint32_t static_descriptor_heap_size_; - // The cursor of the heap for allocating static descriptors. - std::uint32_t static_descriptor_heap_cursor_; - // The size of the heap for allocating dynamic descriptors. - std::uint32_t dynamic_descriptor_heap_size_; - // The cursor of the heap for allocating dynamic descriptors. - std::uint32_t dynamic_descriptor_heap_cursor_; - // The allocated ranges with the dynamic descriptor heap. - std::deque dynamic_descriptor_heap_ranges_; - }; -} - -#include "descriptor_heap_d3d12.inl" diff --git a/ffx-sssr/src/d3d12/descriptor_heap_d3d12.inl b/ffx-sssr/src/d3d12/descriptor_heap_d3d12.inl deleted file mode 100644 index 37a5ab0..0000000 --- a/ffx-sssr/src/d3d12/descriptor_heap_d3d12.inl +++ /dev/null @@ -1,192 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the DescriptorD3D12 class. - */ - DescriptorD3D12::DescriptorD3D12() - : descriptor_count_(0u) - , descriptor_handle_size_(0u) - , cpu_descriptor_handle_{0ull} - , gpu_descriptor_handle_{0ull} - { - } - - /** - Gets the CPU descriptor. - - \param descriptor_index The index of the descriptor. - \return The CPU descriptor handle. - */ - D3D12_CPU_DESCRIPTOR_HANDLE DescriptorD3D12::GetCPUDescriptor(std::uint32_t descriptor_index) const - { - FFX_SSSR_ASSERT(descriptor_index < descriptor_count_); - auto cpu_descriptor_handle = cpu_descriptor_handle_; - cpu_descriptor_handle.ptr += static_cast(descriptor_index) * static_cast(descriptor_handle_size_); - return cpu_descriptor_handle; - } - - /** - Gets the GPU descriptor. - - \param descriptor_index The index of the descriptor. - \return The GPU descriptor handle. - */ - D3D12_GPU_DESCRIPTOR_HANDLE DescriptorD3D12::GetGPUDescriptor(std::uint32_t descriptor_index) const - { - FFX_SSSR_ASSERT(descriptor_index < descriptor_count_); - auto gpu_descriptor_handle = gpu_descriptor_handle_; - gpu_descriptor_handle.ptr += static_cast(descriptor_index) * static_cast(descriptor_handle_size_); - return gpu_descriptor_handle; - } - - /** - The constructor for the Range class. - */ - DescriptorHeapD3D12::Range::Range() - : frame_index_(0u) - , range_start_(0u) - , range_size_(0u) - { - } - - /** - The constructor for the Range class. - - \param range_start The start of the range in the heap. - \param range_size The size of the allocation range. 
- */ - DescriptorHeapD3D12::Range::Range(std::uint32_t range_start, std::uint32_t range_size) - : frame_index_(0u) - , range_start_(range_start) - , range_size_(range_size) - { - } - - /** - Checks whether the ranges overlap. - - \param other The range to be checked for overlap. - \return true if the ranges overlap, false otherwise. - */ - bool DescriptorHeapD3D12::Range::Overlap(Range const& other) const - { - return (range_start_ < other.range_start_ + other.range_size_ && other.range_start_ < range_start_ + range_size_); - } - - /** - Gets the Direct3D12 descriptor heap. - - \return The Direct3D12 descriptor heap. - */ - ID3D12DescriptorHeap* const& DescriptorHeapD3D12::GetDescriptorHeap() const - { - return descriptor_heap_; - } - - /** - Allocates a static descriptor. - - \param descriptor The allocated descriptor. - \param descriptor_count The number of descriptors to be allocated. - \return true if the descriptor was allocated successfully, false otherwise. - */ - bool DescriptorHeapD3D12::AllocateStaticDescriptor(DescriptorD3D12& descriptor, std::uint32_t descriptor_count) - { - // Calculate the new cursor position - auto const static_descriptor_heap_cursor = static_descriptor_heap_cursor_ + descriptor_count; - - if (static_descriptor_heap_cursor > static_descriptor_heap_size_) - { - return false; // out of memory - } - - // Populate the descriptor handles - descriptor.descriptor_count_ = descriptor_count; - descriptor.descriptor_handle_size_ = descriptor_handle_size_; - descriptor.cpu_descriptor_handle_ = descriptor_heap_->GetCPUDescriptorHandleForHeapStart(); - descriptor.cpu_descriptor_handle_.ptr += static_cast(static_descriptor_heap_cursor_) * static_cast(descriptor_handle_size_); - descriptor.gpu_descriptor_handle_ = descriptor_heap_->GetGPUDescriptorHandleForHeapStart(); - descriptor.gpu_descriptor_handle_.ptr += static_cast(static_descriptor_heap_cursor_) * static_cast(descriptor_handle_size_); - - // Advance the allocation cursor - 
static_descriptor_heap_cursor_ = static_descriptor_heap_cursor; - - return true; - } - - /** - Allocates a dynamic descriptor. - - \param descriptor The allocated descriptor. - \param descriptor_count The number of descriptors to be allocated. - \return true if the descriptor was allocated successfully, false otherwise. - */ - bool DescriptorHeapD3D12::AllocateDynamicDescriptor(DescriptorD3D12& descriptor, std::uint32_t descriptor_count) - { - // Calculate the new cursor position - auto dynamic_descriptor_heap_cursor = dynamic_descriptor_heap_cursor_ + descriptor_count; - - if (dynamic_descriptor_heap_cursor > dynamic_descriptor_heap_size_) - { - dynamic_descriptor_heap_cursor_ = 0u; // loop back - dynamic_descriptor_heap_cursor = descriptor_count; - } - if (dynamic_descriptor_heap_cursor > dynamic_descriptor_heap_size_) - { - return false; // not enough memory available - } - - // Check whether we can safely reuse the allocation range - Range dynamic_descriptor_heap_range(dynamic_descriptor_heap_cursor_, descriptor_count); - - while (!dynamic_descriptor_heap_ranges_.empty() && dynamic_descriptor_heap_ranges_.front().Overlap(dynamic_descriptor_heap_range)) - { - FFX_SSSR_ASSERT(context_.GetFrameIndex() >= dynamic_descriptor_heap_ranges_.front().frame_index_); - - if (context_.GetFrameIndex() - dynamic_descriptor_heap_ranges_.front().frame_index_ < context_.GetFrameCountBeforeReuse()) - { - return false; // next available range is still in flight! 
- } - - dynamic_descriptor_heap_ranges_.pop_front(); - } - - // Populate the descriptor handles - descriptor.descriptor_count_ = descriptor_count; - descriptor.descriptor_handle_size_ = descriptor_handle_size_; - descriptor.cpu_descriptor_handle_ = descriptor_heap_->GetCPUDescriptorHandleForHeapStart(); - descriptor.cpu_descriptor_handle_.ptr += (static_cast(static_descriptor_heap_size_) + static_cast(dynamic_descriptor_heap_cursor_)) * static_cast(descriptor_handle_size_); - descriptor.gpu_descriptor_handle_ = descriptor_heap_->GetGPUDescriptorHandleForHeapStart(); - descriptor.gpu_descriptor_handle_.ptr += (static_cast(static_descriptor_heap_size_) + static_cast(dynamic_descriptor_heap_cursor_)) * static_cast(descriptor_handle_size_); - - // Advance the allocation cursor - dynamic_descriptor_heap_range.frame_index_ = context_.GetFrameIndex(); - dynamic_descriptor_heap_ranges_.push_back(dynamic_descriptor_heap_range); - dynamic_descriptor_heap_cursor_ = dynamic_descriptor_heap_cursor; - - return true; - } -} diff --git a/ffx-sssr/src/d3d12/reflection_view_d3d12.cpp b/ffx-sssr/src/d3d12/reflection_view_d3d12.cpp deleted file mode 100644 index 221c98a..0000000 --- a/ffx-sssr/src/d3d12/reflection_view_d3d12.cpp +++ /dev/null @@ -1,1013 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "reflection_view_d3d12.h" - -#include -#include - -#include "context.h" -#include "reflection_error.h" -#include "reflection_view.h" -#include "context_d3d12.h" -#include "ffx_sssr_d3d12.h" -#include "descriptor_heap_d3d12.h" - -namespace ffx_sssr -{ - /** - The constructor for the ReflectionViewD3D12 class. - */ - ReflectionViewD3D12::ReflectionViewD3D12() - : width_(0) - , height_(0) - , flags_(0) - , descriptor_heap_cbv_srv_uav_(nullptr) - , descriptor_heap_samplers_(nullptr) - , resource_heap_(nullptr) - , tile_list_(nullptr) - , tile_counter_(nullptr) - , ray_list_(nullptr) - , ray_counter_(nullptr) - , intersection_pass_indirect_args_(nullptr) - , denoiser_pass_indirect_args_(nullptr) - , temporal_denoiser_result_() - , ray_lengths_(nullptr) - , temporal_variance_(nullptr) - , tile_classification_elapsed_time_(0) - , intersection_elapsed_time_(0) - , denoising_elapsed_time_(0) - , timestamp_query_heap_(nullptr) - , timestamp_query_buffer_(nullptr) - , timestamp_queries_() - , timestamp_queries_index_(0) - , scene_format_(DXGI_FORMAT_UNKNOWN) - , tile_classification_descriptor_table_() - , indirect_args_descriptor_table_() - , intersection_descriptor_table_() - , spatial_denoising_descriptor_table_() - , temporal_denoising_descriptor_table_() - , eaw_denoising_descriptor_table_() - , sampler_descriptor_table_() - , prev_view_projection_() - { - } - - /** - The constructor for the ReflectionViewD3D12 class. 
- - \param other The reflection view to be moved. - */ - ReflectionViewD3D12::ReflectionViewD3D12(ReflectionViewD3D12&& other) noexcept - : width_(other.width_) - , height_(other.height_) - , flags_(other.flags_) - , descriptor_heap_cbv_srv_uav_(other.descriptor_heap_cbv_srv_uav_) - , descriptor_heap_samplers_(other.descriptor_heap_samplers_) - , tile_classification_elapsed_time_(other.tile_classification_elapsed_time_) - , intersection_elapsed_time_(other.intersection_elapsed_time_) - , denoising_elapsed_time_(other.denoising_elapsed_time_) - , timestamp_query_heap_(other.timestamp_query_heap_) - , timestamp_query_buffer_(other.timestamp_query_buffer_) - , timestamp_queries_(std::move(other.timestamp_queries_)) - , timestamp_queries_index_(other.timestamp_queries_index_) - , resource_heap_(other.resource_heap_) - , tile_list_(other.tile_list_) - , tile_counter_(other.tile_counter_) - , ray_list_(other.ray_list_) - , ray_counter_(other.ray_counter_) - , intersection_pass_indirect_args_(other.intersection_pass_indirect_args_) - , denoiser_pass_indirect_args_(other.denoiser_pass_indirect_args_) - , ray_lengths_(other.ray_lengths_) - , temporal_variance_(other.temporal_variance_) - , scene_format_(other.scene_format_) - , prev_view_projection_(other.prev_view_projection_) - { - other.timestamp_query_heap_ = nullptr; - other.timestamp_query_buffer_ = nullptr; - other.descriptor_heap_cbv_srv_uav_ = nullptr; - other.descriptor_heap_samplers_ = nullptr; - - for (int i = 0; i < 2; ++i) - { - temporal_denoiser_result_[i] = other.temporal_denoiser_result_[i]; - tile_classification_descriptor_table_[i] = other.tile_classification_descriptor_table_[i]; - indirect_args_descriptor_table_[i] = other.indirect_args_descriptor_table_[i]; - intersection_descriptor_table_[i] = other.intersection_descriptor_table_[i]; - spatial_denoising_descriptor_table_[i] = other.spatial_denoising_descriptor_table_[i]; - temporal_denoising_descriptor_table_[i] = 
other.temporal_denoising_descriptor_table_[i]; - eaw_denoising_descriptor_table_[i] = other.eaw_denoising_descriptor_table_[i]; - other.temporal_denoiser_result_[i] = nullptr; - } - sampler_descriptor_table_ = other.sampler_descriptor_table_; - - other.resource_heap_ = nullptr; - other.tile_list_ = nullptr; - other.tile_counter_ = nullptr; - other.ray_list_ = nullptr; - other.ray_counter_ = nullptr; - other.intersection_pass_indirect_args_ = nullptr; - other.denoiser_pass_indirect_args_ = nullptr; - other.ray_lengths_ = nullptr; - other.temporal_variance_ = nullptr; - other.timestamp_query_buffer_ = nullptr; - other.timestamp_query_heap_ = nullptr; - } - - /** - The destructor for the ReflectionViewD3D12 class. - */ - ReflectionViewD3D12::~ReflectionViewD3D12() - { - Destroy(); - } - - /** - Assigns the reflection view. - - \param other The reflection view to be moved. - \return The assigned reflection view. - */ - ReflectionViewD3D12& ReflectionViewD3D12::operator =(ReflectionViewD3D12&& other) noexcept - { - if (this != &other) - { - width_ = other.width_; - height_ = other.height_; - flags_ = other.flags_; - - descriptor_heap_cbv_srv_uav_ = other.descriptor_heap_cbv_srv_uav_; - descriptor_heap_samplers_ = other.descriptor_heap_samplers_; - tile_classification_elapsed_time_ = other.tile_classification_elapsed_time_; - intersection_elapsed_time_ = other.intersection_elapsed_time_; - denoising_elapsed_time_ = other.denoising_elapsed_time_; - timestamp_query_heap_ = other.timestamp_query_heap_; - timestamp_query_buffer_ = other.timestamp_query_buffer_; - timestamp_queries_ = other.timestamp_queries_;; - timestamp_queries_index_ = other.timestamp_queries_index_; - resource_heap_ = other.resource_heap_; - tile_list_ = other.tile_list_; - tile_counter_ = other.tile_counter_; - ray_list_ = other.ray_list_; - ray_counter_ = other.ray_counter_; - intersection_pass_indirect_args_ = other.intersection_pass_indirect_args_; - denoiser_pass_indirect_args_ = 
other.denoiser_pass_indirect_args_; - ray_lengths_ = other.ray_lengths_; - temporal_variance_ = other.temporal_variance_; - scene_format_ = other.scene_format_; - prev_view_projection_ = other.prev_view_projection_; - - other.timestamp_query_heap_ = nullptr; - other.timestamp_query_buffer_ = nullptr; - other.descriptor_heap_cbv_srv_uav_ = nullptr; - other.descriptor_heap_samplers_ = nullptr; - - for (int i = 0; i < 2; ++i) - { - temporal_denoiser_result_[i] = other.temporal_denoiser_result_[i]; - tile_classification_descriptor_table_[i] = other.tile_classification_descriptor_table_[i]; - indirect_args_descriptor_table_[i] = other.indirect_args_descriptor_table_[i]; - intersection_descriptor_table_[i] = other.intersection_descriptor_table_[i]; - spatial_denoising_descriptor_table_[i] = other.spatial_denoising_descriptor_table_[i]; - temporal_denoising_descriptor_table_[i] = other.temporal_denoising_descriptor_table_[i]; - eaw_denoising_descriptor_table_[i] = other.eaw_denoising_descriptor_table_[i]; - - other.temporal_denoiser_result_[i] = nullptr; - } - sampler_descriptor_table_ = other.sampler_descriptor_table_; - - other.resource_heap_ = nullptr; - other.tile_list_ = nullptr; - other.tile_counter_ = nullptr; - other.ray_list_ = nullptr; - other.ray_counter_ = nullptr; - other.intersection_pass_indirect_args_ = nullptr; - other.denoiser_pass_indirect_args_ = nullptr; - other.ray_lengths_ = nullptr; - other.temporal_variance_ = nullptr; - other.timestamp_query_buffer_ = nullptr; - other.timestamp_query_heap_ = nullptr; - } - - return *this; - } - - /** - Creates the reflection view. - - \param context The context to be used. - \param create_reflection_view_info The reflection view creation information. 
- */ - void ReflectionViewD3D12::Create(Context& context, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info) - { - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo != nullptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->sceneFormat != DXGI_FORMAT_UNKNOWN); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->depthBufferHierarchySRV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->motionBufferSRV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->normalBufferSRV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->roughnessBufferSRV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->normalHistoryBufferSRV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->roughnessHistoryBufferSRV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->environmentMapSRV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->pEnvironmentMapSamplerDesc); - FFX_SSSR_ASSERT(create_reflection_view_info.pD3D12CreateReflectionViewInfo->reflectionViewUAV.ptr); - FFX_SSSR_ASSERT(create_reflection_view_info.outputWidth && create_reflection_view_info.outputHeight); - - // Populate the reflection view properties - width_ = create_reflection_view_info.outputWidth; - height_ = create_reflection_view_info.outputHeight; - flags_ = create_reflection_view_info.flags; - scene_format_ = create_reflection_view_info.pD3D12CreateReflectionViewInfo->sceneFormat; - - // Create reflection view resources - CreateDescriptorHeaps(context); - - // Create tile classification-related buffers - { - ID3D12Device * device = context.GetContextD3D12()->GetDevice(); - - uint32_t num_tiles = RoundedDivide(width_, 8u) * RoundedDivide(height_, 8u); - uint32_t num_pixels = width_ * height_; - - uint32_t 
tile_list_element_count = num_tiles; - uint32_t tile_counter_element_count = 1; - uint32_t ray_list_element_count = num_pixels; - uint32_t ray_counter_element_count = 1; - uint32_t intersection_pass_indirect_args_element_count = 3; - uint32_t denoiser_pass_indirect_args_element_count = 3; - - // Helper function to create resource descriptions for 1D Buffers - auto BufferDesc = [](uint32_t num_elements) { - D3D12_RESOURCE_DESC desc = {}; - desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - desc.Alignment = 0; - desc.Width = num_elements * 4; - desc.Height = 1; - desc.DepthOrArraySize = 1; - desc.MipLevels = 1; - desc.Format = DXGI_FORMAT_UNKNOWN; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - return desc; - }; - - D3D12_RESOURCE_DESC tile_list_desc = BufferDesc(num_tiles); - D3D12_RESOURCE_DESC tile_counter_desc = BufferDesc(1); - D3D12_RESOURCE_DESC ray_list_desc = BufferDesc(num_pixels); - D3D12_RESOURCE_DESC ray_counter_desc = BufferDesc(1); - constexpr uint32_t indirect_arguments_member_count = 3; - static_assert(sizeof(D3D12_DISPATCH_ARGUMENTS) == indirect_arguments_member_count * 4, "Size of indirect arguments buffer does not match D3D12_DISPATCH_ARGUMENTS."); - D3D12_RESOURCE_DESC intersection_pass_indirect_args_desc = BufferDesc(3); - D3D12_RESOURCE_DESC denoiser_pass_indirect_args_desc = BufferDesc(3); - - D3D12_RESOURCE_DESC resource_descs[] = { - tile_list_desc, tile_counter_desc, ray_list_desc, ray_counter_desc, intersection_pass_indirect_args_desc, denoiser_pass_indirect_args_desc - }; - - D3D12_RESOURCE_ALLOCATION_INFO allocation_info = device->GetResourceAllocationInfo(0, FFX_SSSR_ARRAY_SIZE(resource_descs), resource_descs); - D3D12_HEAP_DESC heap_desc = {}; - heap_desc.Alignment = allocation_info.Alignment; - heap_desc.SizeInBytes = allocation_info.SizeInBytes; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - 
heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heap_desc.Properties.CreationNodeMask = 0; - heap_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_desc.Properties.VisibleNodeMask = 0; - - HRESULT hr = device->CreateHeap(&heap_desc, IID_PPV_ARGS(&resource_heap_)); - if (!SUCCEEDED(hr)) - { - throw reflection_error(context, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to create resource heap."); - } - - UINT64 heap_offset = 0; - auto CreatePlacedResource = [this, &context, &heap_offset, &allocation_info]( - D3D12_RESOURCE_DESC * desc - , D3D12_RESOURCE_STATES initial_state - , REFIID riidResource - , _COM_Outptr_opt_ void **ppvResource) - { - ID3D12Device * device = context.GetContextD3D12()->GetDevice(); - HRESULT hr = device->CreatePlacedResource(resource_heap_, heap_offset, desc, initial_state, nullptr, riidResource, ppvResource); - if (!SUCCEEDED(hr)) - { - throw reflection_error(context, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to create placed resource."); - } - - heap_offset += desc->Width; - heap_offset = RoundedDivide(heap_offset, allocation_info.Alignment) * allocation_info.Alignment; - }; - - CreatePlacedResource(&tile_list_desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, IID_PPV_ARGS(&tile_list_)); - CreatePlacedResource(&tile_counter_desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, IID_PPV_ARGS(&tile_counter_)); - CreatePlacedResource(&ray_list_desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, IID_PPV_ARGS(&ray_list_)); - CreatePlacedResource(&ray_counter_desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, IID_PPV_ARGS(&ray_counter_)); - CreatePlacedResource(&intersection_pass_indirect_args_desc, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, IID_PPV_ARGS(&intersection_pass_indirect_args_)); - CreatePlacedResource(&denoiser_pass_indirect_args_desc, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, IID_PPV_ARGS(&denoiser_pass_indirect_args_)); - - tile_list_->SetName(L"SSSR Tile List"); - 
tile_counter_->SetName(L"SSSR Tile Counter"); - ray_list_->SetName(L"SSSR Ray List"); - ray_counter_->SetName(L"SSSR Ray Counter"); - intersection_pass_indirect_args_->SetName(L"SSSR Intersect Indirect Args"); - denoiser_pass_indirect_args_->SetName(L"SSSR Denoiser Indirect Args"); - } - - // Create denoising-related resources - { - auto CreateCommittedResource = [this, &context]( - DXGI_FORMAT format - , REFIID riidResource - , _COM_Outptr_opt_ void **ppvResource) { - HRESULT hr; - ID3D12Device * device = context.GetContextD3D12()->GetDevice(); - D3D12_HEAP_PROPERTIES default_heap = {}; - default_heap.Type = D3D12_HEAP_TYPE_DEFAULT; - default_heap.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - default_heap.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - default_heap.CreationNodeMask = 1; - default_heap.VisibleNodeMask = 1; - - D3D12_RESOURCE_DESC desc = {}; - desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - desc.Alignment = 0; - desc.Width = width_; - desc.Height = height_; - desc.DepthOrArraySize = 1; - desc.MipLevels = 0; - desc.Format = format; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - - hr = device->CreateCommittedResource( - &default_heap, - D3D12_HEAP_FLAG_NONE, - &desc, - D3D12_RESOURCE_STATE_UNORDERED_ACCESS, - NULL, - riidResource, ppvResource); - - if (!SUCCEEDED(hr)) - { - throw reflection_error(context, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create intermediate target."); - } - }; - - CreateCommittedResource(scene_format_, IID_PPV_ARGS(&temporal_denoiser_result_[0])); - CreateCommittedResource(scene_format_, IID_PPV_ARGS(&temporal_denoiser_result_[1])); - CreateCommittedResource(DXGI_FORMAT_R16_FLOAT, IID_PPV_ARGS(&ray_lengths_)); - CreateCommittedResource(DXGI_FORMAT_R8_UNORM, IID_PPV_ARGS(&temporal_variance_)); - - temporal_denoiser_result_[0]->SetName(L"SSSR Temporal Denoised Result 0"); - 
temporal_denoiser_result_[1]->SetName(L"SSSR Temporal Denoised Result 1"); - ray_lengths_->SetName(L"SSSR Ray Lengths"); - temporal_variance_->SetName(L"SSSR Temporal Variance"); - } - - ContextD3D12* d3d12_context = context.GetContextD3D12(); - - // Setup the descriptor tables - { - descriptor_heap_samplers_->AllocateStaticDescriptor(sampler_descriptor_table_, 1); - - // Suballocate descriptor heap for descriptor tables - for (int i = 0; i < 2; ++i) - { - DescriptorD3D12 table; - descriptor_heap_cbv_srv_uav_->AllocateStaticDescriptor(table, d3d12_context->GetTileClassificationPass().descriptor_count_); - tile_classification_descriptor_table_[i] = table; - - descriptor_heap_cbv_srv_uav_->AllocateStaticDescriptor(table, d3d12_context->GetIndirectArgsPass().descriptor_count_); - indirect_args_descriptor_table_[i] = table; - - descriptor_heap_cbv_srv_uav_->AllocateStaticDescriptor(table, d3d12_context->GetIntersectionPass().descriptor_count_); - intersection_descriptor_table_[i] = table; - - descriptor_heap_cbv_srv_uav_->AllocateStaticDescriptor(table, d3d12_context->GetSpatialDenoisingPass().descriptor_count_); - spatial_denoising_descriptor_table_[i] = table; - - descriptor_heap_cbv_srv_uav_->AllocateStaticDescriptor(table, d3d12_context->GetTemporalDenoisingPass().descriptor_count_); - temporal_denoising_descriptor_table_[i] = table; - - descriptor_heap_cbv_srv_uav_->AllocateStaticDescriptor(table, d3d12_context->GetEawDenoisingPass().descriptor_count_); - eaw_denoising_descriptor_table_[i] = table; - } - - ID3D12Device * device = context.GetContextD3D12()->GetDevice(); - UINT descriptor_size = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - - D3D12_CPU_DESCRIPTOR_HANDLE scene_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->sceneSRV; - D3D12_CPU_DESCRIPTOR_HANDLE depth_hierarchy_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->depthBufferHierarchySRV; - D3D12_CPU_DESCRIPTOR_HANDLE 
motion_buffer_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->motionBufferSRV; - D3D12_CPU_DESCRIPTOR_HANDLE normal_buffer_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->normalBufferSRV; - D3D12_CPU_DESCRIPTOR_HANDLE roughness_buffer_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->roughnessBufferSRV; - D3D12_CPU_DESCRIPTOR_HANDLE normal_history_buffer_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->normalHistoryBufferSRV; - D3D12_CPU_DESCRIPTOR_HANDLE roughness_history_buffer_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->roughnessHistoryBufferSRV; - D3D12_CPU_DESCRIPTOR_HANDLE environment_map_srv = create_reflection_view_info.pD3D12CreateReflectionViewInfo->environmentMapSRV; - D3D12_CPU_DESCRIPTOR_HANDLE output_buffer_uav = create_reflection_view_info.pD3D12CreateReflectionViewInfo->reflectionViewUAV; - const D3D12_SAMPLER_DESC* environment_map_sampler_desc = create_reflection_view_info.pD3D12CreateReflectionViewInfo->pEnvironmentMapSamplerDesc; - - D3D12_CPU_DESCRIPTOR_HANDLE normal_buffers[] = { normal_buffer_srv, normal_history_buffer_srv }; - D3D12_CPU_DESCRIPTOR_HANDLE roughness_buffers[] = { roughness_buffer_srv, roughness_history_buffer_srv }; - - bool ping_pong_normal = (create_reflection_view_info.flags & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_NORMAL_BUFFERS) != 0; - bool ping_pong_roughness = (create_reflection_view_info.flags & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_ROUGHNESS_BUFFERS) != 0; - - // Helper function to create a default shader resource view for a Texture2D - auto SRV_Tex2D = [](DXGI_FORMAT format) { - D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {}; - shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - shader_resource_view_desc.Texture2D.MipLevels = -1; - 
shader_resource_view_desc.Texture2D.MostDetailedMip = 0; - shader_resource_view_desc.Texture2D.PlaneSlice = 0; - shader_resource_view_desc.Texture2D.ResourceMinLODClamp = 0; - shader_resource_view_desc.Format = format; - return shader_resource_view_desc; - }; - - // Helper function to create a default unordered access view for a Texture2D - auto UAV_Tex2D = [](DXGI_FORMAT format) { - D3D12_UNORDERED_ACCESS_VIEW_DESC unordered_access_view_desc = {}; - unordered_access_view_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - unordered_access_view_desc.Texture2D.MipSlice = 0; - unordered_access_view_desc.Texture2D.PlaneSlice = 0; - unordered_access_view_desc.Format = format; - return unordered_access_view_desc; - }; - - // Helper function to create a default unordered access view for a Buffer - auto UAV_Buffer = [](uint32_t num_elements) { - D3D12_UNORDERED_ACCESS_VIEW_DESC unordered_access_view_desc = {}; - unordered_access_view_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - unordered_access_view_desc.Buffer.CounterOffsetInBytes = 0; - unordered_access_view_desc.Buffer.FirstElement = 0; - unordered_access_view_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; - unordered_access_view_desc.Buffer.NumElements = num_elements; - unordered_access_view_desc.Buffer.StructureByteStride = 4; - unordered_access_view_desc.Format = DXGI_FORMAT_UNKNOWN; - return unordered_access_view_desc; - }; - - auto SRV_Buffer = [](uint32_t num_elements) { - D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {}; - shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - shader_resource_view_desc.Buffer.FirstElement = 0; - shader_resource_view_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; - shader_resource_view_desc.Buffer.NumElements = num_elements; - shader_resource_view_desc.Buffer.StructureByteStride = 4; - shader_resource_view_desc.Format = DXGI_FORMAT_UNKNOWN; - shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - 
return shader_resource_view_desc; - }; - - // Place the descriptors - device->CreateSampler(environment_map_sampler_desc, sampler_descriptor_table_.GetCPUDescriptor(0)); // g_environment_map_sampler - for (int i = 0; i < 2; ++i) - { - uint32_t num_tiles = RoundedDivide(width_, 8u) * RoundedDivide(height_, 8u); - uint32_t num_pixels = width_ * height_; - - // Tile Classifier pass - { - DescriptorD3D12 table = tile_classification_descriptor_table_[i]; - uint32_t offset = 0; - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_roughness ? roughness_buffers[i] : roughness_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_roughness - device->CreateUnorderedAccessView(tile_list_, nullptr, &UAV_Buffer(num_tiles), table.GetCPUDescriptor(offset++)); // g_tile_list - device->CreateUnorderedAccessView(ray_list_, nullptr, &UAV_Buffer(num_pixels), table.GetCPUDescriptor(offset++)); // g_ray_list - device->CreateUnorderedAccessView(tile_counter_, nullptr, &UAV_Buffer(1), table.GetCPUDescriptor(offset++)); // g_tile_counter - device->CreateUnorderedAccessView(ray_counter_, nullptr, &UAV_Buffer(1), table.GetCPUDescriptor(offset++)); // g_ray_counter - device->CreateUnorderedAccessView(temporal_denoiser_result_[i], nullptr, &UAV_Tex2D(scene_format_), table.GetCPUDescriptor(offset++)); // g_temporally_denoised_reflections - device->CreateUnorderedAccessView(temporal_denoiser_result_[1 - i], nullptr, &UAV_Tex2D(scene_format_), table.GetCPUDescriptor(offset++)); // g_temporally_denoised_reflections_history - device->CreateUnorderedAccessView(ray_lengths_, nullptr, &UAV_Tex2D(DXGI_FORMAT_R16_FLOAT), table.GetCPUDescriptor(offset++)); // g_ray_lengths - device->CreateUnorderedAccessView(temporal_variance_, nullptr, &UAV_Tex2D(DXGI_FORMAT_R8_UNORM), table.GetCPUDescriptor(offset++)); // g_temporal_variance - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), output_buffer_uav, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // 
g_denoised_reflections - } - - // Indirect args pass - { - DescriptorD3D12 table = indirect_args_descriptor_table_[i]; - uint32_t offset = 0; - device->CreateUnorderedAccessView(tile_counter_, nullptr, &UAV_Buffer(1), table.GetCPUDescriptor(offset++)); // g_tile_counter - device->CreateUnorderedAccessView(ray_counter_, nullptr, &UAV_Buffer(1), table.GetCPUDescriptor(offset++)); // g_ray_counter - - constexpr uint32_t indirect_arguments_member_count = 3; - static_assert(sizeof(D3D12_DISPATCH_ARGUMENTS) == indirect_arguments_member_count * 4, "Size of indirect arguments buffer does not match D3D12_DISPATCH_ARGUMENTS."); - device->CreateUnorderedAccessView(intersection_pass_indirect_args_, nullptr, &UAV_Buffer(indirect_arguments_member_count), table.GetCPUDescriptor(offset++)); // g_intersect_args - device->CreateUnorderedAccessView(denoiser_pass_indirect_args_, nullptr, &UAV_Buffer(indirect_arguments_member_count), table.GetCPUDescriptor(offset++)); // g_denoiser_args - } - - // Intersection pass - { - DescriptorD3D12 table = intersection_descriptor_table_[i]; - uint32_t offset = 0; - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), scene_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_lit_scene - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), depth_hierarchy_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_depth_buffer_hierarchy - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_normal ? normal_buffers[i] : normal_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_normal - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_roughness ? 
roughness_buffers[i] : roughness_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_roughness - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), environment_map_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_environment_map - - // Blue noise sampler - D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {}; - shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - auto const& sampler = context.GetContextD3D12()->GetSampler2SPP(); - shader_resource_view_desc.Buffer.NumElements = static_cast(sampler.sobol_buffer_->GetDesc().Width / sizeof(std::int32_t)); - shader_resource_view_desc.Buffer.StructureByteStride = static_cast(sizeof(std::int32_t)); - device->CreateShaderResourceView(sampler.sobol_buffer_, &shader_resource_view_desc, table.GetCPUDescriptor(offset++)); // g_sobol_buffer - shader_resource_view_desc.Buffer.NumElements = static_cast(sampler.ranking_tile_buffer_->GetDesc().Width / sizeof(std::int32_t)); - shader_resource_view_desc.Buffer.StructureByteStride = static_cast(sizeof(std::int32_t)); - device->CreateShaderResourceView(sampler.ranking_tile_buffer_, &shader_resource_view_desc, table.GetCPUDescriptor(offset++)); // g_ranking_tile_buffer - shader_resource_view_desc.Buffer.NumElements = static_cast(sampler.scrambling_tile_buffer_->GetDesc().Width / sizeof(std::int32_t)); - shader_resource_view_desc.Buffer.StructureByteStride = static_cast(sizeof(std::int32_t)); - device->CreateShaderResourceView(sampler.scrambling_tile_buffer_, &shader_resource_view_desc, table.GetCPUDescriptor(offset++)); // g_scrambling_tile_buffer - - device->CreateShaderResourceView(ray_list_, &SRV_Buffer(num_pixels), table.GetCPUDescriptor(offset++)); // g_ray_list - device->CreateUnorderedAccessView(temporal_denoiser_result_[i], nullptr, &UAV_Tex2D(scene_format_), table.GetCPUDescriptor(offset++)); // g_intersection_result - 
device->CreateUnorderedAccessView(ray_lengths_, nullptr, &UAV_Tex2D(DXGI_FORMAT_R16_FLOAT), table.GetCPUDescriptor(offset++)); // g_ray_lengths - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), output_buffer_uav, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_denoised_reflections - } - - // Spatial denoising pass - { - DescriptorD3D12 table = spatial_denoising_descriptor_table_[i]; - uint32_t offset = 0; - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), depth_hierarchy_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_depth_buffer - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_normal ? normal_buffers[i] : normal_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_normal - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_roughness ? roughness_buffers[i] : roughness_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_roughness - device->CreateShaderResourceView(temporal_denoiser_result_[i], &SRV_Tex2D(scene_format_), table.GetCPUDescriptor(offset++)); // g_intersection_result - device->CreateShaderResourceView(temporal_variance_, &SRV_Tex2D(DXGI_FORMAT_R8_UNORM), table.GetCPUDescriptor(offset++)); // g_has_ray - device->CreateShaderResourceView(tile_list_, &SRV_Buffer(num_tiles), table.GetCPUDescriptor(offset++)); // g_tile_list - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), output_buffer_uav, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_spatially_denoised_reflections - device->CreateUnorderedAccessView(ray_lengths_, nullptr, &UAV_Tex2D(DXGI_FORMAT_R16_FLOAT), table.GetCPUDescriptor(offset++)); // g_ray_lengths - } - - // Temporal denoising pass - { - DescriptorD3D12 table = temporal_denoising_descriptor_table_[i]; - uint32_t offset = 0; - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_normal ? 
normal_buffers[i] : normal_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_normal - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_roughness ? roughness_buffers[i] : roughness_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_roughness - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_normal ? normal_buffers[1 - i] : normal_history_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_normal_history - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_roughness ? roughness_buffers[1 - i] : roughness_history_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_roughness_history - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), depth_hierarchy_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_depth_buffer - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), motion_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_motion_vectors - device->CreateShaderResourceView(temporal_denoiser_result_[1 - i], &SRV_Tex2D(scene_format_), table.GetCPUDescriptor(offset++)); // g_temporally_denoised_reflections_history - device->CreateShaderResourceView(ray_lengths_, &SRV_Tex2D(DXGI_FORMAT_R16_FLOAT), table.GetCPUDescriptor(offset++)); // g_ray_lengths - device->CreateShaderResourceView(tile_list_, &SRV_Buffer(num_tiles), table.GetCPUDescriptor(offset++)); // g_tile_list - device->CreateUnorderedAccessView(temporal_denoiser_result_[i], nullptr, &UAV_Tex2D(scene_format_), table.GetCPUDescriptor(offset++)); // g_temporally_denoised_reflections - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), output_buffer_uav, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_spatially_denoised_reflections - device->CreateUnorderedAccessView(temporal_variance_, nullptr, &UAV_Tex2D(DXGI_FORMAT_R8_UNORM), table.GetCPUDescriptor(offset++)); // g_temporal_variance - } - - // EAW denoising pass - { - DescriptorD3D12 table = 
eaw_denoising_descriptor_table_[i]; - uint32_t offset = 0; - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_normal ? normal_buffers[i] : normal_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_normal - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), ping_pong_roughness ? roughness_buffers[i] : roughness_buffer_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_roughness - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), depth_hierarchy_srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_depth_buffer - device->CreateShaderResourceView(tile_list_, &SRV_Buffer(num_tiles), table.GetCPUDescriptor(offset++)); // g_tile_list - device->CreateUnorderedAccessView(temporal_denoiser_result_[i], nullptr, &UAV_Tex2D(scene_format_), table.GetCPUDescriptor(offset++)); // g_temporally_denoised_reflections - device->CopyDescriptorsSimple(1, table.GetCPUDescriptor(offset++), output_buffer_uav, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_denoised_reflections - } - } - } - - // Create timestamp querying resources if enabled - if ((create_reflection_view_info.flags & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto const query_heap_size = kTimestampQuery_Count * context.GetFrameCountBeforeReuse() * sizeof(std::uint64_t); - - D3D12_QUERY_HEAP_DESC query_heap_desc = {}; - query_heap_desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; - query_heap_desc.Count = static_cast(query_heap_size); - - if (!SUCCEEDED(context.GetContextD3D12()->GetDevice()->CreateQueryHeap(&query_heap_desc, - IID_PPV_ARGS(×tamp_query_heap_)))) - { - throw reflection_error(context, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Unable to create timestamp query heap"); - } - - if (!context.GetContextD3D12()->AllocateReadbackBuffer(query_heap_size, - ×tamp_query_buffer_, - D3D12_RESOURCE_STATE_COPY_DEST, - L"TimestampQueryBuffer")) - { - throw reflection_error(context, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Unable to allocate readback 
buffer"); - } - - timestamp_queries_.resize(context.GetFrameCountBeforeReuse()); - - for (auto& timestamp_queries : timestamp_queries_) - { - timestamp_queries.reserve(kTimestampQuery_Count); - } - } - } - - /** - Destroys the reflection view. - */ - void ReflectionViewD3D12::Destroy() - { - if (descriptor_heap_cbv_srv_uav_) - delete descriptor_heap_cbv_srv_uav_; - descriptor_heap_cbv_srv_uav_ = nullptr; - - if (descriptor_heap_samplers_) - delete descriptor_heap_samplers_; - descriptor_heap_samplers_ = nullptr; - -#define FFX_SSSR_SAFE_RELEASE(x)\ - if(x) { x->Release(); }\ - x = nullptr; - - FFX_SSSR_SAFE_RELEASE(timestamp_query_heap_); - FFX_SSSR_SAFE_RELEASE(timestamp_query_buffer_); - FFX_SSSR_SAFE_RELEASE(temporal_denoiser_result_[0]); - FFX_SSSR_SAFE_RELEASE(temporal_denoiser_result_[1]); - FFX_SSSR_SAFE_RELEASE(ray_lengths_); - FFX_SSSR_SAFE_RELEASE(temporal_variance_); - FFX_SSSR_SAFE_RELEASE(tile_list_); - FFX_SSSR_SAFE_RELEASE(tile_counter_); - FFX_SSSR_SAFE_RELEASE(ray_list_); - FFX_SSSR_SAFE_RELEASE(ray_counter_); - FFX_SSSR_SAFE_RELEASE(intersection_pass_indirect_args_); - FFX_SSSR_SAFE_RELEASE(denoiser_pass_indirect_args_); - FFX_SSSR_SAFE_RELEASE(resource_heap_); - -#undef FFX_SSSR_SAFE_RELEASE - - timestamp_queries_.resize(0u); - } - - /** - Creates the descriptor heaps. - - \param context The context to be used. 
- */ - void ReflectionViewD3D12::CreateDescriptorHeaps(Context& context) - { - ContextD3D12* d3d12_context = context.GetContextD3D12(); - - FFX_SSSR_ASSERT(!descriptor_heap_cbv_srv_uav_); - FFX_SSSR_ASSERT(!descriptor_heap_samplers_); - - descriptor_heap_cbv_srv_uav_ = new DescriptorHeapD3D12(context); - FFX_SSSR_ASSERT(descriptor_heap_cbv_srv_uav_ != nullptr); - std::uint32_t descriptor_count - = d3d12_context->GetTileClassificationPass().descriptor_count_ - + d3d12_context->GetIndirectArgsPass().descriptor_count_ - + d3d12_context->GetIntersectionPass().descriptor_count_ - + d3d12_context->GetSpatialDenoisingPass().descriptor_count_ - + d3d12_context->GetTemporalDenoisingPass().descriptor_count_ - + d3d12_context->GetEawDenoisingPass().descriptor_count_; - descriptor_heap_cbv_srv_uav_->Create(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2 * descriptor_count, 0u); - - descriptor_heap_samplers_ = new DescriptorHeapD3D12(context); - FFX_SSSR_ASSERT(descriptor_heap_samplers_ != nullptr); - descriptor_heap_samplers_->Create(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 1, 0u); // g_environment_map_sampler - } - - /** - Gets the index of the current timestamp query. - - \return The index of the current timestamp query. - */ - std::uint32_t ReflectionViewD3D12::GetTimestampQueryIndex() const - { - return timestamp_queries_index_ * kTimestampQuery_Count + static_cast(timestamp_queries_[timestamp_queries_index_].size()); - } - - float Clamp(float value, float min, float max) - { - if (value < min) - { - return min; - } - else if (value > max) - { - return max; - } - return value; - } - - /** - Resolves the Direct3D12 reflection view. - - \param context The context to be used. - \param reflection_view The reflection view to be resolved. - \param resolve_reflection_view_info The reflection view resolve information. 
- */ - void ReflectionViewD3D12::Resolve(Context& context, ReflectionView const& reflection_view, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info) - { - // Get hold of the command list for recording - FFX_SSSR_ASSERT(resolve_reflection_view_info.pD3D12CommandEncodeInfo); - auto const command_list = ContextD3D12::GetCommandList(context, resolve_reflection_view_info.pD3D12CommandEncodeInfo->pCommandList); - FFX_SSSR_ASSERT(descriptor_heap_cbv_srv_uav_ && descriptor_heap_samplers_ && command_list); - FFX_SSSR_ASSERT(resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_1 || resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_2 || resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_4); - - // Query timestamp value prior to resolving the reflection view - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - if (!timestamp_queries.empty()) - { - std::uint64_t* data; - - // Reset performance counters - tile_classification_elapsed_time_ = 0ull; - denoising_elapsed_time_ = 0ull; - intersection_elapsed_time_ = 0ull; - - auto const start_index = timestamp_queries_index_ * kTimestampQuery_Count; - - D3D12_RANGE read_range = {}; - read_range.Begin = start_index * sizeof(std::uint64_t); - read_range.End = (start_index + timestamp_queries.size()) * sizeof(std::uint64_t); - - timestamp_query_buffer_->Map(0u, - &read_range, - reinterpret_cast(&data)); - - for (auto i = 0u, j = 1u; j < timestamp_queries.size(); ++i, ++j) - { - auto const elapsed_time = (data[j] - data[i]); - - switch (timestamp_queries[j]) - { - case kTimestampQuery_TileClassification: - tile_classification_elapsed_time_ = elapsed_time; - break; - case kTimestampQuery_Intersection: - intersection_elapsed_time_ = elapsed_time; - break; - case kTimestampQuery_Denoising: - denoising_elapsed_time_ = elapsed_time; - 
break; - default: - // unrecognized timestamp query - break; - } - } - - timestamp_query_buffer_->Unmap(0u, nullptr); - } - - timestamp_queries.clear(); - - command_list->EndQuery(timestamp_query_heap_, - D3D12_QUERY_TYPE_TIMESTAMP, - GetTimestampQueryIndex()); - - timestamp_queries.push_back(kTimestampQuery_Init); - } - - // Encode the relevant pass data - struct PassData - { - matrix4 inv_view_projection_; - matrix4 projection_; - matrix4 inv_projection_; - matrix4 view_; - matrix4 inv_view_; - matrix4 prev_view_projection_; - std::uint32_t frame_index_; - std::uint32_t max_traversal_intersections_; - std::uint32_t min_traversal_occupancy_; - std::uint32_t most_detailed_mip_; - float temporal_stability_factor_; - float depth_buffer_thickness_; - std::uint32_t samples_per_quad_; - std::uint32_t temporal_variance_guided_tracing_enabled_; - float roughness_threshold_; - std::uint32_t skip_denoiser_; - }; - auto& upload_buffer = context.GetContextD3D12()->GetUploadBuffer(); - PassData* pass_data; - if (!upload_buffer.AllocateBuffer(sizeof(PassData), pass_data)) - { - throw reflection_error(context, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %u bytes of upload memory, consider increasing uploadBufferSize", sizeof(PassData)); - } - - // Fill constant buffer - matrix4 view_projection = reflection_view.projection_matrix_ * reflection_view.view_matrix_; - pass_data->inv_view_projection_ = matrix4::inverse(view_projection); - pass_data->projection_ = reflection_view.projection_matrix_; - pass_data->inv_projection_ = matrix4::inverse(reflection_view.projection_matrix_); - pass_data->view_ = reflection_view.view_matrix_; - pass_data->inv_view_ = matrix4::inverse(reflection_view.view_matrix_); - pass_data->prev_view_projection_ = prev_view_projection_; - pass_data->frame_index_ = context.GetFrameIndex(); - - float temporal_stability_scale = Clamp(resolve_reflection_view_info.temporalStabilityScale, 0, 1); - pass_data->max_traversal_intersections_ = 
resolve_reflection_view_info.maxTraversalIterations; - pass_data->min_traversal_occupancy_ = resolve_reflection_view_info.minTraversalOccupancy; - pass_data->most_detailed_mip_ = resolve_reflection_view_info.mostDetailedDepthHierarchyMipLevel; - pass_data->temporal_stability_factor_ = temporal_stability_scale * temporal_stability_scale; - pass_data->depth_buffer_thickness_ = resolve_reflection_view_info.depthBufferThickness; - pass_data->samples_per_quad_ = resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_4 ? 4 : (resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_2 ? 2 : 1); - pass_data->temporal_variance_guided_tracing_enabled_ = resolve_reflection_view_info.flags & FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING ? 1 : 0; - pass_data->roughness_threshold_ = resolve_reflection_view_info.roughnessThreshold; - pass_data->skip_denoiser_ = resolve_reflection_view_info.flags & FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE ? 
0 : 1; - prev_view_projection_ = view_projection; - - std::uint32_t current_frame = context.GetFrameIndex() & 1u; - ID3D12DescriptorHeap *descriptor_heaps[] = { descriptor_heap_cbv_srv_uav_->GetDescriptorHeap(), descriptor_heap_samplers_->GetDescriptorHeap() }; - command_list->SetDescriptorHeaps(FFX_SSSR_ARRAY_SIZE(descriptor_heaps), descriptor_heaps); - - ID3D12Resource * cb_resource = upload_buffer.GetResource(); - size_t offset = upload_buffer.GetOffset(pass_data); - - auto UAVBarrier = [](ID3D12Resource * resource) { - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.UAV.pResource = resource; - return barrier; - }; - - auto Transition = [](ID3D12Resource * resource, D3D12_RESOURCE_STATES from, D3D12_RESOURCE_STATES to) - { - D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = resource; - barrier.Transition.StateBefore = from; - barrier.Transition.StateAfter = to; - barrier.Transition.Subresource = 0; - return barrier; - }; - - ContextD3D12* d3d12_context = context.GetContextD3D12(); - - // Tile Classification pass - { - command_list->SetComputeRootSignature(d3d12_context->GetTileClassificationPass().root_signature_); - command_list->SetComputeRootDescriptorTable(0, tile_classification_descriptor_table_[current_frame].GetGPUDescriptor()); - command_list->SetComputeRootConstantBufferView(1, cb_resource->GetGPUVirtualAddress() + offset); - command_list->SetPipelineState(d3d12_context->GetTileClassificationPass().pipeline_state_); - uint32_t dim_x = RoundedDivide(width_, 8u); - uint32_t dim_y = RoundedDivide(height_, 8u); - command_list->Dispatch(dim_x, dim_y, 1); - } - - // Ensure that the tile classification pass finished - D3D12_RESOURCE_BARRIER classification_results_barriers[] = { - UAVBarrier(ray_list_), - UAVBarrier(tile_list_), - 
Transition(intersection_pass_indirect_args_, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), - Transition(denoiser_pass_indirect_args_, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) - }; - command_list->ResourceBarrier(FFX_SSSR_ARRAY_SIZE(classification_results_barriers), classification_results_barriers); - - // Indirect Arguments pass - { - command_list->SetComputeRootSignature(d3d12_context->GetIndirectArgsPass().root_signature_); - command_list->SetComputeRootDescriptorTable(0, indirect_args_descriptor_table_[current_frame].GetGPUDescriptor()); - command_list->SetComputeRootConstantBufferView(1, cb_resource->GetGPUVirtualAddress() + offset); - command_list->SetPipelineState(d3d12_context->GetIndirectArgsPass().pipeline_state_); - command_list->Dispatch(1, 1, 1); - } - - // Query the amount of time spent in the intersection pass - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - FFX_SSSR_ASSERT(timestamp_queries.size() == 1ull && timestamp_queries[0] == kTimestampQuery_Init); - - command_list->EndQuery(timestamp_query_heap_, - D3D12_QUERY_TYPE_TIMESTAMP, - GetTimestampQueryIndex()); - - timestamp_queries.push_back(kTimestampQuery_TileClassification); - } - - // Ensure that the arguments are written - D3D12_RESOURCE_BARRIER indirect_arguments_barriers[] = { - UAVBarrier(intersection_pass_indirect_args_), - UAVBarrier(denoiser_pass_indirect_args_), - Transition(intersection_pass_indirect_args_, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT), - Transition(denoiser_pass_indirect_args_, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT) - }; - command_list->ResourceBarrier(FFX_SSSR_ARRAY_SIZE(indirect_arguments_barriers), indirect_arguments_barriers); - - // Intersection pass - { - 
command_list->SetComputeRootSignature(d3d12_context->GetIntersectionPass().root_signature_); - command_list->SetComputeRootDescriptorTable(0, intersection_descriptor_table_[current_frame].GetGPUDescriptor()); - command_list->SetComputeRootConstantBufferView(1, cb_resource->GetGPUVirtualAddress() + offset); - command_list->SetComputeRootDescriptorTable(2, sampler_descriptor_table_.GetGPUDescriptor()); - command_list->SetPipelineState(d3d12_context->GetIntersectionPass().pipeline_state_); - command_list->ExecuteIndirect(d3d12_context->GetIndirectDispatchCommandSignature(), 1, intersection_pass_indirect_args_, 0, nullptr, 0); - } - - // Query the amount of time spent in the intersection pass - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - FFX_SSSR_ASSERT(timestamp_queries.size() == 2ull && timestamp_queries[1] == kTimestampQuery_TileClassification); - - command_list->EndQuery(timestamp_query_heap_, - D3D12_QUERY_TYPE_TIMESTAMP, - GetTimestampQueryIndex()); - - timestamp_queries.push_back(kTimestampQuery_Intersection); - } - - if (resolve_reflection_view_info.flags & FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE) - { - // Ensure that the intersection pass finished - command_list->ResourceBarrier(1, &UAVBarrier(temporal_denoiser_result_[current_frame])); - - // Spatial denoiser passes - { - command_list->SetComputeRootSignature(d3d12_context->GetSpatialDenoisingPass().root_signature_); - command_list->SetComputeRootDescriptorTable(0, spatial_denoising_descriptor_table_[current_frame].GetGPUDescriptor()); - command_list->SetComputeRootConstantBufferView(1, cb_resource->GetGPUVirtualAddress() + offset); - command_list->SetPipelineState(d3d12_context->GetSpatialDenoisingPass().pipeline_state_); - command_list->ExecuteIndirect(d3d12_context->GetIndirectDispatchCommandSignature(), 1, denoiser_pass_indirect_args_, 0, nullptr, 0); - } - - // Ensure that the 
spatial denoising pass finished. We don't have the resource for the final result available, thus we have to wait for any UAV access to finish. - command_list->ResourceBarrier(1, &UAVBarrier(nullptr)); - - // Temporal denoiser passes - { - command_list->SetComputeRootSignature(d3d12_context->GetTemporalDenoisingPass().root_signature_); - command_list->SetComputeRootDescriptorTable(0, temporal_denoising_descriptor_table_[current_frame].GetGPUDescriptor()); - command_list->SetComputeRootConstantBufferView(1, cb_resource->GetGPUVirtualAddress() + offset); - command_list->SetPipelineState(d3d12_context->GetTemporalDenoisingPass().pipeline_state_); - command_list->ExecuteIndirect(d3d12_context->GetIndirectDispatchCommandSignature(), 1, denoiser_pass_indirect_args_, 0, nullptr, 0); - } - - // Ensure that the temporal denoising pass finished - command_list->ResourceBarrier(1, &UAVBarrier(temporal_denoiser_result_[current_frame])); - - // EAW denoiser passes - { - command_list->SetComputeRootSignature(d3d12_context->GetEawDenoisingPass().root_signature_); - command_list->SetComputeRootDescriptorTable(0, eaw_denoising_descriptor_table_[current_frame].GetGPUDescriptor()); - command_list->SetComputeRootConstantBufferView(1, cb_resource->GetGPUVirtualAddress() + offset); - command_list->SetPipelineState(d3d12_context->GetEawDenoisingPass().pipeline_state_); - command_list->ExecuteIndirect(d3d12_context->GetIndirectDispatchCommandSignature(), 1, denoiser_pass_indirect_args_, 0, nullptr, 0); - } - - // Query the amount of time spent in the denoiser passes - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - FFX_SSSR_ASSERT(timestamp_queries.size() == 3ull && timestamp_queries[2] == kTimestampQuery_Intersection); - - command_list->EndQuery(timestamp_query_heap_, - D3D12_QUERY_TYPE_TIMESTAMP, - GetTimestampQueryIndex()); - - 
timestamp_queries.push_back(kTimestampQuery_Denoising); - } - } - - // Resolve the timestamp query data - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto const start_index = timestamp_queries_index_ * kTimestampQuery_Count; - - command_list->ResolveQueryData(timestamp_query_heap_, - D3D12_QUERY_TYPE_TIMESTAMP, - start_index, - static_cast(timestamp_queries_[timestamp_queries_index_].size()), - timestamp_query_buffer_, - start_index * sizeof(std::uint64_t)); - - timestamp_queries_index_ = (timestamp_queries_index_ + 1u) % context.GetFrameCountBeforeReuse(); - } - } -} diff --git a/ffx-sssr/src/d3d12/reflection_view_d3d12.h b/ffx-sssr/src/d3d12/reflection_view_d3d12.h deleted file mode 100644 index 423be51..0000000 --- a/ffx-sssr/src/d3d12/reflection_view_d3d12.h +++ /dev/null @@ -1,153 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include -#include - -#include "macros.h" -#include "matrix4.h" -#include "ffx_sssr.h" -#include "descriptor_heap_d3d12.h" - -namespace ffx_sssr -{ - class Context; - class ReflectionView; - - /** - The ReflectionViewD3D12 class encapsulates the data required for resolving an individual reflection view. - */ - class ReflectionViewD3D12 - { - FFX_SSSR_NON_COPYABLE(ReflectionViewD3D12); - - public: - - /** - The available timestamp queries. - */ - enum TimestampQuery - { - kTimestampQuery_Init, - kTimestampQuery_TileClassification, - kTimestampQuery_Intersection, - kTimestampQuery_Denoising, - - kTimestampQuery_Count - }; - - /** - The type definition for an array of timestamp queries. - */ - using TimestampQueries = std::vector; - - ReflectionViewD3D12(); - ~ReflectionViewD3D12(); - - ReflectionViewD3D12(ReflectionViewD3D12&& other) noexcept; - ReflectionViewD3D12& operator =(ReflectionViewD3D12&& other) noexcept; - - void Create(Context& context, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info); - void Destroy(); - - void CreateDescriptorHeaps(Context& context); - - std::uint32_t GetTimestampQueryIndex() const; - - void Resolve(Context& context, ReflectionView const& reflection_view, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info); - - // The width of the reflection view (in texels). - std::uint32_t width_; - // The height of the reflection view (in texels). - std::uint32_t height_; - // The reflection view creation flags. - FfxSssrCreateReflectionViewFlags flags_; - - // The descriptor heap for CBVs, SRVs, and UAVs. 
- DescriptorHeapD3D12* descriptor_heap_cbv_srv_uav_; - - // The descriptor heap for samplers. - DescriptorHeapD3D12* descriptor_heap_samplers_; - - // Single heap containing all resources. - ID3D12Heap * resource_heap_; - - // Containing all tiles that need at least one ray. - ID3D12Resource * tile_list_; - ID3D12Resource * tile_counter_; - // Containing all rays that need to be traced. - ID3D12Resource * ray_list_; - ID3D12Resource * ray_counter_; - // Indirect arguments for intersection pass. - ID3D12Resource * intersection_pass_indirect_args_; - // Indirect arguments for denoiser pass. - ID3D12Resource * denoiser_pass_indirect_args_; - // Intermediate result of the temporal denoising pass - double buffered to keep history and aliases the intersection result. - ID3D12Resource * temporal_denoiser_result_[2]; - // Holds the length of each reflection ray - used for temporal reprojection. - ID3D12Resource * ray_lengths_; - // Holds the temporal variance of the last two frames. - ID3D12Resource * temporal_variance_; - - // The number of GPU ticks spent in the tile classification pass. - std::uint64_t tile_classification_elapsed_time_; - // The number of GPU ticks spent in depth buffer intersection. - std::uint64_t intersection_elapsed_time_; - // The number of GPU ticks spent denoising. - std::uint64_t denoising_elapsed_time_; - // The query heap for the recorded timestamps. - ID3D12QueryHeap * timestamp_query_heap_; - // The buffer for reading the timestamp queries. - ID3D12Resource * timestamp_query_buffer_; - // The array of timestamp that were queried. - std::vector timestamp_queries_; - // The index of the active set of timestamp queries. - std::uint32_t timestamp_queries_index_; - - // Format of the resolved scene. - DXGI_FORMAT scene_format_; - - // The descriptor tables. One per shader pass per frame. - // Even with more than 2 frames in flight we only swap between the last two - // as we keep only one frame of history. 
- - // Descriptor tables of the tile classification pass. - DescriptorD3D12 tile_classification_descriptor_table_[2]; - // Descriptor tables of the indirect arguments pass. - DescriptorD3D12 indirect_args_descriptor_table_[2]; - // Descriptor tables of the depth buffer intersection pass. - DescriptorD3D12 intersection_descriptor_table_[2]; - // Descriptor tables of the spatial denoising pass. - DescriptorD3D12 spatial_denoising_descriptor_table_[2]; - // Descriptor tables of the temporal denoising pass. - DescriptorD3D12 temporal_denoising_descriptor_table_[2]; - // Descriptor tables of the eaw denoising pass. - DescriptorD3D12 eaw_denoising_descriptor_table_[2]; - // Descriptor tables for the environment map sampler. - DescriptorD3D12 sampler_descriptor_table_; - - // The view projection matrix of the last frame. - matrix4 prev_view_projection_; - }; -} diff --git a/ffx-sssr/src/d3d12/sampler_d3d12.cpp b/ffx-sssr/src/d3d12/sampler_d3d12.cpp deleted file mode 100644 index 9d80b3b..0000000 --- a/ffx-sssr/src/d3d12/sampler_d3d12.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "sampler_d3d12.h" - -namespace ffx_sssr -{ - /** - The constructor for the BlueNoiseSamplerD3D12 class. - */ - BlueNoiseSamplerD3D12::BlueNoiseSamplerD3D12() - : sobol_buffer_(nullptr) - , ranking_tile_buffer_(nullptr) - , scrambling_tile_buffer_(nullptr) - { - } - - /** - The constructor for the BlueNoiseSamplerD3D12 class. - - \param other The sampler to be moved. - */ - BlueNoiseSamplerD3D12::BlueNoiseSamplerD3D12(BlueNoiseSamplerD3D12&& other) noexcept - : sobol_buffer_(other.sobol_buffer_) - , ranking_tile_buffer_(other.ranking_tile_buffer_) - , scrambling_tile_buffer_(other.scrambling_tile_buffer_) - { - other.sobol_buffer_ = nullptr; - other.ranking_tile_buffer_ = nullptr; - other.scrambling_tile_buffer_ = nullptr; - } - - /** - The destructor for the BlueNoiseSamplerD3D12 class. - */ - BlueNoiseSamplerD3D12::~BlueNoiseSamplerD3D12() - { - if (sobol_buffer_) - sobol_buffer_->Release(); - if (ranking_tile_buffer_) - ranking_tile_buffer_->Release(); - if (scrambling_tile_buffer_) - scrambling_tile_buffer_->Release(); - } - - /** - Assigns the sampler. - - \param other The sampler to be moved. - \return The assigned sampler. 
- */ - BlueNoiseSamplerD3D12& BlueNoiseSamplerD3D12::operator =(BlueNoiseSamplerD3D12&& other) noexcept - { - if (this != &other) - { - sobol_buffer_ = other.sobol_buffer_; - ranking_tile_buffer_ = other.ranking_tile_buffer_; - scrambling_tile_buffer_ = other.scrambling_tile_buffer_; - - other.sobol_buffer_ = nullptr; - other.ranking_tile_buffer_ = nullptr; - other.scrambling_tile_buffer_ = nullptr; - } - - return *this; - } -} diff --git a/ffx-sssr/src/d3d12/sampler_d3d12.h b/ffx-sssr/src/d3d12/sampler_d3d12.h deleted file mode 100644 index 13dc989..0000000 --- a/ffx-sssr/src/d3d12/sampler_d3d12.h +++ /dev/null @@ -1,54 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#pragma once - -#include - -#include "macros.h" -#include "ffx_sssr.h" - -namespace ffx_sssr -{ - /** - The BlueNoiseSamplerD3D12 class represents a blue-noise sampler to be used for random number generation. - - \note Original implementation can be found here: https://eheitzresearch.wordpress.com/762-2/ - */ - class BlueNoiseSamplerD3D12 - { - FFX_SSSR_NON_COPYABLE(BlueNoiseSamplerD3D12); - - public: - BlueNoiseSamplerD3D12(); - ~BlueNoiseSamplerD3D12(); - - BlueNoiseSamplerD3D12(BlueNoiseSamplerD3D12&& other) noexcept; - BlueNoiseSamplerD3D12& BlueNoiseSamplerD3D12::operator =(BlueNoiseSamplerD3D12&& other) noexcept; - - // The Sobol sequence buffer. - ID3D12Resource* sobol_buffer_; - // The ranking tile buffer for sampling. - ID3D12Resource* ranking_tile_buffer_; - // The scrambling tile buffer for sampling. - ID3D12Resource* scrambling_tile_buffer_; - }; -} diff --git a/ffx-sssr/src/d3d12/shader_compiler_d3d12.cpp b/ffx-sssr/src/d3d12/shader_compiler_d3d12.cpp deleted file mode 100644 index cd01097..0000000 --- a/ffx-sssr/src/d3d12/shader_compiler_d3d12.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "shader_compiler_d3d12.h" - -#include -#include -#include - -#if FFX_SSSR_DUMP_SHADERS -#include -#endif // FFX_SSSR_DUMP_SHADERS - -#include "reflection_error.h" -#include "utils.h" - -namespace ffx_sssr -{ - /** - The constructor for the ShaderCompilerD3D12 class. - - \param context The context to be used. - */ - ShaderCompilerD3D12::ShaderCompilerD3D12(Context& context) - : context_(context) - , dxc_include_handler_(nullptr) - , dxc_compiler_(nullptr) - , dxc_library_(nullptr) - { - } - - /** - The destructor for the ShaderCompilerD3D12 class. - */ - ShaderCompilerD3D12::~ShaderCompilerD3D12() - { - if (dxc_compiler_) - dxc_compiler_->Release(); - if (dxc_library_) - dxc_library_->Release(); - if (dxc_include_handler_) - dxc_include_handler_->Release(); - - dxc_dll_support_.Cleanup(); - } - - /** - Compiles the shader file. - - \param filename The location of the shader file. - \param profile The targeted shader model. - \param defines The list of defines to be used. - \param define_count The number of defines. - \return The compiled shader. 
- */ - ShaderD3D12 ShaderCompilerD3D12::CompileShaderFile(char const* filename, char const* profile, LPCWSTR* arguments, std::uint32_t argument_count, DxcDefine* defines, std::uint32_t define_count) - { - HRESULT result; - FFX_SSSR_ASSERT(filename && profile); - - if (!LoadShaderCompiler()) - { - return ShaderD3D12(); - } - - // Compile the shader code from source - IDxcBlobEncoding* dxc_source; - auto const shader_filename = StringToWString(filename); - result = dxc_library_->CreateBlobFromFile(shader_filename.c_str(), nullptr, &dxc_source); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Could not create shader blob from %s", filename); - - ShaderD3D12 shader = CompileShaderBlob(dxc_source, shader_filename.c_str(), profile, arguments, argument_count, defines, define_count); - - dxc_source->Release(); - - return shader; - } - - ShaderD3D12 ShaderCompilerD3D12::CompileShaderString(char const * string, std::uint32_t string_size, char const* shader_name, char const * profile, LPCWSTR * arguments, std::uint32_t argument_count, DxcDefine * defines, std::uint32_t define_count) - { - HRESULT result; - FFX_SSSR_ASSERT(string && profile); - - if (!LoadShaderCompiler()) - { - return ShaderD3D12(); - } - - IDxcBlobEncoding* dxc_source; - result = dxc_library_->CreateBlobWithEncodingFromPinned((LPBYTE)string, string_size, 0, &dxc_source); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Could not create blob with encoding from pinned for %s", shader_name); - - auto const wc_shader_name = StringToWString(shader_name); - - ShaderD3D12 shader = CompileShaderBlob(dxc_source, wc_shader_name.c_str(), profile, arguments, argument_count, defines, define_count); - - dxc_source->Release(); - - return shader; - } - - bool ShaderCompilerD3D12::LoadShaderCompiler() - { - // Load shader compiler - if (!dxc_dll_support_.IsEnabled()) - { - HRESULT result = dxc_dll_support_.Initialize(); - if 
(FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to initialize dxcompiler.dll support"); - - result = dxc_dll_support_.CreateInstance(CLSID_DxcCompiler, &dxc_compiler_); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to create DXC compiler instance"); - - result = dxc_dll_support_.CreateInstance(CLSID_DxcLibrary, &dxc_library_); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to create DXC library instance"); - - result = dxc_library_->CreateIncludeHandler(&dxc_include_handler_); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to create DXC include handler"); - } - else if (!dxc_compiler_ || !dxc_library_) - { - return false; // failed to create DXC instances - } - - return true; - } - - ShaderD3D12 ShaderCompilerD3D12::CompileShaderBlob(IDxcBlob * dxc_source, wchar_t const * shader_name, char const * profile, LPCWSTR * arguments, std::uint32_t argument_count, DxcDefine * defines, std::uint32_t define_count) - { - HRESULT result; - - std::vector resolved_defines; - resolved_defines.reserve(define_count); - - for (uint32_t i = 0; i < define_count; ++i) - { - if (defines[i].Name != nullptr) - { - resolved_defines.push_back(defines[i]); - if (resolved_defines.back().Value == nullptr) - { - resolved_defines.back().Value = L"1"; - } - } - } - - ShaderD3D12 shader; - IDxcOperationResult* dxc_result; - auto const target_profile = StringToWString(profile); - result = dxc_compiler_->Compile(dxc_source, - shader_name, - L"main", - target_profile.c_str(), - arguments, - argument_count, - resolved_defines.data(), - static_cast(resolved_defines.size()), - dxc_include_handler_, - &dxc_result); - - // Check for compilation errors - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to compile D3D12 shader source code"); - if 
(FAILED(dxc_result->GetStatus(&result)) || FAILED(result)) - { - IDxcBlobEncoding* dxc_error; - dxc_result->GetErrorBuffer(&dxc_error); - std::string const error(static_cast(dxc_error->GetBufferPointer())); - dxc_result->Release(); - dxc_error->Release(); - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to compile shader file:\r\n> %s", error.c_str()); - } - - // Get hold of the program blob - IDxcBlob* dxc_program = nullptr; - dxc_result->GetResult(&dxc_program); - FFX_SSSR_ASSERT(dxc_program != nullptr); - dxc_result->Release(); - -#if FFX_SSSR_DUMP_SHADERS - IDxcBlobEncoding* disasm; - HRESULT hr = dxc_compiler_->Disassemble(dxc_program, &disasm); - if (SUCCEEDED(hr)) - { - std::wstring path = shader_name + std::wstring(L".dxil.disasm"); - std::ofstream filestream(path.c_str()); - filestream.write((const char*)disasm->GetBufferPointer(), disasm->GetBufferSize()); - filestream.close(); - disasm->Release(); - } -#endif // FFX_SSSR_DUMP_SHADERS - - // Retrieve the shader bytecode - shader.BytecodeLength = dxc_program->GetBufferSize(); - auto const shader_bytecode = malloc(shader.BytecodeLength); - FFX_SSSR_ASSERT(shader_bytecode != nullptr); // out of memory - memcpy(shader_bytecode, dxc_program->GetBufferPointer(), shader.BytecodeLength); - shader.pShaderBytecode = shader_bytecode; - dxc_program->Release(); - -#if FFX_SSSR_DUMP_SHADERS - std::wstring path = shader_name + std::wstring(L".dxil"); - std::ofstream filestream(path.c_str(), std::ios::binary | std::ios::out); - filestream.write((const char*)shader.pShaderBytecode, shader.BytecodeLength); - filestream.close(); -#endif // FFX_SSSR_DUMP_SHADERS - - return shader; - } -} diff --git a/ffx-sssr/src/d3d12/shader_compiler_d3d12.h b/ffx-sssr/src/d3d12/shader_compiler_d3d12.h deleted file mode 100644 index f711ce1..0000000 --- a/ffx-sssr/src/d3d12/shader_compiler_d3d12.h +++ /dev/null @@ -1,81 +0,0 @@ -/********************************************************************** -Copyright (c) 
2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include - -#include "macros.h" - -namespace ffx_sssr -{ - class Context; - - /** - The ShaderD3D12 class is a simple helper for freeing the shader bytecode upon destruction. - */ - class ShaderD3D12 : public D3D12_SHADER_BYTECODE - { - FFX_SSSR_NON_COPYABLE(ShaderD3D12); - - public: - inline ShaderD3D12(); - inline ~ShaderD3D12(); - - inline operator bool() const; - - inline ShaderD3D12(ShaderD3D12&& other) noexcept; - inline ShaderD3D12& operator =(ShaderD3D12&& other) noexcept; - }; - - /** - The ShaderCompilerD3D12 class is a utility for compiling Direct3D12 shader code. 
- */ - class ShaderCompilerD3D12 - { - FFX_SSSR_NON_COPYABLE(ShaderCompilerD3D12); - - public: - ShaderCompilerD3D12(Context& context); - ~ShaderCompilerD3D12(); - - ShaderD3D12 CompileShaderFile(char const* filename, char const* profile, LPCWSTR* arguments = nullptr, std::uint32_t argument_count = 0, DxcDefine* defines = nullptr, std::uint32_t define_count = 0u); - ShaderD3D12 CompileShaderString(char const* string, std::uint32_t string_size, char const* shader_name, char const* profile, LPCWSTR* arguments = nullptr, std::uint32_t argument_count = 0, DxcDefine* defines = nullptr, std::uint32_t define_count = 0u); - - protected: - bool LoadShaderCompiler(); - ShaderD3D12 CompileShaderBlob(IDxcBlob* dxc_source, wchar_t const* shader_name, char const* profile, LPCWSTR* arguments = nullptr, std::uint32_t argument_count = 0, DxcDefine* defines = nullptr, std::uint32_t define_count = 0u); - - // The context to be used. - Context& context_; - // A helper for loading the dxcompiler library. - dxc::DxcDllSupport dxc_dll_support_; - // The Direct3D12 include handler. - IDxcIncludeHandler* dxc_include_handler_; - // The Direct3D12 shader compiler. - IDxcCompiler2* dxc_compiler_; - // The Direct3D12 shader library. - IDxcLibrary* dxc_library_; - }; -} - -#include "shader_compiler_d3d12.inl" diff --git a/ffx-sssr/src/d3d12/shader_compiler_d3d12.inl b/ffx-sssr/src/d3d12/shader_compiler_d3d12.inl deleted file mode 100644 index ef1356b..0000000 --- a/ffx-sssr/src/d3d12/shader_compiler_d3d12.inl +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the ShaderD3D12 class. - */ - ShaderD3D12::ShaderD3D12() - { - memset(this, 0, sizeof(*this)); - } - - /** - The destructor for the ShaderD3D12 class. - */ - ShaderD3D12::~ShaderD3D12() - { - free(const_cast(pShaderBytecode)); - } - - /** - The constructor for the ShaderD3D12 class. - - \param other The shader to be moved. - */ - ShaderD3D12::ShaderD3D12(ShaderD3D12&& other) noexcept - { - pShaderBytecode = other.pShaderBytecode; - BytecodeLength = other.BytecodeLength; - - other.pShaderBytecode = nullptr; - } - - /** - Assigns the shader. - - \param other The shader to be moved. - \return The assigned shader. 
- */ - ShaderD3D12& ShaderD3D12::operator =(ShaderD3D12&& other) noexcept - { - if (this != &other) - { - pShaderBytecode = other.pShaderBytecode; - BytecodeLength = other.BytecodeLength; - - other.pShaderBytecode = nullptr; - } - - return *this; - } - - /** - Checks whether the shader is valid. - - \return true if the shader is valid, false otherwise. - */ - ShaderD3D12::operator bool() const - { - return pShaderBytecode != nullptr; - } -} diff --git a/ffx-sssr/src/d3d12/upload_buffer_d3d12.cpp b/ffx-sssr/src/d3d12/upload_buffer_d3d12.cpp deleted file mode 100644 index 99973eb..0000000 --- a/ffx-sssr/src/d3d12/upload_buffer_d3d12.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#include "upload_buffer_d3d12.h" - -#include "utils.h" -#include "context.h" -#include "context_d3d12.h" - -namespace ffx_sssr -{ - /** - The constructor for the UploadBufferD3D12 class. - - \param context The Direct3D12 context to be used. - \param buffer_size The size of the upload buffer (in bytes). - */ - UploadBufferD3D12::UploadBufferD3D12(ContextD3D12& context, std::size_t buffer_size) - : data_(nullptr) - , context_(context.GetContext()) - , buffer_(nullptr) - , blocks_(buffer_size) - { - FFX_SSSR_ASSERT(context.GetDevice()); - - D3D12_HEAP_PROPERTIES heap_properties = {}; - heap_properties.Type = D3D12_HEAP_TYPE_UPLOAD; - heap_properties.CreationNodeMask = 1u; - heap_properties.VisibleNodeMask = 1u; - - D3D12_RESOURCE_DESC resource_desc = {}; - resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - resource_desc.Width = static_cast(buffer_size); - resource_desc.Height = 1u; - resource_desc.DepthOrArraySize = 1u; - resource_desc.MipLevels = 1u; - resource_desc.SampleDesc.Count = 1u; - resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - - if (!SUCCEEDED(context.GetDevice()->CreateCommittedResource(&heap_properties, - D3D12_HEAP_FLAG_NONE, - &resource_desc, - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&buffer_)))) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %uMiB for the upload buffer", RoundedDivide(buffer_size, 1024ull * 1024ull)); - } - - D3D12_RANGE range = {}; - range.Begin = 0u; - range.End = static_cast(buffer_size); - - if (!SUCCEEDED(buffer_->Map(0u, - &range, - reinterpret_cast(&data_)))) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Cannot map the Direct3D12 upload buffer"); - } - - buffer_->SetName(L"UploadBufferRing"); - } - - /** - The destructor for the UploadBufferD3D12 class. 
- */ - UploadBufferD3D12::~UploadBufferD3D12() - { - if (buffer_) - { - if (data_) - { - D3D12_RANGE range = {}; - range.Begin = 0u; - range.End = static_cast(buffer_->GetDesc().Width); - - buffer_->Unmap(0u, &range); - } - - buffer_->Release(); - } - } - - /** - Allocates a buffer. - - \param size The size of the buffer (in bytes). - \param gpu_address The GPU virtual address. - \param data The pointer to the pinned memory. - \return true if the buffer was allocated successfully, false otherwise. - */ - bool UploadBufferD3D12::AllocateBuffer(std::size_t size, D3D12_GPU_VIRTUAL_ADDRESS& gpu_address, void*& data) - { - std::size_t start; - - auto const memory_block = blocks_.AcquireBlock(start, size, 256u); - - if (!memory_block) - { - return false; - } - - data = static_cast(data_) + start; - gpu_address = buffer_->GetGPUVirtualAddress() + start; - - memory_block->block_index_ = context_.GetFrameIndex(); - memory_block->frame_index_ = &context_.GetFrameIndex(); - memory_block->frame_count_before_reuse_ = context_.GetFrameCountBeforeReuse(); - - return true; - } - - /** - Creates a constant buffer view for the allocated range. - - \param data The pointer to the allocated memory. - \param size The size of the allocated range (in bytes). - \param cpu_descriptor The CPU descriptor to be used. - */ - void UploadBufferD3D12::CreateConstantBufferView(void const* data, std::size_t size, D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor) const - { - auto const offset = static_cast(data) - static_cast(data_); - FFX_SSSR_ASSERT(buffer_ && data >= data_ && offset + size <= buffer_->GetDesc().Width); // buffer overflow! 
- - D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {}; - constant_buffer_view_desc.BufferLocation = buffer_->GetGPUVirtualAddress() + offset; - constant_buffer_view_desc.SizeInBytes = static_cast(Align(size, 256ull)); - - context_.GetContextD3D12()->GetDevice()->CreateConstantBufferView(&constant_buffer_view_desc, - cpu_descriptor); - } - - /** - Creates a shader resource view for the allocated range. - - \param data The pointer to the allocated memory. - \param size The size of the allocated range (in bytes). - \param stride The size of an individual element (in bytes). - \param cpu_descriptor The CPU descriptor to be used. - */ - void UploadBufferD3D12::CreateShaderResourceView(void const* data, std::size_t size, std::size_t stride, D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor) const - { - auto const offset = static_cast(data) - static_cast(data_); - FFX_SSSR_ASSERT(buffer_ && data >= data_ && offset + size <= buffer_->GetDesc().Width); // buffer overflow! - - D3D12_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {}; - shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - shader_resource_view_desc.Buffer.FirstElement = static_cast(offset / stride); - shader_resource_view_desc.Buffer.NumElements = static_cast(size / stride); - shader_resource_view_desc.Buffer.StructureByteStride = static_cast(stride); - - context_.GetContextD3D12()->GetDevice()->CreateShaderResourceView(buffer_, - &shader_resource_view_desc, - cpu_descriptor); - } -} diff --git a/ffx-sssr/src/d3d12/upload_buffer_d3d12.h b/ffx-sssr/src/d3d12/upload_buffer_d3d12.h deleted file mode 100644 index f47c4e7..0000000 --- a/ffx-sssr/src/d3d12/upload_buffer_d3d12.h +++ /dev/null @@ -1,88 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include - -#include "memory.h" - -namespace ffx_sssr -{ - class Context; - class ContextD3D12; - - /** - The UploadBufferD3D12 class allows to transfer some memory from the CPU to the GPU. 
- */ - class UploadBufferD3D12 - { - FFX_SSSR_NON_COPYABLE(UploadBufferD3D12); - - public: - UploadBufferD3D12(ContextD3D12& context, std::size_t buffer_size); - ~UploadBufferD3D12(); - - inline std::size_t GetSize() const; - inline ID3D12Resource* GetResource() const; - inline std::size_t GetOffset(void *data) const; - - template - bool AllocateBuffer(std::size_t size, TYPE*& data); - template - bool AllocateBuffer(std::size_t size, TYPE*& data, D3D12_GPU_VIRTUAL_ADDRESS& gpu_address); - - void CreateConstantBufferView(void const* data, std::size_t size, D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor) const; - void CreateShaderResourceView(void const* data, std::size_t size, std::size_t stride, D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor) const; - - protected: - bool AllocateBuffer(std::size_t size, D3D12_GPU_VIRTUAL_ADDRESS& gpu_address, void*& data); - - /** - The Block class represents an individual synchronizable block to be upload for memory upload. - */ - class Block - { - public: - inline Block(); - - inline bool CanBeReused() const; - - // The index of the currently calculated frame. - std::uint32_t* frame_index_; - // The frame at which this block was created. - std::uint32_t block_index_; - // The number of elapsed frames before re-use. - std::uint32_t frame_count_before_reuse_; - }; - - // The pointer to the mapped data. - void* data_; - // The context to be used. - Context& context_; - // The resource to the upload buffer. - ID3D12Resource* buffer_; - // The available blocks for memory upload. - RingBuffer blocks_; - }; -} - -#include "upload_buffer_d3d12.inl" diff --git a/ffx-sssr/src/d3d12/upload_buffer_d3d12.inl b/ffx-sssr/src/d3d12/upload_buffer_d3d12.inl deleted file mode 100644 index 62109ec..0000000 --- a/ffx-sssr/src/d3d12/upload_buffer_d3d12.inl +++ /dev/null @@ -1,128 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the Block class. - */ - UploadBufferD3D12::Block::Block() - : frame_index_(nullptr) - , block_index_(0u) - , frame_count_before_reuse_(0u) - { - } - - /** - Checks whether the memory block can now be re-used. - - \return true if the memory block can be re-used, false otherwise. - */ - bool UploadBufferD3D12::Block::CanBeReused() const - { - FFX_SSSR_ASSERT(frame_index_ && *frame_index_ >= block_index_); - - return (*frame_index_ - block_index_ >= frame_count_before_reuse_); - } - - /** - Gets the size of the upload buffer. - - \return The size of the upload buffer (in bytes). - */ - std::size_t UploadBufferD3D12::GetSize() const - { - return static_cast(buffer_ ? buffer_->GetDesc().Width : 0ull); - } - - /** - Gets the resource for the upload buffer. - - \return The resource for the upload buffer. 
- */ - ID3D12Resource* UploadBufferD3D12::GetResource() const - { - return buffer_; - } - - /** - Gets the offset for the allocate range of memory. - - \param data The allocated range of memory. - \return The offset within the upload buffer (in bytes). - */ - std::size_t UploadBufferD3D12::GetOffset(void* data) const - { - if (!data) - return 0ull; - auto const offset = static_cast(data) - static_cast(data_); - FFX_SSSR_ASSERT(buffer_ && data >= data_ && static_cast(offset) < buffer_->GetDesc().Width); // buffer overflow! - return static_cast(offset); - } - - /** - Allocates a buffer. - - \param size The size of the buffer (in bytes). - \param data The pointer to the pinned memory. - \return true if the buffer was allocated successfully, false otherwise. - */ - template - bool UploadBufferD3D12::AllocateBuffer(std::size_t size, TYPE*& data) - { - void* data_internal; - D3D12_GPU_VIRTUAL_ADDRESS gpu_address_unused; - - if (!AllocateBuffer(Align(size, 256ull), gpu_address_unused, data_internal)) - { - return false; - } - - data = static_cast(data_internal); - - return true; - } - - /** - Allocates a buffer. - - \param size The size of the buffer (in bytes). - \param data The pointer to the pinned memory. - \param gpu_address The GPU virtual address. - \return true if the buffer was allocated successfully, false otherwise. - */ - template - bool UploadBufferD3D12::AllocateBuffer(std::size_t size, TYPE*& data, D3D12_GPU_VIRTUAL_ADDRESS& gpu_address) - { - void* data_internal; - - if (!AllocateBuffer(size, gpu_address, data_internal)) - { - return false; - } - - data = static_cast(data_internal); - - return true; - } -} diff --git a/ffx-sssr/src/float3.h b/ffx-sssr/src/float3.h deleted file mode 100644 index 10c57a9..0000000 --- a/ffx-sssr/src/float3.h +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include - -namespace ffx_sssr -{ - /** - The tfloat3 class represents a generic 3-wide vector. - */ - template - class tfloat3 - { - public: - inline tfloat3(); - inline tfloat3(TYPE v); - inline tfloat3(TYPE x, TYPE y, TYPE z, TYPE w = static_cast(0)); - template - inline tfloat3(tfloat3 const& other); - - inline tfloat3 operator -() const; - inline tfloat3 operator /(TYPE f) const; - inline TYPE& operator [](std::uint32_t i); - inline TYPE operator [](std::uint32_t i) const; - - inline TYPE sqnorm() const; - inline TYPE norm() const; - - static inline tfloat3 normalize(tfloat3 const& v); - - // The vector X component. - TYPE x; - // The vector Y component. - TYPE y; - // The vector Z component. - TYPE z; - // The vector W component. - TYPE w; - }; - - /** - A type definition for a single precision floating-point 3-wide vector. 
- */ - typedef tfloat3 float3; - - /** - A type definition for a double precision floating-point 3-wide vector. - */ - typedef tfloat3 double3; - - /** - A type definition for a single precision floating-point 4-wide vector. - */ - typedef float3 float4; - - /** - A type definition for a double precision floating-point 4-wide vector. - */ - typedef double3 double4; -} - -#include "float3.inl" diff --git a/ffx-sssr/src/float3.inl b/ffx-sssr/src/float3.inl deleted file mode 100644 index fb06f69..0000000 --- a/ffx-sssr/src/float3.inl +++ /dev/null @@ -1,170 +0,0 @@ -#include "float3.h" -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the tfloat3 class. 
- */ - template - tfloat3::tfloat3() - : x(static_cast(0)) - , y(static_cast(0)) - , z(static_cast(0)) - , w(static_cast(0)) - { - } - - /** - The constructor for the tfloat3 class. - - \param v The value for initializing the vector. - */ - template - tfloat3::tfloat3(TYPE v) - : x(v) - , y(v) - , z(v) - , w(v) - { - } - - /** - The constructor for the tfloat3 class. - - \param x The vector X component. - \param y The vector Y component. - \param z The vector Z component. - \param w The vector W component. - */ - template - tfloat3::tfloat3(TYPE x, TYPE y, TYPE z, TYPE w) - : x(x) - , y(y) - , z(z) - , w(w) - { - } - - /** - The constructor for the tfloat3 class. - - \param other The vector to be constructing from. - */ - template - template - tfloat3::tfloat3(tfloat3 const& other) - : x(static_cast(other.x)) - , y(static_cast(other.y)) - , z(static_cast(other.z)) - , w(static_cast(other.w)) - { - } - - /** - Gets the negative vector. - - \return The negative vector. - */ - template - tfloat3 tfloat3::operator -() const - { - return tfloat3(-x, -y, -z); - } - - /** - Divides each component by the provided number. - - \return The resulting vector. - */ - template - inline tfloat3 tfloat3::operator/(TYPE f) const - { - return tfloat3(x / f, y / f, z / f, w / f); - } - - /** - Gets the given vector component. - - \param i The index of the vector component. - \return The requested vector component. - */ - template - TYPE& tfloat3::operator [](std::uint32_t i) - { - return *(&x + i); - } - - /** - Gets the given vector component. - - \param i The index of the vector component. - \return The requested vector component. - */ - template - TYPE tfloat3::operator [](std::uint32_t i) const - { - return *(&x + i); - } - - /** - Calculates the squared norm of the vector. - - \return The squared norm of the vector. - */ - template - TYPE tfloat3::sqnorm() const - { - return x * x + y * y + z * z; - } - - /** - Calculates the norm of the vector. 
- - \return The norm of the vector. - */ - template - TYPE tfloat3::norm() const - { - return std::sqrt(sqnorm()); - } - - /** - Normalizes the input vector. - - \param v The vector to be normalized. - \return The normalized vector. - */ - template - tfloat3 tfloat3::normalize(tfloat3 const& v) - { - auto result = v; - auto const norm_inv = static_cast(1) / v.norm(); - result.x *= norm_inv; - result.y *= norm_inv; - result.z *= norm_inv; - result.w *= norm_inv; - return result; - } -} diff --git a/ffx-sssr/src/macros.h b/ffx-sssr/src/macros.h deleted file mode 100644 index 710e10d..0000000 --- a/ffx-sssr/src/macros.h +++ /dev/null @@ -1,138 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#pragma once - -#include -#include - -#ifdef _MSC_VER - - #define WIN32_LEAN_AND_MEAN - #include - - #undef max - #undef min - -#endif // _MSC_VER - -/** - Gets the size of a static array. - - \return The size of the static array. -*/ -#define FFX_SSSR_ARRAY_SIZE(ARRAY) \ - static_cast(sizeof(ARRAY) / sizeof(*(ARRAY))) - -/** - Makes the type non-copyable. - - \param TYPE - The type to be made non-copyable. -*/ -#define FFX_SSSR_NON_COPYABLE(TYPE) \ - TYPE(TYPE const&) = delete; \ - TYPE& operator =(TYPE const&) = delete - -#ifdef _MSC_VER - - /** - A macro to start a do while loop. - */ - #define FFX_SSSR_MULTI_LINE_MACRO_BEGIN \ - __pragma(warning(push)) \ - __pragma(warning(disable:4127)) /* conditional expression is constant */ \ - __pragma(warning(disable:4390)) /* empty controlled statement found */ \ - do \ - { - - /** - A macro to end a do while loop. - */ - #define FFX_SSSR_MULTI_LINE_MACRO_END \ - } \ - while (0) \ - __pragma(warning(pop)) - - /** - Triggers a breakpoint. - */ - #define FFX_SSSR_BREAKPOINT \ - FFX_SSSR_MULTI_LINE_MACRO_BEGIN \ - if (IsDebuggerPresent()) \ - { \ - DebugBreak(); \ - } \ - FFX_SSSR_MULTI_LINE_MACRO_END - -#else // _MSC_VER - - /** - A macro to start a do while loop. - */ - #define FFX_SSSR_MULTI_LINE_MACRO_BEGIN \ - do \ - { - - /** - A macro to end a do while loop. - */ - #define FFX_SSSR_MULTI_LINE_MACRO_END \ - } \ - while (0) - - /** - Triggers a breakpoint. - */ - #define FFX_SSSR_BREAKPOINT \ - FFX_SSSR_MULTI_LINE_MACRO_BEGIN \ - assert(0); \ - FFX_SSSR_MULTI_LINE_MACRO_END - -#endif // _MSC_VER - -#ifdef _DEBUG - - /** - Defines a condition breakpoint that only triggers if the expression evaluates to false. - - \param expr The expression to evaluate. 
- */ - #define FFX_SSSR_ASSERT(expr) \ - FFX_SSSR_MULTI_LINE_MACRO_BEGIN \ - if (!(expr)) \ - { \ - FFX_SSSR_BREAKPOINT; \ - } \ - FFX_SSSR_MULTI_LINE_MACRO_END - -#else // _DEBUG - - /** - Ignores the breakpoint condition in a Release build. - - \param expr The expression to be ignored. - */ - #define FFX_SSSR_ASSERT(expr) \ - FFX_SSSR_MULTI_LINE_MACRO_BEGIN \ - sizeof(expr); \ - FFX_SSSR_MULTI_LINE_MACRO_END - -#endif // _DEBUG diff --git a/ffx-sssr/src/matrix4.h b/ffx-sssr/src/matrix4.h deleted file mode 100644 index 03934ae..0000000 --- a/ffx-sssr/src/matrix4.h +++ /dev/null @@ -1,74 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include "float3.h" - -namespace ffx_sssr -{ - /** - The tmatrix4 class represents a generic 4x4 matrix. 
- */ - template - class tmatrix4 - { - public: - inline tmatrix4(); - template - inline tmatrix4(tmatrix4 const& other); - - inline tmatrix4 transpose() const; - inline tmatrix4 operator -() const; - - inline tmatrix4& operator +=(tmatrix4 const& other); - inline tmatrix4& operator -=(tmatrix4 const& other); - inline tmatrix4& operator *=(tmatrix4 const& other); - inline tmatrix4& operator *=(TYPE value); - - static inline tmatrix4 inverse(tmatrix4 const& m); - - // The underlying matrix data. - union - { - TYPE m[4][4]; - struct - { - TYPE m00, m01, m02, m03; - TYPE m10, m11, m12, m13; - TYPE m20, m21, m22, m23; - TYPE m30, m31, m32, m33; - }; - }; - }; - - /** - A type definition for a single precision floating-point 4x4 matrix. - */ - typedef tmatrix4 matrix4; - - /** - A type definition for a double precision floating-point 4x4 matrix. - */ - typedef tmatrix4 dmatrix4; -} - -#include "matrix4.inl" diff --git a/ffx-sssr/src/matrix4.inl b/ffx-sssr/src/matrix4.inl deleted file mode 100644 index 6fb96b9..0000000 --- a/ffx-sssr/src/matrix4.inl +++ /dev/null @@ -1,321 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the tmatrix4 class. - */ - template - tmatrix4::tmatrix4() - : m00(static_cast(1)), m01(static_cast(0)), m02(static_cast(0)), m03(static_cast(0)) - , m10(static_cast(0)), m11(static_cast(1)), m12(static_cast(0)), m13(static_cast(0)) - , m20(static_cast(0)), m21(static_cast(0)), m22(static_cast(1)), m23(static_cast(0)) - , m30(static_cast(0)), m31(static_cast(0)), m32(static_cast(0)), m33(static_cast(1)) - { - } - - /** - The constructor for the tmatrix4 class. - - \param other The matrix to be constructing from. - */ - template - template - tmatrix4::tmatrix4(tmatrix4 const& other) - : m00(static_cast(other.m00)), m01(static_cast(other.m01)), m02(static_cast(other.m02)), m03(static_cast(other.m03)) - , m10(static_cast(other.m10)), m11(static_cast(other.m11)), m12(static_cast(other.m12)), m13(static_cast(other.m13)) - , m20(static_cast(other.m20)), m21(static_cast(other.m21)), m22(static_cast(other.m22)), m23(static_cast(other.m23)) - , m30(static_cast(other.m30)), m31(static_cast(other.m31)), m32(static_cast(other.m32)), m33(static_cast(other.m33)) - { - } - - /** - Transposes the matrix. - - \return The transposed matrix. - */ - template - tmatrix4 tmatrix4::transpose() const - { - tmatrix4 result; - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - result.m[j][i] = m[i][j]; - return result; - } - - /** - Negates the matrix. 
- - \return The negated matrix. - */ - template - tmatrix4 tmatrix4::operator -() const - { - tmatrix4 result = *this; - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - result.m[i][j] = -m[i][j]; - return result; - } - - /** - Adds the matrices. - - \param other The matrix to be added. - \return The updated matrix. - */ - template - tmatrix4& tmatrix4::operator +=(tmatrix4 const& other) - { - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - m[i][j] += other.m[i][j]; - return *this; - } - - /** - Subtracts the matrices. - - \param other The matrices to be subtracted. - \return The updated matrix. - */ - template - tmatrix4& tmatrix4::operator -=(tmatrix4 const& other) - { - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - m[i][j] -= other.m[i][j]; - return *this; - } - - /** - Multiplies the matrices. - - \param other The matrices to be multiplied. - \return The updated matrix. - */ - template - tmatrix4& tmatrix4::operator *=(tmatrix4 const& other) - { - tmatrix4 temp; - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - { - temp.m[i][j] = static_cast(0); - for (auto k = 0u; k < 4u; ++k) - temp.m[i][j] += m[i][k] * other.m[k][j]; - } - *this = temp; - return *this; - } - - /** - Multiplies the matrix. - - \param value The value to be multiplied with. - \return The updated matrix. - */ - template - tmatrix4& tmatrix4::operator *=(TYPE value) - { - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - m[i][j] *= value; - return *this; - } - - /** - Inverts the matrix. - - \param m The matrix to be inverted. - \return The inverted matrix. 
- */ - template - tmatrix4 tmatrix4::inverse(tmatrix4 const& m) - { - int indxc[4], indxr[4]; - int ipiv[4] = { 0, 0, 0, 0 }; - TYPE minv[4][4]; - tmatrix4 temp = m; - memcpy(minv, &temp.m[0][0], 4 * 4 * sizeof(TYPE)); - for (int i = 0; i < 4; i++) { - int irow = -1, icol = -1; - TYPE big = static_cast(0); - - // Choose pivot - for (int j = 0; j < 4; j++) { - if (ipiv[j] != 1) { - for (int k = 0; k < 4; k++) { - if (ipiv[k] == 0) { - if (std::fabs(minv[j][k]) >= big) { - big = std::fabs(minv[j][k]); - irow = j; - icol = k; - } - } - else if (ipiv[k] > 1) - return tmatrix4(); - } - } - } - ++ipiv[icol]; - - // Swap rows _irow_ and _icol_ for pivot - if (irow != icol) { - for (int k = 0; k < 4; ++k) - std::swap(minv[irow][k], minv[icol][k]); - } - indxr[i] = irow; - indxc[i] = icol; - if (minv[icol][icol] == 0.) - return matrix4(); - - // Set $m[icol][icol]$ to one by scaling row _icol_ appropriately - TYPE pivinv = static_cast(1) / minv[icol][icol]; - minv[icol][icol] = 1.f; - for (int j = 0; j < 4; j++) - minv[icol][j] *= pivinv; - - // Subtract this row from others to zero out their columns - for (int j = 0; j < 4; j++) { - if (j != icol) { - TYPE save = minv[j][icol]; - minv[j][icol] = 0; - for (int k = 0; k < 4; k++) - minv[j][k] -= minv[icol][k] * save; - } - } - } - - // Swap columns to reflect permutation - for (int j = 3; j >= 0; j--) { - if (indxr[j] != indxc[j]) { - for (int k = 0; k < 4; k++) - std::swap(minv[k][indxr[j]], minv[k][indxc[j]]); - } - } - - tmatrix4 result; - memcpy(&result.m[0][0], minv, 4 * 4 * sizeof(TYPE)); - - return result; - } - - /** - Adds the two matrices. - - \param m1 The LHS matrix. - \param m2 The RHS matrix. - \return The resulting matrix. - */ - template - inline tmatrix4 operator +(tmatrix4 const& m1, tmatrix4 const& m2) - { - auto result = m1; - return result += m2; - } - - /** - Subtraces the two matrices. - - \param m1 The LHS matrix. - \param m2 The RHS matrix. - \return The resulting matrix. 
- */ - template - inline tmatrix4 operator -(tmatrix4 const& m1, tmatrix4 const& m2) - { - auto result = m1; - return result -= m2; - } - - /** - Multiplies the two matrices. - - \param m1 The LHS matrix. - \param m2 The RHS matrix. - \return The resulting matrix. - */ - template - inline tmatrix4 operator *(tmatrix4 const& m1, tmatrix4 const& m2) - { - tmatrix4 result; - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - { - result.m[i][j] = static_cast(0); - for (auto k = 0u; k < 4u; ++k) - result.m[i][j] += m1.m[i][k] * m2.m[k][j]; - } - return result; - } - - /** - Multiplies the matrix. - - \param m The LHS matrix. - \param c The RHS value. - \return The resulting matrix. - */ - template - inline tmatrix4 operator *(tmatrix4 const& m, TYPE c) - { - auto result = m; - return result *= c; - } - - /** - Multiplies the matrix. - - \param c The LHS value. - \param m The RHS matrix. - \return The resulting matrix. - */ - template - inline tmatrix4 operator *(TYPE c, tmatrix4 const& m) - { - auto result = m; - return result *= c; - } - - /** - Multiplies the vector. - - \param m The LHS matrix. - \param v The RHS vector. - \return The resulting vector. - */ - template - inline tfloat3 operator *(tmatrix4 const& m, tfloat3 const& v) - { - tfloat3 result; - - for (auto i = 0u; i < 4u; ++i) - for (auto j = 0u; j < 4u; ++j) - result[i] += m.m[i][j] * v[j]; - - return result; - } -} diff --git a/ffx-sssr/src/memory.h b/ffx-sssr/src/memory.h deleted file mode 100644 index c357671..0000000 --- a/ffx-sssr/src/memory.h +++ /dev/null @@ -1,219 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include -#include -#include - -#include "utils.h" -#include "reflection_error.h" - -namespace ffx_sssr -{ - /** - The marker for an invalid index. - */ - static constexpr auto const kInvalidIndex = 0xFFFFFFFFu; - - /** - The IdDispenser class allows to allocate and free identifiers up to a given count at constant cost. - - \note A given identifier possesses the following structure: - - top 16 bits: reserved for application use (used to flag the resource type). - - next 16 bits: generational identifier (so deleting twice does not crash). - - bottom 32 bits: object index (for looking up attached components). 
- */ - class IdDispenser - { - FFX_SSSR_NON_COPYABLE(IdDispenser); - - public: - inline IdDispenser(std::uint32_t max_id_count); - inline ~IdDispenser(); - - inline bool AllocateId(std::uint64_t& id); - inline void FreeId(std::uint64_t id); - - inline std::uint32_t GetIdCount() const; - inline std::uint32_t GetMaxIdCount() const; - inline bool IsValid(std::uint64_t id) const; - - protected: - inline std::uint32_t CalculateFreeIdCount() const; - - // The list of all available identifiers. - std::uint64_t* ids_; - // The index of the next available slot. - std::uint32_t next_index_; - // The number of allocated identifiers. - std::uint32_t id_count_; - // The maximum capacity of the dispenser. - std::uint32_t max_id_count_; - }; - - /** - The SparseArray class allows to insert objects at given indices while maintaining the underlying storage compacted. - */ - template - class SparseArray - { - FFX_SSSR_NON_COPYABLE(SparseArray); - - public: - /** - The iterator class allows to iterate over the inserted objects inside a sparse array. - */ - class iterator - { - friend class SparseArray; - - public: - iterator(); - - iterator& operator++(); - TYPE& operator *() const; - operator std::uint32_t() const; - bool operator !=(iterator const& other) const; - - protected: - // The iterated index. - std::uint32_t index_; - // The iterated array. - SparseArray* array_; - }; - - /** - The const_iterator class allows to iterate over the inserted objects inside a sparse array. - */ - class const_iterator - { - friend class SparseArray; - - public: - const_iterator(); - const_iterator(iterator const& other); - - const_iterator& operator++(); - TYPE const& operator *() const; - operator std::uint32_t() const; - bool operator !=(const_iterator const& other) const; - - protected: - // The iterated index. - std::uint32_t index_; - // The iterated array. 
- SparseArray const* array_; - }; - - SparseArray(std::uint32_t max_object_count); - ~SparseArray(); - - TYPE& operator [](std::uint32_t index); - TYPE const& operator [](std::uint32_t index) const; - - TYPE* At(std::uint32_t index); - TYPE const* At(std::uint32_t index) const; - bool Has(std::uint32_t index) const; - - TYPE& Insert(std::uint32_t index); - TYPE& Insert(std::uint32_t index, TYPE const& object); - bool Erase(std::uint32_t index); - void Clear(); - - TYPE* GetObjects(); - TYPE const* GetObjects() const; - std::uint32_t GetObjectCount() const; - std::uint32_t GetMaxObjectCount() const; - - std::uint32_t GetVirtualIndex(std::uint32_t physical_index) const; - std::uint32_t GetPhysicalIndex(std::uint32_t virtual_index) const; - - iterator begin(); - const_iterator begin() const; - const_iterator cbegin() const; - iterator end(); - const_iterator end() const; - const_iterator cend() const; - - protected: - // The storage for the allocated objects. - TYPE* objects_; - // The current size of the sparse array. - std::uint32_t object_count_; - // The maximum capacity of the sparse array. - std::uint32_t max_object_count_; - // The physical to virtual mapping table. - std::uint32_t* virtual_indices_; - // The virtual to physical mapping table. - std::uint32_t* physical_indices_; - }; - - /** - The RingBuffer class implements some standard wrap-around type of memory allocator. - - \note The BLOCK_TYPE type must implement the CanBeReused() method that is called when re-using previously acquired memory blocks. - */ - template - class RingBuffer - { - FFX_SSSR_NON_COPYABLE(RingBuffer); - - public: - RingBuffer(std::size_t size); - ~RingBuffer(); - - BLOCK_TYPE* AcquireBlock(std::size_t& start, std::size_t size, std::size_t alignment = 16u); - - protected: - /** - The Block class represents an individual block inside the ring buffer. - */ - class Block - { - public: - Block(); - - bool CanBeReused() const; - - // The underlying block. 
- BLOCK_TYPE block_; - // The start of the block. - std::size_t start_; - // The size of the block. - std::size_t size_; - }; - - Block const* GrabNextAvailableBlock() const; - std::size_t CalculateSpaceToNextAvailableBlock(Block const* next_block, std::size_t alignment) const; - - // The size of the ring buffer. - std::size_t size_; - // The head of the ring buffer. - std::size_t head_; - // The available blocks. - std::deque blocks_; - }; -} - -#include "memory.inl" diff --git a/ffx-sssr/src/memory.inl b/ffx-sssr/src/memory.inl deleted file mode 100644 index d3e4ce0..0000000 --- a/ffx-sssr/src/memory.inl +++ /dev/null @@ -1,831 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the IdDispenser class. 
- - \param max_id_count The maximum capacity of the dispenser. - */ - IdDispenser::IdDispenser(std::uint32_t max_id_count) - : ids_(max_id_count ? static_cast(malloc(max_id_count * sizeof(std::uint64_t))) : nullptr) - , next_index_(kInvalidIndex) - , id_count_(0u) - , max_id_count_(0u) - { - // Could we allocate our memory successfully? - if (max_id_count && !ids_) - { - free(ids_); - - throw reflection_error(FFX_SSSR_STATUS_OUT_OF_MEMORY); - } - - // Initialize the freelist - for (auto i = 0u; i < max_id_count; ++i) - { - ids_[i] = (i + 1 == max_id_count ? kInvalidIndex : i + 1); - } - - // Assign the base members - next_index_ = (max_id_count ? 0u : kInvalidIndex); - max_id_count_ = max_id_count; - } - - /** - The destructor for the IdDispenser class. - */ - IdDispenser::~IdDispenser() - { - // Were there any non-freed descriptors? -#if 0 - auto const leaked_id_count = (max_id_count_ - CalculateFreeIdCount()); - - FFX_SSSR_ASSERT(leaked_id_count == id_count_); - - if (leaked_id_count) - { - FFX_SSSR_PRINTLN("%u resource%s %s not destroyed properly; detected memory leak", leaked_id_count, leaked_id_count > 1 ? "s" : "", leaked_id_count > 1 ? "were" : "was"); - } -#endif - - // Release our memory - free(ids_); - } - - /** - Allocates the next available identifier. - - \param id The allocated identifier. - \return true if successful, false otherwise. - */ - bool IdDispenser::AllocateId(std::uint64_t& id) - { - // Are we out of memory? 
- if (next_index_ == kInvalidIndex) - { - return false; - } - - // Get hold of next available slot - auto const index = next_index_; - auto& slot = ids_[index]; - - // Advance generation - auto const next_index = static_cast(slot & 0xFFFFFFFFull); - auto const age = static_cast((slot >> 32) & 0xFFFFull) + 1u; - - // Update the freelist - next_index_ = next_index; - id = (static_cast(age) << 32) | static_cast(index); - slot = (static_cast(age) << 32) | static_cast(kInvalidIndex); - - // Keep track of number of allocated identifiers - FFX_SSSR_ASSERT(id_count_ < max_id_count_); - ++id_count_; - - return true; - } - - /** - Frees the identifier. - - \param id The identifier to be freed. - */ - void IdDispenser::FreeId(std::uint64_t id) - { - // Get hold of the freed slot - auto const index = static_cast(id & 0xFFFFFFFFull); - FFX_SSSR_ASSERT(index < max_id_count_); - auto& slot = ids_[index]; - - // Check whether this is a valid operation - auto const age = static_cast((slot >> 32) & 0xFFFFull); - - if (age != static_cast((id >> 32) & 0xFFFFull) || static_cast(slot & 0xFFFFFFFFull) != kInvalidIndex) - { - return; // identifier was already freed - } - - // Return to the freelist - slot = (static_cast(age) << 32) | static_cast(next_index_); - next_index_ = index; - - // Keep track of number of allocated identifiers - FFX_SSSR_ASSERT(id_count_ > 0u); - --id_count_; - } - - /** - Gets the number of allocated identifiers. - - \return The number of allocated identifiers. - */ - std::uint32_t IdDispenser::GetIdCount() const - { - return id_count_; - } - - /** - Gets the maximum number of identifiers that can be allocated. - - \return The maximum number of identifiers. - */ - std::uint32_t IdDispenser::GetMaxIdCount() const - { - return max_id_count_; - } - - /** - Checks whether the identifier is still valid. - - \param id The identifier to be checked. - \return true if the identifier is valid, false otherwise. 
- */ - bool IdDispenser::IsValid(std::uint64_t id) const - { - // Get hold of the corresponding slot - auto const index = static_cast(id & 0xFFFFFFFFull); - FFX_SSSR_ASSERT(index < max_id_count_); - auto const slot = ids_[index]; - - // Check whether the identifier is still valid - auto const age = static_cast((slot >> 32) & 0xFFFFull); - - if (age != static_cast((id >> 32) & 0xFFFFull) || static_cast(slot & 0xFFFFFFFFull) != kInvalidIndex) - { - return false; // identifier was previously freed - } - - return true; - } - - /** - Calculates the number of available identifiers. - - \return The number of remaining available identifiers. - */ - std::uint32_t IdDispenser::CalculateFreeIdCount() const - { - auto free_id_count = 0u; - - // Iterate the entire freelist - for (auto next_index = next_index_; next_index != kInvalidIndex; next_index = static_cast(ids_[next_index] & 0xFFFFFFFFull)) - { - ++free_id_count; - } - FFX_SSSR_ASSERT(free_id_count <= max_id_count_); - - return free_id_count; - } - - /** - The constructor for the iterator class. - */ - template - SparseArray::iterator::iterator() - : index_(0u) - , array_(nullptr) - { - } - - /** - Iterates over to the next object. - - \return The updated iterator. - */ - template - typename SparseArray::iterator& SparseArray::iterator::operator ++() - { - FFX_SSSR_ASSERT(array_ && index_ < array_->object_count_); - ++index_; // iterate to next - return *this; - } - - /** - Gets the iterated object. - - \return The reference to the iterated object. - */ - template - TYPE& SparseArray::iterator::operator *() const - { - FFX_SSSR_ASSERT(array_ && index_ < array_->object_count_); - return array_->objects_[index_]; - } - - /** - Gets the virtual index for the iterated object. - - \return The virtual index. - */ - template - SparseArray::iterator::operator std::uint32_t() const - { - FFX_SSSR_ASSERT(array_ && index_ < array_->object_count_); - return array_->virtual_indices_[index_]; - } - - /** - Compares the two iterators. 
- - \return true if the iterators are not equal, false otherwise. - */ - template - bool SparseArray::iterator::operator !=(iterator const& other) const - { - return (index_ != other.index_ || array_ != other.array_); - } - - /** - The constructor for the const_iterator class. - */ - template - SparseArray::const_iterator::const_iterator() - : index_(0u) - , array_(nullptr) - { - } - - /** - The constructor for the const_iterator class. - - \param other The iterator to be constructing from. - */ - template - SparseArray::const_iterator::const_iterator(iterator const& other) - : index_(other.index_) - , array_(other.array_) - { - } - - /** - Iterates over to the next object. - - \return The updated iterator. - */ - template - typename SparseArray::const_iterator& SparseArray::const_iterator::operator ++() - { - FFX_SSSR_ASSERT(array_ && index_ < array_->object_count_); - ++index_; // iterate to next - return *this; - } - - /** - Gets the iterated object. - - \return The reference to the iterated object. - */ - template - TYPE const& SparseArray::const_iterator::operator *() const - { - FFX_SSSR_ASSERT(array_ && index_ < array_->object_count_); - return array_->objects_[index_]; - } - - /** - Gets the virtual index for the iterated object. - - \return The virtual index. - */ - template - SparseArray::const_iterator::operator std::uint32_t() const - { - FFX_SSSR_ASSERT(array_ && index_ < array_->object_count_); - return array_->virtual_indices_[index_]; - } - - /** - Compares the two iterators. - - \return true if the iterators are not equal, false otherwise. - */ - template - bool SparseArray::const_iterator::operator !=(const_iterator const& other) const - { - return (index_ != other.index_ || array_ != other.array_); - } - - /** - The constructor for the SparseArray class. - - \param max_object_count The maximum capacity of the sparse array. - */ - template - SparseArray::SparseArray(std::uint32_t max_object_count) - : objects_(max_object_count ? 
static_cast(malloc(max_object_count * sizeof(TYPE))) : nullptr) - , object_count_(0u) - , max_object_count_(max_object_count) - , virtual_indices_(max_object_count ? static_cast(malloc(max_object_count * sizeof(std::uint32_t))) : nullptr) - , physical_indices_(max_object_count ? static_cast(malloc(max_object_count * sizeof(std::uint32_t))) : nullptr) - { - // Could we allocate our memory successfully? - if (max_object_count && (!objects_ || !virtual_indices_ || !physical_indices_)) - { - free(objects_); - free(virtual_indices_); - free(physical_indices_); - - throw reflection_error(FFX_SSSR_STATUS_OUT_OF_MEMORY); - } - - // Invalidate all virtual entries - for (auto i = 0u; i < max_object_count; ++i) - physical_indices_[i] = kInvalidIndex; - } - - /** - The destructor for the SparseArray class. - */ - template - SparseArray::~SparseArray() - { - // Were there any non-freed components? -#if 0 - auto const leaked_object_count = object_count_; - - if (leaked_object_count) - { - FFX_SSSR_PRINTLN("%u component%s %s not destroyed properly; detected memory leak", leaked_object_count, leaked_object_count > 1 ? "s" : "", leaked_object_count > 1 ? "were" : "was"); - } -#endif - - // Release all that was not properly destroyed - Clear(); - - // Release our memory - free(objects_); - free(virtual_indices_); - free(physical_indices_); - } - - /** - Gets the object at the given index. - - \param index The index to be queried. - \return The reference to the requested object. - */ - template - TYPE& SparseArray::operator [](std::uint32_t index) - { - auto const object = At(index); - FFX_SSSR_ASSERT(object != nullptr); - return *object; - } - - /** - Gets the object at the given index. - - \param index The index to be queried. - \return The reference to the requested object. 
- */ - template - TYPE const& SparseArray::operator [](std::uint32_t index) const - { - auto const object = At(index); - FFX_SSSR_ASSERT(object != nullptr); - return *object; - } - - /** - Gets the object at the given index. - - \param index The index to be queried. - \return A pointer to the requested object, or nullptr if not found. - */ - template - TYPE* SparseArray::At(std::uint32_t index) - { - FFX_SSSR_ASSERT(index < max_object_count_); - auto const physical_index = physical_indices_[index]; - if (physical_index == kInvalidIndex) - return nullptr; // not found - return &objects_[physical_index]; - } - - /** - Gets the object at the given index. - - \param index The index to be queried. - \return A pointer to the requested object, or nullptr if not found. - */ - template - TYPE const* SparseArray::At(std::uint32_t index) const - { - FFX_SSSR_ASSERT(index < max_object_count_); - auto const physical_index = physical_indices_[index]; - if (physical_index == kInvalidIndex) - return nullptr; // not found - return &objects_[physical_index]; - } - - /** - Checks whether an object exists at the given index. - - \param index The index to be checked. - \return true if an object exists, false otherwise. - */ - template - bool SparseArray::Has(std::uint32_t index) const - { - FFX_SSSR_ASSERT(index < max_object_count_); - return physical_indices_[index] != kInvalidIndex; - } - - /** - Inserts a new object inside the sparse array. - - \param index The virtual index at which to insert. - \return The reference to the inserted object. 
- */ - template - TYPE& SparseArray::Insert(std::uint32_t index) - { - FFX_SSSR_ASSERT(index < max_object_count_); - auto const physical_index = physical_indices_[index]; - if (physical_index != kInvalidIndex) - { - (void)objects_[physical_index].~TYPE(); - return *new(&objects_[physical_index]) TYPE(); - } - FFX_SSSR_ASSERT(object_count_ < max_object_count_); - virtual_indices_[object_count_] = index; - physical_indices_[index] = object_count_; - return *new(&objects_[object_count_++]) TYPE(); - } - - /** - Inserts a new object inside the sparse array. - - \param index The virtual index at which to insert. - \param object The object to be inserted in the array. - \return The reference to the inserted object. - */ - template - TYPE& SparseArray::Insert(std::uint32_t index, TYPE const& object) - { - FFX_SSSR_ASSERT(index < max_object_count_); - auto const physical_index = physical_indices_[index]; - if (physical_index != kInvalidIndex) - { - (void)objects_[physical_index].~TYPE(); - return *new(&objects_[physical_index]) TYPE(object); - } - FFX_SSSR_ASSERT(object_count_ < max_object_count_); - virtual_indices_[object_count_] = index; - physical_indices_[index] = object_count_; - return *new(&objects_[object_count_++]) TYPE(object); - } - - /** - Erases the object at the given index. - - \param index The virtual index at which to erase. - \return true if an object was erased, false otherwise. 
- */ - template - bool SparseArray::Erase(std::uint32_t index) - { - FFX_SSSR_ASSERT(index < max_object_count_); - auto const physical_index = physical_indices_[index]; - if (physical_index == kInvalidIndex) - return false; // nothing to erase here - FFX_SSSR_ASSERT(object_count_ > 0u); - if (physical_index != object_count_ - 1u) - { - std::swap(objects_[physical_index], objects_[object_count_ - 1u]); - virtual_indices_[physical_index] = virtual_indices_[object_count_ - 1u]; - physical_indices_[virtual_indices_[physical_index]] = physical_index; - } - physical_indices_[index] = kInvalidIndex; - (void)objects_[--object_count_].~TYPE(); - return true; // object has been popped - } - - /** - Clears the sparse array. - */ - template - void SparseArray::Clear() - { - for (auto i = 0u; i < object_count_; ++i) - { - physical_indices_[virtual_indices_[i]] = kInvalidIndex; - (void)objects_[i].~TYPE(); - } - object_count_ = 0u; - } - - /** - Gets the storage for the inserted objects. - - \return The array of inserted objects. - */ - template - TYPE* SparseArray::GetObjects() - { - return objects_; - } - - /** - Gets the storage for the inserted objects. - - \return The array of inserted objects. - */ - template - TYPE const* SparseArray::GetObjects() const - { - return objects_; - } - - /** - Gets the current size of the sparse array. - - \return The number of inserted objects in the array. - */ - template - std::uint32_t SparseArray::GetObjectCount() const - { - return object_count_; - } - - /** - Gets the maximum capacity of the sparse array. - - \return The maximum number of objects that can be inserted. - */ - template - std::uint32_t SparseArray::GetMaxObjectCount() const - { - return max_object_count_; - } - - /** - Gets the virtual index. - - \param physical_index The physical index to be converted. - \return The requested virtual index. 
- */ - template - std::uint32_t SparseArray::GetVirtualIndex(std::uint32_t physical_index) const - { - FFX_SSSR_ASSERT(physical_index < object_count_); - - return virtual_indices_[physical_index]; - } - - /** - Gets the physical index. - - \param virtual_index The virtual index to be converted. - \return The requested physical index. - */ - template - std::uint32_t SparseArray::GetPhysicalIndex(std::uint32_t virtual_index) const - { - FFX_SSSR_ASSERT(virtual_index < max_object_count_); - - return physical_indices_[virtual_index]; - } - - /** - Gets an iterator pointing at the start of the array. - - \return The requested iterator. - */ - template - typename SparseArray::iterator SparseArray::begin() - { - iterator it; - it.array_ = this; - return it; - } - - /** - Gets an iterator pointing at the start of the array. - - \return The requested iterator. - */ - template - typename SparseArray::const_iterator SparseArray::begin() const - { - const_iterator it; - it.array_ = this; - return it; - } - - /** - Gets an iterator pointing at the start of the array. - - \return The requested iterator. - */ - template - typename SparseArray::const_iterator SparseArray::cbegin() const - { - const_iterator it; - it.array_ = this; - return it; - } - - /** - Gets an iterator pointing to the end of the array. - - \return The requested iterator. - */ - template - typename SparseArray::iterator SparseArray::end() - { - iterator it; - it.index_ = object_count_; - it.array_ = this; - return it; - } - - /** - Gets an iterator pointing to the end of the array. - - \return The requested iterator. - */ - template - typename SparseArray::const_iterator SparseArray::end() const - { - const_iterator it; - it.index_ = object_count_; - it.array_ = this; - return it; - } - - /** - Gets an iterator pointing to the end of the array. - - \return The requested iterator. 
- */ - template - typename SparseArray::const_iterator SparseArray::cend() const - { - const_iterator it; - it.index_ = object_count_; - it.array_ = this; - return it; - } - - /** - The constructor for the Block class. - */ - template - RingBuffer::Block::Block() - : start_(0u) - , size_(0u) - { - } - - /** - Checks whether the memory block can now be re-used. - - \return true if the memory block can be re-used, false otherwise. - */ - template - bool RingBuffer::Block::CanBeReused() const - { - return const_cast(block_).CanBeReused(); - } - - /** - The constructor for the RingBuffer class. - - \param size The size of the ring buffer. - */ - template - RingBuffer::RingBuffer(std::size_t size) - : size_(size) - , head_(0u) - { - } - - /** - The destructor for the RingBuffer class. - */ - template - RingBuffer::~RingBuffer() - { - } - - /** - Acquires the next available memory block. - - \param start The start of the block (in bytes). - \param size The size of the block (in bytes). - \param alignment The alignment of the block (in bytes). - \return The acquired block, or nullptr if none could be acquired. - */ - template - BLOCK_TYPE* RingBuffer::AcquireBlock(std::size_t& start, std::size_t size, std::size_t alignment) - { - // Calculate the amount of space available - auto next_block = GrabNextAvailableBlock(); - auto const new_head = Align(head_, alignment); // account for alignment requirements - auto space_available = CalculateSpaceToNextAvailableBlock(next_block, alignment); - - // If there isn't enough space left, try to make some room - while (size > space_available) - { - if (!next_block) - { - if (!head_) - return nullptr; // not enough memory in the whole buffer to make space for this request - head_ = 0u; // loop back to the beginning - return AcquireBlock(start, size, alignment); - } - - do - { - // Can we free this block? 
- if (!next_block->CanBeReused()) - return nullptr; // unable to make room for this request - - // Get rid of the freed block and advance - blocks_.pop_front(); - next_block = GrabNextAvailableBlock(); - space_available = CalculateSpaceToNextAvailableBlock(next_block, alignment); - } - while (next_block && size > space_available); - } - FFX_SSSR_ASSERT(size <= space_available); - - // Insert the new block - blocks_.emplace_back(); - auto& new_block = blocks_.back(); - new_block.start_ = new_head; - new_block.size_ = size; - - // Advance head to new position - start = new_head; - head_ = new_head + size; - - return &new_block.block_; - } - - /** - Grabs the next available block. - - \return The next available block. - */ - template - typename RingBuffer::Block const* RingBuffer::GrabNextAvailableBlock() const - { - Block const* next_block = nullptr; - if (!blocks_.empty()) - next_block = &blocks_[0]; - if (next_block && next_block->start_ + next_block->size_ <= head_) - next_block = nullptr; // we haven't reached back to that block yet - return next_block; - } - - /** - Calculates the amount of space left before reaching the tail. - - \param next_block The next available memory block. - \param alignment The alignment of the block (in bytes). - \return The amount of space available (in bytes). - */ - template - std::size_t RingBuffer::CalculateSpaceToNextAvailableBlock(Block const* next_block, std::size_t alignment) const - { - auto const new_head = Align(head_, alignment); - FFX_SSSR_ASSERT(!next_block || next_block->start_ + next_block->size_ > head_); - return std::max(next_block ? next_block->start_ : size_, new_head) - new_head; - } - - /** - Gets the index for the given object identifier. - - \param object_id The object identifier to be evaluated. - \return The index for the given object identifier. 
- */ - static inline std::uint32_t ID(std::uint64_t object_id) - { - return static_cast(object_id & 0xFFFFFFFFull); - } -} diff --git a/ffx-sssr/src/reflection_error.cpp b/ffx-sssr/src/reflection_error.cpp deleted file mode 100644 index f59cc6a..0000000 --- a/ffx-sssr/src/reflection_error.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "reflection_error.h" - -#include "context.h" - -namespace ffx_sssr -{ - /** - The constructor for the reflection_error class. - */ - reflection_error::reflection_error() - : error_(FFX_SSSR_STATUS_INTERNAL_ERROR) - { - } - - /** - The constructor for the reflection_error class. - - \param error The error code for this exception. 
- */ - reflection_error::reflection_error(FfxSssrStatus error) - : error_(error) - { - } - - /** - The constructor for the reflection_error class. - - \param context The context to be used. - \param error The error code for this exception. - */ - reflection_error::reflection_error(const Context& context, FfxSssrStatus error) - : error_(error) - { - (void)&context; - } - - /** - The constructor for the reflection_error class. - - \param context The context to be used. - \param error The error code for this exception. - \param format The format for the error message. - \param ... The content of the error message. - */ - reflection_error::reflection_error(const Context& context, FfxSssrStatus error, char const* format, ...) - : error_(error) - { - va_list args; - va_start(args, format); - context.Error(error, format, args); - va_end(args); - } -} diff --git a/ffx-sssr/src/reflections.cpp b/ffx-sssr/src/reflections.cpp deleted file mode 100644 index b079a44..0000000 --- a/ffx-sssr/src/reflections.cpp +++ /dev/null @@ -1,417 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "ffx_sssr.h" - -#include "context.h" - -/** - A define for starting a try block. -*/ -#define FFX_SSSR_TRY \ - try - -/** - A define for ending a try block. - - \param ERROR The error callback. -*/ -#define FFX_SSSR_CATCH(ERROR) \ - catch (ffx_sssr::reflection_error const& error) \ - { \ - ERROR(); \ - return error.error_; \ - } \ - catch (std::bad_alloc const&) \ - { \ - ERROR(); \ - return FFX_SSSR_STATUS_OUT_OF_MEMORY; \ - } \ - catch (...) \ - { \ - ERROR(); \ - return FFX_SSSR_STATUS_INTERNAL_ERROR; \ - } - -namespace -{ - /** - The APICall class is an RAII helper to mark the entry/exit points of the FFX_SSSR library API calls. - */ - class APICall - { - FFX_SSSR_NON_COPYABLE(APICall); - - public: - /** - The constructor for the APICall class. - - \param context The context to be used. - \param api_call The API call that was requested. - */ - inline APICall(ffx_sssr::Context& context, char const* api_call) - : context_(context) - { - context_.SetAPICall(api_call); - } - - /** - The destructor for the APICall class. - */ - inline ~APICall() - { - context_.SetAPICall(nullptr); - } - - protected: - // The context being in use. - ffx_sssr::Context& context_; - }; - - /** - A define for marking the entry/exit points of the FFX_SSSR library API calls. - - \param CTX The context being used. - \param API_CALL The API call that was requested. 
- */ - #define FFX_SSSR_API_CALL(CTX, API_CALL) \ - APICall const _api_call_##API_CALL(*CTX, #API_CALL) -} - -FfxSssrStatus ffxSssrCreateContext(const FfxSssrCreateContextInfo* pCreateContextInfo, FfxSssrContext* outContext) -{ - if (!pCreateContextInfo || !outContext) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - if (pCreateContextInfo->apiVersion != FFX_SSSR_API_VERSION) - { - return FFX_SSSR_STATUS_INCOMPATIBLE_API; - } - - ffx_sssr::Context* context; - - FFX_SSSR_TRY - { - context = new ffx_sssr::Context(*pCreateContextInfo); - - if (!context) - { - return FFX_SSSR_STATUS_OUT_OF_MEMORY; - } - - *outContext = reinterpret_cast(context); - } - FFX_SSSR_CATCH([](){}) - - context->SetAPICall(nullptr); - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrDestroyContext(FfxSssrContext context) -{ - if (!context) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - auto const ctx = reinterpret_cast(context); - - if (!ctx) - { - return FFX_SSSR_STATUS_OK; // nothing to destroy - } - - ctx->SetAPICall("ffxSssrDestroyContext"); - - delete ctx; - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrCreateReflectionView(FfxSssrContext context, const FfxSssrCreateReflectionViewInfo* pCreateReflectionViewInfo, FfxSssrReflectionView* outReflectionView) -{ - std::uint64_t reflection_view_id = 0ull; - - auto const ctx = reinterpret_cast(context); - - if (!ctx || !pCreateReflectionViewInfo || !outReflectionView) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - FFX_SSSR_API_CALL(ctx, ffxSssrCreateReflectionView); - - FFX_SSSR_TRY - { - ctx->CreateObject(reflection_view_id); - ctx->CreateReflectionView(reflection_view_id, *pCreateReflectionViewInfo); - - *outReflectionView = reinterpret_cast(reflection_view_id); - } - FFX_SSSR_CATCH([&]() - { - if (reflection_view_id) - { - ctx->DestroyObject(reflection_view_id); - } - }) - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrDestroyReflectionView(FfxSssrContext context, FfxSssrReflectionView reflectionView) -{ - 
auto const ctx = reinterpret_cast(context); - - if (!ctx) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - if (!reflectionView) - { - return FFX_SSSR_STATUS_OK; // nothing to delete - } - - auto const reflection_view_id = reinterpret_cast(reflectionView); - - if (!ctx->IsOfType(reflection_view_id) || !ctx->IsObjectValid(reflection_view_id)) - { - return FFX_SSSR_STATUS_INVALID_VALUE; // not a valid reflection view - } - - FFX_SSSR_API_CALL(ctx, ffxSssrDestroyReflectionView); - - FFX_SSSR_TRY - { - ctx->DestroyObject(reflection_view_id); - } - FFX_SSSR_CATCH([](){}) - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrEncodeResolveReflectionView(FfxSssrContext context, FfxSssrReflectionView reflectionView, const FfxSssrResolveReflectionViewInfo* pResolveReflectionViewInfo) -{ - auto const ctx = reinterpret_cast(context); - - if (!ctx || !pResolveReflectionViewInfo) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - auto const reflection_view_id = reinterpret_cast(reflectionView); - - if (!ctx->IsOfType(reflection_view_id) || !ctx->IsObjectValid(reflection_view_id)) - { - return FFX_SSSR_STATUS_INVALID_VALUE; // not a valid reflection view - } - - FFX_SSSR_API_CALL(ctx, ffxSssrEncodeResolveReflectionView); - - FFX_SSSR_TRY - { - ctx->ResolveReflectionView(reflection_view_id, *pResolveReflectionViewInfo); - } - FFX_SSSR_CATCH([](){}) - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrAdvanceToNextFrame(FfxSssrContext context) -{ - auto const ctx = reinterpret_cast(context); - - if (!ctx) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - FFX_SSSR_API_CALL(ctx, ffxSssrAdvanceToNextFrame); - - FFX_SSSR_TRY - { - ctx->AdvanceToNextFrame(); - } - FFX_SSSR_CATCH([](){}) - - return FFX_SSSR_STATUS_OK; -} - - -FfxSssrStatus ffxSssrReflectionViewGetTileClassificationElapsedTime(FfxSssrContext context, FfxSssrReflectionView reflectionView, uint64_t* outTileClassificationElapsedTime) -{ - auto const ctx = reinterpret_cast(context); - - if (!ctx || 
!outTileClassificationElapsedTime) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - auto const reflection_view_id = reinterpret_cast(reflectionView); - - if (!ctx->IsOfType(reflection_view_id) || !ctx->IsObjectValid(reflection_view_id)) - { - return FFX_SSSR_STATUS_INVALID_VALUE; // not a valid reflection view - } - - FFX_SSSR_API_CALL(ctx, ffxSssrReflectionViewGetTileClassificationElapsedTime); - - FFX_SSSR_TRY - { - ctx->GetReflectionViewTileClassificationElapsedTime(reflection_view_id, *outTileClassificationElapsedTime); - } - FFX_SSSR_CATCH([]() {}) - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrReflectionViewGetIntersectionElapsedTime(FfxSssrContext context, FfxSssrReflectionView reflectionView, uint64_t* outIntersectionElapsedTime) -{ - auto const ctx = reinterpret_cast(context); - - if (!ctx || !outIntersectionElapsedTime) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - auto const reflection_view_id = reinterpret_cast(reflectionView); - - if (!ctx->IsOfType(reflection_view_id) || !ctx->IsObjectValid(reflection_view_id)) - { - return FFX_SSSR_STATUS_INVALID_VALUE; // not a valid reflection view - } - - FFX_SSSR_API_CALL(ctx, ffxSssrReflectionViewGetIntersectionElapsedTime); - - FFX_SSSR_TRY - { - ctx->GetReflectionViewIntersectionElapsedTime(reflection_view_id, *outIntersectionElapsedTime); - } - FFX_SSSR_CATCH([](){}) - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrReflectionViewGetDenoisingElapsedTime(FfxSssrContext context, FfxSssrReflectionView reflectionView, uint64_t* outDenoisingElapsedTime) -{ - auto const ctx = reinterpret_cast(context); - - if (!ctx || !outDenoisingElapsedTime) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - auto const reflection_view_id = reinterpret_cast(reflectionView); - - if (!ctx->IsOfType(reflection_view_id) || !ctx->IsObjectValid(reflection_view_id)) - { - return FFX_SSSR_STATUS_INVALID_VALUE; // not a valid reflection view - } - - FFX_SSSR_API_CALL(ctx, 
ffxSssrReflectionViewGetDenoisingElapsedTime); - - FFX_SSSR_TRY - { - ctx->GetReflectionViewDenoisingElapsedTime(reflection_view_id, *outDenoisingElapsedTime); - } - FFX_SSSR_CATCH([](){}) - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrReflectionViewGetCameraParameters(FfxSssrContext context, FfxSssrReflectionView reflectionView, float* outViewMatrix, float* outProjectionMatrix) -{ - auto const ctx = reinterpret_cast(context); - - if (!ctx || !outViewMatrix || !outProjectionMatrix) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - auto const reflection_view_id = reinterpret_cast(reflectionView); - - if (!ctx->IsOfType(reflection_view_id) || !ctx->IsObjectValid(reflection_view_id)) - { - return FFX_SSSR_STATUS_INVALID_VALUE; // not a valid reflection view - } - - FFX_SSSR_API_CALL(ctx, ffxSssrReflectionViewGetCameraParameters); - - FFX_SSSR_TRY - { - ffx_sssr::matrix4 reflection_view_view_matrix, reflection_view_projection_matrix; - ctx->GetReflectionViewViewMatrix(reflection_view_id, reflection_view_view_matrix); - ctx->GetReflectionViewProjectionMatrix(reflection_view_id, reflection_view_projection_matrix); - - for (auto row = 0u; row < 4u; ++row) - { - for (auto col = 0u; col < 4u; ++col) - { - outViewMatrix[4u * row + col] = reflection_view_view_matrix.m[row][col]; - outProjectionMatrix[4u * row + col] = reflection_view_projection_matrix.m[row][col]; - } - } - } - FFX_SSSR_CATCH([](){}) - - return FFX_SSSR_STATUS_OK; -} - -FfxSssrStatus ffxSssrReflectionViewSetCameraParameters(FfxSssrContext context, FfxSssrReflectionView reflectionView, const float* pViewMatrix, const float* pProjectionMatrix) -{ - auto const ctx = reinterpret_cast(context); - - if (!ctx || !pViewMatrix || !pProjectionMatrix) - { - return FFX_SSSR_STATUS_INVALID_VALUE; - } - - auto const reflection_view_id = reinterpret_cast(reflectionView); - - if (!ctx->IsOfType(reflection_view_id) || !ctx->IsObjectValid(reflection_view_id)) - { - return FFX_SSSR_STATUS_INVALID_VALUE; // not a 
valid reflection view - } - - FFX_SSSR_API_CALL(ctx, ffxSssrReflectionViewSetCameraParameters); - - FFX_SSSR_TRY - { - ffx_sssr::matrix4 reflection_view_view_matrix, reflection_view_projection_matrix; - for (auto row = 0u; row < 4u; ++row) - { - for (auto col = 0u; col < 4u; ++col) - { - reflection_view_view_matrix.m[row][col] = pViewMatrix[4u * row + col]; - reflection_view_projection_matrix.m[row][col] = pProjectionMatrix[4u * row + col]; - } - } - ctx->SetReflectionViewViewMatrix(reflection_view_id, reflection_view_view_matrix); - ctx->SetReflectionViewProjectionMatrix(reflection_view_id, reflection_view_projection_matrix); - } - FFX_SSSR_CATCH([](){}) - - return FFX_SSSR_STATUS_OK; -} \ No newline at end of file diff --git a/ffx-sssr/src/utils.h b/ffx-sssr/src/utils.h deleted file mode 100644 index be3041c..0000000 --- a/ffx-sssr/src/utils.h +++ /dev/null @@ -1,97 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include "macros.h" -#include - -namespace ffx_sssr -{ - /** - Checks whether the value is a power of two. - - \param value The value to be checked. - \return true if the value is a power of two, false otherwise. - */ - template - static inline bool IsPowerOfTwo(TYPE value) - { - return !(value & (value - 1)); - } - - /** - Aligns the input value. - - \param value The value to be aligned. - \param alignment The required alignment. - \return The aligned value. - */ - template - static inline TYPE Align(TYPE value, TYPE alignment) - { - FFX_SSSR_ASSERT(IsPowerOfTwo(alignment)); - return (value + alignment - 1) & (~(alignment - 1)); - } - - /** - Performs a rounded division. - - \param value The value to be divided. - \param divisor The divisor to be used. - \return The rounded divided value. - */ - template - static inline TYPE RoundedDivide(TYPE value, TYPE divisor) - { - return (value + divisor - 1) / divisor; - } - - - /** - Converts the input string. - - \param input The string to be converted. - \return The converted string. 
- */ - static inline std::wstring StringToWString(std::string const& input) - { - std::wstring output; - - auto const length = MultiByteToWideChar(CP_ACP, - 0u, - input.c_str(), - static_cast(input.length() + 1u), - nullptr, - 0); - - output.resize(static_cast(length)); - - MultiByteToWideChar(CP_ACP, - 0u, - input.c_str(), - static_cast(input.length() + 1u), - &output[0], - length); - - return output; - } -} diff --git a/ffx-sssr/src/vk/buffer_vk.cpp b/ffx-sssr/src/vk/buffer_vk.cpp deleted file mode 100644 index f68e45d..0000000 --- a/ffx-sssr/src/vk/buffer_vk.cpp +++ /dev/null @@ -1,245 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "buffer_vk.h" -#include "memory.h" - -namespace ffx_sssr -{ - /** - The constructor for the BufferVK class. 
- */ - BufferVK::BufferVK() - : buffer_(VK_NULL_HANDLE) - , device_(VK_NULL_HANDLE) - , memory_(VK_NULL_HANDLE) - , buffer_view_(VK_NULL_HANDLE) - , mappable_(false) - , mapped_(false) - { - } - - /** - The destructor for the BufferVK class. - */ - BufferVK::~BufferVK() - { - if (mapped_) - { - Unmap(); - } - - if (buffer_) - { - vkDestroyBuffer(device_, buffer_, nullptr); - buffer_ = VK_NULL_HANDLE; - } - - if (memory_) - { - vkFreeMemory(device_, memory_, nullptr); - memory_ = VK_NULL_HANDLE; - } - - if (buffer_view_) - { - vkDestroyBufferView(device_, buffer_view_, nullptr); - buffer_view_ = VK_NULL_HANDLE; - } - - device_ = VK_NULL_HANDLE; - } - - /** - The constructor for the BufferVK class. - - \param device The VkDevice that creates the buffer view. - \param physical_device The VkPhysicalDevice to determine the right memory heap. - \param create_info The CreateInfo struct. - */ - BufferVK::BufferVK(VkDevice device, VkPhysicalDevice physical_device, const CreateInfo& create_info) - : device_(device) - , buffer_(VK_NULL_HANDLE) - , memory_(VK_NULL_HANDLE) - , buffer_view_(VK_NULL_HANDLE) - , mappable_(false) - , mapped_(false) - { - VkBufferCreateInfo buffer_create_info = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - buffer_create_info.pNext = nullptr; - buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_create_info.size = create_info.size_in_bytes_; - buffer_create_info.usage = create_info.buffer_usage_; - if (VK_SUCCESS != vkCreateBuffer(device_, &buffer_create_info, nullptr, &buffer_)) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - VkMemoryRequirements memory_requirements = {}; - vkGetBufferMemoryRequirements(device_, buffer_, &memory_requirements); - - VkPhysicalDeviceMemoryProperties memory_properties = {}; - vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties); - - // find the right memory type for this image - int memory_type_index = -1; - for (uint32_t i = 0; i < 
memory_properties.memoryTypeCount; ++i) - { - const VkMemoryType& memory_type = memory_properties.memoryTypes[i]; - bool has_required_properties = memory_type.propertyFlags & create_info.memory_property_flags; - bool is_required_memory_type = memory_requirements.memoryTypeBits & (1 << i); - if (has_required_properties && is_required_memory_type) - { - memory_type_index = i; - break; - } - } - - // abort if we couldn't find the right memory type - if (memory_type_index == -1) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - if (create_info.memory_property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - { - mappable_ = true; - mapped_ = false; - } - - VkMemoryAllocateInfo memory_allocate_info = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; - memory_allocate_info.pNext = nullptr; - memory_allocate_info.allocationSize = memory_requirements.size; - memory_allocate_info.memoryTypeIndex = memory_type_index; - if (VK_SUCCESS != vkAllocateMemory(device_, &memory_allocate_info, nullptr, &memory_)) - { - throw reflection_error(FFX_SSSR_STATUS_OUT_OF_MEMORY); - } - - if (VK_SUCCESS != vkBindBufferMemory(device_, buffer_, memory_, 0)) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - if (create_info.format_ == VK_FORMAT_UNDEFINED) - { - buffer_view_ = VK_NULL_HANDLE; - return; // Skip buffer view creation. - } - - VkBufferViewCreateInfo buffer_view_create_info = { VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO }; - buffer_view_create_info.pNext = nullptr; - buffer_view_create_info.flags = 0; - buffer_view_create_info.buffer = buffer_; - buffer_view_create_info.format = create_info.format_; - buffer_view_create_info.offset = 0; - buffer_view_create_info.range = VK_WHOLE_SIZE; - if (VK_SUCCESS != vkCreateBufferView(device_, &buffer_view_create_info, nullptr, &buffer_view_)) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - - FFX_SSSR_ASSERT(create_info.name_); // require all library objects to be named. 
- PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectName = (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(device, "vkSetDebugUtilsObjectNameEXT"); - if (vkSetDebugUtilsObjectName) - { - VkDebugUtilsObjectNameInfoEXT object_name_info = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT }; - object_name_info.pNext = nullptr; - object_name_info.objectType = VK_OBJECT_TYPE_BUFFER; - object_name_info.objectHandle = reinterpret_cast(buffer_); - object_name_info.pObjectName = create_info.name_; - - VkResult result = vkSetDebugUtilsObjectName(device, &object_name_info); - FFX_SSSR_ASSERT(result == VK_SUCCESS); - } - } - - /** - The constructor for the BufferVK class. - - \param other The buffer to be moved. - */ - BufferVK::BufferVK(BufferVK && other) noexcept - : buffer_(other.buffer_) - , memory_(other.memory_) - , device_(other.device_) - , buffer_view_(other.buffer_view_) - , mappable_(other.mappable_) - , mapped_(other.mapped_) - { - other.buffer_ = VK_NULL_HANDLE; - other.memory_ = VK_NULL_HANDLE; - other.device_ = VK_NULL_HANDLE; - other.buffer_view_ = VK_NULL_HANDLE; - other.mappable_ = false; - other.mapped_ = false; - } - - /** - Assigns the buffer. - - \param other The buffer to be moved. - \return The assigned buffer. 
- */ - BufferVK & BufferVK::operator=(BufferVK && other) noexcept - { - if (this != &other) - { - buffer_ = other.buffer_; - memory_ = other.memory_; - device_ = other.device_; - buffer_view_ = other.buffer_view_; - mappable_ = other.mappable_; - mapped_ = other.mapped_; - - other.buffer_ = VK_NULL_HANDLE; - other.memory_ = VK_NULL_HANDLE; - other.device_ = VK_NULL_HANDLE; - other.buffer_view_ = VK_NULL_HANDLE; - other.mappable_ = false; - other.mapped_ = false; - } - - return *this; - } - - void BufferVK::Map(void** data) - { - FFX_SSSR_ASSERT(mappable_); - FFX_SSSR_ASSERT(!mapped_); - - if (VK_SUCCESS != vkMapMemory(device_, memory_, 0, VK_WHOLE_SIZE, 0, data)) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - mapped_ = true; - } - - void BufferVK::Unmap() - { - FFX_SSSR_ASSERT(mappable_); - FFX_SSSR_ASSERT(mapped_); - - vkUnmapMemory(device_, memory_); - mapped_ = false; - } -} diff --git a/ffx-sssr/src/vk/buffer_vk.h b/ffx-sssr/src/vk/buffer_vk.h deleted file mode 100644 index 08bc580..0000000 --- a/ffx-sssr/src/vk/buffer_vk.h +++ /dev/null @@ -1,68 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include - -#include "macros.h" -#include "ffx_sssr.h" - -namespace ffx_sssr -{ - /** - The BufferVK class is a helper class to create and destroy buffers on Vulkan. - */ - class BufferVK - { - FFX_SSSR_NON_COPYABLE(BufferVK); - - public: - - class CreateInfo - { - public: - VkDeviceSize size_in_bytes_; - VkMemoryPropertyFlags memory_property_flags; - VkBufferUsageFlags buffer_usage_; - VkFormat format_; - const char* name_; - }; - - BufferVK(); - ~BufferVK(); - - BufferVK(VkDevice device, VkPhysicalDevice physical_device, const CreateInfo& create_info); - - BufferVK(BufferVK&& other) noexcept; - BufferVK& BufferVK::operator =(BufferVK&& other) noexcept; - - void Map(void** data); - void Unmap(); - - VkDevice device_; - VkBuffer buffer_; - VkBufferView buffer_view_; - VkDeviceMemory memory_; // We're creating a low number of allocations for this library, so we just allocate a dedicated memory object per buffer. Normally you'd want to do sub-allocations of a larger allocation. - bool mappable_; - bool mapped_; - }; -} diff --git a/ffx-sssr/src/vk/context_vk.cpp b/ffx-sssr/src/vk/context_vk.cpp deleted file mode 100644 index 577dd94..0000000 --- a/ffx-sssr/src/vk/context_vk.cpp +++ /dev/null @@ -1,726 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#include "context_vk.h" - -#include -#include - -#if FFX_SSSR_DUMP_SHADERS -#include -#endif // FFX_SSSR_DUMP_SHADERS - -#include "utils.h" -#include "context.h" -#include "reflection_view.h" -#include "ffx_sssr_vk.h" - -#include "shader_common.h" -#include "shader_classify_tiles.h" -#include "shader_intersect.h" -#include "shader_prepare_indirect_args.h" -#include "shader_resolve_eaw.h" -#include "shader_resolve_spatial.h" -#include "shader_resolve_temporal.h" - -namespace -{ - auto constexpr D3D12_VENDOR_ID_AMD = 0x1002u; - auto constexpr D3D12_VENDOR_ID_INTEL = 0x8086u; - auto constexpr D3D12_VENDOR_ID_NVIDIA = 0x10DEu; - - - namespace _1 - { - #include "samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp" - } - - namespace _2 - { - #include "samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_2spp.cpp" - } - - /** - The available blue noise samplers for various sampling modes. - */ - struct - { - std::int32_t const (&sobol_buffer_)[256 * 256]; - std::int32_t const (&ranking_tile_buffer_)[128 * 128 * 8]; - std::int32_t const (&scrambling_tile_buffer_)[128 * 128 * 8]; - } - const g_sampler_states[] = - { - { _1::sobol_256spp_256d, _1::rankingTile, _1::scramblingTile }, - { _2::sobol_256spp_256d, _2::rankingTile, _2::scramblingTile }, - }; -} - -namespace ffx_sssr -{ - /** - The constructor for the ContextVK class. - - \param context The execution context. - \param create_context_info The context creation information. 
- */ - ContextVK::ContextVK(Context& context, FfxSssrCreateContextInfo const& create_context_info) : - context_(context) - , device_(create_context_info.pVkCreateContextInfo->device) - , physical_device_(create_context_info.pVkCreateContextInfo->physicalDevice) - , upload_buffer_(*this, create_context_info.uploadBufferSize) - , shader_compiler_(context) - , samplers_were_populated_(false) - , is_subgroup_size_control_extension_available_(false) - , tile_classification_pass_() - , indirect_args_pass_() - , intersection_pass_() - , spatial_denoising_pass_() - , temporal_denoising_pass_() - , eaw_denoising_pass_() - , reflection_views_(create_context_info.maxReflectionViewCount) - { - if (!device_) - { - throw reflection_error(context, FFX_SSSR_STATUS_INVALID_VALUE, "No device was supplied."); - } - - // Query if the implementation supports VK_EXT_subgroup_size_control - // This is the case if VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME is present. - // Rely on the application to enable the extension if it's available. 
- uint32_t extension_count; - if (VK_SUCCESS != vkEnumerateDeviceExtensionProperties(physical_device_, nullptr, &extension_count, NULL)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to enumerate device extension properties."); - } - std::vector device_extension_properties(extension_count); - if (VK_SUCCESS != vkEnumerateDeviceExtensionProperties(physical_device_, nullptr, &extension_count, device_extension_properties.data())) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to query device extension properties."); - } - - is_subgroup_size_control_extension_available_ = std::find_if(device_extension_properties.begin(), device_extension_properties.end(), - [](const VkExtensionProperties& extensionProps) -> bool { return strcmp(extensionProps.extensionName, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME) == 0; }) - != device_extension_properties.end(); - - upload_buffer_.Initialize(); - CompileShaders(create_context_info); - CreatePipelines(); - - // Create our blue noise samplers - BlueNoiseSamplerVK* blue_noise_samplers[] = { &blue_noise_sampler_1spp_, &blue_noise_sampler_2spp_ }; - static_assert(FFX_SSSR_ARRAY_SIZE(blue_noise_samplers) == FFX_SSSR_ARRAY_SIZE(g_sampler_states), "Sampler arrays don't match."); - for (auto i = 0u; i < FFX_SSSR_ARRAY_SIZE(g_sampler_states); ++i) - { - auto const& sampler_state = g_sampler_states[i]; - BlueNoiseSamplerVK* sampler = blue_noise_samplers[i]; - - BufferVK::CreateInfo create_info = {}; - create_info.memory_property_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - create_info.buffer_usage_ = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; - create_info.format_ = VK_FORMAT_R32_UINT; - - create_info.size_in_bytes_ = sizeof(sampler_state.sobol_buffer_); - create_info.name_ = "SSSR Sobol Buffer"; - sampler->sobol_buffer_ = BufferVK(device_, physical_device_, create_info); - - create_info.size_in_bytes_ = 
sizeof(sampler_state.ranking_tile_buffer_); - create_info.name_ = "SSSR Ranking Tile Buffer"; - sampler->ranking_tile_buffer_ = BufferVK(device_, physical_device_, create_info); - - create_info.size_in_bytes_ = sizeof(sampler_state.scrambling_tile_buffer_); - create_info.name_ = "SSSR Scrambling Tile Buffer"; - sampler->scrambling_tile_buffer_ = BufferVK(device_, physical_device_, create_info); - } - - VkCommandBuffer command_buffer = create_context_info.pVkCreateContextInfo->uploadCommandBuffer; - if (!samplers_were_populated_) - { - std::int32_t* upload_buffer; - - // Upload the relevant data to the various samplers - for (auto i = 0u; i < FFX_SSSR_ARRAY_SIZE(g_sampler_states); ++i) - { - auto const& sampler_state = g_sampler_states[i]; - BlueNoiseSamplerVK* sampler = blue_noise_samplers[i]; - - if (!upload_buffer_.AllocateBuffer(sizeof(sampler_state.sobol_buffer_), upload_buffer)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %llukiB of upload memory, consider increasing uploadBufferSize", RoundedDivide(sizeof(sampler_state.sobol_buffer_), 1024ull)); - } - memcpy(upload_buffer, sampler_state.sobol_buffer_, sizeof(sampler_state.sobol_buffer_)); - - VkBufferCopy region = {}; - region.srcOffset = static_cast(upload_buffer_.GetOffset(upload_buffer)); - region.dstOffset = 0; - region.size = sizeof(sampler_state.sobol_buffer_); - vkCmdCopyBuffer(command_buffer, upload_buffer_.GetResource(), sampler->sobol_buffer_.buffer_, 1, ®ion); - - if (!upload_buffer_.AllocateBuffer(sizeof(sampler_state.ranking_tile_buffer_), upload_buffer)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %llukiB of upload memory, consider increasing uploadBufferSize", RoundedDivide(sizeof(sampler_state.ranking_tile_buffer_), 1024ull)); - } - memcpy(upload_buffer, sampler_state.ranking_tile_buffer_, sizeof(sampler_state.ranking_tile_buffer_)); - - region.srcOffset = 
static_cast(upload_buffer_.GetOffset(upload_buffer)); - region.dstOffset = 0; - region.size = sizeof(sampler_state.ranking_tile_buffer_); - vkCmdCopyBuffer(command_buffer, upload_buffer_.GetResource(), sampler->ranking_tile_buffer_.buffer_, 1, ®ion); - - if (!upload_buffer_.AllocateBuffer(sizeof(sampler_state.scrambling_tile_buffer_), upload_buffer)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %llukiB of upload memory, consider increasing uploadBufferSize", RoundedDivide(sizeof(sampler_state.scrambling_tile_buffer_), 1024ull)); - } - memcpy(upload_buffer, sampler_state.scrambling_tile_buffer_, sizeof(sampler_state.scrambling_tile_buffer_)); - - region.srcOffset = static_cast(upload_buffer_.GetOffset(upload_buffer)); - region.dstOffset = 0; - region.size = sizeof(sampler_state.scrambling_tile_buffer_); - vkCmdCopyBuffer(command_buffer, upload_buffer_.GetResource(), sampler->scrambling_tile_buffer_.buffer_, 1, ®ion); - } - - // Flag that the samplers are now ready to use - samplers_were_populated_ = true; - } - } - - /** - The destructor for the ContextVK class. - */ - ContextVK::~ContextVK() - { - if (uniform_buffer_descriptor_set_layout_) - { - vkDestroyDescriptorSetLayout(device_, uniform_buffer_descriptor_set_layout_, nullptr); - } - } - - /** - Gets the number of GPU ticks spent in the tile classification pass. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent in the tile classification pass. - */ - void ContextVK::GetReflectionViewTileClassificationElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? 
- FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - auto const& reflection_view = reflection_views_[ID(reflection_view_id)]; - - if (!((reflection_view.flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Cannot query the tile classification elapsed time of a reflection view that was not created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag"); - } - - elapsed_time = reflection_view.tile_classification_elapsed_time_; - } - - /** - Gets the number of GPU ticks spent intersecting the depth buffer. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent intersecting the depth buffer. - */ - void ContextVK::GetReflectionViewIntersectionElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? - FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - auto const& reflection_view = reflection_views_[ID(reflection_view_id)]; - - if (!((reflection_view.flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Cannot query the intersection elapsed time of a reflection view that was not created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag"); - } - - elapsed_time = reflection_view.intersection_elapsed_time_; - } - - /** - Gets the number of GPU ticks spent denoising the Vulkan reflection view. - - \param reflection_view_id The identifier for the reflection view object. - \param elapsed_time The number of GPU ticks spent denoising. 
- */ - void ContextVK::GetReflectionViewDenoisingElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? - FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - auto const& reflection_view = reflection_views_[ID(reflection_view_id)]; - - if (!((reflection_view.flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0)) - { - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Cannot query the denoising elapsed time of a reflection view that was not created with the FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS flag"); - } - - elapsed_time = reflection_view.denoising_elapsed_time_; - } - - /** - Creates the Vulkan reflection view. - - \param reflection_view_id The identifier of the reflection view object. - \param create_reflection_view_info The reflection view creation information. 
- */ - void ContextVK::CreateReflectionView(std::uint64_t reflection_view_id, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info) - { - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo); - FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - - // Check user arguments - if (!create_reflection_view_info.outputWidth || !create_reflection_view_info.outputHeight) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The outputWidth and outputHeight parameters are required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->depthBufferHierarchySRV) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The depthBufferHierarchySRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->motionBufferSRV) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The motionBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->normalBufferSRV) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The normalBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->roughnessBufferSRV) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The roughnessBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->normalHistoryBufferSRV) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The normalHistoryBufferSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->roughnessHistoryBufferSRV) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The roughnessHistoryBufferSRV parameter is required when creating a 
reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->reflectionViewUAV) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The environmentMapSRV parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->environmentMapSampler) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The environmentMapSampler parameter is required when creating a reflection view"); - if(create_reflection_view_info.pVkCreateReflectionViewInfo->sceneFormat == VK_FORMAT_UNDEFINED) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The sceneFormat parameter is required when creating a reflection view"); - if (!create_reflection_view_info.pVkCreateReflectionViewInfo->uploadCommandBuffer) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_VALUE, "The uploadCommandBuffer parameter is required when creating a reflection view"); - - // Create the reflection view - auto& reflection_view = reflection_views_.Insert(ID(reflection_view_id)); - reflection_view.Create(context_, create_reflection_view_info); - } - - /** - Resolves the Vulkan reflection view. - - \param reflection_view_id The identifier of the reflection view object. - \param resolve_reflection_view_info The reflection view resolve information. - */ - void ContextVK::ResolveReflectionView(std::uint64_t reflection_view_id, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info) - { - FFX_SSSR_ASSERT(reflection_views_.At(ID(reflection_view_id))); // not created properly? 
- FFX_SSSR_ASSERT(context_.IsOfType(reflection_view_id) && context_.IsObjectValid(reflection_view_id)); - FFX_SSSR_ASSERT(context_.reflection_view_view_matrices_.At(ID(reflection_view_id))); - FFX_SSSR_ASSERT(context_.reflection_view_projection_matrices_.At(ID(reflection_view_id))); - - ReflectionView reflection_view; - reflection_view.view_matrix_ = context_.reflection_view_view_matrices_[ID(reflection_view_id)]; - reflection_view.projection_matrix_ = context_.reflection_view_projection_matrices_[ID(reflection_view_id)]; - - reflection_views_[ID(reflection_view_id)].Resolve(context_, reflection_view, resolve_reflection_view_info); - } - - - void ContextVK::CompileShaders(FfxSssrCreateContextInfo const& create_context_info) - { - struct - { - char const* shader_name_ = nullptr; - char const* content_ = nullptr; - char const* profile_ = nullptr; - } - const shader_source[] = - { - { "prepare_indirect_args", prepare_indirect_args, "cs_6_0"}, - { "classify_tiles", classify_tiles, "cs_6_0"}, - { "intersect", intersect, "cs_6_0"}, - { "resolve_spatial", resolve_spatial, "cs_6_0"}, - { "resolve_temporal", resolve_temporal, "cs_6_0"}, - { "resolve_eaw", resolve_eaw, "cs_6_0"}, - }; - - auto const common_include = std::string(common); - - DxcDefine defines[10]; - defines[0].Name = L"FFX_SSSR_ROUGHNESS_TEXTURE_FORMAT"; - defines[0].Value = create_context_info.pRoughnessTextureFormat; - defines[1].Name = L"FFX_SSSR_ROUGHNESS_UNPACK_FUNCTION"; - defines[1].Value = create_context_info.pUnpackRoughnessSnippet; - defines[2].Name = L"FFX_SSSR_NORMALS_TEXTURE_FORMAT"; - defines[2].Value = create_context_info.pNormalsTextureFormat; - defines[3].Name = L"FFX_SSSR_NORMALS_UNPACK_FUNCTION"; - defines[3].Value = create_context_info.pUnpackNormalsSnippet; - defines[4].Name = L"FFX_SSSR_MOTION_VECTOR_TEXTURE_FORMAT"; - defines[4].Value = create_context_info.pMotionVectorFormat; - defines[5].Name = L"FFX_SSSR_MOTION_VECTOR_UNPACK_FUNCTION"; - defines[5].Value = 
create_context_info.pUnpackMotionVectorsSnippet; - defines[6].Name = L"FFX_SSSR_DEPTH_TEXTURE_FORMAT"; - defines[6].Value = create_context_info.pDepthTextureFormat; - defines[7].Name = L"FFX_SSSR_DEPTH_UNPACK_FUNCTION"; - defines[7].Value = create_context_info.pUnpackDepthSnippet; - defines[8].Name = L"FFX_SSSR_SCENE_TEXTURE_FORMAT"; - defines[8].Value = create_context_info.pSceneTextureFormat; - defines[9].Name = L"FFX_SSSR_SCENE_RADIANCE_UNPACK_FUNCTION"; - defines[9].Value = create_context_info.pUnpackSceneRadianceSnippet; - - static_assert(FFX_SSSR_ARRAY_SIZE(shader_source) == kShader_Count, "'kShader_Count' filenames must be provided for building the various shaders"); - std::stringstream shader_content; - LPCWSTR dxc_arguments[] = { L"-spirv", L"-fspv-target-env=vulkan1.1" }; - for (auto i = 0u; i < kShader_Count; ++i) - { - // Append common includes - shader_content.str(std::string()); - shader_content.clear(); - shader_content << common << std::endl << shader_source[i].content_; - - shaders_[i] = shader_compiler_.CompileShaderString( - shader_content.str().c_str(), - static_cast(shader_content.str().size()), - shader_source[i].shader_name_, - shader_source[i].profile_, - dxc_arguments, FFX_SSSR_ARRAY_SIZE(dxc_arguments), - defines, FFX_SSSR_ARRAY_SIZE(defines)); - } - } - - /** - Creates the reflection view pipeline state. - - \param context The Vulkan context to be used. 
- */ - void ContextVK::CreatePipelines() - { - VkDescriptorSetLayoutBinding layout_binding = {}; - layout_binding.binding = 0; - layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - layout_binding.descriptorCount = 1; - layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layout_binding.pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; - descriptor_set_layout_create_info.pNext = nullptr; - descriptor_set_layout_create_info.flags = 0; - descriptor_set_layout_create_info.bindingCount = 1; - descriptor_set_layout_create_info.pBindings = &layout_binding; - if (VK_SUCCESS != vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_create_info, nullptr, &uniform_buffer_descriptor_set_layout_)) - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create descriptor set layout for uniform buffer"); - } - - auto Setup = [this](ShaderPass& pass, ContextVK::Shader shader, const VkDescriptorSetLayoutBinding* bindings, uint32_t bindings_count, VkPipelineShaderStageCreateFlags flags = 0) { - - pass.device_ = device_; - pass.bindings_count_ = bindings_count; - - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; - descriptor_set_layout_create_info.pNext = nullptr; - descriptor_set_layout_create_info.flags = 0; - descriptor_set_layout_create_info.bindingCount = bindings_count; - descriptor_set_layout_create_info.pBindings = bindings; - if (VK_SUCCESS != vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_create_info, nullptr, &pass.descriptor_set_layout_)) - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create descriptor set layout"); - } - - VkDescriptorSetLayout layouts[2]; - layouts[0] = uniform_buffer_descriptor_set_layout_; - layouts[1] = pass.descriptor_set_layout_; - - 
VkPipelineLayoutCreateInfo layout_create_info = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; - layout_create_info.pNext = nullptr; - layout_create_info.flags = 0; - layout_create_info.setLayoutCount = FFX_SSSR_ARRAY_SIZE(layouts); - layout_create_info.pSetLayouts = layouts; - layout_create_info.pushConstantRangeCount = 0; - layout_create_info.pPushConstantRanges = nullptr; - if (VK_SUCCESS != vkCreatePipelineLayout(device_, &layout_create_info, nullptr, &pass.pipeline_layout_)) - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create pipeline layout"); - } - - const ShaderVK& shader_vk = GetShader(shader); - - VkShaderModuleCreateInfo shader_create_info = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO }; - shader_create_info.pNext = nullptr; - shader_create_info.flags = 0; - shader_create_info.codeSize = shader_vk.BytecodeLength; - shader_create_info.pCode = static_cast(shader_vk.pShaderBytecode); - - VkShaderModule shader_module = VK_NULL_HANDLE; - if (VK_SUCCESS != vkCreateShaderModule(device_, &shader_create_info, nullptr, &shader_module)) - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create shader module"); - } - - VkPipelineShaderStageCreateInfo stage_create_info = { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO }; - stage_create_info.pNext = nullptr; - stage_create_info.flags = flags; - stage_create_info.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_create_info.module = shader_module; - stage_create_info.pName = "main"; - stage_create_info.pSpecializationInfo = nullptr; - - VkComputePipelineCreateInfo create_info = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO }; - create_info.pNext = nullptr; - create_info.basePipelineHandle = VK_NULL_HANDLE; - create_info.basePipelineIndex = 0; - create_info.flags = 0; -#if FFX_SSSR_DUMP_SHADERS - create_info.flags |= VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; -#endif // FFX_SSSR_DUMP_SHADERS - create_info.layout = 
pass.pipeline_layout_; - create_info.stage = stage_create_info; - if (VK_SUCCESS != vkCreateComputePipelines(device_, VK_NULL_HANDLE, 1, &create_info, nullptr, &pass.pipeline_)) - { - throw reflection_error(GetContext(), FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create compute pipeline state"); - } - - vkDestroyShaderModule(device_, shader_module, nullptr); - }; - - auto Bind = [](uint32_t binding, VkDescriptorType type) - { - VkDescriptorSetLayoutBinding layout_binding = {}; - layout_binding.binding = binding; - layout_binding.descriptorType = type; - layout_binding.descriptorCount = 1; - layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layout_binding.pImmutableSamplers = nullptr; - return layout_binding; - }; - - // Assemble the shader pass for tile classification - { - uint32_t binding = 0; - VkDescriptorSetLayoutBinding layout_bindings[] = { - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_tile_list - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_ray_list - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_tile_counter - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_ray_counter - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporally_denoised_reflections - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporally_denoised_reflections_history - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_ray_lengths - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporal_variance - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_denoised_reflections - }; - Setup(tile_classification_pass_, ContextVK::kShader_TileClassification, layout_bindings, FFX_SSSR_ARRAY_SIZE(layout_bindings)); - } - - // Assemble the shader pass that prepares the indirect arguments - { - uint32_t binding = 0; - VkDescriptorSetLayoutBinding layout_bindings[] = { - Bind(binding++, 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_tile_counter - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_ray_counter - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_intersect_args - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_denoiser_args - }; - Setup(indirect_args_pass_, ContextVK::kShader_IndirectArguments, layout_bindings, FFX_SSSR_ARRAY_SIZE(layout_bindings)); - } - - // Assemble the shader pass for intersecting reflection rays with the depth buffer - { - uint32_t binding = 0; - VkDescriptorSetLayoutBinding layout_bindings[] = { - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_lit_scene - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_depth_buffer_hierarchy - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_environment_map - Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_sobol_buffer - Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_ranking_tile_buffer - Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_scrambling_tile_buffer - Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_ray_list - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLER), // g_linear_sampler - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLER), // g_environment_map_sampler - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_intersection_result - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_ray_lengths - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_denoised_reflections - }; - Setup(intersection_pass_, ContextVK::kShader_Intersection, layout_bindings, FFX_SSSR_ARRAY_SIZE(layout_bindings)); - } - - // Assemble the shader pass for spatial resolve - { - uint32_t binding = 0; - VkDescriptorSetLayoutBinding layout_bindings[] = { - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_depth_buffer - Bind(binding++, 
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_intersection_result - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_has_ray - Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_tile_list - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_spatially_denoised_reflections - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_ray_lengths - }; - Setup(spatial_denoising_pass_, ContextVK::kShader_SpatialResolve, layout_bindings, FFX_SSSR_ARRAY_SIZE(layout_bindings), - is_subgroup_size_control_extension_available_ ? VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT : 0); - } - - // Assemble the shader pass for temporal resolve - { - uint32_t binding = 0; - VkDescriptorSetLayoutBinding layout_bindings[] = { - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal_history - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness_history - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_depth_buffer - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_motion_vectors - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_temporally_denoised_reflections_history - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_ray_lengths - Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_tile_list - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporally_denoised_reflections - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_spatially_denoised_reflections - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporal_variance - }; - Setup(temporal_denoising_pass_, ContextVK::kShader_TemporalResolve, layout_bindings, FFX_SSSR_ARRAY_SIZE(layout_bindings)); - } - - // Assemble the shader pass for EAW resolve - { - 
uint32_t binding = 0; - VkDescriptorSetLayoutBinding layout_bindings[] = { - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness - Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_depth_buffer - Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_tile_list - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporally_denoised_reflections - Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_denoised_reflections - }; - Setup(eaw_denoising_pass_, ContextVK::kShader_EAWResolve, layout_bindings, FFX_SSSR_ARRAY_SIZE(layout_bindings)); - } - -#if FFX_SSSR_DUMP_SHADERS - tile_classification_pass_.DumpInternalRepresentations("classify_tiles.dump.spirv.amdil.isa"); - indirect_args_pass_.DumpInternalRepresentations("prepare_indirect_args.dump.spirv.amdil.isa"); - intersection_pass_.DumpInternalRepresentations("intersect.dump.spirv.amdil.isa"); - spatial_denoising_pass_.DumpInternalRepresentations("resolve_spatial.dump.spirv.amdil.isa"); - temporal_denoising_pass_.DumpInternalRepresentations("resolve_temporal.dump.spirv.amdil.isa"); - eaw_denoising_pass_.DumpInternalRepresentations("resolve_eaw.dump.spirv.amdil.isa"); -#endif // FFX_SSSR_DUMP_SHADERS - } - - const ContextVK::ShaderPass& ContextVK::GetTileClassificationPass() const - { - return tile_classification_pass_; - } - - const ContextVK::ShaderPass& ContextVK::GetIndirectArgsPass() const - { - return indirect_args_pass_; - } - - const ContextVK::ShaderPass& ContextVK::GetIntersectionPass() const - { - return intersection_pass_; - } - - const ContextVK::ShaderPass& ContextVK::GetSpatialDenoisingPass() const - { - return spatial_denoising_pass_; - } - - const ContextVK::ShaderPass& ContextVK::GetTemporalDenoisingPass() const - { - return temporal_denoising_pass_; - } - - const ContextVK::ShaderPass& ContextVK::GetEawDenoisingPass() const - { - return eaw_denoising_pass_; - } - - VkDescriptorSetLayout 
ContextVK::GetUniformBufferDescriptorSetLayout() const - { - return uniform_buffer_descriptor_set_layout_; - } - - void ffx_sssr::ContextVK::ShaderPass::DumpInternalRepresentations(const char* path) - { -#if FFX_SSSR_DUMP_SHADERS - VkResult res = VK_SUCCESS; - - std::ofstream filestream(path); - - PFN_vkGetPipelineExecutablePropertiesKHR vkGetPipelineExecutablePropertiesKHR = (PFN_vkGetPipelineExecutablePropertiesKHR)vkGetDeviceProcAddr(device_, "vkGetPipelineExecutablePropertiesKHR"); - PFN_vkGetPipelineExecutableInternalRepresentationsKHR vkGetPipelineExecutableInternalRepresentationsKHR = (PFN_vkGetPipelineExecutableInternalRepresentationsKHR)vkGetDeviceProcAddr(device_, "vkGetPipelineExecutableInternalRepresentationsKHR"); - if (!vkGetPipelineExecutablePropertiesKHR || !vkGetPipelineExecutableInternalRepresentationsKHR) - { - FFX_SSSR_ASSERT(false); // Could not retrieve pipeline executable function pointers - is VK_KHR_pipeline_executable_properties enabled? - return; - } - - VkPipelineInfoKHR pipeline_info = { - VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR, NULL, pipeline_, - }; - - uint32_t executables_count = 0; - res = vkGetPipelineExecutablePropertiesKHR(device_, &pipeline_info, &executables_count, NULL); - FFX_SSSR_ASSERT(res == VK_SUCCESS); - std::vector executables(executables_count); - for (uint32_t i = 0; i < executables_count; ++i) - { - executables[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR; - } - res = vkGetPipelineExecutablePropertiesKHR(device_, &pipeline_info, &executables_count, executables.data()); - FFX_SSSR_ASSERT(res == VK_SUCCESS); - for (uint32_t j = 0; j < executables_count; j++) - { - const VkPipelineExecutablePropertiesKHR& exec = executables[j]; - - VkPipelineExecutableInfoKHR pipeline_exec_info = { VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR }; - pipeline_exec_info.pNext = nullptr; - pipeline_exec_info.pipeline = pipeline_; - pipeline_exec_info.executableIndex = j; - - // Internal representations - uint32_t 
internal_representation_count = 0; - res = vkGetPipelineExecutableInternalRepresentationsKHR(device_, &pipeline_exec_info, &internal_representation_count, NULL); - FFX_SSSR_ASSERT(res == VK_SUCCESS); - std::vector internal_representations(internal_representation_count); - for (uint32_t i = 0; i < internal_representation_count; i++) - { - internal_representations[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR; - } - res = vkGetPipelineExecutableInternalRepresentationsKHR(device_, &pipeline_exec_info, &internal_representation_count, internal_representations.data()); - FFX_SSSR_ASSERT(res == VK_SUCCESS); - - // For each VkPipelineExecutableInternalRepresentationKHR we now know the data size --> allocate space for pData and call vkGetPipelineExecutableInternalRepresentationsKHR again. - std::vector> data_pointers(internal_representation_count); - for (uint32_t i = 0; i < internal_representation_count; i++) - { - data_pointers[i] = std::make_unique(internal_representations[i].dataSize); - internal_representations[i].pData = data_pointers[i].get(); - } - res = vkGetPipelineExecutableInternalRepresentationsKHR(device_, &pipeline_exec_info, &internal_representation_count, internal_representations.data()); - FFX_SSSR_ASSERT(res == VK_SUCCESS); - - for (uint32_t i = 0; i < internal_representation_count; i++) - { - filestream.write(data_pointers[i].get(), internal_representations[i].dataSize); - } - } - - filestream.close(); -#endif // FFX_SSSR_DUMP_SHADERS - } - -} diff --git a/ffx-sssr/src/vk/context_vk.h b/ffx-sssr/src/vk/context_vk.h deleted file mode 100644 index 804d135..0000000 --- a/ffx-sssr/src/vk/context_vk.h +++ /dev/null @@ -1,170 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include - -#include - -#define FFX_SSSR_DUMP_SHADERS 0 - -#include "sampler_vk.h" -#include "reflection_view_vk.h" -#include "upload_buffer_vk.h" -#include "shader_compiler_vk.h" - -namespace ffx_sssr -{ - class Context; - class ReflectionViewVK; - - /** - The ContextVK class encapsulates the data for a single Vulkan stochastic screen space reflections execution context. - */ - class ContextVK - { - FFX_SSSR_NON_COPYABLE(ContextVK); - - public: - /** - The available shaders. 
- */ - enum Shader - { - kShader_IndirectArguments, - kShader_TileClassification, - kShader_Intersection, - kShader_SpatialResolve, - kShader_TemporalResolve, - kShader_EAWResolve, - - kShader_Count - }; - - ContextVK(Context& context, FfxSssrCreateContextInfo const& create_context_info); - ~ContextVK(); - - inline Context& GetContext(); - inline Context const& GetContext() const; - - inline VkDevice GetDevice() const; - inline VkPhysicalDevice GetPhysicalDevice() const; - inline UploadBufferVK& GetUploadBuffer(); - - inline ShaderVK const& GetShader(Shader shader) const; - inline BlueNoiseSamplerVK const& GetSampler1SPP() const; - inline BlueNoiseSamplerVK const& GetSampler2SPP() const; - - void GetReflectionViewTileClassificationElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - void GetReflectionViewIntersectionElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - void GetReflectionViewDenoisingElapsedTime(std::uint64_t reflection_view_id, std::uint64_t& elapsed_time) const; - - void CreateReflectionView(std::uint64_t reflection_view_id, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info); - void ResolveReflectionView(std::uint64_t reflection_view_id, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info); - - protected: - friend class Context; - friend class ReflectionViewVK; - - /** - The ShaderPass class holds the data for an individual shader pass. - */ - class ShaderPass - { - FFX_SSSR_NON_COPYABLE(ShaderPass); - - public: - inline ShaderPass(); - inline ~ShaderPass(); - - inline operator bool() const; - - inline ShaderPass(ShaderPass&& other) noexcept; - inline ShaderPass& operator =(ShaderPass&& other) noexcept; - - void DumpInternalRepresentations(const char* path); - - // The device that created the pass. - VkDevice device_; - // The pipeline state object. - VkPipeline pipeline_; - // The pipeline layout. 
- VkPipelineLayout pipeline_layout_; - // The descriptor set layout. - VkDescriptorSetLayout descriptor_set_layout_; - // The number of resource bindings of this pass; - uint32_t bindings_count_; - - }; - - void CompileShaders(FfxSssrCreateContextInfo const& create_context_info); - void CreatePipelines(); - - const ShaderPass& GetTileClassificationPass() const; - const ShaderPass& GetIndirectArgsPass() const; - const ShaderPass& GetIntersectionPass() const; - const ShaderPass& GetSpatialDenoisingPass() const; - const ShaderPass& GetTemporalDenoisingPass() const; - const ShaderPass& GetEawDenoisingPass() const; - VkDescriptorSetLayout GetUniformBufferDescriptorSetLayout() const; - - // The execution context. - Context& context_; - // The device to be used. - VkDevice device_; - // The physical device to be used. - VkPhysicalDevice physical_device_; - // If the VK_EXT_subgroup_size_control extension is available. - bool is_subgroup_size_control_extension_available_; - // The compiled reflections shaders. - std::array shaders_; - // The compiler to be used for building the Vulkan shaders. - ShaderCompilerVK shader_compiler_; - // The Blue Noise sampler optimized for 1 sample per pixel. - BlueNoiseSamplerVK blue_noise_sampler_1spp_; - // The Blue Noise sampler optimized for 2 samples per pixel. - BlueNoiseSamplerVK blue_noise_sampler_2spp_; - // The flag for whether the samplers were populated. - bool samplers_were_populated_; - // The buffer to be used for uploading memory from the CPU to the GPU. - UploadBufferVK upload_buffer_; - // The array of reflection views to be resolved. - SparseArray reflection_views_; - - // Same descriptor set layout for all passes. - VkDescriptorSetLayout uniform_buffer_descriptor_set_layout_; - // The shader pass that classifies tiles. - ShaderPass tile_classification_pass_; - // The shader pass that prepares the indirect arguments. - ShaderPass indirect_args_pass_; - // The shader pass intersecting reflection rays with the depth buffer. 
- ShaderPass intersection_pass_; - // The shader pass that does spatial denoising. - ShaderPass spatial_denoising_pass_; - // The shader pass that does temporal denoising. - ShaderPass temporal_denoising_pass_; - // The shader pass that does the second spatial denoising. - ShaderPass eaw_denoising_pass_; - }; -} - -#include "context_vk.inl" diff --git a/ffx-sssr/src/vk/context_vk.inl b/ffx-sssr/src/vk/context_vk.inl deleted file mode 100644 index fb2a83a..0000000 --- a/ffx-sssr/src/vk/context_vk.inl +++ /dev/null @@ -1,206 +0,0 @@ -#include "context_vk.h" -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - Gets the context. - - \return The context. 
- */ - Context& ContextVK::GetContext() - { - return context_; - } - - /** - Gets the Vulkan device. - - \return The Vulkan device. - */ - VkDevice ContextVK::GetDevice() const - { - return device_; - } - - - /** - Gets the Vulkan physical device. - - \return The Vulkan physical device. - */ - inline VkPhysicalDevice ContextVK::GetPhysicalDevice() const - { - return physical_device_; - } - - /** - Gets the context. - - \return The context. - */ - Context const& ContextVK::GetContext() const - { - return context_; - } - - /** - Gets hold of the upload buffer. - - \return The upload buffer. - */ - UploadBufferVK& ContextVK::GetUploadBuffer() - { - return upload_buffer_; - } - - /** - Gets the shader. - - \param shader The shader to be retrieved. - \param switches The set of switches to be used. - \return The requested shader. - */ - ShaderVK const& ContextVK::GetShader(Shader shader) const - { - FFX_SSSR_ASSERT(shader < kShader_Count); - return shaders_[shader]; - } - - /** - Gets a blue noise sampler with 1 sample per pixel. - - \return The requested sampler. - */ - inline BlueNoiseSamplerVK const & ContextVK::GetSampler1SPP() const - { - return blue_noise_sampler_1spp_; - } - - /** - Gets a blue noise sampler with 2 samples per pixel. - - \return The requested sampler. - */ - inline BlueNoiseSamplerVK const & ContextVK::GetSampler2SPP() const - { - return blue_noise_sampler_2spp_; - } - - /** - The constructor for the ShaderPass class. - */ - ContextVK::ShaderPass::ShaderPass() - : device_(VK_NULL_HANDLE) - , pipeline_(VK_NULL_HANDLE) - , pipeline_layout_(VK_NULL_HANDLE) - , descriptor_set_layout_(VK_NULL_HANDLE) - , bindings_count_(0) - { - } - - /** - The constructor for the ShaderPass class. - - \param other The shader pass to be moved. 
- */ - ContextVK::ShaderPass::ShaderPass(ShaderPass&& other) noexcept - : device_(other.device_) - , pipeline_(other.pipeline_) - , pipeline_layout_(other.pipeline_layout_) - , descriptor_set_layout_(other.descriptor_set_layout_) - , bindings_count_(other.bindings_count_) - { - other.device_ = VK_NULL_HANDLE; - other.pipeline_ = VK_NULL_HANDLE; - other.pipeline_layout_ = VK_NULL_HANDLE; - other.descriptor_set_layout_ = VK_NULL_HANDLE; - other.bindings_count_ = 0; - } - - /** - The destructor for the ShaderPass class. - */ - ContextVK::ShaderPass::~ShaderPass() - { - FFX_SSSR_ASSERT(device_); - - if (pipeline_) - { - vkDestroyPipeline(device_, pipeline_, nullptr); - } - - if (pipeline_layout_) - { - vkDestroyPipelineLayout(device_, pipeline_layout_, nullptr); - } - - if (descriptor_set_layout_) - { - vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr); - } - - device_ = VK_NULL_HANDLE; - pipeline_ = VK_NULL_HANDLE; - pipeline_layout_ = VK_NULL_HANDLE; - descriptor_set_layout_ = VK_NULL_HANDLE; - bindings_count_ = 0; - } - - /** - Assigns the shader pass. - - \param other The shader pass to be moved. - \return The assigned shader pass. - */ - ContextVK::ShaderPass& ContextVK::ShaderPass::operator =(ShaderPass&& other) noexcept - { - if (this != &other) - { - device_ = other.device_; - pipeline_ = other.pipeline_; - pipeline_layout_ = other.pipeline_layout_; - descriptor_set_layout_ = other.descriptor_set_layout_; - bindings_count_ = other.bindings_count_; - - other.device_ = VK_NULL_HANDLE; - other.pipeline_ = VK_NULL_HANDLE; - other.pipeline_layout_ = VK_NULL_HANDLE; - other.descriptor_set_layout_ = VK_NULL_HANDLE; - other.bindings_count_ = 0; - } - - return *this; - } - - /** - Checks whether the shader pass is valid. - - \return true if the shader pass is valid, false otherwise. 
- */ - ContextVK::ShaderPass::operator bool() const - { - return (device_ && pipeline_ && pipeline_layout_ && descriptor_set_layout_); - } -} diff --git a/ffx-sssr/src/vk/image_vk.cpp b/ffx-sssr/src/vk/image_vk.cpp deleted file mode 100644 index 3401495..0000000 --- a/ffx-sssr/src/vk/image_vk.cpp +++ /dev/null @@ -1,208 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "image_vk.h" -#include "memory.h" - -namespace ffx_sssr -{ - /** - The constructor for the ImageVK class. - */ - ImageVK::ImageVK() - : image_(VK_NULL_HANDLE) - , device_(VK_NULL_HANDLE) - , memory_(VK_NULL_HANDLE) - , image_view_(VK_NULL_HANDLE) - { - } - - /** - The destructor for the ImageVK class. 
- */ - ImageVK::~ImageVK() - { - if (image_) - { - vkDestroyImage(device_, image_, nullptr); - image_ = VK_NULL_HANDLE; - } - - if (memory_) - { - vkFreeMemory(device_, memory_, nullptr); - memory_ = VK_NULL_HANDLE; - } - - if (image_view_) - { - vkDestroyImageView(device_, image_view_, nullptr); - image_view_ = VK_NULL_HANDLE; - } - - device_ = VK_NULL_HANDLE; - } - - /** - The constructor for the ImageVK class. - - \param device The VkDevice that creates the image view. - \param physical_device The VkPhysicalDevice to determine the right memory heap. - \param create_info The create info. - */ - ImageVK::ImageVK(VkDevice device, VkPhysicalDevice physical_device, const CreateInfo & create_info) - : device_(device) - { - VkImageCreateInfo image_create_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; - image_create_info.pNext = nullptr; - image_create_info.flags = 0; - image_create_info.imageType = VK_IMAGE_TYPE_2D; - image_create_info.format = create_info.format_; - image_create_info.extent = { create_info.width_, create_info.height_, 1 }; - image_create_info.mipLevels = create_info.mip_levels_; - image_create_info.arrayLayers = 1; - image_create_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_create_info.usage = create_info.image_usage_; - image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_create_info.queueFamilyIndexCount = 0; - image_create_info.pQueueFamilyIndices = nullptr; - image_create_info.initialLayout = create_info.initial_layout_; - if (VK_SUCCESS != vkCreateImage(device, &image_create_info, nullptr, &image_)) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - VkMemoryRequirements memory_requirements = {}; - vkGetImageMemoryRequirements(device, image_, &memory_requirements); - - VkPhysicalDeviceMemoryProperties memory_properties = {}; - vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties); - - // find the right memory type for this image - int 
memory_type_index = -1; - for (uint32_t i = 0; i < memory_properties.memoryTypeCount; ++i) - { - const VkMemoryType& memory_type = memory_properties.memoryTypes[i]; - bool has_required_properties = memory_type.propertyFlags & create_info.memory_property_flags; - bool is_required_memory_type = memory_requirements.memoryTypeBits & (1 << i); - if (has_required_properties && is_required_memory_type) - { - memory_type_index = i; - break; - } - } - - // abort if we couldn't find the right memory type - if (memory_type_index == -1) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - VkMemoryAllocateInfo memory_allocate_info = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; - memory_allocate_info.pNext = nullptr; - memory_allocate_info.allocationSize = memory_requirements.size; - memory_allocate_info.memoryTypeIndex = memory_type_index; - if (VK_SUCCESS != vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory_)) - { - throw reflection_error(FFX_SSSR_STATUS_OUT_OF_MEMORY); - } - - if (VK_SUCCESS != vkBindImageMemory(device_, image_, memory_, 0)) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - VkImageSubresourceRange subresource_range = {}; - subresource_range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresource_range.baseMipLevel = 0; - subresource_range.levelCount = create_info.mip_levels_; - subresource_range.baseArrayLayer = 0; - subresource_range.layerCount = 1; - - VkImageViewCreateInfo image_view_create_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; - image_view_create_info.pNext = VK_NULL_HANDLE; - image_view_create_info.flags = 0; - image_view_create_info.image = image_; - image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_create_info.format = create_info.format_; - image_view_create_info.subresourceRange = subresource_range; - if (VK_SUCCESS != vkCreateImageView(device_, &image_view_create_info, nullptr, &image_view_)) - { - throw reflection_error(FFX_SSSR_STATUS_INTERNAL_ERROR); - } - - 
FFX_SSSR_ASSERT(create_info.name_); // require all library objects to be named. - PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectName = (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(device, "vkSetDebugUtilsObjectNameEXT"); - if (vkSetDebugUtilsObjectName) - { - VkDebugUtilsObjectNameInfoEXT object_name_info = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT }; - object_name_info.pNext = nullptr; - object_name_info.objectType = VK_OBJECT_TYPE_IMAGE; - object_name_info.objectHandle = reinterpret_cast(image_); - object_name_info.pObjectName = create_info.name_; - - VkResult result = vkSetDebugUtilsObjectName(device, &object_name_info); - FFX_SSSR_ASSERT(result == VK_SUCCESS); - } - } - - /** - The constructor for the ImageVK class. - - \param other The image to be moved. - */ - ImageVK::ImageVK(ImageVK && other) noexcept - : image_(other.image_) - , device_(other.device_) - , image_view_(other.image_view_) - , memory_(other.memory_) - { - other.image_ = VK_NULL_HANDLE; - other.device_ = VK_NULL_HANDLE; - other.image_view_ = VK_NULL_HANDLE; - other.memory_ = VK_NULL_HANDLE; - } - - /** - Assigns the image. - - \param other The image to be moved. - \return The assigned image. - */ - ImageVK & ImageVK::operator=(ImageVK && other) noexcept - { - if (this != &other) - { - image_ = other.image_; - device_ = other.device_; - image_view_ = other.image_view_; - memory_ = other.memory_; - - other.image_ = VK_NULL_HANDLE; - other.device_ = VK_NULL_HANDLE; - other.image_view_ = VK_NULL_HANDLE; - other.memory_ = VK_NULL_HANDLE; - } - - return *this; - } -} diff --git a/ffx-sssr/src/vk/image_vk.h b/ffx-sssr/src/vk/image_vk.h deleted file mode 100644 index 0cf0d34..0000000 --- a/ffx-sssr/src/vk/image_vk.h +++ /dev/null @@ -1,66 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include - -#include "macros.h" -#include "ffx_sssr.h" - -namespace ffx_sssr -{ - /** - The ImageVK class is a helper class to create and destroy image resources on Vulkan. 
- */ - class ImageVK - { - FFX_SSSR_NON_COPYABLE(ImageVK); - - public: - - class CreateInfo - { - public: - uint32_t width_; - uint32_t height_; - VkFormat format_; - uint32_t mip_levels_; - VkImageLayout initial_layout_; - VkMemoryPropertyFlags memory_property_flags; - VkImageUsageFlags image_usage_; - const char* name_; - }; - - ImageVK(); - ~ImageVK(); - - ImageVK(VkDevice device, VkPhysicalDevice physical_device, const CreateInfo& create_info); - - ImageVK(ImageVK&& other) noexcept; - ImageVK& ImageVK::operator =(ImageVK&& other) noexcept; - - VkDevice device_; - VkImage image_; - VkImageView image_view_; - VkDeviceMemory memory_; // We're creating a low number of allocations for this library, so we just allocate a dedicated memory object per buffer. Normally you'd want to do sub-allocations of a larger allocation. - }; -} diff --git a/ffx-sssr/src/vk/reflection_view_vk.cpp b/ffx-sssr/src/vk/reflection_view_vk.cpp deleted file mode 100644 index c4d06f7..0000000 --- a/ffx-sssr/src/vk/reflection_view_vk.cpp +++ /dev/null @@ -1,1094 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "reflection_view_vk.h" - -#include -#include - -#include "context.h" -#include "reflection_error.h" -#include "reflection_view.h" -#include "context_vk.h" -#include "ffx_sssr_vk.h" - -namespace ffx_sssr -{ - /** - The constructor for the ReflectionViewVK class. - */ - ReflectionViewVK::ReflectionViewVK() - : width_(0) - , height_(0) - , flags_(0) - , descriptor_pool_(0) - , tile_list_() - , tile_counter_() - , ray_list_() - , ray_counter_() - , intersection_pass_indirect_args_() - , denoiser_pass_indirect_args_() - , temporal_denoiser_result_() - , ray_lengths_() - , temporal_variance_() - , tile_classification_elapsed_time_(0) - , intersection_elapsed_time_(0) - , denoising_elapsed_time_(0) - , timestamp_query_pool_(0) - , timestamp_queries_() - , timestamp_queries_index_(0) - , scene_format_(VK_FORMAT_UNDEFINED) - , tile_classification_descriptor_set_() - , indirect_args_descriptor_set_() - , intersection_descriptor_set_() - , spatial_denoising_descriptor_set_() - , temporal_denoising_descriptor_set_() - , eaw_denoising_descriptor_set_() - , prev_view_projection_() - , uniform_buffer_descriptor_set_() - { - } - - /** - The constructor for the ReflectionViewVK class. - - \param other The reflection view to be moved. 
- */ - ReflectionViewVK::ReflectionViewVK(ReflectionViewVK&& other) noexcept - : width_(other.width_) - , height_(other.height_) - , flags_(other.flags_) - , descriptor_pool_(other.descriptor_pool_) - , tile_classification_elapsed_time_(other.tile_classification_elapsed_time_) - , intersection_elapsed_time_(other.intersection_elapsed_time_) - , denoising_elapsed_time_(other.denoising_elapsed_time_) - , timestamp_query_pool_(other.timestamp_query_pool_) - , timestamp_queries_(std::move(other.timestamp_queries_)) - , timestamp_queries_index_(other.timestamp_queries_index_) - , tile_list_(std::move(other.tile_list_)) - , tile_counter_(std::move(other.tile_counter_)) - , ray_list_(std::move(other.ray_list_)) - , ray_counter_(std::move(other.ray_counter_)) - , intersection_pass_indirect_args_(std::move(other.intersection_pass_indirect_args_)) - , denoiser_pass_indirect_args_(std::move(other.denoiser_pass_indirect_args_)) - , ray_lengths_(std::move(other.ray_lengths_)) - , temporal_variance_(std::move(other.temporal_variance_)) - , scene_format_(other.scene_format_) - , prev_view_projection_(other.prev_view_projection_) - { - - for (int i = 0; i < 2; ++i) - { - temporal_denoiser_result_[i] = std::move(other.temporal_denoiser_result_[i]); - - tile_classification_descriptor_set_[i] = other.tile_classification_descriptor_set_[i]; - indirect_args_descriptor_set_[i] = other.indirect_args_descriptor_set_[i]; - intersection_descriptor_set_[i] = other.intersection_descriptor_set_[i]; - spatial_denoising_descriptor_set_[i] = other.spatial_denoising_descriptor_set_[i]; - temporal_denoising_descriptor_set_[i] = other.temporal_denoising_descriptor_set_[i]; - eaw_denoising_descriptor_set_[i] = other.eaw_denoising_descriptor_set_[i]; - - other.tile_classification_descriptor_set_[i] = VK_NULL_HANDLE; - other.indirect_args_descriptor_set_[i] = VK_NULL_HANDLE; - other.intersection_descriptor_set_[i] = VK_NULL_HANDLE; - other.spatial_denoising_descriptor_set_[i] = VK_NULL_HANDLE; - 
other.temporal_denoising_descriptor_set_[i] = VK_NULL_HANDLE; - other.eaw_denoising_descriptor_set_[i] = VK_NULL_HANDLE; - } - - for (int i = 0; i < FFX_SSSR_ARRAY_SIZE(uniform_buffer_descriptor_set_); ++i) - { - uniform_buffer_descriptor_set_[i] = other.uniform_buffer_descriptor_set_[i]; - other.uniform_buffer_descriptor_set_[i] = VK_NULL_HANDLE; - } - - other.descriptor_pool_ = VK_NULL_HANDLE; - other.timestamp_query_pool_ = VK_NULL_HANDLE; - } - - /** - The destructor for the ReflectionViewVK class. - */ - ReflectionViewVK::~ReflectionViewVK() - { - if (linear_sampler_) - { - vkDestroySampler(device_, linear_sampler_, nullptr); - } - - if (descriptor_pool_) - { - vkResetDescriptorPool(device_, descriptor_pool_, 0); - vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr); - } - - if (timestamp_query_pool_) - { - vkDestroyQueryPool(device_, timestamp_query_pool_, nullptr); - } - } - - /** - Assigns the reflection view. - - \param other The reflection view to be moved. - \return The assigned reflection view. 
- */ - ReflectionViewVK& ReflectionViewVK::operator =(ReflectionViewVK&& other) noexcept - { - if (this != &other) - { - width_ = other.width_; - height_ = other.height_; - flags_ = other.flags_; - scene_format_ = other.scene_format_; - prev_view_projection_ = other.prev_view_projection_; - descriptor_pool_ = other.descriptor_pool_; - device_ = other.device_; - physical_device_ = other.physical_device_; - - timestamp_queries_ = other.timestamp_queries_; - timestamp_queries_index_ = other.timestamp_queries_index_; - tile_classification_elapsed_time_ = other.tile_classification_elapsed_time_; - intersection_elapsed_time_ = other.intersection_elapsed_time_; - denoising_elapsed_time_ = other.denoising_elapsed_time_; - timestamp_query_pool_ = other.timestamp_query_pool_; - - tile_list_ = std::move(other.tile_list_); - tile_counter_ = std::move(other.tile_counter_); - ray_list_ = std::move(other.ray_list_); - ray_counter_ = std::move(other.ray_counter_); - intersection_pass_indirect_args_ = std::move(other.intersection_pass_indirect_args_); - denoiser_pass_indirect_args_ = std::move(other.denoiser_pass_indirect_args_); - ray_lengths_ = std::move(other.ray_lengths_); - temporal_variance_ = std::move(other.temporal_variance_); - - other.descriptor_pool_ = VK_NULL_HANDLE; - timestamp_query_pool_ = VK_NULL_HANDLE; - - for (int i = 0; i < 2; ++i) - { - temporal_denoiser_result_[i] = std::move(other.temporal_denoiser_result_[i]); - - tile_classification_descriptor_set_[i] = other.tile_classification_descriptor_set_[i]; - indirect_args_descriptor_set_[i] = other.indirect_args_descriptor_set_[i]; - intersection_descriptor_set_[i] = other.intersection_descriptor_set_[i]; - spatial_denoising_descriptor_set_[i] = other.spatial_denoising_descriptor_set_[i]; - temporal_denoising_descriptor_set_[i] = other.temporal_denoising_descriptor_set_[i]; - eaw_denoising_descriptor_set_[i] = other.eaw_denoising_descriptor_set_[i]; - - other.tile_classification_descriptor_set_[i] = 
VK_NULL_HANDLE; - other.indirect_args_descriptor_set_[i] = VK_NULL_HANDLE; - other.intersection_descriptor_set_[i] = VK_NULL_HANDLE; - other.spatial_denoising_descriptor_set_[i] = VK_NULL_HANDLE; - other.temporal_denoising_descriptor_set_[i] = VK_NULL_HANDLE; - other.eaw_denoising_descriptor_set_[i] = VK_NULL_HANDLE; - } - - for (int i = 0; i < FFX_SSSR_ARRAY_SIZE(uniform_buffer_descriptor_set_); ++i) - { - uniform_buffer_descriptor_set_[i] = other.uniform_buffer_descriptor_set_[i]; - other.uniform_buffer_descriptor_set_[i] = VK_NULL_HANDLE; - } - } - - return *this; - } - - /** - Creates the reflection view. - - \param context The context to be used. - \param create_reflection_view_info The reflection view creation information. - */ - void ReflectionViewVK::Create(Context& context, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info) - { - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo != nullptr); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->sceneFormat != VK_FORMAT_UNDEFINED); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->depthBufferHierarchySRV); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->motionBufferSRV); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->normalBufferSRV); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->roughnessBufferSRV); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->normalHistoryBufferSRV); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->roughnessHistoryBufferSRV); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->environmentMapSRV); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->environmentMapSampler); - FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->reflectionViewUAV); - 
FFX_SSSR_ASSERT(create_reflection_view_info.pVkCreateReflectionViewInfo->uploadCommandBuffer); - FFX_SSSR_ASSERT(create_reflection_view_info.outputWidth && create_reflection_view_info.outputHeight); - - // Populate the reflection view properties - device_ = context.GetContextVK()->GetDevice(); - physical_device_ = context.GetContextVK()->GetPhysicalDevice(); - width_ = create_reflection_view_info.outputWidth; - height_ = create_reflection_view_info.outputHeight; - flags_ = create_reflection_view_info.flags; - scene_format_ = create_reflection_view_info.pVkCreateReflectionViewInfo->sceneFormat; - - // Create pool for timestamp queries - VkQueryPoolCreateInfo query_pool_create_info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; - query_pool_create_info.pNext = nullptr; - query_pool_create_info.flags = 0; - query_pool_create_info.queryType = VK_QUERY_TYPE_TIMESTAMP; - query_pool_create_info.queryCount = kTimestampQuery_Count * context.GetFrameCountBeforeReuse(); - query_pool_create_info.pipelineStatistics = 0; - if (VK_SUCCESS != vkCreateQueryPool(device_, &query_pool_create_info, NULL, ×tamp_query_pool_)) - { - throw reflection_error(context, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create timestamp query pool"); - } - - timestamp_queries_.resize(context.GetFrameCountBeforeReuse()); - for (auto& timestamp_queries : timestamp_queries_) - { - timestamp_queries.reserve(kTimestampQuery_Count); - } - - // Create reflection view resources - CreateDescriptorPool(context); - SetupInternalResources(context, create_reflection_view_info); - AllocateDescriptorSets(context); - InitializeResourceDescriptorSets(context, create_reflection_view_info); - } - - /** - Returns an upper limit of required descriptors. - - \return The conservative count of total descriptors. 
- */ - uint32_t ReflectionViewVK::GetConservativeResourceDescriptorCount(const Context& context) const - { - const ContextVK* vk_context = context.GetContextVK(); - uint32_t resource_descriptor_count = vk_context->GetTileClassificationPass().bindings_count_ - + vk_context->GetIndirectArgsPass().bindings_count_ - + vk_context->GetIntersectionPass().bindings_count_ - + vk_context->GetSpatialDenoisingPass().bindings_count_ - + vk_context->GetTemporalDenoisingPass().bindings_count_ - + vk_context->GetEawDenoisingPass().bindings_count_; - resource_descriptor_count *= 2; // double buffering descriptors - return resource_descriptor_count; - } - - /** - Creates the descriptor pool. - - \param context The context to be used. - */ - void ReflectionViewVK::CreateDescriptorPool(const Context& context) - { - FFX_SSSR_ASSERT(!descriptor_pool_); - uint32_t resource_descriptor_count = GetConservativeResourceDescriptorCount(context); - - uint32_t frame_count = context.GetFrameCountBeforeReuse(); - uint32_t uniform_buffer_descriptor_count = frame_count; - - // Low descriptor counts overall, so we just allocate the max count per type. 
- VkDescriptorPoolSize pool_sizes[5]; - pool_sizes[0].descriptorCount = resource_descriptor_count; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[1].descriptorCount = resource_descriptor_count; - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - pool_sizes[2].descriptorCount = resource_descriptor_count; - pool_sizes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - pool_sizes[3].descriptorCount = resource_descriptor_count; - pool_sizes[3].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - pool_sizes[4].descriptorCount = uniform_buffer_descriptor_count; - pool_sizes[4].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - - uint32_t uniform_buffer_set_count = frame_count; - uint32_t resources_set_count = 2 * 8; // 8 passes double buffered - - VkDescriptorPoolCreateInfo create_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.maxSets = uniform_buffer_set_count + resources_set_count; - create_info.poolSizeCount = FFX_SSSR_ARRAY_SIZE(pool_sizes); - create_info.pPoolSizes = pool_sizes; - - if (VK_SUCCESS != vkCreateDescriptorPool(device_, &create_info, nullptr, &descriptor_pool_)) - { - throw reflection_error(context, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create descriptor pool."); - } - } - - /** - Creates all internal resources and handles initial resource transitions. - - \param context The context to be used. - \param reflection_view The reflection view to be resolved. 
- - */ - void ReflectionViewVK::SetupInternalResources(Context & context, FfxSssrCreateReflectionViewInfo const & create_reflection_view_info) - { - VkSamplerCreateInfo sampler_info = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; - sampler_info.pNext = nullptr; - sampler_info.flags = 0; - sampler_info.magFilter = VK_FILTER_LINEAR; - sampler_info.minFilter = VK_FILTER_LINEAR; - sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - sampler_info.mipLodBias = 0; - sampler_info.anisotropyEnable = false; - sampler_info.maxAnisotropy = 0; - sampler_info.compareEnable = false; - sampler_info.compareOp = VK_COMPARE_OP_NEVER; - sampler_info.minLod = 0; - sampler_info.maxLod = 16; - sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - sampler_info.unnormalizedCoordinates = false; - if (VK_SUCCESS != vkCreateSampler(device_, &sampler_info, nullptr, &linear_sampler_)) - { - throw reflection_error(context, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to create linear sampler"); - } - - // Create tile classification-related buffers - { - uint32_t num_tiles = RoundedDivide(width_, 8u) * RoundedDivide(height_, 8u); - uint32_t num_pixels = width_ * height_; - - uint32_t tile_list_element_count = num_tiles; - uint32_t tile_counter_element_count = 1; - uint32_t ray_list_element_count = num_pixels; - uint32_t ray_counter_element_count = 1; - uint32_t intersection_pass_indirect_args_element_count = 3; - uint32_t denoiser_pass_indirect_args_element_count = 3; - - BufferVK::CreateInfo create_info = {}; - create_info.memory_property_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - create_info.format_ = VK_FORMAT_R32_UINT; - create_info.buffer_usage_ = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; - - create_info.size_in_bytes_ = 
tile_list_element_count * sizeof(uint32_t); - create_info.name_ = "SSSR Tile List"; - tile_list_ = BufferVK(device_, physical_device_, create_info); - - create_info.size_in_bytes_ = ray_list_element_count * sizeof(uint32_t); - create_info.name_ = "SSSR Ray List"; - ray_list_ = BufferVK(device_, physical_device_, create_info); - - create_info.buffer_usage_ = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - - create_info.size_in_bytes_ = tile_counter_element_count * sizeof(uint32_t); - create_info.name_ = "SSSR Tile Counter"; - tile_counter_ = BufferVK(device_, physical_device_, create_info); - - create_info.size_in_bytes_ = ray_counter_element_count * sizeof(uint32_t); - create_info.name_ = "SSSR Ray Counter"; - ray_counter_ = BufferVK(device_, physical_device_, create_info); - - create_info.buffer_usage_ = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; - - create_info.size_in_bytes_ = intersection_pass_indirect_args_element_count * sizeof(uint32_t); - create_info.name_ = "SSSR Intersect Indirect Args"; - intersection_pass_indirect_args_ = BufferVK(device_, physical_device_, create_info); - - create_info.size_in_bytes_ = denoiser_pass_indirect_args_element_count * sizeof(uint32_t); - create_info.name_ = "SSSR Denoiser Indirect Args"; - denoiser_pass_indirect_args_ = BufferVK(device_, physical_device_, create_info); - } - - // Create denoising-related resources - { - ImageVK::CreateInfo create_info = {}; - create_info.width_ = width_; - create_info.height_ = height_; - create_info.mip_levels_ = 1; - create_info.initial_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; - create_info.memory_property_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - create_info.image_usage_ = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - create_info.format_ = scene_format_; - create_info.name_ = "SSSR Temporal Denoised Result 0"; - 
temporal_denoiser_result_[0] = ImageVK(device_, physical_device_, create_info); - - create_info.format_ = scene_format_; - create_info.name_ = "SSSR Temporal Denoised Result 1"; - temporal_denoiser_result_[1] = ImageVK(device_, physical_device_, create_info); - - create_info.format_ = VK_FORMAT_R16_SFLOAT; - create_info.name_ = "SSSR Ray Lengths"; - ray_lengths_ = ImageVK(device_, physical_device_, create_info); - - create_info.format_ = VK_FORMAT_R8_UNORM; - create_info.name_ = "SSSR Temporal Variance"; - temporal_variance_ = ImageVK(device_, physical_device_, create_info); - } - - VkCommandBuffer command_buffer = create_reflection_view_info.pVkCreateReflectionViewInfo->uploadCommandBuffer; - - VkImageMemoryBarrier image_barriers[] = { - Transition(temporal_denoiser_result_[0].image_, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), - Transition(temporal_denoiser_result_[1].image_, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), - Transition(ray_lengths_.image_, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), - Transition(temporal_variance_.image_, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL) - }; - TransitionBarriers(command_buffer, image_barriers, FFX_SSSR_ARRAY_SIZE(image_barriers)); - - // Initial clear of counters. Successive clears are handled by the indirect arguments pass. 
- vkCmdFillBuffer(command_buffer, ray_counter_.buffer_, 0, VK_WHOLE_SIZE, 0); - vkCmdFillBuffer(command_buffer, tile_counter_.buffer_, 0, VK_WHOLE_SIZE, 0); - - VkClearColorValue clear_calue = {}; - clear_calue.float32[0] = 0; - clear_calue.float32[1] = 0; - clear_calue.float32[2] = 0; - clear_calue.float32[3] = 0; - - VkImageSubresourceRange subresource_range = {}; - subresource_range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresource_range.baseArrayLayer = 0; - subresource_range.baseMipLevel = 0; - subresource_range.layerCount = 1; - subresource_range.levelCount = 1; - - // Initial resource clears - vkCmdClearColorImage(command_buffer, temporal_denoiser_result_[0].image_, VK_IMAGE_LAYOUT_GENERAL, &clear_calue, 1, &subresource_range); - vkCmdClearColorImage(command_buffer, temporal_denoiser_result_[1].image_, VK_IMAGE_LAYOUT_GENERAL, &clear_calue, 1, &subresource_range); - vkCmdClearColorImage(command_buffer, ray_lengths_.image_, VK_IMAGE_LAYOUT_GENERAL, &clear_calue, 1, &subresource_range); - vkCmdClearColorImage(command_buffer, temporal_variance_.image_, VK_IMAGE_LAYOUT_GENERAL, &clear_calue, 1, &subresource_range); - } - - /** - Allocate all required descriptor sets from the descriptor pool. - This includes double buffering of the resource descriptor sets and - multi-buffering of the descriptor set containing the uniform buffer descriptor. - - \param context The context to be used. 
- */ - void ReflectionViewVK::AllocateDescriptorSets(Context& context) - { - ContextVK* vk_context = context.GetContextVK(); - for (int i = 0; i < 2; ++i) - { - tile_classification_descriptor_set_[i] = AllocateDescriptorSet(context, vk_context->GetTileClassificationPass().descriptor_set_layout_); - indirect_args_descriptor_set_[i] = AllocateDescriptorSet(context, vk_context->GetIndirectArgsPass().descriptor_set_layout_); - intersection_descriptor_set_[i] = AllocateDescriptorSet(context, vk_context->GetIntersectionPass().descriptor_set_layout_); - spatial_denoising_descriptor_set_[i] = AllocateDescriptorSet(context, vk_context->GetSpatialDenoisingPass().descriptor_set_layout_); - temporal_denoising_descriptor_set_[i] = AllocateDescriptorSet(context, vk_context->GetTemporalDenoisingPass().descriptor_set_layout_); - eaw_denoising_descriptor_set_[i] = AllocateDescriptorSet(context, vk_context->GetEawDenoisingPass().descriptor_set_layout_); - } - - uint32_t frame_count = context.GetFrameCountBeforeReuse(); - for (uint32_t i = 0; i < frame_count; ++i) - { - uniform_buffer_descriptor_set_[i] = AllocateDescriptorSet(context, vk_context->GetUniformBufferDescriptorSetLayout()); - } - } - - /** - Allocate a single descriptor set from the descriptor pool. - - \param context The context to be used. - \param layout The layout of the descriptor set. - \return The allocated set. 
- */ - VkDescriptorSet ReflectionViewVK::AllocateDescriptorSet(Context& context, VkDescriptorSetLayout layout) - { - VkDescriptorSetAllocateInfo alloc_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO }; - alloc_info.descriptorPool = descriptor_pool_; - alloc_info.pNext = nullptr; - alloc_info.descriptorSetCount = 1; - alloc_info.pSetLayouts = &layout; - - VkDescriptorSet set; - if (VK_SUCCESS != vkAllocateDescriptorSets(device_, &alloc_info, &set)) - { - throw reflection_error(context, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to allocate descriptor set"); - } - return set; - } - - /** - Initializes the resource descriptor sets of each pass. - The uniform buffer on the other hand is updated each frame and thus not handled here. - - \param context The context to be used. - \param reflection_view The reflection view to be resolved. - - */ - void ReflectionViewVK::InitializeResourceDescriptorSets(Context & context, FfxSssrCreateReflectionViewInfo const & create_reflection_view_info) - { - VkImageView scene_srv = create_reflection_view_info.pVkCreateReflectionViewInfo->sceneSRV; - VkImageView depth_hierarchy_srv = create_reflection_view_info.pVkCreateReflectionViewInfo->depthBufferHierarchySRV; - VkImageView motion_buffer_srv = create_reflection_view_info.pVkCreateReflectionViewInfo->motionBufferSRV; - VkImageView normal_buffer_srv = create_reflection_view_info.pVkCreateReflectionViewInfo->normalBufferSRV; - VkImageView roughness_buffer_srv = create_reflection_view_info.pVkCreateReflectionViewInfo->roughnessBufferSRV; - VkImageView normal_history_buffer_srv = create_reflection_view_info.pVkCreateReflectionViewInfo->normalHistoryBufferSRV; - VkImageView roughness_history_buffer_srv = create_reflection_view_info.pVkCreateReflectionViewInfo->roughnessHistoryBufferSRV; - VkSampler environment_map_sampler = create_reflection_view_info.pVkCreateReflectionViewInfo->environmentMapSampler; - VkImageView environment_map_srv = 
create_reflection_view_info.pVkCreateReflectionViewInfo->environmentMapSRV; - VkImageView output_buffer_uav = create_reflection_view_info.pVkCreateReflectionViewInfo->reflectionViewUAV; - - VkImageView normal_buffers[] = { normal_buffer_srv, normal_history_buffer_srv }; - VkImageView roughness_buffers[] = { roughness_buffer_srv, roughness_history_buffer_srv }; - - bool ping_pong_normal = (create_reflection_view_info.flags & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_NORMAL_BUFFERS) != 0; - bool ping_pong_roughness = (create_reflection_view_info.flags & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_ROUGHNESS_BUFFERS) != 0; - - uint32_t descriptor_count = GetConservativeResourceDescriptorCount(context); - std::vector<VkDescriptorImageInfo> image_infos; - std::vector<VkWriteDescriptorSet> write_desc_sets; - image_infos.reserve(descriptor_count); - write_desc_sets.reserve(descriptor_count); - uint32_t binding = 0; - VkDescriptorSet target_set = VK_NULL_HANDLE; - -#define FFX_SSSR_DEBUG_DESCRIPTOR_SETUP 0 - - auto BindSampler = [this, &target_set, &binding, &write_desc_sets, &image_infos](VkSampler sampler) { - VkDescriptorImageInfo image_info = {}; - image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.imageView = VK_NULL_HANDLE; - image_info.sampler = sampler; - image_infos.push_back(image_info); - - VkWriteDescriptorSet write_set = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; - write_set.pNext = nullptr; - write_set.dstSet = target_set; - write_set.dstBinding = binding++; - write_set.dstArrayElement = 0; - write_set.descriptorCount = 1; - write_set.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - write_set.pImageInfo = &image_infos.back(); - write_set.pBufferInfo = nullptr; - write_set.pTexelBufferView = nullptr; - write_desc_sets.push_back(write_set); - -#if FFX_SSSR_DEBUG_DESCRIPTOR_SETUP - vkUpdateDescriptorSets(device_, 1, &write_set, 0, nullptr); -#endif - }; - - auto BindImage = [this, &target_set, &binding, &write_desc_sets, &image_infos](VkDescriptorType type, VkImageView view,
VkImageLayout layout) { - VkDescriptorImageInfo image_info = {}; - image_info.imageLayout = layout; - image_info.imageView = view; - image_info.sampler = VK_NULL_HANDLE; - image_infos.push_back(image_info); - - VkWriteDescriptorSet write_set = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; - write_set.pNext = nullptr; - write_set.dstSet = target_set; - write_set.dstBinding = binding++; - write_set.dstArrayElement = 0; - write_set.descriptorCount = 1; - write_set.descriptorType = type; - write_set.pImageInfo = &image_infos.back(); - write_set.pBufferInfo = nullptr; - write_set.pTexelBufferView = nullptr; - write_desc_sets.push_back(write_set); - -#if FFX_SSSR_DEBUG_DESCRIPTOR_SETUP - vkUpdateDescriptorSets(device_, 1, &write_set, 0, nullptr); -#endif - }; - - auto BindBuffer = [this, &target_set, &binding, &write_desc_sets](VkDescriptorType type, const VkBufferView& buffer) { - VkWriteDescriptorSet write_set = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; - write_set.pNext = nullptr; - write_set.dstSet = target_set; - write_set.dstBinding = binding++; - write_set.dstArrayElement = 0; - write_set.descriptorCount = 1; - write_set.descriptorType = type; - write_set.pImageInfo = nullptr; - write_set.pBufferInfo = nullptr; - write_set.pTexelBufferView = &buffer; - write_desc_sets.push_back(write_set); - -#if FFX_SSSR_DEBUG_DESCRIPTOR_SETUP - vkUpdateDescriptorSets(device_, 1, &write_set, 0, nullptr); -#endif - }; - - // Place the descriptors - for (int i = 0; i < 2; ++i) - { - // Tile Classifier pass - { - target_set = tile_classification_descriptor_set_[i]; - binding = 0; - - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_roughness ? 
roughness_buffers[i] : roughness_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_roughness - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, tile_list_.buffer_view_); // g_tile_list - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, ray_list_.buffer_view_); // g_ray_list - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, tile_counter_.buffer_view_); // g_tile_counter - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, ray_counter_.buffer_view_); // g_ray_counter - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, temporal_denoiser_result_[i].image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_temporally_denoised_reflections - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, temporal_denoiser_result_[1 - i].image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_temporally_denoised_reflections_history - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ray_lengths_.image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_ray_lengths - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, temporal_variance_.image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_temporal_variance - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, output_buffer_uav, VK_IMAGE_LAYOUT_GENERAL); // g_denoised_reflections - } - - // Indirect args pass - { - target_set = indirect_args_descriptor_set_[i]; - binding = 0; - - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, tile_counter_.buffer_view_); // g_tile_counter - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, ray_counter_.buffer_view_); // g_ray_counter - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, intersection_pass_indirect_args_.buffer_view_); // g_intersect_args - BindBuffer(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, denoiser_pass_indirect_args_.buffer_view_); // g_denoiser_args - } - - // Intersection pass - { - target_set = intersection_descriptor_set_[i]; - binding = 0; - - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, scene_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_lit_scene - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, depth_hierarchy_srv, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_depth_buffer_hierarchy - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_normal ? normal_buffers[i] : normal_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_normal - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_roughness ? roughness_buffers[i] : roughness_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_roughness - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, environment_map_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_environment_map - - auto const& sampler = context.GetContextVK()->GetSampler2SPP(); - BindBuffer(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, sampler.sobol_buffer_.buffer_view_); // g_sobol_buffer - BindBuffer(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, sampler.ranking_tile_buffer_.buffer_view_); // g_ranking_tile_buffer - BindBuffer(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, sampler.scrambling_tile_buffer_.buffer_view_); // g_scrambling_tile_buffer - BindBuffer(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, ray_list_.buffer_view_); // g_ray_list - - BindSampler(linear_sampler_); // g_linear_sampler - BindSampler(environment_map_sampler); // g_environment_map_sampler - - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, temporal_denoiser_result_[i].image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_intersection_result - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ray_lengths_.image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_ray_lengths - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, output_buffer_uav, VK_IMAGE_LAYOUT_GENERAL); // g_denoised_reflections - } - - // Spatial denoising pass - { - target_set = spatial_denoising_descriptor_set_[i]; - binding = 0; - - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, depth_hierarchy_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_depth_buffer - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_normal ? 
normal_buffers[i] : normal_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_normal - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_roughness ? roughness_buffers[i] : roughness_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_roughness - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, temporal_denoiser_result_[i].image_view_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_intersection_result - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, temporal_variance_.image_view_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_has_ray - BindBuffer(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, tile_list_.buffer_view_); // g_tile_list - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, output_buffer_uav, VK_IMAGE_LAYOUT_GENERAL); // g_spatially_denoised_reflections - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ray_lengths_.image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_ray_lengths - } - - // Temporal denoising pass - { - target_set = temporal_denoising_descriptor_set_[i]; - binding = 0; - - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_normal ? normal_buffers[i] : normal_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_normal - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_roughness ? roughness_buffers[i] : roughness_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_roughness - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_normal ? normal_buffers[1 - i] : normal_history_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_normal_history - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_roughness ? 
roughness_buffers[1 - i] : roughness_history_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_roughness_history - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, depth_hierarchy_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_depth_buffer - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, motion_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_motion_vectors - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, temporal_denoiser_result_[1 - i].image_view_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_temporally_denoised_reflections_history - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ray_lengths_.image_view_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_ray_lengths - BindBuffer(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, tile_list_.buffer_view_); // g_tile_list - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, temporal_denoiser_result_[i].image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_temporally_denoised_reflections - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, output_buffer_uav, VK_IMAGE_LAYOUT_GENERAL); // g_spatially_denoised_reflections - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, temporal_variance_.image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_temporal_variance - } - - // EAW denoising pass - { - target_set = eaw_denoising_descriptor_set_[i]; - binding = 0; - - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_normal ? normal_buffers[i] : normal_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_normal - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, ping_pong_roughness ? 
roughness_buffers[i] : roughness_buffer_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_roughness - BindImage(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, depth_hierarchy_srv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); // g_depth_buffer - BindBuffer(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, tile_list_.buffer_view_); // g_tile_list - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, temporal_denoiser_result_[i].image_view_, VK_IMAGE_LAYOUT_GENERAL); // g_temporally_denoised_reflections - BindImage(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, output_buffer_uav, VK_IMAGE_LAYOUT_GENERAL); // g_denoised_reflections - } - } - vkUpdateDescriptorSets(device_, static_cast<uint32_t>(write_desc_sets.size()), write_desc_sets.data(), 0, nullptr); - } - - /** - Gets the index of the current timestamp query. - - \return The index of the current timestamp query. - */ - std::uint32_t ReflectionViewVK::GetTimestampQueryIndex() const - { - return timestamp_queries_index_ * kTimestampQuery_Count + static_cast<std::uint32_t>(timestamp_queries_[timestamp_queries_index_].size()); - } - - float Clamp(float value, float min, float max) - { - if (value < min) - { - return min; - } - else if (value > max) - { - return max; - } - return value; - } - - /** - Resolves the Vulkan reflection view. - - \param context The context to be used. - \param reflection_view The reflection view to be resolved. - \param resolve_reflection_view_info The reflection view resolve information.
- */ - void ReflectionViewVK::Resolve(Context& context, ReflectionView const& reflection_view, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info) - { - auto const command_buffer = resolve_reflection_view_info.pVkCommandEncodeInfo->commandBuffer; - if (!command_buffer) - { - throw reflection_error(context, FFX_SSSR_STATUS_INVALID_VALUE, "No command buffer was supplied, cannot encode device commands"); - } - - FFX_SSSR_ASSERT(resolve_reflection_view_info.pVkCommandEncodeInfo); - FFX_SSSR_ASSERT(resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_1 || resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_2 || resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_4); - - // Query timestamp value prior to resolving the reflection view - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - auto const start_index = timestamp_queries_index_ * kTimestampQuery_Count; - - if (!timestamp_queries.empty()) - { - // Reset performance counters - tile_classification_elapsed_time_ = 0ull; - denoising_elapsed_time_ = 0ull; - intersection_elapsed_time_ = 0ull; - - uint32_t timestamp_count = static_cast<uint32_t>(timestamp_queries.size()); - - uint64_t data[kTimestampQuery_Count * 8]; // maximum of 8 frames in flight allowed - VkResult result = vkGetQueryPoolResults(device_, - timestamp_query_pool_, - start_index, - timestamp_count, - timestamp_count * sizeof(uint64_t), - data, - sizeof(uint64_t), - VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); - - if (result == VK_SUCCESS) - { - for (auto i = 0u, j = 1u; j < timestamp_count; ++i, ++j) - { - auto const elapsed_time = (data[j] - data[i]); - - switch (timestamp_queries[j]) - { - case kTimestampQuery_TileClassification: - tile_classification_elapsed_time_ = elapsed_time; - break; - case kTimestampQuery_Intersection: - intersection_elapsed_time_ =
elapsed_time; - break; - case kTimestampQuery_Denoising: - denoising_elapsed_time_ = elapsed_time; - break; - default: - // unrecognized timestamp query - break; - } - } - } - else if (result != VK_NOT_READY) - { - throw reflection_error(context, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to query timestamp query results"); - } - } - - timestamp_queries.clear(); - - vkCmdResetQueryPool(command_buffer, timestamp_query_pool_, start_index, kTimestampQuery_Count); - - vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, timestamp_query_pool_, GetTimestampQueryIndex()); - timestamp_queries.push_back(kTimestampQuery_Init); - } - - // Encode the relevant pass data - struct PassData - { - matrix4 inv_view_projection_; - matrix4 projection_; - matrix4 inv_projection_; - matrix4 view_; - matrix4 inv_view_; - matrix4 prev_view_projection_; - std::uint32_t frame_index_; - std::uint32_t max_traversal_intersections_; - std::uint32_t min_traversal_occupancy_; - std::uint32_t most_detailed_mip_; - float temporal_stability_factor_; - float depth_buffer_thickness_; - std::uint32_t samples_per_quad_; - std::uint32_t temporal_variance_guided_tracing_enabled_; - float roughness_threshold_; - std::uint32_t skip_denoiser_; - }; - auto& upload_buffer = context.GetContextVK()->GetUploadBuffer(); - PassData* pass_data; - if (!upload_buffer.AllocateBuffer(sizeof(PassData), pass_data)) - { - throw reflection_error(context, FFX_SSSR_STATUS_OUT_OF_MEMORY, "Failed to allocate %u bytes of upload memory, consider increasing uploadBufferSize", sizeof(PassData)); - } - - // Fill constant buffer - matrix4 view_projection = reflection_view.projection_matrix_ * reflection_view.view_matrix_; - pass_data->inv_view_projection_ = matrix4::inverse(view_projection); - pass_data->projection_ = reflection_view.projection_matrix_; - pass_data->inv_projection_ = matrix4::inverse(reflection_view.projection_matrix_); - pass_data->view_ = reflection_view.view_matrix_; - pass_data->inv_view_ = 
matrix4::inverse(reflection_view.view_matrix_); - pass_data->prev_view_projection_ = prev_view_projection_; - pass_data->frame_index_ = context.GetFrameIndex(); - - float temporal_stability_scale = Clamp(resolve_reflection_view_info.temporalStabilityScale, 0, 1); - pass_data->max_traversal_intersections_ = resolve_reflection_view_info.maxTraversalIterations; - pass_data->min_traversal_occupancy_ = resolve_reflection_view_info.minTraversalOccupancy; - pass_data->most_detailed_mip_ = resolve_reflection_view_info.mostDetailedDepthHierarchyMipLevel; - pass_data->temporal_stability_factor_ = temporal_stability_scale * temporal_stability_scale; - pass_data->depth_buffer_thickness_ = resolve_reflection_view_info.depthBufferThickness; - pass_data->samples_per_quad_ = resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_4 ? 4 : (resolve_reflection_view_info.samplesPerQuad == FFX_SSSR_RAY_SAMPLES_PER_QUAD_2 ? 2 : 1); - pass_data->temporal_variance_guided_tracing_enabled_ = resolve_reflection_view_info.flags & FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING ? 1 : 0; - pass_data->roughness_threshold_ = resolve_reflection_view_info.roughnessThreshold; - pass_data->skip_denoiser_ = resolve_reflection_view_info.flags & FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE ? 
0 : 1; - prev_view_projection_ = view_projection; - - uint32_t uniform_buffer_index = context.GetFrameIndex() % context.GetFrameCountBeforeReuse(); - VkDescriptorSet uniform_buffer_descriptor_set = uniform_buffer_descriptor_set_[uniform_buffer_index]; - - // Update descriptor to sliding window in upload buffer that contains the updated pass data - { - VkDescriptorBufferInfo buffer_info = {}; - buffer_info.buffer = upload_buffer.GetResource(); - buffer_info.offset = upload_buffer.GetOffset(pass_data); - buffer_info.range = sizeof(PassData); - - VkWriteDescriptorSet write_set = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; - write_set.pNext = nullptr; - write_set.dstSet = uniform_buffer_descriptor_set; - write_set.dstBinding = 0; - write_set.dstArrayElement = 0; - write_set.descriptorCount = 1; - write_set.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - write_set.pImageInfo = nullptr; - write_set.pBufferInfo = &buffer_info; - write_set.pTexelBufferView = nullptr; - vkUpdateDescriptorSets(device_, 1, &write_set, 0, nullptr); - } - - std::uint32_t resource_descriptor_set_index = context.GetFrameIndex() & 1u; - - ContextVK* vk_context = context.GetContextVK(); - - // Tile Classification pass - { - VkDescriptorSet sets[] = { uniform_buffer_descriptor_set, tile_classification_descriptor_set_[resource_descriptor_set_index] }; - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetTileClassificationPass().pipeline_); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetTileClassificationPass().pipeline_layout_, 0, FFX_SSSR_ARRAY_SIZE(sets), sets, 0, nullptr); - uint32_t dim_x = RoundedDivide(width_, 8u); - uint32_t dim_y = RoundedDivide(height_, 8u); - vkCmdDispatch(command_buffer, dim_x, dim_y, 1); - } - - // Ensure that the tile classification pass finished - ComputeBarrier(command_buffer); - - // Indirect Arguments pass - { - VkDescriptorSet sets[] = { uniform_buffer_descriptor_set, 
indirect_args_descriptor_set_[resource_descriptor_set_index] }; - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetIndirectArgsPass().pipeline_); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetIndirectArgsPass().pipeline_layout_, 0, FFX_SSSR_ARRAY_SIZE(sets), sets, 0, nullptr); - vkCmdDispatch(command_buffer, 1, 1, 1); - } - - // Query the amount of time spent in the intersection pass - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - FFX_SSSR_ASSERT(timestamp_queries.size() == 1ull && timestamp_queries[0] == kTimestampQuery_Init); - - vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, timestamp_query_pool_, GetTimestampQueryIndex()); - timestamp_queries.push_back(kTimestampQuery_TileClassification); - } - - // Ensure that the arguments are written - IndirectArgumentsBarrier(command_buffer); - - // Intersection pass - { - VkDescriptorSet sets[] = { uniform_buffer_descriptor_set, intersection_descriptor_set_[resource_descriptor_set_index] }; - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetIntersectionPass().pipeline_); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetIntersectionPass().pipeline_layout_, 0, FFX_SSSR_ARRAY_SIZE(sets), sets, 0, nullptr); - vkCmdDispatchIndirect(command_buffer, intersection_pass_indirect_args_.buffer_, 0); - } - - // Query the amount of time spent in the intersection pass - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - FFX_SSSR_ASSERT(timestamp_queries.size() == 2ull && timestamp_queries[1] == kTimestampQuery_TileClassification); - - vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, timestamp_query_pool_, 
GetTimestampQueryIndex()); - timestamp_queries.push_back(kTimestampQuery_Intersection); - } - - if (resolve_reflection_view_info.flags & FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE) - { - // Ensure that the intersection pass finished - VkImageMemoryBarrier intersection_finished_barriers[] = { - Transition(temporal_denoiser_result_[resource_descriptor_set_index].image_, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL), - Transition(temporal_variance_.image_, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) - }; - TransitionBarriers(command_buffer, intersection_finished_barriers, FFX_SSSR_ARRAY_SIZE(intersection_finished_barriers)); - - // Spatial denoiser passes - { - VkDescriptorSet sets[] = { uniform_buffer_descriptor_set, spatial_denoising_descriptor_set_[resource_descriptor_set_index] }; - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetSpatialDenoisingPass().pipeline_); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetSpatialDenoisingPass().pipeline_layout_, 0, FFX_SSSR_ARRAY_SIZE(sets), sets, 0, nullptr); - vkCmdDispatchIndirect(command_buffer, denoiser_pass_indirect_args_.buffer_, 0); - } - - // Ensure that the spatial denoising pass finished. We don't have the resource for the final result available, thus we have to wait for any UAV access to finish. 
- VkImageMemoryBarrier spatial_denoiser_finished_barriers[] = { - Transition(temporal_denoiser_result_[resource_descriptor_set_index].image_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), - Transition(temporal_denoiser_result_[1 - resource_descriptor_set_index].image_, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL), - Transition(temporal_variance_.image_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), - Transition(ray_lengths_.image_, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) - }; - TransitionBarriers(command_buffer, spatial_denoiser_finished_barriers, FFX_SSSR_ARRAY_SIZE(spatial_denoiser_finished_barriers)); - - // Temporal denoiser passes - { - VkDescriptorSet sets[] = { uniform_buffer_descriptor_set, temporal_denoising_descriptor_set_[resource_descriptor_set_index] }; - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetTemporalDenoisingPass().pipeline_); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetTemporalDenoisingPass().pipeline_layout_, 0, FFX_SSSR_ARRAY_SIZE(sets), sets, 0, nullptr); - vkCmdDispatchIndirect(command_buffer, denoiser_pass_indirect_args_.buffer_, 0); - } - - // Ensure that the temporal denoising pass finished - VkImageMemoryBarrier temporal_denoiser_finished_barriers[] = { - Transition(ray_lengths_.image_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), - Transition(temporal_denoiser_result_[1 - resource_descriptor_set_index].image_, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), - }; - TransitionBarriers(command_buffer, temporal_denoiser_finished_barriers, FFX_SSSR_ARRAY_SIZE(temporal_denoiser_finished_barriers)); - - // EAW denoiser passes - { - VkDescriptorSet sets[] = { uniform_buffer_descriptor_set, eaw_denoising_descriptor_set_[resource_descriptor_set_index] }; - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 
vk_context->GetEawDenoisingPass().pipeline_); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, vk_context->GetEawDenoisingPass().pipeline_layout_, 0, FFX_SSSR_ARRAY_SIZE(sets), sets, 0, nullptr); - vkCmdDispatchIndirect(command_buffer, denoiser_pass_indirect_args_.buffer_, 0); - } - - // Query the amount of time spent in the denoiser passes - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - auto& timestamp_queries = timestamp_queries_[timestamp_queries_index_]; - - FFX_SSSR_ASSERT(timestamp_queries.size() == 3ull && timestamp_queries[2] == kTimestampQuery_Intersection); - - vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, timestamp_query_pool_, GetTimestampQueryIndex()); - timestamp_queries.push_back(kTimestampQuery_Denoising); - } - } - - // Move timestamp queries to next frame - if ((flags_ & FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS) != 0) - { - timestamp_queries_index_ = (timestamp_queries_index_ + 1u) % context.GetFrameCountBeforeReuse(); - } - } - - VkImageMemoryBarrier ReflectionViewVK::Transition(VkImage image, VkImageLayout before, VkImageLayout after) const - { - VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.oldLayout = before; - barrier.newLayout = after; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = image; - - VkImageSubresourceRange subresourceRange = {}; - subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresourceRange.baseArrayLayer = 0; - subresourceRange.layerCount = 1; - subresourceRange.baseMipLevel = 0; - subresourceRange.levelCount = 1; - - barrier.subresourceRange = subresourceRange; - return barrier; - } - - void 
ReflectionViewVK::TransitionBarriers(VkCommandBuffer command_buffer, const VkImageMemoryBarrier * image_barriers, uint32_t image_barriers_count) const - { - vkCmdPipelineBarrier(command_buffer, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, - 0, nullptr, - 0, nullptr, - image_barriers_count, image_barriers); - } - - void ReflectionViewVK::ComputeBarrier(VkCommandBuffer command_buffer) const - { - VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - vkCmdPipelineBarrier(command_buffer, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, - 1, &barrier, - 0, nullptr, - 0, nullptr); - } - - void ReflectionViewVK::IndirectArgumentsBarrier(VkCommandBuffer command_buffer) const - { - VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; - vkCmdPipelineBarrier(command_buffer, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, - 0, - 1, &barrier, - 0, nullptr, - 0, nullptr); - } -} diff --git a/ffx-sssr/src/vk/reflection_view_vk.h b/ffx-sssr/src/vk/reflection_view_vk.h deleted file mode 100644 index 19563f5..0000000 --- a/ffx-sssr/src/vk/reflection_view_vk.h +++ /dev/null @@ -1,164 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include -#include -#include - -#include "macros.h" -#include "matrix4.h" -#include "ffx_sssr.h" -#include "buffer_vk.h" -#include "image_vk.h" - -namespace ffx_sssr -{ - class Context; - class ReflectionView; - - /** - The ReflectionViewVK class encapsulates the data required for resolving an individual reflection view. - */ - class ReflectionViewVK - { - FFX_SSSR_NON_COPYABLE(ReflectionViewVK); - - public: - - /** - The available timestamp queries. - */ - enum TimestampQuery - { - kTimestampQuery_Init, - kTimestampQuery_TileClassification, - kTimestampQuery_Intersection, - kTimestampQuery_Denoising, - - kTimestampQuery_Count - }; - - /** - The type definition for an array of timestamp queries. 
- */ - using TimestampQueries = std::vector; - - ReflectionViewVK(); - ~ReflectionViewVK(); - - ReflectionViewVK(ReflectionViewVK&& other) noexcept; - ReflectionViewVK& operator =(ReflectionViewVK&& other) noexcept; - - void Create(Context& context, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info); - - uint32_t GetConservativeResourceDescriptorCount(const Context& context) const; - void CreateDescriptorPool(const Context& context); - void SetupInternalResources(Context& context, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info); - - void AllocateDescriptorSets(Context& context); - VkDescriptorSet AllocateDescriptorSet(Context& context, VkDescriptorSetLayout layout); - void InitializeResourceDescriptorSets(Context& context, FfxSssrCreateReflectionViewInfo const& create_reflection_view_info); - - std::uint32_t GetTimestampQueryIndex() const; - - void Resolve(Context& context, ReflectionView const& reflection_view, FfxSssrResolveReflectionViewInfo const& resolve_reflection_view_info); - - // The device that created the reflection view. Livetime handled by the context. - VkDevice device_; - // The physical device that created the reflection view. Livetime handled by the context. - VkPhysicalDevice physical_device_; - // The width of the reflection view (in texels). - std::uint32_t width_; - // The height of the reflection view (in texels). - std::uint32_t height_; - // The reflection view creation flags. - FfxSssrCreateReflectionViewFlags flags_; - - // The descriptor pool for all resource views. - VkDescriptorPool descriptor_pool_; - - // Linear sampler. - VkSampler linear_sampler_; - // Containing all tiles that need at least one ray. - BufferVK tile_list_; - BufferVK tile_counter_; - // Containing all rays that need to be traced. - BufferVK ray_list_; - BufferVK ray_counter_; - // Indirect arguments for intersection pass. - BufferVK intersection_pass_indirect_args_; - // Indirect arguments for denoiser pass. 
- BufferVK denoiser_pass_indirect_args_; - // Intermediate result of the temporal denoising pass - double buffered to keep history and aliases the intersection result. - ImageVK temporal_denoiser_result_[2]; - // Holds the length of each reflection ray - used for temporal reprojection. - ImageVK ray_lengths_; - // Holds the temporal variance of the last two frames. - ImageVK temporal_variance_; - - // The query pool containing the recorded timestamps. - VkQueryPool timestamp_query_pool_; - // The number of GPU ticks spent in the tile classification pass. - std::uint64_t tile_classification_elapsed_time_; - // The number of GPU ticks spent in depth buffer intersection. - std::uint64_t intersection_elapsed_time_; - // The number of GPU ticks spent denoising. - std::uint64_t denoising_elapsed_time_; - // The array of timestamp that were queried. - std::vector timestamp_queries_; - // The index of the active set of timestamp queries. - std::uint32_t timestamp_queries_index_; - - // Format of the resolved scene. - VkFormat scene_format_; - - // The descriptor tables. One per shader pass per frame. - // Even with more than 2 frames in flight we only swap between the last two - // as we keep only one frame of history. - - // Descriptor set for uniform buffers. Be conservative in the number of frames in flight. - VkDescriptorSet uniform_buffer_descriptor_set_[8]; - // Descriptor sets of the tile classification pass. - VkDescriptorSet tile_classification_descriptor_set_[2]; - // Descriptor sets of the indirect arguments pass. - VkDescriptorSet indirect_args_descriptor_set_[2]; - // Descriptor sets of the depth buffer intersection pass. - VkDescriptorSet intersection_descriptor_set_[2]; - // Descriptor sets of the spatial denoising pass. - VkDescriptorSet spatial_denoising_descriptor_set_[2]; - // Descriptor sets of the temporal denoising pass. - VkDescriptorSet temporal_denoising_descriptor_set_[2]; - // Descriptor sets of the eaw denoising pass. 
- VkDescriptorSet eaw_denoising_descriptor_set_[2]; - - // The view projection matrix of the last frame. - matrix4 prev_view_projection_; - - private: - VkImageMemoryBarrier Transition(VkImage image, VkImageLayout before, VkImageLayout after) const; - void TransitionBarriers(VkCommandBuffer command_buffer, const VkImageMemoryBarrier* image_barriers, uint32_t image_barriers_count) const; - void ComputeBarrier(VkCommandBuffer command_buffer) const; - void IndirectArgumentsBarrier(VkCommandBuffer command_buffer) const; - }; -} - diff --git a/ffx-sssr/src/vk/sampler_vk.cpp b/ffx-sssr/src/vk/sampler_vk.cpp deleted file mode 100644 index ecf5521..0000000 --- a/ffx-sssr/src/vk/sampler_vk.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#include "sampler_vk.h" - -#include - -namespace ffx_sssr -{ - /** - The constructor for the SamplerD3D12 class. - */ - BlueNoiseSamplerVK::BlueNoiseSamplerVK() - : sobol_buffer_() - , ranking_tile_buffer_() - , scrambling_tile_buffer_() - { - } - - /** - The constructor for the SamplerD3D12 class. - - \param other The sampler to be moved. - */ - BlueNoiseSamplerVK::BlueNoiseSamplerVK(BlueNoiseSamplerVK&& other) noexcept - : sobol_buffer_(std::move(other.sobol_buffer_)) - , ranking_tile_buffer_(std::move(other.ranking_tile_buffer_)) - , scrambling_tile_buffer_(std::move(other.scrambling_tile_buffer_)) - { - } - - /** - The destructor for the SamplerD3D12 class. - */ - BlueNoiseSamplerVK::~BlueNoiseSamplerVK() - { - } - - /** - Assigns the sampler. - - \param other The sampler to be moved. - \return The assigned sampler. - */ - BlueNoiseSamplerVK& BlueNoiseSamplerVK::operator =(BlueNoiseSamplerVK&& other) noexcept - { - if (this != &other) - { - sobol_buffer_ = std::move(other.sobol_buffer_); - ranking_tile_buffer_ = std::move(other.ranking_tile_buffer_); - scrambling_tile_buffer_ = std::move(other.scrambling_tile_buffer_); - } - - return *this; - } -} diff --git a/ffx-sssr/src/vk/shader_compiler_vk.cpp b/ffx-sssr/src/vk/shader_compiler_vk.cpp deleted file mode 100644 index 74d2e32..0000000 --- a/ffx-sssr/src/vk/shader_compiler_vk.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#include "shader_compiler_vk.h" - -#include -#include -#include - -#if FFX_SSSR_DUMP_SHADERS -#include -#endif // FFX_SSSR_DUMP_SHADERS - -#include "reflection_error.h" -#include "utils.h" - -namespace ffx_sssr -{ - /** - The constructor for the ShaderCompilerVK class. - - \param context The context to be used. - */ - ShaderCompilerVK::ShaderCompilerVK(Context& context) - : context_(context) - , dxc_include_handler_(nullptr) - , dxc_compiler_(nullptr) - , dxc_library_(nullptr) - { - } - - /** - The destructor for the ShaderCompilerVK class. - */ - ShaderCompilerVK::~ShaderCompilerVK() - { - if (dxc_compiler_) - dxc_compiler_->Release(); - if (dxc_library_) - dxc_library_->Release(); - if (dxc_include_handler_) - dxc_include_handler_->Release(); - - dxc_dll_support_.Cleanup(); - } - - /** - Compiles the shader file. - - \param filename The location of the shader file. 
- \param profile The targeted shader model. - \param defines The list of defines to be used. - \param define_count The number of defines. - \return The compiled shader. - */ - ShaderVK ShaderCompilerVK::CompileShaderFile(char const* filename, char const* profile, LPCWSTR* arguments, std::uint32_t argument_count, DxcDefine* defines, std::uint32_t define_count) - { - HRESULT result; - FFX_SSSR_ASSERT(filename && profile); - - if (!LoadShaderCompiler()) - { - return ShaderVK(); - } - - // Compile the shader code from source - IDxcBlobEncoding* dxc_source; - auto const shader_filename = StringToWString(filename); - result = dxc_library_->CreateBlobFromFile(shader_filename.c_str(), nullptr, &dxc_source); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Could not create shader blob from %s", filename); - - ShaderVK shader = CompileShaderBlob(dxc_source, shader_filename.c_str(), profile, arguments, argument_count, defines, define_count); - - dxc_source->Release(); - - return shader; - } - - ShaderVK ShaderCompilerVK::CompileShaderString(char const * string, std::uint32_t string_size, char const* shader_name, char const * profile, LPCWSTR * arguments, std::uint32_t argument_count, DxcDefine * defines, std::uint32_t define_count) - { - HRESULT result; - FFX_SSSR_ASSERT(string && profile); - - if (!LoadShaderCompiler()) - { - return ShaderVK(); - } - - IDxcBlobEncoding* dxc_source; - result = dxc_library_->CreateBlobWithEncodingFromPinned((LPBYTE)string, string_size, 0, &dxc_source); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INVALID_OPERATION, "Could not create blob with encoding from pinned for %s", shader_name); - - auto const wc_shader_name = StringToWString(shader_name); - - ShaderVK shader = CompileShaderBlob(dxc_source, wc_shader_name.c_str(), profile, arguments, argument_count, defines, define_count); - - dxc_source->Release(); - - return shader; - } - - bool 
ShaderCompilerVK::LoadShaderCompiler() - { - // Load shader compiler - if (!dxc_dll_support_.IsEnabled()) - { - HRESULT result = dxc_dll_support_.Initialize(); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to initialize dxcompiler.dll support"); - - result = dxc_dll_support_.CreateInstance(CLSID_DxcCompiler, &dxc_compiler_); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to create DXC compiler instance"); - - result = dxc_dll_support_.CreateInstance(CLSID_DxcLibrary, &dxc_library_); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to create DXC library instance"); - - result = dxc_library_->CreateIncludeHandler(&dxc_include_handler_); - if (FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to create DXC include handler"); - } - else if (!dxc_compiler_ || !dxc_library_) - { - return false; // failed to create DXC instances - } - - return true; - } - - ShaderVK ShaderCompilerVK::CompileShaderBlob(IDxcBlob * dxc_source, wchar_t const * shader_name, char const * profile, LPCWSTR * arguments, std::uint32_t argument_count, DxcDefine * defines, std::uint32_t define_count) - { - HRESULT result; - - std::vector resolved_defines; - resolved_defines.reserve(define_count); - - for (uint32_t i = 0; i < define_count; ++i) - { - if (defines[i].Name != nullptr) - { - resolved_defines.push_back(defines[i]); - if (resolved_defines.back().Value == nullptr) - { - resolved_defines.back().Value = L"1"; - } - } - } - - ShaderVK shader; - IDxcOperationResult* dxc_result; - auto const target_profile = StringToWString(profile); - result = dxc_compiler_->Compile(dxc_source, - shader_name, - L"main", - target_profile.c_str(), - arguments, - argument_count, - resolved_defines.data(), - static_cast(resolved_defines.size()), - dxc_include_handler_, - &dxc_result); - - // Check for compilation errors - if 
(FAILED(result)) - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Failed to compile D3D12 shader source code"); - if (FAILED(dxc_result->GetStatus(&result)) || FAILED(result)) - { - IDxcBlobEncoding* dxc_error; - dxc_result->GetErrorBuffer(&dxc_error); - std::string const error(static_cast(dxc_error->GetBufferPointer())); - dxc_result->Release(); - dxc_error->Release(); - throw reflection_error(context_, FFX_SSSR_STATUS_INTERNAL_ERROR, "Unable to compile shader file:\r\n> %s", error.c_str()); - } - - // Get hold of the program blob - IDxcBlob* dxc_program = nullptr; - dxc_result->GetResult(&dxc_program); - FFX_SSSR_ASSERT(dxc_program != nullptr); - dxc_result->Release(); - - // Retrieve the shader bytecode - shader.BytecodeLength = dxc_program->GetBufferSize(); - auto const shader_bytecode = malloc(shader.BytecodeLength); - FFX_SSSR_ASSERT(shader_bytecode != nullptr); // out of memory - memcpy(shader_bytecode, dxc_program->GetBufferPointer(), shader.BytecodeLength); - shader.pShaderBytecode = shader_bytecode; - dxc_program->Release(); - -#if FFX_SSSR_DUMP_SHADERS - std::wstring path = shader_name + std::wstring(L".spirv"); - std::ofstream filestream(path.c_str(), std::ios::binary | std::ios::out); - filestream.write((const char*)shader.pShaderBytecode, shader.BytecodeLength); - filestream.close(); -#endif // FFX_SSSR_DUMP_SHADERS - - return shader; - } -} diff --git a/ffx-sssr/src/vk/shader_compiler_vk.h b/ffx-sssr/src/vk/shader_compiler_vk.h deleted file mode 100644 index 9eb85fb..0000000 --- a/ffx-sssr/src/vk/shader_compiler_vk.h +++ /dev/null @@ -1,86 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -#include - -#include -#include - -#include "macros.h" - -namespace ffx_sssr -{ - class Context; - - /** - The ShaderVK class is a simple helper for freeing the shader bytecode upon destruction. - */ - class ShaderVK - { - FFX_SSSR_NON_COPYABLE(ShaderVK); - - public: - inline ShaderVK(); - inline ~ShaderVK(); - - inline operator bool() const; - - inline ShaderVK(ShaderVK&& other) noexcept; - inline ShaderVK& operator =(ShaderVK&& other) noexcept; - - const void* pShaderBytecode; - SIZE_T BytecodeLength; - }; - - /** - The ShaderCompilerVK class is a utility for compiling Vulkan shader code. 
- */ - class ShaderCompilerVK - { - FFX_SSSR_NON_COPYABLE(ShaderCompilerVK); - - public: - ShaderCompilerVK(Context& context); - ~ShaderCompilerVK(); - - ShaderVK CompileShaderFile(char const* filename, char const* profile, LPCWSTR* arguments = nullptr, std::uint32_t argument_count = 0, DxcDefine* defines = nullptr, std::uint32_t define_count = 0u); - ShaderVK CompileShaderString(char const* string, std::uint32_t string_size, char const* shader_name, char const* profile, LPCWSTR* arguments = nullptr, std::uint32_t argument_count = 0, DxcDefine* defines = nullptr, std::uint32_t define_count = 0u); - - protected: - bool LoadShaderCompiler(); - ShaderVK CompileShaderBlob(IDxcBlob* dxc_source, wchar_t const* shader_name, char const* profile, LPCWSTR* arguments = nullptr, std::uint32_t argument_count = 0, DxcDefine* defines = nullptr, std::uint32_t define_count = 0u); - - // The context to be used. - Context& context_; - // A helper for loading the dxcompiler library. - dxc::DxcDllSupport dxc_dll_support_; - // The Vulkan include handler. - IDxcIncludeHandler* dxc_include_handler_; - // The Vulkan shader compiler. - IDxcCompiler2* dxc_compiler_; - // The Vulkan shader library. - IDxcLibrary* dxc_library_; - }; -} - -#include "shader_compiler_vk.inl" diff --git a/ffx-sssr/src/vk/shader_compiler_vk.inl b/ffx-sssr/src/vk/shader_compiler_vk.inl deleted file mode 100644 index 02f9e8d..0000000 --- a/ffx-sssr/src/vk/shader_compiler_vk.inl +++ /dev/null @@ -1,82 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the ShaderVK class. - */ - ShaderVK::ShaderVK() - { - memset(this, 0, sizeof(*this)); - } - - /** - The destructor for the ShaderVK class. - */ - ShaderVK::~ShaderVK() - { - free(const_cast(pShaderBytecode)); - } - - /** - The constructor for the ShaderVK class. - - \param other The shader to be moved. - */ - ShaderVK::ShaderVK(ShaderVK&& other) noexcept - { - pShaderBytecode = other.pShaderBytecode; - BytecodeLength = other.BytecodeLength; - - other.pShaderBytecode = nullptr; - } - - /** - Assigns the shader. - - \param other The shader to be moved. - \return The assigned shader. 
- */ - ShaderVK& ShaderVK::operator =(ShaderVK&& other) noexcept - { - if (this != &other) - { - pShaderBytecode = other.pShaderBytecode; - BytecodeLength = other.BytecodeLength; - - other.pShaderBytecode = nullptr; - } - return *this; - } - - /** - Checks whether the shader is valid. - - \return true if the shader is valid, false otherwise. - */ - ShaderVK::operator bool() const - { - return pShaderBytecode != nullptr; - } -} diff --git a/ffx-sssr/src/vk/upload_buffer_vk.cpp b/ffx-sssr/src/vk/upload_buffer_vk.cpp deleted file mode 100644 index a0c398e..0000000 --- a/ffx-sssr/src/vk/upload_buffer_vk.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#include "upload_buffer_vk.h" - -#include "utils.h" -#include "context.h" -#include "context_vk.h" - -namespace ffx_sssr -{ - /** - The constructor for the UploadBufferVK class. - - \param context The Vulkan context to be used. - \param buffer_size The size of the upload buffer (in bytes). - */ - UploadBufferVK::UploadBufferVK(ContextVK& context, std::size_t buffer_size) - : data_(nullptr) - , context_(context) - , buffer_() - , buffer_size_(buffer_size) - , blocks_(buffer_size) - { - FFX_SSSR_ASSERT(context.GetDevice()); - FFX_SSSR_ASSERT(context.GetPhysicalDevice()); - FFX_SSSR_ASSERT(buffer_size_ > 0); - } - - /** - The destructor for the UploadBufferVK class. - */ - UploadBufferVK::~UploadBufferVK() - { - if (buffer_.mapped_) - { - buffer_.Unmap(); - } - } - - /** - Allocates a buffer. - - \param size The size of the buffer (in bytes). - \param data The pointer to the pinned memory. - \return true if the buffer was allocated successfully, false otherwise. - */ - bool UploadBufferVK::AllocateBufferInternal(std::size_t size, void*& data) - { - std::size_t start; - - auto const memory_block = blocks_.AcquireBlock(start, size, 256u); - - if (!memory_block) - { - return false; - } - - data = static_cast(data_) + start; - - memory_block->block_index_ = context_.GetContext().GetFrameIndex(); - memory_block->frame_index_ = &context_.GetContext().GetFrameIndex(); - memory_block->frame_count_before_reuse_ = context_.GetContext().GetFrameCountBeforeReuse(); - - return true; - } - - /** - Initialize and map the upload buffer. Has to be deferred as we can't access the allocator in the constructor yet. 
- */ - void UploadBufferVK::Initialize() - { - BufferVK::CreateInfo create_info = {}; - create_info.memory_property_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; // TODO: VMA_MEMORY_USAGE_CPU_TO_GPU - create_info.buffer_usage_ = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - create_info.format_ = VK_FORMAT_UNDEFINED; - create_info.size_in_bytes_ = buffer_size_; - - buffer_ = BufferVK(context_.GetDevice(), context_.GetPhysicalDevice(), create_info); - buffer_.Map(&data_); - } -} diff --git a/ffx-sssr/src/vk/upload_buffer_vk.h b/ffx-sssr/src/vk/upload_buffer_vk.h deleted file mode 100644 index 9b1df3f..0000000 --- a/ffx-sssr/src/vk/upload_buffer_vk.h +++ /dev/null @@ -1,88 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-********************************************************************/ -#pragma once - -#include - -#include "memory.h" -#include "buffer_vk.h" - -namespace ffx_sssr -{ - class Context; - class ContextVK; - - /** - The UploadBufferVK class allows to transfer some memory from the CPU to the GPU. - */ - class UploadBufferVK - { - FFX_SSSR_NON_COPYABLE(UploadBufferVK); - - public: - UploadBufferVK(ContextVK& context, std::size_t buffer_size); - ~UploadBufferVK(); - - void Initialize(); - - inline std::size_t GetSize() const; - inline VkBuffer GetResource() const; - inline std::size_t GetOffset(void *data) const; - - template - bool AllocateBuffer(std::size_t size, TYPE*& data); - protected: - - bool AllocateBufferInternal(std::size_t size, void*& data); - - /** - The Block class represents an individual synchronizable block to be upload for memory upload. - */ - class Block - { - public: - inline Block(); - - inline bool CanBeReused() const; - - // The index of the currently calculated frame. - std::uint32_t* frame_index_; - // The frame at which this block was created. - std::uint32_t block_index_; - // The number of elapsed frames before re-use. - std::uint32_t frame_count_before_reuse_; - }; - - // The pointer to the mapped data. - void* data_; - // The context to be used. - ContextVK& context_; - // The resource to the upload buffer. - BufferVK buffer_; - // The maximum size of the buffer in bytes. - std::size_t buffer_size_; - // The available blocks for memory upload. - RingBuffer blocks_; - }; -} - -#include "upload_buffer_vk.inl" diff --git a/ffx-sssr/src/vk/upload_buffer_vk.inl b/ffx-sssr/src/vk/upload_buffer_vk.inl deleted file mode 100644 index 1be590b..0000000 --- a/ffx-sssr/src/vk/upload_buffer_vk.inl +++ /dev/null @@ -1,104 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ -#pragma once - -namespace ffx_sssr -{ - /** - The constructor for the Block class. - */ - UploadBufferVK::Block::Block() - : frame_index_(nullptr) - , block_index_(0u) - , frame_count_before_reuse_(0u) - { - } - - /** - Checks whether the memory block can now be re-used. - - \return true if the memory block can be re-used, false otherwise. - */ - bool UploadBufferVK::Block::CanBeReused() const - { - FFX_SSSR_ASSERT(frame_index_ && *frame_index_ >= block_index_); - - return (*frame_index_ - block_index_ >= frame_count_before_reuse_); - } - - /** - Gets the size of the upload buffer. - - \return The size of the upload buffer (in bytes). - */ - std::size_t UploadBufferVK::GetSize() const - { - return buffer_size_; - } - - /** - Gets the resource for the upload buffer. - - \return The resource for the upload buffer. 
- */ - VkBuffer UploadBufferVK::GetResource() const - { - return buffer_.buffer_; - } - - /** - Gets the offset for the allocate range of memory. - - \param data The allocated range of memory. - \return The offset within the upload buffer (in bytes). - */ - std::size_t UploadBufferVK::GetOffset(void* data) const - { - if (!data) - return 0ull; - auto const offset = static_cast(data) - static_cast(data_); - FFX_SSSR_ASSERT(data >= data_ && static_cast(offset) < buffer_size_); // buffer overflow! - return static_cast(offset); - } - - /** - Allocates a buffer. - - \param size The size of the buffer (in bytes). - \param data The pointer to the pinned memory. - \return true if the buffer was allocated successfully, false otherwise. - */ - template - bool UploadBufferVK::AllocateBuffer(std::size_t size, TYPE*& data) - { - void* data_internal; - - if (!AllocateBufferInternal(Align(size, 256ull), data_internal)) - { - return false; - } - - data = static_cast(data_internal); - - return true; - } -} diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 3c225ee..c1d9d11 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -3,13 +3,6 @@ set(CMAKE_GENERATOR_PLATFORM x64) project (SssrSample_${GFX_API}) -# set options for FidelityFX SSSR -if(GFX_API STREQUAL DX12) - set(FFX_SSSR_D3D12 ON) -elseif(GFX_API STREQUAL VK) - set(FFX_SSSR_VK ON) -endif() - # ouput exe to bin directory SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin) foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} ) @@ -19,7 +12,6 @@ endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES ) # reference libs used by both backends add_subdirectory(libs/cauldron) -add_subdirectory(../ffx-sssr libs/ffx-sssr) set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME}) diff --git a/sample/libs/dxc/CMakeLists.txt b/sample/libs/dxc/CMakeLists.txt deleted file mode 100644 index 36641b8..0000000 --- a/sample/libs/dxc/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 
@@ -project (DXC) - -add_library(${PROJECT_NAME} SHARED IMPORTED GLOBAL) - -set_property(TARGET ${PROJECT_NAME} PROPERTY IMPORTED_IMPLIB dxcompiler.lib) \ No newline at end of file diff --git a/ffx-sssr/externals/samplerCPP/README.txt b/sample/libs/samplerCPP/README.txt similarity index 100% rename from ffx-sssr/externals/samplerCPP/README.txt rename to sample/libs/samplerCPP/README.txt diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_128spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_128spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_128spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_128spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_16spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_16spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_16spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_16spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_256spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_256spp.cpp 
similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_256spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_256spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_2spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_2spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_2spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_2spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_32spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_32spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_32spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_32spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_4spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_4spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_4spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_4spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_64spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_64spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_64spp.cpp rename to 
sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_64spp.cpp diff --git a/ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_8spp.cpp b/sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_8spp.cpp similarity index 100% rename from ffx-sssr/externals/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_8spp.cpp rename to sample/libs/samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_8spp.cpp diff --git a/sample/media/Chess/wetChess_normal.png b/sample/media/Chess/wetChess_normal.png index 58625d6..bdb5a16 100644 --- a/sample/media/Chess/wetChess_normal.png +++ b/sample/media/Chess/wetChess_normal.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5e2669eeccdbde7134745b3fd0b05dce2d8066b746611df4fa6e279b66a510a -size 968772 +oid sha256:e70a9166c4b9256e6cb30529baf1f9b47bc1a4a50d0968b6b0fd47c6603b966f +size 425141 diff --git a/sample/screenshot.png b/sample/screenshot.png index fae88e2..ed4faee 100644 --- a/sample/screenshot.png +++ b/sample/screenshot.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ace027a88ef46b65910ac0983fe66acfdd5451c30ddc1d04c9c5289000d184f -size 11114536 +oid sha256:70a94482a198566f0c360b98c482e719298a6da67fd7315695cdc0c1c71cd954 +size 3681571 diff --git a/sample/src/DX12/CMakeLists.txt b/sample/src/DX12/CMakeLists.txt index 1ab0552..4cad9ab 100644 --- a/sample/src/DX12/CMakeLists.txt +++ b/sample/src/DX12/CMakeLists.txt @@ -2,22 +2,24 @@ project (SssrSample_DX12) add_compile_options(/MP) -set(Sources_src - Sources/SssrSample.cpp - Sources/SssrSample.h - Sources/SampleRenderer.cpp - Sources/SampleRenderer.h - Sources/stdafx.cpp - Sources/stdafx.h) +file(GLOB Sources_src + Sources/*.h + Sources/*.cpp + ) -set(Shaders_src - ${CMAKE_CURRENT_SOURCE_DIR}/Shaders/ApplyReflections.hlsl - ${CMAKE_CURRENT_SOURCE_DIR}/Shaders/DepthDownsample.hlsl - 
${CMAKE_CURRENT_SOURCE_DIR}/Shaders/ffx_a.h - ${CMAKE_CURRENT_SOURCE_DIR}/Shaders/ffx_spd.h) +file(GLOB Shaders_src + ../Shaders/*.hlsl + ../Shaders/*.h + ../../../ffx-dnsr/ffx-reflection-dnsr/*.h + ../../../ffx-dnsr/ffx-reflection-dnsr/*.hlsl + ../../../ffx-sssr/*.h + ../../../ffx-sssr/*.hlsl + ../../../ffx-spd/*.h + ../../../ffx-spd/*.hlsl + ) -set(Common_src - ${CMAKE_CURRENT_SOURCE_DIR}/../Common/config.json +file(GLOB Common_src + ../Common/config.json ) source_group("Sources" FILES ${Sources_src}) @@ -46,7 +48,7 @@ copyCommand("${Shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) copyCommand("${Common_src}" ${CMAKE_HOME_DIRECTORY}/bin) add_executable(${PROJECT_NAME} WIN32 ${Sources_src} ${Shaders_src} ${Common_src}) -target_link_libraries (${PROJECT_NAME} LINK_PUBLIC Cauldron_DX12 FFX_SSSR ImGUI amd_ags DXC) +target_link_libraries (${PROJECT_NAME} LINK_PUBLIC Cauldron_DX12 ImGUI amd_ags DXC) set_target_properties(${PROJECT_NAME} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin") diff --git a/sample/src/DX12/Shaders/ApplyReflections.hlsl b/sample/src/DX12/Shaders/ApplyReflections.hlsl deleted file mode 100644 index 7f03e63..0000000 --- a/sample/src/DX12/Shaders/ApplyReflections.hlsl +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#ifndef FFX_SSSR_APPLY -#define FFX_SSSR_APPLY - -Texture2D reflectionTarget : register(t0); -Texture2D normalsTexture : register(t1); -Texture2D specularRoughnessTexture : register(t2); -Texture2D brdfTexture : register(t3); - -SamplerState linearSampler : register(s0); - -cbuffer Constants : register(b0) -{ - float4 viewDirection; - uint showReflectionTarget; - uint drawReflections; -}; - -struct VertexInput -{ - uint vertexId : SV_VertexID; -}; - -struct VertexOut -{ - float4 position : SV_Position; - float2 texcoord : TEXCOORD0; -}; - -VertexOut vs_main(VertexInput input) -{ - VertexOut output; - output.texcoord = float2((input.vertexId << 1) & 2, input.vertexId & 2); - output.position = float4(output.texcoord.xy * 2.0 - 1.0, 0.0, 1.0); - return output; -} - -// Important bits from the PBR shader -float3 getIBLContribution(float perceptualRoughness, float3 specularColor, float3 specularLight, float3 n, float3 v) -{ - float NdotV = clamp(dot(n, v), 0.0, 1.0); - float2 brdfSamplePoint = clamp(float2(NdotV, perceptualRoughness), float2(0.0, 0.0), float2(1.0, 1.0)); - // retrieve a scale and bias to F0. 
See [1], Figure 3 - float2 brdf = brdfTexture.Sample(linearSampler, brdfSamplePoint).rg; - - float3 specular = specularLight * (specularColor * brdf.x + brdf.y); - return specular; -} - -float4 ps_main(VertexOut input) : SV_Target0 -{ - input.texcoord.y = 1 - input.texcoord.y; - float3 radiance = reflectionTarget.Sample(linearSampler, input.texcoord).xyz; - float4 specularRoughness = specularRoughnessTexture.Sample(linearSampler, input.texcoord); - float3 specularColor = specularRoughness.xyz; - float perceptualRoughness = sqrt(specularRoughness.w); // specularRoughness.w contains alphaRoughness - float3 normal = 2 * normalsTexture.Sample(linearSampler, input.texcoord).xyz - 1; - float3 view = viewDirection.xyz; - - if (showReflectionTarget == 1) - { - // Show just the reflection view - return float4(radiance, 0); - } - else if (drawReflections == 1) - { - radiance = getIBLContribution(perceptualRoughness, specularColor, radiance, normal, view); - return float4(radiance, 1); // Show the reflections applied to the scene - } - else - { - // Show just the scene - return float4(0, 0, 0, 1); - } -} - -#endif // FFX_SSSR_APPLY \ No newline at end of file diff --git a/sample/src/DX12/Shaders/DepthDownsample.hlsl b/sample/src/DX12/Shaders/DepthDownsample.hlsl deleted file mode 100644 index 0ce9f54..0000000 --- a/sample/src/DX12/Shaders/DepthDownsample.hlsl +++ /dev/null @@ -1,99 +0,0 @@ -/********************************************************************** -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -********************************************************************/ - -#ifndef FFX_SSSR_DEPTH_DOWNSAMPLE -#define FFX_SSSR_DEPTH_DOWNSAMPLE - -Texture2D g_depth_buffer : register(t0); -RWTexture2D g_downsampled_depth_buffer[13] : register(u0); // 12 is the maximum amount of supported mips by the downsampling lib (4096x4096). We copy the depth buffer over for simplicity. -RWBuffer g_global_atomic : register(u13); // Single atomic counter that stores the number of remaining threadgroups to process. 
- -#define A_GPU -#define A_HLSL -#include "ffx_a.h" - -groupshared float g_group_shared_depth_values[16][16]; -groupshared uint g_group_shared_counter; - -#define DS_FALLBACK - -// Define fetch and store functions -AF4 SpdLoadSourceImage(ASU2 index) { return g_depth_buffer[index].xxxx; } -AF4 SpdLoad(ASU2 index) { return g_downsampled_depth_buffer[6][index].xxxx; } // 5 -> 6 as we store a copy of the depth buffer at index 0 -void SpdStore(ASU2 pix, AF4 outValue, AU1 index) { g_downsampled_depth_buffer[index + 1][pix] = outValue.x; } // + 1 as we store a copy of the depth buffer at index 0 -void SpdIncreaseAtomicCounter() { InterlockedAdd(g_global_atomic[0], 1, g_group_shared_counter); } -AU1 SpdGetAtomicCounter() { return g_group_shared_counter; } -AF4 SpdLoadIntermediate(AU1 x, AU1 y) { - float f = g_group_shared_depth_values[x][y]; - return f.xxxx; -} -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) { g_group_shared_depth_values[x][y] = value.x; } -AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) { return min(min(v0, v1), min(v2,v3)); } - -#include "ffx_spd.h" - -uint GetThreadgroupCount(uint2 image_size) -{ - // Each threadgroup works on 64x64 texels - return ((image_size.x + 63) / 64) * ((image_size.y + 63) / 64); -} - -// Returns mips count of a texture with specified size -float GetMipsCount(float2 texture_size) -{ - float max_dim = max(texture_size.x, texture_size.y); - return 1.0 + floor(log2(max_dim)); -} - -[numthreads(32, 8, 1)] -void main(uint3 did : SV_DispatchThreadID, uint3 gid : SV_GroupID, uint gi : SV_GroupIndex) -{ - float2 depth_image_size = 0; - g_depth_buffer.GetDimensions(depth_image_size.x, depth_image_size.y); - - // Copy most detailed level into the hierarchy and transform it. 
- uint2 u_depth_image_size = uint2(depth_image_size); - for (int i = 0; i < 2; ++i) - { - for (int j = 0; j < 8; ++j) - { - uint2 idx = uint2(2 * did.x + i, 8 * did.y + j); - if (idx.x < u_depth_image_size.x && idx.y < u_depth_image_size.y) - { - g_downsampled_depth_buffer[0][idx] = g_depth_buffer[idx]; - } - } - } - - float2 image_size = 0; - g_downsampled_depth_buffer[0].GetDimensions(image_size.x, image_size.y); - float mips_count = GetMipsCount(image_size); - uint threadgroup_count = GetThreadgroupCount(image_size); - - SpdDownsample( - AU2(gid.xy), - AU1(gi), - AU1(mips_count), - AU1(threadgroup_count)); -} - -#endif // FFX_SSSR_DEPTH_DOWNSAMPLE \ No newline at end of file diff --git a/ffx-sssr/src/resources.h b/sample/src/DX12/Sources/BlueNoiseSampler.cpp similarity index 85% rename from ffx-sssr/src/resources.h rename to sample/src/DX12/Sources/BlueNoiseSampler.cpp index 9d9442d..4e91067 100644 --- a/ffx-sssr/src/resources.h +++ b/sample/src/DX12/Sources/BlueNoiseSampler.cpp @@ -19,17 +19,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ -#pragma once +#include "stdafx.h" +#include "BlueNoiseSampler.h" -namespace ffx_sssr +namespace SSSR_SAMPLE_DX12 { - /** - The available resource types. - */ - enum ResourceType - { - kResourceType_ReflectionView, - - kResourceType_Count - }; + void BlueNoiseSamplerD3D12::OnDestroy() + { + sobolBuffer.Release(); + rankingTileBuffer.Release(); + scramblingTileBuffer.Release(); + } } diff --git a/sample/src/DX12/Sources/BlueNoiseSampler.h b/sample/src/DX12/Sources/BlueNoiseSampler.h new file mode 100644 index 0000000..be05f29 --- /dev/null +++ b/sample/src/DX12/Sources/BlueNoiseSampler.h @@ -0,0 +1,42 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#pragma once +#include "BufferDX12.h" +namespace SSSR_SAMPLE_DX12 +{ + /** + The BlueNoiseSamplerD3D12 struct represents a blue-noise sampler to be used for random number generation. + + \note Original implementation can be found here: https://eheitzresearch.wordpress.com/762-2/ + */ + struct BlueNoiseSamplerD3D12 + { + // The Sobol sequence buffer. + BufferDX12 sobolBuffer; + // The ranking tile buffer for sampling. + BufferDX12 rankingTileBuffer; + // The scrambling tile buffer for sampling. 
+ BufferDX12 scramblingTileBuffer; + + void OnDestroy(); + }; +} \ No newline at end of file diff --git a/sample/src/DX12/Sources/BufferDX12.cpp b/sample/src/DX12/Sources/BufferDX12.cpp new file mode 100644 index 0000000..ae99833 --- /dev/null +++ b/sample/src/DX12/Sources/BufferDX12.cpp @@ -0,0 +1,85 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ +#include "stdafx.h" + +#include "BufferDX12.h" +#include "Base/Helper.h" + +namespace SSSR_SAMPLE_DX12 +{ + void BufferDX12::Release() + { + if (m_pBuffer) + { + m_pBuffer->Release(); + } + } + + BufferDX12::BufferDX12() + { + m_pDevice = nullptr; + m_pBuffer = nullptr; + + m_numElements = 0; + m_elementSize = 0; + } + + void BufferDX12::InitFromMem(CAULDRON_DX12::Device* pDevice, const char* pDebugName, UploadHeapBuffersDX12* pUploadHeap, const void* pData, int numElements, int elementSize) + { + m_pDevice = pDevice; + m_numElements = numElements; + m_elementSize = elementSize; + + uint32_t size = numElements * elementSize; + + D3D12_HEAP_PROPERTIES defaultHeapProp = {}; + defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_RESOURCE_DESC bufDesc = {}; + bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufDesc.Width = size; + bufDesc.Height = 1; + bufDesc.DepthOrArraySize = 1; + bufDesc.MipLevels = 1; + bufDesc.Format = DXGI_FORMAT_UNKNOWN; + bufDesc.SampleDesc.Count = 1; + bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + m_pDevice->GetDevice()->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &bufDesc, D3D12_RESOURCE_STATE_COPY_DEST, NULL, IID_PPV_ARGS(&m_pBuffer)); + CAULDRON_DX12::SetName(m_pBuffer, pDebugName); + + pUploadHeap->AddBufferCopy(pData, size, m_pBuffer); + } + + void BufferDX12::CreateSRV(uint32_t index, CAULDRON_DX12::CBV_SRV_UAV* pRV) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = m_numElements; + srvDesc.Buffer.StructureByteStride = m_elementSize; + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;// D3D12_BUFFER_SRV_FLAG_RAW; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + m_pDevice->GetDevice()->CreateShaderResourceView(m_pBuffer, &srvDesc, 
pRV->GetCPU(index)); + } +} \ No newline at end of file diff --git a/ffx-sssr/src/reflection_error.h b/sample/src/DX12/Sources/BufferDX12.h similarity index 68% rename from ffx-sssr/src/reflection_error.h rename to sample/src/DX12/Sources/BufferDX12.h index b49195a..e3ea611 100644 --- a/ffx-sssr/src/reflection_error.h +++ b/sample/src/DX12/Sources/BufferDX12.h @@ -21,26 +21,26 @@ THE SOFTWARE. ********************************************************************/ #pragma once -#include +#include "Base/ResourceViewHeaps.h" +#include "Misc/ImgLoader.h" +#include "UploadHeapBuffersDX12.h" -#include "ffx_sssr.h" - -namespace ffx_sssr +using namespace CAULDRON_DX12; +namespace SSSR_SAMPLE_DX12 { - class Context; - - /** - The reflection_error class represents an error for the FFX_SSSR library. - */ - class reflection_error : public std::exception - { - public: - reflection_error(); - reflection_error(FfxSssrStatus error); - reflection_error(const Context& context, FfxSssrStatus error); - reflection_error(const Context& context, FfxSssrStatus error, char const* format, ...); - - // The error code for this exception. - FfxSssrStatus error_; - }; -} + class BufferDX12 + { + public: + BufferDX12(); + void InitFromMem(Device* pDevice, const char* pDebugName, UploadHeapBuffersDX12* pUploadHeap, const void* pData, int numElements, int elementSize); + void Release(); + void CreateSRV(uint32_t index, CBV_SRV_UAV* pRV); + + private: + Device* m_pDevice; + ID3D12Resource* m_pBuffer; + + int m_numElements; + int m_elementSize; + }; +} \ No newline at end of file diff --git a/sample/src/DX12/Sources/SSSR.cpp b/sample/src/DX12/Sources/SSSR.cpp new file mode 100644 index 0000000..293719a --- /dev/null +++ b/sample/src/DX12/Sources/SSSR.cpp @@ -0,0 +1,1197 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#include "stdafx.h" + +#include "SSSR.h" +#include "Base\ShaderCompilerHelper.h" +#include "Utils.h" + +namespace _1spp +{ +#include "../../../samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp" +} + +/* + The available blue noise sampler with 1spp sampling mode. +*/ +struct +{ + std::int32_t const (&sobol_buffer_)[256 * 256]; + std::int32_t const (&ranking_tile_buffer_)[128 * 128 * 8]; + std::int32_t const (&scrambling_tile_buffer_)[128 * 128 * 8]; +} +const g_blue_noise_sampler_state = { _1spp::sobol_256spp_256d, _1spp::rankingTile, _1spp::scramblingTile }; + +/** + Performs a division that rounds the result up to the next integer. + + \param value The value to be divided. + \param divisor The divisor to be used. + \return The quotient, rounded up. 
+*/ +template +static inline TYPE RoundedDivide(TYPE value, TYPE divisor) +{ + return (value + divisor - 1) / divisor; +} + +using namespace CAULDRON_DX12; +namespace SSSR_SAMPLE_DX12 +{ + /** + Initializes a linear sampler for a static sampler description. + + \param shader_register The slot of this sampler. + \return The resulting sampler description. + */ + inline D3D12_STATIC_SAMPLER_DESC InitLinearSampler(uint32_t shader_register) + { + D3D12_STATIC_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + samplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + samplerDesc.MinLOD = 0.0f; + samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + samplerDesc.MipLODBias = 0; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.ShaderRegister = shader_register; + samplerDesc.RegisterSpace = 0; + samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // Compute + return samplerDesc; + } + + SSSR::SSSR() + { + m_pDevice = nullptr; + m_pConstantBufferRing = nullptr; + m_pCpuVisibleHeap = nullptr; + m_pResourceViewHeaps = nullptr; + m_pUploadHeap = nullptr; + m_pCommandSignature = nullptr; + + m_screenWidth = 0; + m_screenHeight = 0; + + m_bufferIndex = 0; + m_frameCountBeforeReuse = 0; + m_denoisingElapsedGpuTicks = 0; + m_isPerformanceCountersEnabled = false; + m_tileClassificationElapsedGpuTicks = 0; + m_intersectionElapsedGpuTicks = 0; + m_denoisingElapsedGpuTicks = 0; + m_timestampFrameIndex = 0; + + m_pTimestampQueryBuffer = nullptr; + m_pTimestampQueryHeap = nullptr; + + m_environmentMapSamplerDesc = {}; + } + + void SSSR_SAMPLE_DX12::SSSR::OnCreate(Device* pDevice, StaticResourceViewHeap& cpuVisibleHeap, ResourceViewHeaps& resourceHeap, UploadHeap& uploadHeap, DynamicBufferRing& 
constantBufferRing, uint32_t frameCountBeforeReuse, bool enablePerformanceCounters) + { + m_pDevice = pDevice; + m_pConstantBufferRing = &constantBufferRing; + m_pCpuVisibleHeap = &cpuVisibleHeap; + m_pResourceViewHeaps = &resourceHeap; + m_pUploadHeap = &uploadHeap; + m_frameCountBeforeReuse = frameCountBeforeReuse; + m_isPerformanceCountersEnabled = enablePerformanceCounters; + + m_uploadHeapBuffers.OnCreate(pDevice, 1024 * 1024); + + cpuVisibleHeap.AllocDescriptor(1, &m_environmentMapSRV); + + CreateResources(); + SetupClassifyTilesPass(true); + SetupPrepareIndirectArgsPass(true); + SetupIntersectionPass(true); + SetupResolveSpatialPass(true); + SetupResolveTemporalPass(true); + SetupBlurPass(true); + SetupPerformanceCounters(); + } + + void SSSR::OnCreateWindowSizeDependentResources(const SSSRCreationInfo& input) + { + assert(input.outputWidth > 0); + assert(input.outputHeight > 0); + assert(input.HDR != nullptr); + assert(input.DepthHierarchy != nullptr); + assert(input.MotionVectors != nullptr); + assert(input.NormalBuffer != nullptr); + assert(input.NormalHistoryBuffer != nullptr); + assert(input.SpecularRoughness != nullptr); + assert(input.SkyDome != nullptr); + + m_screenWidth = input.outputWidth; + m_screenHeight = input.outputHeight; + + D3D12_STATIC_SAMPLER_DESC environmentSamplerDesc = {}; + input.SkyDome->SetDescriptorSpec(0, &m_environmentMapSRV, 0, &environmentSamplerDesc); + + m_environmentMapSamplerDesc.AddressU = environmentSamplerDesc.AddressU; + m_environmentMapSamplerDesc.AddressV = environmentSamplerDesc.AddressV; + m_environmentMapSamplerDesc.AddressW = environmentSamplerDesc.AddressW; + m_environmentMapSamplerDesc.ComparisonFunc = environmentSamplerDesc.ComparisonFunc; + m_environmentMapSamplerDesc.Filter = environmentSamplerDesc.Filter; + m_environmentMapSamplerDesc.MaxAnisotropy = environmentSamplerDesc.MaxAnisotropy; + m_environmentMapSamplerDesc.MaxLOD = environmentSamplerDesc.MaxLOD; + m_environmentMapSamplerDesc.MinLOD = 
environmentSamplerDesc.MinLOD; + m_environmentMapSamplerDesc.MipLODBias = environmentSamplerDesc.MipLODBias; + + CreateWindowSizeDependentResources(); + InitializeDescriptorTableData(input); + } + + void SSSR_SAMPLE_DX12::SSSR::OnDestroy() + { + m_uploadHeapBuffers.OnDestroy(); + + m_ClassifyTilesPass.OnDestroy(); + m_PrepareIndirectArgsPass.OnDestroy(); + m_IntersectPass.OnDestroy(); + m_BlurPass.OnDestroy(); + m_ResolveSpatialPass.OnDestroy(); + m_ResolveTemporalPass.OnDestroy(); + + m_rayCounter.OnDestroy(); + m_intersectionPassIndirectArgs.OnDestroy(); + m_blueNoiseSampler.OnDestroy(); + + if (m_pCommandSignature) + { + m_pCommandSignature->Release(); + } + if (m_pTimestampQueryHeap) + { + m_pTimestampQueryHeap->Release(); + } + if (m_pTimestampQueryBuffer) + { + m_pTimestampQueryBuffer->Release(); + } + } + + void SSSR::OnDestroyWindowSizeDependentResources() + { + m_rayList.OnDestroy(); + m_temporalDenoiserResult[0].OnDestroy(); + m_temporalDenoiserResult[1].OnDestroy(); + m_roughnessTexture[0].OnDestroy(); + m_roughnessTexture[1].OnDestroy(); + m_rayLengths.OnDestroy(); + m_temporalVarianceMask.OnDestroy(); + m_tileMetaDataMask.OnDestroy(); + m_outputBuffer.OnDestroy(); + } + + void SSSR_SAMPLE_DX12::SSSR::Draw(ID3D12GraphicsCommandList* pCommandList, const SSSRConstants& sssrConstants, bool showIntersectResult) + { + UserMarker marker(pCommandList, "FidelityFX SSSR"); + + QueryTimestamps(pCommandList); + + //Set Constantbuffer data + D3D12_GPU_VIRTUAL_ADDRESS constantbufferAddress = m_pConstantBufferRing->AllocConstantBuffer(sizeof(SSSRConstants), (void*)&sssrConstants); + + //Render + ID3D12DescriptorHeap* descriptorHeaps[] = { m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(), m_pResourceViewHeaps->GetSamplerHeap() }; + pCommandList->SetDescriptorHeaps(_countof(descriptorHeaps), descriptorHeaps); + + // Ensure that the ray list is in UA state + std::vector tile_ray_list_barriers = { + CD3DX12_RESOURCE_BARRIER::Transition(m_rayList.GetResource(), 
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_tileMetaDataMask.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalDenoiserResult[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_roughnessTexture[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalVarianceMask.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + }; + + pCommandList->ResourceBarrier((UINT)tile_ray_list_barriers.size(), &tile_ray_list_barriers[0]); + + { + UserMarker marker(pCommandList, "Denoiser"); + { + UserMarker marker(pCommandList, "ClassifyTiles"); + + pCommandList->SetComputeRootSignature(m_ClassifyTilesPass.pRootSignature); + pCommandList->SetComputeRootDescriptorTable(0, m_ClassifyTilesPass.descriptorTables_CBV_SRV_UAV[m_bufferIndex].GetGPU()); + pCommandList->SetComputeRootConstantBufferView(1, constantbufferAddress); + pCommandList->SetPipelineState(m_ClassifyTilesPass.pPipeline); + uint32_t dim_x = RoundedDivide(m_screenWidth, 8u); + uint32_t dim_y = RoundedDivide(m_screenHeight, 8u); + pCommandList->Dispatch(dim_x, dim_y, 1); + } + } + + // Ensure that the tile classification pass finished + std::vector classification_results_barriers = { + CD3DX12_RESOURCE_BARRIER::UAV(m_rayCounter.GetResource()), + CD3DX12_RESOURCE_BARRIER::Transition(m_rayList.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_intersectionPassIndirectArgs.GetResource(), D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + 
CD3DX12_RESOURCE_BARRIER::Transition(m_tileMetaDataMask.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_roughnessTexture[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + }; + + pCommandList->ResourceBarrier((UINT)classification_results_barriers.size(), &classification_results_barriers[0]); + + { + UserMarker marker(pCommandList, "PrepareIndirectArgs"); + + pCommandList->SetComputeRootSignature(m_PrepareIndirectArgsPass.pRootSignature); + pCommandList->SetComputeRootDescriptorTable(0, m_PrepareIndirectArgsPass.descriptorTables_CBV_SRV_UAV[m_bufferIndex].GetGPU()); + pCommandList->SetPipelineState(m_PrepareIndirectArgsPass.pPipeline); + pCommandList->Dispatch(1, 1, 1); + } + + // Query the amount of time spent in the ClassifyTiles pass + if (m_isPerformanceCountersEnabled) + { + auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex]; + + assert(timestamp_queries.size() == 1ull && timestamp_queries[0] == TIMESTAMP_QUERY_INIT); + + pCommandList->EndQuery(m_pTimestampQueryHeap, + D3D12_QUERY_TYPE_TIMESTAMP, + GetTimestampQueryIndex()); + + timestamp_queries.push_back(TIMESTAMP_QUERY_TILE_CLASSIFICATION); + } + + // Ensure that the arguments are written + std::vector indirect_arguments_barriers = { + CD3DX12_RESOURCE_BARRIER::Transition(m_intersectionPassIndirectArgs.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT), + }; + pCommandList->ResourceBarrier((UINT)indirect_arguments_barriers.size(), &indirect_arguments_barriers[0]); + + { + UserMarker marker(pCommandList, "Intersection pass"); + + pCommandList->SetComputeRootSignature(m_IntersectPass.pRootSignature); + pCommandList->SetComputeRootDescriptorTable(0, m_IntersectPass.descriptorTables_CBV_SRV_UAV[m_bufferIndex].GetGPU()); + pCommandList->SetComputeRootConstantBufferView(1, 
constantbufferAddress); + pCommandList->SetComputeRootDescriptorTable(2, m_IntersectPass.descriptorTables_Sampler[0].GetGPU()); + pCommandList->SetPipelineState(m_IntersectPass.pPipeline); + pCommandList->ExecuteIndirect(m_pCommandSignature, 1, m_intersectionPassIndirectArgs.GetResource(), 0, nullptr, 0); + } + + // Query the amount of time spent in the intersection pass + if (m_isPerformanceCountersEnabled) + { + auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex]; + + assert(timestamp_queries.size() == 2ull && timestamp_queries[1] == TIMESTAMP_QUERY_TILE_CLASSIFICATION); + + pCommandList->EndQuery(m_pTimestampQueryHeap, + D3D12_QUERY_TYPE_TIMESTAMP, + GetTimestampQueryIndex()); + + timestamp_queries.push_back(TIMESTAMP_QUERY_INTERSECTION); + } + + if (showIntersectResult) + { + //Copy the intersection result to the output buffer + std::vector barriers_copy_begin = { + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalDenoiserResult[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_outputBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST), + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalVarianceMask.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + }; + pCommandList->ResourceBarrier((UINT)barriers_copy_begin.size(), &barriers_copy_begin[0]); + + CopyToTexture(pCommandList, m_temporalDenoiserResult[m_bufferIndex].GetResource(), m_outputBuffer.GetResource(), m_screenWidth, m_screenHeight); + + std::vector barriers_copy_end = { + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalDenoiserResult[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_outputBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + }; + + 
pCommandList->ResourceBarrier((UINT)barriers_copy_end.size(), &barriers_copy_end[0]); + } + else + { + // Transition the intersection result from UAV to shader-resource state + std::vector intersection_barriers = { + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalDenoiserResult[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + }; + + // Ensure that the intersection pass finished + pCommandList->ResourceBarrier(intersection_barriers.size(), &intersection_barriers[0]); + { + UserMarker marker(pCommandList, "Spatial pass"); + pCommandList->SetComputeRootSignature(m_ResolveSpatialPass.pRootSignature); + pCommandList->SetComputeRootDescriptorTable(0, m_ResolveSpatialPass.descriptorTables_CBV_SRV_UAV[m_bufferIndex].GetGPU()); + pCommandList->SetComputeRootConstantBufferView(1, constantbufferAddress); + pCommandList->SetPipelineState(m_ResolveSpatialPass.pPipeline); + pCommandList->Dispatch(RoundedDivide(m_screenWidth, 8u), RoundedDivide(m_screenHeight, 8u), 1); + // Ensure that the spatial denoising pass finished. We don't have the resource for the final result available, thus we have to wait for any UAV access to finish. 
+ std::vector spatial_barriers = { + CD3DX12_RESOURCE_BARRIER::Transition(m_outputBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalDenoiserResult[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalVarianceMask.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_rayLengths.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + }; + pCommandList->ResourceBarrier((UINT)spatial_barriers.size(), &spatial_barriers[0]); + } + + // Temporal denoiser passes + { + UserMarker marker(pCommandList, "Temporal pass"); + pCommandList->SetComputeRootSignature(m_ResolveTemporalPass.pRootSignature); + pCommandList->SetComputeRootDescriptorTable(0, m_ResolveTemporalPass.descriptorTables_CBV_SRV_UAV[m_bufferIndex].GetGPU()); + pCommandList->SetComputeRootConstantBufferView(1, constantbufferAddress); + pCommandList->SetPipelineState(m_ResolveTemporalPass.pPipeline); + pCommandList->Dispatch(RoundedDivide(m_screenWidth, 8u), RoundedDivide(m_screenHeight, 8u), 1); + // Ensure that the temporal denoising pass finished + std::vector temporal_barriers = { + CD3DX12_RESOURCE_BARRIER::UAV(m_temporalVarianceMask.GetResource()), + CD3DX12_RESOURCE_BARRIER::Transition(m_temporalDenoiserResult[m_bufferIndex].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_rayLengths.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_outputBuffer.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + }; + 
pCommandList->ResourceBarrier((UINT)temporal_barriers.size(), &temporal_barriers[0]); + } + + // Blur pass + { + UserMarker marker(pCommandList, "Blur pass"); + pCommandList->SetComputeRootSignature(m_BlurPass.pRootSignature); + pCommandList->SetComputeRootDescriptorTable(0, m_BlurPass.descriptorTables_CBV_SRV_UAV[m_bufferIndex].GetGPU()); + pCommandList->SetComputeRootConstantBufferView(1, constantbufferAddress); + pCommandList->SetPipelineState(m_BlurPass.pPipeline); + pCommandList->Dispatch(RoundedDivide(m_screenWidth, 8u), RoundedDivide(m_screenHeight, 8u), 1); + } + + // Query the amount of time spent in the denoiser passes + if (m_isPerformanceCountersEnabled) + { + auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex]; + + assert(timestamp_queries.size() == 3ull && timestamp_queries[2] == TIMESTAMP_QUERY_INTERSECTION); + + pCommandList->EndQuery(m_pTimestampQueryHeap, + D3D12_QUERY_TYPE_TIMESTAMP, + GetTimestampQueryIndex()); + + timestamp_queries.push_back(TIMESTAMP_QUERY_DENOISING); + } + + } + + // Resolve the timestamp query data + if (m_isPerformanceCountersEnabled) + { + auto const start_index = m_timestampFrameIndex * TIMESTAMP_QUERY_COUNT; + + pCommandList->ResolveQueryData(m_pTimestampQueryHeap, + D3D12_QUERY_TYPE_TIMESTAMP, + start_index, + static_cast(m_timestampQueries[m_timestampFrameIndex].size()), + m_pTimestampQueryBuffer, + start_index * sizeof(std::uint64_t)); + + m_timestampFrameIndex = (m_timestampFrameIndex + 1u) % m_frameCountBeforeReuse; + } + m_bufferIndex = 1 - m_bufferIndex; + } + + Texture* SSSR::GetOutputTexture() + { + return &m_outputBuffer; + } + + std::uint64_t SSSR::GetTileClassificationElapsedGpuTicks() const + { + return m_tileClassificationElapsedGpuTicks; + } + + std::uint64_t SSSR::GetIntersectElapsedGpuTicks() const + { + return m_intersectionElapsedGpuTicks; + } + + std::uint64_t SSSR::GetDenoiserElapsedGpuTicks() const + { + return m_denoisingElapsedGpuTicks; + } + + void SSSR::Recompile() + { + 
m_pDevice->GPUFlush(); + m_ClassifyTilesPass.DestroyPipeline(); + m_PrepareIndirectArgsPass.DestroyPipeline(); + m_IntersectPass.DestroyPipeline(); + m_BlurPass.DestroyPipeline(); + m_ResolveSpatialPass.DestroyPipeline(); + m_ResolveTemporalPass.DestroyPipeline(); + + SetupClassifyTilesPass(false); + SetupPrepareIndirectArgsPass(false); + SetupIntersectionPass(false); + SetupResolveSpatialPass(false); + SetupResolveTemporalPass(false); + SetupBlurPass(false); + } + + void SSSR::CreateResources() + { + uint32_t elementSize = 4; + //==============================Create Tile Classification-related buffers============================================ + { + m_rayCounter.InitBuffer(m_pDevice, "SSSR - Ray Counter", &CD3DX12_RESOURCE_DESC::Buffer(2ull * elementSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), elementSize, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } + //==============================Create PrepareIndirectArgs-related buffers============================================ + { + m_intersectionPassIndirectArgs.InitBuffer(m_pDevice, "SSSR - Intersect Indirect Args", &CD3DX12_RESOURCE_DESC::Buffer(3ull * elementSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), elementSize, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + } + //==============================Command Signature========================================== + { + D3D12_INDIRECT_ARGUMENT_DESC dispatch = {}; + dispatch.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; + + D3D12_COMMAND_SIGNATURE_DESC desc = {}; + desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS); + desc.NodeMask = 0; + desc.NumArgumentDescs = 1; + desc.pArgumentDescs = &dispatch; + + ThrowIfFailed(m_pDevice->GetDevice()->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(&m_pCommandSignature))); + } + //==============================Blue Noise buffers============================================ + { + auto const& sampler_state = g_blue_noise_sampler_state; + BlueNoiseSamplerD3D12& sampler = m_blueNoiseSampler; + 
sampler.sobolBuffer.InitFromMem(m_pDevice, "SSSR - Sobol Buffer", &m_uploadHeapBuffers, &sampler_state.sobol_buffer_, _countof(sampler_state.sobol_buffer_), sizeof(std::int32_t)); + sampler.rankingTileBuffer.InitFromMem(m_pDevice, "SSSR - Ranking Tile Buffer", &m_uploadHeapBuffers, &sampler_state.ranking_tile_buffer_, _countof(sampler_state.ranking_tile_buffer_), sizeof(std::int32_t)); + sampler.scramblingTileBuffer.InitFromMem(m_pDevice, "SSSR - Scrambling Tile Buffer", &m_uploadHeapBuffers, &sampler_state.scrambling_tile_buffer_, _countof(sampler_state.scrambling_tile_buffer_), sizeof(std::int32_t)); + m_uploadHeapBuffers.FlushAndFinish(); + } + } + + void SSSR::CreateWindowSizeDependentResources() + { + //===================================Create Output Buffer============================================ + { + CD3DX12_RESOURCE_DESC reflDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R11G11B10_FLOAT, m_screenWidth, m_screenHeight, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + m_outputBuffer.Init(m_pDevice, "Reflection Denoiser - OutputBuffer", &reflDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr); + } + uint32_t elementSize = 4; + //==============================Create Tile Classification-related buffers============================================ + { + UINT64 num_pixels = (UINT64)m_screenWidth * m_screenHeight; + m_rayList.InitBuffer(m_pDevice, "SSSR - Ray List", &CD3DX12_RESOURCE_DESC::Buffer(num_pixels * elementSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), elementSize, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + UINT64 num_tiles = (UINT64)(RoundedDivide(m_screenWidth, 8u) * RoundedDivide(m_screenHeight, 8u)); + // one uint per tile + m_tileMetaDataMask.InitBuffer(m_pDevice, "Reflection Denoiser - Tile Meta Data Mask", &CD3DX12_RESOURCE_DESC::Buffer(num_tiles * elementSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), elementSize, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + num_tiles *= 2; // one bit per pixel + 
m_temporalVarianceMask.InitBuffer(m_pDevice, "Reflection Denoiser - Temporal Variance Mask", &CD3DX12_RESOURCE_DESC::Buffer(num_tiles * elementSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), elementSize, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } + //==============================Create denoising-related resources============================== + { + CD3DX12_RESOURCE_DESC temporalDenoiserResult_Desc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R11G11B10_FLOAT, m_screenWidth, m_screenHeight, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + CD3DX12_RESOURCE_DESC rayLengths_Desc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16_FLOAT, m_screenWidth, m_screenHeight, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + CD3DX12_RESOURCE_DESC temporalVariance_Desc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, m_screenWidth, m_screenHeight, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + CD3DX12_RESOURCE_DESC roughnessTexture_Desc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, m_screenWidth, m_screenHeight, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + + m_roughnessTexture[0].Init(m_pDevice, "Reflection Denoiser - Extracted Roughness Texture 0", &roughnessTexture_Desc, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr); + m_roughnessTexture[1].Init(m_pDevice, "Reflection Denoiser - Extracted Roughness Texture 1", &roughnessTexture_Desc, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr); + m_temporalDenoiserResult[0].Init(m_pDevice, "Reflection Denoiser - Temporal Denoised Result 0", &temporalDenoiserResult_Desc, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr); + m_temporalDenoiserResult[1].Init(m_pDevice, "Reflection Denoiser - Temporal Denoised Result 1", &temporalDenoiserResult_Desc, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr); + m_rayLengths.Init(m_pDevice, "Reflection Denoiser - Ray Lengths", &rayLengths_Desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr); + } + + m_bufferIndex = 0; 
+ } + + void SSSR::SetupClassifyTilesPass(bool allocateDescriptorTable) + { + ShaderPass& shaderpass = m_ClassifyTilesPass; + D3D12_SHADER_BYTECODE shaderByteCode = {}; + //==============================Compile Shaders============================================ + { + DefineList defines; + CompileShaderFromFile("ClassifyTiles.hlsl", &defines, "main", "-T cs_6_0 /Zi /Zss", &shaderByteCode); + } + //==============================Allocate Descriptor Table========================================= + if (allocateDescriptorTable) + { + for (size_t i = 0; i < 2; i++) + { + shaderpass.descriptorTables_CBV_SRV_UAV.emplace_back(); + auto& table = shaderpass.descriptorTables_CBV_SRV_UAV.back(); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(7, &table); + } + } + //==============================RootSignature============================================ + { + CD3DX12_ROOT_PARAMETER RTSlot[2] = {}; + + int parameterCount = 0; + CD3DX12_DESCRIPTOR_RANGE DescRange_1[2] = {}; + { + //Param 0 + int rangeCount = 0; + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 0, 0, 0); + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 5, 0, 0, 2); + RTSlot[parameterCount++].InitAsDescriptorTable(rangeCount, &DescRange_1[0], D3D12_SHADER_VISIBILITY_ALL); + } + //Param 1 + RTSlot[parameterCount++].InitAsConstantBufferView(0); + + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = &RTSlot[0]; + descRootSignature.NumStaticSamplers = 0; + descRootSignature.pStaticSamplers = nullptr; + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob = nullptr; + ID3DBlob* pErrorBlob = nullptr; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + m_pDevice->GetDevice()->CreateRootSignature(0, 
pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&shaderpass.pRootSignature)) + ); + CAULDRON_DX12::SetName(shaderpass.pRootSignature, "Reflection Denoiser - ClassifyTiles Rootsignature"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + } + //==============================PipelineStates============================================ + { + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = shaderpass.pRootSignature; + descPso.NodeMask = 0; + + ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&shaderpass.pPipeline))); + CAULDRON_DX12::SetName(shaderpass.pPipeline, "Reflection Denoiser - ClassifyTiles Pso"); + } + } + + void SSSR::SetupPrepareIndirectArgsPass(bool allocateDescriptorTable) + { + ShaderPass& shaderpass = m_PrepareIndirectArgsPass; + D3D12_SHADER_BYTECODE shaderByteCode = {}; + + //==============================Compile Shaders============================================ + { + DefineList defines; + CompileShaderFromFile("PrepareIndirectArgs.hlsl", &defines, "main", "-T cs_6_0 /Zi /Zss", &shaderByteCode); + } + //==============================DescriptorTable========================================== + if (allocateDescriptorTable) + { + + for (size_t i = 0; i < 2; i++) + { + shaderpass.descriptorTables_CBV_SRV_UAV.emplace_back(); + auto& table = shaderpass.descriptorTables_CBV_SRV_UAV.back(); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(2, &table); + } + } + //==============================RootSignature============================================ + { + CD3DX12_ROOT_PARAMETER RTSlot[1] = {}; + + int parameterCount = 0; + CD3DX12_DESCRIPTOR_RANGE DescRange_1[1] = {}; + { + //Param 0 + int rangeCount = 0; + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 2, 0); + RTSlot[parameterCount++].InitAsDescriptorTable(rangeCount, &DescRange_1[0], 
D3D12_SHADER_VISIBILITY_ALL); + } + + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = &RTSlot[0]; + descRootSignature.NumStaticSamplers = 0; + descRootSignature.pStaticSamplers = nullptr; + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob = nullptr; + ID3DBlob* pErrorBlob = nullptr; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + m_pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&shaderpass.pRootSignature)) + ); + CAULDRON_DX12::SetName(shaderpass.pRootSignature, "PrepareIndirectArgs Rootsignature"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + //==============================PipelineStates============================================ + { + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = shaderpass.pRootSignature; + descPso.NodeMask = 0; + + ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&shaderpass.pPipeline))); + CAULDRON_DX12::SetName(shaderpass.pPipeline, "PrepareIndirectArgs Pso"); + } + } + } + + void SSSR::SetupIntersectionPass(bool allocateDescriptorTable) + { + ShaderPass& shaderpass = m_IntersectPass; + D3D12_SHADER_BYTECODE shaderByteCode = {}; + ID3D12Device* device = m_pDevice->GetDevice(); + + //==============================Compile Shaders============================================ + { + DefineList defines; + CompileShaderFromFile("Intersect.hlsl", &defines, "main", "-T cs_6_0 /Zi /Zss", &shaderByteCode); + } + + //==============================DescriptorTable========================================== + if 
(allocateDescriptorTable) + { + for (size_t i = 0; i < 2; i++) + { + //Descriptor Table - CBV_SRV_UAV + { + shaderpass.descriptorTables_CBV_SRV_UAV.emplace_back(); + auto& table = shaderpass.descriptorTables_CBV_SRV_UAV.back(); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(12, &table); + } + //Descriptor Table - Sampler + { + shaderpass.descriptorTables_Sampler.emplace_back(); + auto& table = shaderpass.descriptorTables_Sampler.back(); + m_pResourceViewHeaps->AllocSamplerDescriptor(1, &table); + } + } + } + //==============================RootSignature============================================ + { + D3D12_STATIC_SAMPLER_DESC sampler_descs[] = { InitLinearSampler(0) }; // g_linear_sampler + + CD3DX12_ROOT_PARAMETER RTSlot[3] = {}; + + int parameterCount = 0; + CD3DX12_DESCRIPTOR_RANGE DescRange_1[3] = {}; + CD3DX12_DESCRIPTOR_RANGE DescRange_2[1] = {}; + { + //Param 0 + int rangeCount = 0; + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 9, 0, 0, 0); + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 3, 0, 0, 9); + RTSlot[parameterCount++].InitAsDescriptorTable(rangeCount, &DescRange_1[0], D3D12_SHADER_VISIBILITY_ALL); + } + //Param 1 + RTSlot[parameterCount++].InitAsConstantBufferView(0); + { + //Param 2 + int rangeCount = 0; + DescRange_2[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1, 1, 0, 0); + RTSlot[parameterCount++].InitAsDescriptorTable(rangeCount, &DescRange_2[0], D3D12_SHADER_VISIBILITY_ALL); // g_environment_map_sampler + } + + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = &RTSlot[0]; + descRootSignature.NumStaticSamplers = _countof(sampler_descs); + descRootSignature.pStaticSamplers = sampler_descs; + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob = nullptr; + ID3DBlob* pErrorBlob = nullptr; + 
ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + m_pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&shaderpass.pRootSignature)) + ); + CAULDRON_DX12::SetName(shaderpass.pRootSignature, "SSSR - Intersection Root Signature"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + } + //==============================PipelineStates============================================ + { + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = shaderpass.pRootSignature; + descPso.NodeMask = 0; + + ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&shaderpass.pPipeline))); + CAULDRON_DX12::SetName(shaderpass.pPipeline, "SSSR - Intersection Pso"); + } + } + + void SSSR::SetupResolveSpatialPass(bool allocateDescriptorTable) + { + ShaderPass& shaderpass = m_ResolveSpatialPass; + D3D12_SHADER_BYTECODE shaderByteCode = {}; + + //==============================Compile Shaders============================================ + { + DefineList defines; + CompileShaderFromFile("ResolveSpatial.hlsl", &defines, "main", "-T cs_6_0 /Zi /Zss", &shaderByteCode); + } + + //==============================DescriptorTable========================================== + if (allocateDescriptorTable) + { + for (size_t i = 0; i < 2; i++) + { + shaderpass.descriptorTables_CBV_SRV_UAV.emplace_back(); + auto& table = shaderpass.descriptorTables_CBV_SRV_UAV.back(); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(6, &table); + } + } + //==============================RootSignature============================================ + { + CD3DX12_ROOT_PARAMETER RTSlot[3] = {}; + + int parameterCount = 0; + CD3DX12_DESCRIPTOR_RANGE DescRange_1[3] = {}; + CD3DX12_DESCRIPTOR_RANGE DescRange_2[1] = {}; + { + //Param 0 + int 
rangeCount = 0; + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 5, 0, 0, 0); + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, 5); + RTSlot[parameterCount++].InitAsDescriptorTable(rangeCount, &DescRange_1[0], D3D12_SHADER_VISIBILITY_ALL); + } + //Param 1 + RTSlot[parameterCount++].InitAsConstantBufferView(0); + + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = &RTSlot[0]; + descRootSignature.NumStaticSamplers = 0; + descRootSignature.pStaticSamplers = nullptr; + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob = nullptr; + ID3DBlob* pErrorBlob = nullptr; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + m_pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&shaderpass.pRootSignature)) + ); + CAULDRON_DX12::SetName(shaderpass.pRootSignature, "Reflection Denoiser - Spatial Resolve Root Signature"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + } + //==============================PipelineStates============================================ + { + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = shaderpass.pRootSignature; + descPso.NodeMask = 0; + + ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&shaderpass.pPipeline))); + CAULDRON_DX12::SetName(shaderpass.pPipeline, "Reflection Denoiser - Spatial Resolve Pso"); + } + } + + void SSSR::SetupResolveTemporalPass(bool allocateDescriptorTable) + { + ShaderPass& shaderpass = m_ResolveTemporalPass; + D3D12_SHADER_BYTECODE shaderByteCode = {}; + + 
//==============================Compile Shaders============================================ + { + DefineList defines; + CompileShaderFromFile("ResolveTemporal.hlsl", &defines, "main", "-T cs_6_0 /Zi /Zss", &shaderByteCode); + } + + //==============================DescriptorTable========================================== + + //Descriptor Table - CBV_SRV_UAV + if (allocateDescriptorTable) + { + for (size_t i = 0; i < 2; i++) + { + shaderpass.descriptorTables_CBV_SRV_UAV.emplace_back(); + auto& table = shaderpass.descriptorTables_CBV_SRV_UAV.back(); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(12, &table); + } + } + + //Descriptor Table - Sampler + { + shaderpass.descriptorTables_CBV_SRV_UAV.emplace_back(); + auto& table = shaderpass.descriptorTables_CBV_SRV_UAV.back(); + } + //==============================RootSignature============================================ + { + D3D12_STATIC_SAMPLER_DESC SamplerDesc = {}; + SamplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + SamplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + SamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + SamplerDesc.MinLOD = 0.0f; + SamplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + SamplerDesc.MipLODBias = 0; + SamplerDesc.MaxAnisotropy = 1; + SamplerDesc.ShaderRegister = 0; + SamplerDesc.RegisterSpace = 0; + SamplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + CD3DX12_ROOT_PARAMETER RTSlot[3] = {}; + + int parameterCount = 0; + CD3DX12_DESCRIPTOR_RANGE DescRange_1[3] = {}; + { + //Param 0 + int rangeCount = 0; + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 10, 0, 0, 0); + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 2, 0, 0, 10); + RTSlot[parameterCount++].InitAsDescriptorTable(rangeCount, &DescRange_1[0], D3D12_SHADER_VISIBILITY_ALL); + } + 
//Param 1 + RTSlot[parameterCount++].InitAsConstantBufferView(0); + + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = &RTSlot[0]; + descRootSignature.NumStaticSamplers = 0; + descRootSignature.pStaticSamplers = nullptr; + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob = nullptr; + ID3DBlob* pErrorBlob = nullptr; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + m_pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&shaderpass.pRootSignature)) + ); + CAULDRON_DX12::SetName(shaderpass.pRootSignature, "Reflection Denoiser - Temporal Resolve Root Signature"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + } + //==============================PipelineStates============================================ + { + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = shaderpass.pRootSignature; + descPso.NodeMask = 0; + + ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&shaderpass.pPipeline))); + CAULDRON_DX12::SetName(shaderpass.pPipeline, "Reflection Denoiser - Temporal Resolve Pso"); + } + } + + void SSSR::SetupBlurPass(bool allocateDescriptorTable) + { + ShaderPass& shaderpass = m_BlurPass; + D3D12_SHADER_BYTECODE shaderByteCode = {}; + + //==============================Compile Shaders============================================ + { + DefineList defines; + CompileShaderFromFile("BlurReflections.hlsl", &defines, "main", "-T cs_6_0 /Zi /Zss", &shaderByteCode); + } + + //==============================DescriptorTable========================================== + if 
(allocateDescriptorTable) + { + ID3D12Device* device = m_pDevice->GetDevice(); + + //Descriptor Table - CBV_SRV_UAV + for (size_t i = 0; i < 2; i++) + { + shaderpass.descriptorTables_CBV_SRV_UAV.emplace_back(); + auto& table = shaderpass.descriptorTables_CBV_SRV_UAV.back(); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(4, &table); + } + } + //==============================RootSignature============================================ + { + CD3DX12_ROOT_PARAMETER RTSlot[3] = {}; + + int parameterCount = 0; + CD3DX12_DESCRIPTOR_RANGE DescRange_1[3] = {}; + CD3DX12_DESCRIPTOR_RANGE DescRange_2[1] = {}; + { + //Param 0 + int rangeCount = 0; + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3, 0, 0, 0); + DescRange_1[rangeCount++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, 3); + RTSlot[parameterCount++].InitAsDescriptorTable(rangeCount, &DescRange_1[0], D3D12_SHADER_VISIBILITY_ALL); + } + //Param 1 + RTSlot[parameterCount++].InitAsConstantBufferView(0); + + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = &RTSlot[0]; + descRootSignature.NumStaticSamplers = 0; + descRootSignature.pStaticSamplers = nullptr; + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob = nullptr; + ID3DBlob* pErrorBlob = nullptr; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + m_pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&shaderpass.pRootSignature)) + ); + CAULDRON_DX12::SetName(shaderpass.pRootSignature, "Reflection Denoiser - Blur Root Signature"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + } + //==============================PipelineStates============================================ + { + 
D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = shaderpass.pRootSignature; + descPso.NodeMask = 0; + + ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&shaderpass.pPipeline))); + CAULDRON_DX12::SetName(shaderpass.pPipeline, "Reflection Denoiser - Blur Pso"); + } + } + + void SSSR::InitializeDescriptorTableData(const SSSRCreationInfo& input) + { + ID3D12Device* device = m_pDevice->GetDevice(); + Texture* normal_buffers[] = { input.NormalBuffer, input.NormalHistoryBuffer }; + + for (size_t i = 0; i < 2; i++) + { + //==============================ClassifyTilesPass========================================== + { + auto& table = m_ClassifyTilesPass.descriptorTables_CBV_SRV_UAV[i]; + int tableSlot = 0; + + input.SpecularRoughness->CreateSRV(tableSlot++, &table); + m_temporalVarianceMask.CreateSRV(tableSlot++, &table); + m_rayList.CreateBufferUAV(tableSlot++, nullptr, &table); + m_rayCounter.CreateBufferUAV(tableSlot++, nullptr, &table); + m_temporalDenoiserResult[i].CreateUAV(tableSlot++, &table); + m_tileMetaDataMask.CreateBufferUAV(tableSlot++, nullptr, &table); + m_roughnessTexture[i].CreateUAV(tableSlot++, &table); + } + //==============================PrepareIndirectArgsPass========================================== + { + auto& table = m_PrepareIndirectArgsPass.descriptorTables_CBV_SRV_UAV[i]; + int tableSlot = 0; + + m_rayCounter.CreateBufferUAV(tableSlot++, nullptr, &table); + m_intersectionPassIndirectArgs.CreateBufferUAV(tableSlot++, nullptr, &table); + } + //==============================IntersectionPass========================================== + { + auto& table = m_IntersectPass.descriptorTables_CBV_SRV_UAV[i]; + auto& table_sampler = m_IntersectPass.descriptorTables_Sampler[i]; + + BlueNoiseSamplerD3D12& sampler = m_blueNoiseSampler; + + int tableSlot = 0; + + input.HDR->CreateSRV(tableSlot++, &table); + 
input.DepthHierarchy->CreateSRV(tableSlot++, &table); + normal_buffers[input.pingPongNormal ? i : 0]->CreateSRV(tableSlot++, &table); + m_roughnessTexture[i].CreateSRV(tableSlot++, &table); + device->CopyDescriptorsSimple(1, table.GetCPU(tableSlot++), m_environmentMapSRV.GetCPU(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); // g_lit_scene + sampler.sobolBuffer.CreateSRV(tableSlot++, &table); + sampler.rankingTileBuffer.CreateSRV(tableSlot++, &table); + sampler.scramblingTileBuffer.CreateSRV(tableSlot++, &table); + m_rayList.CreateSRV(tableSlot++, &table); + m_temporalDenoiserResult[i].CreateUAV(tableSlot++, &table); + m_rayLengths.CreateUAV(tableSlot++, &table); + m_rayCounter.CreateBufferUAV(tableSlot++, nullptr, &table); + + m_pDevice->GetDevice()->CreateSampler(&m_environmentMapSamplerDesc, table_sampler.GetCPU(0)); + } + //==============================ResolveSpatial========================================== + { + auto& table = m_ResolveSpatialPass.descriptorTables_CBV_SRV_UAV[i]; + int tableSlot = 0; + + input.DepthHierarchy->CreateSRV(tableSlot++, &table); + normal_buffers[input.pingPongNormal ? i : 0]->CreateSRV(tableSlot++, &table); + m_roughnessTexture[i].CreateSRV(tableSlot++, &table); + m_temporalDenoiserResult[i].CreateSRV(tableSlot++, &table); + m_tileMetaDataMask.CreateSRV(tableSlot++, &table); + m_outputBuffer.CreateUAV(tableSlot++, &table); + } + //==============================ResolveTemporal========================================== + { + auto& table = m_ResolveTemporalPass.descriptorTables_CBV_SRV_UAV[i]; + int tableSlot = 0; + + normal_buffers[input.pingPongNormal ? i : 0]->CreateSRV(tableSlot++, &table); + m_roughnessTexture[i].CreateSRV(tableSlot++, &table); + normal_buffers[input.pingPongNormal ? 
1 - i : 1]->CreateSRV(tableSlot++, &table); + m_roughnessTexture[1 - i].CreateSRV(tableSlot++, &table); + input.DepthHierarchy->CreateSRV(tableSlot++, &table); + input.MotionVectors->CreateSRV(tableSlot++, &table); + m_temporalDenoiserResult[1 - i].CreateSRV(tableSlot++, &table); + m_rayLengths.CreateSRV(tableSlot++, &table); + m_outputBuffer.CreateSRV(tableSlot++, &table); + m_tileMetaDataMask.CreateSRV(tableSlot++, &table); + m_temporalDenoiserResult[i].CreateUAV(tableSlot++, &table); + m_temporalVarianceMask.CreateBufferUAV(tableSlot++, nullptr, &table); + } + //==============================BlurPass========================================== + { + auto& table = m_BlurPass.descriptorTables_CBV_SRV_UAV[i]; + int tableSlot = 0; + + m_roughnessTexture[i].CreateSRV(tableSlot++, &table); + m_temporalDenoiserResult[i].CreateSRV(tableSlot++, &table); + m_tileMetaDataMask.CreateSRV(tableSlot++, &table); + m_outputBuffer.CreateUAV(tableSlot++, &table); + } + } + } + + void SSSR::SetupPerformanceCounters() + { + // Create timestamp querying resources if enabled + if (m_isPerformanceCountersEnabled) + { + ID3D12Device* device = m_pDevice->GetDevice(); + + auto const query_heap_size = TIMESTAMP_QUERY_COUNT * m_frameCountBeforeReuse * sizeof(std::uint64_t); + + D3D12_QUERY_HEAP_DESC query_heap_desc = {}; + query_heap_desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + query_heap_desc.Count = static_cast(query_heap_size); + + ThrowIfFailed(device->CreateQueryHeap(&query_heap_desc, IID_PPV_ARGS(&m_pTimestampQueryHeap))); + + D3D12_HEAP_PROPERTIES heap_properties = {}; + heap_properties.Type = D3D12_HEAP_TYPE_READBACK; + heap_properties.CreationNodeMask = 1u; + heap_properties.VisibleNodeMask = 1u; + + D3D12_RESOURCE_DESC resource_desc = {}; + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resource_desc.Width = static_cast(query_heap_size); + resource_desc.Height = 1u; + resource_desc.DepthOrArraySize = 1u; + resource_desc.MipLevels = 1u; + 
resource_desc.SampleDesc.Count = 1u; + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + ThrowIfFailed(device->CreateCommittedResource(&heap_properties, + D3D12_HEAP_FLAG_NONE, + &resource_desc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_pTimestampQueryBuffer))); + + m_pTimestampQueryBuffer->SetName(L"TimestampQueryBuffer"); + m_timestampQueries.resize(m_frameCountBeforeReuse); + for (auto& timestamp_queries : m_timestampQueries) + { + timestamp_queries.reserve(TIMESTAMP_QUERY_COUNT); + } + } + } + + void SSSR::QueryTimestamps(ID3D12GraphicsCommandList* pCommandList) + { + // Query timestamp value prior to resolving the reflection view + if (m_isPerformanceCountersEnabled) + { + auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex]; + + if (!timestamp_queries.empty()) + { + std::uint64_t* data; + + // Reset performance counters + m_tileClassificationElapsedGpuTicks = 0ull; + m_denoisingElapsedGpuTicks = 0ull; + m_intersectionElapsedGpuTicks = 0ull; + + auto const start_index = m_timestampFrameIndex * TIMESTAMP_QUERY_COUNT; + + D3D12_RANGE read_range = {}; + read_range.Begin = start_index * sizeof(std::uint64_t); + read_range.End = (start_index + timestamp_queries.size()) * sizeof(std::uint64_t); + + m_pTimestampQueryBuffer->Map(0u, + &read_range, + reinterpret_cast(&data)); + + for (auto i = 0u, j = 1u; j < timestamp_queries.size(); ++i, ++j) + { + auto const elapsed_time = (data[j] - data[i]); + + switch (timestamp_queries[j]) + { + case TIMESTAMP_QUERY_TILE_CLASSIFICATION: + m_tileClassificationElapsedGpuTicks = elapsed_time; + break; + case TIMESTAMP_QUERY_INTERSECTION: + m_intersectionElapsedGpuTicks = elapsed_time; + break; + case TIMESTAMP_QUERY_DENOISING: + m_denoisingElapsedGpuTicks = elapsed_time; + break; + default: + assert(false && "unrecognized timestamp query"); + break; + } + } + + m_pTimestampQueryBuffer->Unmap(0u, nullptr); + } + + timestamp_queries.clear(); + + 
pCommandList->EndQuery(m_pTimestampQueryHeap, + D3D12_QUERY_TYPE_TIMESTAMP, + GetTimestampQueryIndex()); + + timestamp_queries.push_back(TIMESTAMP_QUERY_INIT); + } + } + + uint32_t SSSR::GetTimestampQueryIndex() const + { + return m_timestampFrameIndex * TIMESTAMP_QUERY_COUNT + static_cast(m_timestampQueries[m_timestampFrameIndex].size()); + } + +} \ No newline at end of file diff --git a/sample/src/DX12/Sources/SSSR.h b/sample/src/DX12/Sources/SSSR.h new file mode 100644 index 0000000..9106605 --- /dev/null +++ b/sample/src/DX12/Sources/SSSR.h @@ -0,0 +1,179 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ +#pragma once + +#include "Base/DynamicBufferRing.h" +#include "Base/Texture.h" +#include "BufferDX12.h" +#include "ShaderPass.h" +#include "BlueNoiseSampler.h" + +using namespace CAULDRON_DX12; +namespace SSSR_SAMPLE_DX12 +{ + class DescriptorTable : public ResourceView { }; + + struct SSSRCreationInfo { + Texture* HDR; + Texture* DepthHierarchy; + Texture* MotionVectors; + Texture* NormalBuffer; + Texture* NormalHistoryBuffer; + Texture* SpecularRoughness; + SkyDome* SkyDome; + bool pingPongNormal; + bool pingPongRoughness; + uint32_t outputWidth; + uint32_t outputHeight; + }; + + struct SSSRConstants + { + XMFLOAT4X4 invViewProjection; + XMFLOAT4X4 projection; + XMFLOAT4X4 invProjection; + XMFLOAT4X4 view; + XMFLOAT4X4 invView; + XMFLOAT4X4 prevViewProjection; + uint32_t frameIndex; + uint32_t maxTraversalIntersections; + uint32_t minTraversalOccupancy; + uint32_t mostDetailedMip; + float temporalStabilityFactor; + float temporalVarianceThreshold; + float depthBufferThickness; + float roughnessThreshold; + uint32_t samplesPerQuad; + uint32_t temporalVarianceGuidedTracingEnabled; + }; + + class SSSR + { + public: + SSSR(); + void OnCreate(Device* pDevice, StaticResourceViewHeap& cpuVisibleHeap, ResourceViewHeaps& resourceHeap, UploadHeap& uploadHeap, DynamicBufferRing& constantBufferRing, uint32_t frameCountBeforeReuse, bool enablePerformanceCounters); + void OnCreateWindowSizeDependentResources(const SSSRCreationInfo& input); + + void OnDestroy(); + void OnDestroyWindowSizeDependentResources(); + + void Draw(ID3D12GraphicsCommandList* pCommandList, const SSSRConstants& sssrConstants, bool showIntersectResult = false); + Texture* GetOutputTexture(); + + std::uint64_t GetTileClassificationElapsedGpuTicks() const; + std::uint64_t GetIntersectElapsedGpuTicks() const; + std::uint64_t GetDenoiserElapsedGpuTicks() const; + + void Recompile(); + private: + void CreateResources(); + void 
CreateWindowSizeDependentResources(); + + void SetupClassifyTilesPass(bool allocateDescriptorTable); + void SetupPrepareIndirectArgsPass(bool allocateDescriptorTable); + void SetupIntersectionPass(bool allocateDescriptorTable); + void SetupResolveSpatialPass(bool allocateDescriptorTable); + void SetupResolveTemporalPass(bool allocateDescriptorTable); + void SetupBlurPass(bool allocateDescriptorTable); + void InitializeDescriptorTableData(const SSSRCreationInfo& input); + void SetupPerformanceCounters(); + void QueryTimestamps(ID3D12GraphicsCommandList* pCommandList); + uint32_t GetTimestampQueryIndex() const; + + Device* m_pDevice; + DynamicBufferRing* m_pConstantBufferRing; + StaticResourceViewHeap* m_pCpuVisibleHeap; + ResourceViewHeaps* m_pResourceViewHeaps; + UploadHeap* m_pUploadHeap; + UploadHeapBuffersDX12 m_uploadHeapBuffers; + + uint32_t m_screenWidth; + uint32_t m_screenHeight; + + // Containing all rays that need to be traced. + Texture m_rayList; + // Contains the number of rays that we trace. + Texture m_rayCounter; + // Indirect arguments for intersection pass. + Texture m_intersectionPassIndirectArgs; + // Intermediate result of the temporal denoising pass - double buffered to keep history and aliases the intersection result. + Texture m_temporalDenoiserResult[2]; + // Holds the length of each reflection ray - used for temporal reprojection. + Texture m_rayLengths; + // Holds the temporal variance of the last two frames. + Texture m_temporalVarianceMask; + // Tells us if we have to run the denoiser on a specific tile or if we just have to copy the values + Texture m_tileMetaDataMask; + // Extracted roughness values, also double buffered to keep the history. + Texture m_roughnessTexture[2]; + + // Hold the blue noise buffers. 
+ BlueNoiseSamplerD3D12 m_blueNoiseSampler; + + ShaderPass m_ClassifyTilesPass; + ShaderPass m_PrepareIndirectArgsPass; + ShaderPass m_IntersectPass; + ShaderPass m_ResolveSpatialPass; + ShaderPass m_ResolveTemporalPass; + ShaderPass m_BlurPass; + + D3D12_SAMPLER_DESC m_environmentMapSamplerDesc; + + // The command signature for the indirect dispatches. + ID3D12CommandSignature* m_pCommandSignature; + + CBV_SRV_UAV m_environmentMapSRV; + Texture m_outputBuffer; + + uint32_t m_frameCountBeforeReuse; + uint32_t m_bufferIndex; + + enum TimestampQuery + { + TIMESTAMP_QUERY_INIT, + TIMESTAMP_QUERY_TILE_CLASSIFICATION, + TIMESTAMP_QUERY_INTERSECTION, + TIMESTAMP_QUERY_DENOISING, + + TIMESTAMP_QUERY_COUNT + }; + + //The type definition for an array of timestamp queries. + using TimestampQueries = std::vector; + + // The query heap for the recorded timestamps. + ID3D12QueryHeap* m_pTimestampQueryHeap; + // The buffer for reading the timestamp queries. + ID3D12Resource* m_pTimestampQueryBuffer; + // The number of GPU ticks spent in the tile classification pass. + std::uint64_t m_tileClassificationElapsedGpuTicks; + // The number of GPU ticks spent in depth buffer intersection. + std::uint64_t m_intersectionElapsedGpuTicks; + // The number of GPU ticks spent denoising. + std::uint64_t m_denoisingElapsedGpuTicks; + // The array of timestamp that were queried. + std::vector m_timestampQueries; + // The index of the active set of timestamp queries. + uint32_t m_timestampFrameIndex; + bool m_isPerformanceCountersEnabled; + }; +} \ No newline at end of file diff --git a/sample/src/DX12/Sources/SampleRenderer.cpp b/sample/src/DX12/Sources/SampleRenderer.cpp index 67dd981..6303a9e 100644 --- a/sample/src/DX12/Sources/SampleRenderer.cpp +++ b/sample/src/DX12/Sources/SampleRenderer.cpp @@ -24,176 +24,126 @@ THE SOFTWARE. 
#include "SampleRenderer.h" #include +#include "Utils.h" #undef max #undef min -void FfxSssrLoggingFunction(const char* pMessage, void* pUserData) -{ - char buffer[4096]; - snprintf(buffer, sizeof(buffer), "%s\n", pMessage); - MessageBox(NULL, buffer, "RtShadows Error", MB_OK | MB_ICONERROR); - exit(-1); -} - //-------------------------------------------------------------------------------------- // // OnCreate // //-------------------------------------------------------------------------------------- -void SampleRenderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) +void SampleRenderer::OnCreate(Device* pDevice, SwapChain* pSwapChain) { - m_pDevice = pDevice; - - // Initialize helpers - - // Create all the heaps for the resources views - const uint32_t cbvDescriptorCount = 2000; - const uint32_t srvDescriptorCount = 2000; - const uint32_t uavDescriptorCount = 10; - const uint32_t dsvDescriptorCount = 10; - const uint32_t rtvDescriptorCount = 60; - const uint32_t samplerDescriptorCount = 20; - m_ResourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, dsvDescriptorCount, rtvDescriptorCount, samplerDescriptorCount); - m_CpuVisibleHeap.OnCreate(m_pDevice, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10, true); - - // Create a commandlist ring for the Direct queue - uint32_t commandListsPerBackBuffer = 8; - m_CommandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer, pDevice->GetGraphicsQueue()->GetDesc()); - - // Create a 'dynamic' constant buffer - const uint32_t constantBuffersMemSize = 200 * 1024 * 1024; - m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, &m_ResourceViewHeaps); - - // Create a 'static' pool for vertices, indices and constant buffers - const uint32_t staticGeometryMemSize = 5 * 128 * 1024 * 1024; - m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); - - // initialize the GPU time stamps module - m_GPUTimer.OnCreate(pDevice, 
backBufferCount); - - // Quick helper to upload resources, it has it's own commandList and uses suballocation. - // for 4K textures we'll need 100Megs - const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; - m_UploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses suballocation for faster results) - - // Create the depth buffer views - m_ResourceViewHeaps.AllocDSVDescriptor(1, &m_DepthBufferDSV); - - // Create a Shadowmap atlas to hold 4 cascades/spotlights - m_ShadowMap.InitDepthStencil(pDevice, "m_pShadowMap", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, 2 * 1024, 2 * 1024, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); - m_ResourceViewHeaps.AllocDSVDescriptor(1, &m_ShadowMapDSV); - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_ShadowMapSRV); - m_ShadowMap.CreateDSV(0, &m_ShadowMapDSV); - m_ShadowMap.CreateSRV(0, &m_ShadowMapSRV); - - m_AmbientLight.OnCreate(pDevice, &m_UploadHeap, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\white\\diffuse.dds", "..\\media\\envmaps\\white\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 1); - m_SkyDome.OnCreate(pDevice, &m_UploadHeap, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 1); - m_SkyDomeProc.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 1); - m_Wireframe.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 1); - m_WireframeBox.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); - m_DownSample.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT); - m_Bloom.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, 
DXGI_FORMAT_R16G16B16A16_FLOAT); - - m_BrdfLut.InitFromFile(pDevice, &m_UploadHeap, "BrdfLut.dds", false); // LUT images are stored as linear - - // Create tonemapping pass - m_ToneMapping.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, pSwapChain->GetFormat()); - - // Initialize UI rendering resources - m_ImGUI.OnCreate(pDevice, &m_UploadHeap, &m_ResourceViewHeaps, &m_ConstantBufferRing, pSwapChain->GetFormat()); - - m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_HDRRTV); - - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_HDRSRV); - - // motion vectors views - m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_MotionVectorsRTV); - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_MotionVectorsSRV); - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(2, &m_MotionVectorsInputsSRV); - m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_NormalBufferRTV); - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_NormalBufferSRV); - m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_SpecularRoughnessRTV); - - CreateApplyReflectionsPipeline(); - m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_ApplyPipelineRTV); - - CreateDepthDownsamplePipeline(); - m_CpuVisibleHeap.AllocDescriptor(1, &m_AtomicCounterUAV); - - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_DepthBufferDescriptor); - for (int i = 0; i < 13; ++i) - { - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_DepthHierarchyDescriptors[i]); - } - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_AtomicCounterUAVGPU); - - m_DownsampleDescriptorTable = m_DepthBufferDescriptor.GetGPU(); - - // Create a command list for upload - ID3D12CommandAllocator * ca; - ThrowIfFailed(m_pDevice->GetDevice()->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&ca))); - ID3D12GraphicsCommandList * cl; - ThrowIfFailed(m_pDevice->GetDevice()->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, ca, nullptr, IID_PPV_ARGS(&cl))); - - FfxSssrD3D12CreateContextInfo d3d12ContextInfo = {}; - 
d3d12ContextInfo.pDevice = m_pDevice->GetDevice(); - d3d12ContextInfo.pUploadCommandList = cl; - - FfxSssrLoggingCallbacks loggingCallbacks = {}; - loggingCallbacks.pUserData = this; - loggingCallbacks.pfnLogging = FfxSssrLoggingFunction; - - FfxSssrCreateContextInfo contextInfo = {}; - contextInfo.apiVersion = FFX_SSSR_API_VERSION; - contextInfo.frameCountBeforeMemoryReuse = backBufferCount; - contextInfo.maxReflectionViewCount = 1; - contextInfo.pD3D12CreateContextInfo = &d3d12ContextInfo; - contextInfo.pLoggingCallbacks = &loggingCallbacks; - contextInfo.uploadBufferSize = 8 * 1024 * 1024; - contextInfo.pRoughnessTextureFormat = L"float4"; - contextInfo.pUnpackRoughnessSnippet = L"float FfxSssrUnpackRoughness(FFX_SSSR_ROUGHNESS_TEXTURE_FORMAT packed) { return packed.w; }"; - contextInfo.pNormalsTextureFormat = L"float4"; - contextInfo.pUnpackNormalsSnippet = L"float3 FfxSssrUnpackNormals(FFX_SSSR_NORMALS_TEXTURE_FORMAT packed) { return 2 * packed.xyz - 1; }"; - contextInfo.pSceneTextureFormat = L"float4"; - contextInfo.pUnpackSceneRadianceSnippet = L"float3 FfxSssrUnpackSceneRadiance(FFX_SSSR_SCENE_TEXTURE_FORMAT packed) { return packed.xyz; }"; - contextInfo.pDepthTextureFormat = L"float"; - contextInfo.pUnpackDepthSnippet = L"float FfxSssrUnpackDepth(FFX_SSSR_DEPTH_TEXTURE_FORMAT packed) { return packed.x; }"; - contextInfo.pMotionVectorFormat = L"float2"; - contextInfo.pUnpackMotionVectorsSnippet = L"float2 FfxSssrUnpackMotionVectors(FFX_SSSR_MOTION_VECTOR_TEXTURE_FORMAT packed) { return packed.xy * float2(0.5, -0.5); }"; - - FfxSssrStatus status = ffxSssrCreateContext(&contextInfo, &m_SssrContext); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrCreateContext failed."); - } - - // Wait for the upload to finish; - ThrowIfFailed(cl->Close()); - m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CommandListCast(&cl)); - m_pDevice->GPUFlush(); - cl->Release(); - ca->Release(); - - // Allocate descriptors used as input to SSSR - 
m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrSceneSRV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrDepthBufferHierarchySRV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrMotionBufferSRV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrNormalBufferSRV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrNormalHistoryBufferSRV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrRoughnessBufferSRV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrRoughnessHistoryBufferSRV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrOutputBufferUAV); - m_CpuVisibleHeap.AllocDescriptor(1, &m_SssrEnvironmentMapSRV); - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_SssrOutputBufferUAVGPU); - - // Desctriptor table for apply pass - m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(4, &m_ApplyPassDescriptorTable); - - // Make sure upload heap has finished uploading before continuing + m_pDevice = pDevice; + + // Initialize helpers + + // Create all the heaps for the resources views + const uint32_t cbvDescriptorCount = 2000; + const uint32_t srvDescriptorCount = 2000; + const uint32_t uavDescriptorCount = 10; + const uint32_t dsvDescriptorCount = 10; + const uint32_t rtvDescriptorCount = 60; + const uint32_t samplerDescriptorCount = 20; + m_ResourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, dsvDescriptorCount, rtvDescriptorCount, samplerDescriptorCount); + m_CpuVisibleHeap.OnCreate(m_pDevice, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 20, true); + + // Create a commandlist ring for the Direct queue + uint32_t commandListsPerBackBuffer = 8; + m_CommandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer, pDevice->GetGraphicsQueue()->GetDesc()); + + // Create a 'dynamic' constant buffer + const uint32_t constantBuffersMemSize = 200 * 1024 * 1024; + m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, &m_ResourceViewHeaps); + + // Create a 'static' pool for vertices, indices and constant buffers + const uint32_t 
staticGeometryMemSize = 5 * 128 * 1024 * 1024; + m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); + + // initialize the GPU time stamps module + m_GPUTimer.OnCreate(pDevice, backBufferCount); + + // Quick helper to upload resources, it has it's own commandList and uses suballocation. + const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; + m_UploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses suballocation for faster results) + + // Create the depth buffer views + m_ResourceViewHeaps.AllocDSVDescriptor(1, &m_DepthBufferDSV); + + // Create a Shadowmap atlas to hold 4 cascades/spotlights + m_ShadowMap.InitDepthStencil(pDevice, "m_pShadowMap", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, 2 * 1024, 2 * 1024, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); + m_ResourceViewHeaps.AllocDSVDescriptor(1, &m_ShadowMapDSV); + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_ShadowMapSRV); + m_ShadowMap.CreateDSV(0, &m_ShadowMapDSV); + m_ShadowMap.CreateSRV(0, &m_ShadowMapSRV); + + m_AmbientLight.OnCreate(pDevice, &m_UploadHeap, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\white\\diffuse.dds", "..\\media\\envmaps\\white\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 1); + m_SkyDome.OnCreate(pDevice, &m_UploadHeap, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 1); + m_SkyDomeProc.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 1); + m_Wireframe.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 1); + m_WireframeBox.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); + m_DownSample.OnCreate(pDevice, &m_ResourceViewHeaps, 
&m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT); + m_Bloom.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT); + + m_BrdfLut.InitFromFile(pDevice, &m_UploadHeap, "BrdfLut.dds", false); // LUT images are stored as linear + + // Create tonemapping pass + m_ToneMapping.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, pSwapChain->GetFormat()); + + // Initialize UI rendering resources + m_ImGUI.OnCreate(pDevice, &m_UploadHeap, &m_ResourceViewHeaps, &m_ConstantBufferRing, pSwapChain->GetFormat()); + + m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_HDRRTV); + + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_HDRSRV); + + // motion vectors views + m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_MotionVectorsRTV); + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_MotionVectorsSRV); + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(2, &m_MotionVectorsInputsSRV); + m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_NormalBufferRTV); + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_NormalBufferSRV); + m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_SpecularRoughnessRTV); + + CreateApplyReflectionsPipeline(); + m_ResourceViewHeaps.AllocRTVDescriptor(1, &m_ApplyPipelineRTV); + + CreateDepthDownsamplePipeline(); + m_CpuVisibleHeap.AllocDescriptor(1, &m_AtomicCounterUAV); + + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_DepthBufferDescriptor); + for (int i = 0; i < 13; ++i) + { + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_DepthHierarchyDescriptors[i]); + } + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_AtomicCounterUAVGPU); + + m_DownsampleDescriptorTable = m_DepthBufferDescriptor.GetGPU(); + + // Create a command list for upload + ID3D12CommandAllocator* ca; + ThrowIfFailed(m_pDevice->GetDevice()->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&ca))); + ID3D12GraphicsCommandList* cl; + 
ThrowIfFailed(m_pDevice->GetDevice()->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, ca, nullptr, IID_PPV_ARGS(&cl))); + + m_Sssr.OnCreate(m_pDevice, m_CpuVisibleHeap, m_ResourceViewHeaps, m_UploadHeap, m_ConstantBufferRing, backBufferCount, true); + + // Wait for the upload to finish; + ThrowIfFailed(cl->Close()); + m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CommandListCast(&cl)); + m_pDevice->GPUFlush(); + cl->Release(); + ca->Release(); + + // Desctriptor table for apply pass + m_ResourceViewHeaps.AllocCBV_SRV_UAVDescriptor(4, &m_ApplyPassDescriptorTable); + + // Make sure upload heap has finished uploading before continuing #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + m_UploadHeap.FlushAndFinish(); #endif } @@ -204,36 +154,36 @@ void SampleRenderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) //-------------------------------------------------------------------------------------- void SampleRenderer::OnDestroy() { - m_ImGUI.OnDestroy(); - m_ToneMapping.OnDestroy(); - m_Bloom.OnDestroy(); - m_DownSample.OnDestroy(); - m_WireframeBox.OnDestroy(); - m_Wireframe.OnDestroy(); - m_SkyDomeProc.OnDestroy(); - m_SkyDome.OnDestroy(); - m_AmbientLight.OnDestroy(); - m_ShadowMap.OnDestroy(); - m_BrdfLut.OnDestroy(); - - ffxSssrDestroyContext(m_SssrContext); - - if (m_ApplyPipelineState != nullptr) - m_ApplyPipelineState->Release(); - if (m_ApplyRootSignature != nullptr) - m_ApplyRootSignature->Release(); - if (m_DownsamplePipelineState != nullptr) - m_DownsamplePipelineState->Release(); - if (m_DownsampleRootSignature != nullptr) - m_DownsampleRootSignature->Release(); - - m_UploadHeap.OnDestroy(); - m_GPUTimer.OnDestroy(); - m_VidMemBufferPool.OnDestroy(); - m_ConstantBufferRing.OnDestroy(); - m_CommandListRing.OnDestroy(); - m_CpuVisibleHeap.OnDestroy(); - m_ResourceViewHeaps.OnDestroy(); + + 
m_ImGUI.OnDestroy(); + m_ToneMapping.OnDestroy(); + m_Bloom.OnDestroy(); + m_DownSample.OnDestroy(); + m_WireframeBox.OnDestroy(); + m_Wireframe.OnDestroy(); + m_SkyDomeProc.OnDestroy(); + m_SkyDome.OnDestroy(); + m_AmbientLight.OnDestroy(); + m_ShadowMap.OnDestroy(); + m_BrdfLut.OnDestroy(); + + if (m_ApplyPipelineState != nullptr) + m_ApplyPipelineState->Release(); + if (m_ApplyRootSignature != nullptr) + m_ApplyRootSignature->Release(); + if (m_DownsamplePipelineState != nullptr) + m_DownsamplePipelineState->Release(); + if (m_DownsampleRootSignature != nullptr) + m_DownsampleRootSignature->Release(); + + m_UploadHeap.OnDestroy(); + m_GPUTimer.OnDestroy(); + m_VidMemBufferPool.OnDestroy(); + m_ConstantBufferRing.OnDestroy(); + m_CommandListRing.OnDestroy(); + m_CpuVisibleHeap.OnDestroy(); + m_ResourceViewHeaps.OnDestroy(); + m_Sssr.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -241,146 +191,97 @@ void SampleRenderer::OnDestroy() // OnCreateWindowSizeDependentResources // //-------------------------------------------------------------------------------------- -void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) +void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain* pSwapChain, uint32_t Width, uint32_t Height) { - m_Width = Width; - m_Height = Height; - - // Set the viewport - // - m_Viewport = { 0.0f, 0.0f, static_cast(m_Width), static_cast(m_Height), 0.0f, 1.0f }; - - // Create scissor rectangle - // - m_Scissor = { 0, 0, (LONG)m_Width, (LONG)m_Height }; - - // Create depth buffer - // - m_DepthBuffer.InitDepthStencil(m_pDevice, "m_depthBuffer", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); - m_DepthBuffer.CreateSRV(0, &m_DepthBufferDescriptor); - m_DepthBuffer.CreateDSV(0, &m_DepthBufferDSV); - - // Create Texture + RTV - // - 
CD3DX12_RESOURCE_DESC RDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16B16A16_FLOAT, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); - m_HDR.InitRenderTarget(m_pDevice, "m_HDR", &RDesc, D3D12_RESOURCE_STATE_RENDER_TARGET); - m_HDR.CreateSRV(0, &m_HDRSRV); - m_HDR.CreateRTV(0, &m_HDRRTV); - m_HDR.CreateSRV(0, &m_MotionVectorsInputsSRV); - - CD3DX12_RESOURCE_DESC reflDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16B16A16_FLOAT, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); - m_SssrOutputBuffer.Init(m_pDevice, "m_SssrOutputBuffer", &reflDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr); - - m_NormalBuffer.InitRenderTarget(m_pDevice, "m_NormalBuffer", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R10G10B10A2_UNORM, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); - m_NormalBuffer.CreateRTV(0, &m_NormalBufferRTV); - m_NormalBuffer.CreateSRV(0, &m_NormalBufferSRV); - m_NormalHistoryBuffer.Init(m_pDevice, "m_NormalHistoryBuffer", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R10G10B10A2_UNORM, m_Width, m_Height, 1, 1, 1, 0), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr); - - float clearColorOne[] = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_SpecularRoughnessHistory.InitRenderTarget(m_pDevice, "m_SpecularRoughnessHistory", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, clearColorOne); - m_SpecularRoughness.InitRenderTarget(m_pDevice, "m_SpecularRoughness", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET), D3D12_RESOURCE_STATE_RENDER_TARGET, clearColorOne); - m_SpecularRoughness.CreateRTV(0, &m_SpecularRoughnessRTV); - - m_MotionVectors.InitRenderTarget(m_pDevice, "m_MotionVector", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16_FLOAT, m_Width, m_Height, 1, 1, 1, 0, 
D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); - m_MotionVectors.CreateRTV(0, &m_MotionVectorsRTV); - m_MotionVectors.CreateSRV(1, &m_MotionVectorsInputsSRV); - m_MotionVectors.CreateSRV(0, &m_MotionVectorsSRV); - - // update bloom and downscaling effect - // - m_DownSample.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR, 5); //downsample the HDR texture 5 times - m_Bloom.OnCreateWindowSizeDependentResources(m_Width / 2, m_Height / 2, m_DownSample.GetTexture(), 5, &m_HDR); - - // update the pipelines if the swapchain render pass has changed (for example when the format of the swapchain changes) - // - m_ToneMapping.UpdatePipelines(pSwapChain->GetFormat()); - m_ImGUI.UpdatePipeline(pSwapChain->GetFormat()); - - // Depth downsampling pass with single CS - { - m_DepthMipLevelCount = static_cast(std::log2(std::max(m_Width, m_Height))) + 1; - - // Downsampled depth buffer - CD3DX12_RESOURCE_DESC dsResDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, m_Width, m_Height, 1, m_DepthMipLevelCount, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); - m_DepthHierarchy.Init(m_pDevice, "m_DepthHierarchy", &dsResDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr); - UINT i = 0; - for (; i < 13u; ++i) - { - m_DepthHierarchy.CreateUAV(0, &m_DepthHierarchyDescriptors[i], std::min(i, m_DepthMipLevelCount - 1)); - } - - // Atomic counter - CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Buffer(1, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); - resDesc.Format = DXGI_FORMAT_R32_UINT; - m_AtomicCounter.InitBuffer(m_pDevice, "m_AtomicCounter", &resDesc, 0, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - m_AtomicCounter.CreateBufferUAV(0, NULL, &m_AtomicCounterUAV); - m_AtomicCounter.CreateBufferUAV(0, NULL, &m_AtomicCounterUAVGPU); - } - - // Setup resource views - // - m_HDR.CreateSRV(0, &m_SssrSceneSRV); - m_DepthHierarchy.CreateSRV(0, &m_SssrDepthBufferHierarchySRV); - m_MotionVectors.CreateSRV(0, &m_SssrMotionBufferSRV); - m_NormalBuffer.CreateSRV(0, 
&m_SssrNormalBufferSRV); - m_NormalHistoryBuffer.CreateSRV(0, &m_SssrNormalHistoryBufferSRV); - m_SpecularRoughness.CreateSRV(0, &m_SssrRoughnessBufferSRV); - m_SpecularRoughnessHistory.CreateSRV(0, &m_SssrRoughnessHistoryBufferSRV); - m_SssrOutputBuffer.CreateUAV(0, &m_SssrOutputBufferUAV); - m_SssrOutputBuffer.CreateUAV(0, &m_SssrOutputBufferUAVGPU); - - D3D12_STATIC_SAMPLER_DESC environmentSamplerDesc = {}; - m_SkyDome.SetDescriptorSpec(0, &m_SssrEnvironmentMapSRV, 0, &environmentSamplerDesc); - m_SssrEnvironmentMapSamplerDesc.AddressU = environmentSamplerDesc.AddressU; - m_SssrEnvironmentMapSamplerDesc.AddressV = environmentSamplerDesc.AddressV; - m_SssrEnvironmentMapSamplerDesc.AddressW = environmentSamplerDesc.AddressW; - m_SssrEnvironmentMapSamplerDesc.BorderColor[0] = 0; - m_SssrEnvironmentMapSamplerDesc.BorderColor[1] = 0; - m_SssrEnvironmentMapSamplerDesc.BorderColor[2] = 0; - m_SssrEnvironmentMapSamplerDesc.BorderColor[3] = 0; - m_SssrEnvironmentMapSamplerDesc.ComparisonFunc = environmentSamplerDesc.ComparisonFunc; - m_SssrEnvironmentMapSamplerDesc.Filter = environmentSamplerDesc.Filter; - m_SssrEnvironmentMapSamplerDesc.MaxAnisotropy = environmentSamplerDesc.MaxAnisotropy; - m_SssrEnvironmentMapSamplerDesc.MaxLOD = environmentSamplerDesc.MaxLOD; - m_SssrEnvironmentMapSamplerDesc.MinLOD = environmentSamplerDesc.MinLOD; - m_SssrEnvironmentMapSamplerDesc.MipLODBias = environmentSamplerDesc.MipLODBias; - - FfxSssrD3D12CreateReflectionViewInfo d3d12ReflectionViewInfo = {}; - d3d12ReflectionViewInfo.depthBufferHierarchySRV = m_SssrDepthBufferHierarchySRV.GetCPU(); - d3d12ReflectionViewInfo.motionBufferSRV = m_SssrMotionBufferSRV.GetCPU(); - d3d12ReflectionViewInfo.normalBufferSRV = m_SssrNormalBufferSRV.GetCPU(); - d3d12ReflectionViewInfo.roughnessBufferSRV = m_SssrRoughnessBufferSRV.GetCPU(); - d3d12ReflectionViewInfo.normalHistoryBufferSRV = m_SssrNormalHistoryBufferSRV.GetCPU(); - d3d12ReflectionViewInfo.roughnessHistoryBufferSRV = 
m_SssrRoughnessHistoryBufferSRV.GetCPU(); - d3d12ReflectionViewInfo.reflectionViewUAV = m_SssrOutputBufferUAV.GetCPU(); - d3d12ReflectionViewInfo.sceneFormat = m_SssrOutputBuffer.GetFormat(); - d3d12ReflectionViewInfo.sceneSRV = m_SssrSceneSRV.GetCPU(); - d3d12ReflectionViewInfo.environmentMapSRV = m_SssrEnvironmentMapSRV.GetCPU(); - d3d12ReflectionViewInfo.pEnvironmentMapSamplerDesc = &m_SssrEnvironmentMapSamplerDesc; - - bool pingPongNormalBuffers = false; - bool pingPongRoughnessBuffers = false; - - FfxSssrCreateReflectionViewInfo reflectionViewInfo = {}; - reflectionViewInfo.flags = FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS; - reflectionViewInfo.flags |= pingPongNormalBuffers ? FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_NORMAL_BUFFERS : 0; - reflectionViewInfo.flags |= pingPongRoughnessBuffers ? FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_PING_PONG_ROUGHNESS_BUFFERS : 0; - reflectionViewInfo.outputWidth = m_Width; - reflectionViewInfo.outputHeight = m_Height; - reflectionViewInfo.pD3D12CreateReflectionViewInfo = &d3d12ReflectionViewInfo; - - FfxSssrStatus status = ffxSssrCreateReflectionView(m_SssrContext, &reflectionViewInfo, &m_SssrReflectionView); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrCreateReflectionView failed."); - } - m_SssrCreatedReflectionView = true; - - // Fill descriptor table for apply pass - m_SssrOutputBuffer.CreateSRV(0, &m_ApplyPassDescriptorTable); - m_NormalBuffer.CreateSRV(1, &m_ApplyPassDescriptorTable); - m_SpecularRoughness.CreateSRV(2, &m_ApplyPassDescriptorTable); - m_BrdfLut.CreateSRV(3, &m_ApplyPassDescriptorTable); + m_Width = Width; + m_Height = Height; + + // Set the viewport + // + m_Viewport = { 0.0f, 0.0f, static_cast(m_Width), static_cast(m_Height), 0.0f, 1.0f }; + + // Create scissor rectangle + // + m_Scissor = { 0, 0, (LONG)m_Width, (LONG)m_Height }; + + // Create depth buffer + // + m_DepthBuffer.InitDepthStencil(m_pDevice, "m_depthBuffer", 
&CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); + m_DepthBuffer.CreateSRV(0, &m_DepthBufferDescriptor); + m_DepthBuffer.CreateDSV(0, &m_DepthBufferDSV); + + // Create Texture + RTV + // + CD3DX12_RESOURCE_DESC RDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16B16A16_FLOAT, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + m_HDR.InitRenderTarget(m_pDevice, "m_HDR", &RDesc, D3D12_RESOURCE_STATE_RENDER_TARGET); + m_HDR.CreateSRV(0, &m_HDRSRV); + m_HDR.CreateRTV(0, &m_HDRRTV); + m_HDR.CreateSRV(0, &m_MotionVectorsInputsSRV); + + m_NormalBuffer.InitRenderTarget(m_pDevice, "m_NormalBuffer", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R10G10B10A2_UNORM, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); + m_NormalBuffer.CreateRTV(0, &m_NormalBufferRTV); + m_NormalBuffer.CreateSRV(0, &m_NormalBufferSRV); + m_NormalHistoryBuffer.Init(m_pDevice, "m_NormalHistoryBuffer", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R10G10B10A2_UNORM, m_Width, m_Height, 1, 1, 1, 0), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr); + + float clearColorOne[] = { 1.0f, 1.0f, 1.0f, 1.0f }; + m_SpecularRoughness.InitRenderTarget(m_pDevice, "m_SpecularRoughness", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET), D3D12_RESOURCE_STATE_RENDER_TARGET, clearColorOne); + m_SpecularRoughness.CreateRTV(0, &m_SpecularRoughnessRTV); + + m_MotionVectors.InitRenderTarget(m_pDevice, "m_MotionVector", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16_FLOAT, m_Width, m_Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); + m_MotionVectors.CreateRTV(0, &m_MotionVectorsRTV); + m_MotionVectors.CreateSRV(1, &m_MotionVectorsInputsSRV); + m_MotionVectors.CreateSRV(0, &m_MotionVectorsSRV); + + // update bloom and downscaling effect + // + m_DownSample.OnCreateWindowSizeDependentResources(m_Width, 
m_Height, &m_HDR, 5); // downsample the HDR texture 5 times + m_Bloom.OnCreateWindowSizeDependentResources(m_Width / 2, m_Height / 2, m_DownSample.GetTexture(), 5, &m_HDR); + + // update the pipelines if the swapchain render pass has changed (for example when the format of the swapchain changes) + // + m_ToneMapping.UpdatePipelines(pSwapChain->GetFormat()); + m_ImGUI.UpdatePipeline(pSwapChain->GetFormat()); + + // Depth downsampling pass with single CS + { + m_DepthMipLevelCount = static_cast(std::log2(std::max(m_Width, m_Height))) + 1; + + // Downsampled depth buffer + CD3DX12_RESOURCE_DESC dsResDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, m_Width, m_Height, 1, m_DepthMipLevelCount, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + m_DepthHierarchy.Init(m_pDevice, "m_DepthHierarchy", &dsResDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr); + UINT i = 0; + for (; i < 13u; ++i) + { + m_DepthHierarchy.CreateUAV(0, &m_DepthHierarchyDescriptors[i], std::min(i, m_DepthMipLevelCount - 1)); + } + + // Atomic counter + CD3DX12_RESOURCE_DESC resDesc = CD3DX12_RESOURCE_DESC::Buffer(1, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + resDesc.Format = DXGI_FORMAT_R32_UINT; + m_AtomicCounter.InitBuffer(m_pDevice, "m_AtomicCounter", &resDesc, 0, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + m_AtomicCounter.CreateBufferUAV(0, NULL, &m_AtomicCounterUAV); + m_AtomicCounter.CreateBufferUAV(0, NULL, &m_AtomicCounterUAVGPU); + } + + SSSRCreationInfo sssr_input_textures; + sssr_input_textures.HDR = &m_HDR; + sssr_input_textures.NormalBuffer = &m_NormalBuffer; + sssr_input_textures.MotionVectors = &m_MotionVectors; + sssr_input_textures.DepthHierarchy = &m_DepthHierarchy; + sssr_input_textures.SpecularRoughness = &m_SpecularRoughness; + sssr_input_textures.NormalHistoryBuffer = &m_NormalHistoryBuffer; + sssr_input_textures.SkyDome = &m_SkyDome; + sssr_input_textures.pingPongNormal = false; + sssr_input_textures.pingPongRoughness = false; + sssr_input_textures.outputWidth 
= Width; + sssr_input_textures.outputHeight = Height; + m_Sssr.OnCreateWindowSizeDependentResources(sssr_input_textures); + + // Fill descriptor table for apply pass + m_Sssr.GetOutputTexture()->CreateSRV(0, &m_ApplyPassDescriptorTable); + m_NormalBuffer.CreateSRV(1, &m_ApplyPassDescriptorTable); + m_SpecularRoughness.CreateSRV(2, &m_ApplyPassDescriptorTable); + m_BrdfLut.CreateSRV(3, &m_ApplyPassDescriptorTable); } //-------------------------------------------------------------------------------------- @@ -390,25 +291,19 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, //-------------------------------------------------------------------------------------- void SampleRenderer::OnDestroyWindowSizeDependentResources() { - m_Bloom.OnDestroyWindowSizeDependentResources(); - m_DownSample.OnDestroyWindowSizeDependentResources(); - - m_MotionVectors.OnDestroy(); - m_SpecularRoughness.OnDestroy(); - m_SpecularRoughnessHistory.OnDestroy(); - m_NormalBuffer.OnDestroy(); - m_NormalHistoryBuffer.OnDestroy(); - m_SssrOutputBuffer.OnDestroy(); - - if (m_SssrCreatedReflectionView) - { - ffxSssrDestroyReflectionView(m_SssrContext, m_SssrReflectionView); - } - - m_HDR.OnDestroy(); - m_DepthBuffer.OnDestroy(); - m_DepthHierarchy.OnDestroy(); - m_AtomicCounter.OnDestroy(); + m_Bloom.OnDestroyWindowSizeDependentResources(); + m_DownSample.OnDestroyWindowSizeDependentResources(); + m_Sssr.OnDestroyWindowSizeDependentResources(); + + m_MotionVectors.OnDestroy(); + m_SpecularRoughness.OnDestroy(); + m_NormalBuffer.OnDestroy(); + m_NormalHistoryBuffer.OnDestroy(); + + m_HDR.OnDestroy(); + m_DepthBuffer.OnDestroy(); + m_DepthHierarchy.OnDestroy(); + m_AtomicCounter.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -416,136 +311,136 @@ void SampleRenderer::OnDestroyWindowSizeDependentResources() // LoadScene // //-------------------------------------------------------------------------------------- 
-int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) +int SampleRenderer::LoadScene(GLTFCommon* pGLTFCommon, int stage) { - // show loading progress - // - ImGui::OpenPopup("Loading"); - if (ImGui::BeginPopupModal("Loading", NULL, ImGuiWindowFlags_AlwaysAutoResize)) - { - float progress = (float)stage / 13.0f; - ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); - ImGui::EndPopup(); - } - - AsyncPool* pAsyncPool = &m_AsyncPool; - - // Loading stages - // - if (stage == 0) - { - } - else if (stage == 5) - { - Profile p("m_pGltfLoader->Load"); - - m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); - m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); - } - else if (stage == 6) - { - Profile p("LoadTextures"); - - // here we are loading onto the GPU all the textures and the inverse matrices - // this data will be used to create the PBR and Depth passes - m_pGLTFTexturesAndBuffers->LoadTextures(pAsyncPool); - } - else if (stage == 7) - { - { - Profile p("m_gltfDepth->OnCreate"); - - //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass - m_gltfDepth = new GltfDepthPass(); - m_gltfDepth->OnCreate( - m_pDevice, - &m_UploadHeap, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - pAsyncPool - ); - } - - { - Profile p("m_gltfMotionVectors->OnCreate"); - - m_gltfMotionVectors = new GltfMotionVectorsPass(); - m_gltfMotionVectors->OnCreate( - m_pDevice, - &m_UploadHeap, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - m_MotionVectors.GetFormat(), - m_NormalBuffer.GetFormat(), - pAsyncPool - ); - } - } - else if (stage == 9) - { - Profile p("m_gltfPBR->OnCreate"); - - // same thing as above but for the PBR pass - m_gltfPBR = new GltfPbrPass(); - m_gltfPBR->OnCreate( - m_pDevice, - &m_UploadHeap, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - 
&m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_AmbientLight, - false, - false, - DXGI_FORMAT_R16G16B16A16_FLOAT, - m_SpecularRoughness.GetFormat(), - DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, - 1, - pAsyncPool - ); - } - else if (stage == 10) - { - Profile p("m_gltfBBox->OnCreate"); - - // just a bounding box pass that will draw boundingboxes instead of the geometry itself - m_gltfBBox = new GltfBBoxPass(); - m_gltfBBox->OnCreate( - m_pDevice, - &m_UploadHeap, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_Wireframe - ); + // show loading progress + // + ImGui::OpenPopup("Loading"); + if (ImGui::BeginPopupModal("Loading", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + float progress = (float)stage / 13.0f; + ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); + ImGui::EndPopup(); + } + + AsyncPool* pAsyncPool = &m_AsyncPool; + + // Loading stages + // + if (stage == 0) + { + } + else if (stage == 5) + { + Profile p("m_pGltfLoader->Load"); + + m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); + m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); + } + else if (stage == 6) + { + Profile p("LoadTextures"); + + // here we are loading onto the GPU all the textures and the inverse matrices + // this data will be used to create the PBR and Depth passes + m_pGLTFTexturesAndBuffers->LoadTextures(pAsyncPool); + } + else if (stage == 7) + { + { + Profile p("m_gltfDepth->OnCreate"); + + //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass + m_gltfDepth = new GltfDepthPass(); + m_gltfDepth->OnCreate( + m_pDevice, + &m_UploadHeap, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + pAsyncPool + ); + } + + { + Profile p("m_gltfMotionVectors->OnCreate"); + + m_gltfMotionVectors = new GltfMotionVectorsPass(); + m_gltfMotionVectors->OnCreate( + 
m_pDevice, + &m_UploadHeap, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + m_MotionVectors.GetFormat(), + m_NormalBuffer.GetFormat(), + pAsyncPool + ); + } + } + else if (stage == 9) + { + Profile p("m_gltfPBR->OnCreate"); + + // same thing as above but for the PBR pass + m_gltfPBR = new GltfPbrPass(); + m_gltfPBR->OnCreate( + m_pDevice, + &m_UploadHeap, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + &m_AmbientLight, + false, + false, + DXGI_FORMAT_R16G16B16A16_FLOAT, + m_SpecularRoughness.GetFormat(), + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + 1, + pAsyncPool + ); + } + else if (stage == 10) + { + Profile p("m_gltfBBox->OnCreate"); + + // just a bounding box pass that will draw boundingboxes instead of the geometry itself + m_gltfBBox = new GltfBBoxPass(); + m_gltfBBox->OnCreate( + m_pDevice, + &m_UploadHeap, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + &m_Wireframe + ); #if (USE_VID_MEM==true) - // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs + m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); #endif - } - else if (stage == 11) - { - Profile p("Flush"); + } + else if (stage == 11) + { + Profile p("Flush"); - m_UploadHeap.FlushAndFinish(); + m_UploadHeap.FlushAndFinish(); #if (USE_VID_MEM==true) - //once everything is uploaded we dont need he upload heaps anymore - m_VidMemBufferPool.FreeUploadHeap(); + //once everything is uploaded we dont need he upload heaps anymore + m_VidMemBufferPool.FreeUploadHeap(); #endif - // tell caller that we are done loading the map - return 0; - } + // tell caller that we are done loading the map + return 0; + } - stage++; - return stage; + stage++; + 
return stage; } //-------------------------------------------------------------------------------------- @@ -555,452 +450,411 @@ int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) //-------------------------------------------------------------------------------------- void SampleRenderer::UnloadScene() { - m_pDevice->GPUFlush(); - if (m_gltfPBR) - { - m_gltfPBR->OnDestroy(); - delete m_gltfPBR; - m_gltfPBR = NULL; - } - - if (m_gltfMotionVectors) - { - m_gltfMotionVectors->OnDestroy(); - delete m_gltfMotionVectors; - m_gltfMotionVectors = NULL; - } - - if (m_gltfDepth) - { - m_gltfDepth->OnDestroy(); - delete m_gltfDepth; - m_gltfDepth = NULL; - } - - if (m_gltfBBox) - { - m_gltfBBox->OnDestroy(); - delete m_gltfBBox; - m_gltfBBox = NULL; - } - - if (m_pGLTFTexturesAndBuffers) - { - m_pGLTFTexturesAndBuffers->OnDestroy(); - delete m_pGLTFTexturesAndBuffers; - m_pGLTFTexturesAndBuffers = NULL; - } + m_pDevice->GPUFlush(); + if (m_gltfPBR) + { + m_gltfPBR->OnDestroy(); + delete m_gltfPBR; + m_gltfPBR = NULL; + } + + if (m_gltfMotionVectors) + { + m_gltfMotionVectors->OnDestroy(); + delete m_gltfMotionVectors; + m_gltfMotionVectors = NULL; + } + + if (m_gltfDepth) + { + m_gltfDepth->OnDestroy(); + delete m_gltfDepth; + m_gltfDepth = NULL; + } + + if (m_gltfBBox) + { + m_gltfBBox->OnDestroy(); + delete m_gltfBBox; + m_gltfBBox = NULL; + } + + if (m_pGLTFTexturesAndBuffers) + { + m_pGLTFTexturesAndBuffers->OnDestroy(); + delete m_pGLTFTexturesAndBuffers; + m_pGLTFTexturesAndBuffers = NULL; + } } void SampleRenderer::StallFrame(float targetFrametime) { - // Simulate lower frame rates - static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - std::chrono::duration diff = now - last; - last = now; - float deltaTime = 1000 * static_cast(diff.count()); - if (deltaTime < targetFrametime) - { - int deltaCount = static_cast(targetFrametime - deltaTime); 
- std::this_thread::sleep_for(std::chrono::milliseconds(deltaCount)); - } + // Simulate lower frame rates + static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + std::chrono::duration diff = now - last; + last = now; + float deltaTime = 1000 * static_cast(diff.count()); + if (deltaTime < targetFrametime) + { + int deltaCount = static_cast(targetFrametime - deltaTime); + std::this_thread::sleep_for(std::chrono::milliseconds(deltaCount)); + } } void SampleRenderer::BeginFrame() { - FfxSssrStatus status = ffxSssrAdvanceToNextFrame(m_SssrContext); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrAdvanceToNextFrame failed."); - } - - // Timing values - // - m_pDevice->GetGraphicsQueue()->GetTimestampFrequency(&m_GpuTicksPerSecond); - - // Let our resource managers do some house keeping - // - m_ConstantBufferRing.OnBeginFrame(); - m_GPUTimer.OnBeginFrame(m_GpuTicksPerSecond, &m_TimeStamps); - m_CommandListRing.OnBeginFrame(); + // Timing values + // + m_pDevice->GetGraphicsQueue()->GetTimestampFrequency(&m_GpuTicksPerSecond); + + // Let our resource managers do some house keeping + // + m_ConstantBufferRing.OnBeginFrame(); + m_GPUTimer.OnBeginFrame(m_GpuTicksPerSecond, &m_TimeStamps); + m_CommandListRing.OnBeginFrame(); } -per_frame * SampleRenderer::FillFrameConstants(State *pState) +per_frame* SampleRenderer::FillFrameConstants(State* pState) { - // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- - // - per_frame *pPerFrame = NULL; - if (m_pGLTFTexturesAndBuffers) - { - pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); - - //override gltf camera with ours - pPerFrame->mCameraViewProj = pState->camera.GetView() * pState->camera.GetProjection(); - pPerFrame->cameraPos = pState->camera.GetPosition(); - pPerFrame->emmisiveFactor = 
pState->emmisiveFactor; - pPerFrame->iblFactor = pState->iblFactor; - - //if the gltf doesn't have any lights set a directional light - if (pPerFrame->lightCount == 0) - { - pPerFrame->lightCount = 1; - pPerFrame->lights[0].color[0] = pState->lightColor.x; - pPerFrame->lights[0].color[1] = pState->lightColor.y; - pPerFrame->lights[0].color[2] = pState->lightColor.z; - GetXYZ(pPerFrame->lights[0].position, pState->lightCamera.GetPosition()); - GetXYZ(pPerFrame->lights[0].direction, pState->lightCamera.GetDirection()); - - pPerFrame->lights[0].range = 30.0f; // in meters - pPerFrame->lights[0].type = LightType_Spot; - pPerFrame->lights[0].intensity = pState->lightIntensity; - pPerFrame->lights[0].innerConeCos = cosf(pState->lightCamera.GetFovV() * 0.9f / 2.0f); - pPerFrame->lights[0].outerConeCos = cosf(pState->lightCamera.GetFovV() / 2.0f); - pPerFrame->lights[0].mLightViewProj = pState->lightCamera.GetView() * pState->lightCamera.GetProjection(); - } - - // Up to 4 spotlights can have shadowmaps. 
Each spot the light has a shadowMap index which is used to find the shadowmap in the atlas - uint32_t shadowMapIndex = 0; - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) - { - pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index so the color pass knows which shadow map to use - pPerFrame->lights[i].depthBias = 20.0f / 100000.0f; - } - else if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Directional)) - { - pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // same as above - pPerFrame->lights[i].depthBias = 100.0f / 100000.0f; - } - else - { - pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light - } - } - - m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); - - m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); - } - - return pPerFrame; + // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- + // + per_frame* pPerFrame = NULL; + if (m_pGLTFTexturesAndBuffers) + { + pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); + + //override gltf camera with ours + pPerFrame->mCameraViewProj = pState->camera.GetView() * pState->camera.GetProjection(); + pPerFrame->cameraPos = pState->camera.GetPosition(); + pPerFrame->emmisiveFactor = pState->emmisiveFactor; + pPerFrame->iblFactor = pState->iblFactor; + + //if the gltf doesn't have any lights set a directional light + if (pPerFrame->lightCount == 0) + { + pPerFrame->lightCount = 1; + pPerFrame->lights[0].color[0] = pState->lightColor.x; + pPerFrame->lights[0].color[1] = pState->lightColor.y; + pPerFrame->lights[0].color[2] = pState->lightColor.z; + GetXYZ(pPerFrame->lights[0].position, pState->lightCamera.GetPosition()); + GetXYZ(pPerFrame->lights[0].direction, pState->lightCamera.GetDirection()); + + pPerFrame->lights[0].range = 30.0f; // in meters + 
pPerFrame->lights[0].type = LightType_Spot; + pPerFrame->lights[0].intensity = pState->lightIntensity; + pPerFrame->lights[0].innerConeCos = cosf(pState->lightCamera.GetFovV() * 0.9f / 2.0f); + pPerFrame->lights[0].outerConeCos = cosf(pState->lightCamera.GetFovV() / 2.0f); + pPerFrame->lights[0].mLightViewProj = pState->lightCamera.GetView() * pState->lightCamera.GetProjection(); + } + + // Up to 4 spotlights can have shadowmaps. Each spot the light has a shadowMap index which is used to find the shadowmap in the atlas + uint32_t shadowMapIndex = 0; + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) + { + pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index so the color pass knows which shadow map to use + pPerFrame->lights[i].depthBias = 20.0f / 100000.0f; + } + else if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Directional)) + { + pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // same as above + pPerFrame->lights[i].depthBias = 100.0f / 100000.0f; + } + else + { + pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light + } + } + + m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); + + m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); + } + + return pPerFrame; } -void SampleRenderer::RenderSpotLights(ID3D12GraphicsCommandList* pCmdLst1, per_frame * pPerFrame) +void SampleRenderer::RenderSpotLights(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame) { - UserMarker marker(pCmdLst1, "Shadow Map"); + UserMarker marker(pCmdLst1, "Shadow Map"); - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if (!(pPerFrame->lights[i].type == LightType_Spot || pPerFrame->lights[i].type == LightType_Directional)) - continue; + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if (!(pPerFrame->lights[i].type == LightType_Spot || pPerFrame->lights[i].type == LightType_Directional)) + 
continue; - // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) - uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; - uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; - uint32_t viewportWidth = m_ShadowMap.GetWidth() / 2; - uint32_t viewportHeight = m_ShadowMap.GetHeight() / 2; - SetViewportAndScissor(pCmdLst1, viewportOffsetsX[i] * viewportWidth, viewportOffsetsY[i] * viewportHeight, viewportWidth, viewportHeight); - pCmdLst1->OMSetRenderTargets(0, NULL, false, &m_ShadowMapDSV.GetCPU()); + // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) + uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; + uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; + uint32_t viewportWidth = m_ShadowMap.GetWidth() / 2; + uint32_t viewportHeight = m_ShadowMap.GetHeight() / 2; + SetViewportAndScissor(pCmdLst1, viewportOffsetsX[i] * viewportWidth, viewportOffsetsY[i] * viewportHeight, viewportWidth, viewportHeight); + pCmdLst1->OMSetRenderTargets(0, NULL, false, &m_ShadowMapDSV.GetCPU()); - GltfDepthPass::per_frame *cbDepthPerFrame = m_gltfDepth->SetPerFrameConstants(); - cbDepthPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; + GltfDepthPass::per_frame* cbDepthPerFrame = m_gltfDepth->SetPerFrameConstants(); + cbDepthPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; - m_gltfDepth->Draw(pCmdLst1); + m_gltfDepth->Draw(pCmdLst1); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Shadow Map"); - } + m_GPUTimer.GetTimeStamp(pCmdLst1, "Shadow Map"); + } } -void SampleRenderer::RenderMotionVectors(ID3D12GraphicsCommandList* pCmdLst1, per_frame * pPerFrame, State * pState) +void SampleRenderer::RenderMotionVectors(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState) { - UserMarker marker(pCmdLst1, "Motion Vectors"); + UserMarker marker(pCmdLst1, "Motion Vectors"); - // Compute motion vectors - pCmdLst1->RSSetViewports(1, &m_Viewport); - 
pCmdLst1->RSSetScissorRects(1, &m_Scissor); - D3D12_CPU_DESCRIPTOR_HANDLE rts[] = { m_MotionVectorsRTV.GetCPU(), m_NormalBufferRTV.GetCPU() }; - pCmdLst1->OMSetRenderTargets(2, rts, false, &m_DepthBufferDSV.GetCPU()); + // Compute motion vectors + pCmdLst1->RSSetViewports(1, &m_Viewport); + pCmdLst1->RSSetScissorRects(1, &m_Scissor); + D3D12_CPU_DESCRIPTOR_HANDLE rts[] = { m_MotionVectorsRTV.GetCPU(), m_NormalBufferRTV.GetCPU() }; + pCmdLst1->OMSetRenderTargets(2, rts, false, &m_DepthBufferDSV.GetCPU()); - float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - pCmdLst1->ClearRenderTargetView(m_MotionVectorsRTV.GetCPU(), clearColor, 0, nullptr); - pCmdLst1->ClearRenderTargetView(m_NormalBufferRTV.GetCPU(), clearColor, 0, nullptr); + float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + pCmdLst1->ClearRenderTargetView(m_MotionVectorsRTV.GetCPU(), clearColor, 0, nullptr); + pCmdLst1->ClearRenderTargetView(m_NormalBufferRTV.GetCPU(), clearColor, 0, nullptr); - float clearColorOne[] = { 1.0f, 1.0f, 1.0f, 1.0f }; - pCmdLst1->ClearRenderTargetView(m_SpecularRoughnessRTV.GetCPU(), clearColorOne, 0, nullptr); + float clearColorOne[] = { 1.0f, 1.0f, 1.0f, 1.0f }; + pCmdLst1->ClearRenderTargetView(m_SpecularRoughnessRTV.GetCPU(), clearColorOne, 0, nullptr); - GltfMotionVectorsPass::per_frame *cbDepthPerFrame = m_gltfMotionVectors->SetPerFrameConstants(); - cbDepthPerFrame->mCurrViewProj = pPerFrame->mCameraViewProj; - cbDepthPerFrame->mPrevViewProj = pState->camera.GetPrevView() * pState->camera.GetProjection(); + GltfMotionVectorsPass::per_frame* cbDepthPerFrame = m_gltfMotionVectors->SetPerFrameConstants(); + cbDepthPerFrame->mCurrViewProj = pPerFrame->mCameraViewProj; + cbDepthPerFrame->mPrevViewProj = pState->camera.GetPrevView() * pState->camera.GetProjection(); - m_gltfMotionVectors->Draw(pCmdLst1); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Motion Vectors"); + m_gltfMotionVectors->Draw(pCmdLst1); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Motion Vectors"); } -void 
SampleRenderer::RenderSkydome(ID3D12GraphicsCommandList* pCmdLst1, per_frame * pPerFrame, State * pState) +void SampleRenderer::RenderSkydome(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState) { - UserMarker marker(pCmdLst1, "Skydome"); - - if (pState->skyDomeType == 1) - { - XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - m_SkyDome.Draw(pCmdLst1, clipToView); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Skydome"); - } - else if (pState->skyDomeType == 0) - { - SkyDomeProc::Constants skyDomeConstants; - skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); - skyDomeConstants.turbidity = 10.0f; - skyDomeConstants.rayleigh = 2.0f; - skyDomeConstants.mieCoefficient = 0.005f; - skyDomeConstants.mieDirectionalG = 0.8f; - skyDomeConstants.luminance = 1.0f; - skyDomeConstants.sun = false; - m_SkyDomeProc.Draw(pCmdLst1, skyDomeConstants); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Skydome proc"); - } + UserMarker marker(pCmdLst1, "Skydome"); + + if (pState->skyDomeType == 1) + { + XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + m_SkyDome.Draw(pCmdLst1, clipToView); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Skydome"); + } + else if (pState->skyDomeType == 0) + { + SkyDomeProc::Constants skyDomeConstants; + skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); + skyDomeConstants.turbidity = 10.0f; + skyDomeConstants.rayleigh = 2.0f; + skyDomeConstants.mieCoefficient = 0.005f; + skyDomeConstants.mieDirectionalG = 0.8f; + skyDomeConstants.luminance = 1.0f; + skyDomeConstants.sun = false; + m_SkyDomeProc.Draw(pCmdLst1, skyDomeConstants); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Skydome proc"); + } } -void SampleRenderer::RenderLightFrustums(ID3D12GraphicsCommandList * pCmdLst1, per_frame * pPerFrame, State * pState) +void 
SampleRenderer::RenderLightFrustums(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState) { - UserMarker marker(pCmdLst1, "Light frustrums"); - - XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f); - XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); - XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); - XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; - m_WireframeBox.Draw(pCmdLst1, &m_Wireframe, worldMatrix, vCenter, vRadius, vColor); - } - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Light frustums"); + UserMarker marker(pCmdLst1, "Light frustrums"); + + XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f); + XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); + XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); + XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; + m_WireframeBox.Draw(pCmdLst1, &m_Wireframe, worldMatrix, vCenter, vRadius, vColor); + } + + m_GPUTimer.GetTimeStamp(pCmdLst1, "Light frustums"); } void SampleRenderer::DownsampleDepthBuffer(ID3D12GraphicsCommandList* pCmdLst1) { - UserMarker marker(pCmdLst1, "Downsample Depth"); + UserMarker marker(pCmdLst1, "Downsample Depth"); - ID3D12DescriptorHeap *descriptorHeaps[] = { m_ResourceViewHeaps.GetCBV_SRV_UAVHeap() }; - pCmdLst1->SetDescriptorHeaps(1, descriptorHeaps); - pCmdLst1->SetComputeRootSignature(m_DownsampleRootSignature); - pCmdLst1->SetComputeRootDescriptorTable(0, m_DownsampleDescriptorTable); - pCmdLst1->SetPipelineState(m_DownsamplePipelineState); + ID3D12DescriptorHeap* descriptorHeaps[] = { m_ResourceViewHeaps.GetCBV_SRV_UAVHeap() }; + pCmdLst1->SetDescriptorHeaps(1, descriptorHeaps); + 
pCmdLst1->SetComputeRootSignature(m_DownsampleRootSignature); + pCmdLst1->SetComputeRootDescriptorTable(0, m_DownsampleDescriptorTable); + pCmdLst1->SetPipelineState(m_DownsamplePipelineState); - // Each threadgroup works on 64x64 texels - uint32_t dimX = (m_Width + 63) / 64; - uint32_t dimY = (m_Height + 63) / 64; - pCmdLst1->Dispatch(dimX, dimY, 1); + // Each threadgroup works on 64x64 texels + uint32_t dimX = (m_Width + 63) / 64; + uint32_t dimY = (m_Height + 63) / 64; + pCmdLst1->Dispatch(dimX, dimY, 1); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Downsample Depth"); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Downsample Depth"); } -void SampleRenderer::RenderScreenSpaceReflections(ID3D12GraphicsCommandList* pCmdLst1, State * pState) +void SampleRenderer::RenderScreenSpaceReflections(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState) { - UserMarker marker(pCmdLst1, "FidelityFX SSSR"); - - const Camera * camera = &pState->camera; - XMMATRIX view = camera->GetView(); - XMMATRIX proj = camera->GetProjection(); - - XMFLOAT4X4 cameraView; - XMStoreFloat4x4(&cameraView, XMMatrixTranspose(view)); - XMFLOAT4X4 cameraProj; - XMStoreFloat4x4(&cameraProj, XMMatrixTranspose(proj)); - - FfxSssrStatus status; - status = ffxSssrReflectionViewSetCameraParameters(m_SssrContext, m_SssrReflectionView, &cameraView.m[0][0], &cameraProj.m[0][0]); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrReflectionViewSetCameraParameters failed."); - } - - FLOAT clearValues[4] = { 0, 0, 0, 0 }; - pCmdLst1->ClearUnorderedAccessViewFloat(m_SssrOutputBufferUAVGPU.GetGPU(), m_SssrOutputBufferUAV.GetCPU(), m_SssrOutputBuffer.GetResource(), clearValues, 0, nullptr); - - FfxSssrD3D12CommandEncodeInfo d3d12EncodeInfo = {}; - d3d12EncodeInfo.pCommandList = pCmdLst1; - - FfxSssrResolveReflectionViewInfo resolveInfo = {}; - resolveInfo.flags = pState->bShowIntersectionResults ? 
0 : FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE; - resolveInfo.flags |= pState->bEnableVarianceGuidedTracing ? FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING : 0; - resolveInfo.pD3D12CommandEncodeInfo = &d3d12EncodeInfo; - resolveInfo.temporalStabilityScale = pState->temporalStability; - resolveInfo.maxTraversalIterations = pState->maxTraversalIterations; - resolveInfo.mostDetailedDepthHierarchyMipLevel = pState->mostDetailedDepthHierarchyMipLevel; - resolveInfo.depthBufferThickness = pState->depthBufferThickness; - resolveInfo.minTraversalOccupancy = pState->minTraversalOccupancy; - resolveInfo.samplesPerQuad = pState->samplesPerQuad == 4 ? FFX_SSSR_RAY_SAMPLES_PER_QUAD_4 : (pState->samplesPerQuad == 2 ? FFX_SSSR_RAY_SAMPLES_PER_QUAD_2 : FFX_SSSR_RAY_SAMPLES_PER_QUAD_1); - resolveInfo.roughnessThreshold = pState->roughnessThreshold; - - status = ffxSssrEncodeResolveReflectionView(m_SssrContext, m_SssrReflectionView, &resolveInfo); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrEncodeResolveReflectionView failed."); - } - - // Query timings - uint64_t tileClassificationTime; - status = ffxSssrReflectionViewGetTileClassificationElapsedTime(m_SssrContext, m_SssrReflectionView, &tileClassificationTime); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrReflectionViewGetTileClassificationElapsedTime failed."); - } - - static std::deque tileClassificationTimes(100); - tileClassificationTimes.pop_front(); - tileClassificationTimes.push_back(static_cast(1000 * static_cast(tileClassificationTime) / m_GpuTicksPerSecond)); - pState->tileClassificationTime = 0; - for (auto& time : tileClassificationTimes) - { - pState->tileClassificationTime += time; - } - pState->tileClassificationTime /= tileClassificationTimes.size(); - - uint64_t intersectionTime; - status = ffxSssrReflectionViewGetIntersectionElapsedTime(m_SssrContext, m_SssrReflectionView, &intersectionTime); - if (status != FFX_SSSR_STATUS_OK) - { - 
Trace("ffxSssrReflectionViewGetIntersectionElapsedTime failed."); - } - - static std::deque intersectionTimes(100); - intersectionTimes.pop_front(); - intersectionTimes.push_back(static_cast(1000 * static_cast(intersectionTime) / m_GpuTicksPerSecond)); - pState->intersectionTime = 0; - for (auto& time : intersectionTimes) - { - pState->intersectionTime += time; - } - pState->intersectionTime /= intersectionTimes.size(); - - uint64_t denoisingTime; - status = ffxSssrReflectionViewGetDenoisingElapsedTime(m_SssrContext, m_SssrReflectionView, &denoisingTime); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrReflectionViewGetDenoisingElapsedTime failed."); - } - - static std::deque denoisingTimes(100); - denoisingTimes.pop_front(); - denoisingTimes.push_back(static_cast(1000 * static_cast(denoisingTime) / m_GpuTicksPerSecond)); - pState->denoisingTime = 0; - for (auto& time : denoisingTimes) - { - pState->denoisingTime += time; - } - pState->denoisingTime /= denoisingTimes.size(); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "FidelityFX SSSR"); + SSSRConstants sssrConstants = {}; + const Camera* camera = &pState->camera; + XMMATRIX view = camera->GetView(); + XMMATRIX proj = camera->GetProjection(); + + XMStoreFloat4x4(&sssrConstants.view, XMMatrixTranspose(view)); + XMStoreFloat4x4(&sssrConstants.projection, XMMatrixTranspose(proj)); + + XMStoreFloat4x4(&sssrConstants.invProjection, XMMatrixTranspose(XMMatrixInverse(nullptr, proj))); + XMStoreFloat4x4(&sssrConstants.invView, XMMatrixTranspose(XMMatrixInverse(nullptr, view))); + XMStoreFloat4x4(&sssrConstants.invViewProjection, XMMatrixTranspose(pPerFrame->mInverseCameraViewProj)); + XMStoreFloat4x4(&sssrConstants.prevViewProjection, XMMatrixTranspose(m_prev_view_projection)); + + sssrConstants.frameIndex = m_frame_index; + sssrConstants.maxTraversalIntersections = pState->maxTraversalIterations; + sssrConstants.minTraversalOccupancy = pState->minTraversalOccupancy; + sssrConstants.mostDetailedMip = 
pState->mostDetailedDepthHierarchyMipLevel; + sssrConstants.temporalStabilityFactor = pState->temporalStability; + sssrConstants.temporalVarianceThreshold = pState->temporalVarianceThreshold; + sssrConstants.depthBufferThickness = pState->depthBufferThickness; + sssrConstants.samplesPerQuad = pState->samplesPerQuad; + sssrConstants.temporalVarianceGuidedTracingEnabled = pState->bEnableVarianceGuidedTracing ? 1 : 0; + sssrConstants.roughnessThreshold = pState->roughnessThreshold; + + m_Sssr.Draw(pCmdLst1, sssrConstants, pState->bShowIntersectionResults); + + //Extract SSSR Timestamps and calculate averages + uint64_t tileClassificationTime = m_Sssr.GetTileClassificationElapsedGpuTicks(); + static std::deque tileClassificationTimes(100); + tileClassificationTimes.pop_front(); + tileClassificationTimes.push_back(static_cast(1000 * static_cast(tileClassificationTime) / m_GpuTicksPerSecond)); + pState->tileClassificationTime = 0; + for (auto& time : tileClassificationTimes) + { + pState->tileClassificationTime += time; + } + pState->tileClassificationTime /= tileClassificationTimes.size(); + + uint64_t intersectionTime = m_Sssr.GetIntersectElapsedGpuTicks(); + static std::deque intersectionTimes(100); + intersectionTimes.pop_front(); + intersectionTimes.push_back(static_cast(1000 * static_cast(intersectionTime) / m_GpuTicksPerSecond)); + pState->intersectionTime = 0; + for (auto& time : intersectionTimes) + { + pState->intersectionTime += time; + } + pState->intersectionTime /= intersectionTimes.size(); + + uint64_t denoisingTime = m_Sssr.GetDenoiserElapsedGpuTicks(); + static std::deque denoisingTimes(100); + denoisingTimes.pop_front(); + denoisingTimes.push_back(static_cast(1000 * static_cast(denoisingTime) / m_GpuTicksPerSecond)); + pState->denoisingTime = 0; + for (auto& time : denoisingTimes) + { + pState->denoisingTime += time; + } + pState->denoisingTime /= denoisingTimes.size(); + + m_GPUTimer.GetTimeStamp(pCmdLst1, "FidelityFX SSSR"); } void 
SampleRenderer::CopyHistorySurfaces(ID3D12GraphicsCommandList* pCmdLst1) { - UserMarker marker(pCmdLst1, "Copy History Normals and Roughness"); - // Keep copy of normal roughness buffer for next frame - CopyToTexture(pCmdLst1, m_NormalBuffer.GetResource(), m_NormalHistoryBuffer.GetResource()); - CopyToTexture(pCmdLst1, m_SpecularRoughness.GetResource(), m_SpecularRoughnessHistory.GetResource()); + UserMarker marker(pCmdLst1, "Copy History Normals and Roughness"); + // Keep copy of normal roughness buffer for next frame + CopyToTexture(pCmdLst1, m_NormalBuffer.GetResource(), m_NormalHistoryBuffer.GetResource(), m_Width, m_Height); } -void SampleRenderer::ApplyReflectionTarget(ID3D12GraphicsCommandList* pCmdLst1, State * pState) +void SampleRenderer::ApplyReflectionTarget(ID3D12GraphicsCommandList* pCmdLst1, State* pState) { - UserMarker marker(pCmdLst1, "Apply Reflection View"); - - struct PassConstants - { - XMFLOAT4 viewDir; - UINT showReflectionTarget; - UINT drawReflections; - } constants; - - XMVECTOR view = pState->camera.GetDirection(); - XMStoreFloat4(&constants.viewDir, view); - constants.showReflectionTarget = pState->showReflectionTarget ? 1 : 0; - constants.drawReflections = pState->bDrawScreenSpaceReflections ? 
1 : 0; - - D3D12_GPU_VIRTUAL_ADDRESS cb = m_ConstantBufferRing.AllocConstantBuffer(sizeof(PassConstants), &constants); - - ID3D12DescriptorHeap *descriptorHeaps[] = { m_ResourceViewHeaps.GetCBV_SRV_UAVHeap() }; - pCmdLst1->SetDescriptorHeaps(1, descriptorHeaps); - pCmdLst1->SetGraphicsRootSignature(m_ApplyRootSignature); - pCmdLst1->SetGraphicsRootDescriptorTable(0, m_ApplyPassDescriptorTable.GetGPU()); - pCmdLst1->SetGraphicsRootConstantBufferView(1, cb); - pCmdLst1->SetPipelineState(m_ApplyPipelineState); - pCmdLst1->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - pCmdLst1->IASetVertexBuffers(0, 0, nullptr); - pCmdLst1->IASetIndexBuffer(nullptr); - - D3D12_RENDER_TARGET_VIEW_DESC viewDesc = {}; - viewDesc.Format = m_HDR.GetFormat(); - viewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - viewDesc.Texture2D.MipSlice = 0; - viewDesc.Texture2D.PlaneSlice = 0; - m_HDR.CreateRTV(0, &m_ApplyPipelineRTV, &viewDesc); - - SetViewportAndScissor(pCmdLst1, 0, 0, m_Width, m_Height); - - D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = m_ApplyPipelineRTV.GetCPU(); - pCmdLst1->OMSetRenderTargets(1, &rtvHandle, false, nullptr); - pCmdLst1->DrawInstanced(3, 1, 0, 0); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Apply Reflection View"); + UserMarker marker(pCmdLst1, "Apply Reflection View"); + + struct PassConstants + { + XMFLOAT4 viewDir; + UINT showReflectionTarget; + UINT drawReflections; + } constants; + + XMVECTOR view = pState->camera.GetDirection(); + XMStoreFloat4(&constants.viewDir, view); + constants.showReflectionTarget = pState->showReflectionTarget ? 1 : 0; + constants.drawReflections = pState->bDrawScreenSpaceReflections ? 
1 : 0; + + D3D12_GPU_VIRTUAL_ADDRESS cb = m_ConstantBufferRing.AllocConstantBuffer(sizeof(PassConstants), &constants); + + ID3D12DescriptorHeap* descriptorHeaps[] = { m_ResourceViewHeaps.GetCBV_SRV_UAVHeap() }; + pCmdLst1->SetDescriptorHeaps(1, descriptorHeaps); + pCmdLst1->SetGraphicsRootSignature(m_ApplyRootSignature); + pCmdLst1->SetGraphicsRootDescriptorTable(0, m_ApplyPassDescriptorTable.GetGPU()); + pCmdLst1->SetGraphicsRootConstantBufferView(1, cb); + pCmdLst1->SetPipelineState(m_ApplyPipelineState); + pCmdLst1->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdLst1->IASetVertexBuffers(0, 0, nullptr); + pCmdLst1->IASetIndexBuffer(nullptr); + + D3D12_RENDER_TARGET_VIEW_DESC viewDesc = {}; + viewDesc.Format = m_HDR.GetFormat(); + viewDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + viewDesc.Texture2D.MipSlice = 0; + viewDesc.Texture2D.PlaneSlice = 0; + m_HDR.CreateRTV(0, &m_ApplyPipelineRTV, &viewDesc); + + SetViewportAndScissor(pCmdLst1, 0, 0, m_Width, m_Height); + + D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = m_ApplyPipelineRTV.GetCPU(); + pCmdLst1->OMSetRenderTargets(1, &rtvHandle, false, nullptr); + pCmdLst1->DrawInstanced(3, 1, 0, 0); + + m_GPUTimer.GetTimeStamp(pCmdLst1, "Apply Reflection View"); } void SampleRenderer::DownsampleScene(ID3D12GraphicsCommandList* pCmdLst1) { - UserMarker marker(pCmdLst1, "Downsample Scene"); + UserMarker marker(pCmdLst1, "Downsample Scene"); - D3D12_CPU_DESCRIPTOR_HANDLE renderTargets[] = { m_HDRRTV.GetCPU() }; - pCmdLst1->OMSetRenderTargets(ARRAYSIZE(renderTargets), renderTargets, false, NULL); + D3D12_CPU_DESCRIPTOR_HANDLE renderTargets[] = { m_HDRRTV.GetCPU() }; + pCmdLst1->OMSetRenderTargets(ARRAYSIZE(renderTargets), renderTargets, false, NULL); - m_DownSample.Draw(pCmdLst1); - //m_downSample.Gui(); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Downsample Scene"); + m_DownSample.Draw(pCmdLst1); + //m_downSample.Gui(); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Downsample Scene"); } void 
SampleRenderer::RenderBloom(ID3D12GraphicsCommandList* pCmdLst1) { - UserMarker marker(pCmdLst1, "Render Bloom"); + UserMarker marker(pCmdLst1, "Render Bloom"); - D3D12_CPU_DESCRIPTOR_HANDLE renderTargets[] = { m_HDRRTV.GetCPU() }; - pCmdLst1->OMSetRenderTargets(ARRAYSIZE(renderTargets), renderTargets, false, NULL); + D3D12_CPU_DESCRIPTOR_HANDLE renderTargets[] = { m_HDRRTV.GetCPU() }; + pCmdLst1->OMSetRenderTargets(ARRAYSIZE(renderTargets), renderTargets, false, NULL); - m_Bloom.Draw(pCmdLst1, &m_HDR); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Render Bloom"); + m_Bloom.Draw(pCmdLst1, &m_HDR); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Render Bloom"); } -void SampleRenderer::ApplyTonemapping(ID3D12GraphicsCommandList* pCmdLst2, State * pState, SwapChain *pSwapChain) +void SampleRenderer::ApplyTonemapping(ID3D12GraphicsCommandList* pCmdLst2, State* pState, SwapChain* pSwapChain) { - UserMarker marker(pCmdLst2, "Apply Tonemapping"); + UserMarker marker(pCmdLst2, "Apply Tonemapping"); - pCmdLst2->RSSetViewports(1, &m_Viewport); - pCmdLst2->RSSetScissorRects(1, &m_Scissor); - pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), false, NULL); + pCmdLst2->RSSetViewports(1, &m_Viewport); + pCmdLst2->RSSetScissorRects(1, &m_Scissor); + pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), false, NULL); - m_ToneMapping.Draw(pCmdLst2, &m_HDRSRV, pState->exposure, pState->toneMapper); - m_GPUTimer.GetTimeStamp(pCmdLst2, "Apply Tonemapping"); + m_ToneMapping.Draw(pCmdLst2, &m_HDRSRV, pState->exposure, pState->toneMapper); + m_GPUTimer.GetTimeStamp(pCmdLst2, "Apply Tonemapping"); } -void SampleRenderer::RenderHUD(ID3D12GraphicsCommandList* pCmdLst2, SwapChain *pSwapChain) +void SampleRenderer::RenderHUD(ID3D12GraphicsCommandList* pCmdLst2, SwapChain* pSwapChain) { - UserMarker marker(pCmdLst2, "Render HUD"); + UserMarker marker(pCmdLst2, "Render HUD"); - pCmdLst2->RSSetViewports(1, &m_Viewport); - pCmdLst2->RSSetScissorRects(1, &m_Scissor); - 
pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), false, NULL); + pCmdLst2->RSSetViewports(1, &m_Viewport); + pCmdLst2->RSSetScissorRects(1, &m_Scissor); + pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), false, NULL); - m_ImGUI.Draw(pCmdLst2); + m_ImGUI.Draw(pCmdLst2); - m_GPUTimer.GetTimeStamp(pCmdLst2, "Render HUD"); + m_GPUTimer.GetTimeStamp(pCmdLst2, "Render HUD"); } void Barriers(ID3D12GraphicsCommandList* pCmdLst, const std::vector& barriers) { - pCmdLst->ResourceBarrier(static_cast(barriers.size()), barriers.data()); + pCmdLst->ResourceBarrier(static_cast(barriers.size()), barriers.data()); } //-------------------------------------------------------------------------------------- @@ -1008,407 +862,402 @@ void Barriers(ID3D12GraphicsCommandList* pCmdLst, const std::vectortargetFrametime); - BeginFrame(); - - per_frame *pPerFrame = FillFrameConstants(pState); - - // command buffer calls - // - ID3D12GraphicsCommandList* pCmdLst1 = m_CommandListRing.GetNewCommandList(); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Begin Frame"); - - // Clears ----------------------------------------------------------------------- - // - pCmdLst1->ClearDepthStencilView(m_ShadowMapDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear shadow map"); - - float clearValuesFloat[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - pCmdLst1->ClearRenderTargetView(m_HDRRTV.GetCPU(), clearValuesFloat, 0, nullptr); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear HDR"); - - pCmdLst1->ClearDepthStencilView(m_DepthBufferDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear depth"); - - UINT clearValuesUint[4] = { 0, 0, 0, 0 }; - pCmdLst1->ClearUnorderedAccessViewUint(m_AtomicCounterUAVGPU.GetGPU(), m_AtomicCounterUAV.GetCPU(), m_AtomicCounter.GetResource(), clearValuesUint, 0, nullptr); // Set atomic counter to 0. 
- - // Render to shadow map atlas for spot lights ------------------------------------------ - // - if (m_gltfDepth && pPerFrame != NULL) - { - RenderSpotLights(pCmdLst1, pPerFrame); - } - - Barriers(pCmdLst1, { - CD3DX12_RESOURCE_BARRIER::Transition(m_ShadowMap.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) - }); - - // Motion vectors --------------------------------------------------------------------------- - // - if (m_gltfMotionVectors != NULL && pPerFrame != NULL) - { - RenderMotionVectors(pCmdLst1, pPerFrame, pState); - } - - // Render Scene to the HDR RT ------------------------------------------------ - // - pCmdLst1->RSSetViewports(1, &m_Viewport); - pCmdLst1->RSSetScissorRects(1, &m_Scissor); - - D3D12_CPU_DESCRIPTOR_HANDLE rts[] = { m_HDRRTV.GetCPU(), m_SpecularRoughnessRTV.GetCPU() }; - pCmdLst1->OMSetRenderTargets(2, rts, false, &m_DepthBufferDSV.GetCPU()); - - if (pPerFrame != NULL) - { - RenderSkydome(pCmdLst1, pPerFrame, pState); - - // Render scene to color buffer - if (m_gltfPBR) - { - //set per frame constant buffer values - m_gltfPBR->Draw(pCmdLst1, &m_ShadowMapSRV); - } - - // Draw object bounding boxes - if (m_gltfBBox) - { - if (pState->bDrawBoundingBoxes) - { - m_gltfBBox->Draw(pCmdLst1, pPerFrame->mCameraViewProj); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Bounding Box"); - } - } - - // Draw light frustum - if (pState->bDrawLightFrustum) - { - RenderLightFrustums(pCmdLst1, pPerFrame, pState); - } - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Rendering scene"); - } - - Barriers(pCmdLst1, { - CD3DX12_RESOURCE_BARRIER::Transition(m_ShadowMap.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE), - CD3DX12_RESOURCE_BARRIER::UAV(m_AtomicCounter.GetResource()), - CD3DX12_RESOURCE_BARRIER::Transition(m_DepthBuffer.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) - }); - - // Downsample depth buffer - if (m_gltfMotionVectors != 
NULL && pPerFrame != NULL) - { - DownsampleDepthBuffer(pCmdLst1); - } - - Barriers(pCmdLst1, { - CD3DX12_RESOURCE_BARRIER::UAV(m_DepthHierarchy.GetResource()), - CD3DX12_RESOURCE_BARRIER::Transition(m_DepthHierarchy.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_DepthBuffer.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE), - CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), - CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), - CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), - CD3DX12_RESOURCE_BARRIER::Transition(m_MotionVectors.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), - }); - - // Stochastic Screen Space Reflections - if (m_gltfPBR && pPerFrame != NULL) // Only draw reflections if we draw objects - { - RenderScreenSpaceReflections(pCmdLst1, pState); - } - - Barriers(pCmdLst1, { - CD3DX12_RESOURCE_BARRIER::Transition(m_SssrOutputBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), // Wait for reflection target to be written - CD3DX12_RESOURCE_BARRIER::Transition(m_DepthHierarchy.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), - CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE), - 
CD3DX12_RESOURCE_BARRIER::Transition(m_NormalHistoryBuffer.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_DEST), - CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughnessHistory.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_DEST) - }); - - CopyHistorySurfaces(pCmdLst1); // Keep this frames results for next frame - - Barriers(pCmdLst1, { - CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_NormalHistoryBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughnessHistory.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET, 0), - CD3DX12_RESOURCE_BARRIER::Transition(m_MotionVectors.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET, 0) - }); - - // Apply the result of SSSR - if (m_gltfPBR && pPerFrame != NULL) // only reflect if we draw objects - { - ApplyReflectionTarget(pCmdLst1, pState); - } - - // Bloom, takes HDR as input and applies bloom to it. 
- Barriers(pCmdLst1, { - CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_SssrOutputBuffer.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), - CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET), - CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET) - }); - - if (pState->bDrawBloom) - { - DownsampleScene(pCmdLst1); - RenderBloom(pCmdLst1); - } - - // Submit command buffer - ThrowIfFailed(pCmdLst1->Close()); - ID3D12CommandList* CmdListList1[] = { pCmdLst1 }; - m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList1); - - // Wait for swapchain (we are going to render to it) - pSwapChain->WaitForSwapChain(); - ID3D12GraphicsCommandList* pCmdLst2 = m_CommandListRing.GetNewCommandList(); - - Barriers(pCmdLst2, { - CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET), - }); - - // Tonemapping - ApplyTonemapping(pCmdLst2, pState, pSwapChain); - - Barriers(pCmdLst2, { - CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET) - }); - - // Render HUD - RenderHUD(pCmdLst2, pSwapChain); - - if (pState->screenshotName != NULL) - { - m_SaveTexture.CopyRenderTargetIntoStagingTexture(m_pDevice->GetDevice(), pCmdLst2, pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_RENDER_TARGET); - } - - // Transition swapchain into present mode - Barriers(pCmdLst2, { - CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT) - }); - - 
m_GPUTimer.OnEndFrame(); - - m_GPUTimer.CollectTimings(pCmdLst2); - - // Close & Submit the command list - ThrowIfFailed(pCmdLst2->Close()); - - ID3D12CommandList* CmdListList2[] = { pCmdLst2 }; - m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList2); - - if (pState->screenshotName != NULL) - { - m_SaveTexture.SaveStagingTextureAsJpeg(m_pDevice->GetDevice(), m_pDevice->GetGraphicsQueue(), pState->screenshotName->c_str()); - pState->screenshotName = NULL; - } - - // Update previous camera matrices - pState->camera.UpdatePreviousMatrices(); + StallFrame(pState->targetFrametime); + BeginFrame(); + + per_frame* pPerFrame = FillFrameConstants(pState); + + // command buffer calls + // + ID3D12GraphicsCommandList* pCmdLst1 = m_CommandListRing.GetNewCommandList(); + + m_GPUTimer.GetTimeStamp(pCmdLst1, "Begin Frame"); + + // Clears ----------------------------------------------------------------------- + // + pCmdLst1->ClearDepthStencilView(m_ShadowMapDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear shadow map"); + + float clearValuesFloat[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + pCmdLst1->ClearRenderTargetView(m_HDRRTV.GetCPU(), clearValuesFloat, 0, nullptr); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear HDR"); + + pCmdLst1->ClearDepthStencilView(m_DepthBufferDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear depth"); + + UINT clearValuesUint[4] = { 0, 0, 0, 0 }; + pCmdLst1->ClearUnorderedAccessViewUint(m_AtomicCounterUAVGPU.GetGPU(), m_AtomicCounterUAV.GetCPU(), m_AtomicCounter.GetResource(), clearValuesUint, 0, nullptr); // Set atomic counter to 0. 
+ + // Render to shadow map atlas for spot lights ------------------------------------------ + // + if (m_gltfDepth && pPerFrame != NULL) + { + RenderSpotLights(pCmdLst1, pPerFrame); + } + + Barriers(pCmdLst1, { + CD3DX12_RESOURCE_BARRIER::Transition(m_ShadowMap.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + }); + + // Motion vectors --------------------------------------------------------------------------- + // + if (m_gltfMotionVectors != NULL && pPerFrame != NULL) + { + RenderMotionVectors(pCmdLst1, pPerFrame, pState); + } + + // Render Scene to the HDR RT ------------------------------------------------ + // + pCmdLst1->RSSetViewports(1, &m_Viewport); + pCmdLst1->RSSetScissorRects(1, &m_Scissor); + + D3D12_CPU_DESCRIPTOR_HANDLE rts[] = { m_HDRRTV.GetCPU(), m_SpecularRoughnessRTV.GetCPU() }; + pCmdLst1->OMSetRenderTargets(2, rts, false, &m_DepthBufferDSV.GetCPU()); + + if (pPerFrame != NULL) + { + RenderSkydome(pCmdLst1, pPerFrame, pState); + + // Render scene to color buffer + if (m_gltfPBR) + { + //set per frame constant buffer values + m_gltfPBR->Draw(pCmdLst1, &m_ShadowMapSRV); + } + + // Draw object bounding boxes + if (m_gltfBBox) + { + if (pState->bDrawBoundingBoxes) + { + m_gltfBBox->Draw(pCmdLst1, pPerFrame->mCameraViewProj); + m_GPUTimer.GetTimeStamp(pCmdLst1, "Bounding Box"); + } + } + + // Draw light frustum + if (pState->bDrawLightFrustum) + { + RenderLightFrustums(pCmdLst1, pPerFrame, pState); + } + + m_GPUTimer.GetTimeStamp(pCmdLst1, "Rendering scene"); + } + + Barriers(pCmdLst1, { + CD3DX12_RESOURCE_BARRIER::Transition(m_ShadowMap.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE), + CD3DX12_RESOURCE_BARRIER::UAV(m_AtomicCounter.GetResource()), + CD3DX12_RESOURCE_BARRIER::Transition(m_DepthBuffer.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) + }); + + // Downsample depth buffer + if (m_gltfMotionVectors != 
NULL && pPerFrame != NULL) + { + DownsampleDepthBuffer(pCmdLst1); + } + + Barriers(pCmdLst1, { + CD3DX12_RESOURCE_BARRIER::UAV(m_DepthHierarchy.GetResource()), + CD3DX12_RESOURCE_BARRIER::Transition(m_DepthHierarchy.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_DepthBuffer.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE), + CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), + CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), + CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), + CD3DX12_RESOURCE_BARRIER::Transition(m_MotionVectors.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0), + }); + + // Stochastic Screen Space Reflections + if (m_gltfPBR && pPerFrame != NULL) // Only draw reflections if we draw objects + { + RenderScreenSpaceReflections(pCmdLst1, pPerFrame, pState); + } + + Barriers(pCmdLst1, { + CD3DX12_RESOURCE_BARRIER::Transition(m_Sssr.GetOutputTexture()->GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), // Wait for reflection target to be written + CD3DX12_RESOURCE_BARRIER::Transition(m_DepthHierarchy.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE), + 
CD3DX12_RESOURCE_BARRIER::Transition(m_NormalHistoryBuffer.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_DEST), + }); + + CopyHistorySurfaces(pCmdLst1); // Keep this frames results for next frame + + Barriers(pCmdLst1, { + CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_NormalHistoryBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET, 0), + CD3DX12_RESOURCE_BARRIER::Transition(m_MotionVectors.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET, 0) + }); + + // Apply the result of SSSR + if (m_gltfPBR && pPerFrame != NULL) // only reflect if we draw objects + { + ApplyReflectionTarget(pCmdLst1, pState); + } + + // Bloom, takes HDR as input and applies bloom to it. 
+ Barriers(pCmdLst1, { + CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_Sssr.GetOutputTexture()->GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + CD3DX12_RESOURCE_BARRIER::Transition(m_SpecularRoughness.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET), + CD3DX12_RESOURCE_BARRIER::Transition(m_NormalBuffer.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET) + }); + + if (pState->bDrawBloom) + { + DownsampleScene(pCmdLst1); + RenderBloom(pCmdLst1); + } + + // Submit command buffer + ThrowIfFailed(pCmdLst1->Close()); + ID3D12CommandList* CmdListList1[] = { pCmdLst1 }; + m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList1); + + // Wait for swapchain (we are going to render to it) + pSwapChain->WaitForSwapChain(); + ID3D12GraphicsCommandList* pCmdLst2 = m_CommandListRing.GetNewCommandList(); + + Barriers(pCmdLst2, { + CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET), + }); + + // Tonemapping + ApplyTonemapping(pCmdLst2, pState, pSwapChain); + + Barriers(pCmdLst2, { + CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET) + }); + + // Render HUD + RenderHUD(pCmdLst2, pSwapChain); + + if (pState->screenshotName != NULL) + { + m_SaveTexture.CopyRenderTargetIntoStagingTexture(m_pDevice->GetDevice(), pCmdLst2, pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_RENDER_TARGET); + } + + // Transition swapchain into present mode + Barriers(pCmdLst2, { + CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT) + }); 
+ + m_GPUTimer.OnEndFrame(); + + m_GPUTimer.CollectTimings(pCmdLst2); + + // Close & Submit the command list + ThrowIfFailed(pCmdLst2->Close()); + + ID3D12CommandList* CmdListList2[] = { pCmdLst2 }; + m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList2); + + if (pState->screenshotName != NULL) + { + m_SaveTexture.SaveStagingTextureAsJpeg(m_pDevice->GetDevice(), m_pDevice->GetGraphicsQueue(), pState->screenshotName->c_str()); + pState->screenshotName = NULL; + } + + // Update previous camera matrices + pState->camera.UpdatePreviousMatrices(); + if (pPerFrame) + { + m_prev_view_projection = pPerFrame->mCameraViewProj; + } + m_frame_index++; } -void SampleRenderer::CreateApplyReflectionsPipeline() +void SampleRenderer::Recompile() { - ID3D12Device * device = m_pDevice->GetDevice(); - - HRESULT hr; - - CD3DX12_ROOT_PARAMETER root[2]; - - CD3DX12_DESCRIPTOR_RANGE descTable[4]; - descTable[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); - descTable[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1); - descTable[2].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 2); - descTable[3].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 3); - root[0].InitAsDescriptorTable(ARRAYSIZE(descTable), descTable); - root[1].InitAsConstantBufferView(0); - - D3D12_STATIC_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - samplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - samplerDesc.MinLOD = 0.0f; - samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; - samplerDesc.MipLODBias = 0; - samplerDesc.MaxAnisotropy = 1; - samplerDesc.ShaderRegister = 0; - samplerDesc.RegisterSpace = 0; - samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - - D3D12_ROOT_SIGNATURE_DESC rsDesc = {}; - rsDesc.NumParameters = 
ARRAYSIZE(root); - rsDesc.pParameters = root; - rsDesc.NumStaticSamplers = 1; - rsDesc.pStaticSamplers = &samplerDesc; - - ID3DBlob* rs, *rsError; - hr = D3D12SerializeRootSignature(&rsDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rs, &rsError); - if (FAILED(hr)) - { - Trace("Failed to serialize root signature for apply pipeline.\n"); - } - - hr = device->CreateRootSignature(0, rs->GetBufferPointer(), rs->GetBufferSize(), IID_PPV_ARGS(&m_ApplyRootSignature)); - if (FAILED(hr)) - { - Trace("Failed to create root signature for apply pipeline.\n"); - } - - hr = m_ApplyRootSignature->SetName(L"Apply Reflections RootSignature"); - if (FAILED(hr)) - { - Trace("Failed to name root signature for apply pipeline.\n"); - } - - D3D12_SHADER_BYTECODE vsShaderByteCode = {}; - D3D12_SHADER_BYTECODE psShaderByteCode = {}; - DefineList defines; - CompileShaderFromFile("ApplyReflections.hlsl", &defines, "vs_main", "-T vs_6_0", &vsShaderByteCode); - CompileShaderFromFile("ApplyReflections.hlsl", &defines, "ps_main", "-T ps_6_0", &psShaderByteCode); - - D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {}; - desc.VS = vsShaderByteCode; - desc.PS = psShaderByteCode; - desc.pRootSignature = m_ApplyRootSignature; - desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - - desc.DepthStencilState.DepthEnable = false; - desc.DepthStencilState.StencilEnable = false; - desc.BlendState.AlphaToCoverageEnable = false; - desc.BlendState.IndependentBlendEnable = false; - - desc.NumRenderTargets = 1; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.SampleMask = UINT_MAX; - desc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT; - desc.BlendState.RenderTarget[0].BlendEnable = true; - desc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_ONE; - desc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; - desc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_SRC_ALPHA; - desc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; - desc.BlendState.RenderTarget[0].BlendOpAlpha = 
D3D12_BLEND_OP_ADD; - desc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ONE; - desc.BlendState.RenderTarget[0].LogicOpEnable = false; - desc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - - desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; - desc.RasterizerState.AntialiasedLineEnable = false; - desc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; - desc.RasterizerState.DepthBias = 0; - desc.RasterizerState.DepthBiasClamp = 0; - desc.RasterizerState.DepthClipEnable = false; - desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; - desc.RasterizerState.ForcedSampleCount = 0; - desc.RasterizerState.FrontCounterClockwise = false; - desc.RasterizerState.MultisampleEnable = false; - desc.RasterizerState.SlopeScaledDepthBias = 0; - - hr = device->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&m_ApplyPipelineState)); - if (FAILED(hr)) - { - Trace("Failed to create apply pipeline.\n"); - } - - hr = m_ApplyPipelineState->SetName(L"Apply Reflections Pipeline"); - if (FAILED(hr)) - { - Trace("Failed to name apply pipeline.\n"); - } - - rs->Release(); + m_Sssr.Recompile(); } -void SampleRenderer::CreateDepthDownsamplePipeline() +void SampleRenderer::CreateApplyReflectionsPipeline() { - HRESULT hr; - - static constexpr uint32_t numRootParameters = 1; - CD3DX12_ROOT_PARAMETER root[numRootParameters]; - - CD3DX12_DESCRIPTOR_RANGE ranges[3] = {}; - ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); - ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 13, 0); - ranges[2].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 13); - - root[0].InitAsDescriptorTable(3, ranges); - - D3D12_ROOT_SIGNATURE_DESC rsDesc = {}; - rsDesc.NumParameters = numRootParameters; - rsDesc.pParameters = root; - rsDesc.NumStaticSamplers = 0; - rsDesc.pStaticSamplers = nullptr; - - ID3DBlob* rs, *rsError; - hr = D3D12SerializeRootSignature(&rsDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rs, &rsError); - if (FAILED(hr)) - { - Trace("Failed to 
serialize root signature for downsampling pipeline.\n"); - } - - hr = m_pDevice->GetDevice()->CreateRootSignature(0, rs->GetBufferPointer(), rs->GetBufferSize(), IID_PPV_ARGS(&m_DownsampleRootSignature)); - if (FAILED(hr)) - { - Trace("Failed to create root signature for downsampling pipeline.\n"); - } - - hr = m_DownsampleRootSignature->SetName(L"Depth Downsample RootSignature"); - if (FAILED(hr)) - { - Trace("Failed to name root signature for downsampling pipeline.\n"); - } - - D3D12_SHADER_BYTECODE shaderByteCode = {}; - DefineList defines; - CompileShaderFromFile("DepthDownsample.hlsl", &defines, "main", "-T cs_6_0", &shaderByteCode); - - D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {}; - desc.pRootSignature = m_DownsampleRootSignature; - desc.CS = shaderByteCode; - - hr = m_pDevice->GetDevice()->CreateComputePipelineState(&desc, IID_PPV_ARGS(&m_DownsamplePipelineState)); - if (FAILED(hr)) - { - Trace("Failed to create downsampling pipeline.\n"); - } - - hr = m_DownsamplePipelineState->SetName(L"Depth Downsample Pipeline"); - if (FAILED(hr)) - { - Trace("Failed to name downsampling pipeline.\n"); - } - - rs->Release(); + ID3D12Device* device = m_pDevice->GetDevice(); + + HRESULT hr; + + CD3DX12_ROOT_PARAMETER root[2]; + + CD3DX12_DESCRIPTOR_RANGE descTable[4]; + descTable[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + descTable[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1); + descTable[2].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 2); + descTable[3].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 3); + root[0].InitAsDescriptorTable(ARRAYSIZE(descTable), descTable); + root[1].InitAsConstantBufferView(0); + + D3D12_STATIC_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + samplerDesc.BorderColor 
= D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + samplerDesc.MinLOD = 0.0f; + samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + samplerDesc.MipLODBias = 0; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.ShaderRegister = 0; + samplerDesc.RegisterSpace = 0; + samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + D3D12_ROOT_SIGNATURE_DESC rsDesc = {}; + rsDesc.NumParameters = ARRAYSIZE(root); + rsDesc.pParameters = root; + rsDesc.NumStaticSamplers = 1; + rsDesc.pStaticSamplers = &samplerDesc; + + ID3DBlob* rs, * rsError; + hr = D3D12SerializeRootSignature(&rsDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rs, &rsError); + if (FAILED(hr)) + { + Trace("Failed to serialize root signature for apply pipeline.\n"); + ThrowIfFailed(hr); + } + + hr = device->CreateRootSignature(0, rs->GetBufferPointer(), rs->GetBufferSize(), IID_PPV_ARGS(&m_ApplyRootSignature)); + if (FAILED(hr)) + { + Trace("Failed to create root signature for apply pipeline.\n"); + ThrowIfFailed(hr); + } + + hr = m_ApplyRootSignature->SetName(L"Apply Reflections RootSignature"); + if (FAILED(hr)) + { + Trace("Failed to name root signature for apply pipeline.\n"); + ThrowIfFailed(hr); + } + + D3D12_SHADER_BYTECODE vsShaderByteCode = {}; + D3D12_SHADER_BYTECODE psShaderByteCode = {}; + DefineList defines; + CompileShaderFromFile("ApplyReflections.hlsl", &defines, "vs_main", "-T vs_6_0", &vsShaderByteCode); + CompileShaderFromFile("ApplyReflections.hlsl", &defines, "ps_main", "-T ps_6_0", &psShaderByteCode); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {}; + desc.VS = vsShaderByteCode; + desc.PS = psShaderByteCode; + desc.pRootSignature = m_ApplyRootSignature; + desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + desc.DepthStencilState.DepthEnable = false; + desc.DepthStencilState.StencilEnable = false; + desc.BlendState.AlphaToCoverageEnable = false; + desc.BlendState.IndependentBlendEnable = false; + + desc.NumRenderTargets = 1; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + 
desc.SampleMask = UINT_MAX; + desc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT; + desc.BlendState.RenderTarget[0].BlendEnable = true; + desc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_ONE; + desc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + desc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_SRC_ALPHA; + desc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; + desc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + desc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ONE; + desc.BlendState.RenderTarget[0].LogicOpEnable = false; + desc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + + desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + desc.RasterizerState.AntialiasedLineEnable = false; + desc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + desc.RasterizerState.DepthBias = 0; + desc.RasterizerState.DepthBiasClamp = 0; + desc.RasterizerState.DepthClipEnable = false; + desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + desc.RasterizerState.ForcedSampleCount = 0; + desc.RasterizerState.FrontCounterClockwise = false; + desc.RasterizerState.MultisampleEnable = false; + desc.RasterizerState.SlopeScaledDepthBias = 0; + + hr = device->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&m_ApplyPipelineState)); + if (FAILED(hr)) + { + Trace("Failed to create apply pipeline.\n"); + ThrowIfFailed(hr); + } + + hr = m_ApplyPipelineState->SetName(L"Apply Reflections Pipeline"); + if (FAILED(hr)) + { + Trace("Failed to name apply pipeline.\n"); + ThrowIfFailed(hr); + } + + rs->Release(); } -void SampleRenderer::CopyToTexture(ID3D12GraphicsCommandList * cl, ID3D12Resource * source, ID3D12Resource * target) +void SampleRenderer::CreateDepthDownsamplePipeline() { - D3D12_TEXTURE_COPY_LOCATION src = {}; - src.pResource = source; - src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src.SubresourceIndex = 0; - - D3D12_TEXTURE_COPY_LOCATION dst = {}; 
- dst.pResource = target; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.SubresourceIndex = 0; - - D3D12_BOX srcBox = {}; - srcBox.left = 0; - srcBox.top = 0; - srcBox.front = 0; - srcBox.right = m_Width; - srcBox.bottom = m_Height; - srcBox.back = 1; - - cl->CopyTextureRegion(&dst, 0, 0, 0, &src, &srcBox); + HRESULT hr; + + static constexpr uint32_t numRootParameters = 1; + CD3DX12_ROOT_PARAMETER root[numRootParameters]; + + CD3DX12_DESCRIPTOR_RANGE ranges[3] = {}; + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 13, 0); + ranges[2].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 13); + + root[0].InitAsDescriptorTable(3, ranges); + + D3D12_ROOT_SIGNATURE_DESC rsDesc = {}; + rsDesc.NumParameters = numRootParameters; + rsDesc.pParameters = root; + rsDesc.NumStaticSamplers = 0; + rsDesc.pStaticSamplers = nullptr; + + ID3DBlob* rs, * rsError; + hr = D3D12SerializeRootSignature(&rsDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rs, &rsError); + if (FAILED(hr)) + { + Trace("Failed to serialize root signature for downsampling pipeline.\n"); + ThrowIfFailed(hr); + } + + hr = m_pDevice->GetDevice()->CreateRootSignature(0, rs->GetBufferPointer(), rs->GetBufferSize(), IID_PPV_ARGS(&m_DownsampleRootSignature)); + if (FAILED(hr)) + { + Trace("Failed to create root signature for downsampling pipeline.\n"); + ThrowIfFailed(hr); + } + + hr = m_DownsampleRootSignature->SetName(L"Depth Downsample RootSignature"); + if (FAILED(hr)) + { + Trace("Failed to name root signature for downsampling pipeline.\n"); + ThrowIfFailed(hr); + } + + D3D12_SHADER_BYTECODE shaderByteCode = {}; + DefineList defines; + CompileShaderFromFile("DepthDownsample.hlsl", &defines, "main", "-T cs_6_0", &shaderByteCode); + + D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {}; + desc.pRootSignature = m_DownsampleRootSignature; + desc.CS = shaderByteCode; + + hr = m_pDevice->GetDevice()->CreateComputePipelineState(&desc, IID_PPV_ARGS(&m_DownsamplePipelineState)); + 
if (FAILED(hr)) + { + Trace("Failed to create downsampling pipeline.\n"); + ThrowIfFailed(hr); + } + + hr = m_DownsamplePipelineState->SetName(L"Depth Downsample Pipeline"); + if (FAILED(hr)) + { + Trace("Failed to name downsampling pipeline.\n"); + ThrowIfFailed(hr); + } + + rs->Release(); } \ No newline at end of file diff --git a/sample/src/DX12/Sources/SampleRenderer.h b/sample/src/DX12/Sources/SampleRenderer.h index 44468dd..a1eb0f7 100644 --- a/sample/src/DX12/Sources/SampleRenderer.h +++ b/sample/src/DX12/Sources/SampleRenderer.h @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include "base/SaveTexture.h" +#include "SSSR.h" // We are queuing (backBufferCount + 0.5) frames, so we need to triple buffer the resources that get modified each frame static const int backBufferCount = 3; @@ -32,204 +33,189 @@ static const int backBufferCount = 3; #define USE_VID_MEM true using namespace CAULDRON_DX12; - +using namespace SSSR_SAMPLE_DX12; // // This class deals with the GPU side of the sample. // class SampleRenderer { public: - struct State - { - float time; - Camera camera; - - float exposure; - float emmisiveFactor; - float iblFactor; - float lightIntensity; - XMFLOAT3 lightColor; - Camera lightCamera; - - int toneMapper; - int skyDomeType; - bool bDrawBoundingBoxes; - bool bDrawLightFrustum; - bool bDrawBloom; - bool bDrawScreenSpaceReflections; - - float targetFrametime; - - bool bShowIntersectionResults; - float temporalStability; - int maxTraversalIterations; - int mostDetailedDepthHierarchyMipLevel; - float depthBufferThickness; - int minTraversalOccupancy; - int samplesPerQuad; - bool bEnableVarianceGuidedTracing; - float roughnessThreshold; - - float tileClassificationTime; - float intersectionTime; - float denoisingTime; - - bool showReflectionTarget; - bool isBenchmarking; - const std::string* screenshotName; - }; - - void OnCreate(Device* pDevice, SwapChain *pSwapChain); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, 
uint32_t Width, uint32_t Height); - void OnDestroyWindowSizeDependentResources(); - - int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); - void UnloadScene(); - - const std::vector &GetTimingValues() { return m_TimeStamps; } - - void OnRender(State *pState, SwapChain *pSwapChain); + struct State + { + float time; + Camera camera; + + float exposure; + float emmisiveFactor; + float iblFactor; + float lightIntensity; + XMFLOAT3 lightColor; + Camera lightCamera; + + int toneMapper; + int skyDomeType; + bool bDrawBoundingBoxes; + bool bDrawLightFrustum; + bool bDrawBloom; + bool bDrawScreenSpaceReflections; + + float targetFrametime; + + bool bShowIntersectionResults; + float temporalStability; + float temporalVarianceThreshold; + int maxTraversalIterations; + int mostDetailedDepthHierarchyMipLevel; + float depthBufferThickness; + int minTraversalOccupancy; + int samplesPerQuad; + bool bEnableVarianceGuidedTracing; + float roughnessThreshold; + + float tileClassificationTime; + float intersectionTime; + float denoisingTime; + + bool showReflectionTarget; + bool isBenchmarking; + const std::string* screenshotName; + }; + + void OnCreate(Device* pDevice, SwapChain* pSwapChain); + void OnDestroy(); + + void OnCreateWindowSizeDependentResources(SwapChain* pSwapChain, uint32_t Width, uint32_t Height); + void OnDestroyWindowSizeDependentResources(); + + int LoadScene(GLTFCommon* pGLTFCommon, int stage = 0); + void UnloadScene(); + + const std::vector& GetTimingValues() { return m_TimeStamps; } + + void OnRender(State* pState, SwapChain* pSwapChain); + void Recompile(); private: - void CreateApplyReflectionsPipeline(); - void CreateDepthDownsamplePipeline(); - void StallFrame(float targetFrametime); - void BeginFrame(); - - per_frame * FillFrameConstants(State * pState); - void RenderSpotLights(ID3D12GraphicsCommandList * pCmdLst1, per_frame * pPerFrame); - void RenderMotionVectors(ID3D12GraphicsCommandList * pCmdLst1, per_frame * pPerFrame, State * pState); - void 
RenderSkydome(ID3D12GraphicsCommandList * pCmdLst1, per_frame * pPerFrame, State * pState); - void RenderLightFrustums(ID3D12GraphicsCommandList * pCmdLst1, per_frame * pPerFrame, State * pState); - void DownsampleDepthBuffer(ID3D12GraphicsCommandList * pCmdLst1); - void RenderScreenSpaceReflections(ID3D12GraphicsCommandList * pCmdLst1, State * pState); - void CopyHistorySurfaces(ID3D12GraphicsCommandList * pCmdLst1); - void ApplyReflectionTarget(ID3D12GraphicsCommandList * pCmdLst1, State * pState); - void DownsampleScene(ID3D12GraphicsCommandList * pCmdLst1); - void RenderBloom(ID3D12GraphicsCommandList * pCmdLst1); - void ApplyTonemapping(ID3D12GraphicsCommandList * pCmdLst2, State * pState, SwapChain * pSwapChain); - void RenderHUD(ID3D12GraphicsCommandList * pCmdLst2, SwapChain * pSwapChain); - void CopyToTexture(ID3D12GraphicsCommandList * cl, ID3D12Resource * source, ID3D12Resource * target); + void CreateApplyReflectionsPipeline(); + void CreateDepthDownsamplePipeline(); + void StallFrame(float targetFrametime); + void BeginFrame(); + + per_frame* FillFrameConstants(State* pState); + void RenderSpotLights(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame); + void RenderMotionVectors(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState); + void RenderSkydome(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState); + void RenderLightFrustums(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState); + void DownsampleDepthBuffer(ID3D12GraphicsCommandList* pCmdLst1); + void RenderScreenSpaceReflections(ID3D12GraphicsCommandList* pCmdLst1, per_frame* pPerFrame, State* pState); + void CopyHistorySurfaces(ID3D12GraphicsCommandList* pCmdLst1); + void ApplyReflectionTarget(ID3D12GraphicsCommandList* pCmdLst1, State* pState); + void DownsampleScene(ID3D12GraphicsCommandList* pCmdLst1); + void RenderBloom(ID3D12GraphicsCommandList* pCmdLst1); + void ApplyTonemapping(ID3D12GraphicsCommandList* pCmdLst2, 
State* pState, SwapChain* pSwapChain); + void RenderHUD(ID3D12GraphicsCommandList* pCmdLst2, SwapChain* pSwapChain); private: - Device * m_pDevice; - - uint32_t m_Width; - uint32_t m_Height; - - D3D12_VIEWPORT m_Viewport; - D3D12_RECT m_Scissor; - - // Initialize helper classes - ResourceViewHeaps m_ResourceViewHeaps; - StaticResourceViewHeap m_CpuVisibleHeap; - UploadHeap m_UploadHeap; - DynamicBufferRing m_ConstantBufferRing; - StaticBufferPool m_VidMemBufferPool; - CommandListRing m_CommandListRing; - GPUTimestamps m_GPUTimer; - - - //gltf passes - GltfPbrPass * m_gltfPBR; - GltfBBoxPass * m_gltfBBox; - GltfDepthPass * m_gltfDepth; - GltfMotionVectorsPass * m_gltfMotionVectors; - GLTFTexturesAndBuffers * m_pGLTFTexturesAndBuffers; - - // effects - Bloom m_Bloom; - SkyDome m_SkyDome; - SkyDome m_AmbientLight; - DownSamplePS m_DownSample; - SkyDomeProc m_SkyDomeProc; - ToneMapping m_ToneMapping; - - // BRDF LUT - Texture m_BrdfLut; - - // GUI - ImGUI m_ImGUI; - - // Temporary render targets - - // depth buffer - DSV m_DepthBufferDSV; - Texture m_DepthBuffer; - - // Motion Vectors resources - Texture m_MotionVectors; - RTV m_MotionVectorsRTV; - CBV_SRV_UAV m_MotionVectorsSRV; - CBV_SRV_UAV m_MotionVectorsInputsSRV; - - // Normal buffer - Texture m_NormalBuffer; - RTV m_NormalBufferRTV; - CBV_SRV_UAV m_NormalBufferSRV; - Texture m_NormalHistoryBuffer; - - // Specular roughness target - Texture m_SpecularRoughness; - RTV m_SpecularRoughnessRTV; - Texture m_SpecularRoughnessHistory; - - // shadowmaps - Texture m_ShadowMap; - DSV m_ShadowMapDSV; - CBV_SRV_UAV m_ShadowMapSRV; - - // Resolved RT - Texture m_HDR; - CBV_SRV_UAV m_HDRSRV; - RTV m_HDRRTV; - - // widgets - Wireframe m_Wireframe; - WireframeBox m_WireframeBox; - - std::vector m_TimeStamps; - - // FFX SSSR Effect - FfxSssrContext m_SssrContext; - FfxSssrReflectionView m_SssrReflectionView; - bool m_SssrCreatedReflectionView = false; - CBV_SRV_UAV m_SssrSceneSRV; - CBV_SRV_UAV m_SssrDepthBufferHierarchySRV; - 
CBV_SRV_UAV m_SssrMotionBufferSRV; - CBV_SRV_UAV m_SssrNormalBufferSRV; - CBV_SRV_UAV m_SssrRoughnessBufferSRV; - CBV_SRV_UAV m_SssrNormalHistoryBufferSRV; - CBV_SRV_UAV m_SssrRoughnessHistoryBufferSRV; - CBV_SRV_UAV m_SssrOutputBufferUAV; - CBV_SRV_UAV m_SssrOutputBufferUAVGPU; - CBV_SRV_UAV m_SssrEnvironmentMapSRV; - D3D12_SAMPLER_DESC m_SssrEnvironmentMapSamplerDesc; - Texture m_SssrOutputBuffer; - - RTV m_ApplyPipelineRTV; - ID3D12RootSignature * m_ApplyRootSignature; - ID3D12PipelineState * m_ApplyPipelineState; - CBV_SRV_UAV m_ApplyPassDescriptorTable; - - // Depth downsampling with single CS - ID3D12RootSignature * m_DownsampleRootSignature; - ID3D12PipelineState * m_DownsamplePipelineState; - D3D12_GPU_DESCRIPTOR_HANDLE m_DownsampleDescriptorTable; - CBV_SRV_UAV m_DepthBufferDescriptor; - CBV_SRV_UAV m_DepthHierarchyDescriptors[13]; - CBV_SRV_UAV m_AtomicCounterUAVGPU; - Texture m_DepthHierarchy; - Texture m_AtomicCounter; - CBV_SRV_UAV m_AtomicCounterUAV; - UINT m_DepthMipLevelCount = 0; - - UINT64 m_GpuTicksPerSecond; - - SaveTexture m_SaveTexture; - - // For multithreaded texture loading - AsyncPool m_AsyncPool; + Device* m_pDevice; + + uint32_t m_Width; + uint32_t m_Height; + + D3D12_VIEWPORT m_Viewport; + D3D12_RECT m_Scissor; + + // Initialize helper classes + ResourceViewHeaps m_ResourceViewHeaps; + StaticResourceViewHeap m_CpuVisibleHeap; + UploadHeap m_UploadHeap; + DynamicBufferRing m_ConstantBufferRing; + StaticBufferPool m_VidMemBufferPool; + CommandListRing m_CommandListRing; + GPUTimestamps m_GPUTimer; + + //gltf passes + GltfPbrPass* m_gltfPBR; + GltfBBoxPass* m_gltfBBox; + GltfDepthPass* m_gltfDepth; + GltfMotionVectorsPass* m_gltfMotionVectors; + GLTFTexturesAndBuffers* m_pGLTFTexturesAndBuffers; + + // effects + Bloom m_Bloom; + SkyDome m_SkyDome; + SkyDome m_AmbientLight; + DownSamplePS m_DownSample; + SkyDomeProc m_SkyDomeProc; + ToneMapping m_ToneMapping; + + //SSSR + SSSR m_Sssr; + uint32_t m_frame_index = 0; + XMMATRIX 
m_prev_view_projection; + + // BRDF LUT + Texture m_BrdfLut; + + // GUI + ImGUI m_ImGUI; + + // depth buffer + DSV m_DepthBufferDSV; + Texture m_DepthBuffer; + + // Motion Vectors resources + Texture m_MotionVectors; + RTV m_MotionVectorsRTV; + CBV_SRV_UAV m_MotionVectorsSRV; + CBV_SRV_UAV m_MotionVectorsInputsSRV; + + // Normal buffer + Texture m_NormalBuffer; + RTV m_NormalBufferRTV; + CBV_SRV_UAV m_NormalBufferSRV; + Texture m_NormalHistoryBuffer; + + // Specular roughness target + Texture m_SpecularRoughness; + RTV m_SpecularRoughnessRTV; + + // shadowmaps + Texture m_ShadowMap; + DSV m_ShadowMapDSV; + CBV_SRV_UAV m_ShadowMapSRV; + + // Resolved RT + Texture m_HDR; + CBV_SRV_UAV m_HDRSRV; + RTV m_HDRRTV; + + // widgets + Wireframe m_Wireframe; + WireframeBox m_WireframeBox; + + std::vector m_TimeStamps; + + RTV m_ApplyPipelineRTV; + ID3D12RootSignature* m_ApplyRootSignature; + ID3D12PipelineState* m_ApplyPipelineState; + CBV_SRV_UAV m_ApplyPassDescriptorTable; + + // Depth downsampling with single CS + ID3D12RootSignature* m_DownsampleRootSignature; + ID3D12PipelineState* m_DownsamplePipelineState; + D3D12_GPU_DESCRIPTOR_HANDLE m_DownsampleDescriptorTable; + CBV_SRV_UAV m_DepthBufferDescriptor; + CBV_SRV_UAV m_DepthHierarchyDescriptors[13]; + CBV_SRV_UAV m_AtomicCounterUAVGPU; + Texture m_DepthHierarchy; + Texture m_AtomicCounter; + CBV_SRV_UAV m_AtomicCounterUAV; + UINT m_DepthMipLevelCount = 0; + + UINT64 m_GpuTicksPerSecond; + + SaveTexture m_SaveTexture; + + // For multithreaded texture loading + AsyncPool m_AsyncPool; }; diff --git a/sample/src/DX12/Sources/ShaderPass.cpp b/sample/src/DX12/Sources/ShaderPass.cpp new file mode 100644 index 0000000..52807f0 --- /dev/null +++ b/sample/src/DX12/Sources/ShaderPass.cpp @@ -0,0 +1,43 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#include "stdafx.h" +#include "ShaderPass.h" +namespace SSSR_SAMPLE_DX12 +{ + void ShaderPass::DestroyPipeline() + { + if (pRootSignature) + { + pRootSignature->Release(); + } + if (pPipeline) + { + pPipeline->Release(); + } + } + void ShaderPass::OnDestroy() + { + DestroyPipeline(); + descriptorTables_CBV_SRV_UAV.clear(); + descriptorTables_Sampler.clear(); + } +} \ No newline at end of file diff --git a/ffx-sssr/src/reflection_view.h b/sample/src/DX12/Sources/ShaderPass.h similarity index 75% rename from ffx-sssr/src/reflection_view.h rename to sample/src/DX12/Sources/ShaderPass.h index 3ed2d19..4de8d3d 100644 --- a/ffx-sssr/src/reflection_view.h +++ b/sample/src/DX12/Sources/ShaderPass.h @@ -20,22 +20,17 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
********************************************************************/ #pragma once +#include "Base/ResourceViewHeaps.h" -namespace ffx_sssr +namespace SSSR_SAMPLE_DX12 { - /** - The ReflectionView class encapsulates the information for resolving a reflection view. - */ - class ReflectionView - { - public: - inline ReflectionView(); + struct ShaderPass { + ID3D12RootSignature* pRootSignature = nullptr; + ID3D12PipelineState* pPipeline = nullptr; + std::vector descriptorTables_CBV_SRV_UAV; + std::vector descriptorTables_Sampler; - // The view matrix to be used. - matrix4 view_matrix_; - // The projection matrix to be used. - matrix4 projection_matrix_; - }; -} - -#include "reflection_view.inl" + void DestroyPipeline(); + void OnDestroy(); + }; +} \ No newline at end of file diff --git a/sample/src/DX12/Sources/SssrSample.cpp b/sample/src/DX12/Sources/SssrSample.cpp index f9c9c35..8ee4ea5 100644 --- a/sample/src/DX12/Sources/SssrSample.cpp +++ b/sample/src/DX12/Sources/SssrSample.cpp @@ -33,17 +33,22 @@ const bool CPU_BASED_VALIDATION_ENABLED = false; const bool GPU_BASED_VALIDATION_ENABLED = false; #endif // _DEBUG - SssrSample::SssrSample(LPCSTR name) : FrameworkWindows(name) { - m_LastFrameTime = MillisecondsNow(); - m_Time = 0; - m_bPlay = true; - m_bShowUI = true; + m_DeltaTime = 0; + m_Distance = 0; + m_Pitch = 0; + m_Yaw = 0; + m_selectedScene = 0; + + m_LastFrameTime = MillisecondsNow(); + m_Time = 0; + m_bPlay = true; + m_bShowUI = true; - m_CameraControlSelected = 0; // select WASD on start up + m_CameraControlSelected = 0; // select WASD on start up - m_pGltfLoader = NULL; + m_pGltfLoader = NULL; } //-------------------------------------------------------------------------------------- @@ -53,73 +58,76 @@ SssrSample::SssrSample(LPCSTR name) : FrameworkWindows(name) //-------------------------------------------------------------------------------------- void SssrSample::OnCreate(HWND hWnd) { - // get the list of scenes - for (const auto& scene : 
m_JsonConfigFile["scenes"]) - m_SceneNames.push_back(scene["name"]); - - DWORD dwAttrib = GetFileAttributes("..\\media\\"); - if ((dwAttrib == INVALID_FILE_ATTRIBUTES) || ((dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) == 0) - { - MessageBox(NULL, "Media files not found!\n\nPlease check the readme on how to get the media files.", "Cauldron Panic!", MB_ICONERROR); - exit(0); - } - - // Create Device - // - m_Device.OnCreate("SssrSample", "Cauldron", CPU_BASED_VALIDATION_ENABLED, GPU_BASED_VALIDATION_ENABLED, hWnd); - m_Device.CreatePipelineCache(); - - // Init the shader compiler - InitDirectXCompiler(); - CreateShaderCache(); - - // Create Swapchain - // - uint32_t dwNumberOfBackBuffers = 2; - m_Swapchain.OnCreate(&m_Device, dwNumberOfBackBuffers, hWnd); - - // Create a instance of the renderer and initialize it, we need to do that for each GPU - // - m_Node = new SampleRenderer(); - m_Node->OnCreate(&m_Device, &m_Swapchain); - - // init GUI (non gfx stuff) - // - ImGUI_Init((void *)hWnd); - - // Init Camera, looking at the origin - // - m_Yaw = 0.0f; - m_Pitch = 0.0f; - m_Distance = 3.5f; - - // init GUI state - m_State.toneMapper = 2; - m_State.skyDomeType = 1; - m_State.exposure = 1.0f; - m_State.emmisiveFactor = 1.0f; - m_State.iblFactor = 1.0f; - m_State.bDrawBoundingBoxes = false; - m_State.bDrawLightFrustum = false; - m_State.bDrawBloom = false; - m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, XMVectorSet(0, 0, 0, 0)); - m_State.lightIntensity = 10.f; - m_State.lightCamera.SetFov(XM_PI / 6.0f, 1024, 1024, 0.1f, 20.0f); - m_State.lightCamera.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); - m_State.lightColor = XMFLOAT3(1, 1, 1); - m_State.targetFrametime = 0; - m_State.temporalStability = 0.99f; - m_State.maxTraversalIterations = 128; - m_State.mostDetailedDepthHierarchyMipLevel = 1; - m_State.depthBufferThickness = 0.015f; - m_State.minTraversalOccupancy = 4; - m_State.samplesPerQuad = 1; - m_State.bEnableVarianceGuidedTracing = true; - 
m_State.bShowIntersectionResults = false; - m_State.roughnessThreshold = 0.2f; - m_State.showReflectionTarget = false; - m_State.bDrawScreenSpaceReflections = true; - m_State.screenshotName = NULL; + // get the list of scenes + for (const auto& scene : m_JsonConfigFile["scenes"]) + m_SceneNames.push_back(scene["name"]); + + DWORD dwAttrib = GetFileAttributes("..\\media\\"); + if ((dwAttrib == INVALID_FILE_ATTRIBUTES) || ((dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) == 0) + { + MessageBox(NULL, "Media files not found!\n\nPlease check the readme on how to get the media files.", "Cauldron Panic!", MB_ICONERROR); + exit(0); + } + + // Create Device + // + m_Device.OnCreate("SssrSample", "Cauldron", CPU_BASED_VALIDATION_ENABLED, GPU_BASED_VALIDATION_ENABLED, hWnd); + m_Device.CreatePipelineCache(); + + // Init the shader compiler + InitDirectXCompiler(); + CreateShaderCache(); + + // Create Swapchain + // + uint32_t dwNumberOfBackBuffers = 2; + m_Swapchain.OnCreate(&m_Device, dwNumberOfBackBuffers, hWnd); + + // Create a instance of the renderer and initialize it, we need to do that for each GPU + // + m_Node = new SampleRenderer(); + m_Node->OnCreate(&m_Device, &m_Swapchain); + + // init GUI (non gfx stuff) + // + ImGUI_Init((void*)hWnd); + + // Init Camera, looking at the origin + // + m_Yaw = 0.0f; + m_Pitch = 0.0f; + m_Distance = 3.5f; + + // init GUI state + m_State.toneMapper = 2; + m_State.skyDomeType = 1; + m_State.exposure = 1.0f; + m_State.emmisiveFactor = 1.0f; + m_State.iblFactor = 1.0f; + m_State.bDrawBoundingBoxes = false; + m_State.bDrawLightFrustum = false; + m_State.bDrawBloom = false; + m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, XMVectorSet(0, 0, 0, 0)); + m_State.lightIntensity = 10.f; + m_State.lightCamera.SetFov(XM_PI / 6.0f, 1024, 1024, 0.1f, 20.0f); + m_State.lightCamera.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); + m_State.lightColor = XMFLOAT3(1, 1, 1); + m_State.targetFrametime = 0; + m_State.temporalStability = 0.99f; + 
m_State.temporalVarianceThreshold = 0.002f; + m_State.maxTraversalIterations = 128; + m_State.mostDetailedDepthHierarchyMipLevel = 1; + m_State.depthBufferThickness = 0.015f; + m_State.minTraversalOccupancy = 4; + m_State.samplesPerQuad = 1; + m_State.bEnableVarianceGuidedTracing = true; + m_State.bShowIntersectionResults = false; + m_State.roughnessThreshold = 0.2f; + m_State.showReflectionTarget = false; + m_State.bDrawScreenSpaceReflections = true; + m_State.screenshotName = NULL; + + LoadScene(m_selectedScene); } //-------------------------------------------------------------------------------------- @@ -129,32 +137,32 @@ void SssrSample::OnCreate(HWND hWnd) //-------------------------------------------------------------------------------------- void SssrSample::OnDestroy() { - ImGUI_Shutdown(); + ImGUI_Shutdown(); - m_Device.GPUFlush(); + m_Device.GPUFlush(); - // Fullscreen state should always be false before exiting the app. - m_Swapchain.SetFullScreen(false); + // Fullscreen state should always be false before exiting the app. 
+ m_Swapchain.SetFullScreen(false); - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); + m_Node->UnloadScene(); + m_Node->OnDestroyWindowSizeDependentResources(); + m_Node->OnDestroy(); - delete m_Node; + delete m_Node; - m_Swapchain.OnDestroyWindowSizeDependentResources(); - m_Swapchain.OnDestroy(); + m_Swapchain.OnDestroyWindowSizeDependentResources(); + m_Swapchain.OnDestroy(); - //shut down the shader compiler - DestroyShaderCache(&m_Device); + //shut down the shader compiler + DestroyShaderCache(&m_Device); - if (m_pGltfLoader) - { - delete m_pGltfLoader; - m_pGltfLoader = NULL; - } + if (m_pGltfLoader) + { + delete m_pGltfLoader; + m_pGltfLoader = NULL; + } - m_Device.OnDestroy(); + m_Device.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -164,10 +172,10 @@ void SssrSample::OnDestroy() //-------------------------------------------------------------------------------------- bool SssrSample::OnEvent(MSG msg) { - if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) - return true; + if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) + return true; - return true; + return true; } //-------------------------------------------------------------------------------------- @@ -177,227 +185,199 @@ bool SssrSample::OnEvent(MSG msg) //-------------------------------------------------------------------------------------- void SssrSample::SetFullScreen(bool fullscreen) { - m_Device.GPUFlush(); + m_Device.GPUFlush(); - m_Swapchain.SetFullScreen(fullscreen); + m_Swapchain.SetFullScreen(fullscreen); } void SssrSample::BuildUI() { - ImGuiStyle& style = ImGui::GetStyle(); - style.FrameBorderSize = 1.0f; - - bool opened = true; - ImGui::Begin("Stats", &opened); - - if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Text("Resolution : %ix%i", m_Width, m_Height); - } - - if 
(ImGui::CollapsingHeader("Animation")) - { - ImGui::Checkbox("Play", &m_bPlay); - ImGui::SliderFloat("Time", &m_Time, 0, 30); - } - - if (ImGui::CollapsingHeader("Model Selection", ImGuiTreeNodeFlags_DefaultOpen)) - { - static int selectedScene = 0; - auto getterLambda = [](void* data, int idx, const char** out_str)->bool { *out_str = ((std::vector *)data)->at(idx).c_str(); return true; }; - if (ImGui::Combo("model", &selectedScene, getterLambda, &m_SceneNames, (int)m_SceneNames.size()) || (m_pGltfLoader == NULL)) - { - LoadScene(selectedScene); - - // bail out as we need to reload everything - ImGui::End(); - ImGui::EndFrame(); - return; - } - - char *cameraControl[] = { "WASD", "Orbit", "cam #0", "cam #1", "cam #2", "cam #3" , "cam #4", "cam #5" }; - if (m_CameraControlSelected >= m_pGltfLoader->m_cameras.size() + 2) - m_CameraControlSelected = 0; - ImGui::Combo("Camera", &m_CameraControlSelected, cameraControl, (int)(m_pGltfLoader->m_cameras.size() + 2)); - - ImGui::Checkbox("Show Bounding Boxes", &m_State.bDrawBoundingBoxes); - } - - if (ImGui::CollapsingHeader("Lighting")) - { - const char * tonemappers[] = { "Timothy", "DX11DSK", "Reinhard", "Uncharted2Tonemap", "ACES", "No tonemapper" }; - ImGui::Combo("Tonemapper", &m_State.toneMapper, tonemappers, _countof(tonemappers)); - - const char * skyDomeType[] = { "Procedural Sky", "cubemap", "Simple clear" }; - ImGui::Combo("SkyDome", &m_State.skyDomeType, skyDomeType, _countof(skyDomeType)); - - ImGui::SliderFloat("IBL Factor", &m_State.iblFactor, 0.0f, 10.0f, NULL, 1.0f); - ImGui::SliderFloat("Emmisive", &m_State.emmisiveFactor, 1.0f, 1000.0f, NULL, 1.0f); - ImGui::SliderFloat("Exposure", &m_State.exposure, 0.0f, 4.0f); - ImGui::Checkbox("Show Light Frustums", &m_State.bDrawLightFrustum); - ImGui::Checkbox("Draw Bloom", &m_State.bDrawBloom); - } - - if (ImGui::CollapsingHeader("Reflections", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Checkbox("Draw Screen Space Reflections", 
&m_State.bDrawScreenSpaceReflections); - ImGui::Checkbox("Show Reflection Target", &m_State.showReflectionTarget); - ImGui::Checkbox("Show Intersection Results", &m_State.bShowIntersectionResults); - ImGui::SliderFloat("Target Frametime in ms", &m_State.targetFrametime, 0.0f, 50.0f); - ImGui::SliderInt("Max Traversal Iterations", &m_State.maxTraversalIterations, 0, 256); - ImGui::SliderInt("Min Traversal Occupancy", &m_State.minTraversalOccupancy, 0, 32); - ImGui::SliderInt("Most Detailed Level", &m_State.mostDetailedDepthHierarchyMipLevel, 0, 5); - ImGui::SliderFloat("Depth Buffer Thickness", &m_State.depthBufferThickness, 0.0f, 0.03f); - ImGui::SliderFloat("Roughness Threshold", &m_State.roughnessThreshold, 0.0f, 1.f); - ImGui::SliderFloat("Temporal Stability", &m_State.temporalStability, 0.0f, 1.0f); - ImGui::Checkbox("Enable Variance Guided Tracing", &m_State.bEnableVarianceGuidedTracing); - - ImGui::Text("Samples Per Quad"); ImGui::SameLine(); - ImGui::RadioButton("1", &m_State.samplesPerQuad, 1); ImGui::SameLine(); - ImGui::RadioButton("2", &m_State.samplesPerQuad, 2); ImGui::SameLine(); - ImGui::RadioButton("4", &m_State.samplesPerQuad, 4); - - ImGui::Value("Tile Classification Elapsed Time", 1000 * m_State.tileClassificationTime, "%.1f us"); - ImGui::Value("Intersection Elapsed Time", 1000 * m_State.intersectionTime, "%.1f us"); - ImGui::Value("Denoising Elapsed Time", 1000 * m_State.denoisingTime, "%.1f us"); - } - - if (ImGui::CollapsingHeader("Profiler")) - { - const std::vector& timeStamps = m_Node->GetTimingValues(); - if (timeStamps.size() > 0) - { - for (uint32_t i = 0; i < timeStamps.size(); i++) - { - ImGui::Text("%-22s: %7.1f", timeStamps[i].m_label.c_str(), timeStamps[i].m_microseconds); - } - - //scrolling data and average computing - static float values[128]; - values[127] = timeStamps.back().m_microseconds; - for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; } - ImGui::PlotLines("", values, 128, 0, "GPU frame time (us)", 
0.0f, 30000.0f, ImVec2(0, 80)); - } - } - - ImGui::Text("'X' to show/hide GUI"); - ImGui::End(); + ImGuiStyle& style = ImGui::GetStyle(); + style.FrameBorderSize = 1.0f; + + bool opened = true; + ImGui::Begin("FidelityFX SSSR", &opened); + + if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Resolution : %ix%i", m_Width, m_Height); + } + + if (ImGui::CollapsingHeader("Controls", ImGuiTreeNodeFlags_DefaultOpen)) + { + char* cameraControl[] = { "WASD", "Orbit", "cam #0", "cam #1", "cam #2", "cam #3" , "cam #4", "cam #5" }; + if (m_CameraControlSelected >= m_pGltfLoader->m_cameras.size() + 2) + m_CameraControlSelected = 0; + ImGui::Combo("Camera", &m_CameraControlSelected, cameraControl, (int)(m_pGltfLoader->m_cameras.size() + 2)); + } + + if (ImGui::CollapsingHeader("Reflections", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Checkbox("Draw Screen Space Reflections", &m_State.bDrawScreenSpaceReflections); + ImGui::Checkbox("Show Reflection Target", &m_State.showReflectionTarget); + ImGui::Checkbox("Show Intersection Results", &m_State.bShowIntersectionResults); + ImGui::SliderFloat("Target Frametime in ms", &m_State.targetFrametime, 0.0f, 50.0f); + ImGui::SliderInt("Max Traversal Iterations", &m_State.maxTraversalIterations, 0, 256); + ImGui::SliderInt("Min Traversal Occupancy", &m_State.minTraversalOccupancy, 0, 32); + ImGui::SliderInt("Most Detailed Level", &m_State.mostDetailedDepthHierarchyMipLevel, 0, 5); + ImGui::SliderFloat("Depth Buffer Thickness", &m_State.depthBufferThickness, 0.0f, 0.03f); + ImGui::SliderFloat("Roughness Threshold", &m_State.roughnessThreshold, 0.0f, 1.f); + ImGui::SliderFloat("Temporal Stability", &m_State.temporalStability, 0.8f, 0.99f); + ImGui::SliderFloat("Temporal Variance Threshold", &m_State.temporalVarianceThreshold, 0.0f, 0.01f); + ImGui::Checkbox("Enable Variance Guided Tracing", &m_State.bEnableVarianceGuidedTracing); + + ImGui::Text("Samples Per Quad"); ImGui::SameLine(); + 
ImGui::RadioButton("1", &m_State.samplesPerQuad, 1); ImGui::SameLine(); + ImGui::RadioButton("2", &m_State.samplesPerQuad, 2); ImGui::SameLine(); + ImGui::RadioButton("4", &m_State.samplesPerQuad, 4); + + ImGui::Value("Tile Classification Elapsed Time", 1000 * m_State.tileClassificationTime, "%.1f us"); + ImGui::Value("Intersection Elapsed Time", 1000 * m_State.intersectionTime, "%.1f us"); + ImGui::Value("Denoising Elapsed Time", 1000 * m_State.denoisingTime, "%.1f us"); + } + + if (ImGui::CollapsingHeader("Profiler")) + { + const std::vector& timeStamps = m_Node->GetTimingValues(); + if (timeStamps.size() > 0) + { + for (uint32_t i = 0; i < timeStamps.size(); i++) + { + ImGui::Text("%-22s: %7.1f", timeStamps[i].m_label.c_str(), timeStamps[i].m_microseconds); + } + + //scrolling data and average computing + static float values[128]; + values[127] = timeStamps.back().m_microseconds; + for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; } + ImGui::PlotLines("", values, 128, 0, "GPU frame time (us)", 0.0f, 30000.0f, ImVec2(0, 80)); + } + } + + ImGui::Text("'X' to show/hide GUI"); + ImGui::End(); } void SssrSample::HandleInput() { - // If the mouse was not used by the GUI then it's for the camera - // - ImGuiIO& io = ImGui::GetIO(); - - static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - std::chrono::duration diff = now - last; - last = now; - - io.DeltaTime = static_cast(diff.count()); - - if (ImGui::IsKeyPressed('X')) - { - m_bShowUI = !m_bShowUI; - ShowCursor(m_bShowUI); - } - - if (io.WantCaptureMouse == false || !m_bShowUI) - { - if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) - { - m_Yaw -= io.MouseDelta.x / 100.f; - m_Pitch += io.MouseDelta.y / 100.f; - } - - // Choose camera movement depending on setting - // - if (m_CameraControlSelected == 0) - { - // WASD - // - m_State.camera.UpdateCameraWASD(m_Yaw, m_Pitch, 
io.KeysDown, io.DeltaTime); - } - else if (m_CameraControlSelected == 1) - { - // Orbiting - // - m_Distance -= (float)io.MouseWheel / 3.0f; - m_Distance = std::max(m_Distance, 0.1f); - - bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); - - m_State.camera.UpdateCameraPolar(m_Yaw, m_Pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? io.MouseDelta.y / 100.0f : 0.0f, m_Distance); - } - else - { - // Use a camera from the GLTF - // - m_pGltfLoader->GetCamera(m_CameraControlSelected - 2, &m_State.camera); - m_Yaw = m_State.camera.GetYaw(); - m_Pitch = m_State.camera.GetPitch(); - } - } + // If the mouse was not used by the GUI then it's for the camera + // + ImGuiIO& io = ImGui::GetIO(); + + static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + std::chrono::duration diff = now - last; + last = now; + + io.DeltaTime = static_cast(diff.count()); + + if (ImGui::IsKeyPressed('X')) + { + m_bShowUI = !m_bShowUI; + ShowCursor(m_bShowUI); + } + + if (ImGui::IsKeyPressed('Y')) + { + m_Node->Recompile(); + } + + + if (io.WantCaptureMouse == false || !m_bShowUI) + { + if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) + { + m_Yaw -= io.MouseDelta.x / 100.f; + m_Pitch += io.MouseDelta.y / 100.f; + } + + // Choose camera movement depending on setting + // + if (m_CameraControlSelected == 0) + { + // WASD + // + m_State.camera.UpdateCameraWASD(m_Yaw, m_Pitch, io.KeysDown, io.DeltaTime); + } + else if (m_CameraControlSelected == 1) + { + // Orbiting + // + m_Distance -= (float)io.MouseWheel / 3.0f; + m_Distance = std::max(m_Distance, 0.1f); + + bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); + + m_State.camera.UpdateCameraPolar(m_Yaw, m_Pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? 
io.MouseDelta.y / 100.0f : 0.0f, m_Distance); + } + else + { + // Use a camera from the GLTF + // + m_pGltfLoader->GetCamera(m_CameraControlSelected - 2, &m_State.camera); + m_Yaw = m_State.camera.GetYaw(); + m_Pitch = m_State.camera.GetPitch(); + } + } } void SssrSample::LoadScene(int sceneIndex) { - json scene = m_JsonConfigFile["scenes"][sceneIndex]; - if (m_pGltfLoader != NULL) - { - //free resources, unload the current scene, and load new scene... - m_Device.GPUFlush(); - - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); - m_pGltfLoader->Unload(); - m_Node->OnCreate(&m_Device, &m_Swapchain); - m_Node->OnCreateWindowSizeDependentResources(&m_Swapchain, m_Width, m_Height); - } - - delete(m_pGltfLoader); - m_pGltfLoader = new GLTFCommon(); - - if (m_pGltfLoader->Load(scene["directory"], scene["filename"]) == false) - { - MessageBox(NULL, "The selected model couldn't be found, please check the documentation", "Cauldron Panic!", MB_ICONERROR); - exit(0); - } - - // Load the UI settings, and also some defaults cameras and lights, in case the GLTF has none - { + json scene = m_JsonConfigFile["scenes"][sceneIndex]; + if (m_pGltfLoader != NULL) + { + //free resources, unload the current scene, and load new scene... 
+ m_Device.GPUFlush(); + + m_Node->UnloadScene(); + m_Node->OnDestroyWindowSizeDependentResources(); + m_Node->OnDestroy(); + m_pGltfLoader->Unload(); + m_Node->OnCreate(&m_Device, &m_Swapchain); + m_Node->OnCreateWindowSizeDependentResources(&m_Swapchain, m_Width, m_Height); + } + + delete(m_pGltfLoader); + m_pGltfLoader = new GLTFCommon(); + + if (m_pGltfLoader->Load(scene["directory"], scene["filename"]) == false) + { + MessageBox(NULL, "The selected model couldn't be found, please check the documentation", "Cauldron Panic!", MB_ICONERROR); + exit(0); + } + + // Load the UI settings, and also some defaults cameras and lights, in case the GLTF has none + { #define LOAD(j, key, val) val = j.value(key, val) - // global settings - LOAD(scene, "toneMapper", m_State.toneMapper); - LOAD(scene, "skyDomeType", m_State.skyDomeType); - LOAD(scene, "exposure", m_State.exposure); - LOAD(scene, "iblFactor", m_State.iblFactor); - LOAD(scene, "emmisiveFactor", m_State.emmisiveFactor); - LOAD(scene, "skyDomeType", m_State.skyDomeType); - - // default light - m_State.lightIntensity = scene.value("intensity", 1.0f); - - // default camera (in case the gltf has none) - json camera = scene["camera"]; - LOAD(camera, "yaw", m_Yaw); - LOAD(camera, "pitch", m_Pitch); - LOAD(camera, "distance", m_Distance); - XMVECTOR lookAt = GetVector(GetElementJsonArray(camera, "lookAt", { 0.0, 0.0, 0.0 })); - m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, lookAt); - - // set benchmarking state if enabled - if (m_State.isBenchmarking) - { - BenchmarkConfig(scene["BenchmarkSettings"], -1, m_pGltfLoader); - } - - // indicate the mainloop we started loading a GLTF and it needs to load the rest (textures and geometry) - m_bLoadingScene = true; - } + // global settings + LOAD(scene, "toneMapper", m_State.toneMapper); + LOAD(scene, "skyDomeType", m_State.skyDomeType); + LOAD(scene, "exposure", m_State.exposure); + LOAD(scene, "iblFactor", m_State.iblFactor); + LOAD(scene, "emmisiveFactor", 
m_State.emmisiveFactor); + LOAD(scene, "skyDomeType", m_State.skyDomeType); + + // default light + m_State.lightIntensity = scene.value("intensity", 1.0f); + + // default camera (in case the gltf has none) + json camera = scene["camera"]; + LOAD(camera, "yaw", m_Yaw); + LOAD(camera, "pitch", m_Pitch); + LOAD(camera, "distance", m_Distance); + XMVECTOR lookAt = GetVector(GetElementJsonArray(camera, "lookAt", { 0.0, 0.0, 0.0 })); + m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, lookAt); + + // set benchmarking state if enabled + if (m_State.isBenchmarking) + { + BenchmarkConfig(scene["BenchmarkSettings"], -1, m_pGltfLoader); + } + + // indicate the mainloop we started loading a GLTF and it needs to load the rest (textures and geometry) + m_bLoadingScene = true; + } } //-------------------------------------------------------------------------------------- @@ -407,71 +387,71 @@ void SssrSample::LoadScene(int sceneIndex) //-------------------------------------------------------------------------------------- void SssrSample::OnResize(uint32_t width, uint32_t height) { - if (m_Width != width || m_Height != height) - { - // Flush GPU - // - m_Device.GPUFlush(); - - // If resizing but no minimizing - // - if (m_Width > 0 && m_Height > 0) - { - if (m_Node!=NULL) - { - m_Node->OnDestroyWindowSizeDependentResources(); - } - m_Swapchain.OnDestroyWindowSizeDependentResources(); - } - - m_Width = width; - m_Height = height; - - // if resizing but not minimizing the recreate it with the new size - // - if (m_Width > 0 && m_Height > 0) - { - m_Swapchain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false, DISPLAYMODE_SDR); - if (m_Node != NULL) - { - m_Node->OnCreateWindowSizeDependentResources(&m_Swapchain, m_Width, m_Height); - } - } - } - m_State.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); + if (m_Width != width || m_Height != height) + { + // Flush GPU + // + m_Device.GPUFlush(); + + // If resizing but no minimizing + // + if (m_Width > 0 && 
m_Height > 0) + { + if (m_Node != NULL) + { + m_Node->OnDestroyWindowSizeDependentResources(); + } + m_Swapchain.OnDestroyWindowSizeDependentResources(); + } + + m_Width = width; + m_Height = height; + + // if resizing but not minimizing the recreate it with the new size + // + if (m_Width > 0 && m_Height > 0) + { + m_Swapchain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false, DISPLAYMODE_SDR); + if (m_Node != NULL) + { + m_Node->OnCreateWindowSizeDependentResources(&m_Swapchain, m_Width, m_Height); + } + } + } + m_State.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); } void SssrSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) { - // First load configuration - std::ifstream f("config.json"); - if (!f) - { - MessageBox(NULL, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); - exit(-1); - } - f >> m_JsonConfigFile; - - // Parse command line and override the config file - try - { - if (strlen(lpCmdLine) > 0) - { - auto j3 = json::parse(lpCmdLine); - m_JsonConfigFile.merge_patch(j3); - } - } - catch (json::parse_error) - { - Trace("Error parsing commandline\n"); - exit(0); - } - - // Set values - *pWidth = m_JsonConfigFile.value("width", 1920); - *pHeight = m_JsonConfigFile.value("height", 1080); - *pbFullScreen = m_JsonConfigFile.value("fullScreen", false); - m_State.isBenchmarking = m_JsonConfigFile.value("benchmark", false); + // First load configuration + std::ifstream f("config.json"); + if (!f) + { + MessageBox(NULL, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); + exit(-1); + } + f >> m_JsonConfigFile; + + // Parse command line and override the config file + try + { + if (strlen(lpCmdLine) > 0) + { + auto j3 = json::parse(lpCmdLine); + m_JsonConfigFile.merge_patch(j3); + } + } + catch (json::parse_error) + { + Trace("Error parsing commandline\n"); + exit(0); + } + + // Set values + *pWidth = m_JsonConfigFile.value("width", 1920); + *pHeight = 
m_JsonConfigFile.value("height", 1080); + *pbFullScreen = m_JsonConfigFile.value("fullScreen", false); + m_State.isBenchmarking = m_JsonConfigFile.value("benchmark", false); } //-------------------------------------------------------------------------------------- @@ -481,89 +461,87 @@ void SssrSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* //-------------------------------------------------------------------------------------- void SssrSample::OnRender() { - // Get timings - // - double timeNow = MillisecondsNow(); - m_DeltaTime = timeNow - m_LastFrameTime; - m_LastFrameTime = timeNow; - - // Build UI and set the scene state. Note that the rendering of the UI happens later. - // - ImGUI_UpdateIO(); - ImGui::NewFrame(); - - if (m_bLoadingScene) - { - static int loadingStage = 0; - // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns 0 - // This is done so we can display a progress bar when the scene is loading - loadingStage = m_Node->LoadScene(m_pGltfLoader, loadingStage); - if (loadingStage == 0) - { - m_Time = 0; - m_bLoadingScene = false; - } - } - else if (m_pGltfLoader && m_State.isBenchmarking) - { - const std::vector& timeStamps = m_Node->GetTimingValues(); - m_Time = BenchmarkLoop(timeStamps, &m_State.camera, &m_State.screenshotName); - } - else - { - if (m_bShowUI) - { - BuildUI(); - } - - if (!m_bLoadingScene) - { - HandleInput(); - } - } - - // Set animation time - // - if (m_bPlay) - { - m_Time += (float)m_DeltaTime / 1000.0f; - } - - // Animate and transform the scene - // - if (m_pGltfLoader) - { - m_pGltfLoader->SetAnimationTime(0, m_Time); - m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); - } - - m_State.time = m_Time; - - // Do Render frame using AFR - // - m_Node->OnRender(&m_State, &m_Swapchain); + // Get timings + // + double timeNow = MillisecondsNow(); + m_DeltaTime = timeNow - m_LastFrameTime; + m_LastFrameTime = timeNow; + + // Build UI and set the scene state. 
Note that the rendering of the UI happens later. + // + ImGUI_UpdateIO(); + ImGui::NewFrame(); + + if (m_bLoadingScene) + { + static int loadingStage = 0; + // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns 0 + // This is done so we can display a progress bar when the scene is loading + loadingStage = m_Node->LoadScene(m_pGltfLoader, loadingStage); + if (loadingStage == 0) + { + m_Time = 0; + m_bLoadingScene = false; + } + } + else if (m_pGltfLoader && m_State.isBenchmarking) + { + const std::vector& timeStamps = m_Node->GetTimingValues(); + m_Time = BenchmarkLoop(timeStamps, &m_State.camera, &m_State.screenshotName); + } + else + { + if (m_bShowUI) + { + BuildUI(); + } + + if (!m_bLoadingScene) + { + HandleInput(); + } + } + + // Set animation time + // + if (m_bPlay) + { + m_Time += (float)m_DeltaTime / 1000.0f; + } + + // Animate and transform the scene + // + if (m_pGltfLoader) + { + m_pGltfLoader->SetAnimationTime(0, m_Time); + m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); + } + + m_State.time = m_Time; + + // Do Render frame using AFR + // + m_Node->OnRender(&m_State, &m_Swapchain); #ifdef _DEBUG - // workaround for hang in device debug layer. - m_Device.GPUFlush(); + // workaround for hang in device debug layer. 
+ m_Device.GPUFlush(); #endif - m_Swapchain.Present(); + m_Swapchain.Present(); } //-------------------------------------------------------------------------------------- - // - // WinMain - // - //-------------------------------------------------------------------------------------- -int WINAPI WinMain(HINSTANCE hInstance, - HINSTANCE hPrevInstance, - LPSTR lpCmdLine, - int nCmdShow) + // + // WinMain + // + //-------------------------------------------------------------------------------------- +int WINAPI WinMain( + _In_ HINSTANCE hInstance, + _In_opt_ HINSTANCE hPrevInstance, + _In_ LPSTR lpCmdLine, + _In_ int nCmdShow) { - LPCSTR Name = "Stochastic Screen Space Reflection Sample DX12 v1.0"; - uint32_t Width = 1920; // 1536; - uint32_t Height = 1080; // 841; - - // create new sample - return RunFramework(hInstance, lpCmdLine, nCmdShow, new SssrSample(Name)); + LPCSTR Name = "FidelityFX Stochastic Screen Space Reflection Sample DX12 v1.2"; + // create new sample + return RunFramework(hInstance, lpCmdLine, nCmdShow, new SssrSample(Name)); } \ No newline at end of file diff --git a/sample/src/DX12/Sources/SssrSample.h b/sample/src/DX12/Sources/SssrSample.h index 80e62ec..8a96660 100644 --- a/sample/src/DX12/Sources/SssrSample.h +++ b/sample/src/DX12/Sources/SssrSample.h @@ -19,9 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ - #pragma once - #include "SampleRenderer.h" // @@ -42,44 +40,45 @@ THE SOFTWARE. 
class SssrSample : public FrameworkWindows { public: - SssrSample(LPCSTR name); - void OnCreate(HWND hWnd) override; - void OnDestroy() override; - void OnRender() override; - bool OnEvent(MSG msg) override; - void OnResize(uint32_t Width, uint32_t Height) override; - virtual void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) override; + SssrSample(LPCSTR name); + void OnCreate(HWND hWnd) override; + void OnDestroy() override; + void OnRender() override; + bool OnEvent(MSG msg) override; + void OnResize(uint32_t Width, uint32_t Height) override; + virtual void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) override; - void SetFullScreen(bool fullscreen); + void SetFullScreen(bool fullscreen); private: - void BuildUI(); - void HandleInput(); - void LoadScene(int sceneIndex); + void BuildUI(); + void HandleInput(); + void LoadScene(int sceneIndex); - Device m_Device; - SwapChain m_Swapchain; + Device m_Device; + SwapChain m_Swapchain; - GLTFCommon *m_pGltfLoader = NULL; - bool m_bLoadingScene = false; + GLTFCommon* m_pGltfLoader = NULL; + bool m_bLoadingScene = false; - SampleRenderer *m_Node = NULL; - SampleRenderer::State m_State; + SampleRenderer* m_Node = NULL; + SampleRenderer::State m_State; - float m_Distance; - float m_Yaw; - float m_Pitch; + float m_Distance; + float m_Yaw; + float m_Pitch; - float m_Time; // WallClock in seconds. - double m_DeltaTime; // The elapsed time in milliseconds since the previous frame. - double m_LastFrameTime; + float m_Time; // WallClock in seconds. + double m_DeltaTime; // The elapsed time in milliseconds since the previous frame. 
+ double m_LastFrameTime; - // json config file - json m_JsonConfigFile; - std::vector m_SceneNames; + // json config file + json m_JsonConfigFile; + std::vector m_SceneNames; - bool m_bPlay; - bool m_bShowUI; + bool m_bPlay; + bool m_bShowUI; - int m_CameraControlSelected; + int m_CameraControlSelected; + int m_selectedScene; }; diff --git a/sample/src/DX12/Sources/UploadHeapBuffersDX12.cpp b/sample/src/DX12/Sources/UploadHeapBuffersDX12.cpp new file mode 100644 index 0000000..8ce0df5 --- /dev/null +++ b/sample/src/DX12/Sources/UploadHeapBuffersDX12.cpp @@ -0,0 +1,236 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ +#include "stdafx.h" +#include "UploadHeapBuffersDX12.h" +#include "Misc/Misc.h" +#include "Base/Helper.h" + +using namespace CAULDRON_DX12; +namespace SSSR_SAMPLE_DX12 +{ + //-------------------------------------------------------------------------------------- + // + // OnCreate: creates the command allocator/list used for uploads and a mapped upload buffer of uSize bytes to suballocate from. + // Every D3D12 creation call and the Map call is routed through ThrowIfFailed. + //-------------------------------------------------------------------------------------- + void UploadHeapBuffersDX12::OnCreate(Device* pDevice, SIZE_T uSize) + { + m_pDevice = pDevice; + m_pCommandQueue = pDevice->GetGraphicsQueue(); + + // Create command list and allocators + // (results are checked so a failed creation is reported here instead of continuing with a null object) + ThrowIfFailed(pDevice->GetDevice()->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_pCommandAllocator))); + SetName(m_pCommandAllocator, "UploadHeap::m_pCommandAllocator"); + ThrowIfFailed(pDevice->GetDevice()->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_pCommandAllocator, nullptr, IID_PPV_ARGS(&m_pCommandList))); + SetName(m_pCommandList, "UploadHeap::m_pCommandList"); + + // Create buffer to suballocate + + ThrowIfFailed( + pDevice->GetDevice()->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(uSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_pUploadHeap) + ) + ); + + ThrowIfFailed(m_pUploadHeap->Map(0, NULL, (void**)&m_pDataBegin)); + + m_pDataCur = m_pDataBegin; + m_pDataEnd = m_pDataBegin + m_pUploadHeap->GetDesc().Width; + + m_fenceValue = 0; + } + + //-------------------------------------------------------------------------------------- + // + // OnDestroy: releases the upload buffer, the command list and the command allocator created in OnCreate. + // + //-------------------------------------------------------------------------------------- + void UploadHeapBuffersDX12::OnDestroy() + { + m_pUploadHeap->Release(); + + m_pCommandList->Release(); + m_pCommandAllocator->Release(); + } + + //-------------------------------------------------------------------------------------- + // + // SuballocateFromUploadHeap +
// Returns a pointer to uSize bytes inside the mapped upload heap, aligned via AlignUp(uAlign), or NULL when the heap has no room left. + //-------------------------------------------------------------------------------------- + UINT8* UploadHeapBuffersDX12::Suballocate(SIZE_T uSize, UINT64 uAlign) + { + // wait until we are done flushing the heap + flushing.Wait(); + + UINT8* pRet = NULL; + + { + std::unique_lock<std::mutex> lock(m_mutex); + + // make sure resource (and its mips) would fit the upload heap (heap size is end - begin), if not please make the upload heap bigger + assert(uSize < (size_t)(m_pDataEnd - m_pDataBegin)); + + m_pDataCur = reinterpret_cast<UINT8*>(AlignUp(reinterpret_cast<SIZE_T>(m_pDataCur), SIZE_T(uAlign))); + + // return NULL if we ran out of space in the heap + if (m_pDataCur >= m_pDataEnd || m_pDataCur + uSize >= m_pDataEnd) + { + return NULL; + } + + pRet = m_pDataCur; + m_pDataCur += uSize; + + //Trace("allocated: %i", m_pDataCur - m_pDataBegin); + } + + return pRet; + } + // Like Suballocate, but calls FlushAndFinish and retries until it succeeds, then marks an allocation as pending (pair with EndSuballocate). NOTE: a request that can never fit retries forever. + UINT8* UploadHeapBuffersDX12::BeginSuballocate(SIZE_T uSize, UINT64 uAlign) + { + UINT8* pRes = NULL; + + for (;;) + { + pRes = Suballocate(uSize, uAlign); + if (pRes != NULL) + { + break; + } + + FlushAndFinish(); + } + + allocating.Inc(); + + return pRes; + } + // Marks the allocation started by BeginSuballocate as finished; FlushAndFinish waits on this counter. + void UploadHeapBuffersDX12::EndSuballocate() + { + allocating.Dec(); + } + // Stages size bytes from pData in the upload heap and queues the copy into pBufferDst plus its COPY_DEST -> shader-resource barrier. + void UploadHeapBuffersDX12::AddBufferCopy(const void* pData, int size, ID3D12Resource* pBufferDst) + { + UINT8* pixels = BeginSuballocate(size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + memcpy(pixels, pData, size); + EndSuballocate(); + + { + std::unique_lock<std::mutex> lock(m_mutex); + m_bufferCopies.push_back({ pBufferDst, (UINT64)(pixels - BasePtr()), size }); + + D3D12_RESOURCE_BARRIER RBDesc = {}; + RBDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + RBDesc.Transition.pResource = pBufferDst; + RBDesc.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + RBDesc.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + RBDesc.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + m_toBarrierIntoShaderResource.push_back(RBDesc); + } + } + 
+ void UploadHeapBuffersDX12::AddCopy(CD3DX12_TEXTURE_COPY_LOCATION Src, CD3DX12_TEXTURE_COPY_LOCATION Dst) + { + std::unique_lock<std::mutex> lock(m_mutex); + m_textureCopies.push_back({ Src, Dst }); + } + // Queues a COPY_DEST -> shader-resource transition for pRes; it is recorded by the next FlushAndFinish. + void UploadHeapBuffersDX12::AddBarrier(ID3D12Resource* pRes) + { + std::unique_lock<std::mutex> lock(m_mutex); + + D3D12_RESOURCE_BARRIER RBDesc = {}; + RBDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + RBDesc.Transition.pResource = pRes; + RBDesc.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + RBDesc.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + RBDesc.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + + m_toBarrierIntoShaderResource.push_back(RBDesc); + } + + //-------------------------------------------------------------------------------------- + // + // FlushAndFinish: records every queued texture copy, buffer copy and barrier, submits the command list, + // calls GPUFlush, then resets the allocator/list and rewinds the heap cursor to the start of the heap. + //-------------------------------------------------------------------------------------- + void UploadHeapBuffersDX12::FlushAndFinish() + { + // make sure another thread is not already flushing + flushing.Wait(); + + // begins a critical section, and make sure no allocations happen while a thread is inside it + flushing.Inc(); + + // wait for pending allocations to finish + allocating.Wait(); + + std::unique_lock<std::mutex> lock(m_mutex); + Trace("flushing %i, %i", (int)m_textureCopies.size(), (int)m_bufferCopies.size()); + + //issue copies (iterate by const reference to avoid copying each queued entry) + for (const TextureCopy& c : m_textureCopies) + { + m_pCommandList->CopyTextureRegion(&c.Dst, 0, 0, 0, &c.Src, NULL); + } + m_textureCopies.clear(); + + for (const BufferCopy& c : m_bufferCopies) + { + m_pCommandList->CopyBufferRegion(c.pBufferDst, 0, GetResource(), c.offset, c.size); + } + m_bufferCopies.clear(); + + //apply barriers in one go + if (m_toBarrierIntoShaderResource.size() > 0) + { + m_pCommandList->ResourceBarrier((UINT)m_toBarrierIntoShaderResource.size(), m_toBarrierIntoShaderResource.data()); + m_toBarrierIntoShaderResource.clear(); + } + + // Close & submit + 
ThrowIfFailed(m_pCommandList->Close()); + m_pCommandQueue->ExecuteCommandLists(1, CommandListCast(&m_pCommandList)); + + // Make sure it's been processed by the GPU + m_pDevice->GPUFlush(); + + // Reset so it can be reused (results are checked like Close above) + ThrowIfFailed(m_pCommandAllocator->Reset()); + ThrowIfFailed(m_pCommandList->Reset(m_pCommandAllocator, nullptr)); + + m_pDataCur = m_pDataBegin; + + flushing.Dec(); + } +} \ No newline at end of file diff --git a/sample/src/DX12/Sources/UploadHeapBuffersDX12.h b/sample/src/DX12/Sources/UploadHeapBuffersDX12.h new file mode 100644 index 0000000..41e66dd --- /dev/null +++ b/sample/src/DX12/Sources/UploadHeapBuffersDX12.h @@ -0,0 +1,90 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ +#pragma once + +#include "Base/Device.h" +#include "Misc/Async.h" + +using namespace CAULDRON_DX12; +namespace SSSR_SAMPLE_DX12 +{ + // + // This class shows the most efficient way to upload resources to the GPU memory. + // The idea is to create just one upload heap and suballocate memory from it. + // For convenience this class comes with its own command list & submit (FlushAndFinish) + // + class UploadHeapBuffersDX12 + { + Sync allocating, flushing; + + struct TextureCopy + { + CD3DX12_TEXTURE_COPY_LOCATION Src, Dst; + }; + std::vector m_textureCopies; + + struct BufferCopy + { + ID3D12Resource* pBufferDst; + UINT64 offset; + int size; + }; + std::vector m_bufferCopies; + + std::vector m_toBarrierIntoShaderResource; + + std::mutex m_mutex; + public: + void OnCreate(Device* pDevice, SIZE_T uSize); + void OnDestroy(); + + UINT8* Suballocate(SIZE_T uSize, UINT64 uAlign); + UINT8* BeginSuballocate(SIZE_T uSize, UINT64 uAlign); + void EndSuballocate(); + UINT8* BasePtr() { return m_pDataBegin; } + ID3D12Resource* GetResource() { return m_pUploadHeap; } + ID3D12GraphicsCommandList* GetCommandList() { return m_pCommandList; } + ID3D12CommandQueue* GetCommandQueue() { return m_pCommandQueue; } + void AddBufferCopy(const void* pData, int size, ID3D12Resource* m_pBufferDst); + void AddCopy(CD3DX12_TEXTURE_COPY_LOCATION Src, CD3DX12_TEXTURE_COPY_LOCATION Dst); + + void AddBarrier(ID3D12Resource* pRes); + + void FlushAndFinish(); + + private: + Device* m_pDevice; + ID3D12Resource* m_pUploadHeap = nullptr; + + ID3D12GraphicsCommandList* m_pCommandList = nullptr; + ID3D12CommandQueue* m_pCommandQueue = nullptr; + ID3D12CommandAllocator* m_pCommandAllocator = nullptr; + + UINT8* m_pDataCur = nullptr; // current position of upload heap + UINT8* m_pDataEnd = nullptr; // ending position of upload heap + UINT8* m_pDataBegin = nullptr; // starting position of upload heap + + ID3D12Fence* m_pFence = nullptr; + 
UINT64 m_fenceValue = 0; + HANDLE m_hEvent; + }; +} diff --git a/sample/src/DX12/Sources/Utils.cpp b/sample/src/DX12/Sources/Utils.cpp new file mode 100644 index 0000000..d86a16e --- /dev/null +++ b/sample/src/DX12/Sources/Utils.cpp @@ -0,0 +1,24 @@ +#include "Utils.h" + +void CopyToTexture(ID3D12GraphicsCommandList* cl, ID3D12Resource* source, ID3D12Resource* target, UINT32 width, UINT32 height) +{ + D3D12_TEXTURE_COPY_LOCATION src = {}; + src.pResource = source; + src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src.SubresourceIndex = 0; + + D3D12_TEXTURE_COPY_LOCATION dst = {}; + dst.pResource = target; + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.SubresourceIndex = 0; + + D3D12_BOX srcBox = {}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = width; + srcBox.bottom = height; + srcBox.back = 1; + + cl->CopyTextureRegion(&dst, 0, 0, 0, &src, &srcBox); +} diff --git a/sample/src/DX12/Sources/Utils.h b/sample/src/DX12/Sources/Utils.h new file mode 100644 index 0000000..0c49063 --- /dev/null +++ b/sample/src/DX12/Sources/Utils.h @@ -0,0 +1,4 @@ +#pragma once +#include + +void CopyToTexture(ID3D12GraphicsCommandList* cl, ID3D12Resource* source, ID3D12Resource* target, UINT32 width, UINT32 height); \ No newline at end of file diff --git a/sample/src/DX12/Sources/stdafx.h b/sample/src/DX12/Sources/stdafx.h index 46f902d..26ac0c4 100644 --- a/sample/src/DX12/Sources/stdafx.h +++ b/sample/src/DX12/Sources/stdafx.h @@ -62,7 +62,4 @@ using namespace DirectX; #include "PostProc\ShadowResolvePass.h" #include "Widgets\wireframe.h" -using namespace CAULDRON_DX12; - -#include "ffx_sssr.h" -#include "ffx_sssr_d3d12.h" \ No newline at end of file +using namespace CAULDRON_DX12; \ No newline at end of file diff --git a/sample/src/VK/Shaders/ApplyReflections.hlsl b/sample/src/Shaders/ApplyReflections.hlsl similarity index 94% rename from sample/src/VK/Shaders/ApplyReflections.hlsl rename to sample/src/Shaders/ApplyReflections.hlsl 
index 1cb369d..0ad1d20 100644 --- a/sample/src/VK/Shaders/ApplyReflections.hlsl +++ b/sample/src/Shaders/ApplyReflections.hlsl @@ -20,9 +20,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ -#ifndef SSR_APPLY -#define SSR_APPLY - [[vk::binding(0)]] Texture2D reflectionTarget : register(t0); [[vk::binding(1)]] Texture2D normalsTexture : register(t1); [[vk::binding(2)]] Texture2D specularRoughnessTexture : register(t2); @@ -30,8 +27,7 @@ THE SOFTWARE. [[vk::binding(4)]] SamplerState linearSampler : register(s0); -[[vk::binding(5)]] cbuffer Constants : register(b0) -{ +[[vk::binding(5)]] cbuffer Constants : register(b0){ float4 viewDirection; uint showReflectionTarget; uint drawReflections; @@ -48,8 +44,7 @@ struct VertexOut float2 texcoord : TEXCOORD0; }; -VertexOut vs_main(VertexInput input) -{ +VertexOut vs_main(VertexInput input){ VertexOut output; output.texcoord = float2((input.vertexId << 1) & 2, input.vertexId & 2); output.position = float4(output.texcoord.xy * 2.0 - 1.0, 0.0, 1.0); @@ -57,8 +52,7 @@ VertexOut vs_main(VertexInput input) } // Important bits from the PBR shader -float3 getIBLContribution(float perceptualRoughness, float3 specularColor, float3 specularLight, float3 n, float3 v) -{ +float3 getIBLContribution(float perceptualRoughness, float3 specularColor, float3 specularLight, float3 n, float3 v){ float NdotV = clamp(dot(n, v), 0.0, 1.0); float2 brdfSamplePoint = clamp(float2(NdotV, perceptualRoughness), float2(0.0, 0.0), float2(1.0, 1.0)); // retrieve a scale and bias to F0. 
See [1], Figure 3 @@ -93,6 +87,4 @@ float4 ps_main(VertexOut input) : SV_Target0 // Show just the scene return float4(0, 0, 0, 1); } -} - -#endif // SSR_APPLY \ No newline at end of file +} \ No newline at end of file diff --git a/sample/src/Shaders/BlurReflections.hlsl b/sample/src/Shaders/BlurReflections.hlsl new file mode 100644 index 0000000..219a809 --- /dev/null +++ b/sample/src/Shaders/BlurReflections.hlsl @@ -0,0 +1,72 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ +#include "Common.hlsl" + +[[vk::binding(0, 1)]] Texture2D g_roughness : register(t0); +[[vk::binding(1, 1)]] Texture2D g_temporally_denoised_reflections : register(t1); +[[vk::binding(2, 1)]] StructuredBuffer g_tile_meta_data_mask : register(t2); + +[[vk::binding(3, 1)]] RWTexture2D g_denoised_reflections : register(u0); + +groupshared uint g_shared_0[12][12]; +groupshared uint g_shared_1[12][12]; + +void FFX_DNSR_Reflections_LoadFromGroupSharedMemory(int2 idx, out min16float3 radiance, out min16float roughness) { + uint2 tmp; + tmp.x = g_shared_0[idx.x][idx.y]; + tmp.y = g_shared_1[idx.x][idx.y]; + + min16float4 min16tmp = min16float4(UnpackFloat16(tmp.x), UnpackFloat16(tmp.y)); + radiance = min16tmp.xyz; + roughness = min16tmp.w; +} + +void FFX_DNSR_Reflections_StoreInGroupSharedMemory(int2 idx, min16float3 radiance, min16float roughness) { + min16float4 tmp = min16float4(radiance, roughness); + g_shared_0[idx.x][idx.y] = PackFloat16(tmp.xy); + g_shared_1[idx.x][idx.y] = PackFloat16(tmp.zw); +} + +min16float3 FFX_DNSR_Reflections_LoadRadianceFP16(int2 pixel_coordinate) { + return g_temporally_denoised_reflections.Load(int3(pixel_coordinate, 0)).xyz; +} + +min16float FFX_DNSR_Reflections_LoadRoughnessFP16(int2 pixel_coordinate) { + return (min16float) g_roughness.Load(int3(pixel_coordinate, 0)); +} + +void FFX_DNSR_Reflections_StoreDenoisedReflectionResult(int2 pixel_coordinate, min16float3 value) { + g_denoised_reflections[pixel_coordinate] = value; +} + +uint FFX_DNSR_Reflections_LoadTileMetaDataMask(uint index) { + return g_tile_meta_data_mask[index]; +} + +#include "ffx_denoiser_reflections_blur.h" + +[numthreads(8, 8, 1)] +void main(int2 dispatch_thread_id : SV_DispatchThreadID, int2 group_thread_id : SV_GroupThreadID) { + uint2 screen_dimensions; + g_temporally_denoised_reflections.GetDimensions(screen_dimensions.x, screen_dimensions.y); + FFX_DNSR_Reflections_Blur(dispatch_thread_id, 
group_thread_id, screen_dimensions); +} \ No newline at end of file diff --git a/sample/src/Shaders/ClassifyTiles.hlsl b/sample/src/Shaders/ClassifyTiles.hlsl new file mode 100644 index 0000000..4a6901e --- /dev/null +++ b/sample/src/Shaders/ClassifyTiles.hlsl @@ -0,0 +1,69 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#include "Common.hlsl" + +[[vk::binding(0, 1)]] Texture2D g_roughness : register(t0); +[[vk::binding(1, 1)]] StructuredBuffer g_temporal_variance_mask : register(t1); + +[[vk::binding(2, 1)]] RWBuffer g_ray_list : register(u0); +[[vk::binding(3, 1)]] globallycoherent RWBuffer g_ray_counter : register(u1); +[[vk::binding(4, 1)]] RWTexture2D g_intersection_results : register(u2); +[[vk::binding(5, 1)]] RWStructuredBuffer g_tile_meta_data_mask : register(u3); +[[vk::binding(6, 1)]] RWTexture2D g_extracted_roughness : register(u4); + +uint FFX_DNSR_Reflections_LoadTemporalVarianceMask(uint index) { + return g_temporal_variance_mask[index]; +} + +void FFX_DNSR_Reflections_IncrementRayCounter(uint value, out uint original_value) { + InterlockedAdd(g_ray_counter[0], value, original_value); +} + +void FFX_DNSR_Reflections_StoreRay(int index, uint2 ray_coord, bool copy_horizontal, bool copy_vertical, bool copy_diagonal) { + g_ray_list[index] = PackRayCoords(ray_coord, copy_horizontal, copy_vertical, copy_diagonal); // Store out pixel to trace +} + +void FFX_DNSR_Reflections_StoreTileMetaDataMask(uint index, uint mask) { + g_tile_meta_data_mask[index] = mask; +} + +#include "ffx_denoiser_reflections_classify_tiles.h" + +[numthreads(8, 8, 1)] +void main(uint2 group_id : SV_GroupID, uint group_index : SV_GroupIndex) { + uint2 screen_size; + g_roughness.GetDimensions(screen_size.x, screen_size.y); + + uint2 group_thread_id = FFX_DNSR_Reflections_RemapLane8x8(group_index); // Remap lanes to ensure four neighboring lanes are arranged in a quad pattern + uint2 dispatch_thread_id = group_id * 8 + group_thread_id; + + float roughness = g_roughness.Load(int3(dispatch_thread_id, 0)).w; + + FFX_DNSR_Reflections_ClassifyTiles(dispatch_thread_id, group_thread_id, roughness, screen_size, g_samples_per_quad, g_temporal_variance_guided_tracing_enabled); + + // Clear intersection results as there wont be any ray that 
overwrites them + g_intersection_results[dispatch_thread_id] = 0; + + // Extract only the channel containing the roughness to avoid loading all 4 channels in the follow up passes. + g_extracted_roughness[dispatch_thread_id] = roughness; +} \ No newline at end of file diff --git a/sample/src/Shaders/Common.hlsl b/sample/src/Shaders/Common.hlsl new file mode 100644 index 0000000..2e0d2bd --- /dev/null +++ b/sample/src/Shaders/Common.hlsl @@ -0,0 +1,124 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +static const float g_roughness_sigma_min = 0.001f; +static const float g_roughness_sigma_max = 0.01f; +static const float g_depth_sigma = 0.02f; + +[[vk::binding(0, 0)]] cbuffer Constants : register(b0) { + float4x4 g_inv_view_proj; + float4x4 g_proj; + float4x4 g_inv_proj; + float4x4 g_view; + float4x4 g_inv_view; + float4x4 g_prev_view_proj; + + uint g_frame_index; + uint g_max_traversal_intersections; + uint g_min_traversal_occupancy; + uint g_most_detailed_mip; + float g_temporal_stability_factor; + float g_temporal_variance_threshold; + float g_depth_buffer_thickness; + float g_roughness_threshold; + uint g_samples_per_quad; + uint g_temporal_variance_guided_tracing_enabled; +}; + +//=== Common functions of the SssrSample === + +uint PackFloat16(min16float2 v) { + uint2 p = f32tof16(float2(v)); + return p.x | (p.y << 16); +} + +min16float2 UnpackFloat16(uint a) { + float2 tmp = f16tof32( + uint2(a & 0xFFFF, a >> 16)); + return min16float2(tmp); +} + +uint PackRayCoords(uint2 ray_coord, bool copy_horizontal, bool copy_vertical, bool copy_diagonal) { + uint ray_x_15bit = ray_coord.x & 0b111111111111111; + uint ray_y_14bit = ray_coord.y & 0b11111111111111; + uint copy_horizontal_1bit = copy_horizontal ? 1 : 0; + uint copy_vertical_1bit = copy_vertical ? 1 : 0; + uint copy_diagonal_1bit = copy_diagonal ? 
1 : 0; + + uint packed = (copy_diagonal_1bit << 31) | (copy_vertical_1bit << 30) | (copy_horizontal_1bit << 29) | (ray_y_14bit << 15) | (ray_x_15bit << 0); + return packed; +} + +void UnpackRayCoords(uint packed, out uint2 ray_coord, out bool copy_horizontal, out bool copy_vertical, out bool copy_diagonal) { + ray_coord.x = (packed >> 0) & 0b111111111111111; + ray_coord.y = (packed >> 15) & 0b11111111111111; + copy_horizontal = (packed >> 29) & 0b1; + copy_vertical = (packed >> 30) & 0b1; + copy_diagonal = (packed >> 31) & 0b1; +} + +// Transforms origin to uv space +// Mat must be able to transform origin from its current space into clip space. +float3 ProjectPosition(float3 origin, float4x4 mat) { + float4 projected = mul(float4(origin, 1), mat); + projected.xyz /= projected.w; + projected.xy = 0.5 * projected.xy + 0.5; + projected.y = (1 - projected.y); + return projected.xyz; +} + +// Origin and direction must be in the same space and mat must be able to transform from that space into clip space. +float3 ProjectDirection(float3 origin, float3 direction, float3 screen_space_origin, float4x4 mat) { + float3 offsetted = ProjectPosition(origin + direction, mat); + return offsetted - screen_space_origin; +} + +// Mat must be able to transform origin from texture space to a linear space. 
+float3 InvProjectPosition(float3 coord, float4x4 mat) { + coord.y = (1 - coord.y); + coord.xy = 2 * coord.xy - 1; + float4 projected = mul(float4(coord, 1), mat); + projected.xyz /= projected.w; + return projected.xyz; +} + +//=== FFX_DNSR_Reflections_ override functions === + +bool FFX_DNSR_Reflections_IsGlossyReflection(float roughness) { + return roughness < g_roughness_threshold; +} + +bool FFX_DNSR_Reflections_IsMirrorReflection(float roughness) { + return roughness < 0.0001; +} + +float3 FFX_DNSR_Reflections_ScreenSpaceToViewSpace(float3 screen_uv_coord) { + return InvProjectPosition(screen_uv_coord, g_inv_proj); +} + +float3 FFX_DNSR_Reflections_ViewSpaceToWorldSpace(float4 view_space_coord) { + return mul(view_space_coord, g_inv_view).xyz; +} + +float3 FFX_DNSR_Reflections_WorldSpaceToScreenSpacePrevious(float3 world_coord) { + return ProjectPosition(world_coord, g_prev_view_proj); +} diff --git a/sample/src/VK/Shaders/DepthDownsample.hlsl b/sample/src/Shaders/DepthDownsample.hlsl similarity index 91% rename from sample/src/VK/Shaders/DepthDownsample.hlsl rename to sample/src/Shaders/DepthDownsample.hlsl index 85da089..da4b12b 100644 --- a/sample/src/VK/Shaders/DepthDownsample.hlsl +++ b/sample/src/Shaders/DepthDownsample.hlsl @@ -20,9 +20,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ -#ifndef SSR_DEPTH_DOWNSAMPLE -#define SSR_DEPTH_DOWNSAMPLE - [[vk::binding(0)]] Texture2D g_depth_buffer : register(t0); [[vk::binding(1)]] RWTexture2D g_downsampled_depth_buffer[13] : register(u0); // 12 is the maximum amount of supported mips by the downsampling lib (4096x4096). We copy the depth buffer over for simplicity. [[vk::binding(2)]] RWBuffer g_global_atomic : register(u13); // Single atomic counter that stores the number of remaining threadgroups to process. 
@@ -51,22 +48,19 @@ AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) { return min(min(v0, v1), min(v2, #include "ffx_spd.h" -uint GetThreadgroupCount(uint2 image_size) -{ +uint GetThreadgroupCount(uint2 image_size){ // Each threadgroup works on 64x64 texels return ((image_size.x + 63) / 64) * ((image_size.y + 63) / 64); } // Returns mips count of a texture with specified size -float GetMipsCount(float2 texture_size) -{ +float GetMipsCount(float2 texture_size){ float max_dim = max(texture_size.x, texture_size.y); return 1.0 + floor(log2(max_dim)); } [numthreads(32, 8, 1)] -void main(uint3 did : SV_DispatchThreadID, uint3 gid : SV_GroupID, uint gi : SV_GroupIndex) -{ +void main(uint3 dispatch_thread_id : SV_DispatchThreadID, uint3 group_id : SV_GroupID, uint group_index : SV_GroupIndex){ float2 depth_image_size = 0; g_depth_buffer.GetDimensions(depth_image_size.x, depth_image_size.y); @@ -76,7 +70,7 @@ void main(uint3 did : SV_DispatchThreadID, uint3 gid : SV_GroupID, uint gi : SV_ { for (int j = 0; j < 8; ++j) { - uint2 idx = uint2(2 * did.x + i, 8 * did.y + j); + uint2 idx = uint2(2 * dispatch_thread_id.x + i, 8 * dispatch_thread_id.y + j); if (idx.x < u_depth_image_size.x && idx.y < u_depth_image_size.y) { g_downsampled_depth_buffer[0][idx] = g_depth_buffer[idx]; @@ -90,10 +84,8 @@ void main(uint3 did : SV_DispatchThreadID, uint3 gid : SV_GroupID, uint gi : SV_ uint threadgroup_count = GetThreadgroupCount(image_size); SpdDownsample( - AU2(gid.xy), - AU1(gi), + AU2(group_id.xy), + AU1(group_index), AU1(mips_count), AU1(threadgroup_count)); -} - -#endif // SSR_DEPTH_DOWNSAMPLE \ No newline at end of file +} \ No newline at end of file diff --git a/sample/src/Shaders/Intersect.hlsl b/sample/src/Shaders/Intersect.hlsl new file mode 100644 index 0000000..2b879f0 --- /dev/null +++ b/sample/src/Shaders/Intersect.hlsl @@ -0,0 +1,252 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+
+#include "Common.hlsl"
+
+[[vk::binding(0, 1)]] Texture2D g_lit_scene : register(t0);
+[[vk::binding(1, 1)]] Texture2D g_depth_buffer_hierarchy : register(t1);
+[[vk::binding(2, 1)]] Texture2D g_normal : register(t2);
+[[vk::binding(3, 1)]] Texture2D g_roughness : register(t3);
+[[vk::binding(4, 1)]] TextureCube g_environment_map : register(t4);
+[[vk::binding(5, 1)]] Buffer g_sobol_buffer : register(t5);
+[[vk::binding(6, 1)]] Buffer g_ranking_tile_buffer : register(t6);
+[[vk::binding(7, 1)]] Buffer g_scrambling_tile_buffer : register(t7);
+[[vk::binding(8, 1)]] Buffer g_ray_list : register(t8);
+
+[[vk::binding(9, 1)]] SamplerState g_linear_sampler : register(s0);
+[[vk::binding(10, 1)]] SamplerState g_environment_map_sampler : register(s1);
+
+[[vk::binding(11, 1)]] RWTexture2D g_intersection_result : register(u0);
+[[vk::binding(12, 1)]] RWTexture2D g_ray_lengths : register(u1);
+[[vk::binding(13, 1)]] RWBuffer g_ray_counter : register(u2);
+
+#define GOLDEN_RATIO 1.61803398875f
+#define M_PI 3.14159265358979f
+
+float3 FFX_SSSR_LoadNormal(int2 pixel_coordinate) {
+    return 2 * g_normal.Load(int3(pixel_coordinate, 0)).xyz - 1; // decodes the stored value with 2 * x - 1 (mip 0)
+}
+
+float FFX_SSSR_LoadDepth(int2 pixel_coordinate, int mip) {
+    return g_depth_buffer_hierarchy.Load(int3(pixel_coordinate, mip)); // pixel_coordinate is in texels of the requested mip
+}
+
+float3 FFX_SSSR_ScreenSpaceToViewSpace(float3 screen_space_position) {
+    return InvProjectPosition(screen_space_position, g_inv_proj); // same body as FFX_DNSR_Reflections_ScreenSpaceToViewSpace in Common.hlsl
+}
+
+float3 ScreenSpaceToWorldSpace(float3 screen_space_position) {
+    return InvProjectPosition(screen_space_position, g_inv_view_proj); // g_inv_view_proj is declared outside this excerpt
+}
+
+// http://jcgt.org/published/0007/04/01/paper.pdf by Eric Heitz
+// Input Ve: view direction
+// Input alpha_x, alpha_y: roughness parameters
+// Input U1, U2: uniform random numbers
+// Output Ne: normal sampled with PDF D_Ve(Ne) = G1(Ve) * max(0, dot(Ve, Ne)) * D(Ne) / Ve.z
+float3 SampleGGXVNDF(float3 Ve, float alpha_x, float alpha_y, float U1, float U2) {
+    // Section 3.2: transforming the view direction to the hemisphere configuration
+    float3 Vh = normalize(float3(alpha_x * Ve.x, alpha_y * Ve.y, Ve.z));
+    // Section 4.1: orthonormal basis (with special case if cross product is zero)
+    float lensq = Vh.x * Vh.x + Vh.y * Vh.y;
+    float3 T1 = lensq > 0 ? float3(-Vh.y, Vh.x, 0) * rsqrt(lensq) : float3(1, 0, 0); // fixed x axis when Vh has no x/y component
+    float3 T2 = cross(Vh, T1);
+    // Section 4.2: parameterization of the projected area
+    float r = sqrt(U1);
+    float phi = 2.0 * M_PI * U2;
+    float t1 = r * cos(phi);
+    float t2 = r * sin(phi);
+    float s = 0.5 * (1.0 + Vh.z);
+    t2 = (1.0 - s) * sqrt(1.0 - t1 * t1) + s * t2;
+    // Section 4.3: reprojection onto hemisphere
+    float3 Nh = t1 * T1 + t2 * T2 + sqrt(max(0.0, 1.0 - t1 * t1 - t2 * t2)) * Vh; // max() keeps the sqrt argument non-negative
+    // Section 3.4: transforming the normal back to the ellipsoid configuration
+    float3 Ne = normalize(float3(alpha_x * Nh.x, alpha_y * Nh.y, max(0.0, Nh.z))); // z is clamped to be non-negative
+    return Ne;
+}
+
+float3 Sample_GGX_VNDF_Ellipsoid(float3 Ve, float alpha_x, float alpha_y, float U1, float U2) {
+    return SampleGGXVNDF(Ve, alpha_x, alpha_y, U1, U2); // plain forwarding wrapper
+}
+
+float3 Sample_GGX_VNDF_Hemisphere(float3 Ve, float alpha, float U1, float U2) {
+    return Sample_GGX_VNDF_Ellipsoid(Ve, alpha, alpha, U1, U2); // isotropic case: one alpha for both axes
+}
+
+float3x3 CreateTBN(float3 N) { // Builds a matrix from a vector U perpendicular to N, cross(N, U), and N itself.
+    float3 U;
+    if (abs(N.z) > 0.0) {
+        float k = sqrt(N.y * N.y + N.z * N.z); // non-zero here because N.z != 0
+        U.x = 0.0; U.y = -N.z / k; U.z = N.y / k; // dot(N, U) == 0
+    }
+    else {
+        float k = sqrt(N.x * N.x + N.y * N.y); // N.z == 0 on this path; assumes N is not the zero vector
+        U.x = N.y / k; U.y = -N.x / k; U.z = 0.0; // dot(N, U) == 0
+    }
+
+    float3x3 TBN;
+    TBN[0] = U;
+    TBN[1] = cross(N, U);
+    TBN[2] = N;
+    return transpose(TBN); // rows U, cross(N, U), N are set above, then the matrix is transposed
+}
+
+// Blue Noise Sampler by Eric Heitz. Returns a value in the range [0, 1].
+float SampleRandomNumber(uint pixel_i, uint pixel_j, uint sample_index, uint sample_dimension) {
+    // Wrap arguments
+    pixel_i = pixel_i & 127u;
+    pixel_j = pixel_j & 127u;
+    sample_index = sample_index & 255u;
+    sample_dimension = sample_dimension & 255u;
+
+#ifndef SPP
+#define SPP 1
+#endif
+
+#if SPP == 1
+    const uint ranked_sample_index = sample_index ^ 0; // XOR with 0 is a no-op: the ranking tile is not read in this configuration
+#else
+    // xor index based on optimized ranking
+    const uint ranked_sample_index = sample_index ^ g_ranking_tile_buffer[sample_dimension + (pixel_i + pixel_j * 128u) * 8u];
+#endif
+
+    // Fetch value in sequence
+    uint value = g_sobol_buffer[sample_dimension + ranked_sample_index * 256u];
+
+    // If the dimension is optimized, xor sequence value based on optimized scrambling
+    value = value ^ g_scrambling_tile_buffer[(sample_dimension % 8u) + (pixel_i + pixel_j * 128u) * 8u];
+
+    // Convert to float and return
+    return (value + 0.5f) / 256.0f; // presumably value is 0..255, giving a result strictly inside (0, 1) — confirm buffer contents
+}
+
+float2 SampleRandomVector2D(uint2 pixel) { // Two random numbers per pixel (dimensions 0 and 1), shifted each frame.
+    float2 u = float2(
+        fmod(SampleRandomNumber(pixel.x, pixel.y, 0, 0u) + (g_frame_index & 0xFFu) * GOLDEN_RATIO, 1.0f), // offset by (frame index & 0xFF) * golden ratio, wrapped by fmod
+        fmod(SampleRandomNumber(pixel.x, pixel.y, 0, 1u) + (g_frame_index & 0xFFu) * GOLDEN_RATIO, 1.0f)); // same offset applied to dimension 1
+    return u;
+}
+
+float3 SampleReflectionVector(float3 view_direction, float3 normal, float roughness, int2 dispatch_thread_id) { // Samples a GGX normal around 'normal' and returns view_direction reflected about it, in the input space.
+    float3x3 tbn_transform = CreateTBN(normal);
+    float3 view_direction_tbn = mul(-view_direction, tbn_transform); // negated view direction expressed in the tangent frame
+
+    float2 u = SampleRandomVector2D(dispatch_thread_id);
+
+    float3 sampled_normal_tbn = Sample_GGX_VNDF_Hemisphere(view_direction_tbn, roughness, u.x, u.y); // roughness is passed directly as alpha
+    #ifdef PERFECT_REFLECTIONS
+    sampled_normal_tbn = float3(0, 0, 1); // Overwrite normal sample to produce perfect reflection.
+    #endif
+
+    float3 reflected_direction_tbn = reflect(-view_direction_tbn, sampled_normal_tbn);
+
+    // Transform reflected_direction back to the initial space.
+    float3x3 inv_tbn_transform = transpose(tbn_transform);
+    return mul(reflected_direction_tbn, inv_tbn_transform);
+}
+
+float3 SampleEnvironmentMap(float3 direction) {
+    return g_environment_map.SampleLevel(g_environment_map_sampler, direction, 0).xyz; // always samples mip level 0
+}
+
+bool IsMirrorReflection(float roughness) {
+    return roughness < 0.0001; // same cutoff as FFX_DNSR_Reflections_IsMirrorReflection in Common.hlsl
+}
+
+#include "ffx_sssr.h"
+
+[numthreads(8, 8, 1)]
+void main(uint group_index : SV_GroupIndex, uint group_id : SV_GroupID) { // One thread per ray: unpack its pixel, trace it, write radiance and ray length, then copy to flagged quad neighbours.
+
+    uint ray_index = group_id * 64 + group_index; // 64 = threads in one 8x8 group
+    if (ray_index >= g_ray_counter[1]) return; // slot 1 is presumably the ray count stored by PrepareIndirectArgs.hlsl — confirm
+    uint packed_coords = g_ray_list[ray_index];
+
+    int2 coords;
+    bool copy_horizontal;
+    bool copy_vertical;
+    bool copy_diagonal;
+    UnpackRayCoords(packed_coords, coords, copy_horizontal, copy_vertical, copy_diagonal);
+
+    uint2 screen_size;
+    g_intersection_result.GetDimensions(screen_size.x, screen_size.y);
+
+    float2 uv = (coords + 0.5) / screen_size; // pixel centre as uv
+
+    float3 world_space_normal = FFX_SSSR_LoadNormal(coords);
+    float roughness = g_roughness.Load(int3(coords, 0));
+    bool is_mirror = IsMirrorReflection(roughness);
+
+    int most_detailed_mip = is_mirror ? 0 : g_most_detailed_mip; // mirror-like surfaces always start from mip 0
+    float2 mip_resolution = FFX_SSSR_GetMipResolution(screen_size, most_detailed_mip);
+    float z = FFX_SSSR_LoadDepth(uv * mip_resolution, most_detailed_mip);
+
+    float3 screen_uv_space_ray_origin = float3(uv, z);
+    float3 view_space_ray = FFX_DNSR_Reflections_ScreenSpaceToViewSpace(screen_uv_space_ray_origin);
+    float3 view_space_ray_direction = normalize(view_space_ray);
+
+    float3 view_space_surface_normal = mul(float4(normalize(world_space_normal), 0), g_view).xyz; // w = 0: transformed as a direction
+    float3 view_space_reflected_direction = SampleReflectionVector(view_space_ray_direction, view_space_surface_normal, roughness, coords);
+    float3 screen_space_ray_direction = ProjectDirection(view_space_ray, view_space_reflected_direction, screen_uv_space_ray_origin, g_proj);
+
+    //====SSSR====
+    bool valid_hit = false;
+    float3 hit = FFX_SSSR_HierarchicalRaymarch(screen_uv_space_ray_origin, screen_space_ray_direction, is_mirror, screen_size, most_detailed_mip, g_min_traversal_occupancy, g_max_traversal_intersections, valid_hit);
+
+    float3 world_space_origin = ScreenSpaceToWorldSpace(screen_uv_space_ray_origin);
+    float3 world_space_hit = ScreenSpaceToWorldSpace(hit);
+    float3 world_space_ray = world_space_hit - world_space_origin.xyz;
+
+    float confidence = valid_hit ? FFX_SSSR_ValidateHit(hit, uv, world_space_ray, screen_size, g_depth_buffer_thickness) : 0; // forced to 0 when the march reported no valid hit
+    float world_ray_length = length(world_space_ray);
+
+    float3 reflection_radiance = 0;
+    if (confidence > 0) {
+        // Found an intersection with the depth buffer -> We can lookup the color from lit scene.
+        reflection_radiance = g_lit_scene.Load(int3(screen_size * hit.xy, 0)).xyz;
+    }
+
+    // Sample environment map.
+    float3 world_space_reflected_direction = mul(float4(view_space_reflected_direction, 0), g_inv_view).xyz;
+    float3 environment_lookup = SampleEnvironmentMap(world_space_reflected_direction);
+    reflection_radiance = confidence * reflection_radiance + (1 - confidence) * environment_lookup; // blend hit colour and environment by confidence
+
+    g_intersection_result[coords] = reflection_radiance;
+    g_ray_lengths[coords] = world_ray_length;
+
+    uint2 copy_target = coords ^ 0b1; // Flip last bit to find the mirrored coords along the x and y axis within a quad.
+    if (copy_horizontal) {
+        uint2 copy_coords = uint2(copy_target.x, coords.y);
+        g_intersection_result[copy_coords] = reflection_radiance;
+        g_ray_lengths[copy_coords] = world_ray_length;
+    }
+    if (copy_vertical) {
+        uint2 copy_coords = uint2(coords.x, copy_target.y);
+        g_intersection_result[copy_coords] = reflection_radiance;
+        g_ray_lengths[copy_coords] = world_ray_length;
+    }
+    if (copy_diagonal) {
+        uint2 copy_coords = copy_target;
+        g_intersection_result[copy_coords] = reflection_radiance;
+        g_ray_lengths[copy_coords] = world_ray_length;
+    }
+}
\ No newline at end of file
diff --git a/sample/src/Shaders/PrepareIndirectArgs.hlsl b/sample/src/Shaders/PrepareIndirectArgs.hlsl
new file mode 100644
index 0000000..f131401
--- /dev/null
+++ b/sample/src/Shaders/PrepareIndirectArgs.hlsl
@@ -0,0 +1,36 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+[[vk::binding(0, 1)]] RWBuffer g_ray_counter : register(u0);
+[[vk::binding(1, 1)]] RWBuffer g_intersect_args : register(u1);
+
+[numthreads(1, 1, 1)]
+void main() { // Single-thread pass: turns the ray count into three dispatch-size values and resets the counter.
+    uint ray_count = g_ray_counter[0];
+
+    g_intersect_args[0] = (ray_count + 63) / 64; // ceil(ray_count / 64); 64 matches the 8x8 thread group of Intersect.hlsl
+    g_intersect_args[1] = 1;
+    g_intersect_args[2] = 1;
+
+    g_ray_counter[0] = 0; // reset slot 0; presumably the live counter for the next frame — confirm against its writer
+    g_ray_counter[1] = ray_count; // keep the count in slot 1, which Intersect.hlsl reads for its bounds check
+}
\ No newline at end of file
diff --git a/sample/src/Shaders/ResolveSpatial.hlsl b/sample/src/Shaders/ResolveSpatial.hlsl
new file mode 100644
index 0000000..d2958f7
--- /dev/null
+++ b/sample/src/Shaders/ResolveSpatial.hlsl
@@ -0,0 +1,100 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+
+#include "Common.hlsl"
+
+[[vk::binding(0, 1)]] Texture2D g_depth_buffer : register(t0);
+[[vk::binding(1, 1)]] Texture2D g_normal : register(t1);
+[[vk::binding(2, 1)]] Texture2D g_roughness : register(t2);
+[[vk::binding(3, 1)]] Texture2D g_intersection_result : register(t3);
+[[vk::binding(4, 1)]] StructuredBuffer g_tile_meta_data_mask : register(t4);
+
+[[vk::binding(5, 1)]] RWTexture2D g_spatially_denoised_reflections : register(u0);
+
+groupshared uint g_shared_0[16][16]; // packed radiance.xy
+groupshared uint g_shared_1[16][16]; // packed (radiance.z, 0)
+groupshared uint g_shared_2[16][16]; // packed normal.xy
+groupshared uint g_shared_3[16][16]; // packed (normal.z, 0)
+groupshared float g_shared_depth[16][16]; // depth, stored unpacked
+
+min16float3 FFX_DNSR_Reflections_LoadRadianceFromGroupSharedMemory(int2 idx) {
+    uint2 tmp;
+    tmp.x = g_shared_0[idx.y][idx.x]; // arrays are indexed [y][x]
+    tmp.y = g_shared_1[idx.y][idx.x];
+    return min16float4(UnpackFloat16(tmp.x), UnpackFloat16(tmp.y)).xyz; // fourth component is the padding written by the store
+}
+
+min16float3 FFX_DNSR_Reflections_LoadNormalFromGroupSharedMemory(int2 idx) {
+    uint2 tmp;
+    tmp.x = g_shared_2[idx.y][idx.x];
+    tmp.y = g_shared_3[idx.y][idx.x];
+    return min16float4(UnpackFloat16(tmp.x), UnpackFloat16(tmp.y)).xyz; // fourth component is the padding written by the store
+}
+
+float FFX_DNSR_Reflections_LoadDepthFromGroupSharedMemory(int2 idx) {
+    return g_shared_depth[idx.y][idx.x];
+}
+
+void FFX_DNSR_Reflections_StoreInGroupSharedMemory(int2 idx, min16float3 radiance, min16float3 normal, float depth) { // Writes one sample into the five group-shared arrays; PackFloat16 is defined outside this excerpt.
+    g_shared_0[idx.y][idx.x] = PackFloat16(radiance.xy);
+    g_shared_1[idx.y][idx.x] = PackFloat16(min16float2(radiance.z, 0)); // second half is zero padding
+    g_shared_2[idx.y][idx.x] = PackFloat16(normal.xy);
+    g_shared_3[idx.y][idx.x] = PackFloat16(min16float2(normal.z, 0)); // second half is zero padding
+    g_shared_depth[idx.y][idx.x] = depth;
+}
+
+float FFX_DNSR_Reflections_LoadRoughness(int2 pixel_coordinate) {
+    return g_roughness.Load(int3(pixel_coordinate, 0)); // mip 0
+}
+
+min16float3 FFX_DNSR_Reflections_LoadRadianceFP16(int2 pixel_coordinate) {
+    return g_intersection_result.Load(int3(pixel_coordinate, 0)).xyz; // mip 0; narrowed to min16float3 by the return type
+}
+
+min16float3 FFX_DNSR_Reflections_LoadNormalFP16(int2 pixel_coordinate) {
+    return (min16float3) (2 * g_normal.Load(int3(pixel_coordinate, 0)).xyz - 1); // decodes the stored value with 2 * x - 1
+}
+
+float FFX_DNSR_Reflections_LoadDepth(int2 pixel_coordinate) {
+    return g_depth_buffer.Load(int3(pixel_coordinate, 0)); // mip 0
+}
+
+void FFX_DNSR_Reflections_StoreSpatiallyDenoisedReflections(int2 pixel_coordinate, min16float3 value) {
+    g_spatially_denoised_reflections[pixel_coordinate] = value; // output of this pass
+}
+
+uint FFX_DNSR_Reflections_LoadTileMetaDataMask(uint index) {
+    return g_tile_meta_data_mask[index]; // the mask's meaning is defined by the included denoiser header, not here
+}
+
+#include "ffx_denoiser_reflections_resolve_spatial.h"
+
+[numthreads(8, 8, 1)]
+void main(uint group_index : SV_GroupIndex, uint2 group_id : SV_GroupID) { // Entry point: computes per-thread coordinates and hands off to the denoiser library's spatial resolve.
+
+    uint2 screen_dimensions;
+    g_depth_buffer.GetDimensions(screen_dimensions.x, screen_dimensions.y);
+
+    uint2 group_thread_id = FFX_DNSR_Reflections_RemapLane8x8(group_index); // library helper; presumably maps the flat lane index to a 2D position in the 8x8 group — confirm layout
+    uint2 dispatch_thread_id = group_id * 8 + group_thread_id; // 8 = group width and height
+    FFX_DNSR_Reflections_ResolveSpatial((int2)dispatch_thread_id, (int2)group_thread_id, g_samples_per_quad, screen_dimensions);
+}
\ No newline at end of file
diff --git a/sample/src/Shaders/ResolveTemporal.hlsl b/sample/src/Shaders/ResolveTemporal.hlsl
new file mode 100644
index 0000000..eb61f30
--- /dev/null
+++ b/sample/src/Shaders/ResolveTemporal.hlsl
@@ -0,0 +1,93 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+#include "Common.hlsl"
+
+[[vk::binding(0, 1)]] Texture2D g_normal : register(t0);
+[[vk::binding(1, 1)]] Texture2D g_roughness : register(t1);
+[[vk::binding(2, 1)]] Texture2D g_normal_history : register(t2);
+[[vk::binding(3, 1)]] Texture2D g_roughness_history : register(t3);
+[[vk::binding(4, 1)]] Texture2D g_depth_buffer : register(t4);
+[[vk::binding(5, 1)]] Texture2D g_motion_vectors : register(t5);
+[[vk::binding(6, 1)]] Texture2D g_temporally_denoised_reflections_history : register(t6);
+[[vk::binding(7, 1)]] Texture2D g_ray_lengths : register(t7);
+[[vk::binding(8, 1)]] Texture2D g_spatially_denoised_reflections : register(t8);
+[[vk::binding(9, 1)]] StructuredBuffer g_tile_meta_data_mask : register(t9);
+
+[[vk::binding(10, 1)]] RWTexture2D g_temporally_denoised_reflections : register(u0);
+[[vk::binding(11, 1)]] RWStructuredBuffer g_temporal_variance_mask : register(u1);
+
+float FFX_DNSR_Reflections_LoadRayLength(int2 pixel_coordinate) {
+    return g_ray_lengths.Load(int3(pixel_coordinate, 0)); // mip 0; Intersect.hlsl writes a resource of the same name
+}
+
+float2 FFX_DNSR_Reflections_LoadMotionVector(int2 pixel_coordinate) {
+    return g_motion_vectors.Load(int3(pixel_coordinate, 0)).xy * float2(0.5, -0.5); // halves x, halves and negates y; presumably converts to uv units with a flipped y axis — confirm
+}
+
+float3 FFX_DNSR_Reflections_LoadNormal(int2 pixel_coordinate) {
+    return 2 * g_normal.Load(int3(pixel_coordinate, 0)).xyz - 1; // decodes the stored value with 2 * x - 1
+}
+
+float3 FFX_DNSR_Reflections_LoadNormalHistory(int2 pixel_coordinate) {
+    return 2 * g_normal_history.Load(int3(pixel_coordinate, 0)).xyz - 1; // same decode, applied to the history texture
+}
+
+float FFX_DNSR_Reflections_LoadRoughness(int2 pixel_coordinate) {
+    return g_roughness.Load(int3(pixel_coordinate, 0)); // mip 0
+}
+
+float FFX_DNSR_Reflections_LoadRoughnessHistory(int2 pixel_coordinate) {
+    return g_roughness_history.Load(int3(pixel_coordinate, 0)); // mip 0
+}
+
+float3 FFX_DNSR_Reflections_LoadRadianceHistory(int2 pixel_coordinate) {
+    return g_temporally_denoised_reflections_history.Load(int3(pixel_coordinate, 0)).xyz; // read from the history texture, not this pass's output
+}
+
+float FFX_DNSR_Reflections_LoadDepth(int2 pixel_coordinate) {
+    return g_depth_buffer.Load(int3(pixel_coordinate, 0)); // mip 0
+}
+
+float3 FFX_DNSR_Reflections_LoadSpatiallyDenoisedReflections(int2 pixel_coordinate) {
+    return g_spatially_denoised_reflections.Load(int3(pixel_coordinate, 0)).xyz; // ResolveSpatial.hlsl writes a resource of the same name
+}
+
+uint FFX_DNSR_Reflections_LoadTileMetaDataMask(uint index) {
+    return g_tile_meta_data_mask[index]; // the mask's meaning is defined by the included denoiser header, not here
+}
+
+void FFX_DNSR_Reflections_StoreTemporallyDenoisedReflections(int2 pixel_coordinate, float3 value) {
+    g_temporally_denoised_reflections[pixel_coordinate] = value; // output of this pass
+}
+
+void FFX_DNSR_Reflections_StoreTemporalVarianceMask(int index, uint mask) {
+    g_temporal_variance_mask[index] = mask; // the mask layout is defined by the included denoiser header
+}
+
+#include "ffx_denoiser_reflections_resolve_temporal.h"
+
+[numthreads(8, 8, 1)]
+void main(int2 dispatch_thread_id : SV_DispatchThreadID, int2 group_thread_id : SV_GroupThreadID) { // Entry point: queries the output size and hands off to the denoiser library's temporal resolve.
+    uint2 image_size;
+    g_temporally_denoised_reflections.GetDimensions(image_size.x, image_size.y);
+    FFX_DNSR_Reflections_ResolveTemporal(dispatch_thread_id, group_thread_id, image_size, g_temporal_stability_factor, g_temporal_variance_threshold);
+}
\ No newline at end of file
diff --git a/sample/src/VK/CMakeLists.txt b/sample/src/VK/CMakeLists.txt
index 01542d8..9753265 100644
--- a/sample/src/VK/CMakeLists.txt
+++ b/sample/src/VK/CMakeLists.txt
@@ -2,22 +2,24 @@ project (SssrSample_VK)
 
 add_compile_options(/MP)
 
-set(Sources_src
-    Sources/SssrSample.cpp
-    Sources/SssrSample.h
-    Sources/SampleRenderer.cpp
-    Sources/SampleRenderer.h
-    Sources/stdafx.cpp
-    Sources/stdafx.h)
+file(GLOB Sources_src
+    Sources/*.h
+    Sources/*.cpp
+    )
 
-set(Shaders_src
-    ${CMAKE_CURRENT_SOURCE_DIR}/Shaders/ApplyReflections.hlsl
-    ${CMAKE_CURRENT_SOURCE_DIR}/Shaders/DepthDownsample.hlsl
-    ${CMAKE_CURRENT_SOURCE_DIR}/Shaders/ffx_a.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/Shaders/ffx_spd.h)
+file(GLOB Shaders_src
+    ../Shaders/*.hlsl
+    ../Shaders/*.h
+    ../../../ffx-dnsr/ffx-reflection-dnsr/*.h
+    ../../../ffx-dnsr/ffx-reflection-dnsr/*.hlsl
+
../../../ffx-sssr/*.h + ../../../ffx-sssr/*.hlsl + ../../../ffx-spd/*.h + ../../../ffx-spd/*.hlsl + ) -set(Common_src - ${CMAKE_CURRENT_SOURCE_DIR}/../Common/config.json +file(GLOB Common_src + ../Common/config.json ) source_group("Sources" FILES ${Sources_src}) @@ -46,7 +48,7 @@ copyCommand("${Shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) copyCommand("${Common_src}" ${CMAKE_HOME_DIRECTORY}/bin) add_executable(${PROJECT_NAME} WIN32 ${Sources_src} ${Shaders_src} ${Common_src}) -target_link_libraries (${PROJECT_NAME} LINK_PUBLIC Cauldron_VK FFX_SSSR ImGUI) +target_link_libraries (${PROJECT_NAME} LINK_PUBLIC Cauldron_VK ImGUI) set_target_properties(${PROJECT_NAME} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin") diff --git a/sample/src/VK/Shaders/ffx_a.h b/sample/src/VK/Shaders/ffx_a.h deleted file mode 100644 index b92546e..0000000 --- a/sample/src/VK/Shaders/ffx_a.h +++ /dev/null @@ -1,1907 +0,0 @@ -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// [A] SHADER PORTABILITY 1.20190530 -// -//============================================================================================================================== -// LICENSE -// ======= -// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. 
-// Copyright (c) <2014> -// ------- -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// ------- -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the -// Software. -// ------- -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ -// ABOUT -// ===== -// Common central point for high-level shading language and C portability for various shader headers. -//------------------------------------------------------------------------------------------------------------------------------ -// DEFINES -// ======= -// A_CPU ..... Include the CPU related code. -// A_GPU ..... Include the GPU related code. -// A_GLSL .... Using GLSL. -// A_HLSL .... Using HLSL. -// A_GCC ..... Using a GCC compatible compiler (else assume MSVC compatible compiler by default). -// ======= -// A_BYTE .... Support 8-bit integer. -// A_HALF .... Support 16-bit integer and floating point. -// A_LONG .... Support 64-bit integer. -// A_DUBL .... Support 64-bit floating point. 
-// ======= -// A_WAVE .... Support wave-wide operations. -//------------------------------------------------------------------------------------------------------------------------------ -// To get #include "ffx_a.h" working in GLSL use '#extension GL_GOOGLE_include_directive:require'. -//------------------------------------------------------------------------------------------------------------------------------ -// SIMPLIFIED TYPE SYSTEM -// ====================== -// - All ints will be unsigned with exception of when signed is required. -// - Type naming simplified and shortened "A<#components>", -// - H = 16-bit float (half) -// - F = 32-bit float (float) -// - D = 64-bit float (double) -// - P = 1-bit integer (predicate, not using bool because 'B' is used for byte) -// - B = 8-bit integer (byte) -// - W = 16-bit integer (word) -// - U = 32-bit integer (unsigned) -// - L = 64-bit integer (long) -// - Using "AS<#components>" for signed when required. -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Make sure 'ALerp*(a,b,m)' does 'b*m+(-a*m+a)' (2 ops). -// - Add subgroup ops. -//------------------------------------------------------------------------------------------------------------------------------ -// CHANGE LOG -// ========== -// 20190531 - Fixed changed to llabs() because long is int on Windows. -// 20190530 - Updated for new CPU/GPU portability. -// 20190528 - Fix AU1_AH2_x() on HLSL (had incorrectly swapped x and y), fixed asuint() cases. -// 20190527 - Added min3/max3 for low precision for HLSL. -// 20190526 - Updated with half approximations, added ARsq*(), and ASat*() for CPU. -// 20190519 - Added more approximations. -// 20190514 - Added long conversions. -// 20190513 - Added the real BFI moved the other one to ABfiM(). -// 20190507 - Added extra remap useful for 2D reductions. -// 20190507 - Started adding wave ops, add parabolic sin/cos. 
-// 20190505 - Added ASigned*() and friends, setup more auto-typecast, GLSL extensions, etc. -// 20190504 - Added min3/max3 for 32-bit integers. -// 20190503 - Added type reinterpretation for half. -// 20190416 - Added min3/max3 for half. -// 20190405 - Misc bug fixing. -// 20190404 - Cleaned up color conversion code. Switched "splat" to shorter naming "type_". Misc bug fixing. -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// COMMON -//============================================================================================================================== -#define A_2PI 6.28318530718 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// CPU -// -// -//============================================================================================================================== -// Requires standard C types: stdint.h -// Requires a collection of standard math intrinsics. -// - Requires VS2013 when not using GCC to get exp2() and log2(). -// - https://blogs.msdn.microsoft.com/vcblog/2013/07/19/c99-library-support-in-visual-studio-2013/ -//------------------------------------------------------------------------------------------------------------------------------ -// This provides a minimum subset of functionality compared to the GPU parts. -//============================================================================================================================== -#ifdef A_CPU - // Supporting user defined overrides. - #ifndef A_RESTRICT - #define A_RESTRICT __restrict - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifndef A_STATIC - #define A_STATIC static - #endif -//------------------------------------------------------------------------------------------------------------------------------ - // Same types across CPU and GPU. - // Predicate uses 32-bit integer (C friendly bool). 
- typedef uint32_t AP1; - typedef float AF1; - typedef double AD1; - typedef uint8_t AB1; - typedef uint16_t AW1; - typedef uint32_t AU1; - typedef uint64_t AL1; - typedef int8_t ASB1; - typedef int16_t ASW1; - typedef int32_t ASU1; - typedef int64_t ASL1; -//------------------------------------------------------------------------------------------------------------------------------ - #define AD1_(a) ((AD1)(a)) - #define AF1_(a) ((AF1)(a)) - #define AL1_(a) ((AL1)(a)) - #define AU1_(a) ((AU1)(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASL1_(a) ((ASL1)(a)) - #define ASU1_(a) ((ASU1)(a)) -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AU1 AU1_AF1(AF1 a){union{AF1 f;AU1 u;}bits;bits.f=a;return bits.u;} -//------------------------------------------------------------------------------------------------------------------------------ - #define A_TRUE 1 - #define A_FALSE 0 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// CPU/GPU PORTING -// 
-//------------------------------------------------------------------------------------------------------------------------------ -// Hackary to get CPU and GPU to share all setup code, without duplicate code paths. -// Unfortunately this is the level of "ugly" that is required since the languages are very different. -// This uses a lower-case prefix for special vector constructs. -// - In C restrict pointers are used. -// - In the shading language, in/inout/out arguments are used. -// This depends on the ability to access a vector value in both languages via array syntax (aka color[2]). -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY -//============================================================================================================================== - #define retAD2 AD1 *A_RESTRICT - #define retAD3 AD1 *A_RESTRICT - #define retAD4 AD1 *A_RESTRICT - #define retAF2 AF1 *A_RESTRICT - #define retAF3 AF1 *A_RESTRICT - #define retAF4 AF1 *A_RESTRICT - #define retAL2 AL1 *A_RESTRICT - #define retAL3 AL1 *A_RESTRICT - #define retAL4 AL1 *A_RESTRICT - #define retAU2 AU1 *A_RESTRICT - #define retAU3 AU1 *A_RESTRICT - #define retAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define inAD2 AD1 *A_RESTRICT - #define 
inAD3 AD1 *A_RESTRICT - #define inAD4 AD1 *A_RESTRICT - #define inAF2 AF1 *A_RESTRICT - #define inAF3 AF1 *A_RESTRICT - #define inAF4 AF1 *A_RESTRICT - #define inAL2 AL1 *A_RESTRICT - #define inAL3 AL1 *A_RESTRICT - #define inAL4 AL1 *A_RESTRICT - #define inAU2 AU1 *A_RESTRICT - #define inAU3 AU1 *A_RESTRICT - #define inAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define inoutAD2 AD1 *A_RESTRICT - #define inoutAD3 AD1 *A_RESTRICT - #define inoutAD4 AD1 *A_RESTRICT - #define inoutAF2 AF1 *A_RESTRICT - #define inoutAF3 AF1 *A_RESTRICT - #define inoutAF4 AF1 *A_RESTRICT - #define inoutAL2 AL1 *A_RESTRICT - #define inoutAL3 AL1 *A_RESTRICT - #define inoutAL4 AL1 *A_RESTRICT - #define inoutAU2 AU1 *A_RESTRICT - #define inoutAU3 AU1 *A_RESTRICT - #define inoutAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define outAD2 AD1 *A_RESTRICT - #define outAD3 AD1 *A_RESTRICT - #define outAD4 AD1 *A_RESTRICT - #define outAF2 AF1 *A_RESTRICT - #define outAF3 AF1 *A_RESTRICT - #define outAF4 AF1 *A_RESTRICT - #define outAL2 AL1 *A_RESTRICT - #define outAL3 AL1 *A_RESTRICT - #define outAL4 AL1 *A_RESTRICT - #define outAU2 AU1 *A_RESTRICT - #define outAU3 AU1 *A_RESTRICT - #define outAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define varAD2(x) AD1 x[2] - #define varAD3(x) AD1 x[3] - #define varAD4(x) AD1 x[4] - #define varAF2(x) AF1 x[2] - #define varAF3(x) AF1 x[3] - #define varAF4(x) AF1 x[4] - #define varAL2(x) AL1 x[2] - #define varAL3(x) AL1 x[3] - #define varAL4(x) AL1 x[4] - #define varAU2(x) AU1 x[2] - #define varAU3(x) AU1 x[3] - #define varAU4(x) AU1 x[4] 
-//------------------------------------------------------------------------------------------------------------------------------ - #define initAD2(x,y) {x,y} - #define initAD3(x,y,z) {x,y,z} - #define initAD4(x,y,z,w) {x,y,z,w} - #define initAF2(x,y) {x,y} - #define initAF3(x,y,z) {x,y,z} - #define initAF4(x,y,z,w) {x,y,z,w} - #define initAL2(x,y) {x,y} - #define initAL3(x,y,z) {x,y,z} - #define initAL4(x,y,z,w) {x,y,z,w} - #define initAU2(x,y) {x,y} - #define initAU3(x,y,z) {x,y,z} - #define initAU4(x,y,z,w) {x,y,z,w} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Replace transcendentals with manual versions. 
-//============================================================================================================================== - #ifdef A_GCC - A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);} - A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);} - A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));} - A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_labs(ASL1_(a)));} - #else - A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);} - A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);} - A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));} - A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(llabs(ASL1_(a)));} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ACosD1(AD1 a){return __builtin_cos(a);} - A_STATIC AF1 ACosF1(AF1 a){return __builtin_cosf(a);} - #else - A_STATIC AD1 ACosD1(AD1 a){return cos(a);} - A_STATIC AF1 ACosF1(AF1 a){return cosf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ADotD2(inAD2 a,inAD2 b){return a[0]*b[0]+a[1]*b[1];} - A_STATIC AD1 ADotD3(inAD3 a,inAD3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} - A_STATIC AD1 ADotD4(inAD4 a,inAD4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} - A_STATIC AF1 ADotF2(inAF2 a,inAF2 b){return a[0]*b[0]+a[1]*b[1];} - A_STATIC AF1 ADotF3(inAF3 a,inAF3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} - A_STATIC AF1 ADotF4(inAF4 a,inAF4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 AExp2D1(AD1 a){return __builtin_exp2(a);} - A_STATIC AF1 AExp2F1(AF1 a){return __builtin_exp2f(a);} - #else - A_STATIC AD1 AExp2D1(AD1 a){return exp2(a);} - A_STATIC AF1 AExp2F1(AF1 a){return exp2f(a);} - #endif 
-//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 AFloorD1(AD1 a){return __builtin_floor(a);} - A_STATIC AF1 AFloorF1(AF1 a){return __builtin_floorf(a);} - #else - A_STATIC AD1 AFloorD1(AD1 a){return floor(a);} - A_STATIC AF1 AFloorF1(AF1 a){return floorf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ALerpD1(AD1 a,AD1 b,AD1 c){return b*c+(-a*c+a);} - A_STATIC AF1 ALerpF1(AF1 a,AF1 b,AF1 c){return b*c+(-a*c+a);} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ALog2D1(AD1 a){return __builtin_log2(a);} - A_STATIC AF1 ALog2F1(AF1 a){return __builtin_log2f(a);} - #else - A_STATIC AD1 ALog2D1(AD1 a){return log2(a);} - A_STATIC AF1 ALog2F1(AF1 a){return log2f(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AMaxD1(AD1 a,AD1 b){return a>b?a:b;} - A_STATIC AF1 AMaxF1(AF1 a,AF1 b){return a>b?a:b;} - A_STATIC AL1 AMaxL1(AL1 a,AL1 b){return a>b?a:b;} - A_STATIC AU1 AMaxU1(AU1 a,AU1 b){return a>b?a:b;} -//------------------------------------------------------------------------------------------------------------------------------ - // These follow the convention that A integer types don't have signage, until they are operated on. 
- A_STATIC AL1 AMaxSL1(AL1 a,AL1 b){return (ASL1_(a)>ASL1_(b))?a:b;} - A_STATIC AU1 AMaxSU1(AU1 a,AU1 b){return (ASU1_(a)>ASU1_(b))?a:b;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AMinD1(AD1 a,AD1 b){return a<b?a:b;} - A_STATIC AF1 AMinF1(AF1 a,AF1 b){return a<b?a:b;} - A_STATIC AL1 AMinL1(AL1 a,AL1 b){return a<b?a:b;} - A_STATIC AU1 AMinU1(AU1 a,AU1 b){return a<b?a:b;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AL1 AMinSL1(AL1 a,AL1 b){return (ASL1_(a)<ASL1_(b))?a:b;} - A_STATIC AU1 AMinSU1(AU1 a,AU1 b){return (ASU1_(a)<ASU1_(b))?a:b;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ARcpD1(AD1 a){return 1.0/a;} - A_STATIC AF1 ARcpF1(AF1 a){return 1.0f/a;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AL1 AShrSL1(AL1 a,AL1 b){return AL1_(ASL1_(a)>>ASL1_(b));} - A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ASinD1(AD1 a){return __builtin_sin(a);} - A_STATIC AF1 ASinF1(AF1 a){return __builtin_sinf(a);} - #else - A_STATIC AD1 ASinD1(AD1 a){return sin(a);} - A_STATIC AF1 ASinF1(AF1 a){return sinf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ASqrtD1(AD1 a){return __builtin_sqrt(a);} - A_STATIC AF1 ASqrtF1(AF1 a){return __builtin_sqrtf(a);} - #else - A_STATIC AD1 ASqrtD1(AD1 a){return sqrt(a);} - A_STATIC AF1 ASqrtF1(AF1 a){return sqrtf(a);} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS - DEPENDENT -//============================================================================================================================== - A_STATIC AD1 AFractD1(AD1 a){return a-AFloorD1(a);} - A_STATIC AF1 AFractF1(AF1 a){return a-AFloorF1(a);} 
-//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 APowD1(AD1 a,AD1 b){return AExp2D1(b*ALog2D1(a));} - A_STATIC AF1 APowF1(AF1 a,AF1 b){return AExp2F1(b*ALog2F1(a));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ARsqD1(AD1 a){return ARcpD1(ASqrtD1(a));} - A_STATIC AF1 ARsqF1(AF1 a){return ARcpF1(ASqrtF1(a));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ASatD1(AD1 a){return AMinD1(1.0,AMaxD1(0.0,a));} - A_STATIC AF1 ASatF1(AF1 a){return AMinF1(1.0f,AMaxF1(0.0f,a));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR OPS -//------------------------------------------------------------------------------------------------------------------------------ -// These are added as needed for production or prototyping, so not necessarily a complete set. -// They follow a convention of taking in a destination and also returning the destination value to increase utility. 
-//============================================================================================================================== - A_STATIC retAD2 opAAbsD2(outAD2 d,inAD2 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);return d;} - A_STATIC retAD3 opAAbsD3(outAD3 d,inAD3 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);return d;} - A_STATIC retAD4 opAAbsD4(outAD4 d,inAD4 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);d[3]=AAbsD1(a[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAbsF2(outAF2 d,inAF2 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);return d;} - A_STATIC retAF3 opAAbsF3(outAF3 d,inAF3 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);return d;} - A_STATIC retAF4 opAAbsF4(outAF4 d,inAF4 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);d[3]=AAbsF1(a[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} - A_STATIC retAD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} - A_STATIC retAD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} - A_STATIC retAF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} - A_STATIC retAF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opACpyD2(outAD2 
d,inAD2 a){d[0]=a[0];d[1]=a[1];return d;} - A_STATIC retAD3 opACpyD3(outAD3 d,inAD3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} - A_STATIC retAD4 opACpyD4(outAD4 d,inAD4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opACpyF2(outAF2 d,inAF2 a){d[0]=a[0];d[1]=a[1];return d;} - A_STATIC retAF3 opACpyF3(outAF3 d,inAF3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} - A_STATIC retAF4 opACpyF4(outAF4 d,inAF4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);return d;} - A_STATIC retAD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);return d;} - A_STATIC retAD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);d[3]=ALerpD1(a[3],b[3],c[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);return d;} - A_STATIC retAF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);return d;} - A_STATIC retAF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);d[3]=ALerpF1(a[3],b[3],c[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opALerpOneD2(outAD2 
d,inAD2 a,inAD2 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);return d;} - A_STATIC retAD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);return d;} - A_STATIC retAD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);d[3]=ALerpD1(a[3],b[3],c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);return d;} - A_STATIC retAF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);return d;} - A_STATIC retAF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);d[3]=ALerpF1(a[3],b[3],c);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);return d;} - A_STATIC retAD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);return d;} - A_STATIC retAD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);d[3]=AMaxD1(a[3],b[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);return d;} - A_STATIC retAF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);return d;} - A_STATIC retAF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 
b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);d[3]=AMaxF1(a[3],b[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);return d;} - A_STATIC retAD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);return d;} - A_STATIC retAD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);d[3]=AMinD1(a[3],b[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);return d;} - A_STATIC retAF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);return d;} - A_STATIC retAF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);d[3]=AMinF1(a[3],b[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} - A_STATIC retAD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} - A_STATIC retAD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} - A_STATIC retAF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} - A_STATIC retAF4 opAMulF4(outAF4 d,inAF4 
a,inAF4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} - A_STATIC retAD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} - A_STATIC retAD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} - A_STATIC retAF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} - A_STATIC retAF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} -//============================================================================================================================== - A_STATIC retAD2 opANegD2(outAD2 d,inAD2 a){d[0]=-a[0];d[1]=-a[1];return d;} - A_STATIC retAD3 opANegD3(outAD3 d,inAD3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} - A_STATIC retAD4 opANegD4(outAD4 d,inAD4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opANegF2(outAF2 d,inAF2 a){d[0]=-a[0];d[1]=-a[1];return d;} - A_STATIC retAF3 opANegF3(outAF3 d,inAF3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} - A_STATIC retAF4 opANegF4(outAF4 d,inAF4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opARcpD2(outAD2 d,inAD2 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);return d;} - A_STATIC retAD3 opARcpD3(outAD3 
d,inAD3 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);return d;} - A_STATIC retAD4 opARcpD4(outAD4 d,inAD4 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);d[3]=ARcpD1(a[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opARcpF2(outAF2 d,inAF2 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);return d;} - A_STATIC retAF3 opARcpF3(outAF3 d,inAF3 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);return d;} - A_STATIC retAF4 opARcpF4(outAF4 d,inAF4 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);d[3]=ARcpF1(a[3]);return d;} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HALF FLOAT PACKING -//============================================================================================================================== - // Convert float to half (in lower 16-bits of output). - // Same fast technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf - // Supports denormals. 
- // Conversion rules are to make computations possibly "safer" on the GPU, - // -INF & -NaN -> -65504 - // +INF & +NaN -> +65504 - A_STATIC AU1 AU1_AH1_AF1(AF1 f){ - static AW1 base[512]={ - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100, - 0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00, - 0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 
0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100, - 0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00, - 0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff}; - static AB1 shift[512]={ - 
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, - 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, - 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, - 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, - 
0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18}; - union{AF1 f;AU1 u;}bits;bits.f=f;AU1 u=bits.u;AU1 i=u>>23;return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);} -//------------------------------------------------------------------------------------------------------------------------------ - // Used to output packed constant. - A_STATIC AU1 AU1_AH2_AF2(inAF2 a){return AU1_AH1_AF1(a[0])+(AU1_AH1_AF1(a[1])<<16);} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ 
-//============================================================================================================================== -// -// -// GLSL -// -// -//============================================================================================================================== -#if defined(A_GLSL) && defined(A_GPU) - #ifndef A_SKIP_EXT - #ifdef A_HALF - #extension GL_EXT_shader_16bit_storage:require - #extension GL_EXT_shader_explicit_arithmetic_types:require - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_LONG - #extension GL_ARB_gpu_shader_int64:require - // TODO: Fixme to more portable extension!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - #extension GL_NV_shader_atomic_int64:require - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_WAVE - #extension GL_KHR_shader_subgroup_arithmetic:require - #extension GL_KHR_shader_subgroup_ballot:require - #extension GL_KHR_shader_subgroup_quad:require - #extension GL_KHR_shader_subgroup_shuffle:require - #endif - #endif -//============================================================================================================================== - #define AP1 bool - #define AP2 bvec2 - #define AP3 bvec3 - #define AP4 bvec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 float - #define AF2 vec2 - #define AF3 vec3 - #define AF4 vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint - #define AU2 uvec2 - #define AU3 uvec3 - #define AU4 uvec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int 
- #define ASU2 ivec2 - #define ASU3 ivec3 - #define ASU4 ivec4 -//============================================================================================================================== - #define AF1_AU1(x) uintBitsToFloat(AU1(x)) - #define AF2_AU2(x) uintBitsToFloat(AU2(x)) - #define AF3_AU3(x) uintBitsToFloat(AU3(x)) - #define AF4_AU4(x) uintBitsToFloat(AU4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AF1(x) floatBitsToUint(AF1(x)) - #define AU2_AF2(x) floatBitsToUint(AF2(x)) - #define AU3_AF3(x) floatBitsToUint(AF3(x)) - #define AU4_AF4(x) floatBitsToUint(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AH2_AF2 packHalf2x16 - #define AU1_AW2Unorm_AF2 packUnorm2x16 - #define AU1_AB4Unorm_AF4 packUnorm4x8 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF2_AH2_AU1 unpackHalf2x16 - #define AF2_AW2Unorm_AU1 unpackUnorm2x16 - #define AF4_AB4Unorm_AU1 unpackUnorm4x8 -//============================================================================================================================== - AF1 AF1_x(AF1 a){return AF1(a);} - AF2 AF2_x(AF1 a){return AF2(a,a);} - AF3 AF3_x(AF1 a){return AF3(a,a,a);} - AF4 AF4_x(AF1 a){return AF4(a,a,a,a);} - #define AF1_(a) AF1_x(AF1(a)) - #define AF2_(a) AF2_x(AF1(a)) - #define AF3_(a) AF3_x(AF1(a)) - #define AF4_(a) AF4_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_x(AU1 a){return AU1(a);} - AU2 AU2_x(AU1 a){return AU2(a,a);} - AU3 AU3_x(AU1 a){return AU3(a,a,a);} - AU4 AU4_x(AU1 a){return AU4(a,a,a,a);} - #define AU1_(a) AU1_x(AU1(a)) - #define AU2_(a) AU2_x(AU1(a)) - #define AU3_(a) AU3_x(AU1(a)) - #define AU4_(a) 
AU4_x(AU1(a)) -//============================================================================================================================== - AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));} - AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));} - AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));} - AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABfe(AU1 src,AU1 off,AU1 bits){return bitfieldExtract(src,ASU1(off),ASU1(bits));} - AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} - // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<>ASU1(b));} - AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));} - AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));} - AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL BYTE -//============================================================================================================================== - #ifdef A_BYTE - #define AB1 uint8_t - #define AB2 u8vec2 - #define AB3 u8vec3 - #define AB4 u8vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASB1 int8_t - #define ASB2 i8vec2 - #define ASB3 i8vec3 - #define ASB4 i8vec4 -//------------------------------------------------------------------------------------------------------------------------------ - AB1 
AB1_x(AB1 a){return AB1(a);} - AB2 AB2_x(AB1 a){return AB2(a,a);} - AB3 AB3_x(AB1 a){return AB3(a,a,a);} - AB4 AB4_x(AB1 a){return AB4(a,a,a,a);} - #define AB1_(a) AB1_x(AB1(a)) - #define AB2_(a) AB2_x(AB1(a)) - #define AB3_(a) AB3_x(AB1(a)) - #define AB4_(a) AB4_x(AB1(a)) - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL HALF -//============================================================================================================================== - #ifdef A_HALF - #define AH1 float16_t - #define AH2 f16vec2 - #define AH3 f16vec3 - #define AH4 f16vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 uint16_t - #define AW2 u16vec2 - #define AW3 u16vec3 - #define AW4 u16vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 int16_t - #define ASW2 i16vec2 - #define ASW3 i16vec3 - #define ASW4 i16vec4 -//============================================================================================================================== - #define AH2_AU1(x) unpackFloat2x16(AU1(x)) - AH4 AH4_AU2_x(AU2 x){return AH4(unpackFloat2x16(x.x),unpackFloat2x16(x.y));} - #define AH4_AU2(x) AH4_AU2_x(AU2(x)) - #define AW2_AU1(x) unpackUint2x16(AU1(x)) - #define AW4_AU2(x) unpackUint4x16(pack64(AU2(x))) 
-//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AH2(x) packFloat2x16(AH2(x)) - AU2 AU2_AH4_x(AH4 x){return AU2(packFloat2x16(x.xy),packFloat2x16(x.zw));} - #define AU2_AH4(x) AU2_AH4_x(AH4(x)) - #define AU1_AW2(x) packUint2x16(AW2(x)) - #define AU2_AW4(x) unpack32(packUint4x16(AW4(x))) -//============================================================================================================================== - #define AW1_AH1(x) halfBitsToUint16(AH1(x)) - #define AW2_AH2(x) halfBitsToUint16(AH2(x)) - #define AW3_AH3(x) halfBitsToUint16(AH3(x)) - #define AW4_AH4(x) halfBitsToUint16(AH4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AH1_AW1(x) uint16BitsToHalf(AW1(x)) - #define AH2_AW2(x) uint16BitsToHalf(AW2(x)) - #define AH3_AW3(x) uint16BitsToHalf(AW3(x)) - #define AH4_AW4(x) uint16BitsToHalf(AW4(x)) -//============================================================================================================================== - AH1 AH1_x(AH1 a){return AH1(a);} - AH2 AH2_x(AH1 a){return AH2(a,a);} - AH3 AH3_x(AH1 a){return AH3(a,a,a);} - AH4 AH4_x(AH1 a){return AH4(a,a,a,a);} - #define AH1_(a) AH1_x(AH1(a)) - #define AH2_(a) AH2_x(AH1(a)) - #define AH3_(a) AH3_x(AH1(a)) - #define AH4_(a) AH4_x(AH1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AW1_x(AW1 a){return AW1(a);} - AW2 AW2_x(AW1 a){return AW2(a,a);} - AW3 AW3_x(AW1 a){return AW3(a,a,a);} - AW4 AW4_x(AW1 a){return AW4(a,a,a,a);} - #define AW1_(a) AW1_x(AW1(a)) - #define AW2_(a) AW2_x(AW1(a)) - #define AW3_(a) AW3_x(AW1(a)) - #define AW4_(a) AW4_x(AW1(a)) -//============================================================================================================================== - AW1 AAbsSW1(AW1 a){return 
AW1(abs(ASW1(a)));} - AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));} - AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));} - AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFractH1(AH1 x){return fract(x);} - AH2 AFractH2(AH2 x){return fract(x);} - AH3 AFractH3(AH3 x){return fract(x);} - AH4 AFractH4(AH4 x){return fract(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return mix(x,y,a);} - AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return mix(x,y,a);} - AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return mix(x,y,a);} - AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return mix(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - // No packed version of max3. - AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));} - AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));} - AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));} - AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASU1(a),ASU1(b)));} - AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASU2(a),ASU2(b)));} - AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASU3(a),ASU3(b)));} - AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - // No packed version of min3. 
- AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));} - AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));} - AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));} - AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASU1(a),ASU1(b)));} - AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASU2(a),ASU2(b)));} - AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASU3(a),ASU3(b)));} - AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARcpH1(AH1 x){return AH1_(1.0)/x;} - AH2 ARcpH2(AH2 x){return AH2_(1.0)/x;} - AH3 ARcpH3(AH3 x){return AH3_(1.0)/x;} - AH4 ARcpH4(AH4 x){return AH4_(1.0)/x;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARsqH1(AH1 x){return AH1_(1.0)/sqrt(x);} - AH2 ARsqH2(AH2 x){return AH2_(1.0)/sqrt(x);} - AH3 ARsqH3(AH3 x){return AH3_(1.0)/sqrt(x);} - AH4 ARsqH4(AH4 x){return AH4_(1.0)/sqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASatH1(AH1 x){return clamp(x,AH1_(0.0),AH1_(1.0));} - AH2 ASatH2(AH2 x){return clamp(x,AH2_(0.0),AH2_(1.0));} - AH3 ASatH3(AH3 x){return clamp(x,AH3_(0.0),AH3_(1.0));} - AH4 ASatH4(AH4 x){return clamp(x,AH4_(0.0),AH4_(1.0));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));} - AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));} - AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));} - AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));} - #endif 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL DOUBLE -//============================================================================================================================== - #ifdef A_DUBL - #define AD1 double - #define AD2 dvec2 - #define AD3 dvec3 - #define AD4 dvec4 -//------------------------------------------------------------------------------------------------------------------------------ - AD1 AD1_x(AD1 a){return AD1(a);} - AD2 AD2_x(AD1 a){return AD2(a,a);} - AD3 AD3_x(AD1 a){return AD3(a,a,a);} - AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} - #define AD1_(a) AD1_x(AD1(a)) - #define AD2_(a) AD2_x(AD1(a)) - #define AD3_(a) AD3_x(AD1(a)) - #define AD4_(a) AD4_x(AD1(a)) -//============================================================================================================================== - AD1 AFractD1(AD1 x){return fract(x);} - AD2 AFractD2(AD2 x){return fract(x);} - AD3 AFractD3(AD3 x){return fract(x);} - AD4 AFractD4(AD4 x){return fract(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return mix(x,y,a);} - AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return mix(x,y,a);} - AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return mix(x,y,a);} - AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return mix(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARcpD1(AD1 x){return AD1_(1.0)/x;} - AD2 ARcpD2(AD2 
x){return AD2_(1.0)/x;} - AD3 ARcpD3(AD3 x){return AD3_(1.0)/x;} - AD4 ARcpD4(AD4 x){return AD4_(1.0)/x;} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARsqD1(AD1 x){return AD1_(1.0)/sqrt(x);} - AD2 ARsqD2(AD2 x){return AD2_(1.0)/sqrt(x);} - AD3 ARsqD3(AD3 x){return AD3_(1.0)/sqrt(x);} - AD4 ARsqD4(AD4 x){return AD4_(1.0)/sqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ASatD1(AD1 x){return clamp(x,AD1_(0.0),AD1_(1.0));} - AD2 ASatD2(AD2 x){return clamp(x,AD2_(0.0),AD2_(1.0));} - AD3 ASatD3(AD3 x){return clamp(x,AD3_(0.0),AD3_(1.0));} - AD4 ASatD4(AD4 x){return clamp(x,AD4_(0.0),AD4_(1.0));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL LONG -//============================================================================================================================== - #ifdef A_LONG - #define AL1 uint64_t - #define AL2 u64vec2 - #define AL3 u64vec3 - #define AL4 u64vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASL1 int64_t - #define ASL2 i64vec2 - #define ASL3 i64vec3 - #define ASL4 i64vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AL1_AU2(x) packUint2x32(AU2(x)) - #define AU2_AL1(x) 
unpackUint2x32(AL1(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AL1_x(AL1 a){return AL1(a);} - AL2 AL2_x(AL1 a){return AL2(a,a);} - AL3 AL3_x(AL1 a){return AL3(a,a,a);} - AL4 AL4_x(AL1 a){return AL4(a,a,a,a);} - #define AL1_(a) AL1_x(AL1(a)) - #define AL2_(a) AL2_x(AL1(a)) - #define AL3_(a) AL3_x(AL1(a)) - #define AL4_(a) AL4_x(AL1(a)) -//============================================================================================================================== - AL1 AAbsSL1(AL1 a){return AL1(abs(ASL1(a)));} - AL2 AAbsSL2(AL2 a){return AL2(abs(ASL2(a)));} - AL3 AAbsSL3(AL3 a){return AL3(abs(ASL3(a)));} - AL4 AAbsSL4(AL4 a){return AL4(abs(ASL4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASU1(a),ASU1(b)));} - AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASU2(a),ASU2(b)));} - AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASU3(a),ASU3(b)));} - AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASU1(a),ASU1(b)));} - AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASU2(a),ASU2(b)));} - AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASU3(a),ASU3(b)));} - AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASU4(a),ASU4(b)));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ 
-//============================================================================================================================== -// WAVE OPERATIONS -//============================================================================================================================== - #ifdef A_WAVE - AF1 AWaveAdd(AF1 v){return subgroupAdd(v);} - AF2 AWaveAdd(AF2 v){return subgroupAdd(v);} - AF3 AWaveAdd(AF3 v){return subgroupAdd(v);} - AF4 AWaveAdd(AF4 v){return subgroupAdd(v);} - #endif -//============================================================================================================================== -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// HLSL -// -// -//============================================================================================================================== -#if defined(A_HLSL) && defined(A_GPU) - #define AP1 bool - #define AP2 bool2 - #define AP3 bool3 - #define AP4 bool4 
-//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 float - #define AF2 float2 - #define AF3 float3 - #define AF4 float4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint - #define AU2 uint2 - #define AU3 uint3 - #define AU4 uint4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int - #define ASU2 int2 - #define ASU3 int3 - #define ASU4 int4 -//============================================================================================================================== - #define AF1_AU1(x) asfloat(AU1(x)) - #define AF2_AU2(x) asfloat(AU2(x)) - #define AF3_AU3(x) asfloat(AU3(x)) - #define AF4_AU4(x) asfloat(AU4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AF1(x) asuint(AF1(x)) - #define AU2_AF2(x) asuint(AF2(x)) - #define AU3_AF3(x) asuint(AF3(x)) - #define AU4_AF4(x) asuint(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH2_AF2_x(AF2 a){return f32tof16(a.x)|(f32tof16(a.y)<<16);} - #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a)) - #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AF2 AF2_AH2_AU1_x(AU1 x){return AF2(f16tof32(x&0xFFFF),f16tof32(x>>16));} - #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x)) -//============================================================================================================================== - AF1 AF1_x(AF1 a){return AF1(a);} - AF2 AF2_x(AF1 a){return AF2(a,a);} - AF3 AF3_x(AF1 a){return AF3(a,a,a);} - AF4 AF4_x(AF1 
a){return AF4(a,a,a,a);} - #define AF1_(a) AF1_x(AF1(a)) - #define AF2_(a) AF2_x(AF1(a)) - #define AF3_(a) AF3_x(AF1(a)) - #define AF4_(a) AF4_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_x(AU1 a){return AU1(a);} - AU2 AU2_x(AU1 a){return AU2(a,a);} - AU3 AU3_x(AU1 a){return AU3(a,a,a);} - AU4 AU4_x(AU1 a){return AU4(a,a,a,a);} - #define AU1_(a) AU1_x(AU1(a)) - #define AU2_(a) AU2_x(AU1(a)) - #define AU3_(a) AU3_x(AU1(a)) - #define AU4_(a) AU4_x(AU1(a)) -//============================================================================================================================== - AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));} - AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));} - AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));} - AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABfe(AU1 src,AU1 off,AU1 bits){AU1 mask=(1<>off)&mask;} - AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} - AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){AU1 mask=(1<>ASU1(b));} - AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));} - AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));} - AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL BYTE 
-//============================================================================================================================== - #ifdef A_BYTE - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL HALF -//============================================================================================================================== - #ifdef A_HALF - #define AH1 min16float - #define AH2 min16float2 - #define AH3 min16float3 - #define AH4 min16float4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 min16uint - #define AW2 min16uint2 - #define AW3 min16uint3 - #define AW4 min16uint4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 min16int - #define ASW2 min16int2 - #define ASW3 min16int3 - #define ASW4 min16int4 -//============================================================================================================================== - // Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). 
- // Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ - AH2 AH2_AU1_x(AU1 x){AF2 t=f16tof32(AU2(x&0xFFFF,x>>16));return AH2(t);} - AH4 AH4_AU2_x(AU2 x){return AH4(AH2_AU1_x(x.x),AH2_AU1_x(x.y));} - AW2 AW2_AU1_x(AU1 x){AU2 t=AU2(x&0xFFFF,x>>16);return AW2(t);} - AW4 AW4_AU2_x(AU2 x){return AW4(AW2_AU1_x(x.x),AW2_AU1_x(x.y));} - #define AH2_AU1(x) AH2_AU1_x(AU1(x)) - #define AH4_AU2(x) AH4_AU2_x(AU2(x)) - #define AW2_AU1(x) AW2_AU1_x(AU1(x)) - #define AW4_AU2(x) AW4_AU2_x(AU2(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH2_x(AH2 x){return f32tof16(x.x)+(f32tof16(x.y)<<16);} - AU2 AU2_AH4_x(AH4 x){return AU2(AU1_AH2_x(x.xy),AU1_AH2_x(x.zw));} - AU1 AU1_AW2_x(AW2 x){return AU1(x.x)+(AU1(x.y)<<16);} - AU2 AU2_AW4_x(AW4 x){return AU2(AU1_AW2_x(x.xy),AU1_AW2_x(x.zw));} - #define AU1_AH2(x) AU1_AH2_x(AH2(x)) - #define AU2_AH4(x) AU2_AH4_x(AH4(x)) - #define AU1_AW2(x) AU1_AW2_x(AW2(x)) - #define AU2_AW4(x) AU2_AW4_x(AW4(x)) -//============================================================================================================================== - // TODO: These are broken!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - #define AW1_AH1(x) AW1(asuint(AF1(x))) - #define AW2_AH2(x) AW2(asuint(AF2(x))) - #define AW3_AH3(x) AW3(asuint(AF3(x))) - #define AW4_AH4(x) AW4(asuint(AF4(x))) -//------------------------------------------------------------------------------------------------------------------------------ - // TODO: These are broken!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- #define AH1_AW1(x) AH1(asfloat(AU1(x))) - #define AH2_AW2(x) AH2(asfloat(AU2(x))) - #define AH3_AW3(x) AH3(asfloat(AU3(x))) - #define AH4_AW4(x) AH4(asfloat(AU4(x))) -//============================================================================================================================== - AH1 AH1_x(AH1 a){return AH1(a);} - AH2 AH2_x(AH1 a){return AH2(a,a);} - AH3 AH3_x(AH1 a){return AH3(a,a,a);} - AH4 AH4_x(AH1 a){return AH4(a,a,a,a);} - #define AH1_(a) AH1_x(AH1(a)) - #define AH2_(a) AH2_x(AH1(a)) - #define AH3_(a) AH3_x(AH1(a)) - #define AH4_(a) AH4_x(AH1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AW1_x(AW1 a){return AW1(a);} - AW2 AW2_x(AW1 a){return AW2(a,a);} - AW3 AW3_x(AW1 a){return AW3(a,a,a);} - AW4 AW4_x(AW1 a){return AW4(a,a,a,a);} - #define AW1_(a) AW1_x(AW1(a)) - #define AW2_(a) AW2_x(AW1(a)) - #define AW3_(a) AW3_x(AW1(a)) - #define AW4_(a) AW4_x(AW1(a)) -//============================================================================================================================== - AW1 AAbsSW1(AW1 a){return AW1(abs(ASW1(a)));} - AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));} - AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));} - AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - // V_FRACT_F16 (note DX frac() is different). 
- AH1 AFractH1(AH1 x){return x-floor(x);} - AH2 AFractH2(AH2 x){return x-floor(x);} - AH3 AFractH3(AH3 x){return x-floor(x);} - AH4 AFractH4(AH4 x){return x-floor(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return lerp(x,y,a);} - AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return lerp(x,y,a);} - AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return lerp(x,y,a);} - AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return lerp(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));} - AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));} - AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));} - AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASU1(a),ASU1(b)));} - AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASU2(a),ASU2(b)));} - AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASU3(a),ASU3(b)));} - AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));} - AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));} - AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));} - AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASU1(a),ASU1(b)));} - AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASU2(a),ASU2(b)));} - AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASU3(a),ASU3(b)));} - AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASU4(a),ASU4(b)));} 
-//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARcpH1(AH1 x){return rcp(x);} - AH2 ARcpH2(AH2 x){return rcp(x);} - AH3 ARcpH3(AH3 x){return rcp(x);} - AH4 ARcpH4(AH4 x){return rcp(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARsqH1(AH1 x){return rsqrt(x);} - AH2 ARsqH2(AH2 x){return rsqrt(x);} - AH3 ARsqH3(AH3 x){return rsqrt(x);} - AH4 ARsqH4(AH4 x){return rsqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASatH1(AH1 x){return saturate(x);} - AH2 ASatH2(AH2 x){return saturate(x);} - AH3 ASatH3(AH3 x){return saturate(x);} - AH4 ASatH4(AH4 x){return saturate(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));} - AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));} - AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));} - AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL DOUBLE -//============================================================================================================================== - #ifdef A_DUBL - #define AD1 double - #define AD2 double2 - #define AD3 double3 - #define AD4 double4 
-//------------------------------------------------------------------------------------------------------------------------------ - AD1 AD1_x(AD1 a){return AD1(a);} - AD2 AD2_x(AD1 a){return AD2(a,a);} - AD3 AD3_x(AD1 a){return AD3(a,a,a);} - AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} - #define AD1_(a) AD1_x(AD1(a)) - #define AD2_(a) AD2_x(AD1(a)) - #define AD3_(a) AD3_x(AD1(a)) - #define AD4_(a) AD4_x(AD1(a)) -//============================================================================================================================== - AD1 AFractD1(AD1 a){return a-floor(a);} - AD2 AFractD2(AD2 a){return a-floor(a);} - AD3 AFractD3(AD3 a){return a-floor(a);} - AD4 AFractD4(AD4 a){return a-floor(a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return lerp(x,y,a);} - AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return lerp(x,y,a);} - AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return lerp(x,y,a);} - AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return lerp(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARcpD1(AD1 x){return rcp(x);} - AD2 ARcpD2(AD2 x){return rcp(x);} - AD3 ARcpD3(AD3 x){return rcp(x);} - AD4 ARcpD4(AD4 x){return rcp(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARsqD1(AD1 x){return rsqrt(x);} - AD2 ARsqD2(AD2 x){return rsqrt(x);} - AD3 ARsqD3(AD3 x){return rsqrt(x);} - AD4 ARsqD4(AD4 x){return rsqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ASatD1(AD1 x){return saturate(x);} - AD2 ASatD2(AD2 x){return saturate(x);} - AD3 ASatD3(AD3 x){return saturate(x);} - AD4 ASatD4(AD4 x){return saturate(x);} - #endif 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL LONG -//============================================================================================================================== - #ifdef A_LONG - #endif -//============================================================================================================================== -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GPU COMMON -// -// 
-//============================================================================================================================== -#ifdef A_GPU - // Negative and positive infinity (IEEE bit patterns: sign bit set is -INF, sign bit clear is +INF). - #define A_INFN_F AF1_AU1(0xff800000u) - #define A_INFP_F AF1_AU1(0x7f800000u) -//------------------------------------------------------------------------------------------------------------------------------ - // Copy sign from 's' to positive 'd'. - AF1 ACpySgnF1(AF1 d,AF1 s){return AF1_AU1(AU1_AF1(d)|(AU1_AF1(s)&AU1_(0x80000000u)));} - AF2 ACpySgnF2(AF2 d,AF2 s){return AF2_AU2(AU2_AF2(d)|(AU2_AF2(s)&AU2_(0x80000000u)));} - AF3 ACpySgnF3(AF3 d,AF3 s){return AF3_AU3(AU3_AF3(d)|(AU3_AF3(s)&AU3_(0x80000000u)));} - AF4 ACpySgnF4(AF4 d,AF4 s){return AF4_AU4(AU4_AF4(d)|(AU4_AF4(s)&AU4_(0x80000000u)));} -//------------------------------------------------------------------------------------------------------------------------------ - // Single operation to return (useful to create a mask to use in lerp for branch free logic), - // m=NaN := 0 - // m>=0 := 0 - // m<0 := 1 - // Uses the following useful floating point logic, - // saturate(+a*(-INF)==-INF) := 0 - // saturate( 0*(-INF)== NaN) := 0 - // saturate(-a*(-INF)==+INF) := 1 - AF1 ASignedF1(AF1 m){return ASatF1(m*AF1_(A_INFN_F));} - AF2 ASignedF2(AF2 m){return ASatF2(m*AF2_(A_INFN_F));} - AF3 ASignedF3(AF3 m){return ASatF3(m*AF3_(A_INFN_F));} - AF4 ASignedF4(AF4 m){return ASatF4(m*AF4_(A_INFN_F));} -//============================================================================================================================== - #ifdef A_HALF - // Half precision negative and positive infinity (0xfc00 is -INF, 0x7c00 is +INF). - #define A_INFN_H AH1_AW1(0xfc00u) - #define A_INFP_H AH1_AW1(0x7c00u) -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ACpySgnH1(AH1 d,AH1 s){return AH1_AW1(AW1_AH1(d)|(AW1_AH1(s)&AW1_(0x8000u)));} - AH2 ACpySgnH2(AH2 d,AH2 s){return AH2_AW2(AW2_AH2(d)|(AW2_AH2(s)&AW2_(0x8000u)));} - AH3 ACpySgnH3(AH3 d,AH3
s){return AH3_AW3(AW3_AH3(d)|(AW3_AH3(s)&AW3_(0x8000u)));} - AH4 ACpySgnH4(AH4 d,AH4 s){return AH4_AW4(AW4_AH4(d)|(AW4_AH4(s)&AW4_(0x8000u)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASignedH1(AH1 m){return ASatH1(m*AH1_(A_INFN_H));} - AH2 ASignedH2(AH2 m){return ASatH2(m*AH2_(A_INFN_H));} - AH3 ASignedH3(AH3 m){return ASatH3(m*AH3_(A_INFN_H));} - AH4 ASignedH4(AH4 m){return ASatH4(m*AH4_(A_INFN_H));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HALF APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// These support only positive inputs. -// Did not see value yet in specialization for range. -// Using quick testing, ended up mostly getting the same "best" approximation for various ranges. -// With hardware that can co-execute transcendentals, the value in approximations could be less than expected. -// However from a latency perspective, if execution of a transcendental is 4 clk, with no packed support, -> 8 clk total. -// And co-execution would require a compiler interleaving a lot of independent work for packed usage. -//------------------------------------------------------------------------------------------------------------------------------ -// The one Newton Raphson iteration form of rsq() was skipped (requires 6 ops total). -// Same with sqrt(), as this could be x*rsq() (7 ops). 
-//------------------------------------------------------------------------------------------------------------------------------ -// IDEAS -// ===== -// - Polaris hardware has 16-bit support, but non-double rate. -// Could be possible still get part double rate for some of this logic, -// by clearing out the lower half's sign when necessary and using 32-bit ops... -//============================================================================================================================== - #ifdef A_HALF - // Minimize squared error across full positive range, 2 ops. - // The 0x1de2 based approximation maps {0 to 1} input maps to < 1 output. - AH1 APrxLoSqrtH1(AH1 a){return AH1_AW1((AW1_AH1(a)>>AW1_(1))+AW1_(0x1de2));} - AH2 APrxLoSqrtH2(AH2 a){return AH2_AW2((AW2_AH2(a)>>AW2_(1))+AW2_(0x1de2));} -//------------------------------------------------------------------------------------------------------------------------------ - // Lower precision estimation, 1 op. - // Minimize squared error across {smallest normal to 16384.0}. - AH1 APrxLoRcpH1(AH1 a){return AH1_AW1(AW1_(0x7784)-AW1_AH1(a));} - AH2 APrxLoRcpH2(AH2 a){return AH2_AW2(AW2_(0x7784)-AW2_AH2(a));} -//------------------------------------------------------------------------------------------------------------------------------ - // Medium precision estimation, one Newton Raphson iteration, 3 ops. - AH1 APrxMedRcpH1(AH1 a){AH1 b=AH1_AW1(AW1_(0x778d)-AW1_AH1(a));return b*(-b*a+AH1_(2.0));} - AH2 APrxMedRcpH2(AH2 a){AH2 b=AH2_AW2(AW2_(0x778d)-AW2_AH2(a));return b*(-b*a+AH2_(2.0));} -//------------------------------------------------------------------------------------------------------------------------------ - // Minimize squared error across {smallest normal to 16384.0}, 2 ops. 
- AH1 APrxLoRsqH1(AH1 a){return AH1_AW1(AW1_(0x59a3)-(AW1_AH1(a)>>AW1_(1)));} - AH2 APrxLoRsqH2(AH2 a){return AH2_AW2(AW2_(0x59a3)-(AW2_AH2(a)>>AW2_(1)));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// FLOAT APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN", -// - Idea dates back to SGI, then to Quake 3, etc. -// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -// - sqrt(x)=rsqrt(x)*x -// - rcp(x)=rsqrt(x)*rsqrt(x) for positive x -// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -//------------------------------------------------------------------------------------------------------------------------------ -// These below are from perhaps less complete searching for optimal. -// Used FP16 normal range for testing with +4096 32-bit step size for sampling error. -// So these match up well with the half approximations. 
-//============================================================================================================================== - AF1 APrxLoSqrtF1(AF1 a){return AF1_AU1((AU1_AF1(a)>>AU1_(1))+AU1_(0x1fbc4639));} - AF1 APrxLoRcpF1(AF1 a){return AF1_AU1(AU1_(0x7ef07ebb)-AU1_AF1(a));} - AF1 APrxMedRcpF1(AF1 a){AF1 b=AF1_AU1(AU1_(0x7ef19fff)-AU1_AF1(a));return b*(-b*a+AF1_(2.0));} - AF1 APrxLoRsqF1(AF1 a){return AF1_AU1(AU1_(0x5f347d74)-(AU1_AF1(a)>>AU1_(1)));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PARABOLIC SIN & COS -//------------------------------------------------------------------------------------------------------------------------------ -// Approximate answers to transcendental questions. -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Verify packed math ABS is correctly doing an AND. -//============================================================================================================================== - // Valid input range is {-1 to 1} representing {0 to 2 pi}. - // Output range is {-1/4 to 1/4} representing {-1 to 1}. - AF1 APSinF1(AF1 x){return x*abs(x)-x;} // MAD.
- AF1 APCosF1(AF1 x){x=AFractF1(x*AF1_(0.5)+AF1_(0.75));x=x*AF1_(2.0)-AF1_(1.0);return APSinF1(x);} // 3x MAD, FRACT -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - // For a packed {sin,cos} pair, - // - Native takes 16 clocks and 4 issue slots (no packed transcendentals). - // - Parabolic takes 8 clocks and 8 issue slots (only fract is non-packed). - AH2 APSinH2(AH2 x){return x*abs(x)-x;} // AND,FMA - AH2 APCosH2(AH2 x){x=AFractH2(x*AH2_(0.5)+AH2_(0.75));x=x*AH2_(2.0)-AH2_(1.0);return APSinH2(x);} // 3x FMA, 2xFRACT, AND - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// COLOR CONVERSIONS -//------------------------------------------------------------------------------------------------------------------------------ -// These are all linear to/from some other space (where 'linear' has been shortened out of the function name). -// So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'. -// These are branch free implementations. -// The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion. -//------------------------------------------------------------------------------------------------------------------------------ -// TRANSFER FUNCTIONS -// ================== -// 709 ..... Rec709 used for some HDTVs -// Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native -// Pq ...... 
PQ native for HDR10 -// Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type -// Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations) -//------------------------------------------------------------------------------------------------------------------------------ -// FOR PQ -// ====== -// Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2. -// All constants are only specified to FP32 precision. -// External PQ source reference, -// - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl -//------------------------------------------------------------------------------------------------------------------------------ -// PACKED VERSIONS -// =============== -// These are the A*H2() functions. -// There is no PQ functions as FP16 seemed to not have enough precision for the conversion. -// The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors. -// Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least). -//------------------------------------------------------------------------------------------------------------------------------ -// NOTES -// ===== -// Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case. -//============================================================================================================================== - AF1 ATo709F1(AF1 c){return max(min(c*AF1_(4.5),AF1_(0.018)),AF1_(1.099)*pow(c,AF1_(0.45))-AF1_(0.099));} -//------------------------------------------------------------------------------------------------------------------------------ - // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma(). 
- AF1 AToGammaF1(AF1 c,AF1 rcpX){return pow(c,rcpX);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302)); - return pow((AF1_(0.835938)+AF1_(18.8516)*p)/(AF1_(1.0)+AF1_(18.6875)*p),AF1_(78.8438));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToSrgbF1(AF1 c){return max(min(c*AF1_(12.92),AF1_(0.0031308)),AF1_(1.055)*pow(c,AF1_(0.41666))-AF1_(0.055));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToTwoF1(AF1 c){return sqrt(c);} -//============================================================================================================================== - AF1 AFrom709F1(AF1 c){return max(min(c*AF1_(1.0/4.5),AF1_(0.081)), - pow((c+AF1_(0.099))*(AF1_(1.0)/(AF1_(1.099))),AF1_(1.0/0.45)));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromGammaF1(AF1 c,AF1 x){return pow(c,x);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833)); - return pow(ASatF1(p-AF1_(0.835938))/(AF1_(18.8516)-AF1_(18.6875)*p),AF1_(6.27739));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromSrgbF1(AF1 c){return max(min(c*AF1_(1.0/12.92),AF1_(0.04045)), - pow((c+AF1_(0.055))*(AF1_(1.0)/AF1_(1.055)),AF1_(2.4)));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromTwoF1(AF1 c){return c*c;} 
-//============================================================================================================================== - #ifdef A_HALF - AH2 ATo709H2(AH2 c){return max(min(c*AH2_(4.5),AH2_(0.018)),AH2_(1.099)*pow(c,AH2_(0.45))-AH2_(0.099));} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 AToGammaH2(AH2 c,AH1 rcpX){return pow(c,AH2_(rcpX));} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 AToSrgbH2(AH2 c){return max(min(c*AH2_(12.92),AH2_(0.0031308)),AH2_(1.055)*pow(c,AH2_(0.41666))-AH2_(0.055));} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 AToTwoH2(AH2 c){return sqrt(c);} - #endif -//============================================================================================================================== - #ifdef A_HALF - AH2 AFrom709H2(AH2 c){return max(min(c*AH2_(1.0/4.5),AH2_(0.081)), - pow((c+AH2_(0.099))*(AH2_(1.0)/(AH2_(1.099))),AH2_(1.0/0.45)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 AFromGammaH2(AH2 c,AH1 x){return pow(c,AH2_(x));} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 AFromSrgbH2(AH2 c){return max(min(c*AH2_(1.0/12.92),AH2_(0.04045)), - pow((c+AH2_(0.055))*(AH2_(1.0)/AH2_(1.055)),AH2_(2.4)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 AFromTwoH2(AH2 c){return c*c;} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CS REMAP -//============================================================================================================================== - // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear. - // 543210 - // ====== - // ..xxx. - // yy...y - AU2 ARmp8x8(AU1 a){return AU2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));} -//============================================================================================================================== - // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions. - // 543210 - // ====== - // .xx..x - // y..yy. - // Details, - // LANE TO 8x8 MAPPING - // =================== - // 00 01 08 09 10 11 18 19 - // 02 03 0a 0b 12 13 1a 1b - // 04 05 0c 0d 14 15 1c 1d - // 06 07 0e 0f 16 17 1e 1f - // 20 21 28 29 30 31 38 39 - // 22 23 2a 2b 32 33 3a 3b - // 24 25 2c 2d 34 35 3c 3d - // 26 27 2e 2f 36 37 3e 3f - AU2 ARmpRed8x8(AU1 a){return AU2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// REFERENCE -// -//------------------------------------------------------------------------------------------------------------------------------ -// IEEE FLOAT RULES -// ================ -// - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1 -// - {+/-}0 * {+/-}INF = NaN -// - -INF + (+INF) = NaN -// - {+/-}0 / {+/-}0 = NaN -// - {+/-}INF / {+/-}INF = NaN -// - a<(-0) := sqrt(a) = NaN (a=-0.0 won't NaN) -// - 0 == -0 -// - 4/0 = +INF -// - 4/-0 = -INF -// - 4+INF = +INF -// - 4-INF = -INF -// - 4*(+INF) = +INF -// - 4*(-INF) = -INF -// - -4*(+INF) = -INF -// - sqrt(+INF) = +INF -//------------------------------------------------------------------------------------------------------------------------------ -// FP16 ENCODING -// ============= -// fedcba9876543210 -// ---------------- -// ......mmmmmmmmmm 10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals) -// .eeeee.......... 5-bit exponent -// .00000.......... denormals -// .00001.......... -14 exponent -// .11110.......... 15 exponent -// .111110000000000 infinity -// .11111nnnnnnnnnn NaN with n!=0 -// s............... sign -//------------------------------------------------------------------------------------------------------------------------------ -// FP16/INT16 ALIASING DENORMAL -// ============================ -// 11-bit unsigned integers alias with half float denormal/normal values, -// 1 = 2^(-24) = 1/16777216 ....................... first denormal value -// 2 = 2^(-23) -// ... -// 1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value -// 1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers -// 2047 .............................................. 
last normal value that still maps to integers -// Scaling limits, -// 2^15 = 32768 ...................................... largest power of 2 scaling -// Largest pow2 conversion mapping is at *32768, -// 1 : 2^(-9) = 1/128 -// 1024 : 8 -// 2047 : a little less than 16 -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GPU/CPU PORTABILITY -// -// -//------------------------------------------------------------------------------------------------------------------------------ -// This is the GPU implementation. -// See the CPU implementation for docs. 
-//============================================================================================================================== -#ifdef A_GPU - #define A_TRUE true - #define A_FALSE false - #define A_STATIC -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY -//============================================================================================================================== - #define retAD2 AD2 - #define retAD3 AD3 - #define retAD4 AD4 - #define retAF2 AF2 - #define retAF3 AF3 - #define retAF4 AF4 - #define retAL2 AL2 - #define retAL3 AL3 - #define retAL4 AL4 - #define retAU2 AU2 - #define retAU3 AU3 - #define retAU4 AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define inAD2 in AD2 - #define inAD3 in AD3 - #define inAD4 in AD4 - #define inAF2 in AF2 - #define inAF3 in AF3 - #define inAF4 in AF4 - #define inAL2 in AL2 - #define inAL3 in AL3 - #define inAL4 in AL4 - #define inAU2 in AU2 - #define inAU3 in AU3 - #define inAU4 in AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define inoutAD2 inout AD2 - #define inoutAD3 inout AD3 - #define inoutAD4 inout AD4 - #define inoutAF2 inout AF2 - #define inoutAF3 inout AF3 - #define inoutAF4 inout AF4 - #define inoutAL2 inout AL2 - #define inoutAL3 inout AL3 - #define inoutAL4 inout AL4 - #define inoutAU2 inout AU2 - 
#define inoutAU3 inout AU3 - #define inoutAU4 inout AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define outAD2 out AD2 - #define outAD3 out AD3 - #define outAD4 out AD4 - #define outAF2 out AF2 - #define outAF3 out AF3 - #define outAF4 out AF4 - #define outAL2 out AL2 - #define outAL3 out AL3 - #define outAL4 out AL4 - #define outAU2 out AU2 - #define outAU3 out AU3 - #define outAU4 out AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define varAD2(x) AD2 x - #define varAD3(x) AD3 x - #define varAD4(x) AD4 x - #define varAF2(x) AF2 x - #define varAF3(x) AF3 x - #define varAF4(x) AF4 x - #define varAL2(x) AL2 x - #define varAL3(x) AL3 x - #define varAL4(x) AL4 x - #define varAU2(x) AU2 x - #define varAU3(x) AU3 x - #define varAU4(x) AU4 x -//------------------------------------------------------------------------------------------------------------------------------ - #define initAD2(x,y) AD2(x,y) - #define initAD3(x,y,z) AD3(x,y,z) - #define initAD4(x,y,z,w) AD4(x,y,z,w) - #define initAF2(x,y) AF2(x,y) - #define initAF3(x,y,z) AF3(x,y,z) - #define initAF4(x,y,z,w) AF4(x,y,z,w) - #define initAL2(x,y) AL2(x,y) - #define initAL3(x,y,z) AL3(x,y,z) - #define initAL4(x,y,z,w) AL4(x,y,z,w) - #define initAU2(x,y) AU2(x,y) - #define initAU3(x,y,z) AU3(x,y,z) - #define initAU4(x,y,z,w) AU4(x,y,z,w) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ 
-//============================================================================================================================== -// SCALAR RETURN OPS -//============================================================================================================================== - #define AAbsD1(a) abs(AD1(a)) - #define AAbsF1(a) abs(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ACosD1(a) cos(AD1(a)) - #define ACosF1(a) cos(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ADotD2(a,b) dot(AD2(a),AD2(b)) - #define ADotD3(a,b) dot(AD3(a),AD3(b)) - #define ADotD4(a,b) dot(AD4(a),AD4(b)) - #define ADotF2(a,b) dot(AF2(a),AF2(b)) - #define ADotF3(a,b) dot(AF3(a),AF3(b)) - #define ADotF4(a,b) dot(AF4(a),AF4(b)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AExp2D1(a) exp2(AD1(a)) - #define AExp2F1(a) exp2(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AFloorD1(a) floor(AD1(a)) - #define AFloorF1(a) floor(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ALog2D1(a) log2(AD1(a)) - #define ALog2F1(a) log2(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - // Scalar maximum: must expand to max(), not min(). - #define AMaxD1(a,b) max(a,b) - #define AMaxF1(a,b) max(a,b) - #define AMaxL1(a,b) max(a,b) - #define AMaxU1(a,b) max(a,b) -//------------------------------------------------------------------------------------------------------------------------------ - #define AMinD1(a,b) min(a,b) - #define AMinF1(a,b) min(a,b) - #define AMinL1(a,b) min(a,b) -
#define AMinU1(a,b) min(a,b) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASinD1(a) sin(AD1(a)) - #define ASinF1(a) sin(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASqrtD1(a) sqrt(AD1(a)) - #define ASqrtF1(a) sqrt(AF1(a)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS - DEPENDENT -//============================================================================================================================== - #define APowD1(a,b) pow(AD1(a),AF1(b)) - #define APowF1(a,b) pow(AF1(a),AF1(b)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR OPS -//------------------------------------------------------------------------------------------------------------------------------ -// These are added as needed for production or prototyping, so not necessarily a complete set. 
-// They follow a convention of taking in a destination and also returning the destination value to increase utility. -//============================================================================================================================== - #ifdef A_DUBL - AD2 opAAbsD2(outAD2 d,inAD2 a){d=abs(a);return d;} - AD3 opAAbsD3(outAD3 d,inAD3 a){d=abs(a);return d;} - AD4 opAAbsD4(outAD4 d,inAD4 a){d=abs(a);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d=a+b;return d;} - AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d=a+b;return d;} - AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d=a+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opACpyD2(outAD2 d,inAD2 a){d=a;return d;} - AD3 opACpyD3(outAD3 d,inAD3 a){d=a;return d;} - AD4 opACpyD4(outAD4 d,inAD4 a){d=a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d=ALerpD2(a,b,c);return d;} - AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d=ALerpD3(a,b,c);return d;} - AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d=ALerpD4(a,b,c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d=ALerpD2(a,b,AD2_(c));return d;} - AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d=ALerpD3(a,b,AD3_(c));return d;} - AD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d=ALerpD4(a,b,AD4_(c));return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d=max(a,b);return d;} - AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 
b){d=max(a,b);return d;} - AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d=max(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d=min(a,b);return d;} - AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d=min(a,b);return d;} - AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d=min(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d=a*b;return d;} - AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d=a*b;return d;} - AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d=a*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d=a*AD2_(b);return d;} - AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d=a*AD3_(b);return d;} - AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d=a*AD4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opANegD2(outAD2 d,inAD2 a){d=-a;return d;} - AD3 opANegD3(outAD3 d,inAD3 a){d=-a;return d;} - AD4 opANegD4(outAD4 d,inAD4 a){d=-a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opARcpD2(outAD2 d,inAD2 a){d=ARcpD2(a);return d;} - AD3 opARcpD3(outAD3 d,inAD3 a){d=ARcpD3(a);return d;} - AD4 opARcpD4(outAD4 d,inAD4 a){d=ARcpD4(a);return d;} - #endif -//============================================================================================================================== - AF2 opAAbsF2(outAF2 d,inAF2 a){d=abs(a);return d;} - AF3 opAAbsF3(outAF3 d,inAF3 a){d=abs(a);return d;} - AF4 opAAbsF4(outAF4 d,inAF4 a){d=abs(a);return d;} 
-//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d=a+b;return d;} - AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d=a+b;return d;} - AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d=a+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opACpyF2(outAF2 d,inAF2 a){d=a;return d;} - AF3 opACpyF3(outAF3 d,inAF3 a){d=a;return d;} - AF4 opACpyF4(outAF4 d,inAF4 a){d=a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d=ALerpF2(a,b,c);return d;} - AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d=ALerpF3(a,b,c);return d;} - AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d=ALerpF4(a,b,c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d=ALerpF2(a,b,AF2_(c));return d;} - AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d=ALerpF3(a,b,AF3_(c));return d;} - AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d=ALerpF4(a,b,AF4_(c));return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d=max(a,b);return d;} - AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d=max(a,b);return d;} - AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d=max(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d=min(a,b);return d;} - AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d=min(a,b);return d;} - AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d=min(a,b);return d;} 
-//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d=a*b;return d;} - AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d=a*b;return d;} - AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d=a*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d=a*AF2_(b);return d;} - AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d=a*AF3_(b);return d;} - AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d=a*AF4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opANegF2(outAF2 d,inAF2 a){d=-a;return d;} - AF3 opANegF3(outAF3 d,inAF3 a){d=-a;return d;} - AF4 opANegF4(outAF4 d,inAF4 a){d=-a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opARcpF2(outAF2 d,inAF2 a){d=ARcpF2(a);return d;} - AF3 opARcpF3(outAF3 d,inAF3 a){d=ARcpF3(a);return d;} - AF4 opARcpF4(outAF4 d,inAF4 a){d=ARcpF4(a);return d;} -#endif diff --git a/sample/src/VK/Shaders/ffx_spd.h b/sample/src/VK/Shaders/ffx_spd.h deleted file mode 100644 index 68c9ef4..0000000 --- a/sample/src/VK/Shaders/ffx_spd.h +++ /dev/null @@ -1,1164 +0,0 @@ -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// [FFX SPD] Single Pass Downsampler 1.0 -// -//============================================================================================================================== -// LICENSE -// ======= -// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. 
-// ------- -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// ------- -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the -// Software. -// ------- -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------------------------------------------------------ -// INTEGRATION SUMMARY FOR CPU -// =========================== -// // you need to provide as constants: -// // number of mip levels to be computed (maximum is 12) -// // number of total thread groups: ((widthInPixels+63)>>6) * ((heightInPixels+63)>>6) -// ... 
-// // Dispatch the shader such that each thread group works on a 64x64 sub-tile of the source image -// vkCmdDispatch(cmdBuf,(widthInPixels+63)>>6,(heightInPixels+63)>>6,1); - -//------------------------------------------------------------------------------------------------------------------------------ -// INTEGRATION SUMMARY FOR GPU -// =========================== - -// [SAMPLER] - if you want to use a sampler with linear filtering for loading the source image -// follow additionally the instructions marked with [SAMPLER] -// add following define: -// #SPD_LINEAR_SAMPLER -// this is recommended, as using one sample() with linear filter to reduce 2x2 is faster -// than 4x load() plus manual averaging - -// // Setup layout. Example below for VK_FORMAT_R16G16B16A16_SFLOAT. -// // Note: If you use UNORM/SRGB format, you need to convert to linear space -// // when using UAV load() and store() -// // conversion to linear (load function): x*x -// // conversion from linear (store function): sqrt() - -// // source image -// GLSL: layout(set=0,binding=0,rgba16f)uniform image2D imgSrc; -// [SAMPLER]: layout(set=0,binding=0)uniform texture2D imgSrc; -// HLSL: [[vk::binding(0)]] Texture2D imgSrc :register(u0); - -// // destination -> 12 is the maximum number of mips supported by DS -// GLSL: layout(set=0,binding=1,rgba16f) uniform coherent image2D imgDst[12]; -// HLSL: [[vk::binding(1)]] globallycoherent RWTexture2D imgDst[12] :register(u1); - -// // global atomic counter - MUST be initialized to 0 -// // GLSL: -// layout(std430, set=0, binding=2) buffer globalAtomicBuffer -// { -// uint counter; -// } globalAtomic; -// // HLSL: -// struct globalAtomicBuffer -// { -// uint counter; -// }; -// [[vk::binding(2)]] RWStructuredBuffer globalAtomic; - -// // [SAMPLER] add sampler -// GLSL: layout(set=0, binding=3) uniform sampler srcSampler; -// HLSL: [[vk::binding(3)]] SamplerState srcSampler :register(s0); - -// // constants - either push constant or constant buffer -// // or 
calculate within shader -// // [SAMPLER] when using sampler add inverse source image size -// // GLSL: -// layout(push_constant) uniform pushConstants { -// uint mips; // needed to opt out earlier if mips are < 12 -// uint numWorkGroups; // number of total thread groups, so numWorkGroupsX * numWorkGroupsY * numWorkGroupsZ -// } spdConstants; -// // HLSL: -// [[vk::push_constant]] -// cbuffer spdConstants { -// uint mips; -// uint numWorkGroups; -// }; - -// ... -// // Setup pre-portability-header defines (sets up GLSL/HLSL path, etc) -// #define A_GPU 1 -// #define A_GLSL 1 // or // #define A_HLSL 1 - -// // if you want to use PACKED version -// // recommended if bpc <= 16bit -// #define A_HALF - -// ... -// // Include the portability header (or copy it in without an include). -// #include "ffx_a.h" -// ... - -// // Define LDS variables -// shared AF4 spd_intermediate[16][16]; // HLSL: groupshared -// shared AU1 spd_counter; // HLSL: groupshared -// // PACKED version -// shared AH4 spd_intermediate[16][16]; // HLSL: groupshared -// // Note: You can also use -// shared AF1 spd_intermediateR[16][16]; -// shared AF1 spd_intermediateG[16][16]; -// shared AF1 spd_intermediateB[16][16]; -// shared AF1 spd_intermediateA[16][16]; -// // or for Packed version: -// shared AH2 spd_intermediateRG[16][16]; -// shared AH2 spd_intermediateBA[16][16]; -// // This is potentially faster -// // Adapt your load and store functions accordingly - -// // if subgroup operations are not supported / can't use SM6.0 -// #define SPD_NO_WAVE_OPERATIONS - -// // Define the fetch function(s) and the reduction function -// // if non-power-of-2 textures, add border controls to the load and store functions -// // to make sure the borders of the mip level look as you want it -// // if you don't add border controls you'll read zeros past the border -// // if you load with a sampler, this is obv. 
handled by your sampler :) -// // this is also the place where you need to do color space transformation if needed -// // E.g. if your texture format is SRGB/UNORM and you use the UAV load and store functions -// // no automatic to/from linear conversions are happening -// // there is to/from linear conversions when using a sampler and render target approach -// // conversion to linear (load function): x*x -// // conversion from linear (store function): sqrt() - -// // Load from source image -// GLSL: AF4 SpdLoadSourceImage(ASU2 p){return imageLoad(imgSrc, p);} -// HLSL: AF4 SpdLoadSourceImage(ASU2 tex){return imgSrc[tex];} -// [SAMPLER] don't forget to add the define #SPD_LINEAR_SAMPLER :) -// GLSL: -// AF4 SpdLoadSourceImage(ASU2 p){ -// AF2 textureCoord = p * invInputSize + invInputSize; -// return texture(sampler2D(imgSrc, srcSampler), textureCoord); -// } -// HLSL: -// AF4 SpdLoadSourceImage(ASU2 p){ -// AF2 textureCoord = p * invInputSize + invInputSize; -// return imgSrc.SampleLevel(srcSampler, textureCoord, 0); -// } - -// // SpdLoad() takes a 32-bit signed integer 2D coordinate and loads color. 
-// // Loads the 5th mip level, each value is computed by a different thread group -// // last thread group will access all its elements and compute the subsequent mips -// GLSL: AF4 SpdLoad(ASU2 p){return imageLoad(imgDst[5],p);} -// HLSL: AF4 SpdLoad(ASU2 tex){return imgDst[5][tex];} - -// Define the store function -// GLSL: void SpdStore(ASU2 p, AF4 value, AU1 mip){imageStore(imgDst[mip], p, value);} -// HLSL: void SpdStore(ASU2 pix, AF4 value, AU1 index){imgDst[index][pix] = value;} - -// // Define the atomic counter increase function -// // GLSL: -// void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -// AU1 SpdGetAtomicCounter() {return spd_counter;} -// // HLSL: -// void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -// AU1 SpdGetAtomicCounter(){return spd_counter;} - -// // Define the LDS load and store functions -// // GLSL: -// AF4 SpdLoadIntermediate(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){spd_intermediate[x][y] = value;} -// // HLSL: -// AF4 SpdLoadIntermediate(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){spd_intermediate[x][y] = value;} - -// // Define your reduction function: takes as input the four 2x2 values and returns 1 output value -// Example below: computes the average value -// AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} - -// // PACKED VERSION -// Load from source image -// GLSL: AH4 SpdLoadSourceImageH(ASU2 p){return AH4(imageLoad(imgSrc, p));} -// HLSL: AH4 SpdLoadSourceImageH(ASU2 tex){return AH4(imgSrc[tex]);} -// [SAMPLER] -// GLSL: -// AH4 SpdLoadSourceImageH(ASU2 p){ -// AF2 textureCoord = p * invInputSize + invInputSize; -// return AH4(texture(sampler2D(imgSrc, srcSampler), textureCoord)); -// } -// HLSL: -// AH4 SpdLoadSourceImageH(ASU2 p){ -// AF2 textureCoord = p * invInputSize + invInputSize; -// return 
AH4(imgSrc.SampleLevel(srcSampler, textureCoord, 0)); -// } - -// // SpdLoadH() takes a 32-bit signed integer 2D coordinate and loads color. -// // Loads the 5th mip level, each value is computed by a different thread group -// // last thread group will access all its elements and compute the subsequent mips -// GLSL: AH4 SpdLoadH(ASU2 p){return AH4(imageLoad(imgDst[5],p));} -// HLSL: AH4 SpdLoadH(ASU2 tex){return AH4(imgDst[5][tex]);} - -// Define the store function -// GLSL: void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imageStore(imgDst[mip], p, AF4(value));} -// HLSL: void SpdStoreH(ASU2 pix, AH4 value, AU1 index){imgDst[index][pix] = AF4(value);} - -// // Define the atomic counter increase function -// // GLSL: -// void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -// AU1 SpdGetAtomicCounter() {return spd_counter;} -// // HLSL: -// void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -// AU1 SpdGetAtomicCounter(){return spd_counter;} - -// // Define the lds load and store functions -// // GLSL: -// AH4 SpdLoadIntermediateH(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){spd_intermediate[x][y] = value;} -// // HLSL: -// AH4 SpdLoadIntermediate(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediate(AU1 x, AU1 y, AH4 value){spd_intermediate[x][y] = value;} - -// // Define your reduction function: takes as input the four 2x2 values and returns 1 output value -// Example below: computes the average value -// AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25);} - -// // - -// // If you only use PACKED version -// #define SPD_PACKED_ONLY - -// // Include this SPD (single pass downsampler) header file (or copy it in without an include). -// #include "ffx_spd.h" -// ... 
- -// // Example in shader integration -// // GLSL: -// layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; -// void main(){ -// // Call the downsampling function -// SpdDownsample(AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), -// AU1(spdConstants.mips), AU1(spdConstants.numWorkGroups)); -// -// // PACKED: -// SpdDownsampleH(AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), -// AU1(spdConstants.mips), AU1(spdConstants.numWorkGroups)); -// ... -// // HLSL: -// [numthreads(256,1,1)] -// void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) { -// SpdDownsample(AU2(WorkGroupId.xy), AU1(LocalThreadIndex), -// AU1(mips), AU1(numWorkGroups)); -// -// // PACKED: -// SpdDownsampleH(AU2(WorkGroupId.xy), AU1(LocalThreadIndex), -// AU1(mips), AU1(numWorkGroups)); -// ... - -// -//------------------------------------------------------------------------------------------------------------------------------ - - - -//============================================================================================================================== -// NON-PACKED VERSION -//============================================================================================================================== - -#ifdef SPD_PACKED_ONLY - // Avoid compiler error -AF4 SpdLoadSourceImage(ASU2 p) { return AF4(0.0, 0.0, 0.0, 0.0); } -AF4 SpdLoad(ASU2 p) { return AF4(0.0, 0.0, 0.0, 0.0); } -void SpdStore(ASU2 p, AF4 value, AU1 mip) {} -AF4 SpdLoadIntermediate(AU1 x, AU1 y) { return AF4(0.0, 0.0, 0.0, 0.0); } -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) {} -AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) { return AF4(0.0, 0.0, 0.0, 0.0); } -#endif - -//_____________________________________________________________/\_______________________________________________________________ -#if defined(A_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) -#extension GL_KHR_shader_subgroup_quad : require -#endif - -void SpdWorkgroupShuffleBarrier() { -#ifdef A_GLSL - 
barrier(); -#endif -#ifdef A_HLSL - GroupMemoryBarrierWithGroupSync(); -#endif -} - -// Only last active workgroup should proceed -bool SpdExitWorkgroup(AU1 numWorkGroups, AU1 localInvocationIndex) -{ - // global atomic counter - if (localInvocationIndex == 0) - { - SpdIncreaseAtomicCounter(); - } - SpdWorkgroupShuffleBarrier(); - return (SpdGetAtomicCounter() != (numWorkGroups - 1)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -// User defined: AF4 DSReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3); - -AF4 SpdReduceQuad(AF4 v) -{ -#if defined(A_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) - AF4 v0 = v; - AF4 v1 = subgroupQuadSwapHorizontal(v); - AF4 v2 = subgroupQuadSwapVertical(v); - AF4 v3 = subgroupQuadSwapDiagonal(v); - return SpdReduce4(v0, v1, v2, v3); -#elif defined(A_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS) - // requires SM6.0 - AU1 quad = WaveGetLaneIndex() & (~0x3); - AF4 v0 = v; - AF4 v1 = WaveReadLaneAt(v, quad | 1); - AF4 v2 = WaveReadLaneAt(v, quad | 2); - AF4 v3 = WaveReadLaneAt(v, quad | 3); - return SpdReduce4(v0, v1, v2, v3); - /* - // if SM6.0 is not available, you can use the AMD shader intrinsics - // works for DX11 - AF4 v0 = v; - AF4 v1; - v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - AF4 v2; - v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, 
AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - AF4 v3; - v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - return SpdReduce4(v0, v1, v2, v3); - */ -#endif - return AF4_x(0.0); -} - -AF4 SpdReduceIntermediate(AU2 i0, AU2 i1, AU2 i2, AU2 i3) -{ - AF4 v0 = SpdLoadIntermediate(i0.x, i0.y); - AF4 v1 = SpdLoadIntermediate(i1.x, i1.y); - AF4 v2 = SpdLoadIntermediate(i2.x, i2.y); - AF4 v3 = SpdLoadIntermediate(i3.x, i3.y); - return SpdReduce4(v0, v1, v2, v3); -} - -AF4 SpdReduceLoad4(AU2 i0, AU2 i1, AU2 i2, AU2 i3) -{ - AF4 v0 = SpdLoad(ASU2(i0)); - AF4 v1 = SpdLoad(ASU2(i1)); - AF4 v2 = SpdLoad(ASU2(i2)); - AF4 v3 = SpdLoad(ASU2(i3)); - return SpdReduce4(v0, v1, v2, v3); -} - -AF4 SpdReduceLoad4(AU2 base) -{ - return SpdReduceLoad4( - AU2(base + AU2(0, 0)), - AU2(base + AU2(0, 1)), - AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); -} - -AF4 SpdReduceLoadSourceImage4(AU2 i0, AU2 i1, AU2 i2, AU2 i3) -{ - AF4 v0 = SpdLoadSourceImage(ASU2(i0)); - AF4 v1 = SpdLoadSourceImage(ASU2(i1)); - AF4 v2 = SpdLoadSourceImage(ASU2(i2)); - AF4 v3 = SpdLoadSourceImage(ASU2(i3)); - return SpdReduce4(v0, v1, v2, v3); -} - -AF4 SpdReduceLoadSourceImage4(AU2 base) -{ -#ifdef SPD_LINEAR_SAMPLER - return SpdLoadSourceImage(ASU2(base)); -#else - return SpdReduceLoadSourceImage4( - AU2(base + AU2(0, 0)), - AU2(base + AU2(0, 1)), - AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); -#endif -} - -void SpdDownsampleMips_0_1_Intrinsics(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ - AF4 v[4]; - - ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); - ASU2 pix = 
ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[0], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[1], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[2], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[3], 0); - - if (mip <= 1) - return; - - v[0] = SpdReduceQuad(v[0]); - v[1] = SpdReduceQuad(v[1]); - v[2] = SpdReduceQuad(v[2]); - v[3] = SpdReduceQuad(v[3]); - - if ((localInvocationIndex % 4) == 0) - { - SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x / 2, y / 2), v[0], 1); - SpdStoreIntermediate( - x / 2, y / 2, v[0]); - - SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x / 2 + 8, y / 2), v[1], 1); - SpdStoreIntermediate( - x / 2 + 8, y / 2, v[1]); - - SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x / 2, y / 2 + 8), v[2], 1); - SpdStoreIntermediate( - x / 2, y / 2 + 8, v[2]); - - SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x / 2 + 8, y / 2 + 8), v[3], 1); - SpdStoreIntermediate( - x / 2 + 8, y / 2 + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1_LDS(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ - AF4 v[4]; - - ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); - ASU2 pix = ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[0], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[1], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = 
SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[2], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[3], 0); - - if (mip <= 1) - return; - - for (int i = 0; i < 4; i++) - { - SpdStoreIntermediate(x, y, v[i]); - SpdWorkgroupShuffleBarrier(); - if (localInvocationIndex < 64) - { - v[i] = SpdReduceIntermediate( - AU2(x * 2 + 0, y * 2 + 0), - AU2(x * 2 + 1, y * 2 + 0), - AU2(x * 2 + 0, y * 2 + 1), - AU2(x * 2 + 1, y * 2 + 1) - ); - SpdStore(ASU2(workGroupID.xy * 16) + ASU2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1); - } - SpdWorkgroupShuffleBarrier(); - } - - if (localInvocationIndex < 64) - { - SpdStoreIntermediate(x + 0, y + 0, v[0]); - SpdStoreIntermediate(x + 8, y + 0, v[1]); - SpdStoreIntermediate(x + 0, y + 8, v[2]); - SpdStoreIntermediate(x + 8, y + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip); -#else - SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip); -#endif -} - - -void SpdDownsampleMip_2(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 64) - { - AF4 v = SpdReduceIntermediate( - AU2(x * 2 + 0 + 0, y * 2 + 0), - AU2(x * 2 + 0 + 1, y * 2 + 0), - AU2(x * 2 + 0 + 0, y * 2 + 1), - AU2(x * 2 + 0 + 1, y * 2 + 1) - ); - SpdStore(ASU2(workGroupID.xy * 8) + ASU2(x, y), v, mip); - // store to LDS, try to reduce bank conflicts - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - // ... 
- // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); - } -#else - AF4 v = SpdLoadIntermediate(x, y); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(ASU2(workGroupID.xy * 8) + ASU2(x / 2, y / 2), v, mip); - SpdStoreIntermediate(x + (y / 2) % 2, y, v); - } -#endif -} - -void SpdDownsampleMip_3(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 16) - { - // x 0 x 0 - // 0 0 0 0 - // 0 x 0 x - // 0 0 0 0 - AF4 v = SpdReduceIntermediate( - AU2(x * 4 + 0 + 0, y * 4 + 0), - AU2(x * 4 + 2 + 0, y * 4 + 0), - AU2(x * 4 + 0 + 1, y * 4 + 2), - AU2(x * 4 + 2 + 1, y * 4 + 2) - ); - SpdStore(ASU2(workGroupID.xy * 4) + ASU2(x, y), v, mip); - // store to LDS - // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 - // ... - // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 - // ... - // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x - // ... - SpdStoreIntermediate(x * 4 + y, y * 4, v); - } -#else - if (localInvocationIndex < 64) - { - AF4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(ASU2(workGroupID.xy * 4) + ASU2(x / 2, y / 2), v, mip); - SpdStoreIntermediate(x * 2 + y / 2, y * 2, v); - } - } -#endif -} - -void SpdDownsampleMip_4(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 4) - { - // x 0 0 0 x 0 0 0 - // ... - // 0 x 0 0 0 x 0 0 - AF4 v = SpdReduceIntermediate( - AU2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), - AU2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), - AU2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), - AU2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) - ); - SpdStore(ASU2(workGroupID.xy * 2) + ASU2(x, y), v, mip); - // store to LDS - // x x x x 0 ... 
- // 0 ... - SpdStoreIntermediate(x + y * 2, 0, v); - } -#else - if (localInvocationIndex < 16) - { - AF4 v = SpdLoadIntermediate(x * 4 + y, y * 4); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(ASU2(workGroupID.xy * 2) + ASU2(x / 2, y / 2), v, mip); - SpdStoreIntermediate(x / 2 + y, 0, v); - } - } -#endif -} - -void SpdDownsampleMip_5(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 1) - { - // x x x x 0 ... - // 0 ... - AF4 v = SpdReduceIntermediate( - AU2(0, 0), - AU2(1, 0), - AU2(2, 0), - AU2(3, 0) - ); - SpdStore(ASU2(workGroupID.xy), v, mip); - } -#else - if (localInvocationIndex < 4) - { - AF4 v = SpdLoadIntermediate(localInvocationIndex, 0); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(ASU2(workGroupID.xy), v, mip); - } - } -#endif -} - -void SpdDownsampleMips_6_7(AU1 x, AU1 y, AU1 mips) -{ - ASU2 tex = ASU2(x * 4 + 0, y * 4 + 0); - ASU2 pix = ASU2(x * 2 + 0, y * 2 + 0); - AF4 v0 = SpdReduceLoad4(tex); - SpdStore(pix, v0, 6); - - tex = ASU2(x * 4 + 2, y * 4 + 0); - pix = ASU2(x * 2 + 1, y * 2 + 0); - AF4 v1 = SpdReduceLoad4(tex); - SpdStore(pix, v1, 6); - - tex = ASU2(x * 4 + 0, y * 4 + 2); - pix = ASU2(x * 2 + 0, y * 2 + 1); - AF4 v2 = SpdReduceLoad4(tex); - SpdStore(pix, v2, 6); - - tex = ASU2(x * 4 + 2, y * 4 + 2); - pix = ASU2(x * 2 + 1, y * 2 + 1); - AF4 v3 = SpdReduceLoad4(tex); - SpdStore(pix, v3, 6); - - if (mips <= 7) return; - // no barrier needed, working on values only from the same thread - - AF4 v = SpdReduce4(v0, v1, v2, v3); - SpdStore(ASU2(x, y), v, 7); - SpdStoreIntermediate(x, y, v); -} - -void SpdDownsampleNextFour(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 baseMip, AU1 mips) -{ - if (mips <= baseMip) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip); - - if 
(mips <= baseMip + 1) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1); - - if (mips <= baseMip + 2) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2); - - if (mips <= baseMip + 3) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_5(x, y, workGroupID, localInvocationIndex, baseMip + 3); -} - -void SpdDownsample( - AU2 workGroupID, - AU1 localInvocationIndex, - AU1 mips, - AU1 numWorkGroups -) { - AU2 sub_xy = ARmpRed8x8(localInvocationIndex % 64); - AU1 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); - AU1 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); - SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips); - - SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips); - - if (mips <= 6) return; - - if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex)) return; - - // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. 
- SpdDownsampleMips_6_7(x, y, mips); - - SpdDownsampleNextFour(x, y, AU2(0, 0), localInvocationIndex, 8, mips); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -//============================================================================================================================== -// PACKED VERSION -//============================================================================================================================== - -#ifdef A_HALF // A_HALF - -#ifdef A_GLSL -#extension GL_EXT_shader_subgroup_extended_types_float16 : require -#endif - -AH4 SpdReduceQuadH(AH4 v) -{ -#if defined(A_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) - AH4 v0 = v; - AH4 v1 = subgroupQuadSwapHorizontal(v); - AH4 v2 = subgroupQuadSwapVertical(v); - AH4 v3 = subgroupQuadSwapDiagonal(v); - return SpdReduce4H(v0, v1, v2, v3); -#elif defined(A_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS) - // requires SM6.0 - AU1 quad = WaveGetLaneIndex() & (~0x3); - AH4 v0 = v; - AH4 v1 = WaveReadLaneAt(v, quad | 1); - AH4 v2 = WaveReadLaneAt(v, quad | 2); - AH4 v3 = WaveReadLaneAt(v, quad | 3); - return SpdReduce4H(v0, v1, v2, v3); - /* - // if SM6.0 is not available, you can use the AMD shader intrinsics - // works for DX11 - AH4 v0 = v; - AH4 v1; - v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - AH4 v2; - v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, 
AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - AH4 v3; - v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - return SpdReduce4H(v0, v1, v2, v3); - */ -#endif - return AH4(0.0, 0.0, 0.0, 0.0); - -} - -AH4 SpdReduceIntermediateH(AU2 i0, AU2 i1, AU2 i2, AU2 i3) -{ - AH4 v0 = SpdLoadIntermediateH(i0.x, i0.y); - AH4 v1 = SpdLoadIntermediateH(i1.x, i1.y); - AH4 v2 = SpdLoadIntermediateH(i2.x, i2.y); - AH4 v3 = SpdLoadIntermediateH(i3.x, i3.y); - return SpdReduce4H(v0, v1, v2, v3); -} - -AH4 SpdReduceLoad4H(AU2 i0, AU2 i1, AU2 i2, AU2 i3) -{ - AH4 v0 = SpdLoadH(ASU2(i0)); - AH4 v1 = SpdLoadH(ASU2(i1)); - AH4 v2 = SpdLoadH(ASU2(i2)); - AH4 v3 = SpdLoadH(ASU2(i3)); - return SpdReduce4H(v0, v1, v2, v3); -} - -AH4 SpdReduceLoad4H(AU2 base) -{ - return SpdReduceLoad4H( - AU2(base + AU2(0, 0)), - AU2(base + AU2(0, 1)), - AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); -} - -AH4 SpdReduceLoadSourceImage4H(AU2 i0, AU2 i1, AU2 i2, AU2 i3) -{ - AH4 v0 = SpdLoadSourceImageH(ASU2(i0)); - AH4 v1 = SpdLoadSourceImageH(ASU2(i1)); - AH4 v2 = SpdLoadSourceImageH(ASU2(i2)); - AH4 v3 = SpdLoadSourceImageH(ASU2(i3)); - return SpdReduce4H(v0, v1, v2, v3); -} - -AH4 SpdReduceLoadSourceImage4H(AU2 base) -{ -#ifdef SPD_LINEAR_SAMPLER - return SpdLoadSourceImageH(ASU2(base)); -#else - return SpdReduceLoadSourceImage4H( - AU2(base + AU2(0, 0)), - AU2(base + AU2(0, 1)), - AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); -#endif -} - -void SpdDownsampleMips_0_1_IntrinsicsH(AU1 x, AU1 y, AU2 workGroupID, 
AU1 localInvocationIndex, AU1 mips) -{ - AH4 v[4]; - - ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); - ASU2 pix = ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[0], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[1], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[2], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[3], 0); - - if (mips <= 1) - return; - - v[0] = SpdReduceQuadH(v[0]); - v[1] = SpdReduceQuadH(v[1]); - v[2] = SpdReduceQuadH(v[2]); - v[3] = SpdReduceQuadH(v[3]); - - if ((localInvocationIndex % 4) == 0) - { - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x / 2, y / 2), v[0], 1); - SpdStoreIntermediateH(x / 2, y / 2, v[0]); - - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x / 2 + 8, y / 2), v[1], 1); - SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]); - - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x / 2, y / 2 + 8), v[2], 1); - SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]); - - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x / 2 + 8, y / 2 + 8), v[3], 1); - SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1_LDSH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips) -{ - AH4 v[4]; - - ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); - ASU2 pix = ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[0], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[1], 0); - - 
tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[2], 0); - - tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); - pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[3], 0); - - if (mips <= 1) - return; - - for (int i = 0; i < 4; i++) - { - SpdStoreIntermediateH(x, y, v[i]); - SpdWorkgroupShuffleBarrier(); - if (localInvocationIndex < 64) - { - v[i] = SpdReduceIntermediateH( - AU2(x * 2 + 0, y * 2 + 0), - AU2(x * 2 + 1, y * 2 + 0), - AU2(x * 2 + 0, y * 2 + 1), - AU2(x * 2 + 1, y * 2 + 1) - ); - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1); - } - SpdWorkgroupShuffleBarrier(); - } - - if (localInvocationIndex < 64) - { - SpdStoreIntermediateH(x + 0, y + 0, v[0]); - SpdStoreIntermediateH(x + 8, y + 0, v[1]); - SpdStoreIntermediateH(x + 0, y + 8, v[2]); - SpdStoreIntermediateH(x + 8, y + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips); -#else - SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips); -#endif -} - - -void SpdDownsampleMip_2H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 64) - { - AH4 v = SpdReduceIntermediateH( - AU2(x * 2 + 0 + 0, y * 2 + 0), - AU2(x * 2 + 0 + 1, y * 2 + 0), - AU2(x * 2 + 0 + 0, y * 2 + 1), - AU2(x * 2 + 0 + 1, y * 2 + 1) - ); - SpdStoreH(ASU2(workGroupID.xy * 8) + ASU2(x, y), v, mip); - // store to LDS, try to reduce bank conflicts - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - 
// ... - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v); - } -#else - AH4 v = SpdLoadIntermediateH(x, y); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(ASU2(workGroupID.xy * 8) + ASU2(x / 2, y / 2), v, mip); - SpdStoreIntermediateH(x + (y / 2) % 2, y, v); - } -#endif -} - -void SpdDownsampleMip_3H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 16) - { - // x 0 x 0 - // 0 0 0 0 - // 0 x 0 x - // 0 0 0 0 - AH4 v = SpdReduceIntermediateH( - AU2(x * 4 + 0 + 0, y * 4 + 0), - AU2(x * 4 + 2 + 0, y * 4 + 0), - AU2(x * 4 + 0 + 1, y * 4 + 2), - AU2(x * 4 + 2 + 1, y * 4 + 2) - ); - SpdStoreH(ASU2(workGroupID.xy * 4) + ASU2(x, y), v, mip); - // store to LDS - // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 - // ... - // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 - // ... - // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x - // ... - SpdStoreIntermediateH(x * 4 + y, y * 4, v); - } -#else - if (localInvocationIndex < 64) - { - AH4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(ASU2(workGroupID.xy * 4) + ASU2(x / 2, y / 2), v, mip); - SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v); - } - } -#endif -} - -void SpdDownsampleMip_4H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 4) - { - // x 0 0 0 x 0 0 0 - // ... 
- // 0 x 0 0 0 x 0 0 - AH4 v = SpdReduceIntermediateH( - AU2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), - AU2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), - AU2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), - AU2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) - ); - SpdStoreH(ASU2(workGroupID.xy * 2) + ASU2(x, y), v, mip); - // store to LDS - // x x x x 0 ... - // 0 ... - SpdStoreIntermediateH(x + y * 2, 0, v); - } -#else - if (localInvocationIndex < 16) - { - AH4 v = SpdLoadIntermediateH(x * 4 + y, y * 4); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(ASU2(workGroupID.xy * 2) + ASU2(x / 2, y / 2), v, mip); - SpdStoreIntermediateH(x / 2 + y, 0, v); - } - } -#endif -} - -void SpdDownsampleMip_5H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) -{ -#ifdef SPD_NO_WAVE_OPERATIONS - if (localInvocationIndex < 1) - { - // x x x x 0 ... - // 0 ... - AH4 v = SpdReduceIntermediateH( - AU2(0, 0), - AU2(1, 0), - AU2(2, 0), - AU2(3, 0) - ); - SpdStoreH(ASU2(workGroupID.xy), v, mip); - } -#else - if (localInvocationIndex < 4) - { - AH4 v = SpdLoadIntermediateH(localInvocationIndex, 0); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(ASU2(workGroupID.xy), v, mip); - } - } -#endif -} - -void SpdDownsampleMips_6_7H(AU1 x, AU1 y, AU1 mips) -{ - ASU2 tex = ASU2(x * 4 + 0, y * 4 + 0); - ASU2 pix = ASU2(x * 2 + 0, y * 2 + 0); - AH4 v0 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v0, 6); - - tex = ASU2(x * 4 + 2, y * 4 + 0); - pix = ASU2(x * 2 + 1, y * 2 + 0); - AH4 v1 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v1, 6); - - tex = ASU2(x * 4 + 0, y * 4 + 2); - pix = ASU2(x * 2 + 0, y * 2 + 1); - AH4 v2 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v2, 6); - - tex = ASU2(x * 4 + 2, y * 4 + 2); - pix = ASU2(x * 2 + 1, y * 2 + 1); - AH4 v3 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v3, 6); - - if (mips < 8) return; - // no barrier needed, working on values only from the same thread - - AH4 v = 
SpdReduce4H(v0, v1, v2, v3); - SpdStoreH(ASU2(x, y), v, 7); - SpdStoreIntermediateH(x, y, v); -} - -void SpdDownsampleNextFourH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 baseMip, AU1 mips) -{ - if (mips <= baseMip) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip); - - if (mips <= baseMip + 1) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1); - - if (mips <= baseMip + 2) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2); - - if (mips <= baseMip + 3) return; - SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_5H(x, y, workGroupID, localInvocationIndex, baseMip + 3); -} - -void SpdDownsampleH( - AU2 workGroupID, - AU1 localInvocationIndex, - AU1 mips, - AU1 numWorkGroups -) { - AU2 sub_xy = ARmpRed8x8(localInvocationIndex % 64); - AU1 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); - AU1 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); - - SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips); - - SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips); - - if (mips < 7) return; - - if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex)) return; - - // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. 
- SpdDownsampleMips_6_7H(x, y, mips); - - SpdDownsampleNextFourH(x, y, AU2(0, 0), localInvocationIndex, 8, mips); -} - -#endif \ No newline at end of file diff --git a/ffx-sssr/src/reflection_view.inl b/sample/src/VK/Sources/BlueNoiseSampler.cpp similarity index 85% rename from ffx-sssr/src/reflection_view.inl rename to sample/src/VK/Sources/BlueNoiseSampler.cpp index 8833b69..9d0efea 100644 --- a/ffx-sssr/src/reflection_view.inl +++ b/sample/src/VK/Sources/BlueNoiseSampler.cpp @@ -19,14 +19,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ********************************************************************/ -#pragma once +#include "stdafx.h" +#include "BlueNoiseSampler.h" -namespace ffx_sssr +namespace SSSR_SAMPLE_VK { - /** - The constructor for the ReflectionView class. - */ - ReflectionView::ReflectionView() - { - } + void BlueNoiseSamplerVK::OnDestroy() + { + sobolBuffer.OnDestroy(); + rankingTileBuffer.OnDestroy(); + scramblingTileBuffer.OnDestroy(); + } + } diff --git a/sample/src/VK/Sources/BlueNoiseSampler.h b/sample/src/VK/Sources/BlueNoiseSampler.h new file mode 100644 index 0000000..1d79c8c --- /dev/null +++ b/sample/src/VK/Sources/BlueNoiseSampler.h @@ -0,0 +1,42 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#pragma once +#include "BufferVK.h" +namespace SSSR_SAMPLE_VK +{ + /** + The BlueNoiseSamplerVK struct represents a blue-noise sampler to be used for random number generation. + + \note Original implementation can be found here: https://eheitzresearch.wordpress.com/762-2/ + */ + struct BlueNoiseSamplerVK + { + // The Sobol sequence buffer. + BufferVK sobolBuffer; + // The ranking tile buffer for sampling. + BufferVK rankingTileBuffer; + // The scrambling tile buffer for sampling. 
+ BufferVK scramblingTileBuffer; + + void OnDestroy(); + }; +} \ No newline at end of file diff --git a/sample/src/VK/Sources/BufferVK.cpp b/sample/src/VK/Sources/BufferVK.cpp new file mode 100644 index 0000000..2b14cef --- /dev/null +++ b/sample/src/VK/Sources/BufferVK.cpp @@ -0,0 +1,237 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#include "stdafx.h" +#include "BufferVK.h" +#include "memory.h" + +namespace SSSR_SAMPLE_VK +{ + /** + The constructor for the BufferVK class. + */ + BufferVK::BufferVK() + : buffer_(VK_NULL_HANDLE) + , device_(VK_NULL_HANDLE) + , memory_(VK_NULL_HANDLE) + , buffer_view_(VK_NULL_HANDLE) + , mappable_(false) + , mapped_(false) + { + } + + /** + The destructor for the BufferVK class. 
+ */ + BufferVK::~BufferVK() + { + + } + + void BufferVK::OnDestroy() + { + if (mapped_) + { + Unmap(); + } + + if (buffer_) + { + vkDestroyBuffer(device_, buffer_, nullptr); + buffer_ = VK_NULL_HANDLE; + } + + if (memory_) + { + vkFreeMemory(device_, memory_, nullptr); + memory_ = VK_NULL_HANDLE; + } + + if (buffer_view_) + { + vkDestroyBufferView(device_, buffer_view_, nullptr); + buffer_view_ = VK_NULL_HANDLE; + } + + device_ = VK_NULL_HANDLE; + } + + /** + The constructor for the BufferVK class. + + \param device The VkDevice that creates the buffer view. + \param physical_device The VkPhysicalDevice to determine the right memory heap. + \param create_info The CreateInfo struct. + */ + BufferVK::BufferVK(VkDevice device, VkPhysicalDevice physical_device, const CreateInfo& create_info) + : device_(device) + , buffer_(VK_NULL_HANDLE) + , memory_(VK_NULL_HANDLE) + , buffer_view_(VK_NULL_HANDLE) + , mappable_(false) + , mapped_(false) + { + VkBufferCreateInfo buffer_create_info = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + buffer_create_info.pNext = nullptr; + buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_create_info.size = create_info.size_in_bytes_; + buffer_create_info.usage = create_info.buffer_usage_; + VkResult vkResult = vkCreateBuffer(device_, &buffer_create_info, nullptr, &buffer_); + assert(VK_SUCCESS == vkResult); + + VkMemoryRequirements memory_requirements = {}; + vkGetBufferMemoryRequirements(device_, buffer_, &memory_requirements); + + VkPhysicalDeviceMemoryProperties memory_properties = {}; + vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties); + + // find the right memory type for this image + int memory_type_index = -1; + for (uint32_t i = 0; i < memory_properties.memoryTypeCount; ++i) + { + const VkMemoryType& memory_type = memory_properties.memoryTypes[i]; + bool has_required_properties = memory_type.propertyFlags & create_info.memory_property_flags; + bool is_required_memory_type = 
memory_requirements.memoryTypeBits & (1 << i); + if (has_required_properties && is_required_memory_type) + { + memory_type_index = i; + break; + } + } + + // abort if we couldn't find the right memory type + assert(memory_type_index != -1); + + if (create_info.memory_property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + mappable_ = true; + mapped_ = false; + } + + VkMemoryAllocateInfo memory_allocate_info = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + memory_allocate_info.pNext = nullptr; + memory_allocate_info.allocationSize = memory_requirements.size; + memory_allocate_info.memoryTypeIndex = memory_type_index; + + vkResult = vkAllocateMemory(device_, &memory_allocate_info, nullptr, &memory_); + assert(VK_SUCCESS == vkResult); + vkResult = vkBindBufferMemory(device_, buffer_, memory_, 0); + assert(VK_SUCCESS == vkResult); + + + PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectName = (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(device, "vkSetDebugUtilsObjectNameEXT"); + if (vkSetDebugUtilsObjectName) + { + VkDebugUtilsObjectNameInfoEXT object_name_info = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT }; + object_name_info.pNext = nullptr; + object_name_info.objectType = VK_OBJECT_TYPE_BUFFER; + object_name_info.objectHandle = reinterpret_cast(buffer_); + object_name_info.pObjectName = create_info.name_; + + vkResult = vkSetDebugUtilsObjectName(device, &object_name_info); + assert(VK_SUCCESS == vkResult); + } + + if (create_info.format_ == VK_FORMAT_UNDEFINED) + { + return; + } + + VkBufferViewCreateInfo buffer_view_create_info = { VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO }; + buffer_view_create_info.pNext = nullptr; + buffer_view_create_info.flags = 0; + buffer_view_create_info.buffer = buffer_; + buffer_view_create_info.format = create_info.format_; + buffer_view_create_info.offset = 0; + buffer_view_create_info.range = VK_WHOLE_SIZE; + + vkResult = vkCreateBufferView(device_, &buffer_view_create_info, nullptr, &buffer_view_); + 
assert(VK_SUCCESS == vkResult); + } + + /** + The constructor for the BufferVK class. + + \param other The buffer to be moved. + */ + BufferVK::BufferVK(BufferVK&& other) noexcept + : buffer_(other.buffer_) + , memory_(other.memory_) + , device_(other.device_) + , buffer_view_(other.buffer_view_) + , mappable_(other.mappable_) + , mapped_(other.mapped_) + { + other.buffer_ = VK_NULL_HANDLE; + other.memory_ = VK_NULL_HANDLE; + other.device_ = VK_NULL_HANDLE; + other.buffer_view_ = VK_NULL_HANDLE; + other.mappable_ = false; + other.mapped_ = false; + } + + /** + Assigns the buffer. + + \param other The buffer to be moved. + \return The assigned buffer. + */ + BufferVK& BufferVK::operator=(BufferVK&& other) noexcept + { + if (this != &other) + { + buffer_ = other.buffer_; + memory_ = other.memory_; + device_ = other.device_; + buffer_view_ = other.buffer_view_; + mappable_ = other.mappable_; + mapped_ = other.mapped_; + + other.buffer_ = VK_NULL_HANDLE; + other.memory_ = VK_NULL_HANDLE; + other.device_ = VK_NULL_HANDLE; + other.buffer_view_ = VK_NULL_HANDLE; + other.mappable_ = false; + other.mapped_ = false; + } + + return *this; + } + + void BufferVK::Map(void** data) + { + assert(mappable_ == true); + assert(mapped_ == false); + + VkResult vkResult = vkMapMemory(device_, memory_, 0, VK_WHOLE_SIZE, 0, data); + assert(VK_SUCCESS == vkResult); + + mapped_ = true; + } + + void BufferVK::Unmap() + { + assert(mappable_ == true); + assert(mapped_ == true); + vkUnmapMemory(device_, memory_); + mapped_ = false; + } +} diff --git a/ffx-sssr/src/vk/sampler_vk.h b/sample/src/VK/Sources/BufferVK.h similarity index 57% rename from ffx-sssr/src/vk/sampler_vk.h rename to sample/src/VK/Sources/BufferVK.h index 07d9c11..01e2008 100644 --- a/ffx-sssr/src/vk/sampler_vk.h +++ b/sample/src/VK/Sources/BufferVK.h @@ -23,33 +23,42 @@ THE SOFTWARE. 
#include -#include "macros.h" -#include "ffx_sssr.h" -#include "buffer_vk.h" - -namespace ffx_sssr +namespace SSSR_SAMPLE_VK { - /** - The BlueNoiseSamplerVK class represents a blue-noise sampler to be used for random number generation. - - \note Original implementation can be found here: https://eheitzresearch.wordpress.com/762-2/ - */ - class BlueNoiseSamplerVK - { - FFX_SSSR_NON_COPYABLE(BlueNoiseSamplerVK); - - public: - BlueNoiseSamplerVK(); - ~BlueNoiseSamplerVK(); - - BlueNoiseSamplerVK(BlueNoiseSamplerVK&& other) noexcept; - BlueNoiseSamplerVK& BlueNoiseSamplerVK::operator =(BlueNoiseSamplerVK&& other) noexcept; - - // The Sobol sequence buffer. - BufferVK sobol_buffer_; - // The ranking tile buffer for sampling. - BufferVK ranking_tile_buffer_; - // The scrambling tile buffer for sampling. - BufferVK scrambling_tile_buffer_; - }; + /** + The BufferVK class is a helper class to create and destroy buffers on Vulkan. + */ + class BufferVK + { + public: + + class CreateInfo + { + public: + VkDeviceSize size_in_bytes_; + VkMemoryPropertyFlags memory_property_flags; + VkBufferUsageFlags buffer_usage_; + VkFormat format_; + const char* name_; + }; + + BufferVK(); + ~BufferVK(); + void OnDestroy(); + + BufferVK(VkDevice device, VkPhysicalDevice physical_device, const CreateInfo& create_info); + + BufferVK(BufferVK&& other) noexcept; + BufferVK& BufferVK::operator =(BufferVK&& other) noexcept; + + void Map(void** data); + void Unmap(); + + VkDevice device_; + VkBuffer buffer_; + VkBufferView buffer_view_; + VkDeviceMemory memory_; // We're creating a low number of allocations for this library, so we just allocate a dedicated memory object per buffer. Normally you'd want to do sub-allocations of a larger allocation. 
+ bool mappable_; + bool mapped_; + }; } diff --git a/sample/src/VK/Sources/SSSR.cpp b/sample/src/VK/Sources/SSSR.cpp new file mode 100644 index 0000000..ee2aa1b --- /dev/null +++ b/sample/src/VK/Sources/SSSR.cpp @@ -0,0 +1,1158 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#include "stdafx.h" + +#include "SSSR.h" +#include + +namespace _1spp +{ +#include "../../../samplerCPP/samplerBlueNoiseErrorDistribution_128x128_OptimizedFor_2d2d2d2d_1spp.cpp" +} + +/** + The available blue noise sampler with 2ssp sampling mode. 
/**
    Performs a rounded-up (ceiling) integer division.

    Computes (value + divisor - 1) / divisor, i.e. the number of groups of
    `divisor` elements needed to cover `value` elements. Used to turn a pixel
    extent into a count of 8x8 tiles / thread groups.

    The formula is only meaningful for non-negative values and a positive
    divisor, and `value + divisor - 1` must not overflow TYPE.

    \param value The value to be divided.
    \param divisor The divisor to be used (must be non-zero).
    \return The quotient rounded up to the next integer.
*/
template <typename TYPE>
static inline TYPE RoundedDivide(TYPE value, TYPE divisor)
{
    return (value + divisor - 1) / divisor;
}
write_set.dstBinding = index; + write_set.dstArrayElement = 0; + write_set.descriptorCount = 1; + write_set.descriptorType = type; + write_set.pImageInfo = nullptr; + write_set.pBufferInfo = nullptr; + write_set.pTexelBufferView = &bufferView; + + vkUpdateDescriptorSets(device, 1, &write_set, 0, NULL); +}; + +void SetDescriptorSet(VkDevice device, uint32_t index, VkImageView imageView, VkDescriptorSet descriptorSet, VkDescriptorType type, VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL) +{ + VkDescriptorImageInfo desc_image; + desc_image.sampler = VK_NULL_HANDLE; + desc_image.imageView = imageView; + desc_image.imageLayout = layout; + + VkWriteDescriptorSet write; + write = {}; + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.pNext = NULL; + write.dstSet = descriptorSet; + write.descriptorCount = 1; + write.descriptorType = type; + write.pImageInfo = &desc_image; + write.dstBinding = index; + write.dstArrayElement = 0; + + vkUpdateDescriptorSets(device, 1, &write, 0, NULL); +} + +void SetDescriptorSetSampler(VkDevice device, uint32_t index, VkSampler sampler, VkDescriptorSet descriptorSet) +{ + VkDescriptorImageInfo image_info = {}; + image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_info.imageView = VK_NULL_HANDLE; + image_info.sampler = sampler; + + VkWriteDescriptorSet write = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; + write.pNext = nullptr; + write.dstSet = descriptorSet; + write.dstBinding = index; + write.dstArrayElement = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + write.pImageInfo = &image_info; + write.pBufferInfo = nullptr; + write.pTexelBufferView = nullptr; + + vkUpdateDescriptorSets(device, 1, &write, 0, NULL); +} + +void CopyToTexture(VkCommandBuffer cb, CAULDRON_VK::Texture* source, CAULDRON_VK::Texture* target, uint32_t width, uint32_t height) +{ + VkImageCopy region = {}; + region.dstOffset = { 0, 0, 0 }; + region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + 
region.dstSubresource.baseArrayLayer = 0; + region.dstSubresource.layerCount = 1; + region.dstSubresource.mipLevel = 0; + region.extent = { width, height, 1 }; + region.srcOffset = { 0, 0, 0 }; + region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.layerCount = 1; + region.srcSubresource.mipLevel = 0; + vkCmdCopyImage(cb, source->Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, target->Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); +} + +using namespace CAULDRON_VK; +namespace SSSR_SAMPLE_VK +{ + void SSSR::OnCreate(Device* pDevice, VkCommandBuffer command_buffer, ResourceViewHeaps* resourceHeap, DynamicBufferRing* constantBufferRing, uint32_t frameCountBeforeReuse, bool enablePerformanceCounters) + { + m_pDevice = pDevice; + m_pConstantBufferRing = constantBufferRing; + m_pResourceViewHeaps = resourceHeap; + m_frameCountBeforeReuse = frameCountBeforeReuse; + m_isPerformanceCountersEnabled = enablePerformanceCounters; + + VkPhysicalDevice physicalDevice = m_pDevice->GetPhysicalDevice(); + VkDevice device = m_pDevice->GetDevice(); + + // Query if the implementation supports VK_EXT_subgroup_size_control + // This is the case if VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME is present. + // Rely on the application to enable the extension if it's available. 
+ VkResult vkResult; + uint32_t extension_count; + vkResult = vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &extension_count, NULL); + assert(VK_SUCCESS == vkResult); + std::vector device_extension_properties(extension_count); + vkResult = vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &extension_count, device_extension_properties.data()); + assert(VK_SUCCESS == vkResult); + m_isSubgroupSizeControlExtensionAvailable = std::find_if(device_extension_properties.begin(), device_extension_properties.end(), + [](const VkExtensionProperties& extensionProps) -> bool { return strcmp(extensionProps.extensionName, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME) == 0; }) + != device_extension_properties.end(); + + m_uploadHeap.OnCreate(m_pDevice, 1024 * 1024); + + VkDescriptorSetLayoutBinding layout_binding = {}; + layout_binding.binding = 0; + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + layout_binding.descriptorCount = 1; + layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + layout_binding.pImmutableSamplers = nullptr; + + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = 1; + descriptor_set_layout_create_info.pBindings = &layout_binding; + + vkResult = vkCreateDescriptorSetLayout(device, &descriptor_set_layout_create_info, nullptr, &m_uniformBufferDescriptorSetLayout); + assert(vkResult == VK_SUCCESS); + + for (uint32_t i = 0; i < frameCountBeforeReuse; ++i) + { + bool bAllocDescriptor = m_pResourceViewHeaps->AllocDescriptor(m_uniformBufferDescriptorSetLayout, &m_uniformBufferDescriptorSet[i]); + assert(bAllocDescriptor == true); + } + + CreateResources(command_buffer); + SetupClassifyTilesPass(); + SetupPrepareIndirectArgsPass(); + SetupIntersectionPass(); + SetupResolveSpatial(); + 
SetupResolveTemporal(); + SetupBlurPass(); + + SetupPerformanceCounters(); + } + + void SSSR::OnCreateWindowSizeDependentResources(VkCommandBuffer command_buffer, const SSSRCreationInfo& input) + { + m_outputWidth = input.outputWidth; + m_outputHeight = input.outputHeight; + + assert(input.outputWidth != 0); + assert(input.outputHeight != 0); + assert(input.DepthHierarchyView != VK_NULL_HANDLE); + assert(input.EnvironmentMapSampler != VK_NULL_HANDLE); + assert(input.EnvironmentMapView != VK_NULL_HANDLE); + assert(input.HDRView != VK_NULL_HANDLE); + assert(input.MotionVectorsView != VK_NULL_HANDLE); + assert(input.NormalBufferView != VK_NULL_HANDLE); + assert(input.NormalHistoryBufferView != VK_NULL_HANDLE); + assert(input.SpecularRoughnessView != VK_NULL_HANDLE); + + CreateWindowSizeDependentResources(command_buffer); + InitializeResourceDescriptorSets(input); + } + + void SSSR::OnDestroy() + { + VkDevice device = m_pDevice->GetDevice(); + for (size_t i = 0; i < m_frameCountBeforeReuse; i++) + { + m_pResourceViewHeaps->FreeDescriptor(m_uniformBufferDescriptorSet[i]); + } + vkDestroyDescriptorSetLayout(device, m_uniformBufferDescriptorSetLayout, nullptr); + + m_classifyTilesPass.OnDestroy(device, m_pResourceViewHeaps); + m_prepareIndirectArgsPass.OnDestroy(device, m_pResourceViewHeaps); + m_intersectPass.OnDestroy(device, m_pResourceViewHeaps); + m_resolveSpatialPass.OnDestroy(device, m_pResourceViewHeaps); + m_resolveTemporalPass.OnDestroy(device, m_pResourceViewHeaps); + m_blurPass.OnDestroy(device, m_pResourceViewHeaps); + m_uploadHeap.OnDestroy(); + + m_rayCounter.OnDestroy(); + m_intersectionPassIndirectArgs.OnDestroy(); + + vkDestroySampler(device, m_linearSampler, nullptr); + m_blueNoiseSampler.OnDestroy(); + + if (m_timestampQueryPool) + { + vkDestroyQueryPool(m_pDevice->GetDevice(), m_timestampQueryPool, nullptr); + } + + } + void SSSR::OnDestroyWindowSizeDependentResources() + { + VkDevice device = m_pDevice->GetDevice(); + + vkDestroyImageView(device, 
m_temporalDenoiserResultView[0], nullptr); + vkDestroyImageView(device, m_temporalDenoiserResultView[1], nullptr); + vkDestroyImageView(device, m_rayLengthsView, nullptr); + vkDestroyImageView(device, m_outputBufferView, nullptr); + vkDestroyImageView(device, m_roughnessTextureView[0], nullptr); + vkDestroyImageView(device, m_roughnessTextureView[1], nullptr); + + m_temporalDenoiserResult[0].OnDestroy(); + m_temporalDenoiserResult[1].OnDestroy(); + m_rayLengths.OnDestroy(); + m_outputBuffer.OnDestroy(); + m_roughnessTexture[0].OnDestroy(); + m_roughnessTexture[1].OnDestroy(); + m_temporalVarianceMask.OnDestroy(); + m_tileMetaDataMask.OnDestroy(); + + m_rayList.OnDestroy(); + } + void SSSR::Draw(VkCommandBuffer command_buffer, const SSSRConstants& sssrConstants, bool showIntersectResult) + { + SetPerfMarkerBegin(command_buffer, "FidelityFX SSSR"); + + QueryTimestamps(command_buffer); + + // Ensure the image is cleared + VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + vkCmdPipelineBarrier(command_buffer, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 1, &barrier, + 0, nullptr, + 0, nullptr); + + uint32_t uniform_buffer_index = sssrConstants.frameIndex % m_frameCountBeforeReuse; + VkDescriptorSet uniformBufferDescriptorSet = m_uniformBufferDescriptorSet[uniform_buffer_index]; + + // Update descriptor to sliding window in upload buffer that contains the updated pass data + { + VkDescriptorBufferInfo uniformBufferInfo = m_pConstantBufferRing->AllocConstantBuffer(sizeof(SSSRConstants), (void*)&sssrConstants); + + VkWriteDescriptorSet write_set = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; + write_set.pNext = nullptr; + write_set.dstSet = uniformBufferDescriptorSet; + write_set.dstBinding = 0; + write_set.dstArrayElement = 0; + write_set.descriptorCount = 1; + write_set.descriptorType = 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + write_set.pImageInfo = nullptr; + write_set.pBufferInfo = &uniformBufferInfo; + write_set.pTexelBufferView = nullptr; + vkUpdateDescriptorSets(m_pDevice->GetDevice(), 1, &write_set, 0, nullptr); + } + + // ClassifyTiles pass + { + VkDescriptorSet sets[] = { uniformBufferDescriptorSet, m_classifyTilesPass.descriptorSets[m_bufferIndex] }; + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_classifyTilesPass.pipeline); + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_classifyTilesPass.pipelineLayout, 0, _countof(sets), sets, 0, nullptr); + uint32_t dim_x = RoundedDivide(m_outputWidth, 8u); + uint32_t dim_y = RoundedDivide(m_outputHeight, 8u); + vkCmdDispatch(command_buffer, dim_x, dim_y, 1); + } + + ComputeBarrier(command_buffer); + + // PrepareIndirectArgs pass + { + VkDescriptorSet sets[] = { uniformBufferDescriptorSet, m_prepareIndirectArgsPass.descriptorSets[m_bufferIndex] }; + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_prepareIndirectArgsPass.pipeline); + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_prepareIndirectArgsPass.pipelineLayout, 0, _countof(sets), sets, 0, nullptr); + vkCmdDispatch(command_buffer, 1, 1, 1); + } + + // Query the amount of time spent in the tile classification pass + if (m_isPerformanceCountersEnabled) + { + auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex]; + assert(timestamp_queries.size() == 1ull && timestamp_queries[0] == TIMESTAMP_QUERY_INIT); + vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_timestampQueryPool, GetTimestampQueryIndex()); + timestamp_queries.push_back(TIMESTAMP_QUERY_TILE_CLASSIFICATION); + } + + // Ensure that the arguments are written + IndirectArgumentsBarrier(command_buffer); + + // Intersection pass + { + VkDescriptorSet sets[] = { uniformBufferDescriptorSet, m_intersectPass.descriptorSets[m_bufferIndex] }; + 
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_intersectPass.pipeline); + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_intersectPass.pipelineLayout, 0, _countof(sets), sets, 0, nullptr); + vkCmdDispatchIndirect(command_buffer, m_intersectionPassIndirectArgs.buffer_, 0); + } + + // Query the amount of time spent in the intersection pass + if (m_isPerformanceCountersEnabled) + { + auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex]; + assert(timestamp_queries.size() == 2ull && timestamp_queries[1] == TIMESTAMP_QUERY_TILE_CLASSIFICATION); + vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_timestampQueryPool, GetTimestampQueryIndex()); + timestamp_queries.push_back(TIMESTAMP_QUERY_INTERSECTION); + } + + if (!showIntersectResult) + { + uint32_t num_tilesX = RoundedDivide(m_outputWidth, 8u); + uint32_t num_tilesY = RoundedDivide(m_outputHeight, 8u); + + // Ensure that the intersection pass finished + VkImageMemoryBarrier intersection_finished_barriers[] = { + Transition(m_temporalDenoiserResult[m_bufferIndex].Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL), + }; + TransitionBarriers(command_buffer, intersection_finished_barriers, _countof(intersection_finished_barriers)); + + // Spatial denoiser pass + { + VkDescriptorSet sets[] = { uniformBufferDescriptorSet, m_resolveSpatialPass.descriptorSets[m_bufferIndex] }; + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_resolveSpatialPass.pipeline); + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_resolveSpatialPass.pipelineLayout, 0, _countof(sets), sets, 0, nullptr); + vkCmdDispatch(command_buffer, num_tilesX, num_tilesY, 1); + } + + VkImageMemoryBarrier spatial_denoiser_finished_barriers[] = { + Transition(m_temporalDenoiserResult[m_bufferIndex].Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), + 
Transition(m_temporalDenoiserResult[1 - m_bufferIndex].Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL), + Transition(m_rayLengths.Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + }; + TransitionBarriers(command_buffer, spatial_denoiser_finished_barriers, _countof(spatial_denoiser_finished_barriers)); + + // Temporal denoiser pass + { + VkDescriptorSet sets[] = { uniformBufferDescriptorSet, m_resolveTemporalPass.descriptorSets[m_bufferIndex] }; + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_resolveTemporalPass.pipeline); + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_resolveTemporalPass.pipelineLayout, 0, _countof(sets), sets, 0, nullptr); + vkCmdDispatch(command_buffer, num_tilesX, num_tilesY, 1); + } + + // Ensure that the temporal denoising pass finished + VkImageMemoryBarrier temporal_denoiser_finished_barriers[] = { + Transition(m_rayLengths.Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), + Transition(m_temporalDenoiserResult[1 - m_bufferIndex].Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), + }; + TransitionBarriers(command_buffer, temporal_denoiser_finished_barriers, _countof(temporal_denoiser_finished_barriers)); + + // Blur pass + { + VkDescriptorSet sets[] = { uniformBufferDescriptorSet, m_blurPass.descriptorSets[m_bufferIndex] }; + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_blurPass.pipeline); + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_blurPass.pipelineLayout, 0, _countof(sets), sets, 0, nullptr); + vkCmdDispatch(command_buffer, num_tilesX, num_tilesY, 1); + } + + // Query the amount of time spent in the denoiser passes + if (m_isPerformanceCountersEnabled) + { + auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex]; + + assert(timestamp_queries.size() == 3ull && timestamp_queries[2] == 
TIMESTAMP_QUERY_INTERSECTION); + + vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_timestampQueryPool, GetTimestampQueryIndex()); + timestamp_queries.push_back(TIMESTAMP_QUERY_DENOISING); + } + } + else + { + VkImageMemoryBarrier copy_barrier_begin[] = { + Transition(m_temporalDenoiserResult[m_bufferIndex].Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL), + Transition(m_outputBuffer.Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL), + }; + TransitionBarriers(command_buffer, copy_barrier_begin, _countof(copy_barrier_begin)); + + CopyToTexture(command_buffer, &m_temporalDenoiserResult[m_bufferIndex], &m_outputBuffer, m_outputWidth, m_outputHeight); + + VkImageMemoryBarrier copy_barrier_end[] = { + Transition(m_temporalDenoiserResult[m_bufferIndex].Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), + Transition(m_outputBuffer.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL), + }; + TransitionBarriers(command_buffer, copy_barrier_end, _countof(copy_barrier_end)); + } + + // Move timestamp queries to next frame + if (m_isPerformanceCountersEnabled) + { + m_timestampFrameIndex = (m_timestampFrameIndex + 1u) % m_frameCountBeforeReuse; + } + + m_bufferIndex = 1 - m_bufferIndex; + SetPerfMarkerEnd(command_buffer); + } + + void SSSR::GUI(int* pSlice) + { + + } + + Texture* SSSR::GetOutputTexture() + { + return &m_outputBuffer; + } + + VkImageView SSSR::GetOutputTextureView() const + { + return m_outputBufferView; + } + + std::uint64_t SSSR::GetTileClassificationElapsedGpuTicks() const + { + return m_tileClassificationElapsedGpuTicks; + } + + std::uint64_t SSSR::GetIntersectElapsedGpuTicks() const + { + return m_intersectionElapsedGpuTicks; + } + + std::uint64_t SSSR::GetDenoiserElapsedGpuTicks() const + { + return m_denoisingElapsedGpuTicks; + } + + void SSSR::CreateResources(VkCommandBuffer command_buffer) + { + VkDevice device = 
m_pDevice->GetDevice(); + VkPhysicalDevice physicalDevice = m_pDevice->GetPhysicalDevice(); + + //==============================Create Tile Classification-related buffers============================================ + { + uint32_t ray_counter_element_count = 2; + + BufferVK::CreateInfo create_info = {}; + create_info.memory_property_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + create_info.format_ = VK_FORMAT_R32_UINT; + create_info.buffer_usage_ = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + create_info.size_in_bytes_ = ray_counter_element_count * sizeof(uint32_t); + create_info.name_ = "SSSR - Ray Counter"; + m_rayCounter = BufferVK(device, physicalDevice, create_info); + } + + //==============================Create PrepareIndirectArgs-related buffers============================================ + { + uint32_t intersection_pass_indirect_args_element_count = 3; + uint32_t denoiser_pass_indirect_args_element_count = 3; + BufferVK::CreateInfo create_info = {}; + create_info.memory_property_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + create_info.format_ = VK_FORMAT_R32_UINT; + create_info.buffer_usage_ = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + + create_info.size_in_bytes_ = intersection_pass_indirect_args_element_count * sizeof(uint32_t); + create_info.name_ = "SSSR - Intersect Indirect Args"; + m_intersectionPassIndirectArgs = BufferVK(device, physicalDevice, create_info); + } + + { + VkSamplerCreateInfo sampler_info = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; + sampler_info.pNext = nullptr; + sampler_info.flags = 0; + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.addressModeW = 
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + sampler_info.mipLodBias = 0; + sampler_info.anisotropyEnable = false; + sampler_info.maxAnisotropy = 0; + sampler_info.compareEnable = false; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.minLod = 0; + sampler_info.maxLod = 16; + sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + sampler_info.unnormalizedCoordinates = false; + VkResult res = vkCreateSampler(m_pDevice->GetDevice(), &sampler_info, nullptr, &m_linearSampler); + assert(VK_SUCCESS == res); + } + + //==============================Create Blue Noise buffers============================================ + { + auto const& sampler_state = g_blue_noise_sampler_state; + BlueNoiseSamplerVK& sampler = m_blueNoiseSampler; + + BufferVK::CreateInfo create_info = {}; + create_info.memory_property_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + create_info.buffer_usage_ = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + create_info.format_ = VK_FORMAT_R32_UINT; + + create_info.size_in_bytes_ = sizeof(sampler_state.sobol_buffer_); + create_info.name_ = "SSSR - Sobol Buffer"; + m_blueNoiseSampler.sobolBuffer = BufferVK(device, physicalDevice, create_info); + + create_info.size_in_bytes_ = sizeof(sampler_state.ranking_tile_buffer_); + create_info.name_ = "SSSR - Ranking Tile Buffer"; + m_blueNoiseSampler.rankingTileBuffer = BufferVK(device, physicalDevice, create_info); + + create_info.size_in_bytes_ = sizeof(sampler_state.scrambling_tile_buffer_); + create_info.name_ = "SSSR - Scrambling Tile Buffer"; + m_blueNoiseSampler.scramblingTileBuffer = BufferVK(device, physicalDevice, create_info); + + VkBufferCopy copyInfo; + copyInfo.dstOffset = 0; + copyInfo.size = sizeof(sampler_state.sobol_buffer_); + copyInfo.srcOffset = 0; + + uint8_t* destAddr; + + destAddr = m_uploadHeap.BeginSuballocate(sizeof(sampler_state.sobol_buffer_), 512); + memcpy(destAddr, &sampler_state.sobol_buffer_, sizeof(sampler_state.sobol_buffer_)); 
+ m_uploadHeap.EndSuballocate(); + m_uploadHeap.AddCopy(m_blueNoiseSampler.sobolBuffer.buffer_, copyInfo); + m_uploadHeap.FlushAndFinish(); + + copyInfo.size = sizeof(sampler_state.ranking_tile_buffer_); + copyInfo.srcOffset = 0; + destAddr = m_uploadHeap.BeginSuballocate(sizeof(sampler_state.ranking_tile_buffer_), 512); + memcpy(destAddr, &sampler_state.ranking_tile_buffer_, sizeof(sampler_state.ranking_tile_buffer_)); + m_uploadHeap.EndSuballocate(); + m_uploadHeap.AddCopy(m_blueNoiseSampler.rankingTileBuffer.buffer_, copyInfo); + m_uploadHeap.FlushAndFinish(); + + copyInfo.size = sizeof(sampler_state.scrambling_tile_buffer_); + destAddr = m_uploadHeap.BeginSuballocate(sizeof(sampler_state.scrambling_tile_buffer_), 512); + memcpy(destAddr, &sampler_state.scrambling_tile_buffer_, sizeof(sampler_state.scrambling_tile_buffer_)); + m_uploadHeap.EndSuballocate(); + m_uploadHeap.AddCopy(m_blueNoiseSampler.scramblingTileBuffer.buffer_, copyInfo); + m_uploadHeap.FlushAndFinish(); + } + } + + void SSSR::CreateWindowSizeDependentResources(VkCommandBuffer command_buffer) + { + VkDevice device = m_pDevice->GetDevice(); + VkPhysicalDevice physicalDevice = m_pDevice->GetPhysicalDevice(); + + //===================================Create Output Buffer============================================ + { + VkImageCreateInfo imageCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; + imageCreateInfo.pNext = nullptr; + imageCreateInfo.arrayLayers = 1; + imageCreateInfo.extent = { m_outputWidth, m_outputHeight, 1 }; + imageCreateInfo.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; + imageCreateInfo.imageType = VK_IMAGE_TYPE_2D; + imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageCreateInfo.mipLevels = 1; + imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageCreateInfo.usage = (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | 
VK_IMAGE_USAGE_TRANSFER_DST_BIT); + imageCreateInfo.flags = 0; + m_outputBuffer.Init(m_pDevice, &imageCreateInfo, "Reflection Denoiser - OutputBuffer"); + m_outputBuffer.CreateSRV(&m_outputBufferView); + } + + //==============================Create Tile Classification-related buffers============================================ + { + uint32_t num_tiles = RoundedDivide(m_outputWidth, 8u) * RoundedDivide(m_outputHeight, 8u); + uint32_t num_pixels = m_outputWidth * m_outputHeight; + + uint32_t ray_list_element_count = num_pixels; + uint32_t ray_counter_element_count = 1; + + BufferVK::CreateInfo create_info = {}; + create_info.memory_property_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + create_info.format_ = VK_FORMAT_R32_UINT; + create_info.buffer_usage_ = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + + create_info.size_in_bytes_ = sizeof(uint32_t) * ray_list_element_count; + create_info.name_ = "SSSR - Ray List"; + m_rayList = BufferVK(device, physicalDevice, create_info); + + // one uint per tile + create_info.buffer_usage_ = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + create_info.format_ = VK_FORMAT_UNDEFINED; + create_info.size_in_bytes_ = sizeof(uint32_t) * num_tiles; + create_info.name_ = "Reflection Denoiser - Tile Meta Data Mask"; + m_tileMetaDataMask = BufferVK(device, physicalDevice, create_info); + + create_info.size_in_bytes_ = sizeof(uint32_t) * num_tiles * 2u; + create_info.name_ = "Reflection Denoiser - Temporal Variance Mask"; + m_temporalVarianceMask = BufferVK(device, physicalDevice, create_info); + } + + //==============================Create denoising-related resources============================== + { + VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; + imgCreateInfo.pNext = nullptr; + imgCreateInfo.flags = 0; + imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; + imgCreateInfo.arrayLayers = 1; + imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; 
+ imgCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + imgCreateInfo.queueFamilyIndexCount = 0; + imgCreateInfo.pQueueFamilyIndices = nullptr; + imgCreateInfo.mipLevels = 1; + imgCreateInfo.extent = { m_outputWidth, m_outputHeight, 1 }; + imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imgCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + imgCreateInfo.format = VK_FORMAT_R8_UNORM; + m_roughnessTexture[0].Init(m_pDevice, &imgCreateInfo, "Reflection Denoiser - Extracted Roughness Texture 0"); + m_roughnessTexture[0].CreateSRV(&m_roughnessTextureView[0]); + m_roughnessTexture[1].Init(m_pDevice, &imgCreateInfo, "Reflection Denoiser - Extracted Roughness Texture 1"); + m_roughnessTexture[1].CreateSRV(&m_roughnessTextureView[1]); + + imgCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + imgCreateInfo.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; + m_temporalDenoiserResult[0].Init(m_pDevice, &imgCreateInfo, "Reflection Denoiser - Temporal Denoised Result 0"); + m_temporalDenoiserResult[0].CreateSRV(&m_temporalDenoiserResultView[0]); + m_temporalDenoiserResult[1].Init(m_pDevice, &imgCreateInfo, "Reflection Denoiser - Temporal Denoised Result 1"); + m_temporalDenoiserResult[1].CreateSRV(&m_temporalDenoiserResultView[1]); + + imgCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + imgCreateInfo.format = VK_FORMAT_R16_SFLOAT; + m_rayLengths.Init(m_pDevice, &imgCreateInfo, "Reflection Denoiser - Ray Lengths"); + m_rayLengths.CreateSRV(&m_rayLengthsView); + } + + VkImageMemoryBarrier image_barriers[] = { + Transition(m_roughnessTexture[0].Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), + Transition(m_roughnessTexture[1].Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), + Transition(m_temporalDenoiserResult[0].Resource(), 
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), + Transition(m_temporalDenoiserResult[1].Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), + Transition(m_rayLengths.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL), + }; + TransitionBarriers(command_buffer, image_barriers, _countof(image_barriers)); + + // Initial clear of the ray counter. Successive clears are handled by the indirect arguments pass. + vkCmdFillBuffer(command_buffer, m_rayCounter.buffer_, 0, VK_WHOLE_SIZE, 0); + + VkClearColorValue clear_calue = {}; + clear_calue.float32[0] = 0; + clear_calue.float32[1] = 0; + clear_calue.float32[2] = 0; + clear_calue.float32[3] = 0; + + VkImageSubresourceRange subresource_range = {}; + subresource_range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresource_range.baseArrayLayer = 0; + subresource_range.baseMipLevel = 0; + subresource_range.layerCount = 1; + subresource_range.levelCount = 1; + + // Initial resource clears + vkCmdClearColorImage(command_buffer, m_temporalDenoiserResult[0].Resource(), VK_IMAGE_LAYOUT_GENERAL, &clear_calue, 1, &subresource_range); + vkCmdClearColorImage(command_buffer, m_temporalDenoiserResult[1].Resource(), VK_IMAGE_LAYOUT_GENERAL, &clear_calue, 1, &subresource_range); + vkCmdClearColorImage(command_buffer, m_rayLengths.Resource(), VK_IMAGE_LAYOUT_GENERAL, &clear_calue, 1, &subresource_range); + } + + void SSSR::SetupShaderPass(ShaderPass& pass, const char* shader, const VkDescriptorSetLayoutBinding* bindings, uint32_t bindings_count, VkPipelineShaderStageCreateFlags flags) + { + VkPipelineShaderStageCreateInfo stage_create_info = { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO }; + pass.bindings_count = bindings_count; + + //==============================Compile Shaders============================================ + { + DefineList defines; + VkResult vkResult = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_COMPUTE_BIT, shader, "main", "-T cs_6_0", &defines, &stage_create_info); + 
stage_create_info.flags = flags; + assert(vkResult == VK_SUCCESS); + } + + //==============================DescriptorSetLayout======================================== + { + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = bindings_count; + descriptor_set_layout_create_info.pBindings = bindings; + VkResult vkResult = vkCreateDescriptorSetLayout(m_pDevice->GetDevice(), &descriptor_set_layout_create_info, nullptr, &pass.descriptorSetLayout); + assert(vkResult == VK_SUCCESS); + + for (int i = 0; i < 2; ++i) + { + pass.descriptorSets.emplace_back(); + bool bDescriptorAlloc = m_pResourceViewHeaps->AllocDescriptor(pass.descriptorSetLayout, &pass.descriptorSets.back()); + assert(bDescriptorAlloc == true); + } + } + + //==============================PipelineLayout======================================== + { + VkDescriptorSetLayout layouts[2]; + layouts[0] = m_uniformBufferDescriptorSetLayout; + layouts[1] = pass.descriptorSetLayout; + + VkPipelineLayoutCreateInfo layout_create_info = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; + layout_create_info.pNext = nullptr; + layout_create_info.flags = 0; + layout_create_info.setLayoutCount = _countof(layouts); + layout_create_info.pSetLayouts = layouts; + layout_create_info.pushConstantRangeCount = 0; + layout_create_info.pPushConstantRanges = nullptr; + VkResult bCreatePipelineLayout = vkCreatePipelineLayout(m_pDevice->GetDevice(), &layout_create_info, nullptr, &pass.pipelineLayout); + assert(bCreatePipelineLayout == VK_SUCCESS); + } + + //==============================Pipeline======================================== + { + VkComputePipelineCreateInfo create_info = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO }; + create_info.pNext = nullptr; + create_info.basePipelineHandle = VK_NULL_HANDLE; + 
create_info.basePipelineIndex = 0; + create_info.flags = 0; + create_info.layout = pass.pipelineLayout; + create_info.stage = stage_create_info; + VkResult vkResult = vkCreateComputePipelines(m_pDevice->GetDevice(), VK_NULL_HANDLE, 1, &create_info, nullptr, &pass.pipeline); + assert(vkResult == VK_SUCCESS); + } + } + + void SSSR::SetupClassifyTilesPass() + { + uint32_t binding = 0; + VkDescriptorSetLayoutBinding layout_bindings[] = { + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), // g_temporal_variance_mask + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_ray_list + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_ray_counter + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporally_denoised_reflections + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), // g_tile_meta_data_mask + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_extracted_roughness + }; + + SetupShaderPass(m_classifyTilesPass, "ClassifyTiles.hlsl", layout_bindings, _countof(layout_bindings)); + } + + void SSSR::SetupPrepareIndirectArgsPass() + { + uint32_t binding = 0; + VkDescriptorSetLayoutBinding layout_bindings[] = { + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_ray_counter + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_intersect_args + }; + SetupShaderPass(m_prepareIndirectArgsPass, "PrepareIndirectArgs.hlsl", layout_bindings, _countof(layout_bindings)); + } + + void SSSR::SetupIntersectionPass() + { + uint32_t binding = 0; + VkDescriptorSetLayoutBinding layout_bindings[] = { + //Input + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_lit_scene + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_depth_buffer_hierarchy + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_environment_map 
+ Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_sobol_buffer + Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_ranking_tile_buffer + Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_scrambling_tile_buffer + Bind(binding++, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), // g_ray_list + + //Samplers + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLER), // g_linear_sampler + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLER), // g_environment_map_sampler + + //Output + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_intersection_result + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_ray_lengths + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), // g_ray_counter + }; + SetupShaderPass(m_intersectPass, "Intersect.hlsl", layout_bindings, _countof(layout_bindings)); + } + + void SSSR::SetupResolveSpatial() + { + uint32_t binding = 0; + VkDescriptorSetLayoutBinding layout_bindings[] = { + //Input + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_depth_buffer + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_intersection_result + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), // g_tile_meta_data_mask + + //Output + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_spatially_denoised_reflections + }; + + SetupShaderPass(m_resolveSpatialPass, "ResolveSpatial.hlsl", layout_bindings, _countof(layout_bindings), m_isSubgroupSizeControlExtensionAvailable ? 
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT : 0); + } + + void SSSR::SetupResolveTemporal() + { + uint32_t binding = 0; + VkDescriptorSetLayoutBinding layout_bindings[] = { + //Input + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_normal_history + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness_history + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_depth_buffer + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_motion_vectors + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_temporally_denoised_reflections_history + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_ray_lengths + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_spatially_denoised_reflections + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), // g_tile_meta_data_mask + + //Output + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_temporally_denoised_reflections + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), // g_temporal_variance_mask + }; + SetupShaderPass(m_resolveTemporalPass, "ResolveTemporal.hlsl", layout_bindings, _countof(layout_bindings)); + } + + void SSSR::SetupBlurPass() + { + uint32_t binding = 0; + VkDescriptorSetLayoutBinding layout_bindings[] = { + //Input + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_roughness + Bind(binding++, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), // g_temporally_denoised_reflections + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), // g_tile_meta_data_mask + + //Output + Bind(binding++, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), // g_denoised_reflections + }; + SetupShaderPass(m_blurPass, "BlurReflections.hlsl", layout_bindings, _countof(layout_bindings)); + } + + void SSSR::SetupPerformanceCounters() + { + //Create TimeStamp Pool + VkQueryPoolCreateInfo query_pool_create_info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; 
+ query_pool_create_info.pNext = nullptr; + query_pool_create_info.flags = 0; + query_pool_create_info.queryType = VK_QUERY_TYPE_TIMESTAMP; + query_pool_create_info.queryCount = TIMESTAMP_QUERY_COUNT * m_frameCountBeforeReuse; + query_pool_create_info.pipelineStatistics = 0; + VkResult vsRes = vkCreateQueryPool(m_pDevice->GetDevice(), &query_pool_create_info, NULL, &m_timestampQueryPool); + assert(VK_SUCCESS == vsRes); + + m_timestampQueries.resize(m_frameCountBeforeReuse); + for (auto& timestamp_queries : m_timestampQueries) + { + timestamp_queries.reserve(TIMESTAMP_QUERY_COUNT); + } + } + + BlueNoiseSamplerVK& SSSR::GetBlueNoiseSampler2SSP() + { + return m_blueNoiseSampler; + } + + void SSSR::ComputeBarrier(VkCommandBuffer command_buffer) const + { + VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + vkCmdPipelineBarrier(command_buffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 1, &barrier, + 0, nullptr, + 0, nullptr); + } + + void SSSR::IndirectArgumentsBarrier(VkCommandBuffer command_buffer) const + { + VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + vkCmdPipelineBarrier(command_buffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, + 0, + 1, &barrier, + 0, nullptr, + 0, nullptr); + } + + void SSSR::TransitionBarriers(VkCommandBuffer command_buffer, const VkImageMemoryBarrier* image_barriers, uint32_t image_barriers_count) const + { + vkCmdPipelineBarrier(command_buffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 0, nullptr, + 0, nullptr, + image_barriers_count, image_barriers); + } + + VkImageMemoryBarrier SSSR::Transition(VkImage 
image, VkImageLayout before, VkImageLayout after) const + { + VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.oldLayout = before; + barrier.newLayout = after; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + + VkImageSubresourceRange subresourceRange = {}; + subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresourceRange.baseArrayLayer = 0; + subresourceRange.layerCount = 1; + subresourceRange.baseMipLevel = 0; + subresourceRange.levelCount = 1; + + barrier.subresourceRange = subresourceRange; + return barrier; + } +
+    void SSSR::QueryTimestamps(VkCommandBuffer command_buffer)
+    {
+        // Query timestamp value prior to resolving the reflection view: read back what this frame slot recorded last time, then reset the slot and write its first timestamp.
+        if (m_isPerformanceCountersEnabled)
+        {
+            auto& timestamp_queries = m_timestampQueries[m_timestampFrameIndex];
+
+            auto const start_index = m_timestampFrameIndex * TIMESTAMP_QUERY_COUNT; // first query of this frame slot inside the shared pool
+
+            if (!timestamp_queries.empty())
+            {
+                // Reset performance counters
+                m_tileClassificationElapsedGpuTicks = 0ull;
+                m_denoisingElapsedGpuTicks = 0ull;
+                m_intersectionElapsedGpuTicks = 0ull;
+
+                uint32_t timestamp_count = static_cast<uint32_t>(timestamp_queries.size());
+
+                uint64_t data[TIMESTAMP_QUERY_COUNT * 8]; // maximum of 8 frames in flight allowed
+                VkResult result = vkGetQueryPoolResults(m_pDevice->GetDevice(),
+                    m_timestampQueryPool,
+                    start_index,
+                    timestamp_count,
+                    timestamp_count * sizeof(uint64_t),
+                    data,
+                    sizeof(uint64_t),
+                    VK_QUERY_RESULT_64_BIT); // one 64-bit tick value per query, matching the uint64_t array and stride (without this flag Vulkan writes 32-bit results)
+
+                assert(result == VK_SUCCESS);
+
+                for (auto i = 0u, j = 1u; j < timestamp_count; ++i, ++j) // walk consecutive timestamp pairs (i, j = i + 1)
+                {
+                    auto const elapsed_time = (data[j] - data[i]);
+
+                    switch (timestamp_queries[j]) // the ticks are attributed to the query type recorded at index j
+                    {
+                    case TIMESTAMP_QUERY_TILE_CLASSIFICATION:
+                        m_tileClassificationElapsedGpuTicks = elapsed_time;
+                        break;
+                    case TIMESTAMP_QUERY_INTERSECTION:
+                        m_intersectionElapsedGpuTicks = elapsed_time;
+                        break;
+                    case TIMESTAMP_QUERY_DENOISING:
+                        m_denoisingElapsedGpuTicks = elapsed_time;
+                        break;
+                    default:
+                        // unrecognized timestamp query
+                        assert(false && "unrecognized timestamp query");
+                        break;
+                    }
+                }
+
+            }
+
+            timestamp_queries.clear();
+
+            vkCmdResetQueryPool(command_buffer, m_timestampQueryPool, start_index, TIMESTAMP_QUERY_COUNT);
+
+            vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_timestampQueryPool, GetTimestampQueryIndex());
+            timestamp_queries.push_back(TIMESTAMP_QUERY_INIT);
+        }
+    }
+
+    uint32_t SSSR::GetTimestampQueryIndex() const // pool index of the next timestamp to be written for the current frame slot
+    {
+        return m_timestampFrameIndex * TIMESTAMP_QUERY_COUNT + static_cast<uint32_t>(m_timestampQueries[m_timestampFrameIndex].size());
+    }
+ + void SSSR::InitializeResourceDescriptorSets(const SSSRCreationInfo& input) + { + VkDevice device = m_pDevice->GetDevice(); + VkImageView normal_buffers[] = { input.NormalBufferView, input.NormalHistoryBufferView }; + + uint32_t binding = 0; + VkDescriptorSet target_set = VK_NULL_HANDLE; + + // Place the descriptors + for (int i = 0; i < 2; ++i) + { + // Tile Classifier pass + { + target_set = m_classifyTilesPass.descriptorSets[i]; + binding = 0; + + SetDescriptorSet(device, binding++, input.SpecularRoughnessView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSetStructuredBuffer(device, binding++, m_temporalVarianceMask.buffer_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + SetDescriptorSetBuffer(device, binding++, m_rayList.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + SetDescriptorSetBuffer(device, binding++, m_rayCounter.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + SetDescriptorSet(device, binding++, m_temporalDenoiserResultView[i], target_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSetStructuredBuffer(device, binding++, 
m_tileMetaDataMask.buffer_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + SetDescriptorSet(device, binding++, m_roughnessTextureView[i], target_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + } + + // Indirect args pass + { + target_set = m_prepareIndirectArgsPass.descriptorSets[i]; + binding = 0; + + SetDescriptorSetBuffer(device, binding++, m_rayCounter.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + SetDescriptorSetBuffer(device, binding++, m_intersectionPassIndirectArgs.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + } + + // Intersection pass + { + target_set = m_intersectPass.descriptorSets[i]; + binding = 0; + + SetDescriptorSet(device, binding++, input.HDRView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, input.DepthHierarchyView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, input.pingPongNormal ? 
normal_buffers[i] : input.NormalBufferView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, m_roughnessTextureView[i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSet(device, binding++, input.EnvironmentMapView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + SetDescriptorSetBuffer(device, binding++, m_blueNoiseSampler.sobolBuffer.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + SetDescriptorSetBuffer(device, binding++, m_blueNoiseSampler.rankingTileBuffer.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + SetDescriptorSetBuffer(device, binding++, m_blueNoiseSampler.scramblingTileBuffer.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + SetDescriptorSetBuffer(device, binding++, m_rayList.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + + SetDescriptorSetSampler(device, binding++, m_linearSampler, target_set); // g_linear_sampler + SetDescriptorSetSampler(device, binding++, input.EnvironmentMapSampler, target_set); // g_environment_map_sampler + + SetDescriptorSet(device, binding++, m_temporalDenoiserResultView[i], target_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSet(device, binding++, m_rayLengthsView, target_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSetBuffer(device, binding++, m_rayCounter.buffer_view_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + } + + // Spatial denoising pass + { + target_set = m_resolveSpatialPass.descriptorSets[i]; + binding = 0; + + SetDescriptorSet(device, binding++, input.DepthHierarchyView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, input.pingPongNormal ? 
normal_buffers[i] : input.NormalBufferView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, m_roughnessTextureView[i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSet(device, binding++, m_temporalDenoiserResultView[i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSetStructuredBuffer(device, binding++, m_tileMetaDataMask.buffer_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + SetDescriptorSet(device, binding++, m_outputBufferView, target_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + } + + // Temporal denoising pass + { + target_set = m_resolveTemporalPass.descriptorSets[i]; + binding = 0; + + SetDescriptorSet(device, binding++, input.pingPongNormal ? normal_buffers[i] : input.NormalBufferView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, m_roughnessTextureView[i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSet(device, binding++, input.pingPongNormal ? 
normal_buffers[1 - i] : input.NormalHistoryBufferView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, m_roughnessTextureView[1 - i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSet(device, binding++, input.DepthHierarchyView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, input.MotionVectorsView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, m_temporalDenoiserResultView[1 - i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, m_rayLengthsView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + SetDescriptorSet(device, binding++, m_outputBufferView, target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSetStructuredBuffer(device, binding++, m_tileMetaDataMask.buffer_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + SetDescriptorSet(device, binding++, m_temporalDenoiserResultView[i], target_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSetStructuredBuffer(device, binding++, m_temporalVarianceMask.buffer_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + } + + // Blur pass + { + target_set = m_blurPass.descriptorSets[i]; + binding = 0; + SetDescriptorSet(device, binding++, m_roughnessTextureView[i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSet(device, binding++, m_temporalDenoiserResultView[i], target_set, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + SetDescriptorSetStructuredBuffer(device, binding++, m_tileMetaDataMask.buffer_, target_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + SetDescriptorSet(device, binding++, m_outputBufferView, 
target_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_LAYOUT_GENERAL); + } + } + } +} \ No newline at end of file diff --git a/sample/src/VK/Sources/SSSR.h b/sample/src/VK/Sources/SSSR.h new file mode 100644 index 0000000..44e1d78 --- /dev/null +++ b/sample/src/VK/Sources/SSSR.h @@ -0,0 +1,193 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+#pragma once
+
+#include // NOTE(review): header name lost in extraction (angle-bracket text stripped) - restore from the upstream file
+#include // NOTE(review): header name lost in extraction; this file uses Vulkan handles, XMFLOAT4X4, std::vector and std::uint64_t
+
+#include "Base/DynamicBufferRing.h"
+#include "Base/Texture.h"
+#include "BufferVK.h"
+#include "UploadHeapVK.h"
+#include // NOTE(review): header name lost in extraction - restore from the upstream file
+
+#include "ShaderPass.h"
+#include "BlueNoiseSampler.h"
+
+using namespace CAULDRON_VK;
+namespace SSSR_SAMPLE_VK
+{
+    struct SSSRCreationInfo { // image views, sampler and output size handed to SSSR::OnCreateWindowSizeDependentResources
+        VkImageView HDRView;
+        VkImageView DepthHierarchyView;
+        VkImageView MotionVectorsView;
+        VkImageView NormalBufferView;
+        VkImageView NormalHistoryBufferView;
+        VkImageView SpecularRoughnessView;
+        VkImageView EnvironmentMapView;
+        VkSampler EnvironmentMapSampler;
+        bool pingPongNormal; // when true, descriptor set i binds { NormalBufferView, NormalHistoryBufferView }[i] as the current normals and [1 - i] as the history
+        bool pingPongRoughness; // NOTE(review): no read of this flag is visible in the reviewed part of SSSR.cpp - confirm it is still used
+        uint32_t outputWidth;
+        uint32_t outputHeight;
+    };
+
+    struct SSSRConstants // per-frame parameters passed to SSSR::Draw
+    {
+        XMFLOAT4X4 invViewProjection;
+        XMFLOAT4X4 projection;
+        XMFLOAT4X4 invProjection;
+        XMFLOAT4X4 view;
+        XMFLOAT4X4 invView;
+        XMFLOAT4X4 prevViewProjection;
+        uint32_t frameIndex;
+        uint32_t maxTraversalIntersections;
+        uint32_t minTraversalOccupancy;
+        uint32_t mostDetailedMip;
+        float temporalStabilityFactor;
+        float temporalVarianceThreshold;
+        float depthBufferThickness;
+        float roughnessThreshold;
+        uint32_t samplesPerQuad;
+        uint32_t temporalVarianceGuidedTracingEnabled;
+    };
+
+    class SSSR
+    {
+    public:
+        void OnCreate(Device* pDevice, VkCommandBuffer command_buffer, ResourceViewHeaps* resourceHeap, DynamicBufferRing* constantBufferRing, uint32_t frameCountBeforeReuse, bool enablePerformanceCounters);
+        void OnCreateWindowSizeDependentResources(VkCommandBuffer command_buffer, const SSSRCreationInfo& input);
+
+        void OnDestroy();
+        void OnDestroyWindowSizeDependentResources();
+
+        void Draw(VkCommandBuffer command_buffer, const SSSRConstants& sssrConstants, bool showIntersectResult);
+        void GUI(int* pSlice);
+        Texture* GetOutputTexture();
+        VkImageView GetOutputTextureView() const;
+
+        std::uint64_t GetTileClassificationElapsedGpuTicks() const;
+        std::uint64_t GetIntersectElapsedGpuTicks() const;
+        std::uint64_t GetDenoiserElapsedGpuTicks() const;
+
+    private:
+        void CreateResources(VkCommandBuffer command_buffer);
+        void CreateWindowSizeDependentResources(VkCommandBuffer command_buffer);
+
+        void SetupShaderPass(ShaderPass& pass, const char* shader, const VkDescriptorSetLayoutBinding* bindings, uint32_t bindings_count, VkPipelineShaderStageCreateFlags flags = 0);
+        void SetupClassifyTilesPass();
+        void SetupPrepareIndirectArgsPass();
+        void SetupIntersectionPass();
+        void SetupResolveSpatial();
+        void SetupResolveTemporal();
+        void SetupBlurPass();
+        void SetupPerformanceCounters();
+
+        void InitializeResourceDescriptorSets(const SSSRCreationInfo& input);
+
+        BlueNoiseSamplerVK& GetBlueNoiseSampler2SSP();
+        void ComputeBarrier(VkCommandBuffer command_buffer) const;
+        void IndirectArgumentsBarrier(VkCommandBuffer command_buffer) const;
+        void TransitionBarriers(VkCommandBuffer command_buffer, const VkImageMemoryBarrier* image_barriers, uint32_t image_barriers_count) const;
+        VkImageMemoryBarrier Transition(VkImage image, VkImageLayout before, VkImageLayout after) const;
+
+        void QueryTimestamps(VkCommandBuffer command_buffer);
+        uint32_t GetTimestampQueryIndex() const;
+
+        Device* m_pDevice = nullptr;
+        DynamicBufferRing* m_pConstantBufferRing;
+        ResourceViewHeaps* m_pResourceViewHeaps;
+        UploadHeapVK m_uploadHeap;
+
+        uint32_t m_outputWidth;
+        uint32_t m_outputHeight;
+
+        VkDescriptorSetLayout m_uniformBufferDescriptorSetLayout;
+        VkDescriptorSet m_uniformBufferDescriptorSet[8];
+
+        // Containing all rays that need to be traced.
+        BufferVK m_rayList;
+        BufferVK m_rayCounter;
+        // Indirect arguments for intersection pass.
+        BufferVK m_intersectionPassIndirectArgs;
+        // Holds the temporal variance of the last two frames.
+        BufferVK m_temporalVarianceMask;
+        // Tells us if we have to run the denoiser on a specific tile or if we just have to copy the values
+        BufferVK m_tileMetaDataMask;
+        // Extracted roughness values
+        Texture m_roughnessTexture[2];
+        VkImageView m_roughnessTextureView[2];
+
+        // Intermediate result of the temporal denoising pass - double buffered to keep history and aliases the intersection result.
+        Texture m_temporalDenoiserResult[2];
+        VkImageView m_temporalDenoiserResultView[2];
+
+        // Holds the length of each reflection ray - used for temporal reprojection.
+        Texture m_rayLengths;
+        VkImageView m_rayLengthsView;
+
+        BlueNoiseSamplerVK m_blueNoiseSampler;
+
+        ShaderPass m_classifyTilesPass;
+        ShaderPass m_prepareIndirectArgsPass;
+        ShaderPass m_intersectPass;
+        ShaderPass m_resolveSpatialPass;
+        ShaderPass m_resolveTemporalPass;
+        ShaderPass m_blurPass;
+
+        VkSampler m_linearSampler;
+        Texture m_outputBuffer;
+        VkImageView m_outputBufferView;
+
+        uint32_t m_bufferIndex = 0;
+        uint32_t m_frameCountBeforeReuse = 0;
+        bool m_isSubgroupSizeControlExtensionAvailable = false;
+
+        enum TimestampQuery
+        {
+            TIMESTAMP_QUERY_INIT,
+            TIMESTAMP_QUERY_TILE_CLASSIFICATION,
+            TIMESTAMP_QUERY_INTERSECTION,
+            TIMESTAMP_QUERY_DENOISING,
+
+            TIMESTAMP_QUERY_COUNT
+        };
+        /**
+            The type definition for an array of timestamp queries.
+        */
+        using TimestampQueries = std::vector<TimestampQuery>; // element type restored: SSSR.cpp pushes TIMESTAMP_QUERY_* values into these arrays
+
+        // The query pool containing the recorded timestamps.
+        VkQueryPool m_timestampQueryPool;
+        // The number of GPU ticks spent in the tile classification pass.
+        std::uint64_t m_tileClassificationElapsedGpuTicks;
+        // The number of GPU ticks spent in depth buffer intersection.
+        std::uint64_t m_intersectionElapsedGpuTicks;
+        // The number of GPU ticks spent denoising.
+        std::uint64_t m_denoisingElapsedGpuTicks;
+        // The array of timestamps that were queried, one TimestampQueries entry per frame slot (resized to m_frameCountBeforeReuse in SetupPerformanceCounters).
+        std::vector<TimestampQueries> m_timestampQueries;
+        // The index of the active set of timestamp queries.
+        uint32_t m_timestampFrameIndex;
+        bool m_isPerformanceCountersEnabled;
+    };
+}
\ No newline at end of file diff --git a/sample/src/VK/Sources/SampleRenderer.cpp b/sample/src/VK/Sources/SampleRenderer.cpp index e29908f..cf57848 100644 --- a/sample/src/VK/Sources/SampleRenderer.cpp +++ b/sample/src/VK/Sources/SampleRenderer.cpp @@ -1,21 +1,24 @@ -// AMD SampleVK sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ #include "stdafx.h" @@ -25,1910 +28,1758 @@ #undef max #undef min -void SSRLoggingFunction(const char* pMessage, void* pUserData) +namespace SSSR_SAMPLE_VK { - char buffer[4096]; - snprintf(buffer, sizeof(buffer), "%s\n", pMessage); - MessageBox(NULL, buffer, "RtShadows Error", MB_OK | MB_ICONERROR); - exit(-1); -} - -//-------------------------------------------------------------------------------------- -// -// OnCreate -// -//-------------------------------------------------------------------------------------- -void SampleRenderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) -{ - m_pDevice = pDevice; - m_CurrentFrame = 0; - - // Initialize helpers - - // Create all the heaps for the resources views - const uint32_t cbvDescriptorCount = 2000; - const uint32_t srvDescriptorCount = 2000; - const uint32_t uavDescriptorCount = 10; - const uint32_t samplerDescriptorCount = 20; - m_ResourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, samplerDescriptorCount); - - // Create a commandlist ring for the Direct queue - uint32_t commandListsPerBackBuffer = 8; - m_CommandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer); - - // Create a 'dynamic' constant buffer - const uint32_t constantBuffersMemSize = 200 * 1024 * 1024; - m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, "Uniforms"); - - // Create a 'static' pool for vertices and indices - const uint32_t staticGeometryMemSize = 5 * 128 * 1024 * 1024; - const uint32_t systemGeometryMemSize = 32 * 1024; - m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); - m_SysMemBufferPool.OnCreate(pDevice, systemGeometryMemSize, false, "PostProcGeom"); - - // initialize the GPU time stamps module - m_GPUTimer.OnCreate(pDevice, backBufferCount); - - // Quick helper to upload resources, it has it's own commandList and uses suballocation. 
- // for 4K textures we'll need 100Megs - const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; - m_UploadHeap.OnCreate(pDevice, staticGeometryMemSize); // initialize an upload heap (uses suballocation for faster results) - - CreateApplyReflectionsPipeline(); - CreateDepthDownsamplePipeline(); - - - // Create a command buffer for upload - m_CommandListRing.OnBeginFrame(); - VkCommandBuffer uploadCommandBuffer = BeginNewCommandBuffer(); - - FfxSssrVkCreateContextInfo vkContextInfo = {}; - vkContextInfo.device = m_pDevice->GetDevice(); - vkContextInfo.physicalDevice = m_pDevice->GetPhysicalDevice(); - vkContextInfo.uploadCommandBuffer = uploadCommandBuffer; - - FfxSssrLoggingCallbacks loggingCallbacks = {}; - loggingCallbacks.pUserData = this; - loggingCallbacks.pfnLogging = SSRLoggingFunction; - - FfxSssrCreateContextInfo contextInfo = {}; - contextInfo.apiVersion = FFX_SSSR_API_VERSION; - contextInfo.frameCountBeforeMemoryReuse = backBufferCount; - contextInfo.maxReflectionViewCount = 1; - contextInfo.pVkCreateContextInfo = &vkContextInfo; - contextInfo.pLoggingCallbacks = &loggingCallbacks; - contextInfo.uploadBufferSize = 8 * 1024 * 1024; - contextInfo.pRoughnessTextureFormat = L"float4"; - contextInfo.pUnpackRoughnessSnippet = L"float FfxSssrUnpackRoughness(FFX_SSSR_ROUGHNESS_TEXTURE_FORMAT packed) { return packed.w; }"; - contextInfo.pNormalsTextureFormat = L"float4"; - contextInfo.pUnpackNormalsSnippet = L"float3 FfxSssrUnpackNormals(FFX_SSSR_NORMALS_TEXTURE_FORMAT packed) { return 2 * packed.xyz - 1; }"; - contextInfo.pSceneTextureFormat = L"float4"; - contextInfo.pUnpackSceneRadianceSnippet = L"float3 FfxSssrUnpackSceneRadiance(FFX_SSSR_SCENE_TEXTURE_FORMAT packed) { return packed.xyz; }"; - contextInfo.pDepthTextureFormat = L"float"; - contextInfo.pUnpackDepthSnippet = L"float FfxSssrUnpackDepth(FFX_SSSR_DEPTH_TEXTURE_FORMAT packed) { return packed.x; }"; - contextInfo.pMotionVectorFormat = L"float2"; - contextInfo.pUnpackMotionVectorsSnippet = L"float2 
FfxSssrUnpackMotionVectors(FFX_SSSR_MOTION_VECTOR_TEXTURE_FORMAT packed) { return packed.xy * float2(0.5, -0.5); }"; - - FfxSssrStatus status = ffxSssrCreateContext(&contextInfo, &m_SssrContext); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrCreateContext failed."); - } - - // Wait for the upload to finish; - SubmitCommandBuffer(uploadCommandBuffer); - m_pDevice->GPUFlush(); - - VkSamplerCreateInfo samplerCreateInfo = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; - samplerCreateInfo.pNext = nullptr; - samplerCreateInfo.flags = 0; - samplerCreateInfo.magFilter = VK_FILTER_LINEAR; - samplerCreateInfo.minFilter = VK_FILTER_LINEAR; - samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerCreateInfo.mipLodBias = 0; - samplerCreateInfo.anisotropyEnable = false; - samplerCreateInfo.maxAnisotropy = 0; - samplerCreateInfo.compareEnable = false; - samplerCreateInfo.compareOp = VK_COMPARE_OP_NEVER; - samplerCreateInfo.minLod = 0; - samplerCreateInfo.maxLod = 16; - samplerCreateInfo.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - samplerCreateInfo.unnormalizedCoordinates = false; - if (VK_SUCCESS != vkCreateSampler(m_pDevice->GetDevice(), &samplerCreateInfo, nullptr, &m_LinearSampler)) - { - Trace("Failed to create linear sampler."); - } - - // Create a 2Kx2K Shadowmap atlas to hold 4 cascades/spotlights - m_ShadowMap.InitDepthStencil(m_pDevice, 2 * 1024, 2 * 1024, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "ShadowMap"); - m_ShadowMap.CreateSRV(&m_ShadowMapSRV); - m_ShadowMap.CreateDSV(&m_ShadowMapDSV); - - // Create render pass shadow - // - { - VkAttachmentDescription depthAttachments; - AttachClearBeforeUse(m_ShadowMap.GetFormat(), VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachments); - m_RenderPassShadow = CreateRenderPassOptimal(m_pDevice->GetDevice(), 0, NULL, &depthAttachments); - - // Create frame buffer - // - VkImageView attachmentViews[1] = { m_ShadowMapDSV }; - VkFramebufferCreateInfo framebufferInfo = {}; - framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebufferInfo.pNext = NULL; - framebufferInfo.renderPass = m_RenderPassShadow; - framebufferInfo.attachmentCount = 1; - framebufferInfo.pAttachments = attachmentViews; - framebufferInfo.width = m_ShadowMap.GetWidth(); - framebufferInfo.height = m_ShadowMap.GetHeight(); - framebufferInfo.layers = 1; - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &framebufferInfo, NULL, &m_FramebufferShadows); - assert(res == VK_SUCCESS); - } - - // Create motion vector render pass - // - { - VkAttachmentDescription colorAttachments[2], depthAttachment; - // motion vector RT - AttachClearBeforeUse(VK_FORMAT_R16G16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[0]); - // normals RT - AttachClearBeforeUse(VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[1]); - // depth RT - AttachClearBeforeUse(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); - m_RenderPassMV = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); - } - - // Create HDR render pass color with color clear - // - { - VkAttachmentDescription colorAttachments[1], depthAttachment; - // color RT - AttachClearBeforeUse(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachments[0]); - // depth RT - AttachBlending(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); - m_RenderPassClearHDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); - } - - // Create PBR render pass - // - { - VkAttachmentDescription colorAttachments[2], depthAttachment; - // color RT - AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachments[0]); - // specular roughness RT - AttachClearBeforeUse(VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[1]); - // depth RT - AttachBlending(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); - m_RenderPassPBR = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); - } - - // Create HDR render pass color without clear - // - { - VkAttachmentDescription colorAttachments[1], depthAttachment; - // color RT - AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[0]); - // depth RT - AttachBlending(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachment); - m_RenderPassHDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); - } - - m_SkyDome.OnCreate(pDevice, m_RenderPassHDR, &m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", VK_SAMPLE_COUNT_1_BIT); - 
m_AmbientLight.OnCreate(pDevice, m_RenderPassHDR, &m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\white\\diffuse.dds", "..\\media\\envmaps\\white\\specular.dds", VK_SAMPLE_COUNT_1_BIT); - m_SkyDomeProc.OnCreate(pDevice, m_RenderPassHDR, &m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_1_BIT); - m_Wireframe.OnCreate(pDevice, m_RenderPassHDR, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_1_BIT); - m_WireframeBox.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); - m_DownSample.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); - m_Bloom.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); - - // Create tonemapping pass - m_ToneMapping.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_ResourceViewHeaps, &m_SysMemBufferPool, &m_ConstantBufferRing); - - // Initialize UI rendering resources - m_ImGUI.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_UploadHeap, &m_ConstantBufferRing); - - m_BrdfLut.InitFromFile(pDevice, &m_UploadHeap, "BrdfLut.dds", false); // LUT images are stored as linear - m_BrdfLut.CreateSRV(&m_BrdfLutSRV); - - // Make sure upload heap has finished uploading before continuing + //-------------------------------------------------------------------------------------- + // + // OnCreate + // + //-------------------------------------------------------------------------------------- + void SampleRenderer::OnCreate(Device* pDevice, SwapChain* pSwapChain) + { + m_pDevice = pDevice; + m_CurrentBackbufferIndex = 0; + + // Initialize helpers + + // Create all the heaps for the resources views + const uint32_t cbvDescriptorCount = 2000; + const uint32_t srvDescriptorCount = 2000; + const uint32_t 
uavDescriptorCount = 10; + const uint32_t samplerDescriptorCount = 20; + m_ResourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, samplerDescriptorCount); + + // Create a commandlist ring for the Direct queue + uint32_t commandListsPerBackBuffer = 8; + m_CommandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer); + + // Create a 'dynamic' constant buffer + const uint32_t constantBuffersMemSize = 200 * 1024 * 1024; + m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, "Uniforms"); + + // Create a 'static' pool for vertices and indices + const uint32_t staticGeometryMemSize = 128 * 1024 * 1024; + const uint32_t systemGeometryMemSize = 32 * 1024; + m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); + m_SysMemBufferPool.OnCreate(pDevice, systemGeometryMemSize, false, "PostProcGeom"); + + // initialize the GPU time stamps module + m_GPUTimer.OnCreate(pDevice, backBufferCount); + + // Quick helper to upload resources, it has it's own commandList and uses suballocation. 
+ const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; + m_UploadHeap.OnCreate(pDevice, staticGeometryMemSize); // initialize an upload heap (uses suballocation for faster results) + + CreateApplyReflectionsPipeline(); + CreateDepthDownsamplePipeline(); + + // Create a command buffer for upload + m_CommandListRing.OnBeginFrame(); + + VkSamplerCreateInfo samplerCreateInfo = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; + samplerCreateInfo.pNext = nullptr; + samplerCreateInfo.flags = 0; + samplerCreateInfo.magFilter = VK_FILTER_LINEAR; + samplerCreateInfo.minFilter = VK_FILTER_LINEAR; + samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + samplerCreateInfo.mipLodBias = 0; + samplerCreateInfo.anisotropyEnable = false; + samplerCreateInfo.maxAnisotropy = 0; + samplerCreateInfo.compareEnable = false; + samplerCreateInfo.compareOp = VK_COMPARE_OP_NEVER; + samplerCreateInfo.minLod = 0; + samplerCreateInfo.maxLod = 16; + samplerCreateInfo.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + samplerCreateInfo.unnormalizedCoordinates = false; + if (VK_SUCCESS != vkCreateSampler(m_pDevice->GetDevice(), &samplerCreateInfo, nullptr, &m_LinearSampler)) + { + Trace("Failed to create linear sampler."); + } + + // Create a 2Kx2K Shadowmap atlas to hold 4 cascades/spotlights + m_ShadowMap.InitDepthStencil(m_pDevice, 2 * 1024, 2 * 1024, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "ShadowMap"); + m_ShadowMap.CreateSRV(&m_ShadowMapSRV); + m_ShadowMap.CreateDSV(&m_ShadowMapDSV); + + // Create render pass shadow + // + { + VkAttachmentDescription depthAttachments; + AttachClearBeforeUse(m_ShadowMap.GetFormat(), VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachments); + m_RenderPassShadow = 
CreateRenderPassOptimal(m_pDevice->GetDevice(), 0, NULL, &depthAttachments); + + // Create frame buffer + // + VkImageView attachmentViews[1] = { m_ShadowMapDSV }; + VkFramebufferCreateInfo framebufferInfo = {}; + framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebufferInfo.pNext = NULL; + framebufferInfo.renderPass = m_RenderPassShadow; + framebufferInfo.attachmentCount = 1; + framebufferInfo.pAttachments = attachmentViews; + framebufferInfo.width = m_ShadowMap.GetWidth(); + framebufferInfo.height = m_ShadowMap.GetHeight(); + framebufferInfo.layers = 1; + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &framebufferInfo, NULL, &m_FramebufferShadows); + assert(res == VK_SUCCESS); + } + + // Create motion vector render pass + // + { + VkAttachmentDescription colorAttachments[2], depthAttachment; + // motion vector RT + AttachClearBeforeUse(VK_FORMAT_R16G16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[0]); + // normals RT + AttachClearBeforeUse(VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[1]); + // depth RT + AttachClearBeforeUse(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); + m_RenderPassMV = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); + } + + // Create HDR render pass color with color clear + // + { + VkAttachmentDescription colorAttachments[1], depthAttachment; + // color RT + AttachClearBeforeUse(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachments[0]); + // depth RT + AttachBlending(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); + m_RenderPassClearHDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); + } + + // Create PBR render pass + // + { + VkAttachmentDescription colorAttachments[2], depthAttachment; + // color RT + AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachments[0]); + // specular roughness RT + AttachClearBeforeUse(VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[1]); + // depth RT + AttachBlending(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); + m_RenderPassPBR = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); + } + + // Create HDR render pass color without clear + // + { + VkAttachmentDescription colorAttachments[1], depthAttachment; + // color RT + AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[0]); + // depth RT + AttachBlending(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachment); + m_RenderPassHDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, &depthAttachment); + } + + m_SkyDome.OnCreate(pDevice, m_RenderPassHDR, &m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", VK_SAMPLE_COUNT_1_BIT); + m_AmbientLight.OnCreate(pDevice, m_RenderPassHDR, 
&m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\white\\diffuse.dds", "..\\media\\envmaps\\white\\specular.dds", VK_SAMPLE_COUNT_1_BIT); + m_SkyDomeProc.OnCreate(pDevice, m_RenderPassHDR, &m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_1_BIT); + m_Wireframe.OnCreate(pDevice, m_RenderPassHDR, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_1_BIT); + m_WireframeBox.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); + m_DownSample.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); + m_Bloom.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); + + VkCommandBuffer cb1 = BeginNewCommandBuffer(); + m_Sssr.OnCreate(pDevice, cb1, &m_ResourceViewHeaps, &m_ConstantBufferRing, backBufferCount, true); + // Wait for the upload to finish; + SubmitCommandBuffer(cb1); + m_pDevice->GPUFlush(); + + // Create tonemapping pass + m_ToneMapping.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_ResourceViewHeaps, &m_SysMemBufferPool, &m_ConstantBufferRing); + + // Initialize UI rendering resources + m_ImGUI.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_UploadHeap, &m_ConstantBufferRing); + + m_BrdfLut.InitFromFile(pDevice, &m_UploadHeap, "BrdfLut.dds", false); // LUT images are stored as linear + m_BrdfLut.CreateSRV(&m_BrdfLutSRV); + + // Make sure upload heap has finished uploading before continuing #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + m_UploadHeap.FlushAndFinish(); #endif -} - -//-------------------------------------------------------------------------------------- -// -// 
OnDestroy -// -//-------------------------------------------------------------------------------------- -void SampleRenderer::OnDestroy() -{ - m_ImGUI.OnDestroy(); - m_ToneMapping.OnDestroy(); - m_Bloom.OnDestroy(); - m_DownSample.OnDestroy(); - m_WireframeBox.OnDestroy(); - m_Wireframe.OnDestroy(); - m_SkyDomeProc.OnDestroy(); - m_SkyDome.OnDestroy(); - m_AmbientLight.OnDestroy(); - m_ShadowMap.OnDestroy(); - m_BrdfLut.OnDestroy(); - - ffxSssrDestroyContext(m_SssrContext); - - VkDevice device = m_pDevice->GetDevice(); - - vkDestroySampler(device, m_LinearSampler, nullptr); - vkDestroyImageView(device, m_BrdfLutSRV, nullptr); - vkDestroyImageView(device, m_ShadowMapDSV, nullptr); - vkDestroyImageView(device, m_ShadowMapSRV, nullptr); - - vkDestroyPipeline(device, m_DepthDownsamplePipeline, nullptr); - vkDestroyPipelineLayout(device, m_DepthDownsamplePipelineLayout, nullptr); - vkDestroyDescriptorSetLayout(device, m_DepthDownsampleDescriptorSetLayout, nullptr); - m_ResourceViewHeaps.FreeDescriptor(m_DepthDownsampleDescriptorSet); - - vkDestroyPipeline(device, m_ApplyPipeline, nullptr); - vkDestroyPipelineLayout(device, m_ApplyPipelineLayout, nullptr); - vkDestroyDescriptorSetLayout(device, m_ApplyPipelineDescriptorSetLayout, nullptr); - - for (int i = 0; i < backBufferCount; ++i) - { - m_ResourceViewHeaps.FreeDescriptor(m_ApplyPipelineDescriptorSet[i]); - } - - vkDestroyRenderPass(device, m_RenderPassShadow, nullptr); - vkDestroyRenderPass(device, m_RenderPassClearHDR, nullptr); - vkDestroyRenderPass(device, m_RenderPassHDR, nullptr); - vkDestroyRenderPass(device, m_RenderPassPBR, nullptr); - vkDestroyRenderPass(device, m_RenderPassMV, nullptr); - vkDestroyRenderPass(device, m_RenderPassApply, nullptr); - - vkDestroyFramebuffer(device, m_FramebufferShadows, nullptr); - - m_UploadHeap.OnDestroy(); - m_GPUTimer.OnDestroy(); - m_VidMemBufferPool.OnDestroy(); - m_SysMemBufferPool.OnDestroy(); - m_ConstantBufferRing.OnDestroy(); - m_ResourceViewHeaps.OnDestroy(); - 
m_CommandListRing.OnDestroy(); -} - -//-------------------------------------------------------------------------------------- -// -// OnCreateWindowSizeDependentResources -// -//-------------------------------------------------------------------------------------- -void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) -{ - m_Width = Width; - m_Height = Height; - - // Set the viewport - // - m_Viewport.x = 0; - m_Viewport.y = (float)m_Height; - m_Viewport.width = (float)m_Width; - m_Viewport.height = -(float)(m_Height); - m_Viewport.minDepth = (float)0.0f; - m_Viewport.maxDepth = (float)1.0f; - - // Create scissor rectangle - // - m_Scissor.extent.width = m_Width; - m_Scissor.extent.height = m_Height; - m_Scissor.offset.x = 0; - m_Scissor.offset.y = 0; - - // Create depth buffer - // - m_DepthBuffer.InitDepthStencil(m_pDevice, m_Width, m_Height, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "DepthBuffer"); - m_DepthBuffer.CreateSRV(&m_DepthBufferSRV); - m_DepthBuffer.CreateDSV(&m_DepthBufferDSV); - - // Create Texture + RTV - // - m_HDR.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "HDR"); - m_HDR.CreateSRV(&m_HDRSRV); - - VkImageCreateInfo imageCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; - imageCreateInfo.pNext = nullptr; - imageCreateInfo.arrayLayers = 1; - imageCreateInfo.extent = { m_Width, m_Height, 1 }; - imageCreateInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT; - imageCreateInfo.imageType = VK_IMAGE_TYPE_2D; - imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - imageCreateInfo.mipLevels = 1; - imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; - imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; - imageCreateInfo.usage = (VK_IMAGE_USAGE_SAMPLED_BIT | 
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); - imageCreateInfo.flags = 0; - m_SssrOutputBuffer.Init(m_pDevice, &imageCreateInfo, "m_SssrOutputBuffer"); - - m_NormalBuffer.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "m_NormalBuffer"); - m_NormalBuffer.CreateSRV(&m_NormalBufferSRV); - - imageCreateInfo.format = VK_FORMAT_A2B10G10R10_UNORM_PACK32; - m_NormalHistoryBuffer.Init(m_pDevice, &imageCreateInfo, "m_NormalHistoryBuffer"); - - m_SpecularRoughnessHistory.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT), false, "m_SpecularRoughnessHistory"); - m_SpecularRoughness.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "m_SpecularRoughness"); - m_SpecularRoughness.CreateSRV(&m_SpecularRoughnessSRV); - - m_MotionVectors.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "m_MotionVector"); - m_MotionVectors.CreateSRV(&m_MotionVectorsSRV); - - // Create framebuffer for the RT - // - { - VkImageView hdrAttachments[2] = { m_HDRSRV, m_DepthBufferDSV }; - - VkFramebufferCreateInfo hdrFramebufferInfo = {}; - hdrFramebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - hdrFramebufferInfo.pNext = NULL; - hdrFramebufferInfo.renderPass = m_RenderPassHDR; - hdrFramebufferInfo.attachmentCount = _countof(hdrAttachments); - hdrFramebufferInfo.pAttachments = 
hdrAttachments; - hdrFramebufferInfo.width = m_Width; - hdrFramebufferInfo.height = m_Height; - hdrFramebufferInfo.layers = 1; - - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &hdrFramebufferInfo, NULL, &m_FramebufferHDR); - assert(res == VK_SUCCESS); - } - - { - VkImageView pbrAttachments[3] = { m_HDRSRV, m_SpecularRoughnessSRV, m_DepthBufferDSV }; - - VkFramebufferCreateInfo pbrFramebufferInfo = {}; - pbrFramebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - pbrFramebufferInfo.pNext = NULL; - pbrFramebufferInfo.renderPass = m_RenderPassPBR; - pbrFramebufferInfo.attachmentCount = _countof(pbrAttachments); - pbrFramebufferInfo.pAttachments = pbrAttachments; - pbrFramebufferInfo.width = m_Width; - pbrFramebufferInfo.height = m_Height; - pbrFramebufferInfo.layers = 1; - - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &pbrFramebufferInfo, NULL, &m_FramebufferPBR); - assert(res == VK_SUCCESS); - } - - { - VkImageView mvAttachments[3] = { m_MotionVectorsSRV, m_NormalBufferSRV, m_DepthBufferDSV }; - - VkFramebufferCreateInfo mvFramebufferInfo = {}; - mvFramebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - mvFramebufferInfo.pNext = NULL; - mvFramebufferInfo.renderPass = m_RenderPassMV; - mvFramebufferInfo.attachmentCount = _countof(mvAttachments); - mvFramebufferInfo.pAttachments = mvAttachments; - mvFramebufferInfo.width = m_Width; - mvFramebufferInfo.height = m_Height; - mvFramebufferInfo.layers = 1; - - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &mvFramebufferInfo, NULL, &m_FramebufferMV); - assert(res == VK_SUCCESS); - } - - { - m_HDR.CreateRTV(&m_ApplyPipelineRTV); - VkImageView attachmentViews[1] = { m_ApplyPipelineRTV }; - - VkFramebufferCreateInfo framebufferInfo = {}; - framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebufferInfo.pNext = NULL; - framebufferInfo.renderPass = m_RenderPassApply; - framebufferInfo.attachmentCount = 1; - framebufferInfo.pAttachments = 
attachmentViews; - framebufferInfo.width = m_Width; - framebufferInfo.height = m_Height; - framebufferInfo.layers = 1; - - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &framebufferInfo, NULL, &m_FramebufferApply); - assert(res == VK_SUCCESS); - } - - // update bloom and downscaling effect - // - m_DownSample.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR, 6); - m_Bloom.OnCreateWindowSizeDependentResources(m_Width / 2, m_Height / 2, m_DownSample.GetTexture(), 6, &m_HDR); - - // update the pipelines if the swapchain render pass has changed (for example when the format of the swapchain changes) - // - m_ToneMapping.UpdatePipelines(pSwapChain->GetRenderPass()); - m_ImGUI.UpdatePipeline(pSwapChain->GetRenderPass()); - - // Depth downsampling pass with single CS - { - m_DepthMipLevelCount = static_cast(std::log2(std::max(m_Width, m_Height))) + 1; - - // Downsampled depth buffer - imageCreateInfo.format = VK_FORMAT_R32_SFLOAT; - imageCreateInfo.mipLevels = m_DepthMipLevelCount; - m_DepthHierarchy.Init(m_pDevice, &imageCreateInfo, "m_DepthHierarchy"); - for (UINT i = 0; i < std::min(13u, m_DepthMipLevelCount); ++i) - { - m_DepthHierarchy.CreateSRV(&m_DepthHierarchyDescriptors[i], i); - } - - // Atomic counter - - VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - bufferCreateInfo.pNext = nullptr; - bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - bufferCreateInfo.size = 4; - bufferCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - - VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.memoryTypeBits = 0; - allocCreateInfo.pool = VK_NULL_HANDLE; - allocCreateInfo.preferredFlags = 0; - allocCreateInfo.pUserData = "m_AtomicCounter"; - allocCreateInfo.requiredFlags = 0; - allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN; - if (VK_SUCCESS != vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferCreateInfo, 
&allocCreateInfo, &m_AtomicCounter, &m_AtomicCounterAllocation, nullptr)) - { - Trace("Failed to create buffer for atomic counter"); - } - - VkBufferViewCreateInfo bufferViewCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO }; - bufferViewCreateInfo.buffer = m_AtomicCounter; - bufferViewCreateInfo.format = VK_FORMAT_R32_UINT; - bufferViewCreateInfo.range = VK_WHOLE_SIZE; - bufferViewCreateInfo.flags = 0; - if (VK_SUCCESS != vkCreateBufferView(m_pDevice->GetDevice(), &bufferViewCreateInfo, nullptr, &m_AtomicCounterUAV)) - { - Trace("Failed to create buffer view for atomic counter"); - } - } - - // Setup SSR - // - m_HDR.CreateSRV(&m_SssrSceneSRV); - m_DepthHierarchy.CreateSRV(&m_SssrDepthBufferHierarchySRV); - m_MotionVectors.CreateSRV(&m_SssrMotionBufferSRV); - m_NormalBuffer.CreateSRV(&m_SssrNormalBufferSRV); - m_NormalHistoryBuffer.CreateSRV(&m_SssrNormalHistoryBufferSRV); - m_SpecularRoughness.CreateSRV(&m_SssrRoughnessBufferSRV); - m_SpecularRoughnessHistory.CreateSRV(&m_SssrRoughnessHistoryBufferSRV); - m_SssrOutputBuffer.CreateSRV(&m_SssrOutputBufferUAV); - m_SssrEnvironmentMapSRV = m_SkyDome.GetCubeSpecularTextureView(); - m_SssrEnvironmentMapSampler = m_SkyDome.GetCubeSpecularTextureSampler(); - - m_CommandListRing.OnBeginFrame(); - VkCommandBuffer cb = BeginNewCommandBuffer(); - - FfxSssrVkCreateReflectionViewInfo vkReflectionViewInfo = {}; - vkReflectionViewInfo.depthBufferHierarchySRV = m_SssrDepthBufferHierarchySRV; - vkReflectionViewInfo.motionBufferSRV = m_SssrMotionBufferSRV; - vkReflectionViewInfo.normalBufferSRV = m_SssrNormalBufferSRV; - vkReflectionViewInfo.roughnessBufferSRV = m_SssrRoughnessBufferSRV; - vkReflectionViewInfo.normalHistoryBufferSRV = m_SssrNormalHistoryBufferSRV; - vkReflectionViewInfo.roughnessHistoryBufferSRV = m_SssrRoughnessHistoryBufferSRV; - vkReflectionViewInfo.reflectionViewUAV = m_SssrOutputBufferUAV; - vkReflectionViewInfo.sceneFormat = m_SssrOutputBuffer.GetFormat(); - vkReflectionViewInfo.sceneSRV = 
m_SssrSceneSRV; - vkReflectionViewInfo.environmentMapSRV = m_SssrEnvironmentMapSRV; - vkReflectionViewInfo.environmentMapSampler = m_SssrEnvironmentMapSampler; - vkReflectionViewInfo.uploadCommandBuffer = cb; - - FfxSssrCreateReflectionViewInfo reflectionViewInfo = {}; - reflectionViewInfo.flags = FFX_SSSR_CREATE_REFLECTION_VIEW_FLAG_ENABLE_PERFORMANCE_COUNTERS; - reflectionViewInfo.outputWidth = m_Width; - reflectionViewInfo.outputHeight = m_Height; - reflectionViewInfo.pVkCreateReflectionViewInfo = &vkReflectionViewInfo; - - FfxSssrStatus status = ffxSssrCreateReflectionView(m_SssrContext, &reflectionViewInfo, &m_SssrReflectionView); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrCreateReflectionView failed."); - } - m_SssrCreatedReflectionView = true; - - // Fill apply reflections descriptor set - VkDescriptorImageInfo applyReflectionsImageInfos[5]; - applyReflectionsImageInfos[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - applyReflectionsImageInfos[0].imageView = m_SssrOutputBufferUAV; - applyReflectionsImageInfos[0].sampler = VK_NULL_HANDLE; - applyReflectionsImageInfos[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - applyReflectionsImageInfos[1].imageView = m_NormalBufferSRV; - applyReflectionsImageInfos[1].sampler = VK_NULL_HANDLE; - applyReflectionsImageInfos[2].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - applyReflectionsImageInfos[2].imageView = m_SpecularRoughnessSRV; - applyReflectionsImageInfos[2].sampler = VK_NULL_HANDLE; - applyReflectionsImageInfos[3].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - applyReflectionsImageInfos[3].imageView = m_BrdfLutSRV; - applyReflectionsImageInfos[3].sampler = VK_NULL_HANDLE; - applyReflectionsImageInfos[4].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - applyReflectionsImageInfos[4].imageView = VK_NULL_HANDLE; - applyReflectionsImageInfos[4].sampler = m_LinearSampler; - - for (int i = 0; i < backBufferCount; ++i) - { - VkWriteDescriptorSet 
applyReflectionsWriteDescSets[5]; - applyReflectionsWriteDescSets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - applyReflectionsWriteDescSets[0].pNext = nullptr; - applyReflectionsWriteDescSets[0].descriptorCount = 1; - applyReflectionsWriteDescSets[0].dstArrayElement = 0; - applyReflectionsWriteDescSets[0].dstSet = m_ApplyPipelineDescriptorSet[i]; - applyReflectionsWriteDescSets[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - applyReflectionsWriteDescSets[0].dstBinding = 0; - applyReflectionsWriteDescSets[0].pImageInfo = &applyReflectionsImageInfos[0]; - - applyReflectionsWriteDescSets[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - applyReflectionsWriteDescSets[1].pNext = nullptr; - applyReflectionsWriteDescSets[1].descriptorCount = 1; - applyReflectionsWriteDescSets[1].dstArrayElement = 0; - applyReflectionsWriteDescSets[1].dstSet = m_ApplyPipelineDescriptorSet[i]; - applyReflectionsWriteDescSets[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - applyReflectionsWriteDescSets[1].dstBinding = 1; - applyReflectionsWriteDescSets[1].pImageInfo = &applyReflectionsImageInfos[1]; - - applyReflectionsWriteDescSets[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - applyReflectionsWriteDescSets[2].pNext = nullptr; - applyReflectionsWriteDescSets[2].descriptorCount = 1; - applyReflectionsWriteDescSets[2].dstArrayElement = 0; - applyReflectionsWriteDescSets[2].dstSet = m_ApplyPipelineDescriptorSet[i]; - applyReflectionsWriteDescSets[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - applyReflectionsWriteDescSets[2].dstBinding = 2; - applyReflectionsWriteDescSets[2].pImageInfo = &applyReflectionsImageInfos[2]; - - applyReflectionsWriteDescSets[3].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - applyReflectionsWriteDescSets[3].pNext = nullptr; - applyReflectionsWriteDescSets[3].descriptorCount = 1; - applyReflectionsWriteDescSets[3].dstArrayElement = 0; - applyReflectionsWriteDescSets[3].dstSet = m_ApplyPipelineDescriptorSet[i]; - 
applyReflectionsWriteDescSets[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - applyReflectionsWriteDescSets[3].dstBinding = 3; - applyReflectionsWriteDescSets[3].pImageInfo = &applyReflectionsImageInfos[3]; - - applyReflectionsWriteDescSets[4].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - applyReflectionsWriteDescSets[4].pNext = nullptr; - applyReflectionsWriteDescSets[4].descriptorCount = 1; - applyReflectionsWriteDescSets[4].dstArrayElement = 0; - applyReflectionsWriteDescSets[4].dstSet = m_ApplyPipelineDescriptorSet[i]; - applyReflectionsWriteDescSets[4].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - applyReflectionsWriteDescSets[4].dstBinding = 4; - applyReflectionsWriteDescSets[4].pImageInfo = &applyReflectionsImageInfos[4]; - - vkUpdateDescriptorSets(m_pDevice->GetDevice(), _countof(applyReflectionsWriteDescSets), applyReflectionsWriteDescSets, 0, nullptr); - } - - // Fill depth downsample descriptor set - VkDescriptorImageInfo downsampleImageInfos[15]; - downsampleImageInfos[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - downsampleImageInfos[0].imageView = m_DepthBufferDSV; - downsampleImageInfos[0].sampler = VK_NULL_HANDLE; - - uint32_t i = 0; - for (; i < m_DepthMipLevelCount; ++i) - { - uint32_t idx = i + 1; - downsampleImageInfos[idx].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - downsampleImageInfos[idx].imageView = m_DepthHierarchyDescriptors[i]; - downsampleImageInfos[idx].sampler = VK_NULL_HANDLE; - } - - VkWriteDescriptorSet depthDownsampleWriteDescSets[15]; - depthDownsampleWriteDescSets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - depthDownsampleWriteDescSets[0].pNext = nullptr; - depthDownsampleWriteDescSets[0].descriptorCount = 1; - depthDownsampleWriteDescSets[0].dstArrayElement = 0; - depthDownsampleWriteDescSets[0].dstSet = m_DepthDownsampleDescriptorSet; - depthDownsampleWriteDescSets[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - depthDownsampleWriteDescSets[0].dstBinding = 0; - 
depthDownsampleWriteDescSets[0].pImageInfo = &downsampleImageInfos[0]; - - i = 0; - for (; i < m_DepthMipLevelCount; ++i) - { - uint32_t idx = i + 1; - depthDownsampleWriteDescSets[idx].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - depthDownsampleWriteDescSets[idx].pNext = nullptr; - depthDownsampleWriteDescSets[idx].descriptorCount = 1; - depthDownsampleWriteDescSets[idx].dstArrayElement = i; - depthDownsampleWriteDescSets[idx].dstSet = m_DepthDownsampleDescriptorSet; - depthDownsampleWriteDescSets[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - depthDownsampleWriteDescSets[idx].dstBinding = 1; - depthDownsampleWriteDescSets[idx].pImageInfo = &downsampleImageInfos[idx]; - } - - // Map the remaining mip levels to the lowest mip - for (; i < 13; ++i) - { - uint32_t idx = i + 1; - depthDownsampleWriteDescSets[idx].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - depthDownsampleWriteDescSets[idx].pNext = nullptr; - depthDownsampleWriteDescSets[idx].descriptorCount = 1; - depthDownsampleWriteDescSets[idx].dstArrayElement = i; - depthDownsampleWriteDescSets[idx].dstSet = m_DepthDownsampleDescriptorSet; - depthDownsampleWriteDescSets[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - depthDownsampleWriteDescSets[idx].dstBinding = 1; - depthDownsampleWriteDescSets[idx].pImageInfo = &downsampleImageInfos[m_DepthMipLevelCount]; - } - - depthDownsampleWriteDescSets[14].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - depthDownsampleWriteDescSets[14].pNext = nullptr; - depthDownsampleWriteDescSets[14].descriptorCount = 1; - depthDownsampleWriteDescSets[14].dstArrayElement = 0; - depthDownsampleWriteDescSets[14].dstSet = m_DepthDownsampleDescriptorSet; - depthDownsampleWriteDescSets[14].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - depthDownsampleWriteDescSets[14].dstBinding = 2; - depthDownsampleWriteDescSets[14].pTexelBufferView = &m_AtomicCounterUAV; - - vkUpdateDescriptorSets(m_pDevice->GetDevice(), 
_countof(depthDownsampleWriteDescSets), depthDownsampleWriteDescSets, 0, nullptr); - - // Initial layout transitions - Barriers(cb, { - Transition(m_NormalHistoryBuffer.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_SpecularRoughnessHistory.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_DepthHierarchy.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_ASPECT_COLOR_BIT, m_DepthMipLevelCount), - Transition(m_DownSample.GetTexture()->Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT, 6), - Transition(m_SssrOutputBuffer.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - }); - - SubmitCommandBuffer(cb); -} - -//-------------------------------------------------------------------------------------- -// -// OnDestroyWindowSizeDependentResources -// -//-------------------------------------------------------------------------------------- -void SampleRenderer::OnDestroyWindowSizeDependentResources() -{ - m_Bloom.OnDestroyWindowSizeDependentResources(); - m_DownSample.OnDestroyWindowSizeDependentResources(); - - m_MotionVectors.OnDestroy(); - m_SpecularRoughness.OnDestroy(); - m_SpecularRoughnessHistory.OnDestroy(); - m_NormalBuffer.OnDestroy(); - m_NormalHistoryBuffer.OnDestroy(); - m_SssrOutputBuffer.OnDestroy(); - - VkDevice device = m_pDevice->GetDevice(); - - vkDestroyImageView(device, m_SssrSceneSRV, nullptr); - vkDestroyImageView(device, m_SssrDepthBufferHierarchySRV, nullptr); - vkDestroyImageView(device, m_SssrMotionBufferSRV, nullptr); - vkDestroyImageView(device, m_SssrNormalBufferSRV, nullptr); - vkDestroyImageView(device, m_SssrRoughnessBufferSRV, nullptr); - vkDestroyImageView(device, m_SssrNormalHistoryBufferSRV, nullptr); - vkDestroyImageView(device, 
m_SssrRoughnessHistoryBufferSRV, nullptr); - vkDestroyImageView(device, m_SssrOutputBufferUAV, nullptr); - vkDestroyImageView(device, m_ApplyPipelineRTV, nullptr); - vkDestroyImageView(device, m_DepthBufferSRV, nullptr); - for (int i = 0; i < 13; ++i) - { - if (m_DepthHierarchyDescriptors[i] != VK_NULL_HANDLE) - { - vkDestroyImageView(device, m_DepthHierarchyDescriptors[i], nullptr); - } - m_DepthHierarchyDescriptors[i] = VK_NULL_HANDLE; - } - vkDestroyImageView(device, m_HDRSRV, nullptr); - vkDestroyImageView(device, m_SpecularRoughnessSRV, nullptr); - vkDestroyImageView(device, m_NormalBufferSRV, nullptr); - vkDestroyImageView(device, m_MotionVectorsSRV, nullptr); - vkDestroyImageView(device, m_DepthBufferDSV, nullptr); - vkDestroyBufferView(device, m_AtomicCounterUAV, nullptr); - - if (m_SssrCreatedReflectionView) - { - ffxSssrDestroyReflectionView(m_SssrContext, m_SssrReflectionView); - } - - m_HDR.OnDestroy(); - m_DepthBuffer.OnDestroy(); - m_DepthHierarchy.OnDestroy(); - - vkDestroyFramebuffer(device, m_FramebufferHDR, nullptr); - vkDestroyFramebuffer(device, m_FramebufferPBR, nullptr); - vkDestroyFramebuffer(device, m_FramebufferMV, nullptr); - vkDestroyFramebuffer(device, m_FramebufferApply, nullptr); - - vmaDestroyBuffer(m_pDevice->GetAllocator(), m_AtomicCounter, m_AtomicCounterAllocation); -} - -//-------------------------------------------------------------------------------------- -// -// LoadScene -// -//-------------------------------------------------------------------------------------- -int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) -{ - // show loading progress - // - ImGui::OpenPopup("Loading"); - if (ImGui::BeginPopupModal("Loading", NULL, ImGuiWindowFlags_AlwaysAutoResize)) - { - float progress = (float)stage / 13.0f; - ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); - ImGui::EndPopup(); - } - - AsyncPool* pAsyncPool = &m_AsyncPool; - - // Loading stages - // - if (stage == 0) - { - } - else if (stage == 5) - { - 
Profile p("m_pGltfLoader->Load"); - - m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); - m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); - } - else if (stage == 6) - { - Profile p("LoadTextures"); - - // here we are loading onto the GPU all the textures and the inverse matrices - // this data will be used to create the PBR and Depth passes - m_pGLTFTexturesAndBuffers->LoadTextures(pAsyncPool); - } - else if (stage == 7) - { - Profile p("m_gltfDepth->OnCreate"); - - //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass - m_gltfDepth = new GltfDepthPass(); - m_gltfDepth->OnCreate( - m_pDevice, - m_RenderPassShadow, - &m_UploadHeap, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - pAsyncPool - ); - } - else if (stage == 8) - { - Profile p("m_gltfMotionVectors->OnCreate"); - - m_gltfMotionVectors = new GltfMotionVectorsPass(); - m_gltfMotionVectors->OnCreate( - m_pDevice, - m_RenderPassMV, - &m_UploadHeap, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - m_MotionVectors.GetFormat(), - m_NormalBuffer.GetFormat(), - pAsyncPool - ); - } - else if (stage == 9) - { - Profile p("m_gltfPBR->OnCreate"); - - // same thing as above but for the PBR pass - m_gltfPBR = new GltfPbrPass(); - m_gltfPBR->OnCreate( - m_pDevice, - m_RenderPassPBR, - &m_UploadHeap, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_AmbientLight, - false, - m_ShadowMapSRV, - true, true, true, false, - VK_SAMPLE_COUNT_1_BIT, - pAsyncPool - ); + } + + //-------------------------------------------------------------------------------------- + // + // OnDestroy + // + //-------------------------------------------------------------------------------------- + void SampleRenderer::OnDestroy() + { + m_ImGUI.OnDestroy(); + 
m_ToneMapping.OnDestroy(); + m_Bloom.OnDestroy(); + m_DownSample.OnDestroy(); + m_WireframeBox.OnDestroy(); + m_Wireframe.OnDestroy(); + m_SkyDomeProc.OnDestroy(); + m_SkyDome.OnDestroy(); + m_AmbientLight.OnDestroy(); + m_ShadowMap.OnDestroy(); + m_BrdfLut.OnDestroy(); + m_Sssr.OnDestroy(); + + VkDevice device = m_pDevice->GetDevice(); + + vkDestroySampler(device, m_LinearSampler, nullptr); + vkDestroyImageView(device, m_BrdfLutSRV, nullptr); + vkDestroyImageView(device, m_ShadowMapDSV, nullptr); + vkDestroyImageView(device, m_ShadowMapSRV, nullptr); + + vkDestroyPipeline(device, m_DepthDownsamplePipeline, nullptr); + vkDestroyPipelineLayout(device, m_DepthDownsamplePipelineLayout, nullptr); + vkDestroyDescriptorSetLayout(device, m_DepthDownsampleDescriptorSetLayout, nullptr); + m_ResourceViewHeaps.FreeDescriptor(m_DepthDownsampleDescriptorSet); + + vkDestroyPipeline(device, m_ApplyPipeline, nullptr); + vkDestroyPipelineLayout(device, m_ApplyPipelineLayout, nullptr); + vkDestroyDescriptorSetLayout(device, m_ApplyPipelineDescriptorSetLayout, nullptr); + + for (int i = 0; i < backBufferCount; ++i) + { + m_ResourceViewHeaps.FreeDescriptor(m_ApplyPipelineDescriptorSet[i]); + } + + vkDestroyRenderPass(device, m_RenderPassShadow, nullptr); + vkDestroyRenderPass(device, m_RenderPassClearHDR, nullptr); + vkDestroyRenderPass(device, m_RenderPassHDR, nullptr); + vkDestroyRenderPass(device, m_RenderPassPBR, nullptr); + vkDestroyRenderPass(device, m_RenderPassMV, nullptr); + vkDestroyRenderPass(device, m_RenderPassApply, nullptr); + + vkDestroyFramebuffer(device, m_FramebufferShadows, nullptr); + + m_UploadHeap.OnDestroy(); + m_GPUTimer.OnDestroy(); + m_VidMemBufferPool.OnDestroy(); + m_SysMemBufferPool.OnDestroy(); + m_ConstantBufferRing.OnDestroy(); + m_ResourceViewHeaps.OnDestroy(); + m_CommandListRing.OnDestroy(); + } + + //-------------------------------------------------------------------------------------- + // + // OnCreateWindowSizeDependentResources + // + 
//-------------------------------------------------------------------------------------- + void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain* pSwapChain, uint32_t Width, uint32_t Height) + { + m_Width = Width; + m_Height = Height; + + // Set the viewport + m_Viewport.x = 0; + m_Viewport.y = (float)m_Height; + m_Viewport.width = (float)m_Width; + m_Viewport.height = -(float)(m_Height); + m_Viewport.minDepth = (float)0.0f; + m_Viewport.maxDepth = (float)1.0f; + + // Create scissor rectangle + m_Scissor.extent.width = m_Width; + m_Scissor.extent.height = m_Height; + m_Scissor.offset.x = 0; + m_Scissor.offset.y = 0; + + // Create depth buffer + m_DepthBuffer.InitDepthStencil(m_pDevice, m_Width, m_Height, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "DepthBuffer"); + m_DepthBuffer.CreateSRV(&m_DepthBufferSRV); + m_DepthBuffer.CreateDSV(&m_DepthBufferDSV); + + // Create Texture + RTV + m_HDR.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "HDR"); + m_HDR.CreateSRV(&m_HDRSRV); + + VkImageCreateInfo imageCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; + imageCreateInfo.pNext = nullptr; + imageCreateInfo.arrayLayers = 1; + imageCreateInfo.extent = { m_Width, m_Height, 1 }; + imageCreateInfo.imageType = VK_IMAGE_TYPE_2D; + imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageCreateInfo.mipLevels = 1; + imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageCreateInfo.usage = (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + imageCreateInfo.flags = 0; + + m_NormalBuffer.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_SAMPLE_COUNT_1_BIT, 
(VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "m_NormalBuffer"); + m_NormalBuffer.CreateSRV(&m_NormalBufferSRV); + + imageCreateInfo.format = VK_FORMAT_A2B10G10R10_UNORM_PACK32; + m_NormalHistoryBuffer.Init(m_pDevice, &imageCreateInfo, "m_NormalHistoryBuffer"); + m_NormalHistoryBuffer.CreateSRV(&m_NormalHistoryBufferSRV); + + m_SpecularRoughness.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "m_SpecularRoughness"); + m_SpecularRoughness.CreateSRV(&m_SpecularRoughnessSRV); + + m_MotionVectors.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "m_MotionVector"); + m_MotionVectors.CreateSRV(&m_MotionVectorsSRV); + + // Create framebuffer for the RT + { + VkImageView hdrAttachments[2] = { m_HDRSRV, m_DepthBufferDSV }; + + VkFramebufferCreateInfo hdrFramebufferInfo = {}; + hdrFramebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + hdrFramebufferInfo.pNext = NULL; + hdrFramebufferInfo.renderPass = m_RenderPassHDR; + hdrFramebufferInfo.attachmentCount = _countof(hdrAttachments); + hdrFramebufferInfo.pAttachments = hdrAttachments; + hdrFramebufferInfo.width = m_Width; + hdrFramebufferInfo.height = m_Height; + hdrFramebufferInfo.layers = 1; + + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &hdrFramebufferInfo, NULL, &m_FramebufferHDR); + assert(res == VK_SUCCESS); + } + + { + VkImageView pbrAttachments[3] = { m_HDRSRV, m_SpecularRoughnessSRV, m_DepthBufferDSV }; + + VkFramebufferCreateInfo pbrFramebufferInfo = {}; + pbrFramebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + pbrFramebufferInfo.pNext = NULL; + 
pbrFramebufferInfo.renderPass = m_RenderPassPBR; + pbrFramebufferInfo.attachmentCount = _countof(pbrAttachments); + pbrFramebufferInfo.pAttachments = pbrAttachments; + pbrFramebufferInfo.width = m_Width; + pbrFramebufferInfo.height = m_Height; + pbrFramebufferInfo.layers = 1; + + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &pbrFramebufferInfo, NULL, &m_FramebufferPBR); + assert(res == VK_SUCCESS); + } + + { + VkImageView mvAttachments[3] = { m_MotionVectorsSRV, m_NormalBufferSRV, m_DepthBufferDSV }; + + VkFramebufferCreateInfo mvFramebufferInfo = {}; + mvFramebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + mvFramebufferInfo.pNext = NULL; + mvFramebufferInfo.renderPass = m_RenderPassMV; + mvFramebufferInfo.attachmentCount = _countof(mvAttachments); + mvFramebufferInfo.pAttachments = mvAttachments; + mvFramebufferInfo.width = m_Width; + mvFramebufferInfo.height = m_Height; + mvFramebufferInfo.layers = 1; + + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &mvFramebufferInfo, NULL, &m_FramebufferMV); + assert(res == VK_SUCCESS); + } + + { + m_HDR.CreateRTV(&m_ApplyPipelineRTV); + VkImageView attachmentViews[1] = { m_ApplyPipelineRTV }; + + VkFramebufferCreateInfo framebufferInfo = {}; + framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebufferInfo.pNext = NULL; + framebufferInfo.renderPass = m_RenderPassApply; + framebufferInfo.attachmentCount = 1; + framebufferInfo.pAttachments = attachmentViews; + framebufferInfo.width = m_Width; + framebufferInfo.height = m_Height; + framebufferInfo.layers = 1; + + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &framebufferInfo, NULL, &m_FramebufferApply); + assert(res == VK_SUCCESS); + } + + // update bloom and downscaling effect + m_DownSample.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR, 6); + m_Bloom.OnCreateWindowSizeDependentResources(m_Width / 2, m_Height / 2, m_DownSample.GetTexture(), 6, &m_HDR); + + // update the pipelines if 
the swapchain render pass has changed (for example when the format of the swapchain changes) + m_ToneMapping.UpdatePipelines(pSwapChain->GetRenderPass()); + m_ImGUI.UpdatePipeline(pSwapChain->GetRenderPass()); + + // Depth downsampling pass with single CS + { + m_DepthMipLevelCount = static_cast(std::log2(std::max(m_Width, m_Height))) + 1; + + // Downsampled depth buffer + imageCreateInfo.format = VK_FORMAT_R32_SFLOAT; + imageCreateInfo.mipLevels = m_DepthMipLevelCount; + m_DepthHierarchy.Init(m_pDevice, &imageCreateInfo, "m_DepthHierarchy"); + for (UINT i = 0; i < std::min(13u, m_DepthMipLevelCount); ++i) + { + m_DepthHierarchy.CreateSRV(&m_DepthHierarchyDescriptors[i], i); + } + m_DepthHierarchy.CreateSRV(&m_DepthHierarchySRV); + + // Atomic counter + + VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + bufferCreateInfo.pNext = nullptr; + bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + bufferCreateInfo.size = 4; + bufferCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.memoryTypeBits = 0; + allocCreateInfo.pool = VK_NULL_HANDLE; + allocCreateInfo.preferredFlags = 0; + allocCreateInfo.pUserData = "m_AtomicCounter"; + allocCreateInfo.requiredFlags = 0; + allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN; + if (VK_SUCCESS != vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferCreateInfo, &allocCreateInfo, &m_AtomicCounter, &m_AtomicCounterAllocation, nullptr)) + { + Trace("Failed to create buffer for atomic counter"); + } + + VkBufferViewCreateInfo bufferViewCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO }; + bufferViewCreateInfo.buffer = m_AtomicCounter; + bufferViewCreateInfo.format = VK_FORMAT_R32_UINT; + bufferViewCreateInfo.range = VK_WHOLE_SIZE; + bufferViewCreateInfo.flags = 0; + if (VK_SUCCESS != vkCreateBufferView(m_pDevice->GetDevice(), &bufferViewCreateInfo, 
nullptr, &m_AtomicCounterUAV)) + { + Trace("Failed to create buffer view for atomic counter"); + } + } + + + m_CommandListRing.OnBeginFrame(); + VkCommandBuffer cb = BeginNewCommandBuffer(); + + //==============Setup SSSR============== + SSSRCreationInfo sssrInput; + sssrInput.HDRView = m_HDRSRV; + sssrInput.DepthHierarchyView = m_DepthHierarchySRV; + sssrInput.MotionVectorsView = m_MotionVectorsSRV; + sssrInput.NormalBufferView = m_NormalBufferSRV; + sssrInput.NormalHistoryBufferView = m_NormalHistoryBufferSRV; + sssrInput.SpecularRoughnessView = m_SpecularRoughnessSRV; + sssrInput.EnvironmentMapView = m_SkyDome.GetCubeSpecularTextureView(); + sssrInput.EnvironmentMapSampler = m_SkyDome.GetCubeSpecularTextureSampler(); + sssrInput.pingPongNormal = false; + sssrInput.pingPongRoughness = false; + sssrInput.outputWidth = m_Width; + sssrInput.outputHeight = m_Height; + m_Sssr.OnCreateWindowSizeDependentResources(cb, sssrInput); + + // Fill apply reflections descriptor set + VkDescriptorImageInfo applyReflectionsImageInfos[5]; + applyReflectionsImageInfos[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + applyReflectionsImageInfos[0].imageView = m_Sssr.GetOutputTextureView(); + applyReflectionsImageInfos[0].sampler = VK_NULL_HANDLE; + applyReflectionsImageInfos[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + applyReflectionsImageInfos[1].imageView = m_NormalBufferSRV; + applyReflectionsImageInfos[1].sampler = VK_NULL_HANDLE; + applyReflectionsImageInfos[2].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + applyReflectionsImageInfos[2].imageView = m_SpecularRoughnessSRV; + applyReflectionsImageInfos[2].sampler = VK_NULL_HANDLE; + applyReflectionsImageInfos[3].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + applyReflectionsImageInfos[3].imageView = m_BrdfLutSRV; + applyReflectionsImageInfos[3].sampler = VK_NULL_HANDLE; + applyReflectionsImageInfos[4].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + 
applyReflectionsImageInfos[4].imageView = VK_NULL_HANDLE; + applyReflectionsImageInfos[4].sampler = m_LinearSampler; + + for (int i = 0; i < backBufferCount; ++i) + { + VkWriteDescriptorSet applyReflectionsWriteDescSets[5]; + applyReflectionsWriteDescSets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + applyReflectionsWriteDescSets[0].pNext = nullptr; + applyReflectionsWriteDescSets[0].descriptorCount = 1; + applyReflectionsWriteDescSets[0].dstArrayElement = 0; + applyReflectionsWriteDescSets[0].dstSet = m_ApplyPipelineDescriptorSet[i]; + applyReflectionsWriteDescSets[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + applyReflectionsWriteDescSets[0].dstBinding = 0; + applyReflectionsWriteDescSets[0].pImageInfo = &applyReflectionsImageInfos[0]; + + applyReflectionsWriteDescSets[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + applyReflectionsWriteDescSets[1].pNext = nullptr; + applyReflectionsWriteDescSets[1].descriptorCount = 1; + applyReflectionsWriteDescSets[1].dstArrayElement = 0; + applyReflectionsWriteDescSets[1].dstSet = m_ApplyPipelineDescriptorSet[i]; + applyReflectionsWriteDescSets[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + applyReflectionsWriteDescSets[1].dstBinding = 1; + applyReflectionsWriteDescSets[1].pImageInfo = &applyReflectionsImageInfos[1]; + + applyReflectionsWriteDescSets[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + applyReflectionsWriteDescSets[2].pNext = nullptr; + applyReflectionsWriteDescSets[2].descriptorCount = 1; + applyReflectionsWriteDescSets[2].dstArrayElement = 0; + applyReflectionsWriteDescSets[2].dstSet = m_ApplyPipelineDescriptorSet[i]; + applyReflectionsWriteDescSets[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + applyReflectionsWriteDescSets[2].dstBinding = 2; + applyReflectionsWriteDescSets[2].pImageInfo = &applyReflectionsImageInfos[2]; + + applyReflectionsWriteDescSets[3].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + applyReflectionsWriteDescSets[3].pNext = nullptr; + 
applyReflectionsWriteDescSets[3].descriptorCount = 1; + applyReflectionsWriteDescSets[3].dstArrayElement = 0; + applyReflectionsWriteDescSets[3].dstSet = m_ApplyPipelineDescriptorSet[i]; + applyReflectionsWriteDescSets[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + applyReflectionsWriteDescSets[3].dstBinding = 3; + applyReflectionsWriteDescSets[3].pImageInfo = &applyReflectionsImageInfos[3]; + + applyReflectionsWriteDescSets[4].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + applyReflectionsWriteDescSets[4].pNext = nullptr; + applyReflectionsWriteDescSets[4].descriptorCount = 1; + applyReflectionsWriteDescSets[4].dstArrayElement = 0; + applyReflectionsWriteDescSets[4].dstSet = m_ApplyPipelineDescriptorSet[i]; + applyReflectionsWriteDescSets[4].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + applyReflectionsWriteDescSets[4].dstBinding = 4; + applyReflectionsWriteDescSets[4].pImageInfo = &applyReflectionsImageInfos[4]; + + vkUpdateDescriptorSets(m_pDevice->GetDevice(), _countof(applyReflectionsWriteDescSets), applyReflectionsWriteDescSets, 0, nullptr); + } + + // Fill depth downsample descriptor set + VkDescriptorImageInfo downsampleImageInfos[15]; + downsampleImageInfos[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + downsampleImageInfos[0].imageView = m_DepthBufferDSV; + downsampleImageInfos[0].sampler = VK_NULL_HANDLE; + + uint32_t i = 0; + for (; i < m_DepthMipLevelCount; ++i) + { + uint32_t idx = i + 1; + downsampleImageInfos[idx].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + downsampleImageInfos[idx].imageView = m_DepthHierarchyDescriptors[i]; + downsampleImageInfos[idx].sampler = VK_NULL_HANDLE; + } + + VkWriteDescriptorSet depthDownsampleWriteDescSets[15]; + depthDownsampleWriteDescSets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + depthDownsampleWriteDescSets[0].pNext = nullptr; + depthDownsampleWriteDescSets[0].descriptorCount = 1; + depthDownsampleWriteDescSets[0].dstArrayElement = 0; + depthDownsampleWriteDescSets[0].dstSet = 
m_DepthDownsampleDescriptorSet; + depthDownsampleWriteDescSets[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + depthDownsampleWriteDescSets[0].dstBinding = 0; + depthDownsampleWriteDescSets[0].pImageInfo = &downsampleImageInfos[0]; + + i = 0; + for (; i < m_DepthMipLevelCount; ++i) + { + uint32_t idx = i + 1; + depthDownsampleWriteDescSets[idx].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + depthDownsampleWriteDescSets[idx].pNext = nullptr; + depthDownsampleWriteDescSets[idx].descriptorCount = 1; + depthDownsampleWriteDescSets[idx].dstArrayElement = i; + depthDownsampleWriteDescSets[idx].dstSet = m_DepthDownsampleDescriptorSet; + depthDownsampleWriteDescSets[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + depthDownsampleWriteDescSets[idx].dstBinding = 1; + depthDownsampleWriteDescSets[idx].pImageInfo = &downsampleImageInfos[idx]; + } + + // Map the remaining mip levels to the lowest mip + for (; i < 13; ++i) + { + uint32_t idx = i + 1; + depthDownsampleWriteDescSets[idx].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + depthDownsampleWriteDescSets[idx].pNext = nullptr; + depthDownsampleWriteDescSets[idx].descriptorCount = 1; + depthDownsampleWriteDescSets[idx].dstArrayElement = i; + depthDownsampleWriteDescSets[idx].dstSet = m_DepthDownsampleDescriptorSet; + depthDownsampleWriteDescSets[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + depthDownsampleWriteDescSets[idx].dstBinding = 1; + depthDownsampleWriteDescSets[idx].pImageInfo = &downsampleImageInfos[m_DepthMipLevelCount]; + } + + depthDownsampleWriteDescSets[14].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + depthDownsampleWriteDescSets[14].pNext = nullptr; + depthDownsampleWriteDescSets[14].descriptorCount = 1; + depthDownsampleWriteDescSets[14].dstArrayElement = 0; + depthDownsampleWriteDescSets[14].dstSet = m_DepthDownsampleDescriptorSet; + depthDownsampleWriteDescSets[14].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + depthDownsampleWriteDescSets[14].dstBinding 
= 2; + depthDownsampleWriteDescSets[14].pTexelBufferView = &m_AtomicCounterUAV; + + vkUpdateDescriptorSets(m_pDevice->GetDevice(), _countof(depthDownsampleWriteDescSets), depthDownsampleWriteDescSets, 0, nullptr); + + // Initial layout transitions + Barriers(cb, { + Transition(m_NormalHistoryBuffer.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), + Transition(m_DepthHierarchy.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_ASPECT_COLOR_BIT, m_DepthMipLevelCount), + Transition(m_DownSample.GetTexture()->Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT, 6), + Transition(m_Sssr.GetOutputTexture()->Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), + }); + + SubmitCommandBuffer(cb); + } + + //-------------------------------------------------------------------------------------- + // + // OnDestroyWindowSizeDependentResources + // + //-------------------------------------------------------------------------------------- + void SampleRenderer::OnDestroyWindowSizeDependentResources() + { + m_Bloom.OnDestroyWindowSizeDependentResources(); + m_DownSample.OnDestroyWindowSizeDependentResources(); + m_Sssr.OnDestroyWindowSizeDependentResources(); + + m_MotionVectors.OnDestroy(); + m_SpecularRoughness.OnDestroy(); + m_NormalBuffer.OnDestroy(); + m_NormalHistoryBuffer.OnDestroy(); + + VkDevice device = m_pDevice->GetDevice(); + + vkDestroyImageView(device, m_ApplyPipelineRTV, nullptr); + vkDestroyImageView(device, m_DepthBufferSRV, nullptr); + + for (int i = 0; i < 13; ++i) + { + if (m_DepthHierarchyDescriptors[i] != VK_NULL_HANDLE) + { + vkDestroyImageView(device, m_DepthHierarchyDescriptors[i], nullptr); + } + m_DepthHierarchyDescriptors[i] = VK_NULL_HANDLE; + } + vkDestroyImageView(device, m_HDRSRV, nullptr); + vkDestroyImageView(device, m_DepthHierarchySRV, nullptr); + 
vkDestroyImageView(device, m_SpecularRoughnessSRV, nullptr); + vkDestroyImageView(device, m_NormalBufferSRV, nullptr); + vkDestroyImageView(device, m_NormalHistoryBufferSRV, nullptr); + vkDestroyImageView(device, m_MotionVectorsSRV, nullptr); + vkDestroyImageView(device, m_DepthBufferDSV, nullptr); + vkDestroyBufferView(device, m_AtomicCounterUAV, nullptr); + + m_HDR.OnDestroy(); + m_DepthBuffer.OnDestroy(); + m_DepthHierarchy.OnDestroy(); + + vkDestroyFramebuffer(device, m_FramebufferHDR, nullptr); + vkDestroyFramebuffer(device, m_FramebufferPBR, nullptr); + vkDestroyFramebuffer(device, m_FramebufferMV, nullptr); + vkDestroyFramebuffer(device, m_FramebufferApply, nullptr); + + vmaDestroyBuffer(m_pDevice->GetAllocator(), m_AtomicCounter, m_AtomicCounterAllocation); + } + + //-------------------------------------------------------------------------------------- + // + // LoadScene + // + //-------------------------------------------------------------------------------------- + int SampleRenderer::LoadScene(GLTFCommon* pGLTFCommon, int stage) + { + // show loading progress + // + ImGui::OpenPopup("Loading"); + if (ImGui::BeginPopupModal("Loading", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + float progress = (float)stage / 13.0f; + ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); + ImGui::EndPopup(); + } + + AsyncPool* pAsyncPool = &m_AsyncPool; + + // Loading stages + // + if (stage == 0) + { + } + else if (stage == 5) + { + Profile p("m_pGltfLoader->Load"); + + m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); + m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); + } + else if (stage == 6) + { + Profile p("LoadTextures"); + + // here we are loading onto the GPU all the textures and the inverse matrices + // this data will be used to create the PBR and Depth passes + m_pGLTFTexturesAndBuffers->LoadTextures(pAsyncPool); + } + else if (stage == 7) + { + Profile 
p("m_gltfDepth->OnCreate"); + + //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass + m_gltfDepth = new GltfDepthPass(); + m_gltfDepth->OnCreate( + m_pDevice, + m_RenderPassShadow, + &m_UploadHeap, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + pAsyncPool + ); + } + else if (stage == 8) + { + Profile p("m_gltfMotionVectors->OnCreate"); + + m_gltfMotionVectors = new GltfMotionVectorsPass(); + m_gltfMotionVectors->OnCreate( + m_pDevice, + m_RenderPassMV, + &m_UploadHeap, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + m_MotionVectors.GetFormat(), + m_NormalBuffer.GetFormat(), + pAsyncPool + ); + } + else if (stage == 9) + { + Profile p("m_gltfPBR->OnCreate"); + + // same thing as above but for the PBR pass + m_gltfPBR = new GltfPbrPass(); + m_gltfPBR->OnCreate( + m_pDevice, + m_RenderPassPBR, + &m_UploadHeap, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + &m_AmbientLight, + false, + m_ShadowMapSRV, + true, true, false, false, + VK_SAMPLE_COUNT_1_BIT, + pAsyncPool + ); #if (USE_VID_MEM==true) - // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs + m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + m_UploadHeap.FlushAndFinish(); #endif - } - else if (stage == 10) - { - Profile p("m_gltfBBox->OnCreate"); - - // just a bounding box pass that will draw boundingboxes instead of the geometry itself - m_gltfBBox = new GltfBBoxPass(); - m_gltfBBox->OnCreate( - m_pDevice, - m_RenderPassHDR, - &m_ResourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_Wireframe - ); + } + else if (stage == 10) + { 
+ Profile p("m_gltfBBox->OnCreate"); + + // just a bounding box pass that will draw boundingboxes instead of the geometry itself + m_gltfBBox = new GltfBBoxPass(); + m_gltfBBox->OnCreate( + m_pDevice, + m_RenderPassHDR, + &m_ResourceViewHeaps, + &m_ConstantBufferRing, + &m_VidMemBufferPool, + m_pGLTFTexturesAndBuffers, + &m_Wireframe + ); #if (USE_VID_MEM==true) - // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs + m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + m_UploadHeap.FlushAndFinish(); #endif - } - else if (stage == 11) - { - Profile p("Flush"); + } + else if (stage == 11) + { + Profile p("Flush"); - m_UploadHeap.FlushAndFinish(); + m_UploadHeap.FlushAndFinish(); #if (USE_VID_MEM==true) - //once everything is uploaded we dont need he upload heaps anymore - m_VidMemBufferPool.FreeUploadHeap(); + //once everything is uploaded we dont need he upload heaps anymore + m_VidMemBufferPool.FreeUploadHeap(); #endif - // tell caller that we are done loading the map - return 0; - } - - stage++; - return stage; -} - -//-------------------------------------------------------------------------------------- -// -// UnloadScene -// -//-------------------------------------------------------------------------------------- -void SampleRenderer::UnloadScene() -{ - if (m_gltfPBR) - { - m_gltfPBR->OnDestroy(); - delete m_gltfPBR; - m_gltfPBR = NULL; - } - - if (m_gltfMotionVectors) - { - m_gltfMotionVectors->OnDestroy(); - delete m_gltfMotionVectors; - m_gltfMotionVectors = NULL; - } - - if (m_gltfDepth) - { - m_gltfDepth->OnDestroy(); - delete m_gltfDepth; - m_gltfDepth = NULL; - } - - if (m_gltfBBox) - { - m_gltfBBox->OnDestroy(); - delete m_gltfBBox; - m_gltfBBox = NULL; - } - - if (m_pGLTFTexturesAndBuffers) - { - 
m_pGLTFTexturesAndBuffers->OnDestroy(); - delete m_pGLTFTexturesAndBuffers; - m_pGLTFTexturesAndBuffers = NULL; - } -} - -void SampleRenderer::CreateApplyReflectionsPipeline() -{ - VkDevice device = m_pDevice->GetDevice(); - - VkDescriptorSetLayoutBinding bindings[6]; - bindings[0].binding = 0; - bindings[0].descriptorCount = 1; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; - bindings[0].pImmutableSamplers = nullptr; - - bindings[1].binding = 1; - bindings[1].descriptorCount = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; - bindings[1].pImmutableSamplers = nullptr; - - bindings[2].binding = 2; - bindings[2].descriptorCount = 1; - bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - bindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; - bindings[2].pImmutableSamplers = nullptr; - - bindings[3].binding = 3; - bindings[3].descriptorCount = 1; - bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - bindings[3].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; - bindings[3].pImmutableSamplers = nullptr; - - bindings[4].binding = 4; - bindings[4].descriptorCount = 1; - bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - bindings[4].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; - bindings[4].pImmutableSamplers = nullptr; - - bindings[5].binding = 5; - bindings[5].descriptorCount = 1; - bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[5].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; - bindings[5].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; - descSetLayoutCreateInfo.pNext = nullptr; - 
descSetLayoutCreateInfo.bindingCount = _countof(bindings); - descSetLayoutCreateInfo.pBindings = bindings; - descSetLayoutCreateInfo.flags = 0; - - if (VK_SUCCESS != vkCreateDescriptorSetLayout(device, &descSetLayoutCreateInfo, nullptr, &m_ApplyPipelineDescriptorSetLayout)) - { - Trace("Failed to create set layout for apply reflections pipeline."); - } - - VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; - pipelineLayoutCreateInfo.flags = 0; - pipelineLayoutCreateInfo.pNext = nullptr; - pipelineLayoutCreateInfo.setLayoutCount = 1; - pipelineLayoutCreateInfo.pSetLayouts = &m_ApplyPipelineDescriptorSetLayout; - pipelineLayoutCreateInfo.pushConstantRangeCount = 0; - pipelineLayoutCreateInfo.pPushConstantRanges = nullptr; - - if (VK_SUCCESS != vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &m_ApplyPipelineLayout)) - { - Trace("Failed to create pipeline layout for apply reflections pipeline."); - } - - DefineList defines; - VkPipelineShaderStageCreateInfo vs, fs; - VKCompileFromFile(device, VK_SHADER_STAGE_VERTEX_BIT, "ApplyReflections.hlsl", "vs_main", "-T vs_6_0", &defines, &vs); - VKCompileFromFile(device, VK_SHADER_STAGE_FRAGMENT_BIT, "ApplyReflections.hlsl", "ps_main", "-T ps_6_0", &defines, &fs); - - VkPipelineVertexInputStateCreateInfo vertexInputStateInfo = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO }; - vertexInputStateInfo.pNext = nullptr; - vertexInputStateInfo.flags = 0; - vertexInputStateInfo.vertexBindingDescriptionCount = 0; - vertexInputStateInfo.pVertexBindingDescriptions = nullptr; - vertexInputStateInfo.vertexAttributeDescriptionCount = 0; - vertexInputStateInfo.pVertexAttributeDescriptions = nullptr; - - VkPipelineColorBlendAttachmentState pipelineColorBlendAttachmentState = {}; - pipelineColorBlendAttachmentState.blendEnable = VK_TRUE; - pipelineColorBlendAttachmentState.srcColorBlendFactor = VK_BLEND_FACTOR_ONE; - 
pipelineColorBlendAttachmentState.dstColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - pipelineColorBlendAttachmentState.colorBlendOp = VK_BLEND_OP_ADD; - pipelineColorBlendAttachmentState.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; - pipelineColorBlendAttachmentState.dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE; - pipelineColorBlendAttachmentState.alphaBlendOp = VK_BLEND_OP_ADD; - pipelineColorBlendAttachmentState.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; - - VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO }; - colorBlendStateCreateInfo.pNext = nullptr; - colorBlendStateCreateInfo.flags = 0; - colorBlendStateCreateInfo.logicOpEnable = false; - colorBlendStateCreateInfo.attachmentCount = 1; - colorBlendStateCreateInfo.pAttachments = &pipelineColorBlendAttachmentState; - - VkDynamicState dynamicStates[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; - VkPipelineDynamicStateCreateInfo pipelineDynamicStateInfo = { VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO }; - pipelineDynamicStateInfo.pNext = nullptr; - pipelineDynamicStateInfo.flags = 0; - pipelineDynamicStateInfo.dynamicStateCount = _countof(dynamicStates); - pipelineDynamicStateInfo.pDynamicStates = dynamicStates; - - VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO }; - pipelineRasterizationStateCreateInfo.pNext = nullptr; - pipelineRasterizationStateCreateInfo.flags = 0; - pipelineRasterizationStateCreateInfo.depthClampEnable = VK_FALSE; - pipelineRasterizationStateCreateInfo.rasterizerDiscardEnable = VK_FALSE; - pipelineRasterizationStateCreateInfo.polygonMode = VK_POLYGON_MODE_FILL; - pipelineRasterizationStateCreateInfo.cullMode = VK_CULL_MODE_NONE; - pipelineRasterizationStateCreateInfo.frontFace = VK_FRONT_FACE_CLOCKWISE; - 
pipelineRasterizationStateCreateInfo.depthBiasEnable = VK_FALSE; - pipelineRasterizationStateCreateInfo.depthBiasConstantFactor = 0; - pipelineRasterizationStateCreateInfo.depthBiasClamp = 0; - pipelineRasterizationStateCreateInfo.depthBiasSlopeFactor = 0; - pipelineRasterizationStateCreateInfo.lineWidth = 0; - - VkPipelineMultisampleStateCreateInfo multisampleStateInfo = { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO }; - multisampleStateInfo.pNext = nullptr; - multisampleStateInfo.flags = 0; - multisampleStateInfo.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisampleStateInfo.sampleShadingEnable = VK_FALSE; - multisampleStateInfo.minSampleShading = 0; - multisampleStateInfo.pSampleMask = nullptr; - multisampleStateInfo.alphaToCoverageEnable = VK_FALSE; - multisampleStateInfo.alphaToOneEnable = VK_FALSE; - - VkPipelineViewportStateCreateInfo viewportStateInfo = {}; - viewportStateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewportStateInfo.pNext = nullptr; - viewportStateInfo.flags = 0; - viewportStateInfo.viewportCount = 1; - viewportStateInfo.scissorCount = 1; - viewportStateInfo.pScissors = nullptr; - viewportStateInfo.pViewports = nullptr; - - VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO }; - inputAssemblyState.pNext = nullptr; - inputAssemblyState.flags = 0; - inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - inputAssemblyState.primitiveRestartEnable = VK_FALSE; - - VkAttachmentDescription colorAttachments[1]; - // m_HDR - AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[0]); - m_RenderPassApply = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, nullptr); - - VkPipelineShaderStageCreateInfo stages[] = { vs, fs }; - - VkGraphicsPipelineCreateInfo 
pipelineCreateInfo = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO }; - pipelineCreateInfo.pNext = nullptr; - pipelineCreateInfo.flags = 0; - pipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE; - pipelineCreateInfo.basePipelineIndex = 0; - pipelineCreateInfo.layout = m_ApplyPipelineLayout; - pipelineCreateInfo.pColorBlendState = &colorBlendStateCreateInfo; - pipelineCreateInfo.pDepthStencilState = nullptr; - pipelineCreateInfo.pDynamicState = &pipelineDynamicStateInfo; - pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState; - pipelineCreateInfo.pMultisampleState = &multisampleStateInfo; - pipelineCreateInfo.pRasterizationState = &pipelineRasterizationStateCreateInfo; - pipelineCreateInfo.stageCount = _countof(stages); - pipelineCreateInfo.pStages = stages; - pipelineCreateInfo.pTessellationState = nullptr; - pipelineCreateInfo.pVertexInputState = &vertexInputStateInfo; - pipelineCreateInfo.pViewportState = &viewportStateInfo; - pipelineCreateInfo.renderPass = m_RenderPassApply; - pipelineCreateInfo.subpass = 0; - - if (VK_SUCCESS != vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineCreateInfo, nullptr, &m_ApplyPipeline)) - { - Trace("Failed to create pipeline for the apply reflection target pass."); - } - - for (int i = 0; i < backBufferCount; ++i) - { - m_ResourceViewHeaps.AllocDescriptor(m_ApplyPipelineDescriptorSetLayout, &m_ApplyPipelineDescriptorSet[i]); - } -} - -void SampleRenderer::CreateDepthDownsamplePipeline() -{ - VkDevice device = m_pDevice->GetDevice(); - - VkDescriptorSetLayoutBinding bindings[3]; - bindings[0].binding = 0; - bindings[0].descriptorCount = 1; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[0].pImmutableSamplers = nullptr; - - bindings[1].binding = 1; - bindings[1].descriptorCount = 13; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - 
bindings[1].pImmutableSamplers = nullptr; - - bindings[2].binding = 2; - bindings[2].descriptorCount = 1; - bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[2].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; - descSetLayoutCreateInfo.pNext = nullptr; - descSetLayoutCreateInfo.bindingCount = _countof(bindings); - descSetLayoutCreateInfo.pBindings = bindings; - descSetLayoutCreateInfo.flags = 0; - - if (VK_SUCCESS != vkCreateDescriptorSetLayout(device, &descSetLayoutCreateInfo, nullptr, &m_DepthDownsampleDescriptorSetLayout)) - { - Trace("Failed to create descriptor set layout for depth downsampling pipeline."); - } - - VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; - pipelineLayoutCreateInfo.flags = 0; - pipelineLayoutCreateInfo.pNext = nullptr; - pipelineLayoutCreateInfo.setLayoutCount = 1; - pipelineLayoutCreateInfo.pSetLayouts = &m_DepthDownsampleDescriptorSetLayout; - pipelineLayoutCreateInfo.pushConstantRangeCount = 0; - pipelineLayoutCreateInfo.pPushConstantRanges = nullptr; - - if (VK_SUCCESS != vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &m_DepthDownsamplePipelineLayout)) - { - Trace("Failed to create pipeline layout for depth downsampling pipeline."); - } - - DefineList defines; - VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo; - VKCompileFromFile(device, VK_SHADER_STAGE_COMPUTE_BIT, "DepthDownsample.hlsl", "main", "-T cs_6_0", &defines, &pipelineShaderStageCreateInfo); - - VkComputePipelineCreateInfo pipelineCreateInfo = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO }; - pipelineCreateInfo.pNext = nullptr; - pipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE; - pipelineCreateInfo.basePipelineIndex = 0; - pipelineCreateInfo.flags = 0; - pipelineCreateInfo.layout = 
m_DepthDownsamplePipelineLayout; - pipelineCreateInfo.stage = pipelineShaderStageCreateInfo; - - if (VK_SUCCESS != vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineCreateInfo, nullptr, &m_DepthDownsamplePipeline)) - { - Trace("Failed to create pipeline for depth downsampling pipeline."); - } - - m_ResourceViewHeaps.AllocDescriptor(m_DepthDownsampleDescriptorSetLayout, &m_DepthDownsampleDescriptorSet); -} - -void SampleRenderer::StallFrame(float targetFrametime) -{ - // Simulate lower frame rates - static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - std::chrono::duration diff = now - last; - last = now; - float deltaTime = 1000 * static_cast(diff.count()); - if (deltaTime < targetFrametime) - { - int deltaCount = static_cast(targetFrametime - deltaTime); - std::this_thread::sleep_for(std::chrono::milliseconds(deltaCount)); - } -} - -void SampleRenderer::BeginFrame(VkCommandBuffer cb) -{ - m_CurrentFrame = (m_CurrentFrame + 1) % backBufferCount; - FfxSssrStatus status = ffxSssrAdvanceToNextFrame(m_SssrContext); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrAdvanceToNextFrame failed."); - } - - // Timing values - // - double nanosecondsBetweenGPUTicks = m_pDevice->GetPhysicalDeviceProperries().limits.timestampPeriod; - m_MillisecondsBetweenGpuTicks = 1e-6 * nanosecondsBetweenGPUTicks; - - // Let our resource managers do some house keeping - // - m_ConstantBufferRing.OnBeginFrame(); - m_GPUTimer.OnBeginFrame(cb, &m_TimeStamps); -} - -VkBufferMemoryBarrier SampleRenderer::BufferBarrier(VkBuffer buffer) -{ - VkBufferMemoryBarrier barrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - 
barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = VK_WHOLE_SIZE; - return barrier; -} - -VkImageMemoryBarrier SampleRenderer::Transition(VkImage image, VkImageLayout before, VkImageLayout after, VkImageAspectFlags aspectMask, int mipCount) -{ - VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_MEMORY_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_MEMORY_READ_BIT; - barrier.oldLayout = before; - barrier.newLayout = after; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = image; - - VkImageSubresourceRange subresourceRange = {}; - subresourceRange.aspectMask = aspectMask; // VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT; - subresourceRange.baseArrayLayer = 0; - subresourceRange.layerCount = 1; - subresourceRange.baseMipLevel = 0; - subresourceRange.levelCount = mipCount; - - barrier.subresourceRange = subresourceRange; - return barrier; -} - -void SampleRenderer::Barriers(VkCommandBuffer cb, const std::vector& imageBarriers) -{ - vkCmdPipelineBarrier(cb, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, - 0, nullptr, - 0, nullptr, - static_cast(imageBarriers.size()), imageBarriers.data()); -} - -VkCommandBuffer SampleRenderer::BeginNewCommandBuffer() -{ - VkCommandBuffer cb = m_CommandListRing.GetNewCommandList(); - VkCommandBufferBeginInfo commandBufferBeginInfo = {}; - commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - commandBufferBeginInfo.pNext = NULL; - commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - commandBufferBeginInfo.pInheritanceInfo = NULL; - VkResult res = vkBeginCommandBuffer(cb, &commandBufferBeginInfo); - assert(res == 
VK_SUCCESS); - return cb; -} - -void SampleRenderer::SubmitCommandBuffer(VkCommandBuffer cb, VkSemaphore* waitSemaphore, VkSemaphore* signalSemaphores, VkFence fence) -{ - VkResult res = vkEndCommandBuffer(cb); - assert(res == VK_SUCCESS); - - VkPipelineStageFlags submitWaitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - - VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; - submitInfo.pNext = NULL; - submitInfo.waitSemaphoreCount = waitSemaphore ? 1 : 0; - submitInfo.pWaitSemaphores = waitSemaphore; - submitInfo.pWaitDstStageMask = waitSemaphore ? &submitWaitStage : NULL; - submitInfo.commandBufferCount = 1; - submitInfo.pCommandBuffers = &cb; - submitInfo.signalSemaphoreCount = signalSemaphores ? 1 : 0; - submitInfo.pSignalSemaphores = signalSemaphores; - res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submitInfo, fence); - assert(res == VK_SUCCESS); -} - -per_frame * SampleRenderer::FillFrameConstants(State *pState) -{ - // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- - // - per_frame *pPerFrame = NULL; - if (m_pGLTFTexturesAndBuffers) - { - pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); - - //override gltf camera with ours - pPerFrame->mCameraViewProj = pState->camera.GetView() * pState->camera.GetProjection(); - pPerFrame->cameraPos = pState->camera.GetPosition(); - pPerFrame->emmisiveFactor = pState->emmisiveFactor; - pPerFrame->iblFactor = pState->iblFactor; - - //if the gltf doesn't have any lights set a directional light - if (pPerFrame->lightCount == 0) - { - pPerFrame->lightCount = 1; - pPerFrame->lights[0].color[0] = pState->lightColor.x; - pPerFrame->lights[0].color[1] = pState->lightColor.y; - pPerFrame->lights[0].color[2] = pState->lightColor.z; - GetXYZ(pPerFrame->lights[0].position, pState->lightCamera.GetPosition()); - GetXYZ(pPerFrame->lights[0].direction, pState->lightCamera.GetDirection()); - - 
pPerFrame->lights[0].range = 30.0f; // in meters - pPerFrame->lights[0].type = LightType_Spot; - pPerFrame->lights[0].intensity = pState->lightIntensity; - pPerFrame->lights[0].innerConeCos = cosf(pState->lightCamera.GetFovV() * 0.9f / 2.0f); - pPerFrame->lights[0].outerConeCos = cosf(pState->lightCamera.GetFovV() / 2.0f); - pPerFrame->lights[0].mLightViewProj = pState->lightCamera.GetView() * pState->lightCamera.GetProjection(); - } - - // Up to 4 spotlights can have shadowmaps. Each spot the light has a shadowMap index which is used to find the shadowmap in the atlas - uint32_t shadowMapIndex = 0; - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) - { - pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index so the color pass knows which shadow map to use - pPerFrame->lights[i].depthBias = 20.0f / 100000.0f; - } - else if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Directional)) - { - pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // same as above - pPerFrame->lights[i].depthBias = 100.0f / 100000.0f; - } - else - { - pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light - } - } - - m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); - - m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); - } - - return pPerFrame; -} - -void SampleRenderer::RenderSpotLights(VkCommandBuffer cb, per_frame * pPerFrame) -{ - VkClearValue clearValue = {}; - clearValue.depthStencil.depth = 1; - clearValue.depthStencil.stencil = 0; - - VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - beginInfo.pNext = nullptr; - beginInfo.clearValueCount = 1; - beginInfo.pClearValues = &clearValue; - beginInfo.renderArea = { 0, 0, m_ShadowMap.GetWidth(), m_ShadowMap.GetHeight() }; - beginInfo.renderPass = m_RenderPassShadow; - beginInfo.framebuffer = m_FramebufferShadows; - vkCmdBeginRenderPass(cb, 
&beginInfo, VK_SUBPASS_CONTENTS_INLINE); - - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if (!(pPerFrame->lights[i].type == LightType_Spot || pPerFrame->lights[i].type == LightType_Directional)) - continue; - - // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) - uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; - uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; - uint32_t viewportWidth = m_ShadowMap.GetWidth() / 2; - uint32_t viewportHeight = m_ShadowMap.GetHeight() / 2; - SetViewportAndScissor(cb, viewportOffsetsX[i] * viewportWidth, viewportOffsetsY[i] * viewportHeight, viewportWidth, viewportHeight); - - GltfDepthPass::per_frame *cbDepthPerFrame = m_gltfDepth->SetPerFrameConstants(); - cbDepthPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; - - m_gltfDepth->Draw(cb); - - m_GPUTimer.GetTimeStamp(cb, "Shadow map"); - } - - vkCmdEndRenderPass(cb); -} - -void SampleRenderer::RenderMotionVectors(VkCommandBuffer cb, per_frame * pPerFrame, State * pState) -{ - vkCmdSetViewport(cb, 0, 1, &m_Viewport); - vkCmdSetScissor(cb, 0, 1, &m_Scissor); - - GltfMotionVectorsPass::per_frame *cbDepthPerFrame = m_gltfMotionVectors->SetPerFrameConstants(); - cbDepthPerFrame->mCurrViewProj = pPerFrame->mCameraViewProj; - cbDepthPerFrame->mPrevViewProj = pState->camera.GetPrevView() * pState->camera.GetProjection(); - - m_gltfMotionVectors->Draw(cb); - m_GPUTimer.GetTimeStamp(cb, "Motion vectors"); -} - - -void SampleRenderer::RenderSkydome(VkCommandBuffer cb, per_frame * pPerFrame, State * pState) -{ - VkClearValue clearValues[1]; - clearValues[0].color.float32[0] = 0; - clearValues[0].color.float32[1] = 0; - clearValues[0].color.float32[2] = 0; - clearValues[0].color.float32[3] = 0; - - VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - beginInfo.pNext = nullptr; - beginInfo.clearValueCount = _countof(clearValues); - beginInfo.pClearValues = clearValues; - 
beginInfo.renderArea = { 0, 0, m_Width, m_Height }; - beginInfo.renderPass = m_RenderPassClearHDR; - beginInfo.framebuffer = m_FramebufferHDR; - vkCmdBeginRenderPass(cb, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); - - vkCmdSetViewport(cb, 0, 1, &m_Viewport); - vkCmdSetScissor(cb, 0, 1, &m_Scissor); - - if (pState->skyDomeType == 1) - { - XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - m_SkyDome.Draw(cb, clipToView); - m_GPUTimer.GetTimeStamp(cb, "Skydome"); - } - else if (pState->skyDomeType == 0) - { - SkyDomeProc::Constants skyDomeConstants; - skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); - skyDomeConstants.turbidity = 10.0f; - skyDomeConstants.rayleigh = 2.0f; - skyDomeConstants.mieCoefficient = 0.005f; - skyDomeConstants.mieDirectionalG = 0.8f; - skyDomeConstants.luminance = 1.0f; - skyDomeConstants.sun = false; - m_SkyDomeProc.Draw(cb, skyDomeConstants); - m_GPUTimer.GetTimeStamp(cb, "Skydome proc"); - } - - vkCmdEndRenderPass(cb); -} - -void SampleRenderer::RenderScene(VkCommandBuffer cb) -{ - VkClearValue clearValues[2]; - clearValues[0].color.float32[0] = 0; - clearValues[0].color.float32[1] = 0; - clearValues[0].color.float32[2] = 0; - clearValues[0].color.float32[3] = 0; - clearValues[1].color.float32[0] = 1; - clearValues[1].color.float32[1] = 1; - clearValues[1].color.float32[2] = 1; - clearValues[1].color.float32[3] = 1; - - VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - beginInfo.pNext = nullptr; - beginInfo.clearValueCount = _countof(clearValues); - beginInfo.pClearValues = clearValues; - beginInfo.renderArea = { 0, 0, m_Width, m_Height }; - beginInfo.renderPass = m_RenderPassPBR; - beginInfo.framebuffer = m_FramebufferPBR; - vkCmdBeginRenderPass(cb, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); - - //set per frame constant buffer values - m_gltfPBR->Draw(cb); - - vkCmdEndRenderPass(cb); -} 
- -void SampleRenderer::RenderBoundingBoxes(VkCommandBuffer cb, per_frame * pPerFrame) -{ - m_gltfBBox->Draw(cb, pPerFrame->mCameraViewProj); - m_GPUTimer.GetTimeStamp(cb, "Bounding Box"); -} - - -void SampleRenderer::RenderLightFrustums(VkCommandBuffer cb, per_frame * pPerFrame, State * pState) -{ - SetPerfMarkerBegin(cb, "Light frustrums"); - - XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f); - XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); - XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); - XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; - m_WireframeBox.Draw(cb, &m_Wireframe, worldMatrix, vCenter, vRadius, vColor); - } - - m_GPUTimer.GetTimeStamp(cb, "Light frustums"); - SetPerfMarkerEnd(cb); -} - - -void SampleRenderer::DownsampleDepthBuffer(VkCommandBuffer cb) -{ - // Clear m_AtomicCounter to 0 - vkCmdFillBuffer(cb, m_AtomicCounter, 0, VK_WHOLE_SIZE, 0); - - SetPerfMarkerBegin(cb, "Downsample Depth"); - - vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, m_DepthDownsamplePipeline); - vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_COMPUTE, m_DepthDownsamplePipelineLayout, 0, 1, &m_DepthDownsampleDescriptorSet, 0, nullptr); - - // Each threadgroup works on 64x64 texels - uint32_t dimX = (m_Width + 63) / 64; - uint32_t dimY = (m_Height + 63) / 64; - vkCmdDispatch(cb, dimX, dimY, 1); - - m_GPUTimer.GetTimeStamp(cb, "Downsample Depth"); - SetPerfMarkerEnd(cb); -} - - -void SampleRenderer::RenderScreenSpaceReflections(VkCommandBuffer cb, State * pState) -{ - Barriers(cb, { - Transition(m_SssrOutputBuffer.Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_DepthHierarchy.Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT, m_DepthMipLevelCount), - 
}); - - SetPerfMarkerBegin(cb, "FidelityFX SSSR"); - - const Camera * camera = &pState->camera; - XMMATRIX view = camera->GetView(); - XMMATRIX proj = camera->GetProjection(); - - XMFLOAT4X4 cameraView; - XMStoreFloat4x4(&cameraView, XMMatrixTranspose(view)); - XMFLOAT4X4 cameraProj; - XMStoreFloat4x4(&cameraProj, XMMatrixTranspose(proj)); - - FfxSssrStatus status; - status = ffxSssrReflectionViewSetCameraParameters(m_SssrContext, m_SssrReflectionView, &cameraView.m[0][0], &cameraProj.m[0][0]); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrReflectionViewSetCameraParameters failed."); - } - - VkClearColorValue clearValue = {}; - clearValue.float32[0] = 0; - clearValue.float32[1] = 0; - clearValue.float32[2] = 0; - clearValue.float32[3] = 0; - - VkImageSubresourceRange subresourceRange = {}; - subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresourceRange.baseArrayLayer = 0; - subresourceRange.baseMipLevel = 0; - subresourceRange.layerCount = 1; - subresourceRange.levelCount = 1; - vkCmdClearColorImage(cb, m_SssrOutputBuffer.Resource(), VK_IMAGE_LAYOUT_GENERAL, &clearValue, 1, &subresourceRange); - - // Ensure the image is cleared - VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - vkCmdPipelineBarrier(cb, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, - 1, &barrier, - 0, nullptr, - 0, nullptr); - - - FfxSssrVkCommandEncodeInfo vkEncodeInfo = {}; - vkEncodeInfo.commandBuffer = cb; - - FfxSssrResolveReflectionViewInfo resolveInfo = {}; - resolveInfo.flags = pState->bShowIntersectionResults ? 0 : FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_DENOISE; - resolveInfo.flags |= pState->bEnableVarianceGuidedTracing ? 
FFX_SSSR_RESOLVE_REFLECTION_VIEW_FLAG_ENABLE_VARIANCE_GUIDED_TRACING : 0; - resolveInfo.pVkCommandEncodeInfo = &vkEncodeInfo; - resolveInfo.temporalStabilityScale = pState->temporalStability; - resolveInfo.maxTraversalIterations = pState->maxTraversalIterations; - resolveInfo.mostDetailedDepthHierarchyMipLevel = pState->mostDetailedDepthHierarchyMipLevel; - resolveInfo.depthBufferThickness = pState->depthBufferThickness; - resolveInfo.minTraversalOccupancy = pState->minTraversalOccupancy; - resolveInfo.samplesPerQuad = pState->samplesPerQuad == 4 ? FFX_SSSR_RAY_SAMPLES_PER_QUAD_4 : (pState->samplesPerQuad == 2 ? FFX_SSSR_RAY_SAMPLES_PER_QUAD_2 : FFX_SSSR_RAY_SAMPLES_PER_QUAD_1); - resolveInfo.roughnessThreshold = pState->roughnessThreshold; - - status = ffxSssrEncodeResolveReflectionView(m_SssrContext, m_SssrReflectionView, &resolveInfo); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrEncodeResolveReflectionView failed."); - } - - // Query timings - uint64_t tileClassificationTime; - status = ffxSssrReflectionViewGetTileClassificationElapsedTime(m_SssrContext, m_SssrReflectionView, &tileClassificationTime); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrReflectionViewGetTileClassificationElapsedTime failed."); - } - - static std::deque tileClassificationTimes(100); - tileClassificationTimes.pop_front(); - tileClassificationTimes.push_back(static_cast(tileClassificationTime * m_MillisecondsBetweenGpuTicks)); - pState->tileClassificationTime = 0; - for (auto& time : tileClassificationTimes) - { - pState->tileClassificationTime += time; - } - pState->tileClassificationTime /= tileClassificationTimes.size(); - - uint64_t intersectionTime; - status = ffxSssrReflectionViewGetIntersectionElapsedTime(m_SssrContext, m_SssrReflectionView, &intersectionTime); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrReflectionViewGetIntersectionElapsedTime failed."); - } - - static std::deque intersectionTimes(100); - intersectionTimes.pop_front(); - 
intersectionTimes.push_back(static_cast(intersectionTime * m_MillisecondsBetweenGpuTicks)); - pState->intersectionTime = 0; - for (auto& time : intersectionTimes) - { - pState->intersectionTime += time; - } - pState->intersectionTime /= intersectionTimes.size(); - - uint64_t denoisingTime; - status = ffxSssrReflectionViewGetDenoisingElapsedTime(m_SssrContext, m_SssrReflectionView, &denoisingTime); - if (status != FFX_SSSR_STATUS_OK) - { - Trace("ffxSssrReflectionViewGetDenoisingElapsedTime failed."); - } - - static std::deque denoisingTimes(100); - denoisingTimes.pop_front(); - denoisingTimes.push_back(static_cast(denoisingTime * m_MillisecondsBetweenGpuTicks)); - pState->denoisingTime = 0; - for (auto& time : denoisingTimes) - { - pState->denoisingTime += time; - } - pState->denoisingTime /= denoisingTimes.size(); - - m_GPUTimer.GetTimeStamp(cb, "FidelityFX SSSR"); - SetPerfMarkerEnd(cb); - - Barriers(cb, { - Transition(m_SssrOutputBuffer.Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_DepthHierarchy.Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_ASPECT_COLOR_BIT, m_DepthMipLevelCount), - }); -} - -void SampleRenderer::CopyHistorySurfaces(VkCommandBuffer cb) -{ - Barriers(cb, { - Transition(m_NormalBuffer.Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_SpecularRoughness.Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_NormalHistoryBuffer.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_SpecularRoughnessHistory.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - }); - - SetPerfMarkerBegin(cb, "Copy History Normals and Roughness"); - // Keep copy of normal 
roughness buffer for next frame - CopyToTexture(cb, &m_NormalBuffer, &m_NormalHistoryBuffer); - CopyToTexture(cb, &m_SpecularRoughness, &m_SpecularRoughnessHistory); - SetPerfMarkerEnd(cb); - - Barriers(cb, { - Transition(m_NormalBuffer.Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_SpecularRoughness.Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_NormalHistoryBuffer.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - Transition(m_SpecularRoughnessHistory.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), - }); -} - -void SampleRenderer::ApplyReflectionTarget(VkCommandBuffer cb, State * pState) -{ - VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - beginInfo.pNext = nullptr; - beginInfo.clearValueCount = 0; - beginInfo.pClearValues = nullptr; - beginInfo.renderArea = { 0, 0, m_Width, m_Height }; - beginInfo.renderPass = m_RenderPassApply; - beginInfo.framebuffer = m_FramebufferApply; - vkCmdBeginRenderPass(cb, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); - - SetPerfMarkerBegin(cb, "Apply Reflection View"); - - struct PassConstants - { - XMFLOAT4 viewDir; - UINT showReflectionTarget; - UINT drawReflections; - } constants; - - XMVECTOR view = pState->camera.GetDirection(); - XMStoreFloat4(&constants.viewDir, view); - constants.showReflectionTarget = pState->showReflectionTarget ? 1 : 0; - constants.drawReflections = pState->bDrawScreenSpaceReflections ? 
1 : 0; - - VkDescriptorBufferInfo uniformBufferInfo = m_ConstantBufferRing.AllocConstantBuffer(sizeof(PassConstants), &constants); - VkWriteDescriptorSet uniformBufferWriteDescSet = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; - uniformBufferWriteDescSet.pNext = nullptr; - uniformBufferWriteDescSet.descriptorCount = 1; - uniformBufferWriteDescSet.dstArrayElement = 0; - uniformBufferWriteDescSet.dstSet = m_ApplyPipelineDescriptorSet[m_CurrentFrame]; - uniformBufferWriteDescSet.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - uniformBufferWriteDescSet.dstBinding = 5; - uniformBufferWriteDescSet.pBufferInfo = &uniformBufferInfo; - - vkUpdateDescriptorSets(m_pDevice->GetDevice(), 1, &uniformBufferWriteDescSet, 0, nullptr); - - vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, m_ApplyPipeline); - vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, m_ApplyPipelineLayout, 0, 1, &m_ApplyPipelineDescriptorSet[m_CurrentFrame], 0, nullptr); - vkCmdSetViewport(cb, 0, 1, &m_Viewport); - vkCmdSetScissor(cb, 0, 1, &m_Scissor); - - vkCmdDraw(cb, 3, 1, 0, 0); - - m_GPUTimer.GetTimeStamp(cb, "Apply Reflection View"); - SetPerfMarkerEnd(cb); - - vkCmdEndRenderPass(cb); -} - -void SampleRenderer::DownsampleScene(VkCommandBuffer cb) -{ - m_DownSample.Draw(cb); - m_GPUTimer.GetTimeStamp(cb, "Downsample"); -} - -void SampleRenderer::RenderBloom(VkCommandBuffer cb) -{ - m_Bloom.Draw(cb); - m_GPUTimer.GetTimeStamp(cb, "Bloom"); -} - -void SampleRenderer::ApplyTonemapping(VkCommandBuffer cb, State * pState, SwapChain *pSwapChain) -{ - vkCmdSetViewport(cb, 0, 1, &m_Viewport); - vkCmdSetScissor(cb, 0, 1, &m_Scissor); - - m_ToneMapping.Draw(cb, m_HDRSRV, pState->exposure, pState->toneMapper); - m_GPUTimer.GetTimeStamp(cb, "Tone mapping"); -} - -void SampleRenderer::RenderHUD(VkCommandBuffer cb, SwapChain *pSwapChain) -{ - vkCmdSetViewport(cb, 0, 1, &m_Viewport); - vkCmdSetScissor(cb, 0, 1, &m_Scissor); - - m_ImGUI.Draw(cb); - - m_GPUTimer.GetTimeStamp(cb, "ImGUI rendering"); 
-} - -void SampleRenderer::CopyToTexture(VkCommandBuffer cb, Texture * source, Texture * target) -{ - VkImageCopy region = {}; - region.dstOffset = { 0, 0, 0 }; - region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.dstSubresource.baseArrayLayer = 0; - region.dstSubresource.layerCount = 1; - region.dstSubresource.mipLevel = 0; - region.extent = {m_Width, m_Height, 1}; - region.srcOffset = {0, 0, 0}; - region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.srcSubresource.baseArrayLayer = 0; - region.srcSubresource.layerCount = 1; - region.srcSubresource.mipLevel = 0; - vkCmdCopyImage(cb, source->Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, target->Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); -} - -//-------------------------------------------------------------------------------------- -// -// OnRender -// -//-------------------------------------------------------------------------------------- -void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) -{ - StallFrame(pState->targetFrametime); - - VkCommandBuffer cb1 = BeginNewCommandBuffer(); - BeginFrame(cb1); - - per_frame *pPerFrame = FillFrameConstants(pState); - - // Clears happen in the render passes ----------------------------------------------------------------------- - - // Render to shadow map atlas for spot lights ------------------------------------------ - // - if (m_gltfDepth && pPerFrame) - { - RenderSpotLights(cb1, pPerFrame); - } - - VkClearValue clearValues[3]; - clearValues[0].color.float32[0] = 0; - clearValues[0].color.float32[1] = 0; - clearValues[0].color.float32[2] = 0; - clearValues[0].color.float32[3] = 0; - clearValues[1].color.float32[0] = 0; - clearValues[1].color.float32[1] = 0; - clearValues[1].color.float32[2] = 0; - clearValues[1].color.float32[3] = 0; - clearValues[2].depthStencil.depth = 1; - clearValues[2].depthStencil.stencil = 0; - - VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - 
beginInfo.pNext = nullptr; - beginInfo.clearValueCount = _countof(clearValues); - beginInfo.pClearValues = clearValues; - beginInfo.renderArea = { 0, 0, m_Width, m_Height }; - beginInfo.renderPass = m_RenderPassMV; - beginInfo.framebuffer = m_FramebufferMV; - vkCmdBeginRenderPass(cb1, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); - - // Motion vectors --------------------------------------------------------------------------- - // - if (m_gltfMotionVectors && pPerFrame) - { - RenderMotionVectors(cb1, pPerFrame, pState); - } - - vkCmdEndRenderPass(cb1); - - // Render Scene to the HDR RT ------------------------------------------------ - // - - if (pPerFrame) - { - RenderSkydome(cb1, pPerFrame, pState); - - // Render scene to color buffer - if (m_gltfPBR) - { - RenderScene(cb1); - } - - beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - beginInfo.pNext = nullptr; - beginInfo.clearValueCount = 0; - beginInfo.pClearValues = nullptr; - beginInfo.renderArea = { 0, 0, m_Width, m_Height }; - beginInfo.renderPass = m_RenderPassHDR; - beginInfo.framebuffer = m_FramebufferHDR; - vkCmdBeginRenderPass(cb1, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); - - // Draw object bounding boxes - if (m_gltfBBox && pState->bDrawBoundingBoxes) - { - RenderBoundingBoxes(cb1, pPerFrame); - } - - // Draw light frustum - if (pState->bDrawLightFrustum) - { - RenderLightFrustums(cb1, pPerFrame, pState); - } - - vkCmdEndRenderPass(cb1); - - m_GPUTimer.GetTimeStamp(cb1, "Rendering scene"); - } - - // Downsample depth buffer - if (m_gltfMotionVectors && pPerFrame) - { - DownsampleDepthBuffer(cb1); - } - - if (m_gltfPBR && pPerFrame) - { - // Stochastic SSR - RenderScreenSpaceReflections(cb1, pState); - - // Keep this frames results for next frame - CopyHistorySurfaces(cb1); - - // Apply the result of SSR - ApplyReflectionTarget(cb1, pState); - } - - if (pPerFrame && pState->bDrawBloom) - { - DownsampleScene(cb1); - RenderBloom(cb1); - } - - SubmitCommandBuffer(cb1); - - // Wait for swapchain (we are 
going to render to it) ----------------------------------- - // - int imageIndex = pSwapChain->WaitForSwapChain(); - m_CommandListRing.OnBeginFrame(); - - VkCommandBuffer cb2 = BeginNewCommandBuffer(); - - beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; - beginInfo.pNext = nullptr; - beginInfo.clearValueCount = 0; - beginInfo.pClearValues = nullptr; - beginInfo.renderArea = { 0, 0, m_Width, m_Height }; - beginInfo.renderPass = pSwapChain->GetRenderPass(); - beginInfo.framebuffer = pSwapChain->GetFramebuffer(imageIndex); - vkCmdBeginRenderPass(cb2, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); - - if (pPerFrame) - { - // Tonemapping - ApplyTonemapping(cb2, pState, pSwapChain); - } - - // Render HUD - RenderHUD(cb2, pSwapChain); - - m_GPUTimer.OnEndFrame(); - - vkCmdEndRenderPass(cb2); - - VkSemaphore imageAvailableSemaphore = VK_NULL_HANDLE; - VkSemaphore renderFinishedSemaphores = VK_NULL_HANDLE; - VkFence cmdBufExecutedFences = VK_NULL_HANDLE; - pSwapChain->GetSemaphores(&imageAvailableSemaphore, &renderFinishedSemaphores, &cmdBufExecutedFences); - - SubmitCommandBuffer(cb2, &imageAvailableSemaphore, &renderFinishedSemaphores, cmdBufExecutedFences); - - // Update previous camera matrices - pState->camera.UpdatePreviousMatrices(); -} + // tell caller that we are done loading the map + return 0; + } + + stage++; + return stage; + } + + //-------------------------------------------------------------------------------------- + // + // UnloadScene + // + //-------------------------------------------------------------------------------------- + void SampleRenderer::UnloadScene() + { + if (m_gltfPBR) + { + m_gltfPBR->OnDestroy(); + delete m_gltfPBR; + m_gltfPBR = NULL; + } + + if (m_gltfMotionVectors) + { + m_gltfMotionVectors->OnDestroy(); + delete m_gltfMotionVectors; + m_gltfMotionVectors = NULL; + } + + if (m_gltfDepth) + { + m_gltfDepth->OnDestroy(); + delete m_gltfDepth; + m_gltfDepth = NULL; + } + + if (m_gltfBBox) + { + m_gltfBBox->OnDestroy(); + delete 
m_gltfBBox; + m_gltfBBox = NULL; + } + + if (m_pGLTFTexturesAndBuffers) + { + m_pGLTFTexturesAndBuffers->OnDestroy(); + delete m_pGLTFTexturesAndBuffers; + m_pGLTFTexturesAndBuffers = NULL; + } + } + + void SampleRenderer::CreateApplyReflectionsPipeline() + { + VkDevice device = m_pDevice->GetDevice(); + + VkDescriptorSetLayoutBinding bindings[6]; + bindings[0].binding = 0; + bindings[0].descriptorCount = 1; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; + bindings[0].pImmutableSamplers = nullptr; + + bindings[1].binding = 1; + bindings[1].descriptorCount = 1; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; + bindings[1].pImmutableSamplers = nullptr; + + bindings[2].binding = 2; + bindings[2].descriptorCount = 1; + bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + bindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; + bindings[2].pImmutableSamplers = nullptr; + + bindings[3].binding = 3; + bindings[3].descriptorCount = 1; + bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + bindings[3].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; + bindings[3].pImmutableSamplers = nullptr; + + bindings[4].binding = 4; + bindings[4].descriptorCount = 1; + bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + bindings[4].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; + bindings[4].pImmutableSamplers = nullptr; + + bindings[5].binding = 5; + bindings[5].descriptorCount = 1; + bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[5].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; + bindings[5].pImmutableSamplers = nullptr; + + VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = { 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; + descSetLayoutCreateInfo.pNext = nullptr; + descSetLayoutCreateInfo.bindingCount = _countof(bindings); + descSetLayoutCreateInfo.pBindings = bindings; + descSetLayoutCreateInfo.flags = 0; + + if (VK_SUCCESS != vkCreateDescriptorSetLayout(device, &descSetLayoutCreateInfo, nullptr, &m_ApplyPipelineDescriptorSetLayout)) + { + Trace("Failed to create set layout for apply reflections pipeline."); + } + + VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; + pipelineLayoutCreateInfo.flags = 0; + pipelineLayoutCreateInfo.pNext = nullptr; + pipelineLayoutCreateInfo.setLayoutCount = 1; + pipelineLayoutCreateInfo.pSetLayouts = &m_ApplyPipelineDescriptorSetLayout; + pipelineLayoutCreateInfo.pushConstantRangeCount = 0; + pipelineLayoutCreateInfo.pPushConstantRanges = nullptr; + + if (VK_SUCCESS != vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &m_ApplyPipelineLayout)) + { + Trace("Failed to create pipeline layout for apply reflections pipeline."); + } + + DefineList defines; + VkPipelineShaderStageCreateInfo vs, fs; + VKCompileFromFile(device, VK_SHADER_STAGE_VERTEX_BIT, "ApplyReflections.hlsl", "vs_main", "-T vs_6_0", &defines, &vs); + VKCompileFromFile(device, VK_SHADER_STAGE_FRAGMENT_BIT, "ApplyReflections.hlsl", "ps_main", "-T ps_6_0", &defines, &fs); + + VkPipelineVertexInputStateCreateInfo vertexInputStateInfo = { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO }; + vertexInputStateInfo.pNext = nullptr; + vertexInputStateInfo.flags = 0; + vertexInputStateInfo.vertexBindingDescriptionCount = 0; + vertexInputStateInfo.pVertexBindingDescriptions = nullptr; + vertexInputStateInfo.vertexAttributeDescriptionCount = 0; + vertexInputStateInfo.pVertexAttributeDescriptions = nullptr; + + VkPipelineColorBlendAttachmentState pipelineColorBlendAttachmentState = {}; + pipelineColorBlendAttachmentState.blendEnable = VK_TRUE; + 
pipelineColorBlendAttachmentState.srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + pipelineColorBlendAttachmentState.dstColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + pipelineColorBlendAttachmentState.colorBlendOp = VK_BLEND_OP_ADD; + pipelineColorBlendAttachmentState.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + pipelineColorBlendAttachmentState.dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + pipelineColorBlendAttachmentState.alphaBlendOp = VK_BLEND_OP_ADD; + pipelineColorBlendAttachmentState.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO }; + colorBlendStateCreateInfo.pNext = nullptr; + colorBlendStateCreateInfo.flags = 0; + colorBlendStateCreateInfo.logicOpEnable = false; + colorBlendStateCreateInfo.attachmentCount = 1; + colorBlendStateCreateInfo.pAttachments = &pipelineColorBlendAttachmentState; + + VkDynamicState dynamicStates[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; + VkPipelineDynamicStateCreateInfo pipelineDynamicStateInfo = { VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO }; + pipelineDynamicStateInfo.pNext = nullptr; + pipelineDynamicStateInfo.flags = 0; + pipelineDynamicStateInfo.dynamicStateCount = _countof(dynamicStates); + pipelineDynamicStateInfo.pDynamicStates = dynamicStates; + + VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO }; + pipelineRasterizationStateCreateInfo.pNext = nullptr; + pipelineRasterizationStateCreateInfo.flags = 0; + pipelineRasterizationStateCreateInfo.depthClampEnable = VK_FALSE; + pipelineRasterizationStateCreateInfo.rasterizerDiscardEnable = VK_FALSE; + pipelineRasterizationStateCreateInfo.polygonMode = VK_POLYGON_MODE_FILL; + pipelineRasterizationStateCreateInfo.cullMode = VK_CULL_MODE_NONE; + 
pipelineRasterizationStateCreateInfo.frontFace = VK_FRONT_FACE_CLOCKWISE; + pipelineRasterizationStateCreateInfo.depthBiasEnable = VK_FALSE; + pipelineRasterizationStateCreateInfo.depthBiasConstantFactor = 0; + pipelineRasterizationStateCreateInfo.depthBiasClamp = 0; + pipelineRasterizationStateCreateInfo.depthBiasSlopeFactor = 0; + pipelineRasterizationStateCreateInfo.lineWidth = 0; + + VkPipelineMultisampleStateCreateInfo multisampleStateInfo = { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO }; + multisampleStateInfo.pNext = nullptr; + multisampleStateInfo.flags = 0; + multisampleStateInfo.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisampleStateInfo.sampleShadingEnable = VK_FALSE; + multisampleStateInfo.minSampleShading = 0; + multisampleStateInfo.pSampleMask = nullptr; + multisampleStateInfo.alphaToCoverageEnable = VK_FALSE; + multisampleStateInfo.alphaToOneEnable = VK_FALSE; + + VkPipelineViewportStateCreateInfo viewportStateInfo = {}; + viewportStateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewportStateInfo.pNext = nullptr; + viewportStateInfo.flags = 0; + viewportStateInfo.viewportCount = 1; + viewportStateInfo.scissorCount = 1; + viewportStateInfo.pScissors = nullptr; + viewportStateInfo.pViewports = nullptr; + + VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO }; + inputAssemblyState.pNext = nullptr; + inputAssemblyState.flags = 0; + inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + inputAssemblyState.primitiveRestartEnable = VK_FALSE; + + VkAttachmentDescription colorAttachments[1]; + AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachments[0]); + m_RenderPassApply = CreateRenderPassOptimal(m_pDevice->GetDevice(), _countof(colorAttachments), colorAttachments, nullptr); + + VkPipelineShaderStageCreateInfo 
stages[] = { vs, fs }; + + VkGraphicsPipelineCreateInfo pipelineCreateInfo = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO }; + pipelineCreateInfo.pNext = nullptr; + pipelineCreateInfo.flags = 0; + pipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE; + pipelineCreateInfo.basePipelineIndex = 0; + pipelineCreateInfo.layout = m_ApplyPipelineLayout; + pipelineCreateInfo.pColorBlendState = &colorBlendStateCreateInfo; + pipelineCreateInfo.pDepthStencilState = nullptr; + pipelineCreateInfo.pDynamicState = &pipelineDynamicStateInfo; + pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState; + pipelineCreateInfo.pMultisampleState = &multisampleStateInfo; + pipelineCreateInfo.pRasterizationState = &pipelineRasterizationStateCreateInfo; + pipelineCreateInfo.stageCount = _countof(stages); + pipelineCreateInfo.pStages = stages; + pipelineCreateInfo.pTessellationState = nullptr; + pipelineCreateInfo.pVertexInputState = &vertexInputStateInfo; + pipelineCreateInfo.pViewportState = &viewportStateInfo; + pipelineCreateInfo.renderPass = m_RenderPassApply; + pipelineCreateInfo.subpass = 0; + + if (VK_SUCCESS != vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineCreateInfo, nullptr, &m_ApplyPipeline)) + { + Trace("Failed to create pipeline for the apply reflection target pass."); + } + + for (int i = 0; i < backBufferCount; ++i) + { + m_ResourceViewHeaps.AllocDescriptor(m_ApplyPipelineDescriptorSetLayout, &m_ApplyPipelineDescriptorSet[i]); + } + } + + void SampleRenderer::CreateDepthDownsamplePipeline() + { + VkDevice device = m_pDevice->GetDevice(); + + VkDescriptorSetLayoutBinding bindings[3]; + bindings[0].binding = 0; + bindings[0].descriptorCount = 1; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[0].pImmutableSamplers = nullptr; + + bindings[1].binding = 1; + bindings[1].descriptorCount = 13; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + 
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[1].pImmutableSamplers = nullptr; + + bindings[2].binding = 2; + bindings[2].descriptorCount = 1; + bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[2].pImmutableSamplers = nullptr; + + VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; + descSetLayoutCreateInfo.pNext = nullptr; + descSetLayoutCreateInfo.bindingCount = _countof(bindings); + descSetLayoutCreateInfo.pBindings = bindings; + descSetLayoutCreateInfo.flags = 0; + + if (VK_SUCCESS != vkCreateDescriptorSetLayout(device, &descSetLayoutCreateInfo, nullptr, &m_DepthDownsampleDescriptorSetLayout)) + { + Trace("Failed to create descriptor set layout for depth downsampling pipeline."); + } + + VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO }; + pipelineLayoutCreateInfo.flags = 0; + pipelineLayoutCreateInfo.pNext = nullptr; + pipelineLayoutCreateInfo.setLayoutCount = 1; + pipelineLayoutCreateInfo.pSetLayouts = &m_DepthDownsampleDescriptorSetLayout; + pipelineLayoutCreateInfo.pushConstantRangeCount = 0; + pipelineLayoutCreateInfo.pPushConstantRanges = nullptr; + + if (VK_SUCCESS != vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &m_DepthDownsamplePipelineLayout)) + { + Trace("Failed to create pipeline layout for depth downsampling pipeline."); + } + + DefineList defines; + VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo; + VKCompileFromFile(device, VK_SHADER_STAGE_COMPUTE_BIT, "DepthDownsample.hlsl", "main", "-T cs_6_0", &defines, &pipelineShaderStageCreateInfo); + + VkComputePipelineCreateInfo pipelineCreateInfo = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO }; + pipelineCreateInfo.pNext = nullptr; + pipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE; + pipelineCreateInfo.basePipelineIndex = 0; + 
pipelineCreateInfo.flags = 0; + pipelineCreateInfo.layout = m_DepthDownsamplePipelineLayout; + pipelineCreateInfo.stage = pipelineShaderStageCreateInfo; + + if (VK_SUCCESS != vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineCreateInfo, nullptr, &m_DepthDownsamplePipeline)) + { + Trace("Failed to create pipeline for depth downsampling pipeline."); + } + + m_ResourceViewHeaps.AllocDescriptor(m_DepthDownsampleDescriptorSetLayout, &m_DepthDownsampleDescriptorSet); + } + + void SampleRenderer::StallFrame(float targetFrametime) + { + // Simulate lower frame rates + static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + std::chrono::duration diff = now - last; + last = now; + float deltaTime = 1000 * static_cast(diff.count()); + if (deltaTime < targetFrametime) + { + int deltaCount = static_cast(targetFrametime - deltaTime); + std::this_thread::sleep_for(std::chrono::milliseconds(deltaCount)); + } + } + + void SampleRenderer::BeginFrame(VkCommandBuffer cb) + { + m_CurrentBackbufferIndex = (m_CurrentBackbufferIndex + 1) % backBufferCount; + + // Timing values + double nanosecondsBetweenGPUTicks = m_pDevice->GetPhysicalDeviceProperries().limits.timestampPeriod; + m_MillisecondsBetweenGpuTicks = 1e-6 * nanosecondsBetweenGPUTicks; + + // Let our resource managers do some house keeping + m_ConstantBufferRing.OnBeginFrame(); + m_GPUTimer.OnBeginFrame(cb, &m_TimeStamps); + } + + VkBufferMemoryBarrier SampleRenderer::BufferBarrier(VkBuffer buffer) + { + VkBufferMemoryBarrier barrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = VK_WHOLE_SIZE; + 
return barrier; + } + + VkImageMemoryBarrier SampleRenderer::Transition(VkImage image, VkImageLayout before, VkImageLayout after, VkImageAspectFlags aspectMask, int mipCount) + { + VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_MEMORY_READ_BIT; + barrier.oldLayout = before; + barrier.newLayout = after; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + + VkImageSubresourceRange subresourceRange = {}; + subresourceRange.aspectMask = aspectMask; // VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT; + subresourceRange.baseArrayLayer = 0; + subresourceRange.layerCount = 1; + subresourceRange.baseMipLevel = 0; + subresourceRange.levelCount = mipCount; + + barrier.subresourceRange = subresourceRange; + return barrier; + } + + void SampleRenderer::Barriers(VkCommandBuffer cb, const std::vector& imageBarriers) + { + vkCmdPipelineBarrier(cb, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 0, nullptr, + 0, nullptr, + static_cast(imageBarriers.size()), imageBarriers.data()); + } + + VkCommandBuffer SampleRenderer::BeginNewCommandBuffer() + { + VkCommandBuffer cb = m_CommandListRing.GetNewCommandList(); + VkCommandBufferBeginInfo commandBufferBeginInfo = {}; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = NULL; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + commandBufferBeginInfo.pInheritanceInfo = NULL; + VkResult res = vkBeginCommandBuffer(cb, &commandBufferBeginInfo); + assert(res == VK_SUCCESS); + return cb; + } + + void 
SampleRenderer::SubmitCommandBuffer(VkCommandBuffer cb, VkSemaphore* waitSemaphore, VkSemaphore* signalSemaphores, VkFence fence) + { + VkResult res = vkEndCommandBuffer(cb); + assert(res == VK_SUCCESS); + + VkPipelineStageFlags submitWaitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; + submitInfo.pNext = NULL; + submitInfo.waitSemaphoreCount = waitSemaphore ? 1 : 0; + submitInfo.pWaitSemaphores = waitSemaphore; + submitInfo.pWaitDstStageMask = waitSemaphore ? &submitWaitStage : NULL; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cb; + submitInfo.signalSemaphoreCount = signalSemaphores ? 1 : 0; + submitInfo.pSignalSemaphores = signalSemaphores; + res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submitInfo, fence); + assert(res == VK_SUCCESS); + } + + per_frame* SampleRenderer::FillFrameConstants(State* pState) + { + // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- + // + per_frame* pPerFrame = NULL; + if (m_pGLTFTexturesAndBuffers) + { + pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); + + //override gltf camera with ours + pPerFrame->mCameraViewProj = pState->camera.GetView() * pState->camera.GetProjection(); + pPerFrame->cameraPos = pState->camera.GetPosition(); + pPerFrame->emmisiveFactor = pState->emmisiveFactor; + pPerFrame->iblFactor = pState->iblFactor; + + //if the gltf doesn't have any lights set a directional light + if (pPerFrame->lightCount == 0) + { + pPerFrame->lightCount = 1; + pPerFrame->lights[0].color[0] = pState->lightColor.x; + pPerFrame->lights[0].color[1] = pState->lightColor.y; + pPerFrame->lights[0].color[2] = pState->lightColor.z; + GetXYZ(pPerFrame->lights[0].position, pState->lightCamera.GetPosition()); + GetXYZ(pPerFrame->lights[0].direction, pState->lightCamera.GetDirection()); + + pPerFrame->lights[0].range = 30.0f; // in 
meters + pPerFrame->lights[0].type = LightType_Spot; + pPerFrame->lights[0].intensity = pState->lightIntensity; + pPerFrame->lights[0].innerConeCos = cosf(pState->lightCamera.GetFovV() * 0.9f / 2.0f); + pPerFrame->lights[0].outerConeCos = cosf(pState->lightCamera.GetFovV() / 2.0f); + pPerFrame->lights[0].mLightViewProj = pState->lightCamera.GetView() * pState->lightCamera.GetProjection(); + } + + // Up to 4 spotlights can have shadowmaps. Each spot the light has a shadowMap index which is used to find the shadowmap in the atlas + uint32_t shadowMapIndex = 0; + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) + { + pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index so the color pass knows which shadow map to use + pPerFrame->lights[i].depthBias = 20.0f / 100000.0f; + } + else if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Directional)) + { + pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // same as above + pPerFrame->lights[i].depthBias = 100.0f / 100000.0f; + } + else + { + pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light + } + } + + m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); + + m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); + } + + return pPerFrame; + } + + void SampleRenderer::RenderSpotLights(VkCommandBuffer cb, per_frame* pPerFrame) + { + VkClearValue clearValue = {}; + clearValue.depthStencil.depth = 1; + clearValue.depthStencil.stencil = 0; + + VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + beginInfo.pNext = nullptr; + beginInfo.clearValueCount = 1; + beginInfo.pClearValues = &clearValue; + beginInfo.renderArea = { 0, 0, m_ShadowMap.GetWidth(), m_ShadowMap.GetHeight() }; + beginInfo.renderPass = m_RenderPassShadow; + beginInfo.framebuffer = m_FramebufferShadows; + vkCmdBeginRenderPass(cb, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + + for 
(uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if (!(pPerFrame->lights[i].type == LightType_Spot || pPerFrame->lights[i].type == LightType_Directional)) + continue; + + // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) + uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; + uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; + uint32_t viewportWidth = m_ShadowMap.GetWidth() / 2; + uint32_t viewportHeight = m_ShadowMap.GetHeight() / 2; + SetViewportAndScissor(cb, viewportOffsetsX[i] * viewportWidth, viewportOffsetsY[i] * viewportHeight, viewportWidth, viewportHeight); + + GltfDepthPass::per_frame* cbDepthPerFrame = m_gltfDepth->SetPerFrameConstants(); + cbDepthPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; + + m_gltfDepth->Draw(cb); + + m_GPUTimer.GetTimeStamp(cb, "Shadow map"); + } + + vkCmdEndRenderPass(cb); + } + + void SampleRenderer::RenderMotionVectors(VkCommandBuffer cb, per_frame* pPerFrame, State* pState) + { + vkCmdSetViewport(cb, 0, 1, &m_Viewport); + vkCmdSetScissor(cb, 0, 1, &m_Scissor); + + GltfMotionVectorsPass::per_frame* cbDepthPerFrame = m_gltfMotionVectors->SetPerFrameConstants(); + cbDepthPerFrame->mCurrViewProj = pPerFrame->mCameraViewProj; + cbDepthPerFrame->mPrevViewProj = pState->camera.GetPrevView() * pState->camera.GetProjection(); + + m_gltfMotionVectors->Draw(cb); + m_GPUTimer.GetTimeStamp(cb, "Motion vectors"); + } + + + void SampleRenderer::RenderSkydome(VkCommandBuffer cb, per_frame* pPerFrame, State* pState) + { + VkClearValue clearValues[1]; + clearValues[0].color.float32[0] = 0; + clearValues[0].color.float32[1] = 0; + clearValues[0].color.float32[2] = 0; + clearValues[0].color.float32[3] = 0; + + VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + beginInfo.pNext = nullptr; + beginInfo.clearValueCount = _countof(clearValues); + beginInfo.pClearValues = clearValues; + beginInfo.renderArea = { 0, 0, m_Width, m_Height }; + 
beginInfo.renderPass = m_RenderPassClearHDR; + beginInfo.framebuffer = m_FramebufferHDR; + vkCmdBeginRenderPass(cb, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + + vkCmdSetViewport(cb, 0, 1, &m_Viewport); + vkCmdSetScissor(cb, 0, 1, &m_Scissor); + + if (pState->skyDomeType == 1) + { + XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + m_SkyDome.Draw(cb, clipToView); + m_GPUTimer.GetTimeStamp(cb, "Skydome"); + } + else if (pState->skyDomeType == 0) + { + SkyDomeProc::Constants skyDomeConstants; + skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); + skyDomeConstants.turbidity = 10.0f; + skyDomeConstants.rayleigh = 2.0f; + skyDomeConstants.mieCoefficient = 0.005f; + skyDomeConstants.mieDirectionalG = 0.8f; + skyDomeConstants.luminance = 1.0f; + skyDomeConstants.sun = false; + m_SkyDomeProc.Draw(cb, skyDomeConstants); + m_GPUTimer.GetTimeStamp(cb, "Skydome proc"); + } + + vkCmdEndRenderPass(cb); + } + + void SampleRenderer::RenderScene(VkCommandBuffer cb) + { + VkClearValue clearValues[2]; + clearValues[0].color.float32[0] = 0; + clearValues[0].color.float32[1] = 0; + clearValues[0].color.float32[2] = 0; + clearValues[0].color.float32[3] = 0; + clearValues[1].color.float32[0] = 1; + clearValues[1].color.float32[1] = 1; + clearValues[1].color.float32[2] = 1; + clearValues[1].color.float32[3] = 1; + + VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + beginInfo.pNext = nullptr; + beginInfo.clearValueCount = _countof(clearValues); + beginInfo.pClearValues = clearValues; + beginInfo.renderArea = { 0, 0, m_Width, m_Height }; + beginInfo.renderPass = m_RenderPassPBR; + beginInfo.framebuffer = m_FramebufferPBR; + vkCmdBeginRenderPass(cb, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + + //set per frame constant buffer values + m_gltfPBR->Draw(cb); + + vkCmdEndRenderPass(cb); + } + + void 
SampleRenderer::RenderBoundingBoxes(VkCommandBuffer cb, per_frame* pPerFrame) + { + m_gltfBBox->Draw(cb, pPerFrame->mCameraViewProj); + m_GPUTimer.GetTimeStamp(cb, "Bounding Box"); + } + + + void SampleRenderer::RenderLightFrustums(VkCommandBuffer cb, per_frame* pPerFrame, State* pState) + { + SetPerfMarkerBegin(cb, "Light frustrums"); + + XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f); + XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); + XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); + XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; + m_WireframeBox.Draw(cb, &m_Wireframe, worldMatrix, vCenter, vRadius, vColor); + } + + m_GPUTimer.GetTimeStamp(cb, "Light frustums"); + SetPerfMarkerEnd(cb); + } + + + void SampleRenderer::DownsampleDepthBuffer(VkCommandBuffer cb) + { + // Clear m_AtomicCounter to 0 + vkCmdFillBuffer(cb, m_AtomicCounter, 0, VK_WHOLE_SIZE, 0); + + SetPerfMarkerBegin(cb, "Downsample Depth"); + + vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, m_DepthDownsamplePipeline); + vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_COMPUTE, m_DepthDownsamplePipelineLayout, 0, 1, &m_DepthDownsampleDescriptorSet, 0, nullptr); + + // Each threadgroup works on 64x64 texels + uint32_t dimX = (m_Width + 63) / 64; + uint32_t dimY = (m_Height + 63) / 64; + vkCmdDispatch(cb, dimX, dimY, 1); + + m_GPUTimer.GetTimeStamp(cb, "Downsample Depth"); + SetPerfMarkerEnd(cb); + } + + + void SampleRenderer::RenderScreenSpaceReflections(VkCommandBuffer cb, per_frame* pPerFrame, State* pState) + { + Barriers(cb, { + Transition(m_Sssr.GetOutputTexture()->Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_ASPECT_COLOR_BIT), + Transition(m_DepthHierarchy.Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 
VK_IMAGE_ASPECT_COLOR_BIT, m_DepthMipLevelCount), + }); + + SSSRConstants sssrConstants = {}; + const Camera* camera = &pState->camera; + XMMATRIX view = camera->GetView(); + XMMATRIX proj = camera->GetProjection(); + + XMStoreFloat4x4(&sssrConstants.view, XMMatrixTranspose(view)); + XMStoreFloat4x4(&sssrConstants.projection, XMMatrixTranspose(proj)); + XMStoreFloat4x4(&sssrConstants.invProjection, XMMatrixTranspose(XMMatrixInverse(nullptr, proj))); + XMStoreFloat4x4(&sssrConstants.invView, XMMatrixTranspose(XMMatrixInverse(nullptr, view))); + XMStoreFloat4x4(&sssrConstants.invViewProjection, XMMatrixTranspose(pPerFrame->mInverseCameraViewProj)); + XMStoreFloat4x4(&sssrConstants.prevViewProjection, XMMatrixTranspose(m_prev_view_projection)); + + sssrConstants.frameIndex = m_CurrentFrameIndex; + sssrConstants.maxTraversalIntersections = pState->maxTraversalIterations; + sssrConstants.minTraversalOccupancy = pState->minTraversalOccupancy; + sssrConstants.mostDetailedMip = pState->mostDetailedDepthHierarchyMipLevel; + sssrConstants.temporalStabilityFactor = pState->temporalStability; + sssrConstants.temporalVarianceThreshold = pState->temporalVarianceThreshold; + sssrConstants.depthBufferThickness = pState->depthBufferThickness; + sssrConstants.samplesPerQuad = pState->samplesPerQuad; + sssrConstants.temporalVarianceGuidedTracingEnabled = pState->bEnableVarianceGuidedTracing ? 
1 : 0; + sssrConstants.roughnessThreshold = pState->roughnessThreshold; + + m_Sssr.Draw(cb, sssrConstants, pState->bShowIntersectionResults); + m_GPUTimer.GetTimeStamp(cb, "FidelityFX SSSR"); + + Barriers(cb, { + Transition(m_Sssr.GetOutputTexture()->Resource(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), + Transition(m_DepthHierarchy.Resource(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_ASPECT_COLOR_BIT, m_DepthMipLevelCount), + }); + + + //Extract SSSR Timestamps and calculate averages + uint64_t tileClassificationTime = m_Sssr.GetTileClassificationElapsedGpuTicks(); + static std::deque tileClassificationTimes(100); + tileClassificationTimes.pop_front(); + tileClassificationTimes.push_back(static_cast(tileClassificationTime * m_MillisecondsBetweenGpuTicks)); + pState->tileClassificationTime = 0; + for (auto& time : tileClassificationTimes) + { + pState->tileClassificationTime += time; + } + pState->tileClassificationTime /= tileClassificationTimes.size(); + + uint64_t intersectionTime = m_Sssr.GetIntersectElapsedGpuTicks(); + static std::deque intersectionTimes(100); + intersectionTimes.pop_front(); + intersectionTimes.push_back(static_cast(intersectionTime * m_MillisecondsBetweenGpuTicks)); + pState->intersectionTime = 0; + for (auto& time : intersectionTimes) + { + pState->intersectionTime += time; + } + pState->intersectionTime /= intersectionTimes.size(); + + uint64_t denoisingTime = m_Sssr.GetDenoiserElapsedGpuTicks(); + static std::deque denoisingTimes(100); + denoisingTimes.pop_front(); + denoisingTimes.push_back(static_cast(denoisingTime * m_MillisecondsBetweenGpuTicks)); + pState->denoisingTime = 0; + for (auto& time : denoisingTimes) + { + pState->denoisingTime += time; + } + pState->denoisingTime /= denoisingTimes.size(); + } + + void SampleRenderer::CopyHistorySurfaces(VkCommandBuffer cb) + { + Barriers(cb, { + Transition(m_NormalBuffer.Resource(), 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), + Transition(m_NormalHistoryBuffer.Resource(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), + }); + + SetPerfMarkerBegin(cb, "Copy History Normals and Roughness"); + // Keep copy of normal roughness buffer for next frame + CopyToTexture(cb, &m_NormalBuffer, &m_NormalHistoryBuffer); + SetPerfMarkerEnd(cb); + + Barriers(cb, { + Transition(m_NormalBuffer.Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), + Transition(m_NormalHistoryBuffer.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_ASPECT_COLOR_BIT), + }); + } + + void SampleRenderer::ApplyReflectionTarget(VkCommandBuffer cb, State* pState) + { + VkRenderPassBeginInfo beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + beginInfo.pNext = nullptr; + beginInfo.clearValueCount = 0; + beginInfo.pClearValues = nullptr; + beginInfo.renderArea = { 0, 0, m_Width, m_Height }; + beginInfo.renderPass = m_RenderPassApply; + beginInfo.framebuffer = m_FramebufferApply; + vkCmdBeginRenderPass(cb, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + + SetPerfMarkerBegin(cb, "Apply Reflection View"); + + struct PassConstants + { + XMFLOAT4 viewDir; + UINT showReflectionTarget; + UINT drawReflections; + } constants; + + XMVECTOR view = pState->camera.GetDirection(); + XMStoreFloat4(&constants.viewDir, view); + constants.showReflectionTarget = pState->showReflectionTarget ? 1 : 0; + constants.drawReflections = pState->bDrawScreenSpaceReflections ? 
1 : 0; + + VkDescriptorBufferInfo uniformBufferInfo = m_ConstantBufferRing.AllocConstantBuffer(sizeof(PassConstants), &constants); + VkWriteDescriptorSet uniformBufferWriteDescSet = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET }; + uniformBufferWriteDescSet.pNext = nullptr; + uniformBufferWriteDescSet.descriptorCount = 1; + uniformBufferWriteDescSet.dstArrayElement = 0; + uniformBufferWriteDescSet.dstSet = m_ApplyPipelineDescriptorSet[m_CurrentBackbufferIndex]; + uniformBufferWriteDescSet.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniformBufferWriteDescSet.dstBinding = 5; + uniformBufferWriteDescSet.pBufferInfo = &uniformBufferInfo; + + vkUpdateDescriptorSets(m_pDevice->GetDevice(), 1, &uniformBufferWriteDescSet, 0, nullptr); + + vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, m_ApplyPipeline); + vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, m_ApplyPipelineLayout, 0, 1, &m_ApplyPipelineDescriptorSet[m_CurrentBackbufferIndex], 0, nullptr); + vkCmdSetViewport(cb, 0, 1, &m_Viewport); + vkCmdSetScissor(cb, 0, 1, &m_Scissor); + + vkCmdDraw(cb, 3, 1, 0, 0); + + m_GPUTimer.GetTimeStamp(cb, "Apply Reflection View"); + SetPerfMarkerEnd(cb); + + vkCmdEndRenderPass(cb); + } + + void SampleRenderer::DownsampleScene(VkCommandBuffer cb) + { + m_DownSample.Draw(cb); + m_GPUTimer.GetTimeStamp(cb, "Downsample"); + } + + void SampleRenderer::RenderBloom(VkCommandBuffer cb) + { + m_Bloom.Draw(cb); + m_GPUTimer.GetTimeStamp(cb, "Bloom"); + } + + void SampleRenderer::ApplyTonemapping(VkCommandBuffer cb, State* pState, SwapChain* pSwapChain) + { + vkCmdSetViewport(cb, 0, 1, &m_Viewport); + vkCmdSetScissor(cb, 0, 1, &m_Scissor); + + m_ToneMapping.Draw(cb, m_HDRSRV, pState->exposure, pState->toneMapper); + m_GPUTimer.GetTimeStamp(cb, "Tone mapping"); + } + + void SampleRenderer::RenderHUD(VkCommandBuffer cb, SwapChain* pSwapChain) + { + vkCmdSetViewport(cb, 0, 1, &m_Viewport); + vkCmdSetScissor(cb, 0, 1, &m_Scissor); + + m_ImGUI.Draw(cb); + + 
m_GPUTimer.GetTimeStamp(cb, "ImGUI rendering"); + } + + void SampleRenderer::CopyToTexture(VkCommandBuffer cb, Texture* source, Texture* target) + { + VkImageCopy region = {}; + region.dstOffset = { 0, 0, 0 }; + region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.dstSubresource.baseArrayLayer = 0; + region.dstSubresource.layerCount = 1; + region.dstSubresource.mipLevel = 0; + region.extent = { m_Width, m_Height, 1 }; + region.srcOffset = { 0, 0, 0 }; + region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.layerCount = 1; + region.srcSubresource.mipLevel = 0; + vkCmdCopyImage(cb, source->Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, target->Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + } + + //-------------------------------------------------------------------------------------- + // + // OnRender + // + //-------------------------------------------------------------------------------------- + void SampleRenderer::OnRender(State* pState, SwapChain* pSwapChain) + { + StallFrame(pState->targetFrametime); + + VkCommandBuffer cb1 = BeginNewCommandBuffer(); + BeginFrame(cb1); + + per_frame* pPerFrame = FillFrameConstants(pState); + + // Clears happen in the render passes ----------------------------------------------------------------------- + + // Render to shadow map atlas for spot lights ------------------------------------------ + // + if (m_gltfDepth && pPerFrame) + { + RenderSpotLights(cb1, pPerFrame); + } + + VkClearValue clearValues[3]; + clearValues[0].color.float32[0] = 0; + clearValues[0].color.float32[1] = 0; + clearValues[0].color.float32[2] = 0; + clearValues[0].color.float32[3] = 0; + clearValues[1].color.float32[0] = 0; + clearValues[1].color.float32[1] = 0; + clearValues[1].color.float32[2] = 0; + clearValues[1].color.float32[3] = 0; + clearValues[2].depthStencil.depth = 1; + clearValues[2].depthStencil.stencil = 0; + + VkRenderPassBeginInfo 
beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + beginInfo.pNext = nullptr; + beginInfo.clearValueCount = _countof(clearValues); + beginInfo.pClearValues = clearValues; + beginInfo.renderArea = { 0, 0, m_Width, m_Height }; + beginInfo.renderPass = m_RenderPassMV; + beginInfo.framebuffer = m_FramebufferMV; + vkCmdBeginRenderPass(cb1, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + + // Motion vectors --------------------------------------------------------------------------- + // + if (m_gltfMotionVectors && pPerFrame) + { + RenderMotionVectors(cb1, pPerFrame, pState); + } + + vkCmdEndRenderPass(cb1); + + // Render Scene to the HDR RT ------------------------------------------------ + // + + if (pPerFrame) + { + RenderSkydome(cb1, pPerFrame, pState); + + // Render scene to color buffer + if (m_gltfPBR) + { + RenderScene(cb1); + } + + beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + beginInfo.pNext = nullptr; + beginInfo.clearValueCount = 0; + beginInfo.pClearValues = nullptr; + beginInfo.renderArea = { 0, 0, m_Width, m_Height }; + beginInfo.renderPass = m_RenderPassHDR; + beginInfo.framebuffer = m_FramebufferHDR; + vkCmdBeginRenderPass(cb1, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + + // Draw object bounding boxes + if (m_gltfBBox && pState->bDrawBoundingBoxes) + { + RenderBoundingBoxes(cb1, pPerFrame); + } + + // Draw light frustum + if (pState->bDrawLightFrustum) + { + RenderLightFrustums(cb1, pPerFrame, pState); + } + + vkCmdEndRenderPass(cb1); + + m_GPUTimer.GetTimeStamp(cb1, "Rendering scene"); + } + + // Downsample depth buffer + if (m_gltfMotionVectors && pPerFrame) + { + DownsampleDepthBuffer(cb1); + } + + if (m_gltfPBR && pPerFrame) + { + // Stochastic SSR + RenderScreenSpaceReflections(cb1, pPerFrame, pState); + + // Keep this frames results for next frame + CopyHistorySurfaces(cb1); + + // Apply the result of SSR + ApplyReflectionTarget(cb1, pState); + } + + if (pPerFrame && pState->bDrawBloom) + { + DownsampleScene(cb1); + 
RenderBloom(cb1); + } + + SubmitCommandBuffer(cb1); + + // Wait for swapchain (we are going to render to it) ----------------------------------- + // + int imageIndex = pSwapChain->WaitForSwapChain(); + m_CommandListRing.OnBeginFrame(); + + VkCommandBuffer cb2 = BeginNewCommandBuffer(); + + beginInfo = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO }; + beginInfo.pNext = nullptr; + beginInfo.clearValueCount = 0; + beginInfo.pClearValues = nullptr; + beginInfo.renderArea = { 0, 0, m_Width, m_Height }; + beginInfo.renderPass = pSwapChain->GetRenderPass(); + beginInfo.framebuffer = pSwapChain->GetFramebuffer(imageIndex); + vkCmdBeginRenderPass(cb2, &beginInfo, VK_SUBPASS_CONTENTS_INLINE); + + if (pPerFrame) + { + // Tonemapping + ApplyTonemapping(cb2, pState, pSwapChain); + } + + // Render HUD + RenderHUD(cb2, pSwapChain); + + m_GPUTimer.OnEndFrame(); + + vkCmdEndRenderPass(cb2); + + VkSemaphore imageAvailableSemaphore = VK_NULL_HANDLE; + VkSemaphore renderFinishedSemaphores = VK_NULL_HANDLE; + VkFence cmdBufExecutedFences = VK_NULL_HANDLE; + pSwapChain->GetSemaphores(&imageAvailableSemaphore, &renderFinishedSemaphores, &cmdBufExecutedFences); + + SubmitCommandBuffer(cb2, &imageAvailableSemaphore, &renderFinishedSemaphores, cmdBufExecutedFences); + + // Update previous camera matrices + if (pPerFrame) + { + m_prev_view_projection = pPerFrame->mCameraViewProj; + } + pState->camera.UpdatePreviousMatrices(); + m_CurrentFrameIndex++; + } +} \ No newline at end of file diff --git a/sample/src/VK/Sources/SampleRenderer.h b/sample/src/VK/Sources/SampleRenderer.h index 47c0b21..c13caa9 100644 --- a/sample/src/VK/Sources/SampleRenderer.h +++ b/sample/src/VK/Sources/SampleRenderer.h @@ -1,24 +1,28 @@ -// AMD SampleVK sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ #pragma once #include +#include "SSSR.h" // We are queuing (backBufferCount + 0.5) frames, so we need to triple buffer the resources that get modified each frame static const int backBufferCount = 3; @@ -30,227 +34,220 @@ using namespace CAULDRON_VK; // // This class deals with the GPU side of the sample. // - -class SampleRenderer +namespace SSSR_SAMPLE_VK { -public: - struct State - { - float time; - Camera camera; - - float exposure; - float emmisiveFactor; - float iblFactor; - float lightIntensity; - XMFLOAT3 lightColor; - Camera lightCamera; - - int toneMapper; - int skyDomeType; - bool bDrawBoundingBoxes; - bool bDrawLightFrustum; - bool bDrawBloom; - bool bDrawScreenSpaceReflections; - - float targetFrametime; - - bool bShowIntersectionResults; - float temporalStability; - int maxTraversalIterations; - int mostDetailedDepthHierarchyMipLevel; - float depthBufferThickness; - int minTraversalOccupancy; - int samplesPerQuad; - bool bEnableVarianceGuidedTracing; - float roughnessThreshold; - - float tileClassificationTime; - float intersectionTime; - float denoisingTime; - - bool showReflectionTarget; - bool isBenchmarking; - }; - - void OnCreate(Device *pDevice, SwapChain *pSwapChain); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height); - void OnDestroyWindowSizeDependentResources(); - - int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); - void 
UnloadScene(); - - const std::vector &GetTimingValues() { return m_TimeStamps; } - - void OnRender(State *pState, SwapChain *pSwapChain); - -private: - void CreateApplyReflectionsPipeline(); - void CreateDepthDownsamplePipeline(); - void StallFrame(float targetFrametime); - void BeginFrame(VkCommandBuffer cb); - VkBufferMemoryBarrier BufferBarrier(VkBuffer buffer); - VkImageMemoryBarrier Transition(VkImage image, VkImageLayout before, VkImageLayout after, VkImageAspectFlags aspectMask, int mipCount = 1); - void Barriers(VkCommandBuffer cb, const std::vector& imageBarriers); - - VkCommandBuffer BeginNewCommandBuffer(); - void SubmitCommandBuffer(VkCommandBuffer cb, VkSemaphore* waitSemaphore = NULL, VkSemaphore* signalSemaphores = NULL, VkFence fence = VK_NULL_HANDLE); - - per_frame * FillFrameConstants(State * pState); - void RenderSpotLights(VkCommandBuffer cb, per_frame * pPerFrame); - void RenderMotionVectors(VkCommandBuffer cb, per_frame * pPerFrame, State * pState); - void RenderSkydome(VkCommandBuffer cb, per_frame * pPerFrame, State * pState); - void RenderScene(VkCommandBuffer cb); - void RenderBoundingBoxes(VkCommandBuffer cb, per_frame * pPerFrame); - void RenderLightFrustums(VkCommandBuffer cb, per_frame * pPerFrame, State * pState); - void DownsampleDepthBuffer(VkCommandBuffer cb); - void RenderScreenSpaceReflections(VkCommandBuffer cb, State * pState); - void CopyHistorySurfaces(VkCommandBuffer cb); - void ApplyReflectionTarget(VkCommandBuffer cb, State * pState); - void DownsampleScene(VkCommandBuffer cb); - void RenderBloom(VkCommandBuffer cb); - void ApplyTonemapping(VkCommandBuffer cb, State * pState, SwapChain * pSwapChain); - void RenderHUD(VkCommandBuffer cb, SwapChain * pSwapChain); - void CopyToTexture(VkCommandBuffer cb, Texture * source, Texture * target); - -private: - Device * m_pDevice; - - uint32_t m_Width; - uint32_t m_Height; - - uint32_t m_CurrentFrame; - - VkViewport m_Viewport; - VkRect2D m_Scissor; - - // Initialize helper classes 
- ResourceViewHeaps m_ResourceViewHeaps; - UploadHeap m_UploadHeap; - DynamicBufferRing m_ConstantBufferRing; - StaticBufferPool m_VidMemBufferPool; - StaticBufferPool m_SysMemBufferPool; - CommandListRing m_CommandListRing; - GPUTimestamps m_GPUTimer; - - //gltf passes - GltfPbrPass * m_gltfPBR; - GltfBBoxPass * m_gltfBBox; - GltfDepthPass * m_gltfDepth; - GltfMotionVectorsPass * m_gltfMotionVectors; - GLTFTexturesAndBuffers * m_pGLTFTexturesAndBuffers; - - // effects - Bloom m_Bloom; - SkyDome m_SkyDome; - SkyDome m_AmbientLight; - DownSamplePS m_DownSample; - SkyDomeProc m_SkyDomeProc; - ToneMapping m_ToneMapping; - - // Samplers - VkSampler m_LinearSampler; - - // BRDF LUT - Texture m_BrdfLut; - VkImageView m_BrdfLutSRV; - - // GUI - ImGUI m_ImGUI; - - // Temporary render targets - - // depth buffer - Texture m_DepthBuffer; - VkImageView m_DepthBufferDSV; - - // Motion Vectors resources - Texture m_MotionVectors; - VkImageView m_MotionVectorsSRV; - - // Normal buffer - Texture m_NormalBuffer; - VkImageView m_NormalBufferSRV; - Texture m_NormalHistoryBuffer; - - // Specular roughness target - Texture m_SpecularRoughness; - VkImageView m_SpecularRoughnessSRV; - Texture m_SpecularRoughnessHistory; - - // shadowmaps - Texture m_ShadowMap; - VkImageView m_ShadowMapDSV; - VkImageView m_ShadowMapSRV; - - // Resolved RT - Texture m_HDR; - VkImageView m_HDRSRV; - - // widgets - Wireframe m_Wireframe; - WireframeBox m_WireframeBox; - - std::vector m_TimeStamps; - - // SSR Effect - FfxSssrContext m_SssrContext; - FfxSssrReflectionView m_SssrReflectionView; - bool m_SssrCreatedReflectionView = false; - VkImageView m_SssrSceneSRV; - VkImageView m_SssrDepthBufferHierarchySRV; - VkImageView m_SssrMotionBufferSRV; - VkImageView m_SssrNormalBufferSRV; - VkImageView m_SssrRoughnessBufferSRV; - VkImageView m_SssrNormalHistoryBufferSRV; - VkImageView m_SssrRoughnessHistoryBufferSRV; - VkImageView m_SssrOutputBufferUAV; - VkImageView m_SssrEnvironmentMapSRV; - VkSampler 
m_SssrEnvironmentMapSampler; - Texture m_SssrOutputBuffer; - - // Pass to apply reflection target - VkPipeline m_ApplyPipeline; - VkPipelineLayout m_ApplyPipelineLayout; - VkDescriptorSetLayout m_ApplyPipelineDescriptorSetLayout; - VkDescriptorSet m_ApplyPipelineDescriptorSet[backBufferCount]; - - VkImageView m_ApplyPipelineRTV; - - // Depth downsampling with single CS - VkPipeline m_DepthDownsamplePipeline; - VkPipelineLayout m_DepthDownsamplePipelineLayout; - VkDescriptorSetLayout m_DepthDownsampleDescriptorSetLayout; - VkDescriptorSet m_DepthDownsampleDescriptorSet; - - VkImageView m_DepthBufferSRV; - VkImageView m_DepthHierarchyDescriptors[13]; - Texture m_DepthHierarchy; - VkBuffer m_AtomicCounter; - VmaAllocation m_AtomicCounterAllocation; - VkBufferView m_AtomicCounterUAV; - UINT m_DepthMipLevelCount = 0; - - double m_MillisecondsBetweenGpuTicks; - - // Renderpasses - VkRenderPass m_RenderPassShadow; - VkRenderPass m_RenderPassClearHDR; - VkRenderPass m_RenderPassHDR; - VkRenderPass m_RenderPassMV; - VkRenderPass m_RenderPassPBR; - VkRenderPass m_RenderPassApply; - - // Framebuffers - VkFramebuffer m_FramebufferShadows; - VkFramebuffer m_FramebufferHDR; - VkFramebuffer m_FramebufferMV; - VkFramebuffer m_FramebufferPBR; - VkFramebuffer m_FramebufferApply; - - // For multithreaded texture loading - AsyncPool m_AsyncPool; -}; - + class SampleRenderer + { + public: + struct State + { + float time; + Camera camera; + + float exposure; + float emmisiveFactor; + float iblFactor; + float lightIntensity; + XMFLOAT3 lightColor; + Camera lightCamera; + + int toneMapper; + int skyDomeType; + bool bDrawBoundingBoxes; + bool bDrawLightFrustum; + bool bDrawBloom; + bool bDrawScreenSpaceReflections; + + float targetFrametime; + + bool bShowIntersectionResults; + float temporalStability; + float temporalVarianceThreshold; + int maxTraversalIterations; + int mostDetailedDepthHierarchyMipLevel; + float depthBufferThickness; + int minTraversalOccupancy; + int samplesPerQuad; + 
bool bEnableVarianceGuidedTracing; + float roughnessThreshold; + + float tileClassificationTime; + float intersectionTime; + float denoisingTime; + + bool showReflectionTarget; + bool isBenchmarking; + }; + + void OnCreate(Device* pDevice, SwapChain* pSwapChain); + void OnDestroy(); + + void OnCreateWindowSizeDependentResources(SwapChain* pSwapChain, uint32_t Width, uint32_t Height); + void OnDestroyWindowSizeDependentResources(); + + int LoadScene(GLTFCommon* pGLTFCommon, int stage = 0); + void UnloadScene(); + + const std::vector& GetTimingValues() { return m_TimeStamps; } + + void OnRender(State* pState, SwapChain* pSwapChain); + + private: + void CreateApplyReflectionsPipeline(); + void CreateDepthDownsamplePipeline(); + void StallFrame(float targetFrametime); + void BeginFrame(VkCommandBuffer cb); + VkBufferMemoryBarrier BufferBarrier(VkBuffer buffer); + VkImageMemoryBarrier Transition(VkImage image, VkImageLayout before, VkImageLayout after, VkImageAspectFlags aspectMask, int mipCount = 1); + void Barriers(VkCommandBuffer cb, const std::vector& imageBarriers); + + VkCommandBuffer BeginNewCommandBuffer(); + void SubmitCommandBuffer(VkCommandBuffer cb, VkSemaphore* waitSemaphore = NULL, VkSemaphore* signalSemaphores = NULL, VkFence fence = VK_NULL_HANDLE); + + per_frame* FillFrameConstants(State* pState); + void RenderSpotLights(VkCommandBuffer cb, per_frame* pPerFrame); + void RenderMotionVectors(VkCommandBuffer cb, per_frame* pPerFrame, State* pState); + void RenderSkydome(VkCommandBuffer cb, per_frame* pPerFrame, State* pState); + void RenderScene(VkCommandBuffer cb); + void RenderBoundingBoxes(VkCommandBuffer cb, per_frame* pPerFrame); + void RenderLightFrustums(VkCommandBuffer cb, per_frame* pPerFrame, State* pState); + void DownsampleDepthBuffer(VkCommandBuffer cb); + void RenderScreenSpaceReflections(VkCommandBuffer cb, per_frame* pPerFrame, State* pState); + void CopyHistorySurfaces(VkCommandBuffer cb); + void ApplyReflectionTarget(VkCommandBuffer cb, 
State* pState); + void DownsampleScene(VkCommandBuffer cb); + void RenderBloom(VkCommandBuffer cb); + void ApplyTonemapping(VkCommandBuffer cb, State* pState, SwapChain* pSwapChain); + void RenderHUD(VkCommandBuffer cb, SwapChain* pSwapChain); + void CopyToTexture(VkCommandBuffer cb, Texture* source, Texture* target); + + private: + Device* m_pDevice; + + uint32_t m_Width; + uint32_t m_Height; + + uint32_t m_CurrentBackbufferIndex; + uint32_t m_CurrentFrameIndex; + + VkViewport m_Viewport; + VkRect2D m_Scissor; + + // Initialize helper classes + ResourceViewHeaps m_ResourceViewHeaps; + UploadHeap m_UploadHeap; + DynamicBufferRing m_ConstantBufferRing; + StaticBufferPool m_VidMemBufferPool; + StaticBufferPool m_SysMemBufferPool; + CommandListRing m_CommandListRing; + GPUTimestamps m_GPUTimer; + + //gltf passes + GltfPbrPass* m_gltfPBR; + GltfBBoxPass* m_gltfBBox; + GltfDepthPass* m_gltfDepth; + GltfMotionVectorsPass* m_gltfMotionVectors; + GLTFTexturesAndBuffers* m_pGLTFTexturesAndBuffers; + + // effects + Bloom m_Bloom; + SkyDome m_SkyDome; + SkyDome m_AmbientLight; + DownSamplePS m_DownSample; + SkyDomeProc m_SkyDomeProc; + ToneMapping m_ToneMapping; + + // Samplers + VkSampler m_LinearSampler; + + // BRDF LUT + Texture m_BrdfLut; + VkImageView m_BrdfLutSRV; + + // GUI + ImGUI m_ImGUI; + + // Temporary render targets + + // depth buffer + Texture m_DepthBuffer; + VkImageView m_DepthBufferDSV; + + // Motion Vectors resources + Texture m_MotionVectors; + VkImageView m_MotionVectorsSRV; + + // Normal buffer + Texture m_NormalBuffer; + VkImageView m_NormalBufferSRV; + Texture m_NormalHistoryBuffer; + VkImageView m_NormalHistoryBufferSRV; + + // Specular roughness target + Texture m_SpecularRoughness; + VkImageView m_SpecularRoughnessSRV; + + // shadowmaps + Texture m_ShadowMap; + VkImageView m_ShadowMapDSV; + VkImageView m_ShadowMapSRV; + + // Resolved RT + Texture m_HDR; + VkImageView m_HDRSRV; + + // widgets + Wireframe m_Wireframe; + WireframeBox m_WireframeBox; + 
+ std::vector m_TimeStamps; + + // SSSR Effect + SSSR m_Sssr; + XMMATRIX m_prev_view_projection; + + // Pass to apply reflection target + VkPipeline m_ApplyPipeline; + VkPipelineLayout m_ApplyPipelineLayout; + VkDescriptorSetLayout m_ApplyPipelineDescriptorSetLayout; + VkDescriptorSet m_ApplyPipelineDescriptorSet[backBufferCount]; + + VkImageView m_ApplyPipelineRTV; + + // Depth downsampling with single CS + VkPipeline m_DepthDownsamplePipeline; + VkPipelineLayout m_DepthDownsamplePipelineLayout; + VkDescriptorSetLayout m_DepthDownsampleDescriptorSetLayout; + VkDescriptorSet m_DepthDownsampleDescriptorSet; + + VkImageView m_DepthBufferSRV; + VkImageView m_DepthHierarchyDescriptors[13]; + Texture m_DepthHierarchy; + VkImageView m_DepthHierarchySRV; + + VkBuffer m_AtomicCounter; + VmaAllocation m_AtomicCounterAllocation; + VkBufferView m_AtomicCounterUAV; + UINT m_DepthMipLevelCount = 0; + + double m_MillisecondsBetweenGpuTicks; + + // Renderpasses + VkRenderPass m_RenderPassShadow; + VkRenderPass m_RenderPassClearHDR; + VkRenderPass m_RenderPassHDR; + VkRenderPass m_RenderPassMV; + VkRenderPass m_RenderPassPBR; + VkRenderPass m_RenderPassApply; + + // Framebuffers + VkFramebuffer m_FramebufferShadows; + VkFramebuffer m_FramebufferHDR; + VkFramebuffer m_FramebufferMV; + VkFramebuffer m_FramebufferPBR; + VkFramebuffer m_FramebufferApply; + + // For multithreaded texture loading + AsyncPool m_AsyncPool; + }; +} \ No newline at end of file diff --git a/sample/src/VK/Sources/ShaderPass.cpp b/sample/src/VK/Sources/ShaderPass.cpp new file mode 100644 index 0000000..a528773 --- /dev/null +++ b/sample/src/VK/Sources/ShaderPass.cpp @@ -0,0 +1,39 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#include "stdafx.h" +#include "ShaderPass.h" +namespace SSSR_SAMPLE_VK +{ + void ShaderPass::OnDestroy(VkDevice device, ResourceViewHeaps* resourceHeap) + { + vkDestroyPipelineLayout(device, pipelineLayout, nullptr); + vkDestroyPipeline(device, pipeline, nullptr); + vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr); + + for (auto e : descriptorSets) + { + resourceHeap->FreeDescriptor(e); + } + descriptorSets.clear(); + } + +} \ No newline at end of file diff --git a/sample/src/VK/Sources/ShaderPass.h b/sample/src/VK/Sources/ShaderPass.h new file mode 100644 index 0000000..aab252d --- /dev/null +++ b/sample/src/VK/Sources/ShaderPass.h @@ -0,0 +1,35 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ +#pragma once +#include "Base/ResourceViewHeaps.h" + +namespace SSSR_SAMPLE_VK +{ + struct ShaderPass { + VkPipeline pipeline; + VkPipelineLayout pipelineLayout; + uint32_t bindings_count; + VkDescriptorSetLayout descriptorSetLayout; + std::vector descriptorSets; + void OnDestroy(VkDevice device, ResourceViewHeaps* resourceHeap); + }; +} \ No newline at end of file diff --git a/sample/src/VK/Sources/SssrSample.cpp b/sample/src/VK/Sources/SssrSample.cpp index 9277d64..4ac445f 100644 --- a/sample/src/VK/Sources/SssrSample.cpp +++ b/sample/src/VK/Sources/SssrSample.cpp @@ -1,21 +1,24 @@ -// AMD SampleVK sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ #include "stdafx.h" @@ -23,569 +26,546 @@ #include "base/ShaderCompilerCache.h" #include "base/Instance.h" -SssrSample::SssrSample(LPCSTR name) : FrameworkWindows(name) +namespace SSSR_SAMPLE_VK { - m_LastFrameTime = MillisecondsNow(); - m_Time = 0; - m_bPlay = true; - m_bShowUI = true; - - m_CameraControlSelected = 0; // select WASD on start up - - m_pGltfLoader = NULL; -} - -//-------------------------------------------------------------------------------------- -// -// OnCreate -// -//-------------------------------------------------------------------------------------- -void SssrSample::OnCreate(HWND hWnd) -{ - // get the list of scenes - for (const auto& scene : m_JsonConfigFile["scenes"]) - m_SceneNames.push_back(scene["name"]); - - DWORD dwAttrib = GetFileAttributes("..\\media\\"); - if ((dwAttrib == INVALID_FILE_ATTRIBUTES) || ((dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) == 0) - { - MessageBox(NULL, "Media files not found!\n\nPlease check the readme on how to get the media files.", "Cauldron Panic!", MB_ICONERROR); - exit(0); - } - - // Create Device - // + SssrSample::SssrSample(LPCSTR name) : FrameworkWindows(name) + { + m_DeltaTime = 0; + m_Distance = 0; + m_Pitch = 0; + m_Yaw = 0; + m_selectedScene = 0; + m_LastFrameTime = MillisecondsNow(); + m_Time = 0; + m_bPlay = true; + m_bShowUI = true; + + m_CameraControlSelected = 0; // select WASD on start up + + m_pGltfLoader = NULL; + } + + 
//-------------------------------------------------------------------------------------- + // + // OnCreate + // + //-------------------------------------------------------------------------------------- + void SssrSample::OnCreate(HWND hWnd) + { + // get the list of scenes + for (const auto& scene : m_JsonConfigFile["scenes"]) + m_SceneNames.push_back(scene["name"]); + + DWORD dwAttrib = GetFileAttributes("..\\media\\"); + if ((dwAttrib == INVALID_FILE_ATTRIBUTES) || ((dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) == 0) + { + MessageBox(NULL, "Media files not found!\n\nPlease check the readme on how to get the media files.", "Cauldron Panic!", MB_ICONERROR); + exit(0); + } + + // Create Device + // #ifdef _DEBUG - bool cpuValidationLayerEnabled = true; - bool gpuValidationLayerEnabled = false; + bool cpuValidationLayerEnabled = true; + bool gpuValidationLayerEnabled = false; #else - bool cpuValidationLayerEnabled = false; - bool gpuValidationLayerEnabled = false; + bool cpuValidationLayerEnabled = false; + bool gpuValidationLayerEnabled = false; #endif - - // Create the device - InstanceProperties ip; - ip.Init(); - m_Device.SetEssentialInstanceExtensions(cpuValidationLayerEnabled, gpuValidationLayerEnabled, &ip); - - // Create instance - VkInstance vulkanInstance; - VkPhysicalDevice physicalDevice; - CreateInstance("SssrSample", "Cauldron", &vulkanInstance, &physicalDevice, &ip); - - DeviceProperties dp; - dp.Init(physicalDevice); - m_Device.SetEssentialDeviceExtensions(&dp); - dp.AddDeviceExtensionName(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); - - bool addedSubgroupSizeControl = dp.AddDeviceExtensionName(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - - VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures = {}; - subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; - subgroupSizeControlFeatures.pNext = nullptr; - subgroupSizeControlFeatures.subgroupSizeControl = true; - 
subgroupSizeControlFeatures.computeFullSubgroups = false; - if (addedSubgroupSizeControl) - { - dp.SetNewNext(&subgroupSizeControlFeatures); - } - - // Create device - m_Device.OnCreateEx(vulkanInstance, physicalDevice, hWnd, &dp); - - m_Device.CreatePipelineCache(); - - // Init the shader compiler - InitDirectXCompiler(); - CreateShaderCache(); - - // Create Swapchain - // - uint32_t dwNumberOfBackBuffers = 2; - m_SwapChain.OnCreate(&m_Device, dwNumberOfBackBuffers, hWnd); - - // Create a instance of the renderer and initialize it, we need to do that for each GPU - // - m_Node = new SampleRenderer(); - m_Node->OnCreate(&m_Device, &m_SwapChain); - - // init GUI (non gfx stuff) - // - ImGUI_Init((void *)hWnd); - - // Init Camera, looking at the origin - // - m_Yaw = 0.0f; - m_Pitch = 0.0f; - m_Distance = 3.5f; - - // init GUI state - m_State.toneMapper = 2; - m_State.skyDomeType = 1; - m_State.exposure = 1.0f; - m_State.emmisiveFactor = 1.0f; - m_State.iblFactor = 1.0f; - m_State.bDrawBoundingBoxes = false; - m_State.bDrawLightFrustum = false; - m_State.bDrawBloom = false; - m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, XMVectorSet(0, 0, 0, 0)); - m_State.lightIntensity = 10.f; - m_State.lightCamera.SetFov(XM_PI / 6.0f, 1024, 1024, 0.1f, 20.0f); - m_State.lightCamera.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); - m_State.lightColor = XMFLOAT3(1, 1, 1); - m_State.targetFrametime = 0; - m_State.temporalStability = 0.99f; - m_State.maxTraversalIterations = 128; - m_State.mostDetailedDepthHierarchyMipLevel = 1; - m_State.depthBufferThickness = 0.015f; - m_State.minTraversalOccupancy = 4; - m_State.samplesPerQuad = 1; - m_State.bEnableVarianceGuidedTracing = true; - m_State.bShowIntersectionResults = false; - m_State.roughnessThreshold = 0.2f; - m_State.showReflectionTarget = false; - m_State.bDrawScreenSpaceReflections = true; -} - -//-------------------------------------------------------------------------------------- -// -// OnDestroy -// 
-//-------------------------------------------------------------------------------------- -void SssrSample::OnDestroy() -{ - ImGUI_Shutdown(); - - m_Device.GPUFlush(); - - // Fullscreen state should always be false before exiting the app. - m_SwapChain.SetFullScreen(false); - - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); - - delete m_Node; - - m_SwapChain.OnDestroyWindowSizeDependentResources(); - m_SwapChain.OnDestroy(); - - //shut down the shader compiler - DestroyShaderCache(&m_Device); - - m_Device.DestroyPipelineCache(); - - if (m_pGltfLoader) - { - delete m_pGltfLoader; - m_pGltfLoader = NULL; - } - - m_Device.OnDestroy(); -} - -//-------------------------------------------------------------------------------------- -// -// OnEvent, forward Win32 events to ImGUI -// -//-------------------------------------------------------------------------------------- -bool SssrSample::OnEvent(MSG msg) -{ - if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) - return true; - - return true; -} - -//-------------------------------------------------------------------------------------- -// -// SetFullScreen -// -//-------------------------------------------------------------------------------------- -void SssrSample::SetFullScreen(bool fullscreen) -{ - m_Device.GPUFlush(); - - m_SwapChain.SetFullScreen(fullscreen); -} -void SssrSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) -{ - // First load configuration - std::ifstream f("config.json"); - if (!f) - { - MessageBox(NULL, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); - exit(-1); - } - f >> m_JsonConfigFile; - - // Parse command line and override the config file - try - { - if (strlen(lpCmdLine) > 0) - { - auto j3 = json::parse(lpCmdLine); - m_JsonConfigFile.merge_patch(j3); - } - } - catch (json::parse_error) - { - Trace("Error parsing commandline\n"); - exit(0); - } - - // Set values 
- *pWidth = m_JsonConfigFile.value("width", 1920); - *pHeight = m_JsonConfigFile.value("height", 1080); - *pbFullScreen = m_JsonConfigFile.value("fullScreen", false); - m_State.isBenchmarking = m_JsonConfigFile.value("benchmark", false); -} - -void SssrSample::BuildUI() -{ - ImGuiStyle& style = ImGui::GetStyle(); - style.FrameBorderSize = 1.0f; - - bool opened = true; - ImGui::Begin("Stats", &opened); - - if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Text("Resolution : %ix%i", m_Width, m_Height); - } - - if (ImGui::CollapsingHeader("Animation")) - { - ImGui::Checkbox("Play", &m_bPlay); - ImGui::SliderFloat("Time", &m_Time, 0, 30); - } - - if (ImGui::CollapsingHeader("Model Selection", ImGuiTreeNodeFlags_DefaultOpen)) - { - static int selectedScene = 0; - auto getterLambda = [](void* data, int idx, const char** out_str)->bool { *out_str = ((std::vector *)data)->at(idx).c_str(); return true; }; - if (ImGui::Combo("model", &selectedScene, getterLambda, &m_SceneNames, (int)m_SceneNames.size()) || (m_pGltfLoader == NULL)) - { - LoadScene(selectedScene); - - // bail out as we need to reload everything - ImGui::End(); - ImGui::EndFrame(); - return; - } - - char *cameraControl[] = { "WASD", "Orbit", "cam #0", "cam #1", "cam #2", "cam #3" , "cam #4", "cam #5" }; - if (m_CameraControlSelected >= m_pGltfLoader->m_cameras.size() + 2) - m_CameraControlSelected = 0; - ImGui::Combo("Camera", &m_CameraControlSelected, cameraControl, (int)(m_pGltfLoader->m_cameras.size() + 2)); - - ImGui::Checkbox("Show Bounding Boxes", &m_State.bDrawBoundingBoxes); - } - - if (ImGui::CollapsingHeader("Lighting")) - { - const char * tonemappers[] = { "Timothy", "DX11DSK", "Reinhard", "Uncharted2Tonemap", "ACES", "No tonemapper" }; - ImGui::Combo("Tonemapper", &m_State.toneMapper, tonemappers, _countof(tonemappers)); - - const char * skyDomeType[] = { "Procedural Sky", "cubemap", "Simple clear" }; - ImGui::Combo("SkyDome", &m_State.skyDomeType, skyDomeType, 
_countof(skyDomeType)); - - ImGui::SliderFloat("IBL Factor", &m_State.iblFactor, 0.0f, 10.0f, NULL, 1.0f); - ImGui::SliderFloat("Emmisive", &m_State.emmisiveFactor, 1.0f, 1000.0f, NULL, 1.0f); - ImGui::SliderFloat("Exposure", &m_State.exposure, 0.0f, 4.0f); - ImGui::Checkbox("Show Light Frustums", &m_State.bDrawLightFrustum); - ImGui::Checkbox("Draw Bloom", &m_State.bDrawBloom); - } - - if (ImGui::CollapsingHeader("Reflections", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Checkbox("Draw Screen Space Reflections", &m_State.bDrawScreenSpaceReflections); - ImGui::Checkbox("Show Reflection Target", &m_State.showReflectionTarget); - ImGui::Checkbox("Show Intersection Results", &m_State.bShowIntersectionResults); - ImGui::SliderFloat("Target Frametime in ms", &m_State.targetFrametime, 0.0f, 50.0f); - ImGui::SliderInt("Max Traversal Iterations", &m_State.maxTraversalIterations, 0, 256); - ImGui::SliderInt("Min Traversal Occupancy", &m_State.minTraversalOccupancy, 0, 32); - ImGui::SliderInt("Most Detailed Level", &m_State.mostDetailedDepthHierarchyMipLevel, 0, 5); - ImGui::SliderFloat("Depth Buffer Thickness", &m_State.depthBufferThickness, 0.0f, 0.03f); - ImGui::SliderFloat("Roughness Threshold", &m_State.roughnessThreshold, 0.0f, 1.f); - ImGui::SliderFloat("Temporal Stability", &m_State.temporalStability, 0.0f, 1.0f); - ImGui::Checkbox("Enable Variance Guided Tracing", &m_State.bEnableVarianceGuidedTracing); - - ImGui::Text("Samples Per Quad"); ImGui::SameLine(); - ImGui::RadioButton("1", &m_State.samplesPerQuad, 1); ImGui::SameLine(); - ImGui::RadioButton("2", &m_State.samplesPerQuad, 2); ImGui::SameLine(); - ImGui::RadioButton("4", &m_State.samplesPerQuad, 4); - - ImGui::Value("Tile Classification Elapsed Time", 1000 * m_State.tileClassificationTime, "%.1f us"); - ImGui::Value("Intersection Elapsed Time", 1000 * m_State.intersectionTime, "%.1f us"); - ImGui::Value("Denoising Elapsed Time", 1000 * m_State.denoisingTime, "%.1f us"); - } - - if 
(ImGui::CollapsingHeader("Profiler")) - { - const std::vector& timeStamps = m_Node->GetTimingValues(); - if (timeStamps.size() > 0) - { - for (uint32_t i = 0; i < timeStamps.size(); i++) - { - ImGui::Text("%-22s: %7.1f", timeStamps[i].m_label.c_str(), timeStamps[i].m_microseconds); - } - - //scrolling data and average computing - static float values[128]; - values[127] = timeStamps.back().m_microseconds; - for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; } - ImGui::PlotLines("", values, 128, 0, "GPU frame time (us)", 0.0f, 30000.0f, ImVec2(0, 80)); - } - } - - ImGui::Text("'X' to show/hide GUI"); - ImGui::End(); -} - -void SssrSample::HandleInput() -{ - // If the mouse was not used by the GUI then it's for the camera - // - ImGuiIO& io = ImGui::GetIO(); - - static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - std::chrono::duration diff = now - last; - last = now; - - io.DeltaTime = static_cast(diff.count()); - - if (ImGui::IsKeyPressed('X')) - { - m_bShowUI = !m_bShowUI; - ShowCursor(m_bShowUI); - } - - if (io.WantCaptureMouse == false || !m_bShowUI) - { - if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) - { - m_Yaw -= io.MouseDelta.x / 100.f; - m_Pitch += io.MouseDelta.y / 100.f; - } - - // Choose camera movement depending on setting - // - if (m_CameraControlSelected == 0) - { - // WASD - // - m_State.camera.UpdateCameraWASD(m_Yaw, m_Pitch, io.KeysDown, io.DeltaTime); - } - else if (m_CameraControlSelected == 1) - { - // Orbiting - // - m_Distance -= (float)io.MouseWheel / 3.0f; - m_Distance = std::max(m_Distance, 0.1f); - - bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); - - m_State.camera.UpdateCameraPolar(m_Yaw, m_Pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? 
io.MouseDelta.y / 100.0f : 0.0f, m_Distance); - } - else - { - // Use a camera from the GLTF - // - m_pGltfLoader->GetCamera(m_CameraControlSelected - 2, &m_State.camera); - m_Yaw = m_State.camera.GetYaw(); - m_Pitch = m_State.camera.GetPitch(); - } - } -} - -void SssrSample::LoadScene(int sceneIndex) -{ - json scene = m_JsonConfigFile["scenes"][sceneIndex]; - if (m_pGltfLoader != NULL) - { - //free resources, unload the current scene, and load new scene... - m_Device.GPUFlush(); - - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); - m_pGltfLoader->Unload(); - m_Node->OnCreate(&m_Device, &m_SwapChain); - m_Node->OnCreateWindowSizeDependentResources(&m_SwapChain, m_Width, m_Height); - } - - delete(m_pGltfLoader); - m_pGltfLoader = new GLTFCommon(); - - if (m_pGltfLoader->Load(scene["directory"], scene["filename"]) == false) - { - MessageBox(NULL, "The selected model couldn't be found, please check the documentation", "Cauldron Panic!", MB_ICONERROR); - exit(0); - } - - // Load the UI settings, and also some defaults cameras and lights, in case the GLTF has none - { + // Create the device + InstanceProperties ip; + ip.Init(); + m_Device.SetEssentialInstanceExtensions(cpuValidationLayerEnabled, gpuValidationLayerEnabled, &ip); + + // Create instance + VkInstance vulkanInstance; + VkPhysicalDevice physicalDevice; + CreateInstance("SssrSample", "Cauldron", &vulkanInstance, &physicalDevice, &ip); + + DeviceProperties dp; + dp.Init(physicalDevice); + m_Device.SetEssentialDeviceExtensions(&dp); + dp.AddDeviceExtensionName(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); + + bool addedSubgroupSizeControl = dp.AddDeviceExtensionName(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); + + VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures = {}; + subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; + subgroupSizeControlFeatures.pNext = nullptr; + 
subgroupSizeControlFeatures.subgroupSizeControl = true; + subgroupSizeControlFeatures.computeFullSubgroups = false; + if (addedSubgroupSizeControl) + { + dp.SetNewNext(&subgroupSizeControlFeatures); + } + + // Create device + m_Device.OnCreateEx(vulkanInstance, physicalDevice, hWnd, &dp); + + m_Device.CreatePipelineCache(); + + // Init the shader compiler + InitDirectXCompiler(); + CreateShaderCache(); + + // Create Swapchain + // + uint32_t dwNumberOfBackBuffers = 2; + m_SwapChain.OnCreate(&m_Device, dwNumberOfBackBuffers, hWnd); + + // Create a instance of the renderer and initialize it, we need to do that for each GPU + // + m_Node = new SampleRenderer(); + m_Node->OnCreate(&m_Device, &m_SwapChain); + + // init GUI (non gfx stuff) + // + ImGUI_Init((void*)hWnd); + + // Init Camera, looking at the origin + // + m_Yaw = 0.0f; + m_Pitch = 0.0f; + m_Distance = 3.5f; + + // init GUI state + m_State.toneMapper = 2; + m_State.skyDomeType = 1; + m_State.exposure = 1.0f; + m_State.emmisiveFactor = 1.0f; + m_State.iblFactor = 1.0f; + m_State.bDrawBoundingBoxes = false; + m_State.bDrawLightFrustum = false; + m_State.bDrawBloom = false; + m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, XMVectorSet(0, 0, 0, 0)); + m_State.lightIntensity = 10.f; + m_State.lightCamera.SetFov(XM_PI / 6.0f, 1024, 1024, 0.1f, 20.0f); + m_State.lightCamera.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); + m_State.lightColor = XMFLOAT3(1, 1, 1); + m_State.targetFrametime = 0; + m_State.temporalStability = 0.99f; + m_State.temporalVarianceThreshold = 0.002f; + m_State.maxTraversalIterations = 128; + m_State.mostDetailedDepthHierarchyMipLevel = 1; + m_State.depthBufferThickness = 0.015f; + m_State.minTraversalOccupancy = 4; + m_State.samplesPerQuad = 1; + m_State.bEnableVarianceGuidedTracing = true; + m_State.bShowIntersectionResults = false; + m_State.roughnessThreshold = 0.2f; + m_State.showReflectionTarget = false; + m_State.bDrawScreenSpaceReflections = true; + + 
LoadScene(m_selectedScene); + } + + //-------------------------------------------------------------------------------------- + // + // OnDestroy + // + //-------------------------------------------------------------------------------------- + void SssrSample::OnDestroy() + { + ImGUI_Shutdown(); + + m_Device.GPUFlush(); + + // Fullscreen state should always be false before exiting the app. + m_SwapChain.SetFullScreen(false); + + m_Node->UnloadScene(); + m_Node->OnDestroyWindowSizeDependentResources(); + m_Node->OnDestroy(); + + delete m_Node; + + m_SwapChain.OnDestroyWindowSizeDependentResources(); + m_SwapChain.OnDestroy(); + + //shut down the shader compiler + DestroyShaderCache(&m_Device); + + m_Device.DestroyPipelineCache(); + + if (m_pGltfLoader) + { + delete m_pGltfLoader; + m_pGltfLoader = NULL; + } + + m_Device.OnDestroy(); + } + + //-------------------------------------------------------------------------------------- + // + // OnEvent, forward Win32 events to ImGUI + // + //-------------------------------------------------------------------------------------- + bool SssrSample::OnEvent(MSG msg) + { + if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) + return true; + + return true; + } + + //-------------------------------------------------------------------------------------- + // + // SetFullScreen + // + //-------------------------------------------------------------------------------------- + void SssrSample::SetFullScreen(bool fullscreen) + { + m_Device.GPUFlush(); + + m_SwapChain.SetFullScreen(fullscreen); + } + + void SssrSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) + { + // First load configuration + std::ifstream f("config.json"); + if (!f) + { + MessageBox(NULL, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); + exit(-1); + } + f >> m_JsonConfigFile; + + // Parse command line and override the config file + try + { + if (strlen(lpCmdLine) > 0) + { + auto j3 
= json::parse(lpCmdLine); + m_JsonConfigFile.merge_patch(j3); + } + } + catch (json::parse_error) + { + Trace("Error parsing commandline\n"); + exit(0); + } + + // Set values + *pWidth = m_JsonConfigFile.value("width", 1920); + *pHeight = m_JsonConfigFile.value("height", 1080); + *pbFullScreen = m_JsonConfigFile.value("fullScreen", false); + m_State.isBenchmarking = m_JsonConfigFile.value("benchmark", false); + } + + void SssrSample::BuildUI() + { + ImGuiStyle& style = ImGui::GetStyle(); + style.FrameBorderSize = 1.0f; + + bool opened = true; + ImGui::Begin("FidelityFX SSSR", &opened); + + if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Resolution : %ix%i", m_Width, m_Height); + } + + if (ImGui::CollapsingHeader("Controls", ImGuiTreeNodeFlags_DefaultOpen)) + { + char* cameraControl[] = { "WASD", "Orbit", "cam #0", "cam #1", "cam #2", "cam #3" , "cam #4", "cam #5" }; + if (m_CameraControlSelected >= m_pGltfLoader->m_cameras.size() + 2) + m_CameraControlSelected = 0; + ImGui::Combo("Camera", &m_CameraControlSelected, cameraControl, (int)(m_pGltfLoader->m_cameras.size() + 2)); + } + + if (ImGui::CollapsingHeader("Reflections", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Checkbox("Draw Screen Space Reflections", &m_State.bDrawScreenSpaceReflections); + ImGui::Checkbox("Show Reflection Target", &m_State.showReflectionTarget); + ImGui::Checkbox("Show Intersection Results", &m_State.bShowIntersectionResults); + ImGui::SliderFloat("Target Frametime in ms", &m_State.targetFrametime, 0.0f, 50.0f); + ImGui::SliderInt("Max Traversal Iterations", &m_State.maxTraversalIterations, 0, 256); + ImGui::SliderInt("Min Traversal Occupancy", &m_State.minTraversalOccupancy, 0, 32); + ImGui::SliderInt("Most Detailed Level", &m_State.mostDetailedDepthHierarchyMipLevel, 0, 5); + ImGui::SliderFloat("Depth Buffer Thickness", &m_State.depthBufferThickness, 0.0f, 0.03f); + ImGui::SliderFloat("Roughness Threshold", &m_State.roughnessThreshold, 0.0f, 
1.f); + ImGui::SliderFloat("Temporal Stability", &m_State.temporalStability, 0.0f, 1.0f); + ImGui::SliderFloat("Temporal Variance Threshold", &m_State.temporalVarianceThreshold, 0.0f, 0.01f); + ImGui::Checkbox("Enable Variance Guided Tracing", &m_State.bEnableVarianceGuidedTracing); + + ImGui::Text("Samples Per Quad"); ImGui::SameLine(); + ImGui::RadioButton("1", &m_State.samplesPerQuad, 1); ImGui::SameLine(); + ImGui::RadioButton("2", &m_State.samplesPerQuad, 2); ImGui::SameLine(); + ImGui::RadioButton("4", &m_State.samplesPerQuad, 4); + + ImGui::Value("Tile Classification Elapsed Time", 1000 * m_State.tileClassificationTime, "%.1f us"); + ImGui::Value("Intersection Elapsed Time", 1000 * m_State.intersectionTime, "%.1f us"); + ImGui::Value("Denoising Elapsed Time", 1000 * m_State.denoisingTime, "%.1f us"); + } + + if (ImGui::CollapsingHeader("Profiler")) + { + const std::vector& timeStamps = m_Node->GetTimingValues(); + if (timeStamps.size() > 0) + { + for (uint32_t i = 0; i < timeStamps.size(); i++) + { + ImGui::Text("%-22s: %7.1f", timeStamps[i].m_label.c_str(), timeStamps[i].m_microseconds); + } + + //scrolling data and average computing + static float values[128]; + values[127] = timeStamps.back().m_microseconds; + for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; } + ImGui::PlotLines("", values, 128, 0, "GPU frame time (us)", 0.0f, 30000.0f, ImVec2(0, 80)); + } + } + + ImGui::Text("'X' to show/hide GUI"); + ImGui::End(); + } + + void SssrSample::HandleInput() + { + // If the mouse was not used by the GUI then it's for the camera + // + ImGuiIO& io = ImGui::GetIO(); + + static std::chrono::system_clock::time_point last = std::chrono::system_clock::now(); + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + std::chrono::duration diff = now - last; + last = now; + + io.DeltaTime = static_cast(diff.count()); + + if (ImGui::IsKeyPressed('X')) + { + m_bShowUI = !m_bShowUI; + ShowCursor(m_bShowUI); + } + + if 
(io.WantCaptureMouse == false || !m_bShowUI) + { + if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) + { + m_Yaw -= io.MouseDelta.x / 100.f; + m_Pitch += io.MouseDelta.y / 100.f; + } + + // Choose camera movement depending on setting + // + if (m_CameraControlSelected == 0) + { + // WASD + // + m_State.camera.UpdateCameraWASD(m_Yaw, m_Pitch, io.KeysDown, io.DeltaTime); + } + else if (m_CameraControlSelected == 1) + { + // Orbiting + // + m_Distance -= (float)io.MouseWheel / 3.0f; + m_Distance = std::max(m_Distance, 0.1f); + + bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); + + m_State.camera.UpdateCameraPolar(m_Yaw, m_Pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? io.MouseDelta.y / 100.0f : 0.0f, m_Distance); + } + else + { + // Use a camera from the GLTF + // + m_pGltfLoader->GetCamera(m_CameraControlSelected - 2, &m_State.camera); + m_Yaw = m_State.camera.GetYaw(); + m_Pitch = m_State.camera.GetPitch(); + } + } + } + + void SssrSample::LoadScene(int sceneIndex) + { + json scene = m_JsonConfigFile["scenes"][sceneIndex]; + if (m_pGltfLoader != NULL) + { + //free resources, unload the current scene, and load new scene... 
+ m_Device.GPUFlush(); + + m_Node->UnloadScene(); + m_Node->OnDestroyWindowSizeDependentResources(); + m_Node->OnDestroy(); + m_pGltfLoader->Unload(); + m_Node->OnCreate(&m_Device, &m_SwapChain); + m_Node->OnCreateWindowSizeDependentResources(&m_SwapChain, m_Width, m_Height); + } + + delete(m_pGltfLoader); + m_pGltfLoader = new GLTFCommon(); + + if (m_pGltfLoader->Load(scene["directory"], scene["filename"]) == false) + { + MessageBox(NULL, "The selected model couldn't be found, please check the documentation", "Cauldron Panic!", MB_ICONERROR); + exit(0); + } + + // Load the UI settings, and also some defaults cameras and lights, in case the GLTF has none + { #define LOAD(j, key, val) val = j.value(key, val) - // global settings - LOAD(scene, "toneMapper", m_State.toneMapper); - LOAD(scene, "skyDomeType", m_State.skyDomeType); - LOAD(scene, "exposure", m_State.exposure); - LOAD(scene, "iblFactor", m_State.iblFactor); - LOAD(scene, "emmisiveFactor", m_State.emmisiveFactor); - LOAD(scene, "skyDomeType", m_State.skyDomeType); - - // default light - m_State.lightIntensity = scene.value("intensity", 1.0f); - - // default camera (in case the gltf has none) - json camera = scene["camera"]; - LOAD(camera, "yaw", m_Yaw); - LOAD(camera, "pitch", m_Pitch); - LOAD(camera, "distance", m_Distance); - XMVECTOR lookAt = GetVector(GetElementJsonArray(camera, "lookAt", { 0.0, 0.0, 0.0 })); - m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, lookAt); - - // set benchmarking state if enabled - if (m_State.isBenchmarking) - { - BenchmarkConfig(scene["BenchmarkSettings"], -1, m_pGltfLoader); - } - - // indicate the mainloop we started loading a GLTF and it needs to load the rest (textures and geometry) - m_bLoadingScene = true; - } -} + // global settings + LOAD(scene, "toneMapper", m_State.toneMapper); + LOAD(scene, "skyDomeType", m_State.skyDomeType); + LOAD(scene, "exposure", m_State.exposure); + LOAD(scene, "iblFactor", m_State.iblFactor); + LOAD(scene, "emmisiveFactor", 
m_State.emmisiveFactor); + LOAD(scene, "skyDomeType", m_State.skyDomeType); + + // default light + m_State.lightIntensity = scene.value("intensity", 1.0f); + + // default camera (in case the gltf has none) + json camera = scene["camera"]; + LOAD(camera, "yaw", m_Yaw); + LOAD(camera, "pitch", m_Pitch); + LOAD(camera, "distance", m_Distance); + XMVECTOR lookAt = GetVector(GetElementJsonArray(camera, "lookAt", { 0.0, 0.0, 0.0 })); + m_State.camera.LookAt(m_Yaw, m_Pitch, m_Distance, lookAt); + + // set benchmarking state if enabled + if (m_State.isBenchmarking) + { + BenchmarkConfig(scene["BenchmarkSettings"], -1, m_pGltfLoader); + } + + // indicate the mainloop we started loading a GLTF and it needs to load the rest (textures and geometry) + m_bLoadingScene = true; + } + } + + //-------------------------------------------------------------------------------------- + // + // OnResize + // + //-------------------------------------------------------------------------------------- + void SssrSample::OnResize(uint32_t width, uint32_t height) + { + if (m_Width != width || m_Height != height) + { + // Flush GPU + // + m_Device.GPUFlush(); + + // If resizing but no minimizing + // + if (m_Width > 0 && m_Height > 0) + { + if (m_Node != NULL) + { + m_Node->OnDestroyWindowSizeDependentResources(); + } + m_SwapChain.OnDestroyWindowSizeDependentResources(); + } + + m_Width = width; + m_Height = height; + + // if resizing but not minimizing the recreate it with the new size + // + if (m_Width > 0 && m_Height > 0) + { + m_SwapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false, DISPLAYMODE_SDR); + if (m_Node != NULL) + { + m_Node->OnCreateWindowSizeDependentResources(&m_SwapChain, m_Width, m_Height); + } + } + } + m_State.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); + } + + //-------------------------------------------------------------------------------------- + // + // OnRender, updates the state from the UI, animates, transforms and renders the 
scene + // + //-------------------------------------------------------------------------------------- + void SssrSample::OnRender() + { + // Get timings + // + double timeNow = MillisecondsNow(); + m_DeltaTime = timeNow - m_LastFrameTime; + m_LastFrameTime = timeNow; + + // Build UI and set the scene state. Note that the rendering of the UI happens later. + // + ImGUI_UpdateIO(); + ImGui::NewFrame(); + + if (m_bLoadingScene) + { + static int loadingStage = 0; + // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns 0 + // This is done so we can display a progress bar when the scene is loading + loadingStage = m_Node->LoadScene(m_pGltfLoader, loadingStage); + if (loadingStage == 0) + { + m_Time = 0; + m_bLoadingScene = false; + } + } + else if (m_pGltfLoader && m_State.isBenchmarking) + { + const std::vector& timeStamps = m_Node->GetTimingValues(); + const std::string* screenshotName; + m_Time = BenchmarkLoop(timeStamps, &m_State.camera, &screenshotName); + } + else + { + if (m_bShowUI) + { + BuildUI(); + } + + if (!m_bLoadingScene) + { + HandleInput(); + } + } + + // Set animation time + // + if (m_bPlay) + { + m_Time += (float)m_DeltaTime / 1000.0f; + } + + // Animate and transform the scene + // + if (m_pGltfLoader) + { + m_pGltfLoader->SetAnimationTime(0, m_Time); + m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); + } + + m_State.time = m_Time; + + // Do Render frame using AFR + // + m_Node->OnRender(&m_State, &m_SwapChain); + + m_SwapChain.Present(); + } -//-------------------------------------------------------------------------------------- -// -// OnResize -// -//-------------------------------------------------------------------------------------- -void SssrSample::OnResize(uint32_t width, uint32_t height) -{ - if (m_Width != width || m_Height != height) - { - // Flush GPU - // - m_Device.GPUFlush(); - - // If resizing but no minimizing - // - if (m_Width > 0 && m_Height > 0) - { - if (m_Node != NULL) - { - 
m_Node->OnDestroyWindowSizeDependentResources(); - } - m_SwapChain.OnDestroyWindowSizeDependentResources(); - } - - m_Width = width; - m_Height = height; - - // if resizing but not minimizing the recreate it with the new size - // - if (m_Width > 0 && m_Height > 0) - { - m_SwapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false, DISPLAYMODE_SDR); - if (m_Node != NULL) - { - m_Node->OnCreateWindowSizeDependentResources(&m_SwapChain, m_Width, m_Height); - } - } - } - m_State.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); } - -//-------------------------------------------------------------------------------------- -// -// OnRender, updates the state from the UI, animates, transforms and renders the scene -// -//-------------------------------------------------------------------------------------- -void SssrSample::OnRender() -{ - // Get timings - // - double timeNow = MillisecondsNow(); - m_DeltaTime = timeNow - m_LastFrameTime; - m_LastFrameTime = timeNow; - - // Build UI and set the scene state. Note that the rendering of the UI happens later. 
- // - ImGUI_UpdateIO(); - ImGui::NewFrame(); - - if (m_bLoadingScene) - { - static int loadingStage = 0; - // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns 0 - // This is done so we can display a progress bar when the scene is loading - loadingStage = m_Node->LoadScene(m_pGltfLoader, loadingStage); - if (loadingStage == 0) - { - m_Time = 0; - m_bLoadingScene = false; - } - } - else if (m_pGltfLoader && m_State.isBenchmarking) - { - const std::vector& timeStamps = m_Node->GetTimingValues(); - const std::string * screenshotName; - m_Time = BenchmarkLoop(timeStamps, &m_State.camera, &screenshotName); - } - else - { - if (m_bShowUI) - { - BuildUI(); - } - - if (!m_bLoadingScene) - { - HandleInput(); - } - } - - // Set animation time - // - if (m_bPlay) - { - m_Time += (float)m_DeltaTime / 1000.0f; - } - - // Animate and transform the scene - // - if (m_pGltfLoader) - { - m_pGltfLoader->SetAnimationTime(0, m_Time); - m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); - } - - m_State.time = m_Time; - - // Do Render frame using AFR - // - m_Node->OnRender(&m_State, &m_SwapChain); - - m_SwapChain.Present(); -} - //-------------------------------------------------------------------------------------- - // - // WinMain - // - //-------------------------------------------------------------------------------------- + // + // WinMain + // + //-------------------------------------------------------------------------------------- int WINAPI WinMain(HINSTANCE hInstance, - HINSTANCE hPrevInstance, - LPSTR lpCmdLine, - int nCmdShow) + HINSTANCE hPrevInstance, + LPSTR lpCmdLine, + int nCmdShow) { - LPCSTR Name = "Stochastic Screen Space Reflection Sample VK v1.0"; + LPCSTR Name = "FidelityFX Stochastic Screen Space Reflection Sample VK v1.2"; - // create new sample - return RunFramework(hInstance, lpCmdLine, nCmdShow, new SssrSample(Name)); + // create new sample + return RunFramework(hInstance, lpCmdLine, nCmdShow, new 
SSSR_SAMPLE_VK::SssrSample(Name)); } \ No newline at end of file diff --git a/sample/src/VK/Sources/SssrSample.h b/sample/src/VK/Sources/SssrSample.h index 913964c..ec9f855 100644 --- a/sample/src/VK/Sources/SssrSample.h +++ b/sample/src/VK/Sources/SssrSample.h @@ -1,21 +1,24 @@ -// AMD SampleVK sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ #pragma once #include "SampleRenderer.h" @@ -35,47 +38,51 @@ // - uses the SampleRenderer to update all the state to the GPU and do the rendering // -class SssrSample : public FrameworkWindows +namespace SSSR_SAMPLE_VK { -public: - SssrSample(LPCSTR name); - void OnCreate(HWND hWnd) override; - void OnDestroy() override; - void OnRender() override; - bool OnEvent(MSG msg) override; - void OnResize(uint32_t Width, uint32_t Height) override; - void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) override; + class SssrSample : public FrameworkWindows + { + public: + SssrSample(LPCSTR name); + void OnCreate(HWND hWnd) override; + void OnDestroy() override; + void OnRender() override; + bool OnEvent(MSG msg) override; + void OnResize(uint32_t Width, uint32_t Height) override; + void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) override; - void SetFullScreen(bool fullscreen); + void SetFullScreen(bool fullscreen); -private: - void BuildUI(); - void HandleInput(); - void LoadScene(int sceneIndex); + private: + void BuildUI(); + void HandleInput(); + void LoadScene(int sceneIndex); - Device m_Device; - SwapChain m_SwapChain; + Device m_Device; + SwapChain m_SwapChain; - GLTFCommon *m_pGltfLoader = NULL; - bool m_bLoadingScene = false; + GLTFCommon* m_pGltfLoader = NULL; + bool m_bLoadingScene = false; - SampleRenderer *m_Node = NULL; - SampleRenderer::State m_State; + SampleRenderer* m_Node = NULL; + SampleRenderer::State m_State; - float m_Distance; - float m_Yaw; - float m_Pitch; + float m_Distance; + float m_Yaw; + float m_Pitch; - float m_Time; // WallClock in seconds. - double m_DeltaTime; // The elapsed time in milliseconds since the previous frame. - double m_LastFrameTime; + float m_Time; // WallClock in seconds. + double m_DeltaTime; // The elapsed time in milliseconds since the previous frame. 
+ double m_LastFrameTime; - // json config file - json m_JsonConfigFile; - std::vector m_SceneNames; + // json config file + json m_JsonConfigFile; + std::vector m_SceneNames; - bool m_bPlay; - bool m_bShowUI; + bool m_bPlay; + bool m_bShowUI; - int m_CameraControlSelected; -}; \ No newline at end of file + int m_CameraControlSelected; + int m_selectedScene; + }; +} \ No newline at end of file diff --git a/sample/src/VK/Sources/UploadHeapVK.cpp b/sample/src/VK/Sources/UploadHeapVK.cpp new file mode 100644 index 0000000..14e6bd9 --- /dev/null +++ b/sample/src/VK/Sources/UploadHeapVK.cpp @@ -0,0 +1,334 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+#include "stdafx.h"
+#include "Misc/Misc.h"
+#include "UploadHeapVK.h"
+
+using namespace CAULDRON_VK;
+namespace SSSR_SAMPLE_VK
+{
+    //--------------------------------------------------------------------------------------
+    //
+    // OnCreate
+    //
+    // Creates the command pool and command buffer, a staging buffer of uSize bytes that is
+    // mapped for the lifetime of the heap, and the fence FlushAndFinish waits on. The command
+    // buffer is left in the recording state so copies can be recorded right away.
+    //
+    //--------------------------------------------------------------------------------------
+    void UploadHeapVK::OnCreate(Device* pDevice, size_t uSize)
+    {
+        m_pDevice = pDevice;
+
+        VkResult res;
+
+        // Create command list and allocators
+        {
+            VkCommandPoolCreateInfo cmd_pool_info = {};
+            cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+            cmd_pool_info.pNext = NULL;
+            cmd_pool_info.queueFamilyIndex = m_pDevice->GetGraphicsQueueFamilyIndex();
+            cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+            res = vkCreateCommandPool(m_pDevice->GetDevice(), &cmd_pool_info, NULL, &m_commandPool);
+            assert(res == VK_SUCCESS);
+
+            VkCommandBufferAllocateInfo cmd = {};
+            cmd.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+            cmd.pNext = NULL;
+            cmd.commandPool = m_commandPool;
+            cmd.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+            cmd.commandBufferCount = 1;
+            res = vkAllocateCommandBuffers(m_pDevice->GetDevice(), &cmd, &m_pCommandBuffer);
+            assert(res == VK_SUCCESS);
+        }
+
+        // Create buffer to suballocate
+        {
+            VkBufferCreateInfo buffer_info = {};
+            buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+            buffer_info.size = uSize;
+            buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+            buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+            res = vkCreateBuffer(m_pDevice->GetDevice(), &buffer_info, NULL, &m_buffer);
+            assert(res == VK_SUCCESS);
+
+            VkMemoryRequirements mem_reqs;
+            vkGetBufferMemoryRequirements(m_pDevice->GetDevice(), m_buffer, &mem_reqs);
+
+            VkMemoryAllocateInfo alloc_info = {};
+            alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+            alloc_info.allocationSize = mem_reqs.size;
+            alloc_info.memoryTypeIndex = 0;
+
+            // Only HOST_VISIBLE is requested, so the memory may be non-coherent; Flush() exists for that case.
+            bool pass = memory_type_from_properties(m_pDevice->GetPhysicalDeviceMemoryProperties(), mem_reqs.memoryTypeBits,
+                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+                &alloc_info.memoryTypeIndex);
+            assert(pass && "No mappable, coherent memory");
+
+            res = vkAllocateMemory(m_pDevice->GetDevice(), &alloc_info, NULL, &m_deviceMemory);
+            assert(res == VK_SUCCESS);
+
+            res = vkBindBufferMemory(m_pDevice->GetDevice(), m_buffer, m_deviceMemory, 0);
+            assert(res == VK_SUCCESS);
+
+            res = vkMapMemory(m_pDevice->GetDevice(), m_deviceMemory, 0, mem_reqs.size, 0, (void**)&m_pDataBegin);
+            assert(res == VK_SUCCESS);
+
+            // The usable heap ends where the VkBuffer ends (uSize), not where the allocation ends:
+            // mem_reqs.size may be larger than the buffer, and a copy sourced from beyond the buffer
+            // would be out of bounds.
+            m_pDataCur = m_pDataBegin;
+            m_pDataEnd = m_pDataBegin + uSize;
+        }
+
+        // Create fence
+        {
+            VkFenceCreateInfo fence_ci;
+            fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+            fence_ci.pNext = NULL;
+            fence_ci.flags = 0;
+
+            res = vkCreateFence(m_pDevice->GetDevice(), &fence_ci, NULL, &m_fence);
+            assert(res == VK_SUCCESS);
+        }
+
+        // Begin Command Buffer
+        {
+            VkCommandBufferBeginInfo cmd_buf_info;
+            cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+            cmd_buf_info.pNext = NULL;
+            cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
+            cmd_buf_info.pInheritanceInfo = NULL;
+
+            res = vkBeginCommandBuffer(m_pCommandBuffer, &cmd_buf_info);
+            assert(res == VK_SUCCESS);
+        }
+    }
+
+    //--------------------------------------------------------------------------------------
+    //
+    // OnDestroy
+    //
+    // Unmaps and frees the staging memory, then destroys the buffer, command buffer,
+    // command pool and fence created in OnCreate.
+    //
+    //--------------------------------------------------------------------------------------
+    void UploadHeapVK::OnDestroy()
+    {
+        vkUnmapMemory(m_pDevice->GetDevice(), m_deviceMemory);
+        vkFreeMemory(m_pDevice->GetDevice(), m_deviceMemory, NULL);
+        vkDestroyBuffer(m_pDevice->GetDevice(), m_buffer, NULL);
+
+        vkFreeCommandBuffers(m_pDevice->GetDevice(), m_commandPool, 1, &m_pCommandBuffer);
+        vkDestroyCommandPool(m_pDevice->GetDevice(), m_commandPool, NULL);
+
+        vkDestroyFence(m_pDevice->GetDevice(), m_fence, NULL);
+    }
+
+    //--------------------------------------------------------------------------------------
+    //
+    // SuballocateFromUploadHeap
+    //
+    // Reserves uSize bytes (rounded up to uAlign) at an address aligned to uAlign.
+    // Returns NULL when the remaining space is not sufficient; the caller is then expected
+    // to flush the heap and retry (see BeginSuballocate).
+    //
+    //--------------------------------------------------------------------------------------
+    uint8_t* UploadHeapVK::Suballocate(size_t uSize, uint64_t uAlign)
+    {
+        // wait until we are done flushing the heap
+        flushing.Wait();
+
+        uint8_t* pRet = NULL;
+
+        {
+            std::unique_lock<std::mutex> lock(m_mutex);
+
+            // make sure resource (and its mips) would fit the upload heap, if not please make the upload heap bigger
+            // (end - begin is the heap capacity; the reversed subtraction wrapped around and never fired)
+            assert(uSize < (size_t)(m_pDataEnd - m_pDataBegin));
+
+            m_pDataCur = reinterpret_cast<uint8_t*>(AlignUp(reinterpret_cast<size_t>(m_pDataCur), uAlign));
+            uSize = AlignUp(uSize, uAlign);
+
+            // return NULL if we ran out of space in the heap
+            if ((m_pDataCur >= m_pDataEnd) || (m_pDataCur + uSize >= m_pDataEnd))
+            {
+                return NULL;
+            }
+
+            pRet = m_pDataCur;
+            m_pDataCur += uSize;
+        }
+
+        return pRet;
+    }
+
+    // Suballocates, flushing the heap and retrying for as long as Suballocate returns NULL,
+    // then marks an allocation as in progress. Every call must be paired with EndSuballocate().
+    // NOTE(review): a request larger than the whole heap can never succeed; in builds where the
+    // assert in Suballocate is compiled out this loop would not terminate - confirm callers size the heap.
+    uint8_t* UploadHeapVK::BeginSuballocate(size_t uSize, uint64_t uAlign)
+    {
+        uint8_t* pRes = NULL;
+
+        for (;;)
+        {
+            pRes = Suballocate(uSize, uAlign);
+            if (pRes != NULL)
+            {
+                break;
+            }
+
+            FlushAndFinish();
+        }
+
+        allocating.Inc();
+
+        return pRes;
+    }
+
+    // Marks the allocation started by BeginSuballocate as finished.
+    void UploadHeapVK::EndSuballocate()
+    {
+        allocating.Dec();
+    }
+
+
+    // Queues a buffer-to-image copy that FlushAndFinish will record.
+    void UploadHeapVK::AddCopy(VkImage image, VkBufferImageCopy bufferImageCopy)
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_copies.push_back({ image, bufferImageCopy });
+    }
+
+    // Queues a buffer-to-buffer copy that FlushAndFinish will record.
+    void UploadHeapVK::AddCopy(VkBuffer buffer, VkBufferCopy bufferCopy)
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_copiesBuffer.push_back({ buffer, bufferCopy });
+    }
+
+    // Queues an image barrier to be issued before the copies.
+    void UploadHeapVK::AddPreBarrier(VkImageMemoryBarrier imageMemoryBarrier)
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_toPreBarrier.push_back(imageMemoryBarrier);
+    }
+
+
+    // Queues an image barrier to be issued after the copies.
+    void UploadHeapVK::AddPostBarrier(VkImageMemoryBarrier imageMemoryBarrier)
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_toPostBarrier.push_back(imageMemoryBarrier);
+    }
+
+    // Queues a buffer barrier to be issued before the copies.
+    void UploadHeapVK::AddPreBarrierBuffer(VkBufferMemoryBarrier bufferMemoryBarrier)
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_toPreBarrierBuffer.push_back(bufferMemoryBarrier);
+    }
+
+    // Queues a buffer barrier to be issued after the copies.
+    void UploadHeapVK::AddPostBarrierBuffer(VkBufferMemoryBarrier bufferMemoryBarrier)
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_toPostBarrierBuffer.push_back(bufferMemoryBarrier);
+    }
+
+    // Makes host writes to the mapped staging memory visible to the device.
+    void UploadHeapVK::Flush()
+    {
+        VkResult res;
+
+        // Flush the whole mapping: an explicit byte count must be non-zero and a multiple of
+        // nonCoherentAtomSize, which the number of bytes written so far does not guarantee.
+        VkMappedMemoryRange range[1] = {};
+        range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+        range[0].memory = m_deviceMemory;
+        range[0].offset = 0;
+        range[0].size = VK_WHOLE_SIZE;
+        res = vkFlushMappedMemoryRanges(m_pDevice->GetDevice(), 1, range);
+        assert(res == VK_SUCCESS);
+    }
+
+    //--------------------------------------------------------------------------------------
+    //
+    // FlushAndFinish
+    //
+    // Records the queued pre-barriers, copies and post-barriers, submits the command buffer
+    // to the graphics queue, waits for the GPU to finish, then restarts recording and rewinds
+    // the heap so its memory can be reused. bDoBarriers is currently not read.
+    //
+    //--------------------------------------------------------------------------------------
+    void UploadHeapVK::FlushAndFinish(bool bDoBarriers)
+    {
+        // make sure another thread is not already flushing
+        flushing.Wait();
+
+        // begins a critical section, and make sure no allocations happen while a thread is inside it
+        flushing.Inc();
+
+        // wait for pending allocations to finish
+        allocating.Wait();
+
+        std::unique_lock<std::mutex> lock(m_mutex);
+        Flush();
+        Trace("flushing %i", (int)(m_copies.size() + m_copiesBuffer.size()));
+
+        //apply pre barriers in one go (image and buffer barriers share the call, so either kind triggers it)
+        if (!m_toPreBarrier.empty() || !m_toPreBarrierBuffer.empty())
+        {
+            vkCmdPipelineBarrier(GetCommandList(), VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, (uint32_t)m_toPreBarrierBuffer.size(), m_toPreBarrierBuffer.data(), (uint32_t)m_toPreBarrier.size(), m_toPreBarrier.data());
+            // both lists are consumed; leaving the buffer list populated would replay stale barriers on the next flush
+            m_toPreBarrier.clear();
+            m_toPreBarrierBuffer.clear();
+        }
+
+        for (const COPY& c : m_copies)
+        {
+            vkCmdCopyBufferToImage(GetCommandList(), GetResource(), c.m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &c.m_bufferImageCopy);
+        }
+        m_copies.clear();
+
+        for (const COPYBUFFER_& c : m_copiesBuffer)
+        {
+            vkCmdCopyBuffer(GetCommandList(), GetResource(), c.m_buffer, 1, &c.m_bufferCopy);
+        }
+        m_copiesBuffer.clear();
+
+        //apply post barriers in one go
+        if (!m_toPostBarrier.empty() || !m_toPostBarrierBuffer.empty())
+        {
+            vkCmdPipelineBarrier(GetCommandList(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, (uint32_t)m_toPostBarrierBuffer.size(), m_toPostBarrierBuffer.data(), (uint32_t)m_toPostBarrier.size(), m_toPostBarrier.data());
+            m_toPostBarrier.clear();
+            m_toPostBarrierBuffer.clear();
+        }
+
+
+        // Close
+        VkResult res = vkEndCommandBuffer(m_pCommandBuffer);
+        assert(res == VK_SUCCESS);
+
+        // Submit
+        const VkCommandBuffer cmd_bufs[] = { m_pCommandBuffer };
+        VkSubmitInfo submit_info;
+        submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+        submit_info.pNext = NULL;
+        submit_info.waitSemaphoreCount = 0;
+        submit_info.pWaitSemaphores = NULL;
+        submit_info.pWaitDstStageMask = NULL;
+        submit_info.commandBufferCount = 1;
+        submit_info.pCommandBuffers = cmd_bufs;
+        submit_info.signalSemaphoreCount = 0;
+        submit_info.pSignalSemaphores = NULL;
+
+        res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info, m_fence);
+        assert(res == VK_SUCCESS);
+
+        // Make sure it's been processed by the GPU
+
+        res = vkWaitForFences(m_pDevice->GetDevice(), 1, &m_fence, VK_TRUE, UINT64_MAX);
+        assert(res == VK_SUCCESS);
+
+        vkResetFences(m_pDevice->GetDevice(), 1, &m_fence);
+
+        // Reset so it can be reused
+        VkCommandBufferBeginInfo cmd_buf_info;
+        cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+        cmd_buf_info.pNext = NULL;
+        cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
+        cmd_buf_info.pInheritanceInfo = NULL;
+
+        res = vkBeginCommandBuffer(m_pCommandBuffer, &cmd_buf_info);
+        assert(res == VK_SUCCESS);
+
+        // the GPU is done with the staged data, so the whole heap is free again
+        m_pDataCur = m_pDataBegin;
+
+        flushing.Dec();
+    }
+}
\ No newline at end of file
diff --git a/sample/src/VK/Sources/UploadHeapVK.h b/sample/src/VK/Sources/UploadHeapVK.h
new file mode 100644
index 0000000..4efc2f0
--- /dev/null
+++ b/sample/src/VK/Sources/UploadHeapVK.h
@@ -0,0 +1,98 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+#pragma once
+
+#include "base/Device.h"
+#include "Misc/Async.h"
+#include <mutex>
+#include <vector>
+
+using namespace CAULDRON_VK;
+namespace SSSR_SAMPLE_VK
+{
+    //
+    // This class shows the most efficient way to upload resources to the GPU memory.
+    // The idea is to create just one upload heap and suballocate memory from it.
+    // For convenience this class comes with its own command list & submit (FlushAndFinish)
+    //
+    class UploadHeapVK
+    {
+        // Counters used to keep allocation and flushing from overlapping (see Misc/Async.h).
+        Sync allocating, flushing;
+
+        // A queued buffer-to-image copy: destination image plus the copy region.
+        struct COPY
+        {
+            VkImage m_image; VkBufferImageCopy m_bufferImageCopy;
+        };
+        // A queued buffer-to-buffer copy: destination buffer plus the copy region.
+        struct COPYBUFFER_
+        {
+            VkBuffer m_buffer; VkBufferCopy m_bufferCopy;
+        };
+        std::vector<COPY> m_copies;
+        std::vector<COPYBUFFER_> m_copiesBuffer;
+
+        // Image barriers recorded before / after the queued copies.
+        std::vector<VkImageMemoryBarrier> m_toPreBarrier;
+        std::vector<VkImageMemoryBarrier> m_toPostBarrier;
+
+        // Buffer barriers recorded before / after the queued copies.
+        std::vector<VkBufferMemoryBarrier> m_toPreBarrierBuffer;
+        std::vector<VkBufferMemoryBarrier> m_toPostBarrierBuffer;
+
+        // Guards the heap cursor and all of the queues above.
+        std::mutex m_mutex;
+    public:
+        // Creates the staging buffer of uSize bytes together with its command buffer and fence.
+        void OnCreate(Device* pDevice, size_t uSize);
+        // Releases everything created by OnCreate.
+        void OnDestroy();
+
+        // Reserves uSize bytes aligned to uAlign; returns NULL when the heap has no room left.
+        uint8_t* Suballocate(size_t uSize, uint64_t uAlign);
+        // Like Suballocate, but flushes and retries until it succeeds. Pair with EndSuballocate().
+        uint8_t* BeginSuballocate(size_t uSize, uint64_t uAlign);
+
+        void EndSuballocate();
+        uint8_t* BasePtr() { return m_pDataBegin; }
+        VkBuffer GetResource() { return m_buffer; }
+        VkCommandBuffer GetCommandList() { return m_pCommandBuffer; }
+
+        // Queue a copy out of the upload heap into an image / a buffer.
+        void AddCopy(VkImage image, VkBufferImageCopy bufferImageCopy);
+        void AddCopy(VkBuffer buffer, VkBufferCopy bufferCopy);
+
+        // Queue image barriers to run before / after the copies.
+        void AddPreBarrier(VkImageMemoryBarrier imageMemoryBarrier);
+        void AddPostBarrier(VkImageMemoryBarrier imageMemoryBarrier);
+
+        // Queue buffer barriers to run before / after the copies.
+        void AddPreBarrierBuffer(VkBufferMemoryBarrier bufferMemoryBarrier);
+        void AddPostBarrierBuffer(VkBufferMemoryBarrier bufferMemoryBarrier);
+
+        // Flushes the mapped staging memory so the device can see host writes.
+        void Flush();
+        // Records and submits everything queued, waits for the GPU, then rewinds the heap.
+        void FlushAndFinish(bool bDoBarriers = false);
+
+    private:
+
+        Device* m_pDevice = nullptr;
+
+        VkCommandPool m_commandPool = VK_NULL_HANDLE;
+        VkCommandBuffer m_pCommandBuffer = VK_NULL_HANDLE;
+
+        VkBuffer m_buffer = VK_NULL_HANDLE;
+        VkDeviceMemory m_deviceMemory = VK_NULL_HANDLE;
+
+        VkFence m_fence = VK_NULL_HANDLE;
+
+        uint8_t* m_pDataBegin = nullptr;    // starting position of upload heap
+        uint8_t* m_pDataCur = nullptr;      // current position of upload heap
+        uint8_t* m_pDataEnd = nullptr;      // ending position of upload heap
+    };
+}
\ No newline at end of file
diff --git a/sample/src/VK/Sources/stdafx.h b/sample/src/VK/Sources/stdafx.h
index 266b6b7..a4397de 100644
--- a/sample/src/VK/Sources/stdafx.h
+++ b/sample/src/VK/Sources/stdafx.h
@@ -60,7 +60,4 @@
using namespace DirectX; #include "Widgets/WireframeBox.h" #include "Widgets/WireframeSphere.h" -using namespace CAULDRON_VK; - -#include "ffx_sssr.h" -#include "ffx_sssr_vk.h" +using namespace CAULDRON_VK; \ No newline at end of file