From e812d9e4c439106e88a6f7d40ca6fef3e88fef6d Mon Sep 17 00:00:00 2001 From: Rys Sommefeldt Date: Thu, 16 Feb 2023 11:50:46 +0000 Subject: [PATCH] FidelityFX FSR v2.2 --- LICENSE.txt | 4 +- README.md | 318 ++- changelog.md | 15 + .../algorithm-structure.svg | 2281 ++++++++++------- .../api-architecture.svg | 2 +- .../super-resolution-temporal/overview.svg | 564 ++-- release_notes.txt | 31 +- src/DX12/CMakeLists.txt | 14 +- src/DX12/FSR2Sample.cpp | 2 +- src/DX12/UI.cpp | 6 + src/DX12/UpscaleContext_FSR2_API.cpp | 43 +- src/DX12/UpscaleContext_FSR2_API.h | 1 + src/VK/CMakeLists.txt | 18 +- src/VK/FSR2Sample.cpp | 4 +- src/VK/UI.cpp | 5 + src/VK/UpscaleContext_FSR2_API.cpp | 55 +- src/VK/UpscaleContext_FSR2_API.h | 3 + src/ffx-fsr2-api/CMakeLists.txt | 6 +- src/ffx-fsr2-api/dx12/CMakeLists.txt | 8 +- src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp | 22 +- src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h | 4 +- .../dx12/shaders/ffx_fsr2_shaders_dx12.cpp | 81 +- .../dx12/shaders/ffx_fsr2_shaders_dx12.h | 12 +- src/ffx-fsr2-api/ffx_assert.cpp | 2 +- src/ffx-fsr2-api/ffx_assert.h | 2 +- src/ffx-fsr2-api/ffx_error.h | 2 +- src/ffx-fsr2-api/ffx_fsr2.cpp | 549 +++- src/ffx-fsr2-api/ffx_fsr2.h | 23 +- src/ffx-fsr2-api/ffx_fsr2_interface.h | 39 +- src/ffx-fsr2-api/ffx_fsr2_maximum_bias.h | 2 +- src/ffx-fsr2-api/ffx_fsr2_private.h | 26 +- src/ffx-fsr2-api/ffx_types.h | 14 +- src/ffx-fsr2-api/ffx_util.h | 2 +- src/ffx-fsr2-api/shaders/ffx_common_types.h | 2 +- src/ffx-fsr2-api/shaders/ffx_core.h | 2 +- src/ffx-fsr2-api/shaders/ffx_core_cpu.h | 2 +- src/ffx-fsr2-api/shaders/ffx_core_glsl.h | 109 +- .../shaders/ffx_core_gpu_common.h | 2 +- .../shaders/ffx_core_gpu_common_half.h | 2 +- src/ffx-fsr2-api/shaders/ffx_core_hlsl.h | 114 +- .../shaders/ffx_core_portability.h | 2 +- .../shaders/ffx_fsr2_accumulate.h | 307 ++- .../shaders/ffx_fsr2_accumulate_pass.glsl | 46 +- .../shaders/ffx_fsr2_accumulate_pass.hlsl | 43 +- .../ffx_fsr2_autogen_reactive_pass.glsl | 22 +- 
.../ffx_fsr2_autogen_reactive_pass.hlsl | 24 +- .../shaders/ffx_fsr2_callbacks_glsl.h | 628 +++-- .../shaders/ffx_fsr2_callbacks_hlsl.h | 743 +++--- src/ffx-fsr2-api/shaders/ffx_fsr2_common.h | 347 ++- .../ffx_fsr2_compute_luminance_pyramid.h | 15 +- ...x_fsr2_compute_luminance_pyramid_pass.glsl | 47 +- ...x_fsr2_compute_luminance_pyramid_pass.hlsl | 75 +- .../shaders/ffx_fsr2_depth_clip.h | 251 +- .../shaders/ffx_fsr2_depth_clip_pass.glsl | 25 +- .../shaders/ffx_fsr2_depth_clip_pass.hlsl | 21 +- src/ffx-fsr2-api/shaders/ffx_fsr2_lock.h | 59 +- .../shaders/ffx_fsr2_lock_pass.glsl | 17 +- .../shaders/ffx_fsr2_lock_pass.hlsl | 15 +- .../ffx_fsr2_postprocess_lock_status.h | 78 +- src/ffx-fsr2-api/shaders/ffx_fsr2_rcas.h | 58 +- .../shaders/ffx_fsr2_rcas_pass.glsl | 20 +- .../shaders/ffx_fsr2_rcas_pass.hlsl | 19 +- ...ruct_dilated_velocity_and_previous_depth.h | 121 +- ..._fsr2_reconstruct_previous_depth_pass.glsl | 29 +- ..._fsr2_reconstruct_previous_depth_pass.hlsl | 21 +- src/ffx-fsr2-api/shaders/ffx_fsr2_reproject.h | 57 +- src/ffx-fsr2-api/shaders/ffx_fsr2_resources.h | 119 +- src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h | 23 +- .../shaders/ffx_fsr2_tcr_autogen.h | 250 ++ .../shaders/ffx_fsr2_tcr_autogen_pass.glsl | 116 + .../shaders/ffx_fsr2_tcr_autogen_pass.hlsl | 114 + src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h | 158 +- src/ffx-fsr2-api/shaders/ffx_spd.h | 2 +- src/ffx-fsr2-api/vk/CMakeLists.txt | 8 +- src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp | 61 +- src/ffx-fsr2-api/vk/ffx_fsr2_vk.h | 8 +- .../vk/shaders/ffx_fsr2_shaders_vk.cpp | 30 +- .../vk/shaders/ffx_fsr2_shaders_vk.h | 12 +- 78 files changed, 5162 insertions(+), 3222 deletions(-) create mode 100644 src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen.h create mode 100644 src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.glsl create mode 100644 src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.hlsl diff --git a/LICENSE.txt b/LICENSE.txt index 19b21ff..c066ae1 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 
@@ -FidelityFX Super Resolution 2.1 +FidelityFX Super Resolution 2.2 ================================= -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 6fd6976..66d9d02 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# FidelityFX Super Resolution 2.1 (FSR 2.1.2) +# FidelityFX Super Resolution 2.2 (FSR 2.2) Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. @@ -40,8 +40,9 @@ You can find the binaries for FidelityFX FSR in the release section on GitHub. - [Depth buffer configurations](#depth-buffer-configurations) - [Providing motion vectors](#providing-motion-vectors) - [Reactive mask](#reactive-mask) - - [Transparency & composition mask](#transparency-and-composition-mask) - [Automatically generating reactivity](#automatically-generating-reactivity) + - [Transparency and composition mask](#transparency-and-composition-mask) + - [Automatically generating transparency and composition mask](#automatically-generating-transparency-and-composition-mask) - [Placement in the frame](#placement-in-the-frame) - [Host API](#host-api) - [Modular backend](#modular-backend) @@ -54,10 +55,10 @@ You can find the binaries for FidelityFX FSR in the release section on GitHub. 
- [HDR support](#hdr-support) - [Falling back to 32bit floating point](#falling-back-to-32bit-floating-point) - [64-wide wavefronts](#64-wide-wavefronts) + - [API Debug Checker](#debug-checker) - [The technique](#the-technique) - [Algorithm structure](#algorithm-structure) - [Compute luminance pyramid](#compute-luminance-pyramid) - - [Adjust input color](#adjust-input-color) - [Reconstruct & dilate](#reconstruct-and-dilate) - [Depth clip](#depth-clip) - [Create locks](#create-locks) @@ -101,15 +102,15 @@ To use FSR2 you should follow the steps below: 8. Create a backend for your target API. E.g. for DirectX12 you should call [`ffxFsr2GetInterfaceDX12`](src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h#L55). A scratch buffer should be allocated of the size returned by calling [`ffxFsr2GetScratchMemorySizeDX12`](src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h#L40) and the pointer to that buffer passed to [`ffxFsr2GetInterfaceDX12`](src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h#L55). -9. Create a FSR2 context by calling [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L215). The parameters structure should be filled out matching the configuration of your application. See the API reference documentation for more details. +9. Create a FSR2 context by calling [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L228). The parameters structure should be filled out matching the configuration of your application. See the API reference documentation for more details. -10. Each frame you should call [`ffxFsr2ContextDispatch`](src/ffx-fsr2-api/ffx_fsr2.h#L256) to launch FSR2 workloads. The parameters structure should be filled out matching the configuration of your application. See the API reference documentation for more details, and ensure the [`frameTimeDelta` field is provided in milliseconds](#frame-time-delta-input). +10. Each frame you should call [`ffxFsr2ContextDispatch`](src/ffx-fsr2-api/ffx_fsr2.h#L269) to launch FSR2 workloads. 
The parameters structure should be filled out matching the configuration of your application. See the API reference documentation for more details, and ensure the [`frameTimeDelta` field is provided in milliseconds](#frame-time-delta-input). -11. When your application is terminating (or you wish to destroy the context for another reason) you should call [`ffxFsr2ContextDestroy`](src/ffx-fsr2-api/ffx_fsr2.h#L279). The GPU should be idle before calling this function. +11. When your application is terminating (or you wish to destroy the context for another reason) you should call [`ffxFsr2ContextDestroy`](src/ffx-fsr2-api/ffx_fsr2.h#L292). The GPU should be idle before calling this function. -12. Sub-pixel jittering should be applied to your application's projection matrix. This should be done when performing the main rendering of your application. You should use the [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L424) function to compute the precise jitter offsets. See [Camera jitter](#camera-jitter) section for more details. +12. Sub-pixel jittering should be applied to your application's projection matrix. This should be done when performing the main rendering of your application. You should use the [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L437) function to compute the precise jitter offsets. See [Camera jitter](#camera-jitter) section for more details. -13. For the best upscaling quality it is strongly advised that you populate the [Reactive mask](#reactive-mask) and [Transparency & composition mask](#transparency-and-composition-mask) according to our guidelines. You can also use [`ffxFsr2ContextGenerateReactiveMask`](src/ffx-fsr2-api/ffx_fsr2.h#L267) as a starting point. +13. For the best upscaling quality it is strongly advised that you populate the [Reactive mask](#reactive-mask) and [Transparency & composition mask](#transparency-and-composition-mask) according to our guidelines. 
You can also use [`ffxFsr2ContextGenerateReactiveMask`](src/ffx-fsr2-api/ffx_fsr2.h#L280) as a starting point. 14. Applications should expose [scaling modes](#scaling-modes), in their user interface in the following order: Quality, Balanced, Performance, and (optionally) Ultra Performance. @@ -134,62 +135,64 @@ Depending on your target hardware and operating configuration FSR2 will operate The table below summarizes the measured performance of FSR2 on a variety of hardware in DX12. -| Target resolution | Quality | RX 6950 XT | RX 6900 XT | RX 6800 XT | RX 6800 | RX 6700 XT | RX 6600 XT | RX 5700 XT | RX Vega 56 | RX 590 | -|-------------------|------------------|------------|------------|------------|---------|------------|------------|------------|------------|--------| -| 3840x2160 | Quality (1.5x) | 1.1ms | 1.2ms | 1.2ms | 1.3ms | 1.8ms | 3.0ms | 2.4ms | 4.8ms | 5.3ms | -| | Balanced (1.7x) | 1.0ms | 1.0ms | 1.1ms | 1.2ms | 1.6ms | 2.7ms | 2.1ms | 4.3ms | 4.8ms | -| | Performance (2x) | 0.8ms | 0.9ms | 0.9ms | 1.1ms | 1.5ms | 2.3ms | 1.9ms | 3.5ms | 4.2ms | -| | Ultra perf. (3x) | 0.7ms | 0.7ms | 0.7ms | 1.0ms | 1.3ms | 1.7ms | 1.6ms | 2.8ms | 3.5ms | -| 2560x1440 | Quality (1.5x) | 0.4ms | 0.4ms | 0.5ms | 0.6ms | 0.8ms | 1.2ms | 1.0ms | 1.8ms | 2.3ms | -| | Balanced (1.7x) | 0.4ms | 0.4ms | 0.4ms | 0.5ms | 0.7ms | 1.0ms | 0.9ms | 1.7ms | 2.1ms | -| | Performance (2x) | 0.4ms | 0.4ms | 0.4ms | 0.5ms | 0.7ms | 0.9ms | 0.8ms | 1.4ms | 1.9ms | -| | Ultra perf. (3x) | 0.3ms | 0.3ms | 0.3ms | 0.4ms | 0.6ms | 0.7ms | 0.7ms | 1.2ms | 1.6ms | -| 1920x1080 | Quality (1.5x) | 0.3ms | 0.3ms | 0.3ms | 0.3ms | 0.4ms | 0.6ms | 0.6ms | 1.0ms | 1.3ms | -| | Balanced (1.7x) | 0.2ms | 0.2ms | 0.2ms | 0.3ms | 0.4ms | 0.6ms | 0.5ms | 0.9ms | 1.2ms | -| | Performance (2x) | 0.2ms | 0.2ms | 0.2ms | 0.3ms | 0.4ms | 0.5ms | 0.5ms | 0.8ms | 1.1ms | -| | Ultra perf. 
(3x) | 0.2ms | 0.2ms | 0.2ms | 0.2ms | 0.3ms | 0.4ms | 0.4ms | 0.7ms | 0.9ms | - -Figures are rounded to the nearest 0.1ms and are without additional [`sharpness`](src/ffx-fsr2-api/ffx_fsr2.h#L129). +| Target resolution | Quality | RX 7900 XTX| RX 6950 XT | RX 6900 XT | RX 6800 XT | RX 6800 | RX 6700 XT | RX 6650 XT | RX 5700 XT | RX Vega 56 | RX 590 | +|-------------------|------------------|------------|------------|------------|------------|---------|------------|------------|------------|------------|--------| +| 3840x2160 | Quality (1.5x) | 0.7ms | 1.1ms | 1.2ms | 1.2ms | 1.4ms | 2.0ms | 2.8ms | 2.4ms | 4.9ms | 5.4ms | +| | Balanced (1.7x) | 0.6ms | 1.0ms | 1.0ms | 1.1ms | 1.4ms | 1.8ms | 2.6ms | 2.2ms | 4.1ms | 4.9ms | +| | Performance (2x) | 0.6ms | 0.9ms | 1.0ms | 1.0ms | 1.3ms | 1.7ms | 2.3ms | 2.0ms | 3.6ms | 4.4ms | +| | Ultra perf. (3x) | 0.5ms | 0.8ms | 0.8ms | 0.9ms | 1.1ms | 1.5ms | 1.8ms | 1.7ms | 2.9ms | 3.7ms | +| 2560x1440 | Quality (1.5x) | 0.3ms | 0.5ms | 0.5ms | 0.5ms | 0.7ms | 0.9ms | 1.2ms | 1.1ms | 1.9ms | 2.3ms | +| | Balanced (1.7x) | 0.3ms | 0.5ms | 0.5ms | 0.5ms | 0.6ms | 0.8ms | 1.1ms | 1.0ms | 1.7ms | 2.1ms | +| | Performance (2x) | 0.3ms | 0.4ms | 0.4ms | 0.4ms | 0.6ms | 0.8ms | 0.9ms | 0.9ms | 1.5ms | 1.9ms | +| | Ultra perf. (3x) | 0.2ms | 0.4ms | 0.4ms | 0.4ms | 0.5ms | 0.7ms | 0.8ms | 0.8ms | 1.2ms | 1.7ms | +| 1920x1080 | Quality (1.5x) | 0.2ms | 0.3ms | 0.3ms | 0.3ms | 0.4ms | 0.5ms | 0.6ms | 0.6ms | 1.0ms | 1.3ms | +| | Balanced (1.7x) | 0.2ms | 0.3ms | 0.3ms | 0.3ms | 0.4ms | 0.5ms | 0.6ms | 0.6ms | 0.9ms | 1.2ms | +| | Performance (2x) | 0.2ms | 0.2ms | 0.2ms | 0.3ms | 0.3ms | 0.5ms | 0.5ms | 0.5ms | 0.8ms | 1.1ms | +| | Ultra perf. (3x) | 0.1ms | 0.2ms | 0.2ms | 0.2ms | 0.3ms | 0.4ms | 0.4ms | 0.4ms | 0.7ms | 0.9ms | + +Figures are rounded to the nearest 0.1ms and are without additional [`sharpness`](src/ffx-fsr2-api/ffx_fsr2.h#L132) and are subject to change. 
## Memory requirements Using FSR2 requires some additional GPU local memory to be allocated for consumption by the GPU. When using the FSR2 API, this memory is allocated when the FSR2 context is created, and is done so via the series of callbacks which comprise the backend interface. This memory is used to store intermediate surfaces which are computed by the FSR2 algorithm as well as surfaces which are persistent across many frames of the application. The table below includes the amount of memory used by FSR2 under various operating conditions. The "Working set" column indicates the total amount of memory used by FSR2 as the algorithm is executing on the GPU; this is the amount of memory FSR2 will require to run. The "Persistent memory" column indicates how much of the "Working set" column is required to be left intact for subsequent frames of the application; this memory stores the temporal data consumed by FSR2. The "Aliasable memory" column indicates how much of the "Working set" column may be aliased by surfaces or other resources used by the application outside of the operating boundaries of FSR2. -You can take control of resource creation in FSR2 by overriding the resource creation and destruction parts of the FSR2 backend interface. This means that for a perfect integration of FSR2, additional memory which is equal to the "Persistent memory" column of the table below is required depending on your operating conditions. +You can take control of resource creation in FSR2 by overriding the resource creation and destruction parts of the FSR2 backend interface, and forwarding the aliasing flags. This means that for a perfect integration of FSR2, additional memory which is equal to the "Persistent memory" column of the table below is required depending on your operating conditions. 
| Resolution | Quality | Working set (MB) | Persistent memory (MB) | Aliasable memory (MB) | | -----------|------------------------|------------------|------------------------|-------------------------| -| 3840x2160 | Quality (1.5x) | 302MB | 218MB | 85MB | -| | Balanced (1.7x) | 279MB | 214MB | 65MB | -| | Performance (2x) | 260MB | 211MB | 49MB | -| | Ultra performance (3x) | 228MB | 206MB | 22MB | -| 2560x1440 | Quality (1.5x) | 140MB | 100MB | 40MB | -| | Balanced (1.7x) | 129MB | 98MB | 33MB | -| | Performance (2x) | 119MB | 97MB | 24MB | -| | Ultra performance (3x) | 105MB | 95MB | 10MB | -| 1920x1080 | Quality (1.5x) | 78MB | 56MB | 22MB | -| | Balanced (1.7x) | 73MB | 55MB | 18MB | -| | Performance (2x) | 69MB | 54MB | 15MB | -| | Ultra performance (3x) | 59MB | 53MB | 6MB | - -Figures are rounded up to nearest MB and are without additional [`sharpness`](src/ffx-fsr2-api/ffx_fsr2.h#L129). Figures are approximations using an RX 6700XT GPU in DX12 and are subject to change. +| 3840x2160 | Quality (1.5x) | 448MB | 354MB | 93MB | +| | Balanced (1.7x) | 407MB | 330MB | 77MB | +| | Performance (2x) | 376MB | 312MB | 63MB | +| | Ultra performance (3x) | 323MB | 281MB | 42MB | +| 2560x1440 | Quality (1.5x) | 207MB | 164MB | 43MB | +| | Balanced (1.7x) | 189MB | 153MB | 36MB | +| | Performance (2x) | 172MB | 143MB | 29MB | +| | Ultra performance (3x) | 149MB | 130MB | 19MB | +| 1920x1080 | Quality (1.5x) | 115MB | 90MB | 24MB | +| | Balanced (1.7x) | 105MB | 85MB | 20MB | +| | Performance (2x) | 101MB | 83MB | 18MB | +| | Ultra performance (3x) | 84MB | 72MB | 11MB | + +Figures are approximations measured using an RX 6700XT GPU in DX12, rounded up to the nearest MB, and are subject to change. For details on how to manage FSR2's memory requirements please refer to the section of this document dealing with [Memory management](#memory-management). ## Input resources FSR2 is a temporal algorithm, and therefore requires access to data from both the current and previous frame. 
The following table enumerates all external inputs required by FSR2. -> The resolution column indicates if the data should be at 'rendered' resolution or 'presentation' resolution. 'Rendered' resolution indicates that the resource should match the resolution at which the application is performing its rendering. Conversely, 'presentation' indicates that the resolution of the target should match that which is to be presented to the user. All resources are from the current rendered frame, for DirectX(R)12 and Vulkan(R) applications all input resources should be transitioned to [`D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE`](https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_resource_states) and [`VK_ACCESS_SHADER_READ_BIT`](https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkAccessFlagBits.html) respectively before calling [`ffxFsr2ContextDispatch`](src/ffx-fsr2-api/ffx_fsr2.h#L256). +> The resolution column indicates if the data should be at 'rendered' resolution or 'presentation' resolution. 'Rendered' resolution indicates that the resource should match the resolution at which the application is performing its rendering. Conversely, 'presentation' indicates that the resolution of the target should match that which is to be presented to the user. All resources are from the current rendered frame, for DirectX(R)12 and Vulkan(R) applications all input resources should be transitioned to [`D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE`](https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_resource_states) and [`VK_ACCESS_SHADER_READ_BIT`](https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkAccessFlagBits.html) respectively before calling [`ffxFsr2ContextDispatch`](src/ffx-fsr2-api/ffx_fsr2.h#L269). 
| Name | Resolution | Format | Type | Notes | | ----------------|------------------------------|------------------------------------|-----------|------------------------------------------------| -| Color buffer | Render | `APPLICATION SPECIFIED` | Texture | The render resolution color buffer for the current frame provided by the application. If the contents of the color buffer are in high dynamic range (HDR), then the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) flag should be set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure. | -| Depth buffer | Render | `APPLICATION SPECIFIED (1x FLOAT)` | Texture | The render resolution depth buffer for the current frame provided by the application. The data should be provided as a single floating point value, the precision of which is under the application's control. The configuration of the depth should be communicated to FSR2 via the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166). You should set the [`FFX_FSR2_ENABLE_DEPTH_INVERTED`](src/ffx-fsr2-api/ffx_fsr2.h#L91) flag if your depth buffer is inverted (that is [1..0] range), and you should set the [`FFX_FSR2_ENABLE_DEPTH_INFINITE`](src/ffx-fsr2-api/ffx_fsr2.h#L92) flag if your depth buffer has an infinite far plane. If the application provides the depth buffer in `D32S8` format, then FSR2 will ignore the stencil component of the buffer, and create an `R32_FLOAT` resource to address the depth buffer. On GCN and RDNA hardware, depth buffers are stored separately from stencil buffers. | -| Motion vectors | Render or presentation | `APPLICATION SPECIFIED (2x FLOAT)` | Texture | The 2D motion vectors for the current frame provided by the application in [**(<-width, -height>**..**<width, height>**] range. 
If your application renders motion vectors with a different range, you may use the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L126) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure to adjust them to match the expected range for FSR2. Internally, FSR2 uses 16-bit quantities to represent motion vectors in many cases, which means that while motion vectors with greater precision can be provided, FSR2 will not benefit from the increased precision. The resolution of the motion vector buffer should be equal to the render resolution, unless the [`FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS`](src/ffx-fsr2-api/ffx_fsr2.h#L89) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166), in which case it should be equal to the presentation resolution. | +| Color buffer | Render | `APPLICATION SPECIFIED` | Texture | The render resolution color buffer for the current frame provided by the application. If the contents of the color buffer are in high dynamic range (HDR), then the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) flag should be set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. | +| Depth buffer | Render | `APPLICATION SPECIFIED (1x FLOAT)` | Texture | The render resolution depth buffer for the current frame provided by the application. The data should be provided as a single floating point value, the precision of which is under the application's control. The configuration of the depth should be communicated to FSR2 via the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). 
You should set the [`FFX_FSR2_ENABLE_DEPTH_INVERTED`](src/ffx-fsr2-api/ffx_fsr2.h#L91) flag if your depth buffer is inverted (that is [1..0] range), and you should set the [`FFX_FSR2_ENABLE_DEPTH_INFINITE`](src/ffx-fsr2-api/ffx_fsr2.h#L92) flag if your depth buffer has an infinite far plane. If the application provides the depth buffer in `D32S8` format, then FSR2 will ignore the stencil component of the buffer, and create an `R32_FLOAT` resource to address the depth buffer. On GCN and RDNA hardware, depth buffers are stored separately from stencil buffers. | +| Motion vectors | Render or presentation | `APPLICATION SPECIFIED (2x FLOAT)` | Texture | The 2D motion vectors for the current frame provided by the application in [**(<-width, -height>**..**<width, height>**] range. If your application renders motion vectors with a different range, you may use the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L129) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure to adjust them to match the expected range for FSR2. Internally, FSR2 uses 16-bit quantities to represent motion vectors in many cases, which means that while motion vectors with greater precision can be provided, FSR2 will not benefit from the increased precision. The resolution of the motion vector buffer should be equal to the render resolution, unless the [`FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS`](src/ffx-fsr2-api/ffx_fsr2.h#L89) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179), in which case it should be equal to the presentation resolution. 
| | Reactive mask | Render | `R8_UNORM` | Texture | As some areas of a rendered image do not leave a footprint in the depth buffer or include motion vectors, FSR2 provides support for a reactive mask texture which can be used to indicate to FSR2 where such areas are. Good examples of these are particles, or alpha-blended objects which do not write depth or motion vectors. If this resource is not set, then FSR2's shading change detection logic will handle these cases as best it can, but for optimal results, this resource should be set. For more information on the reactive mask please refer to the [Reactive mask](#reactive-mask) section. | -| Exposure | 1x1 | `R32_FLOAT` | Texture | A 1x1 texture containing the exposure value computed for the current frame. This resource is optional, and may be omitted if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166). | +| Exposure | 1x1 | `R32_FLOAT` | Texture | A 1x1 texture containing the exposure value computed for the current frame. This resource is optional, and may be omitted if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). | + +All inputs that are provided at Render Resolution, except for motion vectors, should be rendered with jitter. Motion vectors should not have jitter applied, unless the `FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION` flag is present. ## Depth buffer configurations -It is strongly recommended that an inverted, infinite depth buffer is used with FSR2. However, alternative depth buffer configurations are supported. 
An application should inform the FSR2 API of its depth buffer configuration by setting the appropriate flags during the creation of the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L164). The table below contains the appropriate flags. +It is strongly recommended that an inverted, infinite depth buffer is used with FSR2. However, alternative depth buffer configurations are supported. An application should inform the FSR2 API of its depth buffer configuration by setting the appropriate flags during the creation of the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). The table below contains the appropriate flags. | FSR2 flag | Note | |----------------------------------|--------------------------------------------------------------------------------------------| @@ -204,7 +207,7 @@ A key part of a temporal algorithm (be it antialiasing or upscaling) is the prov ![alt text](docs/media/super-resolution-temporal/motion-vectors.svg "A diagram showing a 2D motion vector.") -If your application computes motion vectors in another space - for example normalized device coordinate space - then you may use the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L126) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure to instruct FSR2 to adjust them to match the expected range for FSR2. The code examples below illustrate how motion vectors may be scaled to screen space. The example HLSL and C++ code below illustrates how NDC-space motion vectors can be scaled using the FSR2 host API. +If your application computes motion vectors in another space - for example normalized device coordinate space - then you may use the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L129) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure to instruct FSR2 to adjust them to match the expected range for FSR2. The code examples below illustrate how motion vectors may be scaled to screen space. 
The example HLSL and C++ code below illustrates how NDC-space motion vectors can be scaled using the FSR2 host API. ```HLSL // GPU: Example of application NDC motion vector computation @@ -216,7 +219,7 @@ dispatchParameters.motionVectorScale.y = (float)renderHeight; ``` ### Precision & resolution -Internally, FSR2 uses 16bit quantities to represent motion vectors in many cases, which means that while motion vectors with greater precision can be provided, FSR2 will not currently benefit from the increased precision. The resolution of the motion vector buffer should be equal to the render resolution, unless the [`FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS`](src/ffx-fsr2-api/ffx_fsr2.h#L89) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166), in which case it should be equal to the presentation resolution. +Internally, FSR2 uses 16bit quantities to represent motion vectors in many cases, which means that while motion vectors with greater precision can be provided, FSR2 will not currently benefit from the increased precision. The resolution of the motion vector buffer should be equal to the render resolution, unless the [`FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS`](src/ffx-fsr2-api/ffx_fsr2.h#L89) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179), in which case it should be equal to the presentation resolution. ### Coverage FSR2 will perform better quality upscaling when more objects provide their motion vectors. It is therefore advised that all opaque, alpha-tested and alpha-blended objects should write their motion vectors for all covered pixels. 
If vertex shader effects are applied - such as scrolling UVs - these calculations should also be factored into the calculation of motion for the best results. For alpha-blended objects it is also strongly advised that the alpha value of each covered pixel is stored to the corresponding pixel in the [reactive mask](#reactive-mask). This will allow FSR2 to perform better handling of alpha-blended objects during upscaling. The reactive mask is especially important for alpha-blended objects where writing motion vectors might be prohibitive, such as particles. @@ -224,25 +227,36 @@ FSR2 will perform better quality upscaling when more objects provide their motio ## Reactive mask In the context of FSR2, the term "reactivity" means how much influence the samples rendered for the current frame have over the production of the final upscaled image. Typically, samples rendered for the current frame contribute a relatively modest amount to the result computed by FSR2; however, there are exceptions. To produce the best results for fast moving, alpha-blended objects, FSR2 requires the [Reproject & accumulate](#reproject-accumulate) stage to become more reactive for such pixels. As there is no good way to determine from either color, depth or motion vectors which pixels have been rendered using alpha blending, FSR2 performs best when applications explicitly mark such areas. -Therefore, it is strongly encouraged that applications provide a reactive mask to FSR2. The reactive mask guides FSR2 on where it should reduce its reliance on historical information when compositing the current pixel, and instead allow the current frame's samples to contribute more to the final result. The reactive mask allows the application to provide a value from [0..1] where 0 indicates that the pixel is not at all reactive (and should use the default FSR2 composition strategy), and a value of 1 indicates the pixel should be fully reactive. 
+Therefore, it is strongly encouraged that applications provide a reactive mask to FSR2. The reactive mask guides FSR2 on where it should reduce its reliance on historical information when compositing the current pixel, and instead allow the current frame's samples to contribute more to the final result. The reactive mask allows the application to provide a value from [0.0..1.0] where 0.0 indicates that the pixel is not at all reactive (and should use the default FSR2 composition strategy), and a value of 1.0 indicates the pixel should be fully reactive. This is a floating point range and can be tailored to different situations. While there are other applications for the reactive mask, the primary application for the reactive mask is producing better results of upscaling images which include alpha-blended objects. A good proxy for reactiveness is actually the alpha value used when compositing an alpha-blended object into the scene, therefore, applications should write `alpha` to the reactive mask. It should be noted that it is unlikely that a reactive value of close to 1 will ever produce good results. Therefore, we recommend clamping the maximum reactive value to around 0.9. -If a [Reactive mask](#reactive-mask) is not provided to FSR2 (by setting the [`reactive`](src/ffx-fsr2-api/ffx_fsr2.h#L122) field of [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) to `NULL`) then an internally generated 1x1 texture with a cleared reactive value will be used. +If a [Reactive mask](#reactive-mask) is not provided to FSR2 (by setting the [`reactive`](src/ffx-fsr2-api/ffx_fsr2.h#L125) field of [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) to `NULL`) then an internally generated 1x1 texture with a cleared reactive value will be used. 
+ +## Automatically generating reactivity +To help applications generate the [Reactive mask](#reactive-mask) and the [Transparency & composition mask](#transparency-and-composition-mask), FSR2 provides an optional helper API. Under the hood, the API launches a compute shader which computes these values for each pixel using a luminance-based heuristic. -## Transparency & composition mask +Applications wishing to do this can call the [`ffxFsr2ContextGenerateReactiveMask`](src/ffx-fsr2-api/ffx_fsr2.h#L280) function and should pass two versions of the color buffer, one containing opaque only geometry, and the other containing both opaque and alpha-blended objects. + +## Transparency and composition mask In addition to the [Reactive mask](#reactive-mask), FSR2 provides for the application to denote areas of other specialist rendering which should be accounted for during the upscaling process. Examples of such special rendering include areas of raytraced reflections or animated textures. -While the [Reactive mask](#reactive-mask) adjusts the accumulation balance, the [Transparency & composition mask](#transparency-and-composition-mask) adjusts the pixel locks created by FSR2. A pixel with a value of 0 in the [Transparency & composition mask](#ttransparency-and-composition-mask) does not perform any additional modification to the lock for that pixel. Conversely, a value of 1 denotes that the lock for that pixel should be completely removed. +While the [Reactive mask](#reactive-mask) adjusts the accumulation balance, the [Transparency & composition mask](#transparency-and-composition-mask) adjusts the pixel history protection mechanisms. The mask also removes the effect of the luminance instability factor. A pixel with a value of 0 in the [Transparency & composition mask](#transparency-and-composition-mask) does not perform any additional modification to the lock for that pixel. Conversely, a value of 1 denotes that the lock for that pixel should be completely removed.
-If a [Transparency & composition mask](#transparency-and-composition-mask) is not provided to FSR2 (by setting the [`transparencyAndComposition`](#src/ffx-fsr2-api/ffx_fsr2.h#L123) field of [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) to `NULL`) then an internally generated 1x1 texture with a cleared transparency and composition value will be used. +If a [Transparency & composition mask](#transparency-and-composition-mask) is not provided to FSR2 (by setting the [`transparencyAndComposition`](src/ffx-fsr2-api/ffx_fsr2.h#L126) field of [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) to `NULL`) then an internally generated 1x1 texture with a cleared transparency and composition value will be used. -## Automatically generating reactivity -To help applications generate the [Reactive mask](#reactive-mask) and the [Transparency & composition mask](#transparency-and-composition-mask), FSR2 provides an optional helper API. Under the hood, the API launches a compute shader which computes these values for each pixel using a luminance-based heuristic. +## Automatically generating Transparency and composition mask +FSR2.2 includes an experimental feature to generate [Reactive mask](#reactive-mask) and [Transparency & composition mask](#transparency-and-composition-mask) automatically. To enable this, the [`enableAutoReactive`](src/ffx-fsr2-api/ffx_fsr2.h#L142) field of [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) needs to be set to `TRUE` and a copy of the opaque only portions of the backbuffer needs to be provided in [`colorOpaqueOnly`](src/ffx-fsr2-api/ffx_fsr2.h#L143). FSR2 will then automatically generate and use [Reactive mask](#reactive-mask) and [Transparency & composition mask](#transparency-and-composition-mask) internally. The masks are generated in a compute pass by analyzing the difference of the color buffer with and without transparent geometry, as well as by comparing it to the previous frame.
Based on the result of those computations each pixel gets assigned [Reactive mask](#reactive-mask) and [Transparency & composition mask](#transparency-and-composition-mask) values. +To use autogeneration of the masks the following 4 values to scale and limit the intensity of the masks are required to be provided as well (note that the mentioned default values are suggested starting values but should be tuned per title): -Applications wishing to do this can call the [`ffxFsr2ContextGenerateReactiveMask`](src/ffx-fsr2-api/ffx_fsr2.h#L267) function and should pass two versions of the color buffer, one containing opaque only geometry, and the other containing both opaque and alpha-blended objects. +- [`autoTcThreshold`](src/ffx-fsr2-api/ffx_fsr2.h#L144): Setting this value too small will cause visual instability. Larger values can cause ghosting. Recommended default value is 0.05f. +- [`autoTcScale`](src/ffx-fsr2-api/ffx_fsr2.h#L145): Smaller values will increase stability at hard edges of translucent objects. Recommended default value is 1.0f. +- [`autoReactiveScale`](src/ffx-fsr2-api/ffx_fsr2.h#L146): Larger values result in more reactive pixels. Recommended default value is 5.00f. +- [`autoReactiveMax`](src/ffx-fsr2-api/ffx_fsr2.h#L147): Maximum value reactivity can reach. Recommended default value is 0.90f. -In version 2.1, this helper changed slightly in order to give developers more options when items such as decals were used, which may have resulted in shimmer on certain surfaces. A "binaryValue" can now be set in the FfxFsr2GenerateReactiveDescription struct, to provide a specific value to be written into the reactive mask instead of 1.0f, which can be too high. +This feature is intended to help with integrating FSR2.2 into a new engine or title.
However, for best quality we still recommend to render the [Reactive mask](#reactive-mask) and [Transparency & composition mask](#transparency-and-composition-mask) yourself, as generating those values based on material is expected to be more reliable than autogenerating them from the final image. + +Please note that this feature is still in experimental stage and may change significantly in the future. ## Exposure FSR2 provides two values which control the exposure used when performing upscaling. They are as follows: @@ -254,7 +268,7 @@ The exposure value should match that which the application uses during any subse > In various stages of the FSR2 algorithm described in this document, FSR2 will compute its own exposure value for internal use. It is worth noting that all outputs from FSR2 will have this internal tonemapping reversed before the final output is written. Meaning that FSR2 returns results in the same domain as the original input signal. -Poorly selected exposure values can have a drastic impact on the final quality of FSR2's upscaling. Therefore, it is recommended that [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) is used by the application, unless there is a particular reason not to. When [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure, the exposure calculation shown in the HLSL code below is used to compute the exposure value, this matches the exposure response of ISO 100 film stock. +Poorly selected exposure values can have a drastic impact on the final quality of FSR2's upscaling. Therefore, it is recommended that [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) is used by the application, unless there is a particular reason not to. 
When [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure, the exposure calculation shown in the HLSL code below is used to compute the exposure value, which matches the exposure response of ISO 100 film stock. ```HLSL float ComputeAutoExposureFromAverageLog(float averageLogLuminance) @@ -310,11 +324,11 @@ To use the FSR2 API, you should link `ffx_fsr2_api_x64.lib` which will provide t > Please note the modular architecture of the FSR2 API allows for custom backends to be implemented. See the [Modular backend](#modular-backend) section for more details. -To begin using the API, the application should first create a [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L164) structure. This structure should be located somewhere with a lifetime approximately matching that of your backbuffer; somewhere on the application's heap is usually a good choice. By calling [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L213) the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L164) structure will be populated with the data it requires. Moreover, a number of calls will be made from [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L213) to the backend which is provided to [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L164) as part of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L101) structure. These calls will perform such tasks as creating intermediate resources required by FSR2 and setting up shaders and their associated pipeline state. The FSR2 API does not perform any dynamic memory allocation. +To begin using the API, the application should first create a [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179) structure. This structure should be located somewhere with a lifetime approximately matching that of your backbuffer; somewhere on the application's heap is usually a good choice. 
By calling [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L228) the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179) structure will be populated with the data it requires. Moreover, a number of calls will be made from [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L228) to the backend which is provided to [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179) as part of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. These calls will perform such tasks as creating intermediate resources required by FSR2 and setting up shaders and their associated pipeline state. The FSR2 API does not perform any dynamic memory allocation. -Each frame of your application where upscaling is required, you should call [`ffxFsr2ContextDispatch`](src/ffx-fsr2-api/ffx_fsr2.h#L254). This function accepts the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L164) structure that was created earlier in the application's lifetime as well as a description of precisely how upscaling should be performed and on which data. This description is provided by the application filling out a [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L114) structure. +Each frame of your application where upscaling is required, you should call [`ffxFsr2ContextDispatch`](src/ffx-fsr2-api/ffx_fsr2.h#L269). This function accepts the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179) structure that was created earlier in the application's lifetime as well as a description of precisely how upscaling should be performed and on which data. This description is provided by the application filling out a [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure. -Destroying the context is performed by calling [`ffxFsr2ContextDestroy`](src/ffx-fsr2-api/ffx_fsr2.h#L277). 
Please note, that the GPU should be idle before attempting to call [`ffxFsr2ContextDestroy`](src/ffx-fsr2-api/ffx_fsr2.h#L277), and the function does not perform implicit synchronization to ensure that resources being accessed by FSR2 are not currently in flight. The reason for this choice is to avoid FSR2 introducing additional GPU flushes for applications who already perform adequate synchronization at the point where they might wish to destroy the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L164), this allows an application to perform the most efficient possible creation and teardown of the FSR2 API when required. +Destroying the context is performed by calling [`ffxFsr2ContextDestroy`](src/ffx-fsr2-api/ffx_fsr2.h#L292). Please note, that the GPU should be idle before attempting to call [`ffxFsr2ContextDestroy`](src/ffx-fsr2-api/ffx_fsr2.h#L292), and the function does not perform implicit synchronization to ensure that resources being accessed by FSR2 are not currently in flight. The reason for this choice is to avoid FSR2 introducing additional GPU flushes for applications who already perform adequate synchronization at the point where they might wish to destroy the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179), this allows an application to perform the most efficient possible creation and teardown of the FSR2 API when required. There are additional helper functions which are provided as part of the FSR2 API. These helper functions perform tasks like the computation of sub-pixel jittering offsets, as well as the calculation of rendering resolutions based on dispatch resolutions and the default [scaling modes](#scaling-modes) provided by FSR2. 
@@ -332,7 +346,7 @@ Out of the box, the FSR2 API will compile into multiple libraries following the ## Memory management If the FSR2 API is used with one of the supplied backends (e.g: DirectX(R)12 or Vulkan(R)) then all the resources required by FSR2 are created as committed resources directly using the graphics device provided by the host application. However, by overriding the create and destroy family of functions present in the backend interface it is possible for an application to more precisely control the memory management of FSR2. -To do this, you can either provide a full custom backend to FSR2 via the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure passed to [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L215) function, or you can retrieve the backend for your desired API and override the resource creation and destruction functions to handle them yourself. To do this, simply overwrite the [`fpCreateResource`](src/ffx-fsr2-api/ffx_fsr2_interface.h#L360) and [`fpDestroyResource`](src/ffx-fsr2-api/ffx_fsr2_interface.h#L364) function pointers. +To do this, you can either provide a full custom backend to FSR2 via the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure passed to [`ffxFsr2ContextCreate`](src/ffx-fsr2-api/ffx_fsr2.h#L228) function, or you can retrieve the backend for your desired API and override the resource creation and destruction functions to handle them yourself. To do this, simply overwrite the [`fpCreateResource`](src/ffx-fsr2-api/ffx_fsr2_interface.h#L379) and [`fpDestroyResource`](src/ffx-fsr2-api/ffx_fsr2_interface.h#L383) function pointers. ``` CPP // Setup DX12 interface. @@ -360,7 +374,7 @@ errorCode = ffxFsr2ContextCreate(&context, &contextDescription); FFX_ASSERT(errorCode == FFX_OK); ``` -One interesting advantage to an application taking control of the memory management required for FSR2 is that resource aliasing maybe performed, which can yield a memory saving. 
The table present in [Memory requirements](#memory-requirements) demonstrates the savings available through using this technique. In order to realise the savings shown in this table, an appropriate area of memory - the contents of which are not required to survive across a call to the FSR2 dispatches - should be found to share with the aliasable resources required for FSR2. Each [`FfxFsr2CreateResourceFunc`](src/ffx-fsr2-api/ffx_fsr2_interface.h#L399) call made by FSR2's core API through the FSR2 backend interface will contains a set of flags as part of the [`FfxCreateResourceDescription`](src/ffx-fsr2-api/ffx_types.h#L251) structure. If the [`FFX_RESOURCE_FLAGS_ALIASABLE`](src/ffx-fsr2-api/ffx_types.h#L101) is set in the [`flags`](src/ffx-fsr2-api/ffx_types.h#L208) field this indicates that the resource may be safely aliased with other resources in the rendering frame. +One interesting advantage to an application taking control of the memory management required for FSR2 is that resource aliasing may be performed, which can yield a memory saving. The table present in [Memory requirements](#memory-requirements) demonstrates the savings available through using this technique. In order to realise the savings shown in this table, an appropriate area of memory - the contents of which are not required to survive across a call to the FSR2 dispatches - should be found to share with the aliasable resources required for FSR2. Each [`FfxFsr2CreateResourceFunc`](src/ffx-fsr2-api/ffx_fsr2_interface.h#L379) call made by FSR2's core API through the FSR2 backend interface will contain a set of flags as part of the [`FfxCreateResourceDescription`](src/ffx-fsr2-api/ffx_types.h#L266) structure. If the [`FFX_RESOURCE_FLAGS_ALIASABLE`](src/ffx-fsr2-api/ffx_types.h#L117) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_types.h#L224) field this indicates that the resource may be safely aliased with other resources in the rendering frame.
## Temporal Antialiasing Temporal antialiasing (TAA) is a technique which uses the output of previous frames to construct a higher quality output from the current frame. As FSR2 has a similar goal - albeit with the additional goal of also increasing the resolution of the rendered image - there is no longer any need to include a separate TAA pass in your application. @@ -377,7 +391,7 @@ Internally, these function implement a Halton[2,3] sequence [[Halton](#reference ![alt text](docs/media/super-resolution-temporal/jitter-space.svg "A diagram showing how to map sub-pixel jitter offsets to projection offsets.") -It is important to understand that the values returned from the [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L424) are in unit pixel space, and in order to composite this correctly into a projection matrix we must convert them into projection offsets. The diagram above shows a single pixel in unit pixel space, and in projection space. The code listing below shows how to correctly composite the sub-pixel jitter offset value into a projection matrix. +It is important to understand that the values returned from the [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L437) are in unit pixel space, and in order to composite this correctly into a projection matrix we must convert them into projection offsets. The diagram above shows a single pixel in unit pixel space, and in projection space. The code listing below shows how to correctly composite the sub-pixel jitter offset value into a projection matrix. ``` CPP const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(renderWidth, displayWidth); @@ -393,9 +407,9 @@ const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vecto const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix; ``` -Jitter should be applied to *all* rendering. This includes opaque, alpha transparent, and raytraced objects. 
For rasterized objects, the sub-pixel jittering values calculated by the [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L422) function can be applied to the camera projection matrix which is ultimately used to perform transformations during vertex shading. For raytraced rendering, the sub-pixel jitter should be applied to the ray's origin - often the camera's position. +Jitter should be applied to *all* rendering. This includes opaque, alpha transparent, and raytraced objects. For rasterized objects, the sub-pixel jittering values calculated by the [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L437) function can be applied to the camera projection matrix which is ultimately used to perform transformations during vertex shading. For raytraced rendering, the sub-pixel jitter should be applied to the ray's origin - often the camera's position. -Whether you elect to use the recommended [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L424) function or your own sequence generator, you must set the [`jitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L125) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure to inform FSR2 of the jitter offset that has been applied in order to render each frame. Moreover, if not using the recommended [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L424) function, care should be taken that your jitter sequence never generates a null vector; that is value of 0 in both the X and Y dimensions. +Whether you elect to use the recommended [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L437) function or your own sequence generator, you must set the [`jitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L128) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure to inform FSR2 of the jitter offset that has been applied in order to render each frame. 
Moreover, if not using the recommended [`ffxFsr2GetJitterOffset`](src/ffx-fsr2-api/ffx_fsr2.h#L437) function, care should be taken that your jitter sequence never generates a null vector; that is, a value of 0 in both the X and Y dimensions. The table below shows the jitter sequence length for each of the default quality modes. @@ -408,7 +422,7 @@ The table below shows the jitter sequence length for each of the default quality | Custom | [1..n]x (per dimension) | `ceil(8 * n^2)` | ## Camera jump cuts -Most applications with real-time rendering have a large degree of temporal consistency between any two consecutive frames. However, there are cases where a change to a camera's transformation might cause an abrupt change in what is rendered. In such cases, FSR2 is unlikely to be able to reuse any data it has accumulated from previous frames, and should clear this data such to exclude it from consideration in the compositing process. In order to indicate to FSR2 that a jump cut has occurred with the camera you should set the [`reset`](src/ffx-fsr2-api/ffx_fsr2.h#L132) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure to `true` for the first frame of the discontinuous camera transformation. +Most applications with real-time rendering have a large degree of temporal consistency between any two consecutive frames. However, there are cases where a change to a camera's transformation might cause an abrupt change in what is rendered. In such cases, FSR2 is unlikely to be able to reuse any data it has accumulated from previous frames, and should clear this data so as to exclude it from consideration in the compositing process. In order to indicate to FSR2 that a jump cut has occurred with the camera you should set the [`reset`](src/ffx-fsr2-api/ffx_fsr2.h#L135) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure to `true` for the first frame of the discontinuous camera transformation.
Rendering performance may be slightly less than typical frame-to-frame operation when using the reset flag, as FSR2 will clear some additional internal resources. @@ -431,12 +445,12 @@ The following table illustrates the mipmap biasing factor which results from eva | Ultra performance | 3.0X (per dimension) | -2.58 | ## Frame Time Delta Input -The FSR2 API requires [`frameTimeDelta`](src/ffx-fsr2-api/ffx_fsr2.h#L130) be provided by the application through the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure. This value is in __milliseconds__: if running at 60fps, the value passed should be around __16.6f__. +The FSR2 API requires [`frameTimeDelta`](src/ffx-fsr2-api/ffx_fsr2.h#L133) be provided by the application through the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure. This value is in __milliseconds__: if running at 60fps, the value passed should be around __16.6f__. The value is used within the temporal component of the FSR 2 auto-exposure feature. This allows for tuning of the history accumulation for quality purposes. ## HDR support -High dynamic range images are supported in FSR2. To enable this, you should set the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) bit in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure. Images should be provided to FSR2 in linear color space. +High dynamic range images are supported in FSR2. To enable this, you should set the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) bit in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. Images should be provided to FSR2 in linear color space. > Support for additional color spaces might be provided in a future revision of FSR2. 
@@ -461,18 +475,28 @@ Modern GPUs execute collections of threads - called wavefronts - together in a S For DirectX(R)12 based applications which are running on RDNA and RDNA2-based GPUs and using the Microsoft Agility SDK, the FSR2 host API will select a 64-wide wavefront width. +## Debug Checker + +The context description structure can be provided with a callback function for passing textual warnings from the FSR 2 runtime to the underlying application. The `fpMessage` member of the description is of type `FfxFsr2Message` which is a function pointer for passing string messages of various types. Assigning this variable to a suitable function, and passing the [`FFX_FSR2_ENABLE_DEBUG_CHECKING`](src/ffx-fsr2-api/ffx_fsr2.h#L96) flag within the flags member of [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) will enable the feature. It is recommended this is enabled only in debug development builds. + +An example of the kind of output that can occur when the checker observes possible issues is below: + +``` +FSR2_API_DEBUG_WARNING: FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting +FSR2_API_DEBUG_WARNING: frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps) +``` + # The technique ## Algorithm structure The FSR2 algorithm is implemented in a series of stages, which are as follows: 1. Compute luminance pyramid -2. Adjust input color -3. Reconstruct & dilate -4. Depth clip -5. Create locks -6. Reproject & accumulate -7. Robust Contrast Adaptive Sharpening (RCAS) +2. Reconstruct & dilate +3. Depth clip +4. Create locks +5. Reproject & accumulate +6. Robust Contrast Adaptive Sharpening (RCAS) Each pass stage of the algorithm is laid out in the sections following this one, but the data flow for the complete FSR2 algorithm is shown in the diagram below. 
@@ -483,7 +507,7 @@ Each pass stage of the algorithm is laid out in the sections following this one, The compute luminance pyramid stage has two responsibilities: 1. To produce a lower resolution version of the input color's luminance. This is used by shading change detection in the accumulation pass. -2. To produce a 1x1 exposure texture which is optionally used by the exposure calculations of the [Adjust input color](#adjust-input-color) stage to apply tonemapping, and the [Reproject & Accumulate](#project-and-accumulate) stage for reversing local tonemapping ahead of producing an output from FSR2. +2. To produce a 1x1 exposure texture which is optionally used to apply tonemapping, and by the [Reproject & Accumulate](#project-and-accumulate) stage for reversing local tonemapping ahead of producing an output from FSR2. ### Resource inputs @@ -493,7 +517,7 @@ The following table contains all resources consumed by the [Compute luminance py | Name | Temporal layer | Resolution | Format | Type | Notes | | ----------------|-----------------|--------------|-------------------------|-----------|----------------------------------------------| -| Color buffer | Current frame | Render | `APPLICATION SPECIFIED` | Texture | The render resolution color buffer for the current frame provided by the application. If the contents of the color buffer are in high dynamic range (HDR), then the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L87) flag should be set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure. | +| Color buffer | Current frame | Render | `APPLICATION SPECIFIED` | Texture | The render resolution color buffer for the current frame provided by the application.
If the contents of the color buffer are in high dynamic range (HDR), then the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) flag should be set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. | ### Resource outputs The following table contains all resources produced or modified by the [Compute luminance pyramid](#compute-luminance-pyramid) stage. @@ -502,11 +526,11 @@ The following table contains all resources produced or modified by the [Compute | Name | Temporal layer | Resolution | Format | Type | Notes | | ----------------------------|-----------------|------------------|-------------------------|-----------|----------------------------------------------| -| Exposure | Current frame | 1x1 | `R32_FLOAT` | Texture | A 1x1 texture containing the exposure value computed for the current frame. This resource is optional, and may be omitted if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L92) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166). | -| Current luminance | Current frame | `Render * 0.5` | `R16_FLOAT` | Texture | A texture at 50% of render resolution texture which contains the luminance of the current frame. | +| Exposure | Current frame | 1x1 | `R32_FLOAT` | Texture | A 1x1 texture containing the exposure value computed for the current frame. This resource is optional, and may be omitted if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). 
| +| Current luminance | Current frame | `Render * 0.5` + MipChain | `R16_FLOAT` | Texture | A texture at 50% of render resolution texture which contains the luminance of the current frame. A full mip chain is allocated. | ### Description -The [Compute luminance pyramid](#compute-luminance-pyramid) stage is implemented using FidelityFX [Single Pass Downsampler](single-pass-downsampler.md), an optimized technique for producing mipmap chains using a single compute shader dispatch. Instead of the conventional (full) pyramidal approach, SPD provides a mechanism to produce a specific set of mipmap levels for an arbitrary input texture, as well as performing arbitrary calculations on that data as we store it to the target location in memory. In FSR2, we are interested in producing in upto two intermediate resources depending on the configuration of the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166). The first resource is a low-resolution representation of the current luminance, this is used later in FSR2 to attempt to detect shading changes. The second is the exposure value, and while it is always computed, it is only used by subsequent stages if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure upon context creation. The exposure value - either from the application, or the [Compute luminance pyramid](#compute-luminance-pyramid) stage - is used in the [Adjust input color](#adjust-input-color) stage of FSR2, as well as by the [Reproject & Accumulate](#project-and-accumulate) stage. +The [Compute luminance pyramid](#compute-luminance-pyramid) stage is implemented using FidelityFX [Single Pass Downsampler](https://github.com/GPUOpen-Effects/FidelityFX-SPD), an optimized technique for producing mipmap chains using a single compute shader dispatch. 
Instead of the conventional (full) pyramidal approach, SPD provides a mechanism to produce a specific set of mipmap levels for an arbitrary input texture, as well as performing arbitrary calculations on that data as we store it to the target location in memory. In FSR2, we are interested in producing up to two intermediate resources depending on the configuration of the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). The first resource is a low-resolution representation of the current luminance, this is used later in FSR2 to attempt to detect shading changes. The second is the exposure value, and while it is always computed, it is only used by subsequent stages if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure upon context creation. The exposure value - either from the application, or the [Compute luminance pyramid](#compute-luminance-pyramid) stage - is used in the [Adjust input color](#adjust-input-color) stage of FSR2, as well as by the [Reproject & Accumulate](#project-and-accumulate) stage. ![alt text](docs/media/super-resolution-temporal/auto-exposure.svg "A diagram showing the mipmap levels written by auto-exposure.") @@ -534,61 +558,6 @@ float ComputeAutoExposureFromAverageLog(float averageLogLuminance) return 1 / luminanceMax; } ``` - -## Adjust input color - -There are several types of adjustments which FSR2 performs on the input colors, these are as follows: - -1. The input color is divided by the pre-exposure value. -2. The input color is multiplied by the exposure value. -3. The exposed color is then converted to the YCoCg color space [[**YCoCg**](#references)].
- -Please note that manipulations to the color values provided by the application are strictly internal to FSR2, meaning that the results produced by FSR2 are always converted by into the requested color space (typically linear). - -### Resource inputs -The following table contains all resources consumed by the [Adjust input color](#Adjust-input-color) stage. - -> The temporal layer indicates which frame the data should be sourced from. 'Current frame' means that the data should be sourced from resources created for the frame that is to be presented next. 'Previous frame' indicates that the data should be sourced from resources which were created for the frame that has just presented. The resolution column indicates if the data should be at 'rendered' resolution or 'presentation' resolution. 'Rendered' resolution indicates that the resource should match the resolution at which the application is performing its rendering. Conversely, 'presentation' indicates that the resolution of the target should match that which is to be presented to the user. - -| Name | Temporal layer | Resolution | Format | Type | Notes | -| ----------------|-----------------|--------------|---------------------------|-----------|----------------------------------------------| -| Color buffer | Current frame | Render | `APPLICATION SPECIFIED` | Texture | The render resolution color buffer for the current frame provided by the application. If the contents of the color buffer are in high dynamic range (HDR), then the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) flag should be set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure. | -| Exposure | Current frame | 1x1 | ``R32_FLOAT`` | Texture | A 1x1 texture containing the exposure value computed for the current frame. 
This resource can be supplied by the application, or computed by the [Compute luminance pyramid](#compute-luminance-pyramid) stage of FSR2 if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure. | - -### Resource outputs -The following table contains all resources produced or modified by the [Adjust input color](#Adjust-input-color) stage. - -> The temporal layer indicates which frame the data should be sourced from. 'Current frame' means that the data should be sourced from resources created for the frame that is to be presented next. 'Previous frame' indicates that the data should be sourced from resources which were created for the frame that has just presented. The resolution column indicates if the data should be at 'rendered' resolution or 'presentation' resolution. 'Rendered' resolution indicates that the resource should match the resolution at which the application is performing its rendering. Conversely, 'presentation' indicates that the resolution of the target should match that which is to be presented to the user. - -| Name | Temporal layer | Resolution | Format | Type | Notes | -| ----------------------------|-----------------|--------------|-------------------------|-----------|----------------------------------------------| -| Adjusted color buffer | Current frame | Render | `R16G16B16A16_FLOAT` | Texture | A texture containing the adjusted version of the application's color buffer. The tonemapping operator may not be the same as any tonemapping operator included in the application, and is instead a local, reversible operator used throughout FSR2. This buffer is stored in YCoCg format. | -| Luminance history | Many frames | Render | `R8G8B8A8_UNORM` | Texture | A texture containing three frames of luminance history, as well as a stability factor encoded in the alpha channel. 
| -| Previous depth buffer | Current frame | Render | `R32_UNORM` | Texture | A texture containing a reconstructed and dilated depth values. This surface is cleared by the [Adjust input color](#adjust-input-color) stage. Please note: When viewing this texture in a capture tool (such as [RenderDoc](https://renderdoc.org)) it may not display correctly. This is because the format of this texture is ``R32_UNORM`` and contains IEEE754 floating point values, which have been written after performing a bitcast using the [`asuint`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-asuint) intrinsic function. See the note in [Adjust input color](#adjust-input-color) for more details on the specifics of how this works. | - -### Description -The majority of the FSR2 algorithm operates in YCoCg color space. In order to avoid repeatedly calculating conversions from the color space used by the application, FSR2 implements a dedicated stage which applies all adjustments to the color once, with the results then being cached to an adjusted color texture which other passes may then read. As part of the adjustment process, FSR2 also calculates a luminance history buffer. - -As the luminance buffer is persistent (it is not available for aliasing, or cleared each frame), we have access to four frames of history during the [Adjust input color](#Adjust-input-color) stage on any one frame. However, at the end of the [Adjust input color](#Adjust-input-color) stage, the luminance history values are shifted down, meaning that subsequent stages of FSR2 have access to the three most recent frames of luminance (the current frame, and the two frames before it). Therefore, if we denote the current frame as n, then the values stored in the luminance history buffer are as follows. 
- -| Channel | Frame index (Start of adjust input color stage) | Frame index (End of adjust input color stage) | -|---------|-------------------------------------------------|-----------------------------------------------| -| Red | n-1 | n | -| Green | n-2 | n - 1 | -| Blue | n-3 | n - 2 | - -The alpha channel of the luminance history buffer contains a measure of the stability of the luminance over the current frame, and the three frames that came before it. This is computed in the following way: - -``` HLSL -float stabilityValue = 1.0f; -for (int i = 0; i < 3; i++) { - stabilityValue = min(stabilityValue, MinDividedByMax(currentFrameLuma, currentFrameLumaHistory[i])); -} -``` - -In additional to its color adjustment responsibilities already outlined, this stage also has the responsibility for clearing the reprojected depth buffer to a known value, ready for the [Reconstruct & dilate](#reconstruct-and-dilate) stage on the next frame of the application. The buffer must be cleared, as [Reconstruct & dilate](#reconstruct-and-dilate) will populate it using atomic operations. Depending on the configuration of the depth buffer, an appropriate clearing value is selected. - -The format of the previous depth buffer is `R32_UINT` which allows the use of [`InterlockedMax`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/interlockedmax) and [`InterlockedMin`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/interlockedmin) operations to be performed from the [Reconstruct & dilate](#reconstruct-and-dilate) stage of FSR2. This is done with the resulting integer values returned by converting depth values using the [`asint`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-asint) functions. This works because depth values are always greater than 0, meaning that the monotonicity of IEEE754 floating point values when interpreted as integers is guaranteed. 
## Reconstruct and dilate The reconstruct & dilate stage consumes the applications depth buffer and motion vectors, and produces a reconstructed and dilated depth buffer for the previous frame, together with a dilated set of motion vectors in UV space. The stage runs at render resolution. @@ -602,8 +571,10 @@ The following table contains all of the resources which are required by the reco | Name | Temporal layer | Resolution | Format | Type | Notes | | ----------------------------|-----------------|------------|------------------------------------|-----------|------------------------------------------------| -| Depth buffer | Current frame | Render | `APPLICATION SPECIFIED (1x FLOAT)` | Texture | The render resolution depth buffer for the current frame provided by the application. The data should be provided as a single floating point value, the precision of which is under the application's control. The configuration of the depth should be communicated to FSR2 via the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166). You should set the [`FFX_FSR2_ENABLE_DEPTH_INVERTED`](src/ffx-fsr2-api/ffx_fsr2.h#L91) flag if your depth buffer is inverted (that is [1..0] range), and you should set the flag if your depth buffer has as infinite far plane. If the application provides the depth buffer in `D32S8` format, then FSR2 will ignore the stencil component of the buffer, and create an `R32_FLOAT` resource to address the depth buffer. On GCN and RDNA hardware, depth buffers are stored separately from stencil buffers. | -| Motion vectors | Current fraame | Render or presentation | `APPLICATION SPECIFIED (2x FLOAT)` | Texture | The 2D motion vectors for the current frame provided by the application in [*(<-width, -height>*..**] range. 
If your application renders motion vectors with a different range, you may use the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L126) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure to adjust them to match the expected range for FSR2. Internally, FSR2 uses 16bit quantities to represent motion vectors in many cases, which means that while motion vectors with greater precision can be provided, FSR2 will not benefit from the increased precision. The resolution of the motion vector buffer should be equal to the render resolution, unless the [`FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS`](src/ffx-fsr2-api/ffx_fsr2.h#L89) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L104) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L102) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L166), in which case it should be equal to the presentation resolution. | +| Color buffer | Current frame | Render | `APPLICATION SPECIFIED` | Texture | The render resolution color buffer for the current frame provided by the application. If the contents of the color buffer are in high dynamic range (HDR), then the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) flag should be set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. | +| Exposure | Current frame | 1x1 | ``R32_FLOAT`` | Texture | A 1x1 texture containing the exposure value computed for the current frame. This resource can be supplied by the application, or computed by the [Compute luminance pyramid](#compute-luminance-pyramid) stage of FSR2 if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. 
| +| Depth buffer | Current frame | Render | `APPLICATION SPECIFIED (1x FLOAT)` | Texture | The render resolution depth buffer for the current frame provided by the application. The data should be provided as a single floating point value, the precision of which is under the application's control. The configuration of the depth should be communicated to FSR2 via the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). You should set the [`FFX_FSR2_ENABLE_DEPTH_INVERTED`](src/ffx-fsr2-api/ffx_fsr2.h#L91) flag if your depth buffer is inverted (that is [1..0] range), and you should set the `FFX_FSR2_ENABLE_DEPTH_INFINITE` flag if your depth buffer has an infinite far plane. If the application provides the depth buffer in `D32S8` format, then FSR2 will ignore the stencil component of the buffer, and create an `R32_FLOAT` resource to address the depth buffer. On GCN and RDNA hardware, depth buffers are stored separately from stencil buffers. | +| Motion vectors | Current frame | Render or presentation | `APPLICATION SPECIFIED (2x FLOAT)` | Texture | The 2D motion vectors for the current frame provided by the application in [*<-width, -height>*..*<width, height>*] range. If your application renders motion vectors with a different range, you may use the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L129) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure to adjust them to match the expected range for FSR2. Internally, FSR2 uses 16bit quantities to represent motion vectors in many cases, which means that while motion vectors with greater precision can be provided, FSR2 will not benefit from the increased precision.
The resolution of the motion vector buffer should be equal to the render resolution, unless the [`FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS`](src/ffx-fsr2-api/ffx_fsr2.h#L89) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179), in which case it should be equal to the presentation resolution. | ### Resource outputs The following table contains all of the resources which are produced by the reconstruct & dilate stage. @@ -612,14 +583,16 @@ The following table contains all of the resources which are produced by the reco | Name | Temporal layer | Resolution | Format | Type | Notes | | ------------------------------------|-----------------|------------|------------------------|-----------|------------------------------------------------| -| Previous depth buffer | Current frame | Render | `R32_UNORM` | Texture | A texture containing the reconstructed previous frame depth values. This surface should first be cleared, see the [Adjust input color](#adjust-input-color) stage for details. Please note: When viewing this texture in a capture tool (such as [RenderDoc](https://renderdoc.org)) it may not display correctly. This is because the format of this texture is ``R32_UNORM`` and contains IEEE754 floating point values, which have been written after performing a bitcast using the ``asuint`` intrinsic function. See the note in [Reproject & accumulate](#reproject-accumulate) for more details on the specifics of how this works. | +| Est.Previous depth buffer | Current frame | Render | `R32_UNORM` | Texture | A texture containing the reconstructed previous frame depth values. This surface should first be cleared, see the [Adjust input color](#adjust-input-color) stage for details. 
Please note: When viewing this texture in a capture tool (such as [RenderDoc](https://renderdoc.org)) it may not display correctly. This is because the format of this texture is ``R32_UNORM`` and contains IEEE754 floating point values, which have been written after performing a bitcast using the ``asuint`` intrinsic function. See the note in [Reproject & accumulate](#reproject-accumulate) for more details on the specifics of how this works. | | Dilated depth | Current frame | Render | `R16_UINT` | Texture | A texture containing dilated depth values computed from the application's depth buffer. | | Dilated motion vectors | Current frame | Render | `R16G16_FLOAT` | Texture | A texture containing dilated 2D motion vectors computed from the application's 2D motion vector buffer. The red and green channel contains the two-dimensional motion vectors in NDC space. | +| Previous depth buffer | Current frame | Render | `R32_UNORM` | Texture | A texture containing a reconstructed and dilated depth values. This surface is cleared by the [Adjust input color](#adjust-input-color) stage. Please note: When viewing this texture in a capture tool (such as [RenderDoc](https://renderdoc.org)) it may not display correctly. This is because the format of this texture is ``R32_UNORM`` and contains IEEE754 floating point values, which have been written after performing a bitcast using the [`asuint`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-asuint) intrinsic function. See the note in [Adjust input color](#adjust-input-color) for more details on the specifics of how this works. | +| Lock input luma | Current frame | Render | `R16_FLOAT` | Texture | A texture containing luma data to be consumed by the lock stage. | ### Description The first step of the [Reconstruct & dilate](#reconstruct-and-dilate) stage is to compute the dilated depth values and motion vectors from the application's depth values and motion vectors for the current frame. 
Dilated depth values and motion vectors emphasise the edges of geometry which has been rendered into the depth buffer. This is because the edges of geometry will often introduce discontinuities into a contiguous series of depth values, meaning that as depth values and motion vectors are dilated, they will naturally follow the contours of the geometric edges present in the depth buffer. In order to compute the dilated depth values and motion vectors, FSR2 looks at the depth values for a 3x3 neighbourhood for each pixel and then selects the depth values and motion vectors in that neighbourhood where the depth value is nearest to the camera. In the diagram below, you can see how the central pixel of the 3x3 kernel is updated with the depth value and motion vectors from the pixel with the largest depth value - the pixel on the central, right hand side. -As this stage is the first time that motion vectors are consumed by FSR2, this is where motion vector scaling is applied if using the FSR2 host API. Motion vector scaling factors provided via the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L126) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L115) structure and allows you to transform non-screenspace motion vectors into screenspace motion vectors which FSR2 expects. +As this stage is the first time that motion vectors are consumed by FSR2, this is where motion vector scaling is applied if using the FSR2 host API. Motion vector scaling factors are provided via the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L129) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure and allow you to transform non-screenspace motion vectors into screenspace motion vectors which FSR2 expects. ``` CPP // An example of how to manipulate motion vector scaling factors using the FSR2 host API.
@@ -646,9 +619,14 @@ The following table contains all the resources which are consumed by the [Depth | Name | Temporal layer | Resolution | Format | Type | Notes | | ------------------------------------|-----------------|------------|------------------------|-----------|------------------------------------------------| -| Previous depth buffer | Current frame | Render | `R32_UNORM` | Texture | A texture containing the reconstructed previous frame depth values. This surface should first be cleared, see the [Reproject & accumulate](#reproject-accumulate) stage for details. Please note: When viewing this texture in a capture tool (such as [RenderDoc](https://renderdoc.org)) it may not display correctly. This is because the format of this texture is ``R32_UINT`` and contains IEEE754 floating point values, which have been written after performing a bitcast using the ``asuint`` intrinsic function. See the note in [Reproject & accumulate](#reproject-accumulate) for more details on the specifics of how this works. | -| Dilated depth | Current frame | Render | `R16_UINT` | Texture | A texture containing dilated depth values computed from the application's depth buffer. | -| Dilated motion vectors | Current frame | Render | `R16G16_FLOAT` | Texture | A texture containing dilated 2D motion vectors computed from the application's 2D motion vector buffer. The red and green channel contains the two-dimensional motion vectors in NDC space, and the alpha channel contains the depth value used by the [Depth clip](#depth-clip) stage. | +| Est.Previous depth buffer | Current frame | Render | `R32_UNORM` | Texture | A texture containing the reconstructed previous frame depth values. This surface should first be cleared, see the [Reproject & accumulate](#reproject-accumulate) stage for details. Please note: When viewing this texture in a capture tool (such as [RenderDoc](https://renderdoc.org)) it may not display correctly. 
This is because the format of this texture is ``R32_UINT`` and contains IEEE754 floating point values, which have been written after performing a bitcast using the ``asuint`` intrinsic function. See the note in [Reproject & accumulate](#reproject-accumulate) for more details on the specifics of how this works. | +| Dilated depth | Current frame | Render | `R32_FLOAT` | Texture | A texture containing dilated depth values computed from the application's depth buffer. | +| Dilated motion vectors | Current & Previous frame | Render | `R16G16_FLOAT` | Texture | A texture containing dilated 2D motion vectors computed from the application's 2D motion vector buffer. The red and green channel contains the two-dimensional motion vectors in NDC space, and the alpha channel contains the depth value used by the [Depth clip](#depth-clip) stage. | +| Reactive masks | Current frame | Render | `R8_UNORM` | Texture | As some areas of a rendered image do not leave a footprint in the depth buffer or include motion vectors, FSR2 provides support for a reactive mask texture which can be used to indicate to FSR2 where such areas are. Good examples of these are particles, or alpha-blended objects which do not write depth or motion vectors. If this resource is not set, then FSR2's shading change detection logic will handle these cases as best it can, but for optimal results, this resource should be set. For more information on the reactive mask please refer to the [Reactive mask](#reactive-mask) section. | +| Color buffer | Current frame | Render | `APPLICATION SPECIFIED` | Texture | The render resolution color buffer for the current frame provided by the application. If the contents of the color buffer are in high dynamic range (HDR), then the [`FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE`](src/ffx-fsr2-api/ffx_fsr2.h#L88) flag should be set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. 
| +| Exposure | Current frame | 1x1 | ``R32_FLOAT`` | Texture | A 1x1 texture containing the exposure value computed for the current frame. This resource can be supplied by the application, or computed by the [Compute luminance pyramid](#compute-luminance-pyramid) stage of FSR2 if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L93) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure. | +| Depth buffer | Current frame | Render | `APPLICATION SPECIFIED (1x FLOAT)` | Texture | The render resolution depth buffer for the current frame provided by the application. The data should be provided as a single floating point value, the precision of which is under the application's control. The configuration of the depth should be communicated to FSR2 via the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). You should set the [`FFX_FSR2_ENABLE_DEPTH_INVERTED`](src/ffx-fsr2-api/ffx_fsr2.h#L91) flag if your depth buffer is inverted (that is [1..0] range), and you should set the [`FFX_FSR2_ENABLE_DEPTH_INFINITE`](src/ffx-fsr2-api/ffx_fsr2.h#L92) flag if your depth buffer has an infinite far plane. If the application provides the depth buffer in `D32S8` format, then FSR2 will ignore the stencil component of the buffer, and create an `R32_FLOAT` resource to address the depth buffer. On GCN and RDNA hardware, depth buffers are stored separately from stencil buffers. | +| Motion vectors | Current frame | Render or presentation | `APPLICATION SPECIFIED (2x FLOAT)` | Texture | The 2D motion vectors for the current frame provided by the application in [*<-width, -height>*..*<width, height>*] range.
If your application renders motion vectors with a different range, you may use the [`motionVectorScale`](src/ffx-fsr2-api/ffx_fsr2.h#L129) field of the [`FfxFsr2DispatchDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L118) structure to adjust them to match the expected range for FSR2. Internally, FSR2 uses 16-bit quantities to represent motion vectors in many cases, which means that while motion vectors with greater precision can be provided, FSR2 will not benefit from the increased precision. The resolution of the motion vector buffer should be equal to the render resolution, unless the [`FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS`](src/ffx-fsr2-api/ffx_fsr2.h#L89) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179), in which case it should be equal to the presentation resolution. | ### Resource outputs The following table contains all the resources which are produced by the [Depth clip](#depth-clip) stage. @@ -657,14 +635,14 @@ The following table contains all the resources which are produced by the [Depth | Name | Temporal layer | Resolution | Format | Type | Notes | | ------------------------------------|-----------------|------------|------------------------|-----------|----------------------------------------| -| Disocclusion mask | Current frame | Render | `R8_FLOAT` | Texture | A texture containing a value indicating how much the pixel has been disoccluded. A value of 0 means that the pixel was entirely occluded in the previous frame, and values greater than zero mean that the pixel was visible to an extent proportional to the value. Therefore when examining the mask in a graphics debugging tool, the darker areas in the disocclusion mask indicate areas which are more disoccluded. 
| - +| Adjusted color buffer | Current frame | Render | `R16G16B16A16_FLOAT` | Texture | A texture containing the adjusted version of the application's color buffer. The tonemapping operator may not be the same as any tonemapping operator included in the application, and is instead a local, reversible operator used throughout FSR2. This buffer is stored in YCoCg format. Alpha channel contains disocclusion mask.| +| Dilated reactive mask | Current frame | Render | `R8G8_UNORM` | Texture | Dilated reactive masks. | ### Description To generate the disocclusion mask, the depth value must be computed for each pixel from the previous camera's position and the new camera's position. In the diagram below, you can see a camera moving from an initial position (labelled P0) to a new position (labelled P1). As it does so, the shaded area behind the sphere becomes disoccluded - that is it becomes visible from the camera at P1 and was previously occluded from the point of view of P0. ![alt text](docs/media/super-resolution-temporal/disocclusion.svg "A diagram showing a disoccluded area as a camera moves from position 0 to position 1.") -With both values depth values, we can compare the delta between them against the Akeley separation value [[Akeley-06](#references)]. Intuitively, the Akeley separation constant provides a minimum distance between two objects represented in a floating point depth buffer which allow you to say - with a high degree of certainty - that the objects were originally distinct from one another. In the diagram below you can see that the mid-grey and dark-grey objects have a delta which is larger than the kSep value which has been computed for the application's depth buffer configuration. However, the distance from the light-gray object to the mid-grey object does not exceed the computed kSep value, and therefore we are unable to conclude if this object is distinct. 
+With both depth values, we can compare the delta between them against the Akeley separation value [[Akeley-06](#references)]. Intuitively, the Akeley separation constant provides a minimum distance between two objects represented in a floating point depth buffer which allows you to say - with a high degree of certainty - that the objects were originally distinct from one another. In the diagram below you can see that the mid-grey and dark-grey objects have a delta which is larger than the `kSep` value which has been computed for the application's depth buffer configuration. However, the distance from the light-gray object to the mid-grey object does not exceed the computed `kSep` value, and therefore we are unable to conclude if this object is distinct. ![alt text](docs/media/super-resolution-temporal/k-sep.svg "A diagram showing the concept behind the constant of separation.") @@ -680,8 +658,7 @@ The following table contains all resources consumed by the [Create locks](#creat | Name | Temporal layer | Resolution | Format | Type | Notes | | ----------------------------|-----------------|--------------|-------------------------|-----------|----------------------------------------------| -| Adjusted color buffer | Current frame | Render | `R16G16B16A16_FLOAT` | Texture | A texture containing the adjusted version of the application's color buffer. The tonemapping operator may not be the same as any tonemapping operator included in the application, and is instead a local, reversible operator used throughout FSR2. This buffer is stored in YCoCg format. | -| Lock status | Current frame | Presentation | `R16G16_FLOAT` | Texture | A mask which indicates whether or not to perform color rectification on a pixel, can be thought of as a lock on the pixel to stop rectification from removing the detail. Please note: This texture is part of an array of two textures along with the Lock status texture which is used as an input to this stage.
The selection of which texture in the array is used for input and output is swapped each frame. The red channel contains the time remaining on the pixel lock, and the Y channel contains the luminance of the pixel at the time when the lock was created. | +| Lock input luma | Current frame | Render | `R16_FLOAT` | Texture | A texture containing luminance data to be consumed by the lock stage. | ### Resource outputs The following table contains all resources produced or modified by the [Create locks](#create-locks) stage. @@ -690,40 +667,46 @@ The following table contains all resources produced or modified by the [Create l | Name | Temporal layer | Resolution | Format | Type | Notes | | ----------------------------|-----------------|--------------|-------------------------|-----------|----------------------------------------------| -| Lock status | Current frame | Presentation | `R16G16_FLOAT` | Texture | A mask which indicates whether or not to perform color rectification on a pixel, can be thought of as a lock on the pixel to stop rectification from removing the detail. Please note: This texture is part of an array of two textures along with the Lock status texture which is used as an input to this stage. The selection of which texture in the array is used for input and output is swapped each frame. The red channel contains the time remaining on the pixel lock, and the Y channel contains the luminance of the pixel at the time when the lock was created. The [Create locks](#create-locks) stage updates only a subset of this resource. | +| New lock mask | Current frame | Presentation | `R8_UNORM` | Texture | A mask which indicates whether or not to perform color rectification on a pixel, can be thought of as a lock on the pixel to stop rectification from removing the detail. Please note: This texture is part of an array of two textures along with the Lock status texture which is used as an input to this stage. 
The selection of which texture in the array is used for input and output is swapped each frame. The red channel contains the time remaining on the pixel lock, and the Y channel contains the luminance of the pixel at the time when the lock was created. The [Create locks](#create-locks) stage updates only a subset of this resource. | +| Est.Previous depth buffer | Next frame | Render | `R32_UNORM` | Texture | This is only written here to clear it. | ### Description Intuitively, a pixel lock is a mechanism to stop color rectification from being applied to a pixel. The net effect of this locking is that more of the previous frame's color data is used when computing the final, super resolution pixel color in the [Reproject & accumulate](#reproject-accumulate) stage. The lock status texture contains two values which together compose a pixel lock. The red channel of the lock status texture contains the remaining lifetime of a pixel lock. This value is decremented by the initial lock length divided by the total length of the jitter sequence. When a lock reaches zero, it is considered to be expired. The green channel of the lock status texture contains the luminance of the pixel at the time the lock was created, but it is only populated during the reprojection stage of [Reproject & accumulate](#reproject-accumulate) stage. The luminance value is ultimately used in the [Reproject & Accumulate](#reproject-accumulate) stage as part of the shading change detection, this allows FSR2 to unlock a pixel if there is discontinuous change to the pixel's appearance (e.g.: an abrupt change to the shading of the pixel). When creating locks, the 3x3 neighbourhood of luminance values is compared against a threshold. The result of this comparison determines if a new lock should be created. 
The use of the neighbourhood allows us to detect thin features in the input image which should be locked in order to preserve details in the final super resolution image; such as wires, or chain linked fences. +Additionally, this stage also has the responsibility for clearing the reprojected depth buffer to a known value, ready for the [Reconstruct & dilate](#reconstruct-and-dilate) stage on the next frame of the application. The buffer must be cleared, as [Reconstruct & dilate](#reconstruct-and-dilate) will populate it using atomic operations. Depending on the configuration of the depth buffer, an appropriate clearing value is selected. + +The format of the previous depth buffer is `R32_UINT` which allows the use of [`InterlockedMax`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/interlockedmax) and [`InterlockedMin`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/interlockedmin) operations to be performed from the [Reconstruct & dilate](#reconstruct-and-dilate) stage of FSR2. This is done with the resulting integer values returned by converting depth values using the [`asint`](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-asint) functions. This works because depth values are always greater than 0, meaning that the monotonicity of IEEE754 floating point values when interpreted as integers is guaranteed. + + ## Reproject & accumulate This stage undertakes the following steps: 1. The current frame's color buffer is upsampled using Lanczos filtering. 2. The previous frame's output color and lock status buffers are reprojected, as if they were viewed from the current camera's perspective. 3. Various cleanup steps to the historical color data. -4. The historical color data, and the upscaled color data from the current frame are accumulated. -5. The output is (optionally) tonemapped ready for RCAS sharpening. +4. Luma instability is computed. +5. 
The historical color data, and the upscaled color data from the current frame are accumulated. This stage runs at presentation resolution. ### Resource inputs The following table contain all resources required by the [Reproject & accumulate](#reproject-accumulate) stage. -> The temporal layer indicates which frame the data should be sourced from. 'Current frame' means that the data should be sourced from resources created for the frame that is to be presented next. 'Previous frame' indicates that the data should be sourced from resources which were created for the frame that has just presented. The resolution column indicates if the data should be at 'rendered' resolution or 'presentation' resolution. 'Rendered' resolution indicates that the resource should match the resolution at which the application is performing its rendering. Conversely, 'presentation' indicates that the resolution of the target should match that which is to be presented to the user. +> The temporal layer indicates which frame the data should be sourced from. 'Current frame' means that the data should be sourced from resources created for the frame that is to be presented next. 'Previous frame' indicates that the data should be sourced from resources which were created for the frame that has just presented. The resolution column indicates if the data should be at 'rendered' resolution or 'presentation' resolution. 'Rendered' resolution indicates that the resource should match the resolution at which the application is performing its rendering. Conversely, 'presentation' indicates that the resolution of the target should match that which is to be presented to the user. If display resolution motion vectors are provided, the reprojection step will use the full precision of the vectors, as we read the resource directly. 
| Name | Temporal layer | Resolution | Format | Type | Notes | | ------------------------------------|-----------------|--------------|------------------------|-----------|----------------------------------------| -| Disocclusion mask | Current frame | Render | `R8_UNORM` | Texture | A texture containing a value indicating how much the pixel has been disoccluded. | +| Exposure | Current frame | 1x1 | `R32_FLOAT` | Texture | A 1x1 texture containing the exposure value computed for the current frame. This resource is optional, and may be omitted if the [`FFX_FSR2_ENABLE_AUTO_EXPOSURE`](src/ffx-fsr2-api/ffx_fsr2.h#L92) flag is set in the [`flags`](src/ffx-fsr2-api/ffx_fsr2.h#L105) field of the [`FfxFsr2ContextDescription`](src/ffx-fsr2-api/ffx_fsr2.h#L103) structure when creating the [`FfxFsr2Context`](src/ffx-fsr2-api/ffx_fsr2.h#L179). | | Dilated motion vectors | Current frame | Render | `R16G16_FLOAT` | Texture | A texture containing dilated motion vectors computed from the application's velocity buffer. The red and green channel contains the two-dimensional motion vectors in UV space. | -| Reactive mask | Current frame | Render | `R8_UNORM` | Texture | As some areas of a rendered image do not leave a footprint in the depth buffer or include motion vectors, FSR2 provides support for a reactive mask texture which can be used to indicate to FSR2 where such areas are. Good examples of these are particles, or alpha-blended objects which do not write depth or motion vectors. If this resource is not set, then FSR2's shading change detection logic will handle these cases as best it can, but for optimal results, this resource should be set. For more information on the reactive mask please refer to the [Reactive mask](#reactive-mask) section. | -| Output buffer | Previous frame | Presentation | ``R16G16B16A16_FLOAT`` | Texture | The output buffer produced by the FSR2 algorithm running in the previous frame. 
Please note: This buffer is used internally by FSR2, and is distinct from the presentation buffer which is derived from the output buffer, and has [RCAS](#robust-contrast-adpative-sharpening-rcas) applied. Please note: This texture is part of an array of two textures along with the Output buffer texture which is produced by the [Reproject & accumulate](#reproject-accumulate) stage. The selection of which texture in the array is used for input and output is swapped each frame. | +| Dilated reactive mask | Current frame | Render | `R8G8_UNORM` | Texture | Dilated reactive masks. | +| Upscaled buffer | Previous frame | Presentation | ``R16G16B16A16_FLOAT`` | Texture | The output buffer produced by the FSR2 algorithm running in the previous frame. Please note: This buffer is used internally by FSR2, and is distinct from the presentation buffer which is derived from the output buffer, and has [RCAS](#robust-contrast-adpative-sharpening-rcas) applied. Please note: This texture is part of an array of two textures along with the Output buffer texture which is produced by the [Reproject & accumulate](#reproject-accumulate) stage. The selection of which texture in the array is used for input and output is swapped each frame. | | Current luminance | Current frame | `Render * 0.5` | `R16_FLOAT` | Texture | A texture at 50% of render resolution texture which contains the luminance of the current frame. | | Luminance history | Many frames | Render | `R8G8B8A8_UNORM` | Texture | A texture containing three frames of luminance history, as well as a stability factor encoded in the alpha channel. | -| Adjusted color buffer | Current frame | Render | `R16G16B16A16_FLOAT` | Texture | A texture containing the adjusted version of the application's color buffer. The tonemapping operator may not be the same as any tonemapping operator included in the application, and is instead a local, reversible operator used throughout FSR2. This buffer is stored in YCoCg format. 
| +| Adjusted color buffer | Current frame | Render | `R16G16B16A16_FLOAT` | Texture | A texture containing the adjusted version of the application's color buffer. The tonemapping operator may not be the same as any tonemapping operator included in the application, and is instead a local, reversible operator used throughout FSR2. This buffer is stored in YCoCg format. Alpha channel contains disocclusion mask.| | Lock status | Previous frame | Presentation | `R16G16_FLOAT` | Texture | A mask which indicates not to perform color clipping on a pixel, can be thought of as a lock on the pixel to stop clipping removing the detail. For a more detailed description of the pixel locking mechanism please refer to the [Create locks](#create-locks) stage. Please note: This texture is part of an array of two textures along with the Lock status texture which is used as an output from this stage. The selection of which texture in the array is used for input and output is swapped each frame. | - +| New lock mask | Current frame | Presentation | `R8_UNORM` | Texture | A mask which indicates whether or not to perform color rectification on a pixel, can be thought of as a lock on the pixel to stop rectification from removing the detail. Please note: This texture is part of an array of two textures along with the Lock status texture which is used as an input to this stage. The selection of which texture in the array is used for input and output is swapped each frame. The red channel contains the time remaining on the pixel lock, and the Y channel contains the luminance of the pixel at the time when the lock was created. The [Create locks](#create-locks) stage updates only a subset of this resource. 
| ### Resource outputs @@ -733,14 +716,14 @@ This table contains the resources produced by the [Reproject & accumulate](#repr | Name | Temporal layer | Resolution | Format | Type | Notes | | ----------------------------|-----------------|--------------|-------------------------|-----------|----------------------------------------------| -| Output buffer | Current frame | Presentation | `R16G16B16A16_FLOAT` | Texture | The output buffer produced by the [Reproject & accumulate](#reproject-accumulate) stage for the current frame. Please note: This buffer is used internally by FSR2, and is distinct from the presentation buffer which is produced as an output from this stage after applying RCAS. Please note: This texture is part of an array of two textures along with the Output buffer texture which is consumed by the [Reproject & accumulate](#reproject-accumulate) stage. The selection of which texture in the array is used for input and output is swapped each frame. | +| Upscaled buffer | Current frame | Presentation | `R16G16B16A16_FLOAT` | Texture | The output buffer produced by the [Reproject & accumulate](#reproject-accumulate) stage for the current frame. Please note: This buffer is used internally by FSR2, and is distinct from the presentation buffer which is produced as an output from this stage after applying RCAS. Please note: This texture is part of an array of two textures along with the Output buffer texture which is consumed by the [Reproject & accumulate](#reproject-accumulate) stage. The selection of which texture in the array is used for input and output is swapped each frame. | | Reprojected locks | Current frame | Render | `R16G16_FLOAT` | Texture | The reprojected lock status texture. | +| Luminance history | Many frames | Render | `R8G8B8A8_UNORM` | Texture | A texture containing three frames of luminance history, as well as a stability factor encoded in the alpha channel. 
| +| New lock mask | Next frame | Presentation | `R8_UNORM` | Texture | This is cleared for next frame. | ### Description The reproject & accumulate stage of FSR2 is the most complicated and expensive stage in the algorithm. It brings together the results from many of the previous algorithmic steps and accumulates the reprojected color data from the previous frame together with the upsampled color data from the current frame. Please note the description in this documentation is designed to give you an intuition for the steps involved in this stage and does not necessarily match the implementation precisely. -![alt text](docs/media/super-resolution-temporal/reproject-and-accumulate-structure.svg "A diagram showing all phases in the rerpoject & accumulate portion of the FSR2 algorithm.") - The first step of the [Reproject & accumulate](#reproject-accumulate) stage is to assess each pixel for changes in its shading. If we are in a locked area, the luminance at the time the lock was created is compared to FSR2's shading change threshold. In a non-locked area, both the current frame and historical luminance values are used to make this determination. Shading change determination is a key part of FSR2's [Reproject & accumulate](#reproject-accumulate) stage, and feeds into many of the other parts of this stage. ![alt text](docs/media/super-resolution-temporal/upsample-with-lanczos.svg "A diagram showing upsampling of the current frame's input using Lanczos.") @@ -775,14 +758,14 @@ This table contains the resources consumed by the [Robust Contrast Adaptive Shar | Name | Temporal layer | Resolution | Format | Type | Notes | | ----------------------------|-----------------|--------------|-------------------------|-----------|----------------------------------------------| -| Output buffer | Previous frame | Presentation | `R16G16B16A16_FLOAT` | Texture | The output buffer produced by the [Reproject & Accumulate](#reproject-accumulate) stage for the current frame. 
Please note: This buffer is used internally by FSR2, and is distinct from the presentation buffer which is produced as an output from this stage after applying RCAS. Please note: This texture is part of an array of two textures along with the Output buffer texture which is consumed by the [Reproject & Accumulate](#reproject-accumulate) stage. The selection of which texture in the array is used for input and output is swapped each frame. | +| Upscaled buffer | Current frame | Presentation | `R16G16B16A16_FLOAT` | Texture | The output buffer produced by the [Reproject & Accumulate](#reproject-accumulate) stage for the current frame. Please note: This buffer is used internally by FSR2, and is distinct from the presentation buffer which is produced as an output from this stage after applying RCAS. Please note: This texture is part of an array of two textures along with the Output buffer texture which is consumed by the [Reproject & Accumulate](#reproject-accumulate) stage. The selection of which texture in the array is used for input and output is swapped each frame. | ### Resource outputs > The temporal layer indicates which frame the data should be sourced from. 'Current frame' means that the data should be sourced from resources created for the frame that is to be presented next. 'Previous frame' indicates that the data should be sourced from resources which were created for the frame that has just presented. The resolution column indicates if the data should be at 'rendered' resolution or 'presentation' resolution. 'Rendered' resolution indicates that the resource should match the resolution at which the application is performing its rendering. Conversely, 'presentation' indicates that the resolution of the target should match that which is to be presented to the user. 
| Name | Temporal layer | Resolution | Format | Type | Notes | | -----------------------------|-----------------|--------------|-------------------------|-----------|----------------------------------------------| -| Presentation buffer | Previous frame | Presentation | Application specific | Texture | The presentation buffer produced by the completed FSR2 algorithm for the current frame. | +| Presentation buffer | Current frame | Presentation | Application specific | Texture | The presentation buffer produced by the completed FSR2 algorithm for the current frame. | ### Description @@ -816,14 +799,19 @@ To build the FSR2 sample, please follow the following instructions: # Limitations -FSR 2 requires a GPU with typed UAV load support. +FSR2 requires a GPU with typed UAV load and R16G16B16A16_UNORM support. # Version history -| Version | Date | Notes | -| ---------------|-------------------|--------------------------------------------------------------| -| **2.1.0** | 2022-09-06 | Release of FidelityFX Super Resolution 2.1. | -| **2.0.1** | 2022-06-22 | Initial release of FidelityFX Super Resolution 2.0. | +| Version | Date | +| ---------------|-------------------| +| **2.2.0** | 2023-02-16 | +| **2.1.2** | 2022-10-19 | +| **2.1.1** | 2022-09-15 | +| **2.1.0** | 2022-09-08 | +| **2.0.1** | 2022-06-22 | + +Refer to changelog for more detail on versions. # References diff --git a/changelog.md b/changelog.md index 37ee28f..03b97b6 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,18 @@ +2023-02-16 | FidelityFX Super Resolution 2.2 +------- +- Introduction of API debug checker. +- Changes to improve "High Velocity Ghosting" situations. +- Changes to Luminance computation with pre-exposure application. +- Small motion vectors ignored in previous depth estimation. +- Changes to depth logic to improve disocclusion detection and avoid self-disocclusions. +- Dilated reactive mask logic updated to use temporal motion vector divergence to kill locks. 
+- New lock luminance resource. +- Accumulation overhauled to use temporal reactivity. +- Changed how intermediate signals are stored and tonemapped. +- Luminance instability logic improved. +- Tonemapping no longer applied during RCAS to retain more dynamic range. +- Fixes for multiple user reported issues on GitHub and elsewhere. Thank you for your feedback! + 2022-10-10 | FidelityFX Super Resolution 2.1.2 ------- - Fix resource precision issue. diff --git a/docs/media/super-resolution-temporal/algorithm-structure.svg b/docs/media/super-resolution-temporal/algorithm-structure.svg index ac2c199..e34bfa4 100644 --- a/docs/media/super-resolution-temporal/algorithm-structure.svg +++ b/docs/media/super-resolution-temporal/algorithm-structure.svg @@ -2,13 +2,13 @@ + x="43" + y="24" + width="1169" + height="678" + id="rect886" /> + id="layer1"> + transform="matrix(0.24970884,0,0,0.25148495,-7.8374624,-3.3616689)" + id="g1361"> + + + + + + stroke="#000000" + stroke-linejoin="round" + stroke-miterlimit="10" + fill="#f2f2f2" + id="rect901" /> + AutoExposure + /Exposure + + x="1009.5" + y="408.5" + width="142" + height="109" + stroke="#44546a" + stroke-width="1.33333" + stroke-miterlimit="8" + stroke-dasharray="5.33333, 4" + fill="#ffffff" + id="rect909" /> + x="1069.5" + y="110.5" + width="119" + height="60" + stroke="#000000" + stroke-width="1.33333" + stroke-miterlimit="8" + fill="#ffffff" + id="rect911" /> + Lock status† + R16G16_FLOAT + Previous frame + Presentation resolution + x="407.5" + y="463.5" + width="119" + height="59" + stroke="#000000" + stroke-width="1.33333" + stroke-miterlimit="8" + fill="#ffffff" + id="rect921" /> + x="239.5" + y="198.5" + width="119" + height="59" + stroke="#000000" + stroke-width="1.33333" + stroke-miterlimit="8" + id="rect923" /> + Compute + Luminance Pyramid + (SPD) + Render resolution + + Color buffer + APPLICATION SPECIFIED + Current frame + Render resolution + + + id="rect947" /> Pass + font-size="12px" + 
transform="translate(247.451,402)" + id="text949">Reconstruct & dilate + Render resolution + id="rect953" /> Buffer + font-size="12px" + transform="translate(101.809,398)" + id="text955">Motion + vectors + APPLICATION SPECIFIED + (2x FLOAT) + Current frame + Render resolution + x="78.500099" + y="315.5" + width="120" + height="60" + stroke="#000000" + stroke-width="1.33333" + stroke-miterlimit="8" + stroke-dasharray="10.6667, 4" + fill="#ffffff" + id="rect967" /> + Depth buffer + APPLICATION SPECIFIED + (1x FLOAT) + Current frame + Render + resolution + + + x="79.500099" + y="247.5" + width="120" + height="60" + stroke="#000000" + stroke-width="1.33333" + stroke-miterlimit="8" + stroke-dasharray="10.6667, 4" + fill="#ffffff" + id="rect985" /> + Exposure + R32_FLOAT + Current frame + 1x1 + x="401.5" + y="313.5" + width="119" + height="60" + stroke="#000000" + stroke-width="1.33333" + stroke-miterlimit="8" + fill="#ffffff" + id="rect995" /> + Reconstructed + Previous depth + R32 + _UINT + Current frame + Render resolution + + Dilated motion vectors + R16G16 + _FLOAT + Current frame + Render resolution + fill="#ffffff" + id="rect1021" /> + Dilated depth + R32_FLOAT + Current frame + Render resolution + + + + + + Lock Luma + R16_FLOAT + Current frame + Render resolution + + Depth clip + Render resolution + + + + + Reactive mask + R8_UNORM + Current frame + Render + resolution + + T&C mask + R8_UNORM + Current frame + Render + resolution + + Create locks + Render resolution + + + New Lock Mask + R8_UNORM + Current frame + Render resolution + + + Reproject + & Accumulate + Presentation resolution + + Prepared Input + Color + R16G16B16A16_FLOAT + Current frame + Render resolution + + Dilated Reactive + masks + R8G8_ + UNORM + Current frame + Render Depth clipRender resolution - + font-size="8px" + transform="translate(796.877,442)" + id="text1139">resolution + + - Reconstruct & dilateRender resolution + id="rect1145" /> - + RCASPresentationresolution - + font-size="12px" 
+ transform="translate(751.99,182)" + id="text1149">Internal Upscaled Update locksRender resolution - + font-size="12px" + transform="translate(837.49,182)" + id="text1151">* Adjust Input ColorRender resolution - + font-size="8px" + transform="translate(760.157,193)" + id="text1153">R16G16B16A16_FLOAT + Previous frame Auto-exposure (SPD)Render resolution + font-size="8px" + transform="translate(759.49,222)" + id="text1157">Presentation resolution + + + + + id="rect1167" /> - MotionvectorsAPPLICATION SPECIFIED(2x FLOAT)Current frameRender resolution + id="rect1169" /> + Luma History + Ω + R8G8B8A8_UNORM + Current frame + Render resolution + + + - Depth bufferAPPLICATION SPECIFIED(1x FLOAT)Current frameRenderresolution + id="rect1187" /> + Output buffer* + R16G16B16A16_SFLOAT + Current frame + Presentation resolution - Outputbuffer*R16G16B16A16_FLOATPrevious framePresentation resolution + id="rect1197" /> + Internal Upscaled* + R16G16B16A16_FLOAT + Current frame + Presentation resolution - Color bufferAPPLICATION SPECIFIEDCurrent frameRender resolution + id="rect1207" /> + Lock status† + R16G16_FLOAT + Current frame + Presentation resolution + - ExposureR32_FLOATCurrent frame1x1 + font-family="Calibri, Calibri_MSFontService, sans-serif" + font-weight="400" + font-size="13px" + transform="translate(235.41,654)" + id="text1219">Pass - Reactive maskR8_UNORMCurrent frameRenderresolution + id="rect1221" /> + Buffer + Key - Current LuminanceR16_FLOATCurrent frameRender resolution / 2 + id="rect1227" /> + Input/Output + Buffer - Color bufferR16G16B16A16_FLOATCurrent frameRender resolution + id="rect1233" /> + Lock status† + R16G16_FLOAT + Previous frame + Presentation resolution - Luma HistoryR8G8B8A8_UNORMCurrent frameRender resolution - + id="rect1243" /> + Lock status† + R16G16_FLOAT + Previous frame + Presentation resolution + d="m 0,-2 h 118.546 v 330.888 h -4 V 0 l 2,2 H 0 Z m 122.546,328.888 -6,12 -6,-12 z" + transform="matrix(1,0,0,-1,859,619.888)" + id="path1253" 
/> + d="M 991,241.325 V 133 h 63.35 v 4 H 993 l 2,-2 V 241.325 Z M 1052.35,129 l 12,6 -12,6 z" + id="path1255" /> - Lock status†R16G16_SFLOATPrevious framePresentation resolution - + id="rect1257" /> - Lock status†R16G16_FLOATPrevious framePresentation resolution - + id="rect1259" /> + Luma History + Ω + R8G8B8A8_UNORM + Previous frame + Render resolution + d="m 921,265 h 101.32 v 64.155 l -2,-2 h 12.63 v 4 h -14.63 V 267 l 2,2 H 921 Z m 109.95,58.155 12,6 -12,6 z" + id="path1271" /> + RCAS + Presentation + - Previous depthR32_UNORMCurrent frameRender resolution + font-family="Calibri, Calibri_MSFontService, sans-serif" + font-weight="400" + font-size="9px" + transform="translate(1085.12,496)" + id="text1279">resolution + + id="rect1283" /> Dilated motion vectorsR16G16_UINTCurrent frameRender resolution - Output + buffer + APPLICATION SPECIFIED + Current frame + Presentation resolution + + + + - Dilated depthR16_UINTCurrent frameRender resolution + fill="none" + fill-rule="evenodd" + id="path1301" /> + + Sharpening + Enabled + Sharpening Disabled + d="m 577,272 c 0,-3.314 2.462,-6 5.5,-6 3.038,0 5.5,2.686 5.5,6 0,3.314 -2.462,6 -5.5,6 -3.038,0 -5.5,-2.686 -5.5,-6 z" + fill-rule="evenodd" + id="path1311" /> + d="m 540,485.5 c 0,-3.038 2.462,-5.5 5.5,-5.5 3.038,0 5.5,2.462 5.5,5.5 0,3.038 -2.462,5.5 -5.5,5.5 -3.038,0 -5.5,-2.462 -5.5,-5.5 z" + fill-rule="evenodd" + id="path1313" /> + d="m 634,159.5 c 0,-3.038 2.462,-5.5 5.5,-5.5 3.038,0 5.5,2.462 5.5,5.5 0,3.038 -2.462,5.5 -5.5,5.5 -3.038,0 -5.5,-2.462 -5.5,-5.5 z" + fill-rule="evenodd" + id="path1315" /> + d="m 292,270.5 c 0,-3.038 2.462,-5.5 5.5,-5.5 3.038,0 5.5,2.462 5.5,5.5 0,3.038 -2.462,5.5 -5.5,5.5 -3.038,0 -5.5,-2.462 -5.5,-5.5 z" + fill-rule="evenodd" + id="path1317" /> + d="m 609,188 v 189.033 h -4 V 188 Z m 4,187.033 -6,12 -6,-12 z" + id="path1319" /> - + - Disocclusion maskR8_UNORMCurrent frameRender resolution + fill="none" + fill-rule="evenodd" + id="path1323" /> - - Reprojected 
locks†R16G16_SFLOATCurrent framePresentation resolution - + - Output buffer*R16G16B16A16_SFLOATCurrent framePresentation resolution - + - Output buffer*R16G16B16A16_FLOATCurrent framePresentation resolution - + - Reprojected locks†R16G16_FLOATCurrent framePresentation resolution - - - + fill="none" + fill-rule="evenodd" + id="path1331" /> + d="M 44,158 H 640.041" + stroke="#000000" + stroke-width="4" + stroke-miterlimit="8" + fill="none" + fill-rule="evenodd" + id="path1333" /> + d="m 642,157.999 0.06,219.454 -4,0.001 L 638,158.001 Z m 4.06,217.453 -5.997,12.001 -6.003,-11.998 z" + id="path1335" /> + d="M 658.305,99.6654 199,99" + stroke="#000000" + stroke-width="4" + stroke-miterlimit="8" + stroke-dasharray="4, 4" + fill="none" + fill-rule="evenodd" + id="path1337" /> - - - - - Reproject & AccumulatePresentation resolution - - + stroke-dasharray="4, 4" + fill="none" + fill-rule="evenodd" + transform="matrix(-1,0,0,1,674.254,72)" + id="path1339" /> + d="m 650.5,158.5 h 30.359" + stroke="#7f7f7f" + stroke-width="3.33333" + stroke-miterlimit="8" + fill="none" + fill-rule="evenodd" + id="path1341" /> + d="M 2,9.99989 V 13.9999 H -2 V 9.99989 Z m 1e-5,8.00001 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -10e-6,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 V 102 h -4 V 97.9999 Z M 2.00004,106 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -10e-6,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z 
m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 0.905 H -1.99989 L -1.9999,314 Z M -6,12 0,0 6,12 Z" + transform="matrix(1,0,0,-1,673,386.905)" + id="path1343" /> + d="m 2,10.0001 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 10e-6,8 V 102 h -4 v -3.9999 z m 0,7.9999 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 1e-5,4 h -4 l -1e-5,-4 z m 1e-5,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z m 0,8 v 4 h -4 v -4 z M -6,12 0,0 6,12 Z" + transform="matrix(1,0,0,-1,656,386.985)" + id="path1345" /> - - Presentation bufferAPPLICATION SPECIFIEDCurrent framePresentation resolution + stroke-dasharray="4, 4" + fill="none" + fill-rule="evenodd" + transform="matrix(-1,0,0,1,89.1643,641)" + id="path1347" /> + d="M 54,664 H 88.6146" + stroke="#000000" + stroke-width="4" + stroke-miterlimit="8" + fill="none" + fill-rule="evenodd" + id="path1349" /> Key - : Dataflow + : Optional Dataflow + + fill="none" + fill-rule="evenodd" + id="path1355" /> Input/Output Buffer - + font-size="13px" + transform="translate(52.5376,689)" + id="text1357">Buffer names with symbols indicate temporal data. 
- - - - - + d="m 984,252 200.44,0.565" + stroke="#000000" + stroke-width="4" + stroke-miterlimit="8" + fill="none" + fill-rule="evenodd" + id="path1359" /> diff --git a/docs/media/super-resolution-temporal/api-architecture.svg b/docs/media/super-resolution-temporal/api-architecture.svg index 8e8f8b5..b695ce5 100644 --- a/docs/media/super-resolution-temporal/api-architecture.svg +++ b/docs/media/super-resolution-temporal/api-architecture.svg @@ -63,7 +63,7 @@ font-size="20px" x="-44.619999" y="24" - id="tspan172">Super Resolution 2.0Super Resolution 2 + + id="defs309"> + x="278" + y="124" + width="743" + height="390" + id="rect316" /> + + + + + + + + + + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + clip-path="url(#clip0-2)" + transform="matrix(0.26458333,0,0,0.26458333,-71.593309,-30.941483)" + id="g1055"> + d="m 529.001,315 293.231,0.166 -0.002,4 L 528.999,319 Z m 291.234,-3.836 11.996,6.007 -12.003,5.993 z" + id="path951" /> + x="529" + y="204" + width="239" + height="226" + id="rect953" /> + FidelityFX + Super Resolution FidelityFX Super Resolution2.0 + font-size="16px" + transform="translate(701.797,243)" + id="text959">2 + id="path961" /> - Output bufferAPPLICATION SPECIFIEDCurrent framePresentation resolution + id="rect963" /> + Output buffer + APPLICATION SPECIFIED + Current frame + Presentation resolution - Depth bufferAPPLICATION SPECIFIED (1x FLOAT)Current frameRenderresolution + id="rect973" /> + Depth buffer + APPLICATION SPECIFIED (1x FLOAT) + Current frame + Render resolution - Velocity bufferAPPLICATION SPECIFIED (2x FLOAT)Current frameRender resolution + id="rect983" /> + Velocity buffer + APPLICATION SPECIFIED (2x FLOAT) + Current frame + Render resolution + d="m 465,159 h 31.354 v 79.674" + stroke="#000000" + stroke-width="4" + stroke-miterlimit="8" + fill="none" + fill-rule="evenodd" + id="path993" /> - Color bufferAPPLICATION SPECIFIEDCurrent frameRender resolution + id="rect995" /> + Color + buffer + APPLICATION 
SPECIFIED + Current frame + Render resolution + d="M 0,0 H 31.3544 V 78.8643" + stroke="#000000" + stroke-width="4" + stroke-miterlimit="8" + fill="none" + fill-rule="evenodd" + transform="matrix(1,0,0,-1,465,316.864)" + id="path1007" /> - OutputbufferR16G16B16A16_FLOATPrevious framePresentation resolution + id="rect1009" /> + Output buffer + R16G16B16A16_FLOAT + Previous frame + Presentation resolution + d="m 465,315 h 54.369 v 4 H 465 Z m 52.369,-4 12,6 -12,6 z" + id="path1019" /> + + id="path1023" /> + + + + id="rect1031" /> + Reactive Mask + APPLICATION + SPECIFIED + Current frame + Render resolution + + Transparency and Composition + Mask + APPLICATION SPECIFIED + Current frame + Render resolution diff --git a/release_notes.txt b/release_notes.txt index 9d4ef22..5f2ffb2 100644 --- a/release_notes.txt +++ b/release_notes.txt @@ -1,28 +1,23 @@ -FidelityFX Super Resolution 2.1.2 +FidelityFX Super Resolution 2.2 ================================= Features -------- -- Reactivity mask interpretation has been modified to give game developers more levers to alleviate ghosting and other artefacts. -- Sample has example use of Reactivity mask. -- Sample has example use of Transparency and Composition mask. -- Sample has particles and animated textures. +- API debug Checker +- Changes to improve "High Velocity Ghosting" situations. Changes ------- -- Reactivity mask now uses full range of values in the mask (0.0 - 1.0). -- Reactivity and Composition and Transparency mask dialation is now based on input colors to avoid expanding reactiveness into non-relevant upscaled areas. -- Disocclusion logic improved in order to detect disocclusions in areas with very small depth separation. -- RCAS pass forced to fp32 mode to reduce chance of issues seen with HDR input values. -- Fix for display-resolution motion vectors interpretation. -- fp16/fp32 computation review, readjusting balance of fp16/fp32 for maximum quality. -- Amended motion vector description within the documentation. 
-- Various documentation edits for spelling. -- Clarified the frame delta time input value within the readme documentation. -- Fixed issue with bad memset within the shader blob selection logic. -- Fix issue with reprojection data on a reset. -- Fix resource precision issue. -- Clamp coordinates in software sampling logic. +- Changes to Luminance computation with pre-exposure application. +- Small motion vectors ignored in previous depth estimation. +- Changes to depth logic to improve disocclusion detection and avoid self-disocclusions. +- Dilated reactive mask logic updated to use temporal motion vector divergence to kill locks. +- New lock luminance resource. +- Accumulation overhauled to use temporal reactivity. +- Changed how intermediate signals are stored and tonemapped. +- Luminance instability logic improved. +- Tonemapping no longer applied during RCAS to retain more dynamic range. +- Fixes for multiple user reported issues on GitHub and elsewhere. Thank you for your feedback! 
Limitations ----------- diff --git a/src/DX12/CMakeLists.txt b/src/DX12/CMakeLists.txt index 01330f1..7a507ec 100644 --- a/src/DX12/CMakeLists.txt +++ b/src/DX12/CMakeLists.txt @@ -86,12 +86,10 @@ set(fsr2_shaders_src ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_common.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_lock.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_postprocess_lock_status.h - ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_prepare_input_color.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.hlsl - ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_prepare_input_color_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_reproject.h @@ -100,6 +98,8 @@ set(fsr2_shaders_src ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_upsample.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_rcas.h + ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen.h + ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.hlsl) set(particle_shaders_src @@ -129,9 +129,12 @@ source_group("fsr2_shaders" FILES ${fsr2_shaders_src}) source_group("particle_shaders" FILES ${particle_shaders_src}) source_group("sample_shaders" FILES ${sample_shaders_src}) 
-copyCommand("${spd_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) -copyCommand("${fsr1_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) -copyCommand("${fsr2_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) +copyTargetCommand("${spd_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX copied_dx12_shaders_spd_src) +add_dependencies(copied_dx12_shaders_spd_src Cauldron_DX12 copied_dx12_shaders_postproc_src) +copyTargetCommand("${fsr1_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX copied_dx12_shaders_fsr1_src) +add_dependencies(copied_dx12_shaders_fsr1_src copied_dx12_shaders_spd_src) +copyTargetCommand("${fsr2_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX copied_dx12_shaders_fsr2_src) +add_dependencies(copied_dx12_shaders_fsr2_src copied_dx12_shaders_fsr1_src) copyCommand("${particle_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) copyCommand("${sample_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) @@ -140,6 +143,7 @@ target_compile_definitions(FSR2_Sample_DX12 PRIVATE USE_PIX=1 $<$BuildDevUI(&m_UIState); + } } else if (m_UIState.m_nUpscaleType <= UPSCALE_TYPE_FSR_1_0) { diff --git a/src/DX12/UpscaleContext_FSR2_API.cpp b/src/DX12/UpscaleContext_FSR2_API.cpp index 896a5b5..5fa12f0 100644 --- a/src/DX12/UpscaleContext_FSR2_API.cpp +++ b/src/DX12/UpscaleContext_FSR2_API.cpp @@ -67,7 +67,7 @@ static uint64_t getMemoryUsageSnapshot(ID3D12Device* device) } UpscaleContext_FSR2_API::UpscaleContext_FSR2_API(UpscaleType type, std::string name) - : UpscaleContext(name) + : m_enableDebugCheck(false), UpscaleContext(name) { } @@ -81,6 +81,19 @@ void UpscaleContext_FSR2_API::OnDestroy() UpscaleContext::OnDestroy(); } +static void onFSR2Msg(FfxFsr2MsgType type, const wchar_t* message) +{ + if (type == FFX_FSR2_MESSAGE_TYPE_ERROR) + { + OutputDebugStringW(L"FSR2_API_DEBUG_ERROR: "); + } else if (type == FFX_FSR2_MESSAGE_TYPE_WARNING) + { + OutputDebugStringW(L"FSR2_API_DEBUG_WARNING: "); + } + OutputDebugStringW(message); + 
OutputDebugStringW(L"\n"); +} + void UpscaleContext_FSR2_API::OnCreateWindowSizeDependentResources( ID3D12Resource* input, ID3D12Resource* output, @@ -107,13 +120,18 @@ void UpscaleContext_FSR2_API::OnCreateWindowSizeDependentResources( initializationParameters.flags = FFX_FSR2_ENABLE_AUTO_EXPOSURE; if (m_bInvertedDepth) { - initializationParameters.flags |= FFX_FSR2_ENABLE_DEPTH_INVERTED; + initializationParameters.flags |= FFX_FSR2_ENABLE_DEPTH_INVERTED | FFX_FSR2_ENABLE_DEPTH_INFINITE; } - if (hdr) { - initializationParameters.flags |= FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE; + if (m_enableDebugCheck) + { + initializationParameters.flags |= FFX_FSR2_ENABLE_DEBUG_CHECKING; + initializationParameters.fpMessage = &onFSR2Msg; } + // Input data is HDR + initializationParameters.flags |= FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE; + #if COMPILE_FROM_HLSL // Override the shader creation so we can compile from HLSL source. FfxFsr2Interface d3dInterface = {}; @@ -144,6 +162,10 @@ void UpscaleContext_FSR2_API::OnDestroyWindowSizeDependentResources() void UpscaleContext_FSR2_API::BuildDevUI(UIState* pState) { + if (ImGui::Checkbox("Enable API Debug Checking", &m_enableDebugCheck)) + { + ReloadPipelines(); + } } void UpscaleContext_FSR2_API::ReloadPipelines() @@ -177,6 +199,15 @@ void UpscaleContext_FSR2_API::GenerateReactiveMask(ID3D12GraphicsCommandList* pC void UpscaleContext_FSR2_API::Draw(ID3D12GraphicsCommandList* pCommandList, const FfxUpscaleSetup& cameraSetup, UIState* pState) { + float farPlane = pState->camera.GetFarPlane(); + float nearPlane = pState->camera.GetNearPlane(); + + if (m_bInvertedDepth) + { + // Cauldron1.0 can have planes inverted. Adjust before providing to FSR2. 
+ std::swap(farPlane, nearPlane); + } + FfxFsr2DispatchDescription dispatchParameters = {}; dispatchParameters.commandList = ffxGetCommandListDX12(pCommandList); dispatchParameters.color = ffxGetResourceDX12(&context, cameraSetup.unresolvedColorResource, L"FSR2_InputColor"); @@ -215,8 +246,8 @@ void UpscaleContext_FSR2_API::Draw(ID3D12GraphicsCommandList* pCommandList, cons dispatchParameters.preExposure = 1.0f; dispatchParameters.renderSize.width = pState->renderWidth; dispatchParameters.renderSize.height = pState->renderHeight; - dispatchParameters.cameraFar = pState->camera.GetFarPlane(); - dispatchParameters.cameraNear = pState->camera.GetNearPlane(); + dispatchParameters.cameraFar = farPlane; + dispatchParameters.cameraNear = nearPlane; dispatchParameters.cameraFovAngleVertical = pState->camera.GetFovV(); pState->bReset = false; diff --git a/src/DX12/UpscaleContext_FSR2_API.h b/src/DX12/UpscaleContext_FSR2_API.h index 64ffcc8..b67ef23 100644 --- a/src/DX12/UpscaleContext_FSR2_API.h +++ b/src/DX12/UpscaleContext_FSR2_API.h @@ -60,5 +60,6 @@ class UpscaleContext_FSR2_API : public UpscaleContext FfxFsr2ContextDescription initializationParameters = {}; FfxFsr2Context context; + bool m_enableDebugCheck; float memoryUsageInMegabytes = 0; }; diff --git a/src/VK/CMakeLists.txt b/src/VK/CMakeLists.txt index a8d8b60..e849d8c 100644 --- a/src/VK/CMakeLists.txt +++ b/src/VK/CMakeLists.txt @@ -87,20 +87,20 @@ set(fsr2_shaders_src ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_common.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_lock.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_postprocess_lock_status.h - ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_prepare_input_color.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_reproject.h 
${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_sample.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_upsample.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_rcas.h - ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.glsl - ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_prepare_input_color_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl - ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.glsl) + ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.glsl + ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen.h + ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.glsl + ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.glsl) set(particle_shaders_src ${CMAKE_CURRENT_SOURCE_DIR}/../GpuParticleShaders/ParticleStructs.h @@ -129,9 +129,12 @@ source_group("fsr2_shaders" FILES ${fsr2_shaders_src}) source_group("particle_shaders" FILES ${particle_shaders_src}) source_group("sample_shaders" FILES ${sample_shaders_src}) -copyCommand("${spd_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) -copyCommand("${fsr1_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) -copyCommand("${fsr2_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) +copyTargetCommand("${spd_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK copied_vk_shaders_spd_src) +add_dependencies(copied_vk_shaders_spd_src Cauldron_VK copied_vk_shaders_postproc_src) +copyTargetCommand("${fsr1_shaders_src}"
${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK copied_vk_shaders_fsr1_src) +add_dependencies(copied_vk_shaders_fsr1_src copied_vk_shaders_spd_src) +copyTargetCommand("${fsr2_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK copied_vk_shaders_fsr2_src) +add_dependencies(copied_vk_shaders_fsr2_src copied_vk_shaders_fsr1_src) copyCommand("${particle_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) copyCommand("${sample_shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) @@ -140,6 +143,7 @@ target_compile_definitions(FSR2_Sample_VK PRIVATE $<$:FSR target_link_libraries(FSR2_Sample_VK LINK_PUBLIC FSR2_Sample_Common Cauldron_VK ImGUI d3dcompiler Vulkan::Vulkan ffx_fsr2_api_x64 ffx_fsr2_api_vk_x64) # ffx_fsr2_api_x64 target_include_directories(FSR2_Sample_VK PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api ${CMAKE_CURRENT_SOURCE_DIR}/../../libs) target_link_directories(FSR2_Sample_VK PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../libs) +add_dependencies(FSR2_Sample_VK copied_vk_shaders_fsr2_src) set_target_properties(FSR2_Sample_VK PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin" DEBUG_POSTFIX "d") set_source_files_properties(${Shaders_src} PROPERTIES VS_TOOL_OVERRIDE "Text") diff --git a/src/VK/FSR2Sample.cpp b/src/VK/FSR2Sample.cpp index 2491710..92f4d92 100644 --- a/src/VK/FSR2Sample.cpp +++ b/src/VK/FSR2Sample.cpp @@ -790,8 +790,8 @@ int WINAPI WinMain(HINSTANCE hInstance, LPSTR lpCmdLine, int nCmdShow) { - LPCSTR Name = "FidelityFX Super Resolution 2.1"; + LPCSTR Name = "FidelityFX Super Resolution 2.2"; // create new DX sample return RunFramework(hInstance, lpCmdLine, nCmdShow, new FSR2Sample(Name)); -} \ No newline at end of file +} diff --git a/src/VK/UI.cpp b/src/VK/UI.cpp index 8c8dcfb..05bcc93 100644 --- a/src/VK/UI.cpp +++ b/src/VK/UI.cpp @@ -224,6 +224,11 @@ void FSR2Sample::BuildUI() ImGui::Combo("Reactive Mask mode", (int*)(&m_UIState.nReactiveMaskMode), reactiveOptions, _countof(reactiveOptions)); ImGui::Checkbox("Use Transparency
and Composition Mask", &m_UIState.bCompositionMask); + if (m_pRenderer && + ImGui::CollapsingHeader("Dev Options", ImGuiTreeNodeFlags_DefaultOpen)) + { + m_pRenderer->BuildDevUI(&m_UIState); + } } else { diff --git a/src/VK/UpscaleContext_FSR2_API.cpp b/src/VK/UpscaleContext_FSR2_API.cpp index 4805ee9..f58ad9e 100644 --- a/src/VK/UpscaleContext_FSR2_API.cpp +++ b/src/VK/UpscaleContext_FSR2_API.cpp @@ -64,7 +64,7 @@ static VkDeviceSize getMemoryUsageSnapshot(VkPhysicalDevice physicalDevice) } UpscaleContext_FSR2_API::UpscaleContext_FSR2_API(UpscaleType type, std::string name) - : UpscaleContext(name) + : m_enableDebugCheck(false), UpscaleContext(name) { } @@ -77,6 +77,21 @@ void UpscaleContext_FSR2_API::OnCreate(const FfxUpscaleInitParams& initParams) void UpscaleContext_FSR2_API::OnDestroy() { UpscaleContext::OnDestroy(); + +} + +static void onFSR2Msg(FfxFsr2MsgType type, const wchar_t* message) +{ + if (type == FFX_FSR2_MESSAGE_TYPE_ERROR) + { + OutputDebugStringW(L"FSR2_API_DEBUG_ERROR: "); + } + else if (type == FFX_FSR2_MESSAGE_TYPE_WARNING) + { + OutputDebugStringW(L"FSR2_API_DEBUG_WARNING: "); + } + OutputDebugStringW(message); + OutputDebugStringW(L"\n"); } void UpscaleContext_FSR2_API::OnCreateWindowSizeDependentResources( @@ -104,13 +119,18 @@ void UpscaleContext_FSR2_API::OnCreateWindowSizeDependentResources( initializationParameters.flags = FFX_FSR2_ENABLE_AUTO_EXPOSURE; if (m_bInvertedDepth) { - initializationParameters.flags |= FFX_FSR2_ENABLE_DEPTH_INVERTED; + initializationParameters.flags |= FFX_FSR2_ENABLE_DEPTH_INVERTED | FFX_FSR2_ENABLE_DEPTH_INFINITE; } - if (hdr) { - initializationParameters.flags |= FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE; + if (m_enableDebugCheck) + { + initializationParameters.flags |= FFX_FSR2_ENABLE_DEBUG_CHECKING; + initializationParameters.fpMessage = &onFSR2Msg; } + // Input data is HDR + initializationParameters.flags |= FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE; + const uint64_t memoryUsageBefore = 
getMemoryUsageSnapshot(m_pDevice->GetPhysicalDevice()); ffxFsr2ContextCreate(&context, &initializationParameters); const uint64_t memoryUsageAfter = getMemoryUsageSnapshot(m_pDevice->GetPhysicalDevice()); @@ -129,12 +149,18 @@ void UpscaleContext_FSR2_API::OnDestroyWindowSizeDependentResources() } } +void UpscaleContext_FSR2_API::ReloadPipelines() +{ + m_pDevice->GPUFlush(); + OnDestroyWindowSizeDependentResources(); + OnCreateWindowSizeDependentResources(m_input, m_output, m_renderWidth, m_renderHeight, m_displayWidth, m_displayHeight, m_hdr); +} + void UpscaleContext_FSR2_API::BuildDevUI(UIState* pState) { - if (memoryUsageInMegabytes > 0) { - char meminfo[256]; - sprintf_s(meminfo, "FSR 2.0 GPU memory usage: %.2f MB", memoryUsageInMegabytes); - pState->Text(meminfo); + if (ImGui::Checkbox("Enable API Debug Checking", &m_enableDebugCheck)) + { + ReloadPipelines(); } pState->bReset = ImGui::Button("Reset accumulation"); @@ -164,6 +190,15 @@ void UpscaleContext_FSR2_API::GenerateReactiveMask(VkCommandBuffer pCommandList, void UpscaleContext_FSR2_API::Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) { + float farPlane = pState->camera.GetFarPlane(); + float nearPlane = pState->camera.GetNearPlane(); + + if (m_bInvertedDepth) + { + // Cauldron1.0 can have planes inverted. Adjust before providing to FSR2. 
+ std::swap(farPlane, nearPlane); + } + FfxFsr2DispatchDescription dispatchParameters = {}; dispatchParameters.commandList = ffxGetCommandListVK(commandBuffer); dispatchParameters.color = ffxGetTextureResourceVK(&context, cameraSetup.unresolvedColorResource->Resource(), cameraSetup.unresolvedColorResourceView, cameraSetup.unresolvedColorResource->GetWidth(), cameraSetup.unresolvedColorResource->GetHeight(), cameraSetup.unresolvedColorResource->GetFormat(), L"FSR2_InputColor"); @@ -202,8 +237,8 @@ void UpscaleContext_FSR2_API::Draw(VkCommandBuffer commandBuffer, const FfxUpsca dispatchParameters.preExposure = 1.0f; dispatchParameters.renderSize.width = pState->renderWidth; dispatchParameters.renderSize.height = pState->renderHeight; - dispatchParameters.cameraFar = pState->camera.GetFarPlane(); - dispatchParameters.cameraNear = pState->camera.GetNearPlane(); + dispatchParameters.cameraFar = farPlane; + dispatchParameters.cameraNear = nearPlane; dispatchParameters.cameraFovAngleVertical = pState->camera.GetFovV(); pState->bReset = false; diff --git a/src/VK/UpscaleContext_FSR2_API.h b/src/VK/UpscaleContext_FSR2_API.h index dc620ec..f6d365a 100644 --- a/src/VK/UpscaleContext_FSR2_API.h +++ b/src/VK/UpscaleContext_FSR2_API.h @@ -48,8 +48,11 @@ class UpscaleContext_FSR2_API : public UpscaleContext virtual void Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState); private: + void ReloadPipelines(); + FfxFsr2ContextDescription initializationParameters = {}; FfxFsr2Context context; + bool m_enableDebugCheck; float memoryUsageInMegabytes = 0; }; diff --git a/src/ffx-fsr2-api/CMakeLists.txt b/src/ffx-fsr2-api/CMakeLists.txt index 7ef023c..6195dee 100644 --- a/src/ffx-fsr2-api/CMakeLists.txt +++ b/src/ffx-fsr2-api/CMakeLists.txt @@ -1,7 +1,7 @@ # This file is part of the FidelityFX SDK. -# -# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. -# +# +# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. 
All rights reserved. +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights diff --git a/src/ffx-fsr2-api/dx12/CMakeLists.txt b/src/ffx-fsr2-api/dx12/CMakeLists.txt index 77a30c4..e08ebd2 100644 --- a/src/ffx-fsr2-api/dx12/CMakeLists.txt +++ b/src/ffx-fsr2-api/dx12/CMakeLists.txt @@ -1,7 +1,7 @@ # This file is part of the FidelityFX SDK. -# -# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. -# +# +# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights @@ -31,12 +31,12 @@ file(GLOB SHADERS "${CMAKE_CURRENT_SOURCE_DIR}/../shaders/*.hlsl") set(PASS_SHADERS + ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_tcr_autogen_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_autogen_reactive_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_accumulate_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_depth_clip_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_lock_pass.hlsl - ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_prepare_input_color_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_rcas_pass.hlsl) diff --git a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp index 8973648..f508d7d 100644 --- a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp +++ b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. 
// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -271,6 +271,8 @@ DXGI_FORMAT ffxGetDX12FormatFromSurfaceFormat(FfxSurfaceFormat surfaceFormat) return DXGI_FORMAT_R16_SNORM; case(FFX_SURFACE_FORMAT_R8_UNORM): return DXGI_FORMAT_R8_UNORM; + case(FFX_SURFACE_FORMAT_R8_UINT): + return DXGI_FORMAT_R8_UINT; case(FFX_SURFACE_FORMAT_R8G8_UNORM): return DXGI_FORMAT_R8G8_UNORM; case(FFX_SURFACE_FORMAT_R32_FLOAT): @@ -324,6 +326,8 @@ FfxSurfaceFormat ffxGetSurfaceFormatDX12(DXGI_FORMAT format) return FFX_SURFACE_FORMAT_R16_SNORM; case(DXGI_FORMAT_R8_UNORM): return FFX_SURFACE_FORMAT_R8_UNORM; + case(DXGI_FORMAT_R8_UINT): + return FFX_SURFACE_FORMAT_R8_UINT; default: return FFX_SURFACE_FORMAT_UNKNOWN; } @@ -372,13 +376,15 @@ FfxResource ffxGetResourceDX12(FfxFsr2Context* context, ID3D12Resource* dx12Reso return resource; } -ID3D12Resource* ffxGetDX12ResourcePtr(FfxFsr2Context* context, uint32_t uavResId) +ID3D12Resource* ffxGetDX12ResourcePtr(FfxFsr2Context* context, uint32_t resId) { FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)(context); - contextPrivate->uavResources[uavResId].internalIndex; - BackendContext_DX12* backendContext = (BackendContext_DX12*)(contextPrivate->contextDescription.callbacks.scratchBuffer); - return backendContext->resources[contextPrivate->uavResources[uavResId].internalIndex].resourcePtr; + + if (resId > FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK) + return backendContext->resources[contextPrivate->uavResources[resId].internalIndex].resourcePtr; + else // Input resources are present only in srvResources array + return backendContext->resources[contextPrivate->srvResources[resId].internalIndex].resourcePtr; } FfxErrorCode 
RegisterResourceDX12( @@ -996,7 +1002,7 @@ FfxErrorCode CreatePipelineDX12( flags |= (canForceWave64) ? FSR2_SHADER_PERMUTATION_FORCE_WAVE64 : 0; flags |= (supportedFP16 && (pass != FFX_FSR2_PASS_RCAS)) ? FSR2_SHADER_PERMUTATION_ALLOW_FP16 : 0; - const Fsr2ShaderBlobDX12 shaderBlob = fsr2GetPermutationBlobByIndex(pass, flags); + const Fsr2ShaderBlobDX12 shaderBlob = fsr2GetPermutationBlobByIndexDX12(pass, flags); FFX_ASSERT(shaderBlob.data && shaderBlob.size); // set up root signature @@ -1384,8 +1390,8 @@ static FfxErrorCode executeGpuJobCompute(BackendContext_DX12* backendContext, Ff // set root constants, free local copy { for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < job->computeJobDescriptor.pipeline.constCount; ++currentRootConstantIndex) { - - dx12CommandList->SetComputeRoot32BitConstants(descriptorTableIndex + currentRootConstantIndex, job->computeJobDescriptor.cbs[currentRootConstantIndex].uint32Size, job->computeJobDescriptor.cbs[currentRootConstantIndex].data, 0); + const uint32_t currentCbSlotIndex = job->computeJobDescriptor.pipeline.cbResourceBindings[currentRootConstantIndex].slotIndex; + dx12CommandList->SetComputeRoot32BitConstants(descriptorTableIndex + currentCbSlotIndex, job->computeJobDescriptor.cbs[currentCbSlotIndex].uint32Size, job->computeJobDescriptor.cbs[currentCbSlotIndex].data, 0); } } diff --git a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h index d3626fc..db82fbf 100644 --- a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h +++ b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -97,7 +97,7 @@ FFX_API FfxResource ffxGetResourceDX12( FfxResourceStates state = FFX_RESOURCE_STATE_COMPUTE_READ, UINT shaderComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING); -/// Retrieve a ID3D12Resource pointer associated with a UAV RESOURCE_IDENTIFIER. +/// Retrieve a ID3D12Resource pointer associated with a RESOURCE_IDENTIFIER. /// Used for debug purposes when blitting internal surfaces. /// /// @param [in] context A pointer to a FfxFsr2Context structure. diff --git a/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.cpp b/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.cpp index c61555b..4bda9de 100644 --- a/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.cpp +++ b/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.cpp @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,37 +22,37 @@ #include "ffx_fsr2_shaders_dx12.h" #include "../../ffx_util.h" +#include "ffx_fsr2_tcr_autogen_pass_permutations.h" #include "ffx_fsr2_autogen_reactive_pass_permutations.h" #include "ffx_fsr2_accumulate_pass_permutations.h" #include "ffx_fsr2_compute_luminance_pyramid_pass_permutations.h" #include "ffx_fsr2_depth_clip_pass_permutations.h" #include "ffx_fsr2_lock_pass_permutations.h" -#include "ffx_fsr2_prepare_input_color_pass_permutations.h" #include "ffx_fsr2_reconstruct_previous_depth_pass_permutations.h" #include "ffx_fsr2_rcas_pass_permutations.h" +#include "ffx_fsr2_tcr_autogen_pass_wave64_permutations.h" #include "ffx_fsr2_autogen_reactive_pass_wave64_permutations.h" #include "ffx_fsr2_accumulate_pass_wave64_permutations.h" #include "ffx_fsr2_compute_luminance_pyramid_pass_wave64_permutations.h" #include "ffx_fsr2_depth_clip_pass_wave64_permutations.h" #include "ffx_fsr2_lock_pass_wave64_permutations.h" -#include "ffx_fsr2_prepare_input_color_pass_wave64_permutations.h" #include "ffx_fsr2_reconstruct_previous_depth_pass_wave64_permutations.h" #include "ffx_fsr2_rcas_pass_wave64_permutations.h" +#include "ffx_fsr2_tcr_autogen_pass_16bit_permutations.h" #include "ffx_fsr2_autogen_reactive_pass_16bit_permutations.h" #include "ffx_fsr2_accumulate_pass_16bit_permutations.h" #include "ffx_fsr2_depth_clip_pass_16bit_permutations.h" #include "ffx_fsr2_lock_pass_16bit_permutations.h" -#include "ffx_fsr2_prepare_input_color_pass_16bit_permutations.h" #include "ffx_fsr2_reconstruct_previous_depth_pass_16bit_permutations.h" #include "ffx_fsr2_rcas_pass_16bit_permutations.h" +#include "ffx_fsr2_tcr_autogen_pass_wave64_16bit_permutations.h" #include "ffx_fsr2_autogen_reactive_pass_wave64_16bit_permutations.h" #include "ffx_fsr2_accumulate_pass_wave64_16bit_permutations.h" #include 
"ffx_fsr2_depth_clip_pass_wave64_16bit_permutations.h" #include "ffx_fsr2_lock_pass_wave64_16bit_permutations.h" -#include "ffx_fsr2_prepare_input_color_pass_wave64_16bit_permutations.h" #include "ffx_fsr2_reconstruct_previous_depth_pass_wave64_16bit_permutations.h" #include "ffx_fsr2_rcas_pass_wave64_16bit_permutations.h" @@ -73,37 +73,6 @@ key.FFX_FSR2_OPTION_APPLY_SHARPENING = FFX_CONTAINS_FLAG(options, FSR2_SHADER_PE #endif // #if defined(POPULATE_SHADER_BLOB) #define POPULATE_SHADER_BLOB(info, index) { info[index].blobData, info[index].blobSize, info[index].numUAVResources, info[index].numSRVResources, info[index].numCBVResources, info[index].uavResourceNames, info[index].uavResourceBindings, info[index].srvResourceNames, info[index].srvResourceBindings, info[index].cbvResourceNames, info[index].cbvResourceBindings } -static Fsr2ShaderBlobDX12 fsr2GetPrepareInputColorPassPermutationBlobByIndex(uint32_t permutationOptions, bool isWave64, bool is16bit) { - - ffx_fsr2_prepare_input_color_pass_PermutationKey key; - - POPULATE_PERMUTATION_KEY(permutationOptions, key); - - if (isWave64) { - - if (is16bit) { - - const int32_t tableIndex = g_ffx_fsr2_prepare_input_color_pass_wave64_16bit_IndirectionTable[key.index]; - return POPULATE_SHADER_BLOB(g_ffx_fsr2_prepare_input_color_pass_wave64_16bit_PermutationInfo, tableIndex); - } else { - - const int32_t tableIndex = g_ffx_fsr2_prepare_input_color_pass_wave64_IndirectionTable[key.index]; - return POPULATE_SHADER_BLOB(g_ffx_fsr2_prepare_input_color_pass_wave64_PermutationInfo, tableIndex); - } - } else { - - if (is16bit) { - - const int32_t tableIndex = g_ffx_fsr2_prepare_input_color_pass_16bit_IndirectionTable[key.index]; - return POPULATE_SHADER_BLOB(g_ffx_fsr2_prepare_input_color_pass_16bit_PermutationInfo, tableIndex); - } else { - - const int32_t tableIndex = g_ffx_fsr2_prepare_input_color_pass_IndirectionTable[key.index]; - return POPULATE_SHADER_BLOB(g_ffx_fsr2_prepare_input_color_pass_PermutationInfo, tableIndex); 
- } - } -} - static Fsr2ShaderBlobDX12 fsr2GetDepthClipPassPermutationBlobByIndex(uint32_t permutationOptions, bool isWave64, bool is16bit) { ffx_fsr2_depth_clip_pass_PermutationKey key; @@ -310,15 +279,47 @@ static Fsr2ShaderBlobDX12 fsr2GetAutogenReactivePassPermutationBlobByIndex(uint3 } } -Fsr2ShaderBlobDX12 fsr2GetPermutationBlobByIndex(FfxFsr2Pass passId, uint32_t permutationOptions) { +static Fsr2ShaderBlobDX12 fsr2GetTcrAutogeneratePassPermutationBlobByIndex(uint32_t permutationOptions, bool isWave64, bool is16bit) { + + ffx_fsr2_autogen_reactive_pass_PermutationKey key; + + POPULATE_PERMUTATION_KEY(permutationOptions, key); + + if (isWave64) { + + if (is16bit) { + + const int32_t tableIndex = g_ffx_fsr2_tcr_autogen_pass_wave64_16bit_IndirectionTable[key.index]; + return POPULATE_SHADER_BLOB(g_ffx_fsr2_tcr_autogen_pass_wave64_16bit_PermutationInfo, tableIndex); + } + else { + + const int32_t tableIndex = g_ffx_fsr2_tcr_autogen_pass_wave64_IndirectionTable[key.index]; + return POPULATE_SHADER_BLOB(g_ffx_fsr2_tcr_autogen_pass_wave64_PermutationInfo, tableIndex); + } + } + else { + + if (is16bit) { + + const int32_t tableIndex = g_ffx_fsr2_tcr_autogen_pass_16bit_IndirectionTable[key.index]; + return POPULATE_SHADER_BLOB(g_ffx_fsr2_tcr_autogen_pass_16bit_PermutationInfo, tableIndex); + } + else { + + const int32_t tableIndex = g_ffx_fsr2_tcr_autogen_pass_IndirectionTable[key.index]; + return POPULATE_SHADER_BLOB(g_ffx_fsr2_tcr_autogen_pass_PermutationInfo, tableIndex); + } + } +} + +Fsr2ShaderBlobDX12 fsr2GetPermutationBlobByIndexDX12(FfxFsr2Pass passId, uint32_t permutationOptions) { bool isWave64 = FFX_CONTAINS_FLAG(permutationOptions, FSR2_SHADER_PERMUTATION_FORCE_WAVE64); bool is16bit = FFX_CONTAINS_FLAG(permutationOptions, FSR2_SHADER_PERMUTATION_ALLOW_FP16); switch (passId) { - case FFX_FSR2_PASS_PREPARE_INPUT_COLOR: - return fsr2GetPrepareInputColorPassPermutationBlobByIndex(permutationOptions, isWave64, is16bit); case FFX_FSR2_PASS_DEPTH_CLIP: return 
fsr2GetDepthClipPassPermutationBlobByIndex(permutationOptions, isWave64, is16bit); case FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH: @@ -334,6 +335,8 @@ Fsr2ShaderBlobDX12 fsr2GetPermutationBlobByIndex(FfxFsr2Pass passId, uint32_t pe return fsr2GetComputeLuminancePyramidPassPermutationBlobByIndex(permutationOptions, isWave64, is16bit); case FFX_FSR2_PASS_GENERATE_REACTIVE: return fsr2GetAutogenReactivePassPermutationBlobByIndex(permutationOptions, isWave64, is16bit); + case FFX_FSR2_PASS_TCR_AUTOGENERATE: + return fsr2GetTcrAutogeneratePassPermutationBlobByIndex(permutationOptions, isWave64, is16bit); default: FFX_ASSERT_FAIL("Should never reach here."); break; diff --git a/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.h b/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.h index 70a4003..2097c50 100644 --- a/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.h +++ b/src/ffx-fsr2-api/dx12/shaders/ffx_fsr2_shaders_dx12.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -32,10 +32,10 @@ extern "C" { typedef struct Fsr2ShaderBlobDX12 { const uint8_t* data; // A pointer to the blob - const uint32_t size; // Size in bytes. - const uint32_t uavCount; // Number of UAV. - const uint32_t srvCount; // Number of SRV. - const uint32_t cbvCount; // Number of CBs. + uint32_t size; // Size in bytes. + uint32_t uavCount; // Number of UAV. + uint32_t srvCount; // Number of SRV. + uint32_t cbvCount; // Number of CBs. const char** boundUAVResourceNames; const uint32_t* boundUAVResources; // Pointer to an array of bound UAV resources. 
const char** boundSRVResourceNames; @@ -58,7 +58,7 @@ typedef enum Fs2ShaderPermutationOptionsDX12 { } Fs2ShaderPermutationOptionsDX12; // Get a DX12 shader blob for the specified pass and permutation index. -Fsr2ShaderBlobDX12 fsr2GetPermutationBlobByIndex(FfxFsr2Pass passId, uint32_t permutationOptions); +Fsr2ShaderBlobDX12 fsr2GetPermutationBlobByIndexDX12(FfxFsr2Pass passId, uint32_t permutationOptions); #if defined(__cplusplus) } diff --git a/src/ffx-fsr2-api/ffx_assert.cpp b/src/ffx-fsr2-api/ffx_assert.cpp index 7705490..8a70ad5 100644 --- a/src/ffx-fsr2-api/ffx_assert.cpp +++ b/src/ffx-fsr2-api/ffx_assert.cpp @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/ffx_assert.h b/src/ffx-fsr2-api/ffx_assert.h index f96b157..ae32d2a 100644 --- a/src/ffx-fsr2-api/ffx_assert.h +++ b/src/ffx-fsr2-api/ffx_assert.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/ffx_error.h b/src/ffx-fsr2-api/ffx_error.h index 39d885e..7ba7d9c 100644 --- a/src/ffx-fsr2-api/ffx_error.h +++ b/src/ffx-fsr2-api/ffx_error.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/ffx_fsr2.cpp b/src/ffx-fsr2-api/ffx_fsr2.cpp index c7a342f..b0cd593 100644 --- a/src/ffx-fsr2-api/ffx_fsr2.cpp +++ b/src/ffx-fsr2-api/ffx_fsr2.cpp @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -51,26 +51,32 @@ typedef struct ResourceBinding static const ResourceBinding srvResourceBindingTable[] = { {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR, L"r_input_color_jittered"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS, L"r_motion_vectors"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH, L"r_depth" }, - {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE, L"r_exposure"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY, L"r_input_opaque_only"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS, L"r_input_motion_vectors"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH, L"r_input_depth" }, + {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE, L"r_input_exposure"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"r_auto_exposure"}, {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK, L"r_reactive_mask"}, {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK, L"r_transparency_and_composition_mask"}, {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"r_reconstructed_previous_nearest_depth"}, {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"r_dilated_motion_vectors"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS, L"r_previous_dilated_motion_vectors"}, {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, 
L"r_dilatedDepth"}, {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"r_internal_upscaled_color"}, {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS, L"r_lock_status"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP, L"r_depth_clip"}, {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"r_prepared_input_color"}, {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY, L"r_luma_history" }, {FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT, L"r_rcas_input"}, {FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"r_lanczos_lut"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"r_imgMips"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE, L"r_img_mip_shading_change"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5, L"r_img_mip_5"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE, L"r_imgMips"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE, L"r_img_mip_shading_change"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5, L"r_img_mip_5"}, {FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT, L"r_upsample_maximum_bias_lut"}, {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"r_dilated_reactive_masks"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, L"r_new_locks"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, L"r_lock_input_luma"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR, L"r_input_prev_color_pre_alpha"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR, L"r_input_prev_color_post_alpha"}, }; static const ResourceBinding uavResourceBindingTable[] = @@ -80,25 +86,28 @@ static const ResourceBinding uavResourceBindingTable[] = {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"rw_dilatedDepth"}, {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"rw_internal_upscaled_color"}, {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS, L"rw_lock_status"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP, L"rw_depth_clip"}, {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"rw_prepared_input_color"}, {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY, 
L"rw_luma_history"}, {FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT, L"rw_upscaled_output"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE, L"rw_img_mip_shading_change"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5, L"rw_img_mip_5"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE, L"rw_img_mip_shading_change"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5, L"rw_img_mip_5"}, {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"rw_dilated_reactive_masks"}, - {FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE, L"rw_exposure"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"rw_auto_exposure"}, {FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"rw_spd_global_atomic"}, -#if defined(FFX_INTERNAL) - {FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT, L"rw_debug_out"}, -#endif + {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, L"rw_new_locks"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, L"rw_lock_input_luma"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"rw_output_autoreactive"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION, L"rw_output_autocomposition"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR, L"rw_output_prev_color_pre_alpha"}, + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR, L"rw_output_prev_color_post_alpha"}, }; static const ResourceBinding cbResourceBindingTable[] = { - {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2, L"cbFSR2"}, - {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbSPD"}, - {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS, L"cbRCAS"}, + {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2, L"cbFSR2"}, + {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbSPD"}, + {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS, L"cbRCAS"}, + {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE, L"cbGenerateReactive"}, }; // Broad structure of the root signature. 
@@ -130,12 +139,23 @@ typedef struct Fsr2GenerateReactiveConstants float threshold; float binaryValue; uint32_t flags; + } Fsr2GenerateReactiveConstants; +typedef struct Fsr2GenerateReactiveConstants2 +{ + float autoTcThreshold; + float autoTcScale; + float autoReactiveScale; + float autoReactiveMax; + +} Fsr2GenerateReactiveConstants2; + typedef union Fsr2SecondaryUnion { - Fsr2RcasConstants rcas; - Fsr2SpdConstants spd; + Fsr2RcasConstants rcas; + Fsr2SpdConstants spd; + Fsr2GenerateReactiveConstants2 autogenReactive; } Fsr2SecondaryUnion; typedef struct Fsr2ResourceDescription { @@ -152,10 +172,11 @@ typedef struct Fsr2ResourceDescription { void* initData; } Fsr2ResourceDescription; -FfxConstantBuffer globalFsr2ConstantBuffers[3] = { +FfxConstantBuffer globalFsr2ConstantBuffers[4] = { { sizeof(Fsr2Constants) / sizeof(uint32_t) }, { sizeof(Fsr2SpdConstants) / sizeof(uint32_t) }, - { sizeof(Fsr2RcasConstants) / sizeof(uint32_t) } + { sizeof(Fsr2RcasConstants) / sizeof(uint32_t) }, + { sizeof(Fsr2GenerateReactiveConstants) / sizeof(uint32_t) } }; // Lanczos @@ -179,6 +200,139 @@ static float halton(int32_t index, int32_t base) return result; } +static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params) +{ + if (params->commandList == nullptr) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"commandList is null"); + } + + if (params->color.resource == nullptr) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"color resource is null"); + } + + if (params->depth.resource == nullptr) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"depth resource is null"); + } + + if (params->motionVectors.resource == nullptr) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"motionVectors resource is null"); + } + + if (params->exposure.resource != nullptr) + { + if ((context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) 
== FFX_FSR2_ENABLE_AUTO_EXPOSURE) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present"); + } + } + + if (params->output.resource == nullptr) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"output resource is null"); + } + + if (fabs(params->jitterOffset.x) > 1.0f || fabs(params->jitterOffset.y) > 1.0f) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]"); + } + + if ((params->motionVectorScale.x > (float)context->contextDescription.maxRenderSize.width) || + (params->motionVectorScale.y > (float)context->contextDescription.maxRenderSize.height)) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize"); + } + if ((params->motionVectorScale.x == 0.0f) || + (params->motionVectorScale.y == 0.0f)) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value"); + } + + if ((params->renderSize.width > context->contextDescription.maxRenderSize.width) || + (params->renderSize.height > context->contextDescription.maxRenderSize.height)) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize"); + } + if ((params->renderSize.width == 0) || + (params->renderSize.height == 0)) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension"); + } + + if (params->sharpness < 0.0f || params->sharpness > 1.0f) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]"); + } + + if (params->frameTimeDelta < 1.0f) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value 
should be milliseconds (~16.6f for 60fps)"); + } + + if (params->preExposure == 0.0f) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid"); + } + + bool infiniteDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE; + bool inverseDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INVERTED) == FFX_FSR2_ENABLE_DEPTH_INVERTED; + + if (inverseDepth) + { + if (params->cameraNear < params->cameraFar) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + L"FFX_FSR2_ENABLE_DEPTH_INVERTED flag is present yet cameraNear is less than cameraFar"); + } + if (infiniteDepth) + { + if (params->cameraNear != FLT_MAX) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraNear != FLT_MAX"); + } + } + if (params->cameraFar < 0.075f) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting"); + } + } + else + { + if (params->cameraNear > params->cameraFar) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + L"cameraNear is greater than cameraFar in non-inverted-depth context"); + } + if (infiniteDepth) + { + if (params->cameraFar != FLT_MAX) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraFar != FLT_MAX"); + } + } + if (params->cameraNear < 0.075f) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraNear value is very low which may result in depth separation artefacting"); + } + } + + if 
(params->cameraFovAngleVertical <= 0.0f) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f"); + } + if (params->cameraFovAngleVertical > FFX_PI) + { + context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI"); + } +} + static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) { for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex) @@ -252,18 +406,17 @@ static FfxErrorCode createPipelineStates(FfxFsr2Context_Private* context) // set up pipeline descriptor (basically RootSignature and binding) FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, &pipelineDescription, &context->pipelineComputeLuminancePyramid)); FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RCAS, &pipelineDescription, &context->pipelineRCAS)); + FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_GENERATE_REACTIVE, &pipelineDescription, &context->pipelineGenerateReactive)); + FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_TCR_AUTOGENERATE, &pipelineDescription, &context->pipelineTcrAutogenerate)); pipelineDescription.rootConstantBufferCount = 1; - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_PREPARE_INPUT_COLOR, &pipelineDescription, &context->pipelinePrepareInputColor)); FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_DEPTH_CLIP, &pipelineDescription, &context->pipelineDepthClip)); 
FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, &pipelineDescription, &context->pipelineReconstructPreviousDepth)); FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_LOCK, &pipelineDescription, &context->pipelineLock)); FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE, &pipelineDescription, &context->pipelineAccumulate)); FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, &pipelineDescription, &context->pipelineAccumulateSharpen)); - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_GENERATE_REACTIVE, &pipelineDescription, &context->pipelineGenerateReactive)); // for each pipeline: re-route/fix-up IDs based on names - patchResourceBindings(&context->pipelinePrepareInputColor); patchResourceBindings(&context->pipelineDepthClip); patchResourceBindings(&context->pipelineReconstructPreviousDepth); patchResourceBindings(&context->pipelineLock); @@ -272,10 +425,13 @@ static FfxErrorCode createPipelineStates(FfxFsr2Context_Private* context) patchResourceBindings(&context->pipelineAccumulateSharpen); patchResourceBindings(&context->pipelineRCAS); patchResourceBindings(&context->pipelineGenerateReactive); + patchResourceBindings(&context->pipelineTcrAutogenerate); return FFX_OK; } +static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params); + static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2ContextDescription* contextDescription) { FFX_ASSERT(context); @@ -287,6 +443,16 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con 
memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr2ContextDescription)); + if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING) + { + if (context->contextDescription.fpMessage == nullptr) + { + FFX_ASSERT(context->contextDescription.fpMessage != nullptr); + // remove the debug checking flag - we have no message function + context->contextDescription.flags &= ~FFX_FSR2_ENABLE_DEBUG_CHECKING; + } + } + // Create the device. FfxErrorCode errorCode = context->contextDescription.callbacks.fpCreateBackendContext(&context->contextDescription.callbacks, context->device); FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); @@ -301,8 +467,6 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con context->constants.displaySize[0] = contextDescription->displaySize.width; context->constants.displaySize[1] = contextDescription->displaySize.height; - context->constants.displaySizeRcp[0] = 1.0f / contextDescription->displaySize.width; - context->constants.displaySizeRcp[1] = 1.0f / contextDescription->displaySize.height; // generate the data for the LUT. 
const uint32_t lanczos2LutWidth = 128; @@ -331,39 +495,48 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con const Fsr2ResourceDescription internalSurfaceDesc[] = { { FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, { FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"FSR2_DilatedVelocity", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1, L"FSR2_InternalDilatedVelocity1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"FSR2_DilatedDepth", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2, L"FSR2_InternalDilatedVelocity2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, 
contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP, L"FSR2_DepthClip", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"FSR2_DilatedDepth", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R32_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1, L"FSR2_LockStatus1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2, L"FSR2_LockStatus2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + + { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, L"FSR2_LockInputLuma", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, + + { FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, L"FSR2_NewLocks", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - 
{ FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR2_InternalUpscaled1", FFX_RESOURCE_USAGE_UAV, + { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR2_InternalUpscaled1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR2_InternalUpscaled2", FFX_RESOURCE_USAGE_UAV, + { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR2_InternalUpscaled2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"FSR2_ExposureMips", FFX_RESOURCE_USAGE_UAV, + { FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE, L"FSR2_ExposureMips", FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width / 2, contextDescription->maxRenderSize.height / 2, 0, FFX_RESOURCE_FLAGS_ALIASABLE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY, L"FSR2_LumaHistory", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, L"FSR2_LumaHistory1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR2_SpdAtomicCounter", FFX_RESOURCE_USAGE_UAV, + { FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, L"FSR2_LumaHistory2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + 
FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + + { FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR2_SpdAtomicCounter", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), FFX_SURFACE_FORMAT_R32_UINT, 1, 1, 1, FFX_RESOURCE_FLAGS_ALIASABLE, sizeof(atomicInitData), &atomicInitData }, { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"FSR2_DilatedReactiveMasks", FFX_RESOURCE_USAGE_UAV, @@ -378,16 +551,27 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con { FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT, L"FSR2_MaximumUpsampleBias", FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(maximumBias), maximumBias }, - { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR2_DefaultExposure", FFX_RESOURCE_USAGE_UAV, + { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR2_DefaultExposure", FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultExposure), defaultExposure }, - { FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE, L"FSR2_Exposure", FFX_RESOURCE_USAGE_UAV, + { FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"FSR2_AutoExposure", FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE }, -#if defined(FFX_INTERNAL) - { FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT, L"FSR2_DebugOut", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, -#endif + + // only one for now, will need pingpont to respect the motion vectors + { FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"FSR2_AutoReactive", FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, 
contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION, L"FSR2_AutoComposition", FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1, L"FSR2_PrevPreAlpha0", FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1, L"FSR2_PrevPostAlpha0", FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2, L"FSR2_PrevPreAlpha1", FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2, L"FSR2_PrevPostAlpha1", FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, + }; // clear the SRV resources to NULL. 
@@ -442,7 +626,6 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context) { FFX_ASSERT(context); - fsr2SafeReleasePipeline(context, &context->pipelinePrepareInputColor); fsr2SafeReleasePipeline(context, &context->pipelineDepthClip); fsr2SafeReleasePipeline(context, &context->pipelineReconstructPreviousDepth); fsr2SafeReleasePipeline(context, &context->pipelineLock); @@ -451,8 +634,10 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context) fsr2SafeReleasePipeline(context, &context->pipelineRCAS); fsr2SafeReleasePipeline(context, &context->pipelineComputeLuminancePyramid); fsr2SafeReleasePipeline(context, &context->pipelineGenerateReactive); + fsr2SafeReleasePipeline(context, &context->pipelineTcrAutogenerate); // unregister resources not created internally + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL }; context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL }; context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL }; context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL }; @@ -475,6 +660,57 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context) return FFX_OK; } +static void setupDeviceDepthToViewSpaceDepthParams(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params) +{ + const bool bInverted = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INVERTED) == FFX_FSR2_ENABLE_DEPTH_INVERTED; + const bool bInfinite = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE; + + // make sure it has no impact if near and far plane values are swapped in dispatch params + // the flags "inverted" and "infinite" will decide what transform to use + float fMin = FFX_MINIMUM(params->cameraNear, params->cameraFar); + float fMax = FFX_MAXIMUM(params->cameraNear, 
params->cameraFar); + + if (bInverted) { + float tmp = fMin; + fMin = fMax; + fMax = tmp; + } + + // a 0 0 0 x + // 0 b 0 0 y + // 0 0 c d z + // 0 0 e 0 1 + + const float fQ = fMax / (fMin - fMax); + const float d = -1.0f; // for clarity + + const float matrix_elem_c[2][2] = { + fQ, // non reversed, non infinite + -1.0f - FLT_EPSILON, // non reversed, infinite + fQ, // reversed, non infinite + 0.0f + FLT_EPSILON // reversed, infinite + }; + + const float matrix_elem_e[2][2] = { + fQ * fMin, // non reversed, non infinite + -fMin - FLT_EPSILON, // non reversed, infinite + fQ * fMin, // reversed, non infinite + fMax, // reversed, infinite + }; + + context->constants.deviceToViewDepth[0] = d * matrix_elem_c[bInverted][bInfinite]; + context->constants.deviceToViewDepth[1] = matrix_elem_e[bInverted][bInfinite]; + + // revert x and y coords + const float aspect = params->renderSize.width / float(params->renderSize.height); + const float cotHalfFovY = cosf(0.5f * params->cameraFovAngleVertical) / sinf(0.5f * params->cameraFovAngleVertical); + const float a = cotHalfFovY / aspect; + const float b = cotHalfFovY; + + context->constants.deviceToViewDepth[2] = (1.0f / a); + context->constants.deviceToViewDepth[3] = (1.0f / b); +} + static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) { FfxComputeJobDescription jobDescriptor = {}; @@ -492,11 +728,11 @@ static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2Dispa const uint32_t currentResourceId = pipeline->uavResourceBindings[currentUnorderedAccessViewIndex].resourceIdentifier; wcscpy_s(jobDescriptor.uavNames[currentUnorderedAccessViewIndex], pipeline->uavResourceBindings[currentUnorderedAccessViewIndex].name); - if (currentResourceId >= FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0 && currentResourceId <= FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_12) + if 
(currentResourceId >= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 && currentResourceId <= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12) { - const FfxResourceInternal currentResource = context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE]; + const FfxResourceInternal currentResource = context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE]; jobDescriptor.uavs[currentUnorderedAccessViewIndex] = currentResource; - jobDescriptor.uavMip[currentUnorderedAccessViewIndex] = currentResourceId - FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0; + jobDescriptor.uavMip[currentUnorderedAccessViewIndex] = currentResourceId - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0; } else { @@ -514,6 +750,7 @@ static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2Dispa for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { wcscpy_s( jobDescriptor.cbNames[currentRootConstantIndex], pipeline->cbResourceBindings[currentRootConstantIndex].name); jobDescriptor.cbs[currentRootConstantIndex] = globalFsr2ConstantBuffers[pipeline->cbResourceBindings[currentRootConstantIndex].resourceIdentifier]; + jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->cbResourceBindings[currentRootConstantIndex].slotIndex; } FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; @@ -524,6 +761,10 @@ static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2Dispa static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params) { + if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING) + { + fsr2DebugCheckDispatch(context, params); + } // take a short cut to the command list FfxCommandList commandList = params->commandList; @@ -536,28 +777,17 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D 
FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); } - static const float lockInitialLifetime = 1.0f; - if (context->firstExecution) { - const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; + + const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1]; context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2]; context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); - clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); - clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); - clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); - clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); - clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); } // Prepare per frame descriptor tables @@ -568,6 +798,15 @@ static 
FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2; const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1; const uint32_t upscaledColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2; + const uint32_t dilatedMotionVectorsResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1; + const uint32_t previousDilatedMotionVectorsResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2; + const uint32_t lumaHistorySrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1; + const uint32_t lumaHistoryUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2; + + const uint32_t prevPreAlphaColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1; + const uint32_t prevPreAlphaColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2; + const uint32_t prevPostAlphaColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1; + const uint32_t prevPostAlphaColorUavResourceIndex = isOddFrame ? 
FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2; const bool resetAccumulation = params->reset || context->firstExecution; context->firstExecution = false; @@ -578,7 +817,7 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // if auto exposure is enabled use the auto exposure SRV, otherwise what the app sends. if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) { - context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE]; + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE]; } else { if (ffxFsr2ResourceIsNull(params->exposure)) { context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE]; @@ -587,12 +826,18 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D } } + if (params->enableAutoReactive) + { + context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->colorOpaqueOnly, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]); + } + if (ffxFsr2ResourceIsNull(params->reactive)) { context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY]; - } else { + } + else { context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->reactive, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); } - + if (ffxFsr2ResourceIsNull(params->transparencyAndComposition)) { context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY]; } else { @@ -606,51 +851,47 @@ static
FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->uavResources[upscaledColorUavResourceIndex]; context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT] = context->uavResources[upscaledColorUavResourceIndex]; + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->srvResources[dilatedMotionVectorsResourceIndex]; + context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->uavResources[dilatedMotionVectorsResourceIndex]; + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS] = context->srvResources[previousDilatedMotionVectorsResourceIndex]; + + context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->uavResources[lumaHistoryUavResourceIndex]; + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->srvResources[lumaHistorySrvResourceIndex]; + + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR] = context->srvResources[prevPreAlphaColorSrvResourceIndex]; + context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR] = context->uavResources[prevPreAlphaColorUavResourceIndex]; + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->srvResources[prevPostAlphaColorSrvResourceIndex]; + context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->uavResources[prevPostAlphaColorUavResourceIndex]; + // actual resource size may differ from render/display resolution (e.g. 
due to Hw/API restrictions), so query the descriptor for UVs adjustment const FfxResourceDescription resourceDescInputColor = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); - const FfxResourceDescription resourceDescDepthClip = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP]); const FfxResourceDescription resourceDescLockStatus = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[lockStatusSrvResourceIndex]); const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D); - FFX_ASSERT(resourceDescDepthClip.type == FFX_RESOURCE_TYPE_TEXTURE2D); FFX_ASSERT(resourceDescLockStatus.type == FFX_RESOURCE_TYPE_TEXTURE2D); context->constants.jitterOffset[0] = params->jitterOffset.x; context->constants.jitterOffset[1] = params->jitterOffset.y; context->constants.renderSize[0] = int32_t(params->renderSize.width ? params->renderSize.width : resourceDescInputColor.width); context->constants.renderSize[1] = int32_t(params->renderSize.height ? params->renderSize.height : resourceDescInputColor.height); + context->constants.maxRenderSize[0] = int32_t(context->contextDescription.maxRenderSize.width); + context->constants.maxRenderSize[1] = int32_t(context->contextDescription.maxRenderSize.height); + context->constants.inputColorResourceDimensions[0] = resourceDescInputColor.width; + context->constants.inputColorResourceDimensions[1] = resourceDescInputColor.height; // compute the horizontal FOV for the shader from the vertical one. 
const float aspectRatio = (float)params->renderSize.width / (float)params->renderSize.height; const float cameraAngleHorizontal = atan(tan(params->cameraFovAngleVertical / 2) * aspectRatio) * 2; context->constants.tanHalfFOV = tanf(cameraAngleHorizontal * 0.5f); + context->constants.viewSpaceToMetersFactor = (params->viewSpaceToMetersFactor > 0.0f) ? params->viewSpaceToMetersFactor : 1.0f; - if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INVERTED) == FFX_FSR2_ENABLE_DEPTH_INVERTED) { - - const float c = 0.0f; - context->constants.deviceToViewDepth[0] = c + FLT_EPSILON; - context->constants.deviceToViewDepth[1] = -1.00000000f; - context->constants.deviceToViewDepth[2] = 0.100000001f; - context->constants.deviceToViewDepth[3] = FLT_EPSILON; - - } else { - - const float c = -1.0f; - context->constants.deviceToViewDepth[0] = c - FLT_EPSILON; - context->constants.deviceToViewDepth[1] = -1.00000000f; - context->constants.deviceToViewDepth[2] = -0.200019985f; - context->constants.deviceToViewDepth[3] = FLT_EPSILON; - } + // compute params to enable device depth to view space depth computation in shader + setupDeviceDepthToViewSpaceDepthParams(context, params); // To be updated if resource is larger than the actual image size - context->constants.depthClipUVScale[0] = float(context->constants.renderSize[0]) / resourceDescDepthClip.width; - context->constants.depthClipUVScale[1] = float(context->constants.renderSize[1]) / resourceDescDepthClip.height; - context->constants.postLockStatusUVScale[0] = float(context->contextDescription.displaySize.width) / resourceDescLockStatus.width; - context->constants.postLockStatusUVScale[1] = float(context->contextDescription.displaySize.height) / resourceDescLockStatus.height; - context->constants.reactiveMaskDimRcp[0] = 1.0f / float(resourceDescReactiveMask.width); - context->constants.reactiveMaskDimRcp[1] = 1.0f / float(resourceDescReactiveMask.height); context->constants.downscaleFactor[0] = 
float(context->constants.renderSize[0]) / context->contextDescription.displaySize.width; context->constants.downscaleFactor[1] = float(context->constants.renderSize[1]) / context->contextDescription.displaySize.height; + context->constants.previousFramePreExposure = context->constants.preExposure; context->constants.preExposure = (params->preExposure != 0) ? params->preExposure : 1.0f; // motion vector data @@ -672,8 +913,6 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // lock data, assuming jitter sequence length computation for now const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(params->renderSize.width, context->contextDescription.displaySize.width); - context->constants.lockInitialLifetime = lockInitialLifetime; - // init on first frame if (resetAccumulation || context->constants.jitterPhaseCount == 0) { context->constants.jitterPhaseCount = (float)jitterPhaseCount; @@ -686,9 +925,6 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D } } - const int32_t maxLockFrames = (int32_t)(context->constants.jitterPhaseCount) + 1; - context->constants.lockTickDelta = lockInitialLifetime / maxLockFrames; - // convert delta time to seconds and clamp to [0, 1]. 
context->constants.deltaTime = FFX_MAXIMUM(0.0f, FFX_MINIMUM(1.0f, params->frameTimeDelta / 1000.0f)); @@ -702,10 +938,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D context->constants.lumaMipLevelToUse = uint32_t(FFX_FSR2_SHADING_CHANGE_MIP_LEVEL); const float mipDiv = float(2 << context->constants.lumaMipLevelToUse); - context->constants.lumaMipDimensions[0] = uint32_t(context->constants.renderSize[0] / mipDiv); - context->constants.lumaMipDimensions[1] = uint32_t(context->constants.renderSize[1] / mipDiv); - context->constants.lumaMipRcp = float(context->constants.lumaMipDimensions[0] * context->constants.lumaMipDimensions[1]) / - float(context->constants.renderSize[0] * context->constants.renderSize[1]); + context->constants.lumaMipDimensions[0] = uint32_t(context->constants.maxRenderSize[0] / mipDiv); + context->constants.lumaMipDimensions[1] = uint32_t(context->constants.maxRenderSize[1] / mipDiv); // reactive mask bias const int32_t threadGroupWorkRegionDim = 8; @@ -722,9 +956,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // LockStatus resource has no sign bit, callback functions are compensating for this. // Clearing the resource must follow the same logic. 
float clearValuesLockStatus[4]{}; - clearValuesLockStatus[LOCK_LIFETIME_REMAINING] = lockInitialLifetime * 2.0f; + clearValuesLockStatus[LOCK_LIFETIME_REMAINING] = 0.0f; clearValuesLockStatus[LOCK_TEMPORAL_LUMA] = 0.0f; - clearValuesLockStatus[LOCK_TRUST] = 1.0f; memcpy(clearJob.clearJobDescriptor.color, clearValuesLockStatus, 4 * sizeof(float)); clearJob.clearJobDescriptor.target = context->srvResources[lockStatusSrvResourceIndex]; @@ -735,13 +968,15 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D clearJob.clearJobDescriptor.target = context->srvResources[upscaledColorSrvResourceIndex]; context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); - clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE]; + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE]; context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); - if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) { + //if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) + // Auto exposure always used to track luma changes in locking logic + { const float clearValuesExposure[]{ -1.f, 1e8f, 0.f, 0.f }; memcpy(clearJob.clearJobDescriptor.color, clearValuesExposure, 4 * sizeof(float)); - clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE]; + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE]; context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); } } @@ -767,13 +1002,26 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D const float sharpenessRemapped = (-2.0f * params->sharpness) + 2.0f; FsrRcasCon(rcasConsts.rcasConfig, sharpenessRemapped); + 
Fsr2GenerateReactiveConstants2 genReactiveConsts = {}; + genReactiveConsts.autoTcThreshold = params->autoTcThreshold; + genReactiveConsts.autoTcScale = params->autoTcScale; + genReactiveConsts.autoReactiveScale = params->autoReactiveScale; + genReactiveConsts.autoReactiveMax = params->autoReactiveMax; + // initialize constantBuffers data - memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].data, &context->constants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].uint32Size * sizeof(uint32_t)); - memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].data, &luminancePyramidConstants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].uint32Size * sizeof(uint32_t)); - memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].data, &rcasConsts, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].uint32Size * sizeof(uint32_t)); + memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].data, &context->constants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].uint32Size * sizeof(uint32_t)); + memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].data, &luminancePyramidConstants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].uint32Size * sizeof(uint32_t)); + memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].data, &rcasConsts, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].uint32Size * sizeof(uint32_t)); + memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].data, &genReactiveConsts, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].uint32Size * sizeof(uint32_t)); + // Auto reactive + if (params->enableAutoReactive) + { + generateReactiveMaskInternal(context, params); + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = 
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION]; + } scheduleDispatch(context, params, &context->pipelineComputeLuminancePyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]); - scheduleDispatch(context, params, &context->pipelinePrepareInputColor, dispatchSrcX, dispatchSrcY); scheduleDispatch(context, params, &context->pipelineReconstructPreviousDepth, dispatchSrcX, dispatchSrcY); scheduleDispatch(context, params, &context->pipelineDepthClip, dispatchSrcX, dispatchSrcY); @@ -999,10 +1247,16 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F const int32_t dispatchSrcX = (params->renderSize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + // save internal reactive resource + FfxResourceInternal internalReactive = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + FfxComputeJobDescription jobDescriptor = {}; - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorOpaqueOnly, &jobDescriptor.srvs[0]); - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorPreUpscale, &jobDescriptor.srvs[1]); - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->outReactive, &jobDescriptor.uavs[0]); + contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]); +
contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorPreUpscale, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); + contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->outReactive, &contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]); + + jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + wcscpy_s(jobDescriptor.srvNames[0], pipeline->srvResourceBindings[0].name); wcscpy_s(jobDescriptor.srvNames[1], pipeline->srvResourceBindings[1].name); wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name); @@ -1012,6 +1266,14 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F jobDescriptor.dimensions[2] = 1; jobDescriptor.pipeline = *pipeline; + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) { + + const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId]; + jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource; + wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name); + } + Fsr2GenerateReactiveConstants constants = {}; constants.scale = params->scale; constants.threshold = params->cutoffThreshold; @@ -1029,5 +1291,66 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F contextPrivate->contextDescription.callbacks.fpExecuteGpuJobs(&contextPrivate->contextDescription.callbacks, commandList); + // restore internal reactive + contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE] = internalReactive; + + return FFX_OK; +} + +static
FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params) +{ + if (contextPrivate->refreshPipelineStates) { + + createPipelineStates(contextPrivate); + contextPrivate->refreshPipelineStates = false; + } + + // take a short cut to the command list + FfxCommandList commandList = params->commandList; + + FfxPipelineState* pipeline = &contextPrivate->pipelineTcrAutogenerate; + + const int32_t threadGroupWorkRegionDim = 8; + const int32_t dispatchSrcX = (params->renderSize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + FfxComputeJobDescription jobDescriptor = {}; + contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]); + contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->color, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); + + jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + jobDescriptor.uavs[1] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION]; + jobDescriptor.uavs[2] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]; + jobDescriptor.uavs[3] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR]; + + wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name); + wcscpy_s(jobDescriptor.uavNames[1], pipeline->uavResourceBindings[1].name); + wcscpy_s(jobDescriptor.uavNames[2], pipeline->uavResourceBindings[2].name); + wcscpy_s(jobDescriptor.uavNames[3], pipeline->uavResourceBindings[3].name); + + jobDescriptor.dimensions[0] = dispatchSrcX; + jobDescriptor.dimensions[1]
= dispatchSrcY; + jobDescriptor.dimensions[2] = 1; + jobDescriptor.pipeline = *pipeline; + + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) { + + const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId]; + jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource; + wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name); + } + + for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { + wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->cbResourceBindings[currentRootConstantIndex].name); + jobDescriptor.cbs[currentRootConstantIndex] = globalFsr2ConstantBuffers[pipeline->cbResourceBindings[currentRootConstantIndex].resourceIdentifier]; + jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->cbResourceBindings[currentRootConstantIndex].slotIndex; + } + + FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; + dispatchJob.computeJobDescriptor = jobDescriptor; + + contextPrivate->contextDescription.callbacks.fpScheduleGpuJob(&contextPrivate->contextDescription.callbacks, &dispatchJob); + return FFX_OK; } diff --git a/src/ffx-fsr2-api/ffx_fsr2.h b/src/ffx-fsr2-api/ffx_fsr2.h index ee2ff7d..e611fef 100644 --- a/src/ffx-fsr2-api/ffx_fsr2.h +++ b/src/ffx-fsr2-api/ffx_fsr2.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -35,12 +35,12 @@ /// FidelityFX Super Resolution 2 minor version. /// /// @ingroup FSR2 -#define FFX_FSR2_VERSION_MINOR (1) +#define FFX_FSR2_VERSION_MINOR (2) /// FidelityFX Super Resolution 2 patch version. /// /// @ingroup FSR2 -#define FFX_FSR2_VERSION_PATCH (2) +#define FFX_FSR2_VERSION_PATCH (0) /// The size of the context specified in 32bit values. /// @@ -92,7 +92,8 @@ typedef enum FfxFsr2InitializationFlagBits { FFX_FSR2_ENABLE_DEPTH_INFINITE = (1<<4), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane. FFX_FSR2_ENABLE_AUTO_EXPOSURE = (1<<5), ///< A bit indicating if automatic exposure should be applied to input color data. FFX_FSR2_ENABLE_DYNAMIC_RESOLUTION = (1<<6), ///< A bit indicating that the application uses dynamic resolution scaling. - FFX_FSR2_ENABLE_TEXTURE1D_USAGE = (1<<7) ///< A bit indicating that the backend should use 1D textures. + FFX_FSR2_ENABLE_TEXTURE1D_USAGE = (1<<7), ///< A bit indicating that the backend should use 1D textures. + FFX_FSR2_ENABLE_DEBUG_CHECKING = (1<<8), ///< A bit indicating that the runtime should check some API values and report issues. } FfxFsr2InitializationFlagBits; /// A structure encapsulating the parameters required to initialize FidelityFX @@ -106,6 +107,8 @@ typedef struct FfxFsr2ContextDescription { FfxDimensions2D displaySize; ///< The size of the presentation resolution targeted by the upscaling process. FfxFsr2Interface callbacks; ///< A set of pointers to the backend implementation for FSR 2.0. FfxDevice device; ///< The abstracted device which is passed to some callback functions. + + FfxFsr2Message fpMessage; ///< A pointer to a function that can receive messages from the runtime.
} FfxFsr2ContextDescription; /// A structure encapsulating the parameters for dispatching the various passes @@ -128,11 +131,21 @@ typedef struct FfxFsr2DispatchDescription { bool enableSharpening; ///< Enable an additional sharpening pass. float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness. float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds). - float preExposure; ///< The exposure value if not using FFX_FSR2_ENABLE_AUTO_EXPOSURE. + float preExposure; ///< The pre exposure value (must be > 0.0f) bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously. float cameraNear; ///< The distance to the near plane of the camera. float cameraFar; ///< The distance to the far plane of the camera. This is used only used in case of non infinite depth. float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians). + float viewSpaceToMetersFactor; ///< The scale factor to convert view space units to meters + + // EXPERIMENTAL reactive mask generation parameters + bool enableAutoReactive; ///< A boolean value to indicate internal reactive autogeneration should be used + FfxResource colorOpaqueOnly; ///< A FfxResource containing the opaque only color buffer for the current frame (at render resolution). 
+ float autoTcThreshold; ///< Cutoff value for TC + float autoTcScale; ///< A value to scale the transparency and composition mask + float autoReactiveScale; ///< A value to scale the reactive mask + float autoReactiveMax; ///< A value to clamp the reactive mask + } FfxFsr2DispatchDescription; /// A structure encapsulating the parameters for automatic generation of a reactive mask diff --git a/src/ffx-fsr2-api/ffx_fsr2_interface.h b/src/ffx-fsr2-api/ffx_fsr2_interface.h index db13fd0..b6be976 100644 --- a/src/ffx-fsr2-api/ffx_fsr2_interface.h +++ b/src/ffx-fsr2-api/ffx_fsr2_interface.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -55,19 +55,25 @@ FFX_FORWARD_DECLARE(FfxFsr2Interface); /// @ingroup FSR2 typedef enum FfxFsr2Pass { - FFX_FSR2_PASS_PREPARE_INPUT_COLOR = 0, ///< A pass which prepares input colors for subsequent use. - FFX_FSR2_PASS_DEPTH_CLIP = 1, ///< A pass which performs depth clipping. - FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 2, ///< A pass which performs reconstruction of previous frame's depth. - FFX_FSR2_PASS_LOCK = 3, ///< A pass which calculates pixel locks. - FFX_FSR2_PASS_ACCUMULATE = 4, ///< A pass which performs upscaling. - FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 5, ///< A pass which performs upscaling when sharpening is used. - FFX_FSR2_PASS_RCAS = 6, ///< A pass which performs sharpening. - FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 7, ///< A pass which generates the luminance mipmap chain for the current frame. - FFX_FSR2_PASS_GENERATE_REACTIVE = 8, ///< An optional pass to generate a reactive mask + FFX_FSR2_PASS_DEPTH_CLIP = 0, ///< A pass which performs depth clipping. 
+ FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 1, ///< A pass which performs reconstruction of previous frame's depth. + FFX_FSR2_PASS_LOCK = 2, ///< A pass which calculates pixel locks. + FFX_FSR2_PASS_ACCUMULATE = 3, ///< A pass which performs upscaling. + FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 4, ///< A pass which performs upscaling when sharpening is used. + FFX_FSR2_PASS_RCAS = 5, ///< A pass which performs sharpening. + FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 6, ///< A pass which generates the luminance mipmap chain for the current frame. + FFX_FSR2_PASS_GENERATE_REACTIVE = 7, ///< An optional pass to generate a reactive mask + FFX_FSR2_PASS_TCR_AUTOGENERATE = 8, ///< An optional pass to generate the transparency-and-composition and reactive masks FFX_FSR2_PASS_COUNT ///< The number of passes performed by FSR2. } FfxFsr2Pass; +typedef enum FfxFsr2MsgType { + FFX_FSR2_MESSAGE_TYPE_ERROR = 0, + FFX_FSR2_MESSAGE_TYPE_WARNING = 1, + FFX_FSR2_MESSAGE_TYPE_COUNT +} FfxFsr2MsgType; + /// Create and initialize the backend context. /// /// The callback function sets up the backend context for rendering. @@ -313,6 +319,19 @@ typedef FfxErrorCode (*FfxFsr2ExecuteGpuJobsFunc)( FfxFsr2Interface* backendInterface, FfxCommandList commandList); +/// Pass a string message +/// +/// Used for debug messages. +/// +/// @param [in] type The type of message. +/// @param [in] message A string message to pass. +/// +/// +/// @ingroup FSR2 +typedef void(*FfxFsr2Message)( + FfxFsr2MsgType type, + const wchar_t* message); + /// A structure encapsulating the interface between the core implentation of /// the FSR2 algorithm and any graphics API that it should ultimately call. /// diff --git a/src/ffx-fsr2-api/ffx_fsr2_maximum_bias.h b/src/ffx-fsr2-api/ffx_fsr2_maximum_bias.h index ad840f3..5fdbd0c 100644 --- a/src/ffx-fsr2-api/ffx_fsr2_maximum_bias.h +++ b/src/ffx-fsr2-api/ffx_fsr2_maximum_bias.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK.
// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/ffx_fsr2_private.h b/src/ffx-fsr2-api/ffx_fsr2_private.h index 12fa107..6b5fbc5 100644 --- a/src/ffx-fsr2-api/ffx_fsr2_private.h +++ b/src/ffx-fsr2-api/ffx_fsr2_private.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -25,27 +25,25 @@ typedef struct Fsr2Constants { int32_t renderSize[2]; + int32_t maxRenderSize[2]; int32_t displaySize[2]; - uint32_t lumaMipDimensions[2]; - uint32_t lumaMipLevelToUse; - uint32_t frameIndex; - float displaySizeRcp[2]; - float jitterOffset[2]; + int32_t inputColorResourceDimensions[2]; + int32_t lumaMipDimensions[2]; + int32_t lumaMipLevelToUse; + int32_t frameIndex; + float deviceToViewDepth[4]; - float depthClipUVScale[2]; - float postLockStatusUVScale[2]; - float reactiveMaskDimRcp[2]; + float jitterOffset[2]; float motionVectorScale[2]; float downscaleFactor[2]; + float motionVectorJitterCancellation[2]; float preExposure; + float previousFramePreExposure; float tanHalfFOV; - float motionVectorJitterCancellation[2]; float jitterPhaseCount; - float lockInitialLifetime; - float lockTickDelta; float deltaTime; float dynamicResChangeFactor; - float lumaMipRcp; + float viewSpaceToMetersFactor; } Fsr2Constants; struct FfxFsr2ContextDescription; @@ -61,7 +59,6 @@ typedef struct FfxFsr2Context_Private { Fsr2Constants constants; FfxDevice device; FfxDeviceCapabilities 
deviceCapabilities; - FfxPipelineState pipelinePrepareInputColor; FfxPipelineState pipelineDepthClip; FfxPipelineState pipelineReconstructPreviousDepth; FfxPipelineState pipelineLock; @@ -70,6 +67,7 @@ typedef struct FfxFsr2Context_Private { FfxPipelineState pipelineRCAS; FfxPipelineState pipelineComputeLuminancePyramid; FfxPipelineState pipelineGenerateReactive; + FfxPipelineState pipelineTcrAutogenerate; // 2 arrays of resources, as e.g. FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV FfxResourceInternal srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT]; diff --git a/src/ffx-fsr2-api/ffx_types.h b/src/ffx-fsr2-api/ffx_types.h index 0079572..74edd19 100644 --- a/src/ffx-fsr2-api/ffx_types.h +++ b/src/ffx-fsr2-api/ffx_types.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -44,7 +44,17 @@ #define FFX_MAX_CONST_SIZE 64 /// Off by default warnings +#if defined(_MSC_VER) #pragma warning(disable : 4365 4710 4820 5039) +#elif defined(__clang__) +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" +#pragma clang diagnostic ignored "-Wsign-compare" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wignored-qualifiers" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-function" +#endif #ifdef __cplusplus extern "C" { @@ -70,6 +80,7 @@ typedef enum FfxSurfaceFormat { FFX_SURFACE_FORMAT_R16_UNORM, ///< 16 bit per channel, 1 channel unsigned normalized format FFX_SURFACE_FORMAT_R16_SNORM, ///< 16 bit per channel, 1 channel signed normalized format FFX_SURFACE_FORMAT_R8_UNORM, ///< 8 bit per channel, 1 channel unsigned normalized format + FFX_SURFACE_FORMAT_R8_UINT, ///< 8 bit per channel, 1 channel unsigned int format FFX_SURFACE_FORMAT_R8G8_UNORM, ///< 8 bit per channel, 2 channel unsigned normalized format FFX_SURFACE_FORMAT_R32_FLOAT ///< 32 bit per channel, 1 channel float format } FfxSurfaceFormat; @@ -326,6 +337,7 @@ typedef struct FfxComputeJobDescription { wchar_t uavNames[FFX_MAX_NUM_UAVS][64]; FfxConstantBuffer cbs[FFX_MAX_NUM_CONST_BUFFERS]; ///< Constant buffers to be bound in the compute job. wchar_t cbNames[FFX_MAX_NUM_CONST_BUFFERS][64]; + uint32_t cbSlotIndex[FFX_MAX_NUM_CONST_BUFFERS]; ///< Slot index in the descriptor table } FfxComputeJobDescription; /// A structure describing a copy render job. diff --git a/src/ffx-fsr2-api/ffx_util.h b/src/ffx-fsr2-api/ffx_util.h index aca9183..ca4324e 100644 --- a/src/ffx-fsr2-api/ffx_util.h +++ b/src/ffx-fsr2-api/ffx_util.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. 
// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/shaders/ffx_common_types.h b/src/ffx-fsr2-api/shaders/ffx_common_types.h index cf6ba99..ddd1786 100644 --- a/src/ffx-fsr2-api/shaders/ffx_common_types.h +++ b/src/ffx-fsr2-api/shaders/ffx_common_types.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/shaders/ffx_core.h b/src/ffx-fsr2-api/shaders/ffx_core.h index 3a66f44..4e687d6 100644 --- a/src/ffx-fsr2-api/shaders/ffx_core.h +++ b/src/ffx-fsr2-api/shaders/ffx_core.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/shaders/ffx_core_cpu.h b/src/ffx-fsr2-api/shaders/ffx_core_cpu.h index 9bb9915..3bf0295 100644 --- a/src/ffx-fsr2-api/shaders/ffx_core_cpu.h +++ b/src/ffx-fsr2-api/shaders/ffx_core_cpu.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/shaders/ffx_core_glsl.h b/src/ffx-fsr2-api/shaders/ffx_core_glsl.h index e419e39..6ec58f3 100644 --- a/src/ffx-fsr2-api/shaders/ffx_core_glsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_core_glsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -829,6 +829,79 @@ FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) return max(min(x, y), min(max(x, y), z)); } +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z.
+/// +/// @ingroup GLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + + /// Compute the minimum of three values. /// /// NOTE: This function should compile down to a single V_MIN3_F32 operation on @@ -1400,6 +1473,40 @@ FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a) return mix(x, y, a); } //------------------------------------------------------------------------------------------------------------------------------ +// No packed version of ffxMed3.
+FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ // No packed version of ffxMax3. FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) { diff --git a/src/ffx-fsr2-api/shaders/ffx_core_gpu_common.h b/src/ffx-fsr2-api/shaders/ffx_core_gpu_common.h index 3a49c55..ae07642 100644 --- a/src/ffx-fsr2-api/shaders/ffx_core_gpu_common.h +++ b/src/ffx-fsr2-api/shaders/ffx_core_gpu_common.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/shaders/ffx_core_gpu_common_half.h b/src/ffx-fsr2-api/shaders/ffx_core_gpu_common_half.h index 63105be..c46ccb3 100644 --- a/src/ffx-fsr2-api/shaders/ffx_core_gpu_common_half.h +++ b/src/ffx-fsr2-api/shaders/ffx_core_gpu_common_half.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/shaders/ffx_core_hlsl.h b/src/ffx-fsr2-api/shaders/ffx_core_hlsl.h index f114687..ad4ff65 100644 --- a/src/ffx-fsr2-api/shaders/ffx_core_hlsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_core_hlsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -916,9 +916,81 @@ FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) return max(min(x, y), min(max(x, y), z)); } +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. 
+/// +/// @ingroup HLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. 
+/// +/// @ingroup HLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -935,7 +1007,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -952,7 +1024,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. 
@@ -1268,6 +1340,40 @@ FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) return min(x, min(y, z)); } //------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x) { return rcp(x); diff --git a/src/ffx-fsr2-api/shaders/ffx_core_portability.h b/src/ffx-fsr2-api/shaders/ffx_core_portability.h index f0d3fd7..45be059 100644 --- a/src/ffx-fsr2-api/shaders/ffx_core_portability.h +++ b/src/ffx-fsr2-api/shaders/ffx_core_portability.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h index d0c5eae..1c5cd16 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,8 +22,6 @@ #ifndef FFX_FSR2_ACCUMULATE_H #define FFX_FSR2_ACCUMULATE_H -#define FFX_FSR2_OPTION_GUARANTEE_UPSAMPLE_WEIGHT_ON_NEW_SAMPLES 1 - FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) { return length(fMotionVector * DisplaySize()); @@ -35,31 +33,41 @@ FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) } #endif -void Accumulate(FfxInt32x2 iPxHrPos, FFX_PARAMETER_INOUT FfxFloat32x4 fHistory, FFX_PARAMETER_IN FfxFloat32x4 fUpsampled, FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, FFX_PARAMETER_IN FfxFloat32 fHrVelocity) +void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight) { - fHistory.w = fHistory.w + fUpsampled.w; + // Avoid invalid values when accumulation and upsampled weight are 0 + fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www); - fUpsampled.rgb = YCoCgToRGB(fUpsampled.rgb); +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) + 
fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz))); + fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor))); +#endif - const FfxFloat32 fAlpha = fUpsampled.w / fHistory.w; - fHistory.rgb = ffxLerp(fHistory.rgb, fUpsampled.rgb, fAlpha); + const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation; + fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha); - FfxFloat32 fMaxAverageWeight = FfxFloat32(ffxLerp(MaxAccumulationWeight(), accumulationMaxOnMotion, ffxSaturate(fHrVelocity * 10.0f))); - fHistory.w = ffxMin(fHistory.w, fMaxAverageWeight); + fHistoryColor = YCoCgToRGB(fHistoryColor); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fHistoryColor = InverseTonemap(fHistoryColor); +#endif } void RectifyHistory( - RectificationBoxData clippingBox, - inout FfxFloat32x4 fHistory, - FFX_PARAMETER_IN FfxFloat32x3 fLockStatus, - FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, - FFX_PARAMETER_IN FfxFloat32 fLumaStabilityFactor, - FFX_PARAMETER_IN FfxFloat32 fLuminanceDiff, - FFX_PARAMETER_IN FfxFloat32 fUpsampleWeight, - FFX_PARAMETER_IN FfxFloat32 fLockContributionThisFrame) + const AccumulationPassCommonParams params, + RectificationBox clippingBox, + FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, + FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation, + FfxFloat32 fLockContributionThisFrame, + FfxFloat32 fTemporalReactiveFactor, + FfxFloat32 fLumaInstabilityFactor) { - FfxFloat32 fScaleFactorInfluence = FfxFloat32(1.0f / DownscaleFactor().x - 1); - FfxFloat32 fBoxScale = FfxFloat32(1.0f) + (FfxFloat32(0.5f) * fScaleFactorInfluence); + FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f)); + + const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f); + const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor)); + FfxFloat32 fBoxScale = 
ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT); FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale; FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec; @@ -70,26 +78,22 @@ void RectifyHistory( boxMin = ffxMax(clippingBox.aabbMin, boxMin); boxMax = ffxMin(clippingBox.aabbMax, boxMax); - FfxFloat32x3 distToClampOutside = ffxMax(ffxMax(FfxFloat32x3(0, 0, 0), boxMin - fHistory.xyz), ffxMax(FfxFloat32x3(0, 0, 0), fHistory.xyz - boxMax)); - - if (any(FFX_GREATER_THAN(distToClampOutside, FfxFloat32x3(0, 0, 0)))) { + if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) { - const FfxFloat32x3 clampedHistorySample = clamp(fHistory.xyz, boxMin, boxMax); + const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax); - FfxFloat32x3 clippedHistoryToBoxCenter = abs(clampedHistorySample - boxCenter); - FfxFloat32x3 historyToBoxCenter = abs(fHistory.xyz - boxCenter); - FfxFloat32x3 HistoryColorWeight; - HistoryColorWeight.x = historyToBoxCenter.x > FfxFloat32(0) ? clippedHistoryToBoxCenter.x / historyToBoxCenter.x : FfxFloat32(0.0f); - HistoryColorWeight.y = historyToBoxCenter.y > FfxFloat32(0) ? clippedHistoryToBoxCenter.y / historyToBoxCenter.y : FfxFloat32(0.0f); - HistoryColorWeight.z = historyToBoxCenter.z > FfxFloat32(0) ? 
clippedHistoryToBoxCenter.z / historyToBoxCenter.z : FfxFloat32(0.0f); + FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx; + + const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor; + const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f); + fHistoryContribution *= fReactiveContribution; - FfxFloat32x3 fHistoryContribution = HistoryColorWeight; + // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection + fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution)); - // only lock luma - fHistoryContribution += ffxMax(fLockContributionThisFrame, fLumaStabilityFactor).xxx; - fHistoryContribution *= (fDepthClipFactor * fDepthClipFactor); - - fHistory.xyz = ffxLerp(clampedHistorySample.xyz, fHistory.xyz, ffxSaturate(fHistoryContribution)); + // Scale accumulation using rectification info + const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f)); + fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution)); } } @@ -98,166 +102,189 @@ void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor) StoreUpscaledOutput(iPxHrPos, fUpscaledColor); } -FfxFloat32 GetLumaStabilityFactor(FfxFloat32x2 fHrUv, FfxFloat32 fHrVelocity) +void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight) { - FfxFloat32 fLumaStabilityFactor = SampleLumaStabilityFactor(fHrUv); - - // Only apply on still, have to reproject luma history resource if we want it to work on motion - fLumaStabilityFactor *= FfxFloat32(fHrVelocity < 0.1f); + // we expect similar motion for next frame + // kill lock if that location is outside screen, avoid locks to be clamped to screen borders + FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; + if 
(IsUvInside(fEstimatedUvNextFrame) == false) { + KillLock(fLockStatus); + } + else { + // Decrease lock lifetime + const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame); + const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); + fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); + } - return fLumaStabilityFactor; + StoreLockStatus(params.iPxHrPos, fLockStatus); } -FfxFloat32 GetLockContributionThisFrame(FfxFloat32x2 fUvCoord, FfxFloat32 fAccumulationMask, FfxFloat32 fParticleMask, FfxFloat32x3 fLockStatus) + +FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState) { - const FfxFloat32 fNormalizedLockLifetime = GetNormalizedRemainingLockLifetime(fLockStatus); + // Always assume max accumulation was reached + FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor); - // Rectify on lock frame - FfxFloat32 fLockContributionThisFrame = ffxSaturate(fNormalizedLockLifetime * FfxFloat32(4)); + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10))))); - return fLockContributionThisFrame; + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20)))); + + return fBaseAccumulation.xxx; } -void FinalizeLockStatus(FfxInt32x2 iPxHrPos, FfxFloat32x3 fLockStatus, FfxFloat32 fUpsampledWeight) +FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 
fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff) { - // Increase trust - const FfxFloat32 fTrustIncreaseLanczosMax = FfxFloat32(12); // same increase no matter the MaxAccumulationWeight() value. - const FfxFloat32 fTrustIncrease = FfxFloat32(fUpsampledWeight / fTrustIncreaseLanczosMax); - fLockStatus[LOCK_TRUST] = ffxMin(FfxFloat32(1), fLockStatus[LOCK_TRUST] + fTrustIncrease); + const FfxInt32 N_MINUS_1 = 0; + const FfxInt32 N_MINUS_2 = 1; + const FfxInt32 N_MINUS_3 = 2; + const FfxInt32 N_MINUS_4 = 3; - // Decrease lock lifetime - const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(averageLanczosWeightPerFrame); - const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); - fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); + FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x; - StoreLockStatus(iPxHrPos, fLockStatus); -} +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma)); +#endif -FfxFloat32 ComputeMaxAccumulationWeight(FfxFloat32 fHrVelocity, FfxFloat32 fReactiveMax, FfxFloat32 fDepthClipFactor, FfxFloat32 fLuminanceDiff, LockState lockState) { + fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f; - FfxFloat32 normalizedMinimum = FfxFloat32(accumulationMaxOnMotion) / FfxFloat32(MaxAccumulationWeight()); + const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false); + FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? 
SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f); - FfxFloat32 fReactiveMaxAccumulationWeight = FfxFloat32(1) - fReactiveMax; - FfxFloat32 fMotionMaxAccumulationWeight = ffxLerp(FfxFloat32(1), normalizedMinimum, ffxSaturate(fHrVelocity * FfxFloat32(10))); - FfxFloat32 fDepthClipMaxAccumulationWeight = fDepthClipFactor; + FfxFloat32 fLumaInstability = 0.0f; + FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]); - FfxFloat32 fLuminanceDiffMaxAccumulationWeight = ffxSaturate(ffxMax(normalizedMinimum, FfxFloat32(1) - fLuminanceDiff)); + FfxFloat32 fMin = abs(fDiffs0); - FfxFloat32 maxAccumulation = FfxFloat32(MaxAccumulationWeight()) * ffxMin( - ffxMin(fReactiveMaxAccumulationWeight, fMotionMaxAccumulationWeight), - ffxMin(fDepthClipMaxAccumulationWeight, fLuminanceDiffMaxAccumulationWeight) - ); + if (fMin >= (1.0f / 255.0f)) { + for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { + FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]); - return (lockState.NewLock && !lockState.WasLockedPrevFrame) ? 
FfxFloat32(accumulationMaxOnMotion) : maxAccumulation; -} + if (sign(fDiffs0) == sign(fDiffs1)) { + + // Scale difference to protect historically similar values + const FfxFloat32 fMinBias = 1.0f; + fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias); + } + } + + fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)); -FfxFloat32x2 ComputeKernelWeight(in FfxFloat32 fHistoryWeight, in FfxFloat32 fDepthClipFactor, in FfxFloat32 fReactivityFactor) { - FfxFloat32 fKernelSizeBias = ffxSaturate(ffxMax(FfxFloat32(0), fHistoryWeight - FfxFloat32(0.5)) / FfxFloat32(3)); + fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 3.0f)); + fLumaInstability *= ffxLerp(1.0f, 0.0f, ffxSaturate(params.fHrVelocity / 20.0f)); + } - FfxFloat32 fOneMinusReactiveMax = FfxFloat32(1) - fReactivityFactor; - FfxFloat32x2 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)) * FfxFloat32(fKernelSizeBias) * fOneMinusReactiveMax; + //shift history + fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3]; + fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2]; + fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1]; + fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma; - //average value on disocclusion, to help decrease high value sample importance wait for accumulation to kick in - fKernelWeight *= FfxFloat32x2(0.5f, 0.5f) + fDepthClipFactor * FfxFloat32x2(0.5f, 0.5f); + StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory); - return ffxMin(FfxFloat32x2(1.99f, 1.99f), fKernelWeight); + return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0); } -void Accumulate(FfxInt32x2 iPxHrPos) +FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor) { - const FfxFloat32x2 fSamplePosHr = iPxHrPos + 0.5f; - const FfxFloat32x2 fPxLrPos = fSamplePosHr * DownscaleFactor(); // 
Source resolution output pixel center position - const FfxInt32x2 iPxLrPos = FfxInt32x2(floor(fPxLrPos)); // TODO: what about weird upscale factors... - - const FfxFloat32x2 fSamplePosUnjitterLr = (FfxFloat32x2(iPxLrPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor); - const FfxFloat32x2 fLrUvJittered = (fPxLrPos + Jitter()) / RenderSize(); + fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity))); - const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); - const FfxFloat32x2 fMotionVector = GetMotionVector(iPxHrPos, fHrUv); - - const FfxFloat32 fHrVelocity = GetPxHrVelocity(fMotionVector); - const FfxFloat32 fDepthClipFactor = ffxSaturate(SampleDepthClip(fLrUvJittered)); - const FfxFloat32 fLumaStabilityFactor = GetLumaStabilityFactor(fHrUv, fHrVelocity); - const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered); - const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x; - const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y; - const FfxBoolean bIsResetFrame = (0 == FrameIndex()); - - FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0); - FfxFloat32x3 fLockStatus; - InitializeNewLockSample(fLockStatus); - FfxBoolean bIsExistingSample = FFX_TRUE; + fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor)); - FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0); - ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample); + // Force reactive factor for new samples + fNewFactor = params.bIsNewSample ? 
1.0f : fNewFactor; - if (bIsExistingSample && !bIsResetFrame) { - ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight); - ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus); + if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) { + fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f; } + + return fNewFactor; +} - FfxFloat32 fLuminanceDiff = FfxFloat32(0.0f); +AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) +{ + AccumulationPassCommonParams params; - LockState lockState = PostProcessLockStatus(iPxHrPos, fLrUvJittered, FfxFloat32(fDepthClipFactor), fAccumulationMask, fHrVelocity, fHistoryColorAndWeight.w, fLockStatus, fLuminanceDiff); + params.iPxHrPos = iPxHrPos; + const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); + params.fHrUv = fHrUv; + + const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize(); + params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize()); - fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, ComputeMaxAccumulationWeight( - FfxFloat32(fHrVelocity), fReactiveMax, FfxFloat32(fDepthClipFactor), FfxFloat32(fLuminanceDiff), lockState - )); + params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); + params.fHrVelocity = GetPxHrVelocity(params.fMotionVector); - const FfxFloat32 fNormalizedLockLifetime = GetNormalizedRemainingLockLifetime(fLockStatus); + ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); - // Kill accumulation based on shading change - fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, FfxFloat32(ffxMax(0.0f, MaxAccumulationWeight() * ffxPow(FfxFloat32(1) - fLuminanceDiff, 2.0f / 1.0f)))); + params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler)); + + const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); + params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; + params.fAccumulationMask = fDilatedReactiveMasks.y; + 
params.bIsResetFrame = (0 == FrameIndex()); - // Load upsampled input color - RectificationBoxData clippingBox; + params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame); - FfxFloat32 fKernelBias = fAccumulationMask * ffxSaturate(ffxMax(0.0f, fHistoryColorAndWeight.w - 0.5f) / 3.0f); + return params; +} - FfxFloat32 fReactiveWeighted = 0; +void Accumulate(FfxInt32x2 iPxHrPos) +{ + const AccumulationPassCommonParams params = InitParams(iPxHrPos); - // No trust in reactive areas - fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], FfxFloat32(1.0f) - FfxFloat32(pow(fReactiveMax, 1.0f / 3.0f))); - fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], FfxFloat32(fDepthClipFactor)); + FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0); + FfxFloat32x2 fLockStatus; + InitializeNewLockSample(fLockStatus); - FfxFloat32x2 fKernelWeight = ComputeKernelWeight(fHistoryColorAndWeight.w, FfxFloat32(fDepthClipFactor), ffxMax((FfxFloat32(1) - fLockStatus[LOCK_TRUST]), fReactiveMax)); + FfxFloat32 fTemporalReactiveFactor = 0.0f; + FfxBoolean bInMotionLastFrame = FFX_FALSE; + LockState lockState = { FFX_FALSE , FFX_FALSE }; + if (params.bIsExistingSample && !params.bIsResetFrame) { + ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame); + lockState = ReprojectHistoryLockStatus(params, fLockStatus); + } - FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(iPxHrPos, fKernelWeight, clippingBox); + FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor); -#if FFX_FSR2_OPTION_GUARANTEE_UPSAMPLE_WEIGHT_ON_NEW_SAMPLES - // Make sure all samples have same weight on reset/first frame. Upsampled weight should never be 0.0f when history accumulation is 0.0f. - fUpsampledColorAndWeight.w = (fHistoryColorAndWeight.w == 0.0f) ? 
ffxMax(FSR2_EPSILON, fUpsampledColorAndWeight.w) : fUpsampledColorAndWeight.w; -#endif + FfxFloat32 fLuminanceDiff = 0.0f; + FfxFloat32 fLockContributionThisFrame = 0.0f; + UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff); - FfxFloat32 fLockContributionThisFrame = GetLockContributionThisFrame(fHrUv, fAccumulationMask, fReactiveMax, fLockStatus); + // Load upsampled input color + RectificationBox clippingBox; + FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor); + + const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff); - // Update accumulation and rectify history - if (fHistoryColorAndWeight.w > FfxFloat32(0)) { - RectifyHistory(clippingBox, fHistoryColorAndWeight, fLockStatus, FfxFloat32(fDepthClipFactor), FfxFloat32(fLumaStabilityFactor), FfxFloat32(fLuminanceDiff), fUpsampledColorAndWeight.w, fLockContributionThisFrame); + FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState); - fHistoryColorAndWeight.rgb = YCoCgToRGB(fHistoryColorAndWeight.rgb); + if (params.bIsNewSample) { + fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz); } + else { + RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor); - Accumulate(iPxHrPos, fHistoryColorAndWeight, fUpsampledColorAndWeight, fDepthClipFactor, fHrVelocity); + Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight); + } - //Subtract accumulation weight in reactive areas - fHistoryColorAndWeight.w -= fUpsampledColorAndWeight.w * fReactiveMax; + fHistoryColor = UnprepareRgb(fHistoryColor, Exposure()); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - fHistoryColorAndWeight.rgb = 
InverseTonemap(fHistoryColorAndWeight.rgb); -#endif - fHistoryColorAndWeight.rgb /= FfxFloat32(Exposure()); + FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w); - FinalizeLockStatus(iPxHrPos, fLockStatus, fUpsampledColorAndWeight.w); + // Get new temporal reactive factor + fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor); - StoreInternalColorAndWeight(iPxHrPos, fHistoryColorAndWeight); + StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor)); // Output final color when RCAS is disabled #if FFX_FSR2_OPTION_APPLY_SHARPENING == 0 - WriteUpscaledOutput(iPxHrPos, fHistoryColorAndWeight.rgb); + WriteUpscaledOutput(iPxHrPos, fHistoryColor); #endif + StoreNewLocks(iPxHrPos, 0); } #endif // FFX_FSR2_ACCUMULATE_H diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl index e1ee116..6006fd0 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,50 +19,38 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 5 -// SRV 4 : FSR2_Exposure : r_exposure -// SRV 6 : m_UpscaleTransparencyAndComposition : r_transparency_and_composition_mask -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 10 : FSR2_InternalUpscaled2 : r_internal_upscaled_color -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 12 : FSR2_DepthClip : r_depth_clip -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// SRV 14 : FSR2_LumaHistory : r_luma_history -// SRV 16 : FSR2_LanczosLutData : r_lanczos_lut -// SRV 26 : FSR2_MaximumUpsampleBias : r_upsample_maximum_bias_lut -// SRV 27 : FSR2_ReactiveMaskMax : r_reactive_max -// SRV 28 : FSR2_ExposureMips : r_imgMips -// UAV 10 : FSR2_InternalUpscaled1 : rw_internal_upscaled_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : FSR2DispatchOffsets - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_upscaled_output declaration +#extension GL_EXT_shader_image_load_formatted : require -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 #define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 #if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 #else -#define FSR2_BIND_SRV_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 #endif #define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 #define FSR2_BIND_SRV_LOCK_STATUS 4 -#define FSR2_BIND_SRV_DEPTH_CLIP 5 +#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5 #define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 -#define FSR2_BIND_SRV_LUMA_HISTORY 7 +#define FSR2_BIND_SRV_LUMA_INSTABILITY 7 #define FSR2_BIND_SRV_LANCZOS_LUT 8 #define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9 -#define FSR2_BIND_SRV_EXPOSURE_MIPS 10 -#define FSR2_BIND_UAV_INTERNAL_UPSCALED 11 -#define FSR2_BIND_UAV_LOCK_STATUS 12 -#define FSR2_BIND_UAV_UPSCALED_OUTPUT 13 +#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 
10 +#define FSR2_BIND_SRV_AUTO_EXPOSURE 11 +#define FSR2_BIND_SRV_LUMA_HISTORY 12 + +#define FSR2_BIND_UAV_INTERNAL_UPSCALED 13 +#define FSR2_BIND_UAV_LOCK_STATUS 14 +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 15 +#define FSR2_BIND_UAV_NEW_LOCKS 16 +#define FSR2_BIND_UAV_LUMA_HISTORY 17 -#define FSR2_BIND_CB_FSR2 14 +#define FSR2_BIND_CB_FSR2 18 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.hlsl index 4321f99..747f380 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.hlsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,43 +19,27 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 5 -// SRV 4 : FSR2_Exposure : r_exposure -// SRV 6 : m_UpscaleTransparencyAndComposition : r_transparency_and_composition_mask -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 10 : FSR2_InternalUpscaled2 : r_internal_upscaled_color -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 12 : FSR2_DepthClip : r_depth_clip -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// SRV 14 : FSR2_LumaHistory : r_luma_history -// SRV 16 : FSR2_LanczosLutData : r_lanczos_lut -// SRV 26 : FSR2_MaximumUpsampleBias : r_upsample_maximum_bias_lut -// SRV 27 : FSR2_DilatedReactiveMasks : r_dilated_reactive_masks -// SRV 28 : FSR2_ExposureMips : r_imgMips -// UAV 10 : FSR2_InternalUpscaled1 : rw_internal_upscaled_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : FSR2DispatchOffsets - -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 #if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 #else -#define FSR2_BIND_SRV_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 #endif #define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 #define FSR2_BIND_SRV_LOCK_STATUS 4 -#define FSR2_BIND_SRV_DEPTH_CLIP 5 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 -#define FSR2_BIND_SRV_LUMA_HISTORY 7 -#define FSR2_BIND_SRV_LANCZOS_LUT 8 -#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9 -#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 10 -#define FSR2_BIND_SRV_EXPOSURE_MIPS 11 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 +#define FSR2_BIND_SRV_LANCZOS_LUT 6 +#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 7 +#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 8 +#define FSR2_BIND_SRV_AUTO_EXPOSURE 9 +#define FSR2_BIND_SRV_LUMA_HISTORY 10 + #define FSR2_BIND_UAV_INTERNAL_UPSCALED 0 #define FSR2_BIND_UAV_LOCK_STATUS 1 #define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 +#define 
FSR2_BIND_UAV_NEW_LOCKS 3 +#define FSR2_BIND_UAV_LUMA_HISTORY 4 #define FSR2_BIND_CB_FSR2 0 @@ -80,6 +64,7 @@ #define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] #endif // #ifndef FFX_FSR2_NUM_THREADS +FFX_FSR2_PREFER_WAVE64 FFX_FSR2_NUM_THREADS FFX_FSR2_EMBED_ROOTSIG_CONTENT void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.glsl index b509eb0..7ae41cf 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,18 +24,18 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require -#define FSR2_BIND_SRV_PRE_ALPHA_COLOR 0 -#define FSR2_BIND_SRV_POST_ALPHA_COLOR 1 -#define FSR2_BIND_UAV_REACTIVE 2 +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_UAV_AUTOREACTIVE 2 #define FSR2_BIND_CB_REACTIVE 3 #define FSR2_BIND_CB_FSR2 4 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" -layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D r_input_color_pre_alpha; -layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha; -layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask; +// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D 
r_input_color_pre_alpha; +// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha; +// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask; #ifndef FFX_FSR2_THREAD_GROUP_WIDTH @@ -51,6 +51,7 @@ layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D r #define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; #endif // #ifndef FFX_FSR2_NUM_THREADS +#if defined(FSR2_BIND_CB_REACTIVE) layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t { float scale; @@ -58,14 +59,15 @@ layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReac float binaryValue; uint flags; } cbGenerateReactive; +#endif FFX_FSR2_NUM_THREADS void main() { FfxUInt32x2 uDispatchThreadId = gl_GlobalInvocationID.xy; - FfxFloat32x3 ColorPreAlpha = texelFetch(r_input_color_pre_alpha, FfxInt32x2(uDispatchThreadId), 0).rgb; - FfxFloat32x3 ColorPostAlpha = texelFetch(r_input_color_post_alpha, FfxInt32x2(uDispatchThreadId), 0).rgb; + FfxFloat32x3 ColorPreAlpha = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb; + FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb; if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0) { @@ -87,5 +89,5 @@ void main() out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < cbGenerateReactive.threshold) ? 
0 : cbGenerateReactive.binaryValue) : out_reactive_value; - imageStore(rw_output_reactive_mask, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value)); + imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value)); } diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.hlsl index 903ceae..a78a8e3 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.hlsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_autogen_reactive_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,18 +19,16 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-#define FSR2_BIND_SRV_PRE_ALPHA_COLOR 0 -#define FSR2_BIND_SRV_POST_ALPHA_COLOR 1 -#define FSR2_BIND_UAV_REACTIVE 0 +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_UAV_AUTOREACTIVE 0 + #define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_REACTIVE 1 #include "ffx_fsr2_callbacks_hlsl.h" #include "ffx_fsr2_common.h" -Texture2D r_input_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PRE_ALPHA_COLOR); -Texture2D r_input_color_post_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_POST_ALPHA_COLOR); -RWTexture2D rw_output_reactive_mask : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_REACTIVE); - #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 #endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH @@ -44,13 +42,15 @@ RWTexture2D rw_output_reactive_mask : FF #define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] #endif // #ifndef FFX_FSR2_NUM_THREADS -cbuffer cbGenerateReactive : register(b0) +#if defined(FSR2_BIND_CB_REACTIVE) +cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) { float scale; float threshold; float binaryValue; uint flags; }; +#endif FFX_FSR2_NUM_THREADS FFX_FSR2_EMBED_ROOTSIG_CONTENT @@ -58,8 +58,8 @@ void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) { uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; - float3 ColorPreAlpha = r_input_color_pre_alpha[uDispatchThreadId].rgb; - float3 ColorPostAlpha = r_input_color_post_alpha[uDispatchThreadId].rgb; + float3 ColorPreAlpha = LoadOpaqueOnly( FFX_MIN16_I2(uDispatchThreadId) ).rgb; + float3 ColorPostAlpha = LoadInputColor(uDispatchThreadId).rgb; if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) { @@ -81,5 +81,5 @@ void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) out_reactive_value = (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? 
(out_reactive_value < threshold ? 0 : binaryValue) : out_reactive_value; - rw_output_reactive_mask[uDispatchThreadId] = out_reactive_value; + rw_output_autoreactive[uDispatchThreadId] = out_reactive_value; } diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h index d598250..10da13f 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -32,49 +32,67 @@ #if defined(FSR2_BIND_CB_FSR2) layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t { - FfxInt32x2 iRenderSize; - FfxInt32x2 iDisplaySize; - FfxInt32x2 uLumaMipDimensions; - FfxInt32 uLumaMipLevelToUse; - FfxInt32 uFrameIndex; - FfxFloat32x2 fDisplaySizeRcp; - FfxFloat32x2 fJitter; - FfxFloat32x4 fDeviceToViewDepth; - FfxFloat32x2 depthclip_uv_scale; - FfxFloat32x2 postprocessed_lockstatus_uv_scale; - FfxFloat32x2 reactive_mask_dim_rcp; - FfxFloat32x2 MotionVectorScale; - FfxFloat32x2 fDownscaleFactor; - FfxFloat32 fPreExposure; - FfxFloat32 fTanHalfFOV; - FfxFloat32x2 fMotionVectorJitterCancellation; - FfxFloat32 fJitterSequenceLength; - FfxFloat32 fLockInitialLifetime; - FfxFloat32 fLockTickDelta; - FfxFloat32 fDeltaTime; - FfxFloat32 fDynamicResChangeFactor; - FfxFloat32 fLumaMipRcp; + FfxInt32x2 iRenderSize; + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iDisplaySize; + FfxInt32x2 iInputColorResourceDimensions; + FfxInt32x2 iLumaMipDimensions; + FfxInt32 iLumaMipLevelToUse; + FfxInt32 iFrameIndex; + + FfxFloat32x4 fDeviceToViewDepth; + FfxFloat32x2 fJitter; + FfxFloat32x2 fMotionVectorScale; + 
FfxFloat32x2 fDownscaleFactor; + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fPreExposure; + FfxFloat32 fPreviousFramePreExposure; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + FfxFloat32 fDeltaTime; + FfxFloat32 fDynamicResChangeFactor; + FfxFloat32 fViewSpaceToMetersFactor; } cbFSR2; #endif -FfxFloat32 LumaMipRcp() +FfxInt32x2 RenderSize() +{ + return cbFSR2.iRenderSize; +} + +FfxInt32x2 MaxRenderSize() +{ + return cbFSR2.iMaxRenderSize; +} + +FfxInt32x2 DisplaySize() +{ + return cbFSR2.iDisplaySize; +} + +FfxInt32x2 InputColorResourceDimensions() { - return cbFSR2.fLumaMipRcp; + return cbFSR2.iInputColorResourceDimensions; } FfxInt32x2 LumaMipDimensions() { - return cbFSR2.uLumaMipDimensions; + return cbFSR2.iLumaMipDimensions; } FfxInt32 LumaMipLevelToUse() { - return cbFSR2.uLumaMipLevelToUse; + return cbFSR2.iLumaMipLevelToUse; } -FfxFloat32x2 DownscaleFactor() +FfxInt32 FrameIndex() { - return cbFSR2.fDownscaleFactor; + return cbFSR2.iFrameIndex; +} + +FfxFloat32x4 DeviceToViewSpaceTransformFactors() +{ + return cbFSR2.fDeviceToViewDepth; } FfxFloat32x2 Jitter() @@ -82,39 +100,39 @@ FfxFloat32x2 Jitter() return cbFSR2.fJitter; } -FfxFloat32x2 MotionVectorJitterCancellation() +FfxFloat32x2 MotionVectorScale() { - return cbFSR2.fMotionVectorJitterCancellation; + return cbFSR2.fMotionVectorScale; } -FfxInt32x2 RenderSize() +FfxFloat32x2 DownscaleFactor() { - return cbFSR2.iRenderSize; + return cbFSR2.fDownscaleFactor; } -FfxInt32x2 DisplaySize() +FfxFloat32x2 MotionVectorJitterCancellation() { - return cbFSR2.iDisplaySize; + return cbFSR2.fMotionVectorJitterCancellation; } -FfxFloat32x2 DisplaySizeRcp() +FfxFloat32 PreExposure() { - return cbFSR2.fDisplaySizeRcp; + return cbFSR2.fPreExposure; } -FfxFloat32 JitterSequenceLength() +FfxFloat32 PreviousFramePreExposure() { - return cbFSR2.fJitterSequenceLength; + return cbFSR2.fPreviousFramePreExposure; } -FfxFloat32 LockInitialLifetime() +FfxFloat32 TanHalfFoV() { - return 
cbFSR2.fLockInitialLifetime; + return cbFSR2.fTanHalfFOV; } -FfxFloat32 LockTickDelta() +FfxFloat32 JitterSequenceLength() { - return cbFSR2.fLockTickDelta; + return cbFSR2.fJitterSequenceLength; } FfxFloat32 DeltaTime() @@ -122,38 +140,37 @@ FfxFloat32 DeltaTime() return cbFSR2.fDeltaTime; } -FfxFloat32 MaxAccumulationWeight() -{ - const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples - - return 12; //32.0f * averageLanczosWeightPerFrame; -} - FfxFloat32 DynamicResChangeFactor() { return cbFSR2.fDynamicResChangeFactor; } -FfxInt32 FrameIndex() +FfxFloat32 ViewSpaceToMetersFactor() { - return cbFSR2.uFrameIndex; + return cbFSR2.fViewSpaceToMetersFactor; } layout (set = 0, binding = 0) uniform sampler s_PointClamp; layout (set = 0, binding = 1) uniform sampler s_LinearClamp; // SRVs +#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only; +#endif #if defined(FSR2_BIND_SRV_INPUT_COLOR) layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered; #endif -#if defined(FSR2_BIND_SRV_MOTION_VECTORS) - layout (set = 1, binding = FSR2_BIND_SRV_MOTION_VECTORS) uniform texture2D r_motion_vectors; +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; #endif -#if defined(FSR2_BIND_SRV_DEPTH) - layout (set = 1, binding = FSR2_BIND_SRV_DEPTH) uniform texture2D r_depth; +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth; +#endif +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure; #endif -#if defined(FSR2_BIND_SRV_EXPOSURE) - layout (set = 1, binding = FSR2_BIND_SRV_EXPOSURE) uniform texture2D r_exposure; +#if 
defined(FSR2_BIND_SRV_AUTO_EXPOSURE) + layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure; #endif #if defined(FSR2_BIND_SRV_REACTIVE_MASK) layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask; @@ -167,6 +184,9 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; #endif +#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) + layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors; +#endif #if defined(FSR2_BIND_SRV_DILATED_DEPTH) layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth; #endif @@ -176,8 +196,11 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_LOCK_STATUS) layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status; #endif -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) - layout (set = 1, binding = FSR2_BIND_SRV_DEPTH_CLIP) uniform texture2D r_depth_clip; +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) + layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma; +#endif +#if defined(FSR2_BIND_SRV_NEW_LOCKS) + layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks; #endif #if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color; @@ -191,8 +214,8 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_LANCZOS_LUT) layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut; #endif -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) - layout (set = 1, binding = FSR2_BIND_SRV_EXPOSURE_MIPS) uniform texture2D r_imgMips; +#if 
defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) + layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips; #endif #if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut; @@ -200,444 +223,348 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks; #endif +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) + layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha; +#endif +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) + layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha; +#endif // UAV #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg32f) uniform image2D rw_dilated_motion_vectors; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors; #endif #if defined FSR2_BIND_UAV_DILATED_DEPTH - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilatedDepth; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth; #endif #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED - layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba32f) uniform image2D rw_internal_upscaled_color; + layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color; #endif #if defined FSR2_BIND_UAV_LOCK_STATUS - 
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status; + layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status; +#endif +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) + layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma; #endif -#if defined FSR2_BIND_UAV_DEPTH_CLIP - layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f) uniform image2D rw_depth_clip; +#if defined FSR2_BIND_UAV_NEW_LOCKS + layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks; #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) uniform image2D rw_prepared_input_color; + layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color; #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY - layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f) uniform image2D rw_luma_history; + layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history; #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output; + layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r32f) coherent uniform image2D rw_img_mip_shading_change; + layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r32f) coherent uniform image2D rw_img_mip_5; + layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D 
rw_img_mip_5; #endif #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg32f) uniform image2D rw_dilated_reactive_masks; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks; #endif #if defined FSR2_BIND_UAV_EXPOSURE layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure; -#endif +#endif +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure; +#endif #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic; #endif +#if defined FSR2_BIND_UAV_AUTOREACTIVE + layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive; +#endif +#if defined FSR2_BIND_UAV_AUTOCOMPOSITION + layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition; +#endif +#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR + layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha; +#endif +#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR + layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha; +#endif + +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) return texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r; -#else - return 0.f; -#endif } +#endif - +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) - fUV *= cbFSR2.depthclip_uv_scale; return textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, FfxFloat32(mipLevel)).r; -#else - return 
0.f; -#endif -} - -// -// a 0 0 0 x -// 0 b 0 0 y -// 0 0 c d z -// 0 0 e 0 1 -// -// z' = (z*c+d)/(z*e) -// z' = (c/e) + d/(z*e) -// z' - (c/e) = d/(z*e) -// (z'e - c)/e = d/(z*e) -// e / (z'e - c) = (z*e)/d -// (e * d) / (z'e - c) = z*e -// z = d / (z'e - c) -FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth) -{ - return -cbFSR2.fDeviceToViewDepth[2] / (fDeviceDepth * cbFSR2.fDeviceToViewDepth[1] - cbFSR2.fDeviceToViewDepth[0]); } +#endif +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH) - return texelFetch(r_depth, iPxPos, 0).r; -#else - return 0.f; -#endif + return texelFetch(r_input_depth, iPxPos, 0).r; } +#endif +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; -#else - return 0.f; -#endif -} - -FfxFloat32x4 GatherReactiveMask(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) - return textureGather(sampler2D(r_reactive_mask, s_LinearClamp), FfxFloat32x2(iPxPos) * cbFSR2.reactive_mask_dim_rcp, 0); -#else - return FfxFloat32x4(0.f); -#endif } - -FfxFloat32 LoadTransparencyAndCompositionMask(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) - return texelFetch(r_transparency_and_composition_mask, iPxPos, 0).r; -#else - return 0.f; #endif -} -FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) -{ #if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) - fUV *= cbFSR2.depthclip_uv_scale; - return textureLod(sampler2D(r_transparency_and_composition_mask, s_LinearClamp), fUV, 0.0f).x; -#else - return 0.f; -#endif -} - -FfxFloat32 PreExposure() +FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) { - return cbFSR2.fPreExposure; + return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r; } +#endif +#if defined(FSR2_BIND_SRV_INPUT_COLOR) 
FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_INPUT_COLOR) - return texelFetch(r_input_color_jittered, iPxPos, 0).rgb / PreExposure(); -#else - return FfxFloat32x3(0.f); -#endif + return texelFetch(r_input_color_jittered, iPxPos, 0).rgb; } +#endif -FfxFloat32x3 LoadInputColorWithoutPreExposure(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_INPUT_COLOR) - return texelFetch(r_input_color_jittered, iPxPos, 0).rgb; -#else - return FfxFloat32x3(0.f); -#endif +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0f).rgb; } +#endif +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) - return texelFetch(r_prepared_input_color, iPxPos, 0).rgb; -#else - return FfxFloat32x3(0.f); -#endif + return texelFetch(r_prepared_input_color, iPxPos, 0).xyz; } - -FfxFloat32 LoadPreparedInputColorLuma(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) - return texelFetch(r_prepared_input_color, iPxPos, 0).a; -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) { -#if defined(FSR2_BIND_SRV_MOTION_VECTORS) - FfxFloat32x2 fSrcMotionVector = texelFetch(r_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; -#else - FfxFloat32x2 fSrcMotionVector = FfxFloat32x2(0.f); -#endif + FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; - FfxFloat32x2 fUvMotionVector = fSrcMotionVector * cbFSR2.MotionVectorScale; + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); #if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS - fUvMotionVector -= cbFSR2.fMotionVectorJitterCancellation; + fUvMotionVector -= MotionVectorJitterCancellation(); #endif return fUvMotionVector; } +#endif +#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) 
FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory) { -#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) return texelFetch(r_internal_upscaled_color, iPxHistory, 0); -#else - return FfxFloat32x4(0.0f); -#endif } - -FfxFloat32x4 LoadRwInternalUpscaledColorAndWeight(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) - return imageLoad(rw_internal_upscaled_color, iPxPos); -#else - return FfxFloat32x4(0.f); #endif -} +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) { -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) imageStore(rw_luma_history, FfxInt32x2(iPxPos), fLumaHistory); -#endif } - -FfxFloat32x4 LoadRwLumaHistory(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) - return imageLoad(rw_luma_history, FfxInt32x2(iPxPos)); -#else - return FfxFloat32x4(1.f); #endif -} -FfxFloat32 LoadLumaStabilityFactor(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LUMA_HISTORY) - return texelFetch(r_luma_history, FfxInt32x2(iPxPos), 0).w; -#else - return 0.f; -#endif -} - -FfxFloat32 SampleLumaStabilityFactor(FfxFloat32x2 fUV) +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_LUMA_HISTORY) - fUV *= cbFSR2.depthclip_uv_scale; - return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f).w; -#else - return 0.f; -#endif + return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f); } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) imageStore(rw_internal_upscaled_color, iPxHistory, fHistory); -#endif } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), fColorAndWeight); -#endif } +#endif +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) void 
StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor) { -#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) - imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor * PreExposure(), 1.f)); -#endif + imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f)); } +#endif -FfxFloat32x3 LoadLockStatus(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LOCK_STATUS) - FfxFloat32x3 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rgb; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; +FfxFloat32x2 LoadLockStatus(FfxInt32x2 iPxPos) +{ + FfxFloat32x2 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rg; return fLockStatus; -#else - return FfxFloat32x3(0.f); -#endif } +#endif -FfxFloat32x3 LoadRwLockStatus(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_UAV_LOCK_STATUS) - FfxFloat32x3 fLockStatus = imageLoad(rw_lock_status, iPxPos).rgb; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - - return fLockStatus; -#else - return FfxFloat32x3(0.f); -#endif +void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x2 fLockstatus) +{ + imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f, 0.0f)); } +#endif -void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x3 fLockstatus) +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) +FfxFloat32 LoadLockInputLuma(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_LOCK_STATUS) - fLockstatus[0] += LockInitialLifetime() * 2.0f; - - imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f)); -#endif + return texelFetch(r_lock_input_luma, iPxPos, 0).r; } +#endif -void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) +void StoreLockInputLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma) { -#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) - imageStore(rw_prepared_input_color, iPxPos, fTonemapped); -#endif + imageStore(rw_lock_input_luma, iPxPos, vec4(fLuma, 0, 0, 0)); } +#endif -FfxBoolean IsResponsivePixel(FfxInt32x2 iPxPos) +#if 
defined(FSR2_BIND_SRV_NEW_LOCKS) +FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos) { - return FFX_FALSE; //not supported in prototype + return texelFetch(r_new_locks, iPxPos, 0).r; } +#endif -FfxFloat32 LoadDepthClip(FfxInt32x2 iPxPos) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) +FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) - return texelFetch(r_depth_clip, iPxPos, 0).r; -#else - return 0.f; -#endif + return imageLoad(rw_new_locks, iPxPos).r; } +#endif -FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) +void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) - fUV *= cbFSR2.depthclip_uv_scale; - return textureLod(sampler2D(r_depth_clip, s_LinearClamp), fUV, 0.0f).r; -#else - return 0.f; -#endif + imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0)); } +#endif -FfxFloat32x3 SampleLockStatus(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) +void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) { -#if defined(FSR2_BIND_SRV_LOCK_STATUS) - fUV *= cbFSR2.postprocessed_lockstatus_uv_scale; - FfxFloat32x3 fLockStatus = textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rgb; - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - return fLockStatus; -#else - return FfxFloat32x3(0.f); -#endif + imageStore(rw_prepared_input_color, iPxPos, fTonemapped); } +#endif -void StoreDepthClip(FfxInt32x2 iPxPos, FfxFloat32 fClip) +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) +FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_UAV_DEPTH_CLIP) - imageStore(rw_depth_clip, iPxPos, vec4(fClip, 0.0f, 0.0f, 0.0f)); -#endif + return textureLod(sampler2D(r_prepared_input_color, s_LinearClamp), fUV, 0.0f).w; } +#endif -FfxFloat32 TanHalfFoV() +#if defined(FSR2_BIND_SRV_LOCK_STATUS) +FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) { - return cbFSR2.fTanHalfFOV; + FfxFloat32x2 fLockStatus = 
textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rg; + return fLockStatus; } +#endif +#if defined(FSR2_BIND_SRV_DEPTH) FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DEPTH) - return texelFetch(r_depth, iPxInput, 0).r; -#else - return 0.f; -#endif + return texelFetch(r_input_depth, iPxInput, 0).r; } +#endif +#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r); -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth) { FfxUInt32 uDepth = floatBitsToUint(fDepth); -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) + #if FFX_FSR2_OPTION_INVERTED_DEPTH imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); #else imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth #endif -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue) { -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0)); -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_DEPTH) void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { -#if defined(FSR2_BIND_UAV_DILATED_DEPTH) //FfxUInt32 uDepth = f32tof16(fDepth); imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f)); -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) { -#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) 
imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f)); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) return texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg; -#else - return FfxFloat32x2(0.f); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) - fUV *= cbFSR2.depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg; -#else - return FfxFloat32x2(0.f); +} #endif + +#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) +FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxInt32x2 iPxInput) +{ + return texelFetch(r_previous_dilated_motion_vectors, iPxInput, 0).rg; } -FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) +FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 fUV) { + return textureLod(sampler2D(r_previous_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).xy; +} +#endif + #if defined(FSR2_BIND_SRV_DILATED_DEPTH) +FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) +{ return texelFetch(r_dilatedDepth, iPxInput, 0).r; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) FfxFloat32 Exposure() { - #if defined(FSR2_BIND_SRV_EXPOSURE) - FfxFloat32 exposure = texelFetch(r_exposure, FfxInt32x2(0,0), 0).x; - #else - FfxFloat32 exposure = 1.f; - #endif + FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x; if (exposure == 0.0f) { exposure = 1.0f; @@ -645,6 +572,20 @@ FfxFloat32 Exposure() return exposure; } +#endif + +#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) +FfxFloat32 AutoExposure() +{ + FfxFloat32 exposure = texelFetch(r_auto_exposure, FfxInt32x2(0, 0), 0).x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return 
exposure; +} +#endif FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) { @@ -655,41 +596,86 @@ FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) #endif } +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) { -#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. return FfxFloat32(2.0f) * FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r); -#else - return FfxFloat32(0.f); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) - fUV *= cbFSR2.depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg; -#else - return FfxFloat32x2(0.f); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) return texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg; -#else - return FfxFloat32x2(0.f); -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) { -#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) imageStore(rw_dilated_reactive_masks, iPxPos, vec4(fDilatedReactiveMasks, 0.0f, 0.0f)); +} +#endif + +#if defined(FFX_INTERNAL) +FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba; +} #endif + +#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_opaque_only, iPxPos, 0).xyz; } +#endif +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) +FfxFloat32x3 
LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR2_BIND_UAV_AUTOREACTIVE) +#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + imageStore(rw_output_autoreactive, iPxPos, vec4(FfxFloat32(fReactive.x), 0.0f, 0.0f, 0.0f)); + + imageStore(rw_output_autocomposition, iPxPos, vec4(FfxFloat32(fReactive.y), 0.0f, 0.0f, 0.0f)); +} +#endif +#endif + +#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_pre_alpha, iPxPos, vec4(color, 0.0f)); +} +#endif + +#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_post_alpha, iPxPos, vec4(color, 0.0f)); +} +#endif #endif // #if defined(FFX_GPU) diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h index 4641927..fd722b3 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -48,56 +48,32 @@ #define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) #define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) -#if defined(FSR2_BIND_CB_FSR2) +#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL) cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) { - FfxInt32x2 uRenderSize; - FfxInt32x2 uDisplaySize; - FfxInt32x2 uLumaMipDimensions; - FfxInt32 uLumaMipLevelToUse; - FfxUInt32 uFrameIndex; - FfxFloat32x2 fDisplaySizeRcp; - FfxFloat32x2 fJitter; + FfxInt32x2 iRenderSize; + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iDisplaySize; + FfxInt32x2 iInputColorResourceDimensions; + FfxInt32x2 iLumaMipDimensions; + FfxInt32 iLumaMipLevelToUse; + FfxInt32 iFrameIndex; + FfxFloat32x4 fDeviceToViewDepth; - FfxFloat32x2 depthclip_uv_scale; - FfxFloat32x2 postprocessed_lockstatus_uv_scale; - FfxFloat32x2 reactive_mask_dim_rcp; - FfxFloat32x2 MotionVectorScale; + FfxFloat32x2 fJitter; + FfxFloat32x2 fMotionVectorScale; FfxFloat32x2 fDownscaleFactor; + FfxFloat32x2 fMotionVectorJitterCancellation; FfxFloat32 fPreExposure; + FfxFloat32 fPreviousFramePreExposure; FfxFloat32 fTanHalfFOV; - FfxFloat32x2 fMotionVectorJitterCancellation; FfxFloat32 fJitterSequenceLength; - FfxFloat32 fLockInitialLifetime; - FfxFloat32 fLockTickDelta; FfxFloat32 fDeltaTime; FfxFloat32 fDynamicResChangeFactor; - FfxFloat32 fLumaMipRcp; -#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE 36 // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. 
+ FfxFloat32 fViewSpaceToMetersFactor; }; -#else - #define iRenderSize 0 - #define iDisplaySize 0 - #define iLumaMipDimensions 0 - #define iLumaMipLevelToUse 0 - #define iFrameIndex 0 - #define fDisplaySizeRcp 0 - #define fJitter 0 - #define fDeviceToViewDepth FfxFloat32x4(0,0,0,0) - #define depthclip_uv_scale 0 - #define postprocessed_lockstatus_uv_scale 0 - #define reactive_mask_dim_rcp 0 - #define MotionVectorScale 0 - #define fDownscaleFactor 0 - #define fPreExposure 0 - #define fTanHalfFOV 0 - #define fMotionVectorJitterCancellation 0 - #define fJitterSequenceLength 0 - #define fLockInitialLifetime 0 - #define fLockTickDelta 0 - #define fDeltaTime 0 - #define fDynamicResChangeFactor 0 - #define fLumaMipRcp 0 + +#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. #endif #if defined(FFX_GPU) @@ -146,25 +122,40 @@ #endif // #if FFX_FSR2_EMBED_ROOTSIG #endif // #if defined(FFX_GPU) +/* Define getter functions in the order they are defined in the CB! 
*/ +FfxInt32x2 RenderSize() +{ + return iRenderSize; +} -FfxFloat32 LumaMipRcp() +FfxInt32x2 MaxRenderSize() { - return fLumaMipRcp; + return iMaxRenderSize; +} + +FfxInt32x2 DisplaySize() +{ + return iDisplaySize; +} + +FfxInt32x2 InputColorResourceDimensions() +{ + return iInputColorResourceDimensions; } FfxInt32x2 LumaMipDimensions() { - return uLumaMipDimensions; + return iLumaMipDimensions; } FfxInt32 LumaMipLevelToUse() { - return uLumaMipLevelToUse; + return iLumaMipLevelToUse; } -FfxFloat32x2 DownscaleFactor() +FfxInt32 FrameIndex() { - return fDownscaleFactor; + return iFrameIndex; } FfxFloat32x2 Jitter() @@ -172,51 +163,49 @@ FfxFloat32x2 Jitter() return fJitter; } -FfxFloat32x2 MotionVectorJitterCancellation() +FfxFloat32x4 DeviceToViewSpaceTransformFactors() { - return fMotionVectorJitterCancellation; + return fDeviceToViewDepth; } -FfxInt32x2 RenderSize() +FfxFloat32x2 MotionVectorScale() { - return uRenderSize; + return fMotionVectorScale; } -FfxInt32x2 DisplaySize() +FfxFloat32x2 DownscaleFactor() { - return uDisplaySize; + return fDownscaleFactor; } -FfxFloat32x2 DisplaySizeRcp() +FfxFloat32x2 MotionVectorJitterCancellation() { - return fDisplaySizeRcp; + return fMotionVectorJitterCancellation; } -FfxFloat32 JitterSequenceLength() +FfxFloat32 PreExposure() { - return fJitterSequenceLength; + return fPreExposure; } -FfxFloat32 LockInitialLifetime() +FfxFloat32 PreviousFramePreExposure() { - return fLockInitialLifetime; + return fPreviousFramePreExposure; } -FfxFloat32 LockTickDelta() +FfxFloat32 TanHalfFoV() { - return fLockTickDelta; + return fTanHalfFOV; } -FfxFloat32 DeltaTime() +FfxFloat32 JitterSequenceLength() { - return fDeltaTime; + return fJitterSequenceLength; } -FfxFloat32 MaxAccumulationWeight() +FfxFloat32 DeltaTime() { - const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples - - return 12; //32.0f * averageLanczosWeightPerFrame; + return fDeltaTime; } FfxFloat32 
DynamicResChangeFactor() @@ -224,574 +213,495 @@ FfxFloat32 DynamicResChangeFactor() return fDynamicResChangeFactor; } -FfxUInt32 FrameIndex() +FfxFloat32 ViewSpaceToMetersFactor() { - return uFrameIndex; + return fViewSpaceToMetersFactor; } + SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); // SRVs #if defined(FFX_INTERNAL) + Texture2D r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY); Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); - Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); - Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); - Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); + Texture2D r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); + Texture2D r_input_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); + Texture2D r_input_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); + Texture2D r_auto_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + Texture2D r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS); Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); Texture2D r_internal_upscaled_color : 
FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + Texture2D r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA); + Texture2D r_new_locks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS); + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); - Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE); Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); - Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - Texture2D r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); + Texture2D r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + Texture2D r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); - // declarations not 
current form, no accessor functions - Texture2D r_transparency_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK); - Texture2D r_bias_current_color_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_BIAS_CURRENT_COLOR_MASK); - Texture2D r_gbuffer_albedo : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_ALBEDO); - Texture2D r_gbuffer_roughness : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_ROUGHNESS); - Texture2D r_gbuffer_metallic : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_METALLIC); - Texture2D r_gbuffer_specular : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SPECULAR); - Texture2D r_gbuffer_subsurface : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SUBSURFACE); - Texture2D r_gbuffer_normals : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_NORMALS); - Texture2D r_gbuffer_shading_mode_id : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SHADING_MODE_ID); - Texture2D r_gbuffer_material_id : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_MATERIAL_ID); - Texture2D r_motion_vectors_3d : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_3D); - Texture2D r_is_particle_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_IS_PARTICLE_MASK); - Texture2D r_animated_texture_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_ANIMATED_TEXTURE_MASK); - Texture2D r_depth_high_res : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_HIGH_RES); - Texture2D r_position_view_space : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_POSITION_VIEW_SPACE); - Texture2D r_ray_tracing_hit_distance : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RAY_TRACING_HIT_DISTANCE); - Texture2D r_motion_vectors_reflection : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION); + Texture2D r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); // UAV declarations RWTexture2D 
rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + RWTexture2D rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA); + RWTexture2D rw_new_locks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS); + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); - globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE); - globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5); - RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE); - globallycoherent RWTexture2D rw_spd_global_atomic : 
FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT); - RWTexture2D rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE); + globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5); + RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); + RWTexture2D rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT); + RWTexture2D rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + RWTexture2D rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE); + RWTexture2D rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION); + RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); + #else // #if defined(FFX_INTERNAL) #if defined FSR2_BIND_SRV_INPUT_COLOR - Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); + Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); #endif - #if defined FSR2_BIND_SRV_MOTION_VECTORS - Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_MOTION_VECTORS); + #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY + Texture2D r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); #endif - #if defined FSR2_BIND_SRV_DEPTH - Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH); + #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS + 
Texture2D r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_INPUT_DEPTH + Texture2D r_input_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); #endif - #if defined FSR2_BIND_SRV_EXPOSURE - Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_EXPOSURE); + #if defined FSR2_BIND_SRV_INPUT_EXPOSURE + Texture2D r_input_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE); + #endif + #if defined FSR2_BIND_SRV_AUTO_EXPOSURE + Texture2D r_auto_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE); #endif #if defined FSR2_BIND_SRV_REACTIVE_MASK - Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); + Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); #endif #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK - Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); #endif #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH - Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); #endif #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS - Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS + Texture2D r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); #endif #if defined FSR2_BIND_SRV_DILATED_DEPTH - Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); + 
Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); #endif #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED - Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); + Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); #endif #if defined FSR2_BIND_SRV_LOCK_STATUS - Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); #endif - #if defined FSR2_BIND_SRV_DEPTH_CLIP - Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP); + #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA + Texture2D r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_SRV_NEW_LOCKS + Texture2D r_new_locks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS); #endif #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR - Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_SRV_LUMA_HISTORY - Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); #endif #if defined FSR2_BIND_SRV_RCAS_INPUT - Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); + Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); #endif #if defined FSR2_BIND_SRV_LANCZOS_LUT - Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); + Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); #endif - #if defined FSR2_BIND_SRV_EXPOSURE_MIPS - Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_EXPOSURE_MIPS); + #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS); #endif #if 
defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT - Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); + Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); #endif #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS - Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); + Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); #endif + #if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR + Texture2D r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + #endif + #if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR + Texture2D r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); + #endif + // UAV declarations #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH - RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS - RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); + RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); #endif #if defined FSR2_BIND_UAV_DILATED_DEPTH - RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); + RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); #endif #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED - RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); + RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); #endif #if defined FSR2_BIND_UAV_LOCK_STATUS - RWTexture2D rw_lock_status : 
FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); #endif - #if defined FSR2_BIND_UAV_DEPTH_CLIP - RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP); + #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA + RWTexture2D rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_UAV_NEW_LOCKS + RWTexture2D rw_new_locks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS); #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY - RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); + RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); + globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 - globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); + globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); #endif #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS - RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); + RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); 
#endif #if defined FSR2_BIND_UAV_EXPOSURE - RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + #endif + #if defined FSR2_BIND_UAV_AUTO_EXPOSURE + RWTexture2D rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE); #endif #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC - globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + #endif + + #if defined FSR2_BIND_UAV_AUTOREACTIVE + RWTexture2D rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE); + #endif + #if defined FSR2_BIND_UAV_AUTOCOMPOSITION + RWTexture2D rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION); + #endif + #if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR + RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR); + #endif + #if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR + RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR); #endif #endif // #if defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL) FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) return r_imgMips.mips[mipLevel][iPxPos]; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL) FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); -#else - return 0.f; -#endif - -} - -// -// a 0 0 0 x -// 0 b 0 0 y -// 0 0 c d z -// 0 0 e 0 1 -// -// z' = (z*c+d)/(z*e) -// z' = (c/e) + d/(z*e) -// z' - (c/e) 
= d/(z*e) -// (z'e - c)/e = d/(z*e) -// e / (z'e - c) = (z*e)/d -// (e * d) / (z'e - c) = z*e -// z = d / (z'e - c) -FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth) -{ - return -fDeviceToViewDepth[2] / (fDeviceDepth * fDeviceToViewDepth[1] - fDeviceToViewDepth[0]); } +#endif +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL) FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH) || defined(FFX_INTERNAL) - return r_depth[iPxPos]; -#else - return 0.f; + return r_input_depth[iPxPos]; +} #endif + +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL) +FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) +{ + return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x; } +#endif +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) return r_reactive_mask[iPxPos]; -#else - return 0.f; -#endif } - -FfxFloat32x4 GatherReactiveMask(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) - return r_reactive_mask.GatherRed(s_LinearClamp, FfxFloat32x2(iPxPos) * reactive_mask_dim_rcp); -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) return r_transparency_and_composition_mask[iPxPos]; -#else - return 0.f; -#endif } - -FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) -{ -#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; - return r_transparency_and_composition_mask.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; #endif -} - -FfxFloat32 PreExposure() -{ - return fPreExposure; -} +#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) 
FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_input_color_jittered[iPxPos].rgb / PreExposure(); -#else - return 0; -#endif + return r_input_color_jittered[iPxPos].rgb; } +#endif -FfxFloat32x3 LoadInputColorWithoutPreExposure(FfxUInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_input_color_jittered[iPxPos].rgb; -#else - return 0; -#endif +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb; } +#endif +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_prepared_input_color[iPxPos].rgb; -#else - return 0.f; -#endif + return r_prepared_input_color[iPxPos].xyz; } - -FfxFloat32 LoadPreparedInputColorLuma(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_prepared_input_color[iPxPos].a; -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL) FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) { -#if defined(FSR2_BIND_SRV_MOTION_VECTORS) || defined(FFX_INTERNAL) - FfxFloat32x2 fSrcMotionVector = r_motion_vectors[iPxDilatedMotionVectorPos].xy; -#else - FfxFloat32x2 fSrcMotionVector = 0.f; -#endif + FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; - FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale; + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); #if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS - fUvMotionVector -= fMotionVectorJitterCancellation; + fUvMotionVector -= MotionVectorJitterCancellation(); #endif return fUvMotionVector; } +#endif +#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) 
FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) { -#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) return r_internal_upscaled_color[iPxHistory]; -#else - return 0.f; -#endif } - -FfxFloat32x4 LoadRwInternalUpscaledColorAndWeight(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) - return rw_internal_upscaled_color[iPxPos]; -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) { -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) rw_luma_history[iPxPos] = fLumaHistory; -#endif } - -FfxFloat32x4 LoadRwLumaHistory(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) - return rw_luma_history[iPxPos]; -#else - return 1.f; #endif -} -FfxFloat32 LoadLumaStabilityFactor(FfxUInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL) - return r_luma_history[iPxPos].w; -#else - return 0.f; -#endif +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); } +#endif -FfxFloat32 SampleLumaStabilityFactor(FfxFloat32x2 fUV) +#if defined(FFX_INTERNAL) +FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; - return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0).w; -#else - return 0.f; -#endif + return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w; } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) rw_internal_upscaled_color[iPxHistory] = fHistory; -#endif } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 
fColorAndWeight) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) rw_internal_upscaled_color[iPxPos] = fColorAndWeight; -#endif } +#endif +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL) void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) { -#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL) - rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor * PreExposure(), 1.f); -#endif + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); } +#endif //LOCK_LIFETIME_REMAINING == 0 //Should make LockInitialLifetime() return a const 1.0f later -FfxFloat32x3 LoadLockStatus(FfxUInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) - FfxFloat32x3 fLockStatus = r_lock_status[iPxPos]; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - return fLockStatus; -#else - return 0.f; -#endif - - -} - -FfxFloat32x3 LoadRwLockStatus(FfxUInt32x2 iPxPos) +FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL) - FfxFloat32x3 fLockStatus = rw_lock_status[iPxPos]; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - - return fLockStatus; -#else - return 0.f; -#endif + return r_lock_status[iPxPos]; } +#endif -void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x3 fLockstatus) -{ #if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL) - fLockstatus[0] += LockInitialLifetime() * 2.0f; - - rw_lock_status[iPxPos] = fLockstatus; -#endif +void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) +{ + rw_lock_status[iPxPos] = fLockStatus; } +#endif -void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL) +FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) - rw_prepared_input_color[iPxPos] = fTonemapped; -#endif + return 
r_lock_input_luma[iPxPos]; } +#endif -FfxBoolean IsResponsivePixel(FfxUInt32x2 iPxPos) +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL) +void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) { - return FFX_FALSE; //not supported in prototype + rw_lock_input_luma[iPxPos] = fLuma; } +#endif -FfxFloat32 LoadDepthClip(FfxUInt32x2 iPxPos) +#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL) +FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) || defined(FFX_INTERNAL) - return r_depth_clip[iPxPos]; -#else - return 0.f; -#endif + return r_new_locks[iPxPos]; } +#endif -FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL) +FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; - return r_depth_clip.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; -#endif + return rw_new_locks[iPxPos]; } +#endif -FfxFloat32x3 SampleLockStatus(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL) +void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) { -#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) - fUV *= postprocessed_lockstatus_uv_scale; - FfxFloat32x3 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - return fLockStatus; -#else - return 0.f; -#endif + rw_new_locks[iPxPos] = newLock; } +#endif -void StoreDepthClip(FfxUInt32x2 iPxPos, FfxFloat32 fClip) +#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) +void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) { -#if defined(FSR2_BIND_UAV_DEPTH_CLIP) || defined(FFX_INTERNAL) - rw_depth_clip[iPxPos] = fClip; -#endif + rw_prepared_input_color[iPxPos] = fTonemapped; } +#endif -FfxFloat32 TanHalfFoV() +#if 
defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) +FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) { - return fTanHalfFOV; + return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; } +#endif -FfxFloat32 LoadSceneDepth(FfxUInt32x2 iPxInput) +#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) +FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DEPTH) || defined(FFX_INTERNAL) - return r_depth[iPxInput]; -#else - return 0.f; -#endif + FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); + return fLockStatus; } +#endif +#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); -#else - return 0; -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) { FfxUInt32 uDepth = asuint(fDepth); -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) + #if FFX_FSR2_OPTION_INVERTED_DEPTH InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); #else InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth #endif -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) { -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL) void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { -#if 
defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL) rw_dilatedDepth[iPxPos] = fDepth; -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) { -#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) rw_dilated_motion_vectors[iPxPos] = fMotionVector; -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) return r_dilated_motion_vectors[iPxInput].xy; -#else - return 0.f; +} #endif + +#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) +FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) +{ + return r_previous_dilated_motion_vectors[iPxInput].xy; } -FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) +FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) - return r_dilated_motion_vectors.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; -#endif + return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy; } +#endif +#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL) FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL) return r_dilatedDepth[iPxInput]; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL) FfxFloat32 Exposure() { - // return 1.0f; - #if defined(FSR2_BIND_SRV_EXPOSURE) || defined(FFX_INTERNAL) - FfxFloat32 exposure = r_exposure[FfxUInt32x2(0, 0)].x; - #else - FfxFloat32 exposure = 1.f; - #endif + FfxFloat32 exposure = 
r_input_exposure[FfxUInt32x2(0, 0)].x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} +#endif + +#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL) +FfxFloat32 AutoExposure() +{ + FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x; if (exposure == 0.0f) { exposure = 1.0f; @@ -799,6 +709,7 @@ FfxFloat32 Exposure() return exposure; } +#endif FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) { @@ -809,40 +720,80 @@ FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) #endif } +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL) FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) { -#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL) // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) return r_dilated_reactive_masks[iPxPos]; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) { -#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; +} +#endif + +#if 
defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL) +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_opaque_only[iPxPos].xyz; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL) +FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_pre_alpha[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL) +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_post_alpha[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL) +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + rw_output_autoreactive[iPxPos] = fReactive.x; + + rw_output_autocomposition[iPxPos] = fReactive.y; +} +#endif +#endif + +#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL) +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_pre_alpha[iPxPos] = color; + +} #endif + +#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL) +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_post_alpha[iPxPos] = color; } +#endif #endif // #if defined(FFX_GPU) diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_common.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_common.h index 7f6acf2..0c72aa8 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_common.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_common.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -26,12 +26,13 @@ //Locks #define LOCK_LIFETIME_REMAINING 0 #define LOCK_TEMPORAL_LUMA 1 -#define LOCK_TRUST 2 #endif // #if defined(FFX_CPU) || defined(FFX_GPU) #if defined(FFX_GPU) +FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f; FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f; -FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1e-03f; +FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX; FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f; FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; @@ -43,162 +44,174 @@ FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; #pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative // Reconstructed depth usage -FFX_STATIC const FfxFloat32 reconstructedDepthBilinearWeightThreshold = 0.05f; +FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f; // Accumulation -FFX_STATIC const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples -FFX_STATIC const FfxFloat32 accumulationMaxOnMotion = 4.0f; +FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f; +FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f; +FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples +FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; // Auto exposure FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; +struct AccumulationPassCommonParams +{ + FfxInt32x2 iPxHrPos; + FfxFloat32x2 fHrUv; + FfxFloat32x2 fLrUv_HwSampler; + FfxFloat32x2 fMotionVector; + FfxFloat32x2 fReprojectedHrUv; + FfxFloat32 fHrVelocity; + 
FfxFloat32 fDepthClipFactor; + FfxFloat32 fDilatedReactiveFactor; + FfxFloat32 fAccumulationMask; + + FfxBoolean bIsResetFrame; + FfxBoolean bIsExistingSample; + FfxBoolean bIsNewSample; +}; + struct LockState { FfxBoolean NewLock; //Set for both unique new and re-locked new FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock) }; -FfxFloat32 GetNormalizedRemainingLockLifetime(FfxFloat32x3 fLockStatus) +void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus) { - const FfxFloat32 fTrust = fLockStatus[LOCK_TRUST]; - - return ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - LockInitialLifetime()) / LockInitialLifetime() * fTrust; + fLockStatus = FfxFloat32x2(0, 0); } #if FFX_HALF -FFX_MIN16_F GetNormalizedRemainingLockLifetime(FFX_MIN16_F3 fLockStatus) +void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus) { - const FFX_MIN16_F fTrust = fLockStatus[LOCK_TRUST]; - const FFX_MIN16_F fInitialLockLifetime = FFX_MIN16_F(LockInitialLifetime()); - - return ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - fInitialLockLifetime) / fInitialLockLifetime * fTrust; + fLockStatus = FFX_MIN16_F2(0, 0); } #endif -void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x3 fLockStatus) -{ - fLockStatus = FfxFloat32x3(0, 0, 1); // LOCK_TRUST to 1 -} -#if FFX_HALF -void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F3 fLockStatus) -{ - fLockStatus = FFX_MIN16_F3(0, 0, 1); // LOCK_TRUST to 1 -} -#endif - - -void KillLock(FFX_PARAMETER_INOUT FfxFloat32x3 fLockStatus) +void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus) { fLockStatus[LOCK_LIFETIME_REMAINING] = 0; } #if FFX_HALF -void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F3 fLockStatus) +void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus) { fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0); } #endif -struct RectificationBoxData +struct RectificationBox { FfxFloat32x3 boxCenter; FfxFloat32x3 boxVec; FfxFloat32x3 aabbMin; FfxFloat32x3 
aabbMax; + FfxFloat32 fBoxCenterWeight; }; #if FFX_HALF -struct RectificationBoxDataMin16 +struct RectificationBoxMin16 { FFX_MIN16_F3 boxCenter; FFX_MIN16_F3 boxVec; FFX_MIN16_F3 aabbMin; FFX_MIN16_F3 aabbMax; -}; -#endif - -struct RectificationBox -{ - RectificationBoxData data_; - FfxFloat32 fBoxCenterWeight; -}; -#if FFX_HALF -struct RectificationBoxMin16 -{ - RectificationBoxDataMin16 data_; FFX_MIN16_F fBoxCenterWeight; }; #endif -void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 initialColorSample) +void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox) { rectificationBox.fBoxCenterWeight = FfxFloat32(0); - rectificationBox.data_.boxCenter = FfxFloat32x3(0, 0, 0); - rectificationBox.data_.boxVec = FfxFloat32x3(0, 0, 0); - rectificationBox.data_.aabbMin = initialColorSample; - rectificationBox.data_.aabbMax = initialColorSample; + rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0); + rectificationBox.boxVec = FfxFloat32x3(0, 0, 0); + rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); + rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); } #if FFX_HALF -void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 initialColorSample) +void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) { rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0); - rectificationBox.data_.boxCenter = FFX_MIN16_F3(0, 0, 0); - rectificationBox.data_.boxVec = FFX_MIN16_F3(0, 0, 0); - rectificationBox.data_.aabbMin = initialColorSample; - rectificationBox.data_.aabbMax = initialColorSample; + rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0); + rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0); + rectificationBox.aabbMin = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); + rectificationBox.aabbMax = -FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); } 
#endif -void RectificationBoxAddSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) { - rectificationBox.data_.aabbMin = ffxMin(rectificationBox.data_.aabbMin, colorSample); - rectificationBox.data_.aabbMax = ffxMax(rectificationBox.data_.aabbMax, colorSample); + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; - rectificationBox.data_.boxCenter += weightedSample; - rectificationBox.data_.boxVec += colorSample * weightedSample; - rectificationBox.fBoxCenterWeight += fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } } #if FFX_HALF -void RectificationBoxAddSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) +void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) { - 
rectificationBox.data_.aabbMin = ffxMin(rectificationBox.data_.aabbMin, colorSample); - rectificationBox.data_.aabbMax = ffxMax(rectificationBox.data_.aabbMax, colorSample); + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; - rectificationBox.data_.boxCenter += weightedSample; - rectificationBox.data_.boxVec += colorSample * weightedSample; - rectificationBox.fBoxCenterWeight += fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } } #endif void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) { rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR2_EPSILON) ? 
rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); - rectificationBox.data_.boxCenter /= rectificationBox.fBoxCenterWeight; - rectificationBox.data_.boxVec /= rectificationBox.fBoxCenterWeight; - FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.data_.boxVec - rectificationBox.data_.boxCenter * rectificationBox.data_.boxCenter)); - rectificationBox.data_.boxVec = stdDev; + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; } #if FFX_HALF void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) { rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f)); - rectificationBox.data_.boxCenter /= rectificationBox.fBoxCenterWeight; - rectificationBox.data_.boxVec /= rectificationBox.fBoxCenterWeight; - FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.data_.boxVec - rectificationBox.data_.boxCenter * rectificationBox.data_.boxCenter)); - rectificationBox.data_.boxVec = stdDev; -} -#endif - -RectificationBoxData RectificationBoxGetData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) -{ - return rectificationBox.data_; -} -#if FFX_HALF -RectificationBoxDataMin16 RectificationBoxGetData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) -{ - return rectificationBox.data_; + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; } #endif @@ -231,8 +244,6 @@ FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) { FfxFloat32x3 fRgb; - fYCoCg.yz -= FfxFloat32x2(0.5f, 0.5f); 
// [0,1] -> [-0.5,0.5] - fRgb = FfxFloat32x3( fYCoCg.x + fYCoCg.y - fYCoCg.z, fYCoCg.x + fYCoCg.z, @@ -245,8 +256,6 @@ FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg) { FFX_MIN16_F3 fRgb; - fYCoCg.yz -= FFX_MIN16_F2(0.5f, 0.5f); // [0,1] -> [-0.5,0.5] - fRgb = FFX_MIN16_F3( fYCoCg.x + fYCoCg.y - fYCoCg.z, fYCoCg.x + fYCoCg.z, @@ -265,8 +274,6 @@ FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) 0.5f * fRgb.r - 0.5f * fRgb.b, -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); - fYCoCg.yz += FfxFloat32x2(0.5f, 0.5f); // [-0.5,0.5] -> [0,1] - return fYCoCg; } #if FFX_HALF @@ -279,8 +286,6 @@ FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb) 0.5 * fRgb.r - 0.5 * fRgb.b, -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b); - fYCoCg.yz += FFX_MIN16_F2(0.5, 0.5); // [-0.5,0.5] -> [0,1] - return fYCoCg; } #endif @@ -303,7 +308,8 @@ FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) FfxFloat32 fPercievedLuminance = 0; if (fLuminance <= 216.0f / 24389.0f) { fPercievedLuminance = fLuminance * (24389.0f / 27.0f); - } else { + } + else { fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f; } @@ -326,7 +332,6 @@ FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb) } #endif - FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) { return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; @@ -351,23 +356,46 @@ FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb) FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) { - return clamp(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; + result.y = (iPxOffset.y > 0) ? 
ffxMin(result.y, iTextureSize.y - 1) : result.y; + return result; + + // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); } #if FFX_HALF FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) { - return clamp(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); + FFX_MIN16_I2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; + return result; + + // return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); } #endif +FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize) +{ + const FfxFloat32x2 fSampleLocation = fUv * iTextureSize; + const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f))); + const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize); + + return fClampedUv; +} + FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size) { - return all(FFX_GREATER_THAN_EQUAL(pos, FfxInt32x2(0, 0))) && all(FFX_LESS_THAN(pos, size)); + return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); } #if FFX_HALF FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size) { - return all(FFX_GREATER_THAN_EQUAL(pos, FFX_MIN16_I2(0, 0))) && all(FFX_LESS_THAN(pos, size)); + return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size))); } #endif @@ -404,19 +432,134 @@ FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos) { FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * 
DisplaySize(); - FfxFloat32x2 fHrPos = floor(fLrPosInHr) + 0.5f; - return FfxInt32x2(fHrPos); + FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); + return iPxHrPos; } #if FFX_HALF FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos) { FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter()); FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize()); - FFX_MIN16_F2 fHrPos = floor(fLrPosInHr) + FFX_MIN16_F(0.5); - return FFX_MIN16_I2(fHrPos); + FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr)); + return iPxHrPos; } #endif +FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize) +{ + return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f); +} + +FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + // fDeviceToViewDepth details found in ffx_fsr2.cpp + return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0])); +} + +FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth) +{ + return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth); + + const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize); + const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z; + const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z; + + return FfxFloat32x3(X, Y, Z); +} + +FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32 GetMaxDistanceInMeters() +{ +#if 
FFX_FSR2_OPTION_INVERTED_DEPTH + return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor(); +#else + return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor(); +#endif +} + +FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure) +{ + fRgb /= fPreExposure; + fRgb *= fExposure; + + fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX); + + return fRgb; +} + +FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) +{ + fRgb /= fExposure; + fRgb *= PreExposure(); + + return fRgb; +} + + +struct BilinearSamplingData +{ + FfxInt32x2 iOffsets[4]; + FfxFloat32 fWeights[4]; + FfxInt32x2 iBasePos; +}; + +BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize) +{ + BilinearSamplingData data; + + FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f); + data.iBasePos = FfxInt32x2(floor(fPxSample)); + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + + data.iOffsets[0] = FfxInt32x2(0, 0); + data.iOffsets[1] = FfxInt32x2(1, 0); + data.iOffsets[2] = FfxInt32x2(0, 1); + data.iOffsets[3] = FfxInt32x2(1, 1); + + data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y); + data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y); + + return data; +} + +struct PlaneData +{ + FfxFloat32x3 fNormal; + FfxFloat32 fDistanceFromOrigin; +}; + +PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2) +{ + PlaneData plane; + + FfxFloat32x3 v0 = fP0 - fP1; + FfxFloat32x3 v1 = fP0 - fP2; + plane.fNormal = normalize(cross(v0, v1)); + plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal); + + return plane; +} + +FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint) +{ + return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin); +} + #endif // #if defined(FFX_GPU) #endif //!defined(FFX_FSR2_COMMON_H) diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid.h 
b/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid.h index d5bbbcf..c63f182 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -29,12 +29,14 @@ FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) { - FfxFloat32x3 fRgb = LoadInputColor(FfxInt32x2(tex)); + FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize(); + fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions()); + FfxFloat32x3 fRgb = SampleInputColor(fUv); - FFX_STATIC const FfxFloat32x3 rgb2y = FfxFloat32x3(0.2126, 0.7152, 0.0722); + fRgb /= PreExposure(); //compute log luma - const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, dot(rgb2y, fRgb))); + const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb))); // Make sure out of screen pixels contribute no value to the end result const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? 
fLogLuma : 0.0f; @@ -59,8 +61,7 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) { FfxFloat32 prev = SPD_LoadExposureBuffer().y; - FfxUInt32x2 renderSize = SPD_RenderSize(); - FfxFloat32 result = outValue.r / (renderSize.x * renderSize.y); + FfxFloat32 result = outValue.r; if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values { @@ -105,7 +106,7 @@ void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) } FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) { - return (v0 + v1 + v2 + v3); + return (v0 + v1 + v2 + v3) * 0.25f; } #endif diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl index 9a6a329..3c99b98 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -28,7 +28,7 @@ #define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1 #define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2 #define FSR2_BIND_UAV_EXPOSURE_MIP_5 3 -#define FSR2_BIND_UAV_EXPOSURE 4 +#define FSR2_BIND_UAV_AUTO_EXPOSURE 4 #define FSR2_BIND_CB_FSR2 5 #define FSR2_BIND_CB_SPD 6 @@ -63,68 +63,35 @@ { return cbSPD.renderSize; } -#else - uint MipCount() - { - return 0; - } - - uint NumWorkGroups() - { - return 0; - } - - uvec2 WorkGroupOffset() - { - return uvec2(0); - } - - uvec2 SPD_RenderSize() - { - return uvec2(0); - } #endif vec2 SPD_LoadExposureBuffer() { -#if defined(FSR2_BIND_UAV_EXPOSURE) - return imageLoad(rw_exposure, ivec2(0,0)).xy; -#else - return vec2(0); -#endif + return imageLoad(rw_auto_exposure, ivec2(0,0)).xy; } void SPD_SetExposureBuffer(vec2 value) { -#if defined(FSR2_BIND_UAV_EXPOSURE) - imageStore(rw_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f)); -#endif + imageStore(rw_auto_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f)); } vec4 SPD_LoadMipmap5(ivec2 iPxPos) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) return vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f); -#else - return vec4(0); -#endif } void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value) { switch (slice) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: imageStore(rw_img_mip_shading_change, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f)); break; -#endif -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) case 5: imageStore(rw_img_mip_5, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f)); break; -#endif default: + // avoid flattened side effect #if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f)); @@ -137,16 +104,12 @@ void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value) void 
SPD_IncreaseAtomicCounter(inout uint spdCounter) { -#if defined(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC) spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1); -#endif } void SPD_ResetAtomicCounter() { -#if defined(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC) imageStore(rw_spd_global_atomic, ivec2(0,0), uvec4(0)); -#endif } #include "ffx_fsr2_compute_luminance_pyramid.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl index 07a097a..2b96636 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,7 @@ #define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 0 #define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 1 #define FSR2_BIND_UAV_EXPOSURE_MIP_5 2 -#define FSR2_BIND_UAV_EXPOSURE 3 +#define FSR2_BIND_UAV_AUTO_EXPOSURE 3 #define FSR2_BIND_CB_FSR2 0 #define FSR2_BIND_CB_SPD 1 @@ -33,94 +33,61 @@ #if defined(FSR2_BIND_CB_SPD) cbuffer cbSPD : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { - uint mips; - uint numWorkGroups; - uint2 workGroupOffset; - uint2 renderSize; + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; }; - uint MipCount() + FfxUInt32 MipCount() { return mips; } - uint NumWorkGroups() + FfxUInt32 NumWorkGroups() { return numWorkGroups; } - uint2 WorkGroupOffset() + FfxUInt32x2 WorkGroupOffset() { return workGroupOffset; } - uint2 SPD_RenderSize() + FfxUInt32x2 SPD_RenderSize() { return renderSize; } -#else - uint MipCount() - { - return 0; - } - - 
uint NumWorkGroups() - { - return 0; - } - - uint2 WorkGroupOffset() - { - return uint2(0, 0); - } - - uint2 SPD_RenderSize() - { - return uint2(0, 0); - } #endif -float2 SPD_LoadExposureBuffer() +FfxFloat32x2 SPD_LoadExposureBuffer() { -#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL) - return rw_exposure[min16int2(0,0)]; -#else - return 0; -#endif + return rw_auto_exposure[FfxInt32x2(0,0)]; } -void SPD_SetExposureBuffer(float2 value) +void SPD_SetExposureBuffer(FfxFloat32x2 value) { -#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL) - rw_exposure[min16int2(0,0)] = value; -#endif + rw_auto_exposure[FfxInt32x2(0,0)] = value; } -float4 SPD_LoadMipmap5(int2 iPxPos) +FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL) - return float4(rw_img_mip_5[iPxPos], 0, 0, 0); -#else - return 0; -#endif + return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0); } -void SPD_SetMipmap(int2 iPxPos, int slice, float value) +void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxInt32 slice, FfxFloat32 value) { switch (slice) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL) case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: rw_img_mip_shading_change[iPxPos] = value; break; -#endif -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL) case 5: rw_img_mip_5[iPxPos] = value; break; -#endif default: + // avoid flattened side effect #if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL) rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos]; @@ -131,14 +98,14 @@ void SPD_SetMipmap(int2 iPxPos, int slice, float value) } } -void SPD_IncreaseAtomicCounter(inout uint spdCounter) +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) { - InterlockedAdd(rw_spd_global_atomic[min16int2(0,0)], 1, spdCounter); + InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0,0)], 1, spdCounter); } void SPD_ResetAtomicCounter() { - rw_spd_global_atomic[min16int2(0,0)] = 0; 
+ rw_spd_global_atomic[FfxInt32x2(0,0)] = 0; } #include "ffx_fsr2_compute_luminance_pyramid.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h index 81db737..be41b38 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,75 +24,234 @@ FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; -FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousDepth, FfxFloat32 fPreviousDepthBilinearWeight, FfxFloat32 fCurrentDepthViewSpace) +FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample) { - FfxFloat32 fPrevNearestDepthViewSpace = abs(ConvertFromDeviceDepthToViewSpace(fPreviousDepth)); + FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample); + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize()); - // Depth separation logic ref: See "Minimum Triangle Separation for Correct Z-Buffer Occlusion" - // Intention: worst case of formula in Figure4 combined with Ksep factor in Section 4 - // TODO: check intention and improve, some banding visible - const FfxFloat32 fHalfViewportWidth = RenderSize().x * 0.5f; - FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + FfxFloat32 fDilatedSum = 0.0f; + FfxFloat32 fDepth = 0.0f; + FfxFloat32 fWeightSum = 0.0f; + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + 
iOffset; + + if (IsOnScreen(iSamplePos, RenderSize())) { + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos); + const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample); - // WARNING: Ksep only works with reversed-z with infinite projection. - const FfxFloat32 Ksep = 1.37e-05f; - FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth; - FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; + const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; - FfxFloat32 fDepthClipFactor = (fDepthDiff > 0) ? ffxSaturate(fRequiredDepthSeparation / fDepthDiff) : 1.0f; + if (fDepthDiff > 0.0f) { -#ifdef _DEBUG - rw_debug_out[iPxSamplePos] = FfxFloat32x4(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace, fDepthDiff, fDepthClipFactor); +#if FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample); +#else + const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample); #endif + + const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth); + const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth); - return fPreviousDepthBilinearWeight * fDepthClipFactor * ffxLerp(1.0f, DepthClipBaseScale, ffxSaturate(fDepthDiff * fDepthDiff)); + const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize())); + const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + + const FfxFloat32 Ksep = 1.37e-05f; + const FfxFloat32 Kfov = length(fCorner) / length(fCenter); + const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold; + + const FfxFloat32 fResolutionFactor = 
ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f))); + const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor); + fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight; + fWeightSum += fWeight; + } + } + } + } + + return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f; } -FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthViewSpace) +FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) { - FfxFloat32x2 fPxSample = fUvSample * RenderSize() - 0.5f; - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); - FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + FfxFloat32 minconvergence = 1.0f; - const FfxFloat32 fBilinearWeights[2][2] = { - { - (1 - fPxFrac.x) * (1 - fPxFrac.y), - (fPxFrac.x) * (1 - fPxFrac.y) - }, - { - (1 - fPxFrac.x) * (fPxFrac.y), - (fPxFrac.x) * (fPxFrac.y) - } - }; + FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos); + FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize()); + FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus); - FfxFloat32 fDepth = 0.0f; - FfxFloat32 fWeightSum = 0.0f; - for (FfxInt32 y = 0; y <= 1; ++y) { - for (FfxInt32 x = 0; x <= 1; ++x) { - FfxInt32x2 iSamplePos = iPxSample + FfxInt32x2(x, y); - if (IsOnScreen(iSamplePos, RenderSize())) { - FfxFloat32 fBilinearWeight = fBilinearWeights[y][x]; - if (fBilinearWeight > reconstructedDepthBilinearWeightThreshold) { - fDepth += ComputeSampleDepthClip(iSamplePos, LoadReconstructedPrevDepth(iSamplePos), fBilinearWeight, fCurrentDepthViewSpace); - fWeightSum += fBilinearWeight; - } + const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; + + if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) { + for (FfxInt32 y = -1; y <= 1; ++y) { + for (FfxInt32 x = -1; x <= 1; ++x) { + + FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize); + + FfxFloat32x2 
fMotionVector = LoadInputMotionVector(sp); + FfxFloat32 fVelocityUv = length(fMotionVector); + + fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv)); } } } - return (fWeightSum > 0) ? fDepth / fWeightSum : DepthClipBaseScale; + return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f); +} + +FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters(); + FfxFloat32 fDepthMax = 0.0f; + FfxFloat32 fDepthMin = fMaxDistInMeters; + + FfxInt32 iMaxDistFound = 0; + + for (FfxInt32 y = -1; y < 2; y++) { + for (FfxInt32 x = -1; x < 2; x++) { + + const FfxInt32x2 iOffset = FfxInt32x2(x, y); + const FfxInt32x2 iSamplePos = iPxPos + iOffset; + + const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f; + FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor; + + iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth); + + fDepthMin = ffxMin(fDepthMin, fDepth); + fDepthMax = ffxMax(fDepthMax, fDepth); + } + } + + return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f); +} + +FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize(); + + FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize()); + FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv); + + float fPxDistance = length(fMotionVector * DisplaySize()); + return fPxDistance > 1.0f ? 
ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0; +} + +void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence) +{ + // Compensate for bilinear sampling in accumulation pass + + FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz; + FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence); + + float fMasksSum = 0.0f; + + FfxFloat32x3 fColorSamples[9]; + FfxFloat32 fReactiveSamples[9]; + FfxFloat32 fTransparencyAndCompositionSamples[9]; + + FFX_UNROLL + for (FfxInt32 y = -1; y < 2; y++) { + FFX_UNROLL + for (FfxInt32 x = -1; x < 2; x++) { + + const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); + + FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; + + FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz; + FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord); + FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord); + + fColorSamples[sampleIdx] = fColorSample; + fReactiveSamples[sampleIdx] = fReactiveSample; + fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample; + + fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample); + } + } + + if (fMasksSum > 0) + { + for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++) + { + FfxFloat32x3 fColorSample = fColorSamples[sampleIdx]; + FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx]; + FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx]; + + const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample)); + const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq; + + // Increase power for non-similar samples + const FfxFloat32 fPowerBiasMax = 6.0f; + const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax); + const FfxFloat32 
fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); + const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); + + fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); + } + } + + StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor); } +FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); + + fRgb = PrepareRgb(fRgb, Exposure(), PreExposure()); + + const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); + + return fPreparedYCoCg; +} + +float EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector) +{ + FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1))); + FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0))); + FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1))); + + return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f))); +} void DepthClip(FfxInt32x2 iPxPos) { FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize(); FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); - FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; - FfxFloat32 fCurrentDepthViewSpace = abs(ConvertFromDeviceDepthToViewSpace(LoadDilatedDepth(iPxPos))); - FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fCurrentDepthViewSpace); + // Discard tiny mvs + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f); + + const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; + const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); + const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos)); + + // Compute prepared input color and depth 
clip + FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector); + FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos); + StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip)); + + // Compute dilated reactive mask +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxInt32x2 iSamplePos = iPxPos; +#else + FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); +#endif + + FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); + FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos)); - StoreDepthClip(iPxPos, fDepthClip); + PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence)); } #endif //!defined( FFX_FSR2_DEPTH_CLIPH ) \ No newline at end of file diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.glsl index 7233ec6..c7e3093 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,13 +19,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 3 -// SRV 7 : FSR2_ReconstructedPrevNearestDepth : r_reconstructed_previous_nearest_depth -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 9 : FSR2_DilatedDepth : r_dilatedDepth -// UAV 12 : FSR2_DepthClip : rw_depth_clip -// CB 0 : cbFSR2 - #version 450 #extension GL_GOOGLE_include_directive : require @@ -34,8 +27,20 @@ #define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 #define FSR2_BIND_SRV_DILATED_DEPTH 2 -#define FSR2_BIND_UAV_DEPTH_CLIP 3 -#define FSR2_BIND_CB_FSR2 4 +#define FSR2_BIND_SRV_REACTIVE_MASK 3 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 +#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 6 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 7 +#define FSR2_BIND_SRV_INPUT_COLOR 8 +#define FSR2_BIND_SRV_INPUT_DEPTH 9 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 10 + +#define FSR2_BIND_UAV_DEPTH_CLIP 11 +#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 12 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 13 + +#define FSR2_BIND_CB_FSR2 14 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.hlsl index 8433734..3cf501c 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.hlsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,17 +19,20 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 3 -// SRV 7 : FSR2_ReconstructedPrevNearestDepth : r_reconstructed_previous_nearest_depth -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 9 : FSR2_DilatedDepth : r_dilatedDepth -// UAV 12 : FSR2_DepthClip : rw_depth_clip -// CB 0 : cbFSR2 - #define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 #define FSR2_BIND_SRV_DILATED_DEPTH 2 -#define FSR2_BIND_UAV_DEPTH_CLIP 0 +#define FSR2_BIND_SRV_REACTIVE_MASK 3 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 5 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 6 +#define FSR2_BIND_SRV_INPUT_COLOR 7 +#define FSR2_BIND_SRV_INPUT_DEPTH 8 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 9 + +#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 0 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 1 + #define FSR2_BIND_CB_FSR2 0 #include "ffx_fsr2_callbacks_hlsl.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_lock.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_lock.h index b2266b7..8347fa8 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_lock.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_lock.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,17 +22,24 @@ #ifndef FFX_FSR2_LOCK_H #define FFX_FSR2_LOCK_H -FfxFloat32 GetLuma(FfxInt32x2 pos) +void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos) { - //add some bias to avoid locking dark areas - return FfxFloat32(LoadPreparedInputColorLuma(pos)); + if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) + { +#if FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxUInt32 farZ = 0x0; +#else + const FfxUInt32 farZ = 0x3f800000; +#endif + SetReconstructedDepth(iPxHrPos, farZ); + } } -FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) +FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos) { const FfxInt32 RADIUS = 1; - FfxFloat32 fNucleus = GetLuma(pos); + FfxFloat32 fNucleus = LoadLockInputLuma(pos); FfxFloat32 similar_threshold = 1.05f; FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX; @@ -48,7 +55,8 @@ FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar - const FfxUInt32 rejectionMasks[4] = { + const FfxUInt32 uNumRejectionMasks = 4; + const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left @@ -64,7 +72,7 @@ FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - FfxFloat32 sampleLuma = GetLuma(samplePos); + FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos); FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); if (difference > 0 && (difference < similar_threshold)) { @@ -80,47 +88,28 @@ FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) if (FFX_FALSE == isRidge) { - return 0; + return false; } FFX_UNROLL for (FfxInt32 i = 0; i < 4; i++) 
{ - if ((mask & rejectionMasks[i]) == rejectionMasks[i]) { - return 0; + if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) { + return false; } } - return 1; + return true; } -FFX_STATIC FfxBoolean s_bLockUpdated = FFX_FALSE; - -FfxFloat32x3 ComputeLockStatus(FfxInt32x2 iPxLrPos, FfxFloat32x3 fLockStatus) +void ComputeLock(FfxInt32x2 iPxLrPos) { - FfxFloat32 fConfidenceOfThinFeature = ComputeThinFeatureConfidence(iPxLrPos); - - s_bLockUpdated = FFX_FALSE; - if (fConfidenceOfThinFeature > 0.0f) + if (ComputeThinFeatureConfidence(iPxLrPos)) { - //put to negative on new lock - fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] == FfxFloat32(0.0f)) ? FfxFloat32(-LockInitialLifetime()) : FfxFloat32(-(LockInitialLifetime() * 2)); - - s_bLockUpdated = FFX_TRUE; + StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f); } - return fLockStatus; -} - -void ComputeLock(FfxInt32x2 iPxLrPos) -{ - FfxInt32x2 iPxHrPos = ComputeHrPosFromLrPos(iPxLrPos); - - FfxFloat32x3 fLockStatus = ComputeLockStatus(iPxLrPos, LoadLockStatus(iPxHrPos)); - - if ((s_bLockUpdated)) { - StoreLockStatus(iPxHrPos, fLockStatus); - } + ClearResourcesForNextFrame(iPxLrPos); } #endif // FFX_FSR2_LOCK_H diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.glsl index 9c37774..f7cad59 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,23 +19,14 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 4 -// SRV 5 : m_UpscaleReactive : r_reactive_mask -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// UAV 27 : FSR2_ReactiveMaskMax : rw_reactive_max -// CB 0 : cbFSR2 -// CB 1 : FSR2DispatchOffsets - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require -#define FSR2_BIND_SRV_LOCK_STATUS 0 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 1 -#define FSR2_BIND_UAV_LOCK_STATUS 2 +#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0 +#define FSR2_BIND_UAV_NEW_LOCKS 1 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 #define FSR2_BIND_CB_FSR2 3 #include "ffx_fsr2_callbacks_glsl.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.hlsl index 492965c..1409dce 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.hlsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_lock_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,16 +19,9 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 4 -// SRV 5 : m_UpscaleReactive : r_reactive_mask -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// CB 0 : cbFSR2 - -#define FSR2_BIND_SRV_LOCK_STATUS 1 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 2 -#define FSR2_BIND_UAV_LOCK_STATUS 0 +#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0 +#define FSR2_BIND_UAV_NEW_LOCKS 0 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 1 #define FSR2_BIND_CB_FSR2 0 #include "ffx_fsr2_callbacks_hlsl.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_postprocess_lock_status.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_postprocess_lock_status.h index 959031b..cee9e14 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_postprocess_lock_status.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_postprocess_lock_status.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -37,62 +37,70 @@ FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample) #if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) #else -DeclareCustomFetchBilinearSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) +DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) #endif -DeclareCustomTextureSample(ShadingChangeLumaSample, Bilinear, FetchShadingChangeLumaSamples) +DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) -FfxFloat32 GetShadingChangeLuma(FfxFloat32x2 fUvCoord) +FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord) { - // const FfxFloat32 fShadingChangeLuma = exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()) * LumaMipRcp()); - const FfxFloat32 fShadingChangeLuma = FfxFloat32(exp(SampleMipLuma(fUvCoord, LumaMipLevelToUse()) * FfxFloat32(LumaMipRcp()))); - return fShadingChangeLuma; -} - -LockState GetLockState(FfxFloat32x3 fLockStatus) -{ - LockState state = { FFX_FALSE, FFX_FALSE }; + FfxFloat32 fShadingChangeLuma = 0; - //Check if this is a new or refreshed lock - state.NewLock = fLockStatus[LOCK_LIFETIME_REMAINING] < FfxFloat32(0.0f); +#if 0 + fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x); +#else - //For a non-refreshed lock, the lifetime is set to LockInitialLifetime() - state.WasLockedPrevFrame = fLockStatus[LOCK_TRUST] != FfxFloat32(0.0f); + const FfxFloat32 fDiv = FfxFloat32(2 << LumaMipLevelToUse()); + FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv); - return state; -} + fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions()); + fShadingChangeLuma = Exposure() * 
exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse()))); +#endif -LockState PostProcessLockStatus(FfxInt32x2 iPxHrPos, FFX_PARAMETER_IN FfxFloat32x2 fLrUvJittered, FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, const FfxFloat32 fAccumulationMask, FFX_PARAMETER_IN FfxFloat32 fHrVelocity, - FFX_PARAMETER_INOUT FfxFloat32 fAccumulationTotalWeight, FFX_PARAMETER_INOUT FfxFloat32x3 fLockStatus, FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { + fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f); - const LockState state = GetLockState(fLockStatus); + return fShadingChangeLuma; +} - fLockStatus[LOCK_LIFETIME_REMAINING] = abs(fLockStatus[LOCK_LIFETIME_REMAINING]); +void UpdateLockStatus(AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, + FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus, + FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, + FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { - FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(fLrUvJittered); + const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv); //init temporal shading change factor, init to -1 or so in reproject to know if "true new"? fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? 
fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA]; FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA]; - fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), FfxFloat32(0.5f)); - fLuminanceDiff = FfxFloat32(1) - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); - if (fLuminanceDiff > FfxFloat32(0.2f)) { - KillLock(fLockStatus); - } + fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); - if (!state.NewLock && fLockStatus[LOCK_LIFETIME_REMAINING] >= FfxFloat32(0)) - { - fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fAccumulationMask); + if (state.NewLock) { + fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma; - const FfxFloat32 depthClipThreshold = FfxFloat32(0.99f); - if (fDepthClipFactor < depthClipThreshold) - { + fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f; + } + else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) { + fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f); + } + else { + if (fLuminanceDiff > 0.1f) { KillLock(fLockStatus); } } - return state; + fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f)); + fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor); + + fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask); + fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f); + + // Compute this frame lock contribution + const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f); + const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma)); + + fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution); } #endif //!defined( 
FFX_FSR2_POSTPROCESS_LOCK_STATUS_H ) diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas.h index 0429d8f..d9006cd 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -28,67 +28,29 @@ void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); } -#if FFX_HALF - #define FSR_RCAS_H - FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p) - { - FfxFloat32x4 inputSample = LoadRCAS_Input(p); //TODO: fix type - - inputSample.rgb *= Exposure(); - -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - inputSample.rgb = Tonemap(inputSample.rgb); -#endif // #if FFX_FSR2_OPTION_HDR_COLOR_INPUT - - return FfxFloat16x4(inputSample); - } - void FsrRcasInputH(inout FfxFloat16 r, inout FfxFloat16 g, inout FfxFloat16 b) {} -#else - #define FSR_RCAS_F - FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) - { - FfxFloat32x4 inputSample = LoadRCAS_Input(p); - - inputSample.rgb *= Exposure(); +#define FSR_RCAS_F +FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) +{ + FfxFloat32x4 fColor = LoadRCAS_Input(p); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - inputSample.rgb = Tonemap(inputSample.rgb); -#endif + fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); - return inputSample; - } + return fColor; +} - void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} -#endif // #if FFX_HALF +void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} #include "ffx_fsr1.h" void CurrFilter(FFX_MIN16_U2 pos) { -#if FFX_HALF - FfxFloat16x3 c; - FsrRcasH(c.r, 
c.g, c.b, pos, RCASConfig()); - -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - c = InverseTonemap(c); -#endif - - c /= FfxFloat16(Exposure()); - - WriteUpscaledOutput(pos, c); //TODO: fix type -#else FfxFloat32x3 c; FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - c = InverseTonemap(c); -#endif - - c /= Exposure(); + c = UnprepareRgb(c, Exposure()); WriteUpscaledOutput(pos, c); -#endif } void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.glsl index 1097faf..20807a3 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,19 +19,14 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 6 -// SRV 4 : m_Exposure : r_exposure -// SRV 19 : FSR2_InternalUpscaled1 : r_rcas_input -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : cbRCAS - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_upscaled_output declaration +#extension GL_EXT_shader_image_load_formatted : require -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 #define FSR2_BIND_SRV_RCAS_INPUT 1 #define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 #define FSR2_BIND_CB_FSR2 3 @@ -58,17 +53,10 @@ } #endif -#if FFX_HALF -vec4 LoadRCAS_Input(FfxInt16x2 iPxPos) -{ - return texelFetch(r_rcas_input, iPxPos, 0); -} -#else vec4 LoadRCAS_Input(FfxInt32x2 iPxPos) { return texelFetch(r_rcas_input, iPxPos, 0); } -#endif #include "ffx_fsr2_rcas.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.hlsl index ea6b35a..f447b7e 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.hlsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_rcas_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,14 +19,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 6 -// SRV 4 : m_Exposure : r_exposure -// SRV 19 : FSR2_InternalUpscaled1 : r_rcas_input -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : cbRCAS - -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 #define FSR2_BIND_SRV_RCAS_INPUT 1 #define FSR2_BIND_UAV_UPSCALED_OUTPUT 0 #define FSR2_BIND_CB_FSR2 0 @@ -53,17 +46,11 @@ } #endif -#if FFX_HALF -float4 LoadRCAS_Input(FfxInt16x2 iPxPos) -{ - return r_rcas_input[iPxPos]; -} -#else + float4 LoadRCAS_Input(FfxInt32x2 iPxPos) { return r_rcas_input[iPxPos]; } -#endif #include "ffx_fsr2_rcas.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h index aad1992..e9ccc4b 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,36 +24,25 @@ void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) { - FfxFloat32x2 fDepthUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; - FfxFloat32x2 fPxPrevPos = (fDepthUv + fMotionVector) * iPxDepthSize - FfxFloat32x2(0.5, 0.5); - FfxInt32x2 iPxPrevPos = FfxInt32x2(floor(fPxPrevPos)); - FfxFloat32x2 fPxFrac = ffxFract(fPxPrevPos); - - const FfxFloat32 bilinearWeights[2][2] = { - { - (1 - fPxFrac.x) * (1 - fPxFrac.y), - (fPxFrac.x) * (1 - fPxFrac.y) - }, - { - (1 - fPxFrac.x) * (fPxFrac.y), - (fPxFrac.x) * (fPxFrac.y) - } - }; + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); + + FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); // Project current depth into previous frame locations. // Push to all pixels having some contribution if reprojection is using bilinear logic. 
- for (FfxInt32 y = 0; y <= 1; ++y) { - for (FfxInt32 x = 0; x <= 1; ++x) { - - FfxInt32x2 offset = FfxInt32x2(x, y); - FfxFloat32 w = bilinearWeights[y][x]; - - if (w > reconstructedDepthBilinearWeightThreshold) { - - FfxInt32x2 storePos = iPxPrevPos + offset; - if (IsOnScreen(storePos, iPxDepthSize)) { - StoreReconstructedDepth(storePos, fDepth); - } + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; + if (IsOnScreen(iStorePos, iPxDepthSize)) { + StoreReconstructedDepth(iStorePos, fDepth); } } } @@ -106,65 +95,24 @@ void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxIn } } -FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) -{ - FfxFloat32 minconvergence = 1.0f; - - FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos) * RenderSize(); - FfxFloat32 fNucleusVelocity = length(fMotionVectorNucleus); - - const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; - - if (fNucleusVelocity > MotionVectorVelocityEpsilon) { - for (FfxInt32 y = -1; y <= 1; ++y) { - for (FfxInt32 x = -1; x <= 1; ++x) { - - FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize); - - FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp) * RenderSize(); - FfxFloat32 fVelocity = length(fMotionVector); - - fVelocity = ffxMax(fVelocity, fNucleusVelocity); - minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocity, fMotionVectorNucleus / fVelocity)); - } - } - } - - return ffxSaturate(1.0f - minconvergence); -} - - -void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence) +FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos) { - // Compensate for bilinear sampling in accumulation pass - - 
FfxFloat32x3 fReferenceColor = LoadPreparedInputColor(iPxLrPos); - FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence); - - for (int y = -1; y < 2; y++) { - for (int x = -1; x < 2; x++) { - - const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - - FfxFloat32x3 fColorSample = LoadPreparedInputColor(sampleCoord); - FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord); - FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord); + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); - const FfxFloat32 fColorSimilarity = dot(normalize(fReferenceColor), normalize(fColorSample)); - const FfxFloat32 fVelocitySimilarity = 1.0f - abs(length(fReferenceColor) - length(fColorSample)); - const FfxFloat32 fSimilarity = fColorSimilarity * fVelocitySimilarity; + // Use internal auto exposure for locking logic + fRgb /= PreExposure(); + fRgb *= Exposure(); - // Increase power for non-similar samples - const FfxFloat32 fPowerBiasMax = 6.0f; - const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax); - const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); - const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fRgb = Tonemap(fRgb); +#endif - fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); - } - } + //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved! 
+ const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0)); - StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor); + return fLockInputLuma; } void ReconstructAndDilate(FfxInt32x2 iPxLrPos) @@ -189,13 +137,8 @@ void ReconstructAndDilate(FfxInt32x2 iPxLrPos) ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize()); -#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); -#else - FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, DisplaySize()); -#endif - - PreProcessReactiveMasks(iPxLrPos, fMotionDivergence); + FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos); + StoreLockInputLuma(iPxLrPos, fLockInputLuma); } diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl index 96d1383..20e17ee 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,29 +19,26 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 2 -// SRV 2 : m_MotionVector : r_motion_vectors -// SRV 3 : m_depthbuffer : r_depth -// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_reconstructed_previous_nearest_depth -// UAV 8 : FSR2_DilatedVelocity : rw_dilated_motion_vectors -// UAV 9 : FSR2_DilatedDepth : rw_dilatedDepth -// CB 0 : cbFSR2 - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require -#define FSR2_BIND_SRV_MOTION_VECTORS 0 -#define FSR2_BIND_SRV_DEPTH 1 -#define FSR2_BIND_SRV_REACTIVE_MASK 2 -#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 3 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 4 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR2_BIND_SRV_INPUT_DEPTH 1 +#define FSR2_BIND_SRV_INPUT_COLOR 2 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 3 +#define FSR2_BIND_SRV_LUMA_HISTORY 4 + #define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5 #define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6 #define FSR2_BIND_UAV_DILATED_DEPTH 7 -#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 8 -#define FSR2_BIND_CB_FSR2 9 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 8 +#define FSR2_BIND_UAV_LUMA_HISTORY 9 +#define FSR2_BIND_UAV_LUMA_INSTABILITY 10 +#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 11 + +#define FSR2_BIND_CB_FSR2 12 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl index 57f3f49..33c044e 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,23 +19,16 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -// FSR2 pass 2 -// SRV 2 : m_MotionVector : r_motion_vectors -// SRV 3 : m_depthbuffer : r_depth -// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_reconstructed_previous_nearest_depth -// UAV 8 : FSR2_DilatedVelocity : rw_dilated_motion_vectors -// UAV 9 : FSR2_DilatedDepth : rw_dilatedDepth -// CB 0 : cbFSR2 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR2_BIND_SRV_INPUT_DEPTH 1 +#define FSR2_BIND_SRV_INPUT_COLOR 2 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 3 -#define FSR2_BIND_SRV_MOTION_VECTORS 0 -#define FSR2_BIND_SRV_DEPTH 1 -#define FSR2_BIND_SRV_REACTIVE_MASK 2 -#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 3 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 4 #define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 #define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 1 #define FSR2_BIND_UAV_DILATED_DEPTH 2 -#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 3 +#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 3 + #define FSR2_BIND_CB_FSR2 0 #include "ffx_fsr2_callbacks_hlsl.h" diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_reproject.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_reproject.h index 5ae962d..f7f3961 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_reproject.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_reproject.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,7 @@ #define FFX_FSR2_REPROJECT_H #ifndef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE -#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1 // Approximate +#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference #endif FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) @@ -49,13 +49,16 @@ DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample) { - return FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f); + FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f); + return fSample; } #if FFX_HALF FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample) { - return FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0f); + FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0); + + return fSample; } #endif @@ -88,38 +91,46 @@ FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) return fDilatedMotionVector; } -void ComputeReprojectedUVs(FfxInt32x2 iPxHrPos, FfxFloat32x2 fMotionVector, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) +FfxBoolean IsUvInside(FfxFloat32x2 fUv) +{ + return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); +} + +void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) { - FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); - fReprojectedHrUv = fHrUv + fMotionVector; + fReprojectedHrUv = params.fHrUv + params.fMotionVector; - bIsExistingSample = (fReprojectedHrUv.x >= 0.0f && fReprojectedHrUv.x <= 1.0f) && - (fReprojectedHrUv.y >= 0.0f && fReprojectedHrUv.y <= 1.0f); + bIsExistingSample = IsUvInside(fReprojectedHrUv); } -void ReprojectHistoryColor(FfxInt32x2 iPxHrPos, 
FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxFloat32x4 fHistoryColorAndWeight) +void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame) { - fHistoryColorAndWeight = HistorySample(fReprojectedHrUv, DisplaySize()); - fHistoryColorAndWeight.rgb *= Exposure(); + FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize()); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - fHistoryColorAndWeight.rgb = Tonemap(fHistoryColorAndWeight.rgb); -#endif + fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure()); + + fHistoryColor = RGBToYCoCg(fHistoryColor); - fHistoryColorAndWeight.rgb = RGBToYCoCg(fHistoryColorAndWeight.rgb); + //Compute temporal reactivity info + fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w)); + bInMotionLastFrame = (fHistory.w < 0.0f); } -void ReprojectHistoryLockStatus(FfxInt32x2 iPxHrPos, FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxFloat32x3 fReprojectedLockStatus) +LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus) { - // If function is called from Accumulate pass, we need to treat locks differently - FfxFloat32 fInPlaceLockLifetime = LoadRwLockStatus(iPxHrPos)[LOCK_LIFETIME_REMAINING]; + LockState state = { FFX_FALSE, FFX_FALSE }; + const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos); + state.NewLock = fNewLockIntensity > (127.0f / 255.0f); - fReprojectedLockStatus = SampleLockStatus(fReprojectedHrUv); + FfxFloat32 fInPlaceLockLifetime = state.NewLock ? 
fNewLockIntensity : 0; - // Keep lifetime if new lock - if (fInPlaceLockLifetime < 0.0f) { - fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] = fInPlaceLockLifetime; + fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv); + + if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) { + state.WasLockedPrevFrame = true; } + + return state; } #endif //!defined( FFX_FSR2_REPROJECT_H ) diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_resources.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_resources.h index 89734f6..535dbc3 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_resources.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_resources.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,59 +24,76 @@ #if defined(FFX_CPU) || defined(FFX_GPU) #define FFX_FSR2_RESOURCE_IDENTIFIER_NULL 0 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 1 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 2 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 3 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 4 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 5 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 6 -#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 7 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 8 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 9 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 10 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 11 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP 12 -#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 13 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 14 -#define 
FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 15 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 16 -#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 17 -#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 18 -#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 19 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 20 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 21 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 22 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 23 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 24 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 25 -#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 26 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 27 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 28 // same as FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0 28 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_1 29 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_2 30 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_3 31 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_4 32 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5 33 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_6 34 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_7 35 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_8 36 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_9 37 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_10 38 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_11 39 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_12 40 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 41 -#define FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE 42 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 2 +#define 
FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 4 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 12 +#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS 13 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 15 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 17 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18 +#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 20 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26 +#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32 +#define 
FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE 44 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION 45 -// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_12] -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_4 -#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE) +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR 46 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR 47 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 48 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 49 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 50 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 51 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 52 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 53 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 54 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 55 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 
56 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 57 -#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 43 +// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 +#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) -#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0 -#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1 -#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 58 + +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 #define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 #define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h index cfa9db8..f94f40a 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -137,16 +137,19 @@ FfxFloat32 Lanczos2(FfxFloat32 x) } #if FFX_HALF + +#if 0 FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) { const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? 
FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); } +#endif FFX_MIN16_F Lanczos2(FFX_MIN16_F x) { x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); - return Lanczos2NoClamp(x); + return FFX_MIN16_F(Lanczos2NoClamp(x)); } #endif //FFX_HALF @@ -566,12 +569,12 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 #define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ /* Clamp base coords */ \ - iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ - iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ @@ -580,12 +583,12 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 #define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ /* Clamp base coords */ \ - iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ - iPxSample.y = ffxMax(0, 
ffxMin(iPxSample.y, iTextureSize.y - 1)); \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen.h new file mode 100644 index 0000000..101b75d --- /dev/null +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen.h @@ -0,0 +1,250 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define USE_YCOCG 1 + +#define fAutogenEpsilon 0.01f + +// EXPERIMENTAL + +FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDeltaCurr), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorDeltaPrev), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 X = colorPreAlpha; + FfxFloat32x3 Y = colorPostAlpha; + FfxFloat32x3 Z = colorPrevPreAlpha; + FfxFloat32x3 W = colorPrevPostAlpha; + + FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1)))); + + // cleanup very small values + retVal = (retVal < getTcThreshold()) ? 
FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); + + return retVal; +} + +// works ok: thin edges +FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 delta = colorPostAlpha - colorPreAlpha; //prev+1*d = post => d = color, alpha = + FfxFloat32x3 deltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + + FfxFloat32x3 X = colorPrevPreAlpha; + FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha; + FfxFloat32x3 YAminusXA = colorPrevPostAlpha - colorPrevPreAlpha; + FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha; + + FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0); + + FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) ); + + // only pixels that have significantly changed in color should be considered + retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) ); + + return retVal; +} + +// This function computes the TransparencyAndComposition mask: +// This mask indicates pixels that should discard locks and apply color clamping.
+// +// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of +// the MVs can not be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization) +// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting. +// +// This function takes an opaque only and a final texture and uses internal copies of those textures from the last frame. +// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency. +// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels. +// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation. +// +// In the final step it stores the current textures in internal textures for the next frame + +FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx); + + // [branch] + if (retVal > FFX_MIN16_F(0.01f)) + { + retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx); + } + return retVal; +} + +float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb; + FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb; + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return
sqrt(sqrt(gradX * gradY)); +} + +float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb); + FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb); + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return sqrt(sqrt(gradX * gradY)); +} + +FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + FfxFloat32x2 fMotionVector = LoadInputMotionVector(uDispatchThreadId); + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + FfxFloat32x3 minPrev = FFX_MIN16_F3(+1000.f, +1000.f, +1000.f); + FfxFloat32x3 maxPrev = FFX_MIN16_F3(-1000.f, -1000.f, -1000.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + W = RGBToYCoCg(W); +#endif + minPrev = min(minPrev, W); + maxPrev = max(maxPrev, W); + } + } + // instead of computing the overlap: simply count how many 
samples are outside + // set reactive based on that + FFX_MIN16_F count = FFX_MIN16_F(0.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + Y = RGBToYCoCg(Y); +#endif + count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + } + } + retVal = count / FFX_MIN16_F(27.f); + + return retVal; +} + + +// This function computes the Reactive mask: +// We want pixels marked where the alpha portion of the frame changes a lot between neighbours +// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...) +// As a result history would not be trustworthy. +// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation +// For mirrors we may assume the pre-alpha is pretty uniform color. +// +// This works well generally, but also marks edge pixels +FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + // we only get here if alpha has a significant contribution and has changed since last frame.
+ FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + // mark pixels with huge variance in alpha as reactive + FFX_MIN16_F alphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx)); + FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx)); + retVal = ffxSaturate(alphaEdge - opaqueEdge); + + // the above also marks edge pixels due to jitter, so we need to cancel those out + + + return retVal; +} diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.glsl new file mode 100644 index 0000000..bebca91 --- /dev/null +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.glsl @@ -0,0 +1,116 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3 +#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4 +#define FSR2_BIND_SRV_REACTIVE_MASK 5 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 6 + +#define FSR2_BIND_UAV_AUTOREACTIVE 7 +#define FSR2_BIND_UAV_AUTOCOMPOSITION 8 +#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 9 +#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 10 + +#define FSR2_BIND_CB_FSR2 11 +#define FSR2_BIND_CB_REACTIVE 12 + +#include "ffx_fsr2_callbacks_glsl.h" +#include "ffx_fsr2_common.h" + +#ifdef FSR2_BIND_CB_REACTIVE +layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t +{ + float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + float fTcScale; + float fReactiveScale; + float fReactiveMax; +} cbGenerateReactive; + +float getTcThreshold() +{ + return cbGenerateReactive.fTcThreshold; +} + +#else + float getTcThreshold() + { + return 0.05f; + } +#endif + +#include "ffx_fsr2_tcr_autogen.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +void main() +{ + FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(gl_GlobalInvocationID.xy); + + // ToDo: take into account jitter 
(i.e. add delta of previous jitter and current jitter to previous UV + // fetch pre- and post-alpha color values + FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); + FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); + FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); + + FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) ); + FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); + + FFX_MIN16_F2 outReactiveMask = FFX_MIN16_F2( 0.f, 0.f ); + + outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); + + if (outReactiveMask.y > 0.5f) + { + outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); + outReactiveMask.x *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale); + outReactiveMask.x = outReactiveMask.x < cbGenerateReactive.fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( cbGenerateReactive.fReactiveMax ); + } + + outReactiveMask.y *= FFX_MIN16_F(cbGenerateReactive.fTcScale); + + outReactiveMask.x = ffxMax(outReactiveMask.x, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId))); + outReactiveMask.y = ffxMax(outReactiveMask.y, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId))); + + StoreAutoReactive(uDispatchThreadId, outReactiveMask); + + StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); + StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha); +} diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.hlsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.hlsl new file mode 100644 index 0000000..8e635d1 --- /dev/null +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_tcr_autogen_pass.hlsl @@ -0,0 +1,114 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3 +#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4 +#define FSR2_BIND_SRV_REACTIVE_MASK 4 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 5 + +#define FSR2_BIND_UAV_AUTOREACTIVE 0 +#define FSR2_BIND_UAV_AUTOCOMPOSITION 1 +#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 2 +#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 3 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_AUTOREACTIVE 1 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" + +#if defined(FSR2_BIND_CB_AUTOREACTIVE) + cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_AUTOREACTIVE) + { + float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + float fTcScale; + float fReactiveScale; + float fReactiveMax; + }; + float getTcThreshold() + { + return fTcThreshold; + } +#else + #define fTcThreshold 0.05f + #define fTcScale 1.00f + #define fReactiveScale 10.0f + #define fReactiveMax 0.90f + float getTcThreshold() + { + return fTcThreshold; + } +#endif + +#include "ffx_fsr2_tcr_autogen.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, 
FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId); + + // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV + // fetch pre- and post-alpha color values + FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); + FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); + FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); + + FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) ); + FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); + + FFX_MIN16_F2 outReactiveMask = 0; + + outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); + + if (outReactiveMask.y > 0.5f) + { + outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); + outReactiveMask.x *= FFX_MIN16_F(fReactiveScale); + outReactiveMask.x = outReactiveMask.x < fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( fReactiveMax ); + } + + outReactiveMask.y *= FFX_MIN16_F(fTcScale ); + + outReactiveMask.x = max( outReactiveMask.x, FFX_MIN16_F( LoadReactiveMask(uDispatchThreadId) ) ); + outReactiveMask.y = max( outReactiveMask.y, FFX_MIN16_F( LoadTransparencyAndCompositionMask(uDispatchThreadId) ) ); + + StoreAutoReactive(uDispatchThreadId, outReactiveMask); + + StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); + StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha); +} diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h index 80524d4..abdb888 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,28 +22,26 @@ #ifndef FFX_FSR2_UPSAMPLE_H #define FFX_FSR2_UPSAMPLE_H -#define FFX_FSR2_OPTION_GUARANTEE_POSITIVE_UPSAMPLE_WEIGHT 0 - FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; -void Deringing(RectificationBoxData clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) +void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) { fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); } #if FFX_HALF -void Deringing(RectificationBoxDataMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) +void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) { fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); } #endif #ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE -#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 1 // Approximate +#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate #endif -FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32x2 fKernelWeight) +FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) { - FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight; + FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; #if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); #elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT @@ -57,15 +55,16 @@ FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32x2 } #if FFX_HALF -FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F2 fKernelWeight) +FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) { - FFX_MIN16_F2 fSrcSampleOffsetBiased = 
fSrcSampleOffset * fKernelWeight; + FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; #if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_APPROXIMATE - FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_LUT +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); + // To Test: Save reciproqual sqrt compute // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); #else @@ -75,44 +74,33 @@ FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F2 } #endif -FfxFloat32 Pow3(FfxFloat32 x) -{ - return x * x * x; -} +FfxFloat32 ComputeMaxKernelWeight() { + const FfxFloat32 fKernelSizeBias = 1.0f; -#if FX_HALF -FFX_MIN16_F Pow3(FFX_MIN16_F x) -{ - return x * x * x; + FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); + + return ffxMin(FfxFloat32(1.99f), fKernelWeight); } -#endif -FfxFloat32x4 ComputeUpsampledColorAndWeight(FfxInt32x2 iPxHrPos, FfxFloat32x2 fKernelWeight, FFX_PARAMETER_INOUT RectificationBoxData clippingBox) +FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) { -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_begin.h" -#endif + #if 
FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_begin.h" + #endif // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) - FfxFloat32x2 fDstOutputPos = FfxFloat32x2(iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position + FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_end.h" -#endif - -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_begin.h" - RectificationBoxMin16 fRectificationBox; -#else - RectificationBox fRectificationBox; -#endif + #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_end.h" + #endif FfxFloat32x3 fSamples[iLanczos2SampleCount]; - FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 - + FfxInt32x2 offsetTL; offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? 
FfxInt32(-2) : FfxInt32(-1); @@ -127,30 +115,37 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(FfxInt32x2 iPxHrPos, FfxFloat32x2 fK FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); FFX_UNROLL - for (FfxInt32 row = 0; row < 4; row++) { + for (FfxInt32 row = 0; row < 3; row++) { FFX_UNROLL - for (FfxInt32 col = 0; col < 4; col++) { - FfxInt32 iSampleIndex = col + (row << 2); + for (FfxInt32 col = 0; col < 3; col++) { + FfxInt32 iSampleIndex = col + (row << 2); - FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; + FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; - const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); - fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); - } + fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); + } } - RectificationBoxReset(fRectificationBox, fSamples[0]); + FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); - FfxFloat32x3 fColor = FfxFloat32x3(0.f, 0.f, 0.f); - FfxFloat32 fWeight = FfxFloat32(0.f); FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); + // Identify how much of each upsampled color to be used for this frame + const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); + const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor); + + const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); + const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)); + const FfxFloat32 fKernelBias = 
ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); + + const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); + FFX_UNROLL for (FfxInt32 row = 0; row < 3; row++) { - FFX_UNROLL for (FfxInt32 col = 0; col < 3; col++) { FfxInt32 iSampleIndex = col + (row << 2); @@ -161,54 +156,39 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(FfxInt32x2 iPxHrPos, FfxFloat32x2 fK FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; - FfxFloat32 fSampleWeight = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))) * GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelWeight); + const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); + FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); // Update rectification box - const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); - FfxFloat32 fBoxSampleWeight = FfxFloat32(1) - ffxSaturate(fSrcSampleOffsetSq / FfxFloat32(3)); - fBoxSampleWeight *= fBoxSampleWeight; - RectificationBoxAddSample(fRectificationBox, fSamples[iSampleIndex], fBoxSampleWeight); + { + const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); - fWeight += fSampleWeight; - fColor += fSampleWeight * fSamples[iSampleIndex]; + const FfxBoolean bInitialSample = (row == 0) && (col == 0); + RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + } } } - // Normalize for deringing (we need to compare colors) - fColor = fColor / (abs(fWeight) > FSR2_EPSILON ? 
fWeight : FfxFloat32(1.f)); - - RectificationBoxComputeVarianceBoxData(fRectificationBox); -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - RectificationBoxDataMin16 rectificationData = RectificationBoxGetData(fRectificationBox); - clippingBox.aabbMax = rectificationData.aabbMax; - clippingBox.aabbMin = rectificationData.aabbMin; - clippingBox.boxCenter = rectificationData.boxCenter; - clippingBox.boxVec = rectificationData.boxVec; -#else - RectificationBoxData rectificationData = RectificationBoxGetData(fRectificationBox); - clippingBox = rectificationData; -#endif - Deringing(rectificationData, fColor); + RectificationBoxComputeVarianceBoxData(clippingBox); -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - clippingBox.aabbMax = rectificationData.aabbMax; - clippingBox.aabbMin = rectificationData.aabbMin; - clippingBox.boxCenter = rectificationData.boxCenter; - clippingBox.boxVec = rectificationData.boxVec; -#endif + fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON); + + if (fColorAndWeight.w > FSR2_EPSILON) { + // Normalize for deringing (we need to compare colors) + fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; + fColorAndWeight.w *= fUpsampleLanczosWeightScale; - if (any(FFX_LESS_THAN(fKernelWeight, FfxFloat32x2(1, 1)))) { - fWeight = FfxFloat32(averageLanczosWeightPerFrame); + Deringing(clippingBox, fColorAndWeight.xyz); } -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_end.h" -#endif -#if FFX_FSR2_OPTION_GUARANTEE_POSITIVE_UPSAMPLE_WEIGHT - return FfxFloat32x4(fColor, ffxMax(FfxFloat32(FSR2_EPSILON), fWeight)); -#else - return FfxFloat32x4(fColor, ffxMax(FfxFloat32(0), fWeight)); -#endif + #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_end.h" + #endif + + return fColorAndWeight; } #endif //!defined( FFX_FSR2_UPSAMPLE_H ) diff --git a/src/ffx-fsr2-api/shaders/ffx_spd.h 
b/src/ffx-fsr2-api/shaders/ffx_spd.h index 5a27a84..5ce24ec 100644 --- a/src/ffx-fsr2-api/shaders/ffx_spd.h +++ b/src/ffx-fsr2-api/shaders/ffx_spd.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/src/ffx-fsr2-api/vk/CMakeLists.txt b/src/ffx-fsr2-api/vk/CMakeLists.txt index 933d097..859504b 100644 --- a/src/ffx-fsr2-api/vk/CMakeLists.txt +++ b/src/ffx-fsr2-api/vk/CMakeLists.txt @@ -1,7 +1,7 @@ # This file is part of the FidelityFX SDK. -# -# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. -# +# +# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights @@ -31,12 +31,12 @@ file(GLOB SHADERS "${CMAKE_CURRENT_SOURCE_DIR}/../shaders/*.glsl") set(PASS_SHADERS + ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_tcr_autogen_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_autogen_reactive_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_accumulate_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_depth_clip_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_lock_pass.glsl - ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_prepare_input_color_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl ${CMAKE_CURRENT_SOURCE_DIR}/../shaders/ffx_fsr2_rcas_pass.glsl) diff --git a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp 
b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp index be766b5..ec5aa8b 100644 --- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp +++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -312,6 +312,8 @@ VkFormat getVKFormatFromSurfaceFormat(FfxSurfaceFormat fmt) return VK_FORMAT_R8G8_UNORM; case(FFX_SURFACE_FORMAT_R32_FLOAT): return VK_FORMAT_R32_SFLOAT; + case(FFX_SURFACE_FORMAT_R8_UINT): + return VK_FORMAT_R8_UINT; default: return VK_FORMAT_UNDEFINED; } @@ -435,6 +437,8 @@ FfxSurfaceFormat ffxGetSurfaceFormatVK(VkFormat fmt) return FFX_SURFACE_FORMAT_R8_UNORM; case(VK_FORMAT_R32_SFLOAT): return FFX_SURFACE_FORMAT_R32_FLOAT; + case(VK_FORMAT_R8_UINT): + return FFX_SURFACE_FORMAT_R8_UINT; default: return FFX_SURFACE_FORMAT_UNKNOWN; } @@ -508,6 +512,20 @@ VkDescriptorBufferInfo accquireDynamicUBO(BackendContext_VK* backendContext, uin return bufferInfo; } +static uint32_t getDefaultSubgroupSize(const BackendContext_VK* backendContext) +{ + VkPhysicalDeviceVulkan11Properties vulkan11Properties = {}; + vulkan11Properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES; + + VkPhysicalDeviceProperties2 deviceProperties2 = {}; + deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + deviceProperties2.pNext = &vulkan11Properties; + vkGetPhysicalDeviceProperties2(backendContext->physicalDevice, &deviceProperties2); + FFX_ASSERT(vulkan11Properties.subgroupSize == 32 || vulkan11Properties.subgroupSize == 64); // current desktop market + + return vulkan11Properties.subgroupSize; +} + // Create a FfxFsr2Device from a VkDevice FfxDevice ffxGetDeviceVK(VkDevice vkDevice) { @@ -521,7 +539,7 @@ 
FfxCommandList ffxGetCommandListVK(VkCommandBuffer cmdBuf) return reinterpret_cast(cmdBuf); } -FfxResource ffxGetTextureResourceVK(FfxFsr2Context* context, VkImage imgVk, VkImageView imageView, uint32_t width, uint32_t height, VkFormat imgFormat, wchar_t* name, FfxResourceStates state) +FfxResource ffxGetTextureResourceVK(FfxFsr2Context* context, VkImage imgVk, VkImageView imageView, uint32_t width, uint32_t height, VkFormat imgFormat, const wchar_t* name, FfxResourceStates state) { FfxResource resource = {}; resource.resource = reinterpret_cast(imgVk); @@ -562,7 +580,7 @@ FfxResource ffxGetTextureResourceVK(FfxFsr2Context* context, VkImage imgVk, VkIm return resource; } -FfxResource ffxGetBufferResourceVK(FfxFsr2Context* context, VkBuffer bufVk, uint32_t size, wchar_t* name, FfxResourceStates state) +FfxResource ffxGetBufferResourceVK(FfxFsr2Context* context, VkBuffer bufVk, uint32_t size, const wchar_t* name, FfxResourceStates state) { FfxResource resource = {}; resource.resource = reinterpret_cast(bufVk); @@ -696,15 +714,15 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi { BackendContext_VK* backendContext = (BackendContext_VK*)backendInterface->scratchBuffer; + const uint32_t defaultSubgroupSize = getDefaultSubgroupSize(backendContext); + // no shader model in vulkan so assume the minimum deviceCapabilities->minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1; - deviceCapabilities->waveLaneCountMin = 32; - deviceCapabilities->waveLaneCountMax = 32; + deviceCapabilities->waveLaneCountMin = defaultSubgroupSize; + deviceCapabilities->waveLaneCountMax = defaultSubgroupSize; deviceCapabilities->fp16Supported = false; deviceCapabilities->raytracingSupported = false; - BackendContext_VK* context = (BackendContext_VK*)backendInterface->scratchBuffer; - // check if extensions are enabled for (uint32_t i = 0; i < backendContext->numDeviceExtensions; i++) @@ -718,10 +736,16 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* 
backendInterface, FfxDevi VkPhysicalDeviceProperties2 deviceProperties2 = {}; deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; deviceProperties2.pNext = &subgroupSizeControlProperties; - vkGetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2); + vkGetPhysicalDeviceProperties2(backendContext->physicalDevice, &deviceProperties2); - deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize; - deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize; + // NOTE: It's important to check requiredSubgroupSizeStages flags (and it's required by the spec). + // As of August 2022, AMD's Vulkan drivers do not support subgroup size selection through Vulkan API + // and this information is reported through requiredSubgroupSizeStages flags. + if (subgroupSizeControlProperties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT) + { + deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize; + deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize; + } } if (strcmp(backendContext->extensionProperties[i].extensionName, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME) == 0) { @@ -733,7 +757,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.pNext = &shaderFloat18Int8Features; - vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); + vkGetPhysicalDeviceFeatures2(backendContext->physicalDevice, &physicalDeviceFeatures2); deviceCapabilities->fp16Supported = (bool)shaderFloat18Int8Features.shaderFloat16; } @@ -747,7 +771,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.pNext = &accelerationStructureFeatures; - 
vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); + vkGetPhysicalDeviceFeatures2(backendContext->physicalDevice, &physicalDeviceFeatures2); deviceCapabilities->raytracingSupported = (bool)accelerationStructureFeatures.accelerationStructure; } @@ -1252,16 +1276,21 @@ FfxErrorCode CreatePipelineVK(FfxFsr2Interface* backendInterface, FfxFsr2Pass pa FfxDeviceCapabilities deviceCapabilities; GetDeviceCapabilitiesVK(backendInterface, &deviceCapabilities, ffxGetDeviceVK(backendContext->device)); + const uint32_t defaultSubgroupSize = getDefaultSubgroupSize(backendContext); // check if we can force wave64 bool canForceWave64 = false; bool useLut = false; - if (deviceCapabilities.waveLaneCountMin == 32 && deviceCapabilities.waveLaneCountMax == 64) { - + if (defaultSubgroupSize == 32 && deviceCapabilities.waveLaneCountMax == 64) + { useLut = true; canForceWave64 = true; } + else if (defaultSubgroupSize == 64) + { + useLut = true; + } // check if we have 16bit floating point. bool supportedFP16 = deviceCapabilities.fp16Supported; @@ -1287,7 +1316,7 @@ FfxErrorCode CreatePipelineVK(FfxFsr2Interface* backendInterface, FfxFsr2Pass pa flags |= (canForceWave64) ? FSR2_SHADER_PERMUTATION_FORCE_WAVE64 : 0; flags |= (supportedFP16 && (pass != FFX_FSR2_PASS_RCAS)) ? FSR2_SHADER_PERMUTATION_ALLOW_FP16 : 0; - const Fsr2ShaderBlobVK shaderBlob = fsr2GetPermutationBlobByIndex(pass, flags); + const Fsr2ShaderBlobVK shaderBlob = fsr2GetPermutationBlobByIndexVK(pass, flags); FFX_ASSERT(shaderBlob.data && shaderBlob.size); // populate the pass. @@ -1917,4 +1946,4 @@ FfxErrorCode DestroyPipelineVK(FfxFsr2Interface* backendInterface, FfxPipelineSt } return FFX_OK; -} \ No newline at end of file +} diff --git a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h index e0e226a..3ed87d5 100644 --- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h +++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. 
// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -99,7 +99,7 @@ extern "C" { uint32_t width, uint32_t height, VkFormat imgFormat, - wchar_t* name = nullptr, + const wchar_t* name = nullptr, FfxResourceStates state = FFX_RESOURCE_STATE_COMPUTE_READ); /// Create a FfxResource from a VkBuffer. @@ -117,7 +117,7 @@ extern "C" { FFX_API FfxResource ffxGetBufferResourceVK(FfxFsr2Context* context, VkBuffer bufVk, uint32_t size, - wchar_t* name = nullptr, + const wchar_t* name = nullptr, FfxResourceStates state = FFX_RESOURCE_STATE_COMPUTE_READ); /// Convert a FfxResource value to a VkImage. @@ -155,4 +155,4 @@ extern "C" { #if defined(__cplusplus) } -#endif // #if defined(__cplusplus) \ No newline at end of file +#endif // #if defined(__cplusplus) diff --git a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp index 230ae9b..7f348dd 100644 --- a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp +++ b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,12 +21,12 @@ #include "ffx_fsr2_shaders_vk.h" +#include "ffx_fsr2_tcr_autogen_pass_permutations.h" #include "ffx_fsr2_autogen_reactive_pass_permutations.h" #include "ffx_fsr2_accumulate_pass_permutations.h" #include "ffx_fsr2_compute_luminance_pyramid_pass_permutations.h" #include "ffx_fsr2_depth_clip_pass_permutations.h" #include "ffx_fsr2_lock_pass_permutations.h" -#include "ffx_fsr2_prepare_input_color_pass_permutations.h" #include "ffx_fsr2_reconstruct_previous_depth_pass_permutations.h" #include "ffx_fsr2_rcas_pass_permutations.h" @@ -48,16 +48,6 @@ key.FFX_HALF = FFX_CONTAINS_FLAG(options, FSR2_SHADER_PERMUTATION_ALLOW_FP16); #endif // #if defined(POPULATE_SHADER_BLOB) #define POPULATE_SHADER_BLOB(info, index) { info[index].blobData, info[index].blobSize, info[index].numStorageImageResources, info[index].numSampledImageResources, info[index].numUniformBufferResources, info[index].storageImageResourceNames, info[index].storageImageResourceBindings, info[index].sampledImageResourceNames, info[index].sampledImageResourceBindings, info[index].uniformBufferResourceNames, info[index].uniformBufferResourceBindings } -Fsr2ShaderBlobVK fsr2GetPrepareInputColorPassPermutationBlobByIndex(uint32_t permutationOptions) { - - ffx_fsr2_prepare_input_color_pass_PermutationKey key; - - POPULATE_PERMUTATION_KEY(permutationOptions, key); - - const int32_t tableIndex = g_ffx_fsr2_prepare_input_color_pass_IndirectionTable[key.index]; - return POPULATE_SHADER_BLOB(g_ffx_fsr2_prepare_input_color_pass_PermutationInfo, tableIndex); -} - Fsr2ShaderBlobVK fsr2GetDepthClipPassPermutationBlobByIndex(uint32_t permutationOptions) { ffx_fsr2_depth_clip_pass_PermutationKey key; @@ -134,12 +124,20 @@ Fsr2ShaderBlobVK fsr2GetAutogenReactivePassPermutationBlobByIndex(uint32_t permu return 
POPULATE_SHADER_BLOB(g_ffx_fsr2_autogen_reactive_pass_PermutationInfo, tableIndex); } -Fsr2ShaderBlobVK fsr2GetPermutationBlobByIndex(FfxFsr2Pass passId, uint32_t permutationOptions) +Fsr2ShaderBlobVK fsr2GetTcrAutogeneratePassPermutationBlobByIndex(uint32_t permutationOptions) { + + ffx_fsr2_tcr_autogen_pass_PermutationKey key; + + POPULATE_PERMUTATION_KEY(permutationOptions, key); + + const int32_t tableIndex = g_ffx_fsr2_tcr_autogen_pass_IndirectionTable[key.index]; + return POPULATE_SHADER_BLOB(g_ffx_fsr2_tcr_autogen_pass_PermutationInfo, tableIndex); +} + +Fsr2ShaderBlobVK fsr2GetPermutationBlobByIndexVK(FfxFsr2Pass passId, uint32_t permutationOptions) { switch (passId) { - case FFX_FSR2_PASS_PREPARE_INPUT_COLOR: - return fsr2GetPrepareInputColorPassPermutationBlobByIndex(permutationOptions); case FFX_FSR2_PASS_DEPTH_CLIP: return fsr2GetDepthClipPassPermutationBlobByIndex(permutationOptions); case FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH: @@ -155,6 +153,8 @@ Fsr2ShaderBlobVK fsr2GetPermutationBlobByIndex(FfxFsr2Pass passId, uint32_t perm return fsr2GetComputeLuminancePyramidPassPermutationBlobByIndex(permutationOptions); case FFX_FSR2_PASS_GENERATE_REACTIVE: return fsr2GetAutogenReactivePassPermutationBlobByIndex(permutationOptions); + case FFX_FSR2_PASS_TCR_AUTOGENERATE: + return fsr2GetTcrAutogeneratePassPermutationBlobByIndex(permutationOptions); default: FFX_ASSERT_FAIL("Should never reach here."); break; diff --git a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.h b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.h index da581c7..8035657 100644 --- a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.h +++ b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -32,10 +32,10 @@ extern "C" { typedef struct Fsr2ShaderBlobVK { const uint8_t* data; // A pointer to the blob - const uint32_t size; // Size in bytes. - const uint32_t storageImageCount; // Number of storage images. - const uint32_t sampledImageCount; // Number of sampled images. - const uint32_t uniformBufferCount; // Number of uniform buffers. + uint32_t size; // Size in bytes. + uint32_t storageImageCount; // Number of storage images. + uint32_t sampledImageCount; // Number of sampled images. + uint32_t uniformBufferCount; // Number of uniform buffers. const char** boundStorageImageNames; const uint32_t* boundStorageImageBindings; // Pointer to an array of bound UAV resources. const char** boundSampledImageNames; @@ -58,7 +58,7 @@ extern "C" { } Fs2ShaderPermutationOptionsVK; // Get a VK shader blob for the specified pass and permutation index. - Fsr2ShaderBlobVK fsr2GetPermutationBlobByIndex(FfxFsr2Pass passId, uint32_t permutationOptions); + Fsr2ShaderBlobVK fsr2GetPermutationBlobByIndexVK(FfxFsr2Pass passId, uint32_t permutationOptions); #if defined(__cplusplus) }