diff --git a/Assets/BuiltIn/Shader/Compute/HLSL/IrradianceSH.hlsl b/Assets/BuiltIn/Shader/Compute/HLSL/IrradianceSH.hlsl
new file mode 100644
index 000000000..806f4c94a
--- /dev/null
+++ b/Assets/BuiltIn/Shader/Compute/HLSL/IrradianceSH.hlsl
@@ -0,0 +1,149 @@
+// Number of threads (thread groups along X of one dispatch)
+#define MAX_NUM_THREAD 120
+
+// MAX_NUM_THREAD * 6 faces
+#define TANGENT_COUNT 720
+
+// Number of faces per thread (thread groups along Y of one dispatch)
+#define NUM_FACE 6
+
+// Edge length, in pixels, of the tile handled by one thread group
+#define RT_SIZE 16
+
+// Uniform Constants
+cbuffer cbConstants
+{
+	float4x4 uToTangentSpace[TANGENT_COUNT];
+	float2 uPixelOffset;
+	float2 uFaceSize;
+}
+
+// uRadianceMap texture
+Texture2D uRadianceMap : register(t0);
+
+// Output result: 9 SH coefficients (rgb in xyz) for every (thread, face) pair
+RWBuffer<float4> OutputBuffer : register(u0);
+
+// Share group thread data: 9 SH coefficients for every pixel of the tile
+groupshared float3 ResultSH[RT_SIZE * RT_SIZE][9];
+
+// SH compute function: projects color seen along direction n onto 9 SH basis functions
+void ProjectOntoSH(in float3 n, in float3 color, out float3 sh[9])
+{
+	// Band 0
+	sh[0] = 0.282095f * color;
+
+	// Band 1
+	sh[1] = 0.488603f * n.y * color;
+	sh[2] = 0.488603f * n.z * color;
+	sh[3] = 0.488603f * n.x * color;
+
+	// Band 2
+	sh[4] = 1.092548f * n.x * n.y * color;
+	sh[5] = 1.092548f * n.y * n.z * color;
+	sh[6] = 0.315392f * (3.0f * n.z * n.z - 1.0f) * color;
+	sh[7] = 1.092548f * n.x * n.z * color;
+	sh[8] = 0.546274f * (n.x * n.x - n.y * n.y) * color;
+}
+
+// Params:
+// groupID.x [0 -> MAX_NUM_THREAD)
+// groupID.y [0 -> NUM_FACE)
+
+// groupThreadID.x [0 -> RT_SIZE)
+// groupThreadID.y [0 -> RT_SIZE)
+
+// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-attributes-numthreads
+[numthreads(RT_SIZE, RT_SIZE, 1)]
+void main(
+	uint3 groupID : SV_GroupID,
+	uint3 groupThreadID : SV_GroupThreadID,
+	uint3 dispatchThreadID : SV_DispatchThreadID,
+	uint groupIndex : SV_GroupIndex)
+{
+	// begin run thread
+	uint threadID = groupID.x;
+	uint faceID = groupID.y;
+
+	uint pixelX = groupThreadID.x;
+	uint pixelY = groupThreadID.y;
+
+	// First of the 9 output slots owned by this (thread, face) pair
+	uint id = (threadID * NUM_FACE + faceID) * 9;
+
+	// Coefficient index shared by the loops below
+	uint k;
+
+	// need init variable: clear the output when the first tile (offset 0,0) is dispatched
+	// NOTE(review): the final write below assigns rather than accumulates, so every
+	// dispatch replaces the previous tile's result - confirm the caller reads back per tile
+	if (uPixelOffset.x == 0 && uPixelOffset.y == 0 && pixelX == 0 && pixelY == 0)
+	{
+		float4 zero = float4(0.0, 0.0, 0.0f, 0.0);
+
+		[unroll]
+		for (k = 0; k < 9; k++)
+			OutputBuffer[id + k] = zero;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	const int3 pixelLocation = int3(pixelX + (int)uPixelOffset.x, pixelY + (int)uPixelOffset.y, 0.0);
+
+	const int3 location = int3(
+		pixelLocation.x + faceID * uFaceSize.x,
+		pixelLocation.y + threadID * uFaceSize.y,
+		0.0);
+
+	// Gather RGB from the texels
+	float3 radiance = uRadianceMap.Load(location).xyz;
+
+	// Calculate the location in [-1, 1] texture space
+	float u = (pixelLocation.x / float(uFaceSize.x)) * 2.0f - 1.0f;
+	float v = -((pixelLocation.y / float(uFaceSize.y)) * 2.0f - 1.0f);
+
+	// Calculate weight
+	float temp = 1.0f + u * u + v * v;
+	float weight = 4.0f / (sqrt(temp) * temp);
+	radiance *= weight;
+
+	// Extract direction from texel u,v
+	float3 dirVS = normalize(float3(u, v, 1.0f));
+	float3 dirTS = mul(dirVS, (float3x3)uToTangentSpace[threadID * NUM_FACE + faceID]);
+
+	// Project onto SH
+	float3 sh[9];
+	ProjectOntoSH(dirTS, radiance, sh);
+
+	// SH add
+	uint pixel = pixelY * RT_SIZE + pixelX;
+
+	[unroll]
+	for (k = 0; k < 9; k++)
+		ResultSH[pixel][k] = sh[k];
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// Sum total SH[RT_SIZE * RT_SIZE] by GPU MT store at [0]
+	uint totalSize = RT_SIZE * RT_SIZE;
+
+	for(uint s = totalSize / 2; s > 0; s >>= 1)
+	{
+		if (pixel < s)
+		{
+			[unroll]
+			for (k = 0; k < 9; k++)
+				ResultSH[pixel][k] += ResultSH[pixel + s][k];
+		}
+
+		GroupMemoryBarrierWithGroupSync();
+	}
+
+	// Write result on first group thread
+	if (pixel == 0)
+	{
+		[unroll]
+		for (k = 0; k < 9; k++)
+			OutputBuffer[id + k] = float4(ResultSH[0][k], 0.0);
+	}
+}
\ No newline at end of file
diff --git a/Projects/Irrlicht/Include/IGPUCompute.h b/Projects/Irrlicht/Include/IGPUCompute.h
index 29aa84e34..2f6d9dfc7 100644
--- a/Projects/Irrlicht/Include/IGPUCompute.h
+++ b/Projects/Irrlicht/Include/IGPUCompute.h
@@ -29,6 +29,12 @@ namespace irr
 
 			E_DRIVER_TYPE getDriverType() const { return DriverType; };
 
+			//! Write count floats into the shader variable identified by id
+			virtual bool setVariable(s32 id, const f32* floats, int count) = 0;
+
+			//! Look up a shader variable by name (the D3D11 implementation returns -1 if unknown)
+			virtual s32 getVariableID(const c8* name) = 0;
+
 			virtual void setTexture(int slot, ITexture *texture) = 0;
 
 			virtual void setBuffer(int slot, IRWBuffer *buffer) = 0;
diff --git a/Projects/Irrlicht/Include/IRWBuffer.h b/Projects/Irrlicht/Include/IRWBuffer.h
index 0cfc5e17c..3a992dc8a 100644
--- a/Projects/Irrlicht/Include/IRWBuffer.h
+++ b/Projects/Irrlicht/Include/IRWBuffer.h
@@ -26,6 +26,12 @@ namespace irr
 
 			E_DRIVER_TYPE getDriverType() const { return DriverType; };
 
+			//! Map the buffer for CPU access (the D3D11 implementation returns 0 on failure)
+			virtual void* lock(bool readOnly) = 0;
+
+			//! Release a mapping obtained with lock()
+			virtual void unlock() = 0;
+
 		protected:
 			E_DRIVER_TYPE DriverType;
 
diff --git a/Projects/Irrlicht/Source/CD3D11GPUCompute.cpp b/Projects/Irrlicht/Source/CD3D11GPUCompute.cpp
index 11bbf52e2..12ad25225 100644
--- a/Projects/Irrlicht/Source/CD3D11GPUCompute.cpp
+++ b/Projects/Irrlicht/Source/CD3D11GPUCompute.cpp
@@ -21,7 +21,8 @@ namespace irr
 	{
 		CD3D11GPUCompute::CD3D11GPUCompute(CD3D11Driver *driver) :
 			ComputeShader(NULL),
-			ShaderBuffer(NULL)
+			ShaderBuffer(NULL),
+			VariableArrayPtr(NULL)
 		{
 			DriverType = EDT_DIRECT3D11;
 
@@ -151,9 +152,292 @@ namespace irr
 				return false;
 			}
 
+			// reflect the shader so its constants can be set by name
+			initConstant();
+
+			return true;
+		}
+
+		//! Find the index of a reflected shader variable.
+		/** \param name Variable name as declared in the shader.
+		\return Index to pass to setVariable(), or -1 (after logging a warning) if unknown. */
+		s32 CD3D11GPUCompute::getVariableID(const c8* name)
+		{
+			if (!name)
+				return -1;
+
+			const u32 size = VariableArray.size();
+
+			for (u32 i = 0; i < size; ++i)
+			{
+				if (VariableArray[i]->name == name)
+					return (s32)i;
+			}
+
+			// not found: report the request together with the names that do exist
+			core::stringc s = "HLSL variable to get ID not found: '";
+			s += name;
+			s += "'. Available variables are:";
+
+			for (u32 i = 0; i < size; ++i)
+			{
+				s += " '";
+				s += VariableArray[i]->name.c_str();
+				s += "'";
+			}
+
+			os::Printer::log(s.c_str(), ELL_WARNING);
+
+			return -1;
+		}
+
+		//! Copy float data into the CPU-side copy of a shader variable.
+		/** The data is uploaded to the GPU by the next dispatch().
+		\param id Index returned by getVariableID().
+		\param floats Source values; column-major matrices are transposed while copying.
+		\param count Number of floats available in floats; clamped to the variable size.
+		\return false when the id, the source pointer or the count is invalid. */
+		bool CD3D11GPUCompute::setVariable(s32 id, const f32* floats, int count)
+		{
+			// getVariableID() returns -1 on failure, never index the array with it
+			if (id < 0 || (u32)id >= VariableArray.size() || !floats || count < 0)
+				return false;
+
+			SShaderVariable* var = VariableArray[(u32)id];
+
+			if (!var)
+				return false;
+
+			SShaderBuffer* buff = var->buffer;
+
+			if (!buff || !buff->cData)
+				return false;
+
+			// never write past the space reflection reported for this variable
+			const int maxCount = (int)(var->size / sizeof(f32));
+			if (count > maxCount)
+				count = maxCount;
+
+			c8* byteData = (c8*)buff->cData;
+			byteData += var->offset;
+
+			if (var->classType == D3D10_SVC_MATRIX_COLUMNS)
+			{
+				// transpose matrix
+				const int numMatrix = count / 16;
+
+				float *m = (float*)byteData;
+				const float *v = floats;
+
+				for (int i = 0; i < numMatrix; i++)
+				{
+					for (int row = 0; row < 4; row++)
+					{
+						for (int col = 0; col < 4; col++)
+							m[row * 4 + col] = v[col * 4 + row];
+					}
+
+					m += 16;
+					v += 16;
+				}
+			}
+			else
+			{
+				memcpy(byteData, floats, count * sizeof(f32));
+			}
+
+			return true;
+		}
+
+		//! Copy the CPU-side contents of every constant buffer into its D3D11 buffer.
+		/** \return false as soon as one buffer cannot be mapped. */
+		bool CD3D11GPUCompute::uploadVariableToGPU()
+		{
+			for (u32 i = 0, n = BufferArray.size(); i < n; i++)
+			{
+				SShaderBuffer* buff = BufferArray[i];
+
+				// the whole buffer is rewritten, so the previous contents can be discarded
+				D3D11_MAPPED_SUBRESOURCE mappedData;
+
+				HRESULT hr = Context->Map(buff->data, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedData);
+
+				if (FAILED(hr))
+				{
+					logFormatError(hr, "Could not map float variable in shader");
+					return false;
+				}
+
+				memcpy(mappedData.pData, buff->cData, buff->size);
+
+				Context->Unmap(buff->data, 0);
+			}
+
+			return true;
+		}
+
+		//! Reflect the compiled shader and register its constant buffers and variables.
+		/** Fills BufferArray and VariableArray, then refreshes VariableArrayPtr.
+		\return false when the shader byte code cannot be reflected. */
+		bool CD3D11GPUCompute::initConstant()
+		{
+			ID3D11ShaderReflection* pReflector = NULL;
+			HRESULT hr = D3DReflect(ShaderBuffer->GetBufferPointer(), ShaderBuffer->GetBufferSize(), IID_ID3D11ShaderReflection, (void**)&pReflector);
+
+			if (FAILED(hr))
+			{
+				logFormatError(hr, "Could not reflect shader");
+				return false;
+			}
+
+			D3D11_SHADER_DESC shaderDesc;
+			pReflector->GetDesc(&shaderDesc);
+
+			for (u32 i = 0; i < shaderDesc.BoundResources; ++i)
+			{
+				D3D11_SHADER_INPUT_BIND_DESC resourceDesc;
+				pReflector->GetResourceBindingDesc(i, &resourceDesc);
+
+				switch (resourceDesc.Type)
+				{
+				case D3D_SIT_CBUFFER:
+				{
+					ID3D11ShaderReflectionConstantBuffer* reflectionBuffer = pReflector->GetConstantBufferByName(resourceDesc.Name);
+
+					D3D11_SHADER_BUFFER_DESC bufferDesc;
+					reflectionBuffer->GetDesc(&bufferDesc);
+
+					SShaderBuffer* sBuffer = createConstantBuffer(bufferDesc);
+
+					if (sBuffer)
+					{
+						BufferArray.push_back(sBuffer);
+
+						// add vars to shader
+						for (u32 j = 0; j < bufferDesc.Variables; j++)
+						{
+							ID3D11ShaderReflectionVariable* var = reflectionBuffer->GetVariableByIndex(j);
+
+							D3D11_SHADER_VARIABLE_DESC varDesc;
+							var->GetDesc(&varDesc);
+
+							D3D11_SHADER_TYPE_DESC typeDesc;
+							var->GetType()->GetDesc(&typeDesc);
+
+							SShaderVariable* sv = new SShaderVariable();
+							sv->name = varDesc.Name;
+							sv->buffer = sBuffer;
+							sv->offset = varDesc.StartOffset;
+							sv->size = varDesc.Size;
+							sv->baseType = typeDesc.Type;
+							sv->classType = typeDesc.Class;
+
+							VariableArray.push_back(sv);
+						}
+					}
+
+					break;
+				}
+				case D3D_SIT_TBUFFER:
+				{
+					// same as cbuffer?
+					break;
+				}
+				case D3D_SIT_SAMPLER:
+				{
+					break;
+				}
+				case D3D_SIT_TEXTURE:
+				{
+					break;
+				}
+				}
+			}
+
+			VariableArrayPtr = VariableArray.pointer();
+
+			pReflector->Release();
+
 			return true;
 		}
 
+		//! Create (or reuse) the CPU and GPU storage for one reflected constant buffer.
+		/** \param bufferDesc Reflection description providing the buffer name and byte size.
+		\return The buffer, or NULL when the D3D11 buffer or its CPU copy cannot be allocated. */
+		SShaderBuffer* CD3D11GPUCompute::createConstantBuffer(D3D11_SHADER_BUFFER_DESC& bufferDesc)
+		{
+			SShaderBuffer* sBuffer = NULL;
+
+			// take the already registered buffer if it has the same name
+			for (u32 j = 0; j < BufferArray.size(); ++j)
+			{
+				if (BufferArray[j]->name == bufferDesc.Name)
+				{
+					sBuffer = BufferArray[j];
+					sBuffer->AddRef();
+					break;
+				}
+			}
+
+			// no buffer found so create a new one
+			const bool isNew = (sBuffer == NULL);
+			if (isNew)
+			{
+				sBuffer = new SShaderBuffer();
+				sBuffer->name = bufferDesc.Name;
+				sBuffer->size = bufferDesc.Size;
+			}
+
+			if (!sBuffer->data)
+			{
+				D3D11_BUFFER_DESC cbDesc;
+				cbDesc.ByteWidth = sBuffer->size;
+				cbDesc.Usage = D3D11_USAGE_DYNAMIC;
+				cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+				cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+				cbDesc.MiscFlags = 0;
+				cbDesc.StructureByteStride = 0;
+
+				// Create the buffer.
+				HRESULT hr = Device->CreateBuffer(&cbDesc, NULL, &sBuffer->data);
+
+				if (FAILED(hr))
+				{
+					core::stringc error = "Could not create constant buffer \"";
+					error += sBuffer->name;
+					error += "\"";
+
+					logFormatError(hr, error);
+
+					// a reused buffer is still referenced by BufferArray, only free our own
+					if (isNew)
+						delete sBuffer;
+
+					return NULL;
+				}
+
+				sBuffer->cData = malloc(sBuffer->size);
+
+				if (!sBuffer->cData)
+				{
+					os::Printer::log("Could not allocate constant buffer memory", ELL_ERROR);
+
+					sBuffer->data->Release();
+					sBuffer->data = NULL;
+
+					if (isNew)
+						delete sBuffer;
+
+					return NULL;
+				}
+
+				// start from zero so variables that are never set upload defined values
+				memset(sBuffer->cData, 0, sBuffer->size);
+			}
+
+			return sBuffer;
+		}
+
 		void CD3D11GPUCompute::setTexture(int slot, ITexture *texture)
 		{
 			TextureSlot[slot] = texture;
@@ -177,14 +461,29 @@ namespace irr
 				Context->CSSetShaderResources(i, 1, &views);
 
 				// Buffer unorderred access view
-				ID3D11UnorderedAccessView* unorderedAccessView;
+				ID3D11UnorderedAccessView* unorderedAccessView = NULL;
 				if (BufferSlot[i])
 					unorderedAccessView = ((CD3D11RWBuffer*)BufferSlot[i])->getUnorderedAccessView();
 				Context->CSSetUnorderedAccessViews(i, 1, &unorderedAccessView, NULL);
 			}
 
+			// update constant buffer to GPU
+			uploadVariableToGPU();
+
+			u32 size = BufferArray.size();
+			if (size > 0)
+			{
+				core::array<ID3D11Buffer*> buffs;
+				buffs.reallocate(size);
+
+				for (u32 i = 0; i < size; ++i)
+					buffs.push_back(BufferArray[i]->data);
+
+				Context->CSSetConstantBuffers(0, size, &buffs[0]);
+			}
+
 			// do gpu compute
-			Context->Dispatch(threadGroupX, threadGroupY, threadGroupZ);	
+			Context->Dispatch(threadGroupX, threadGroupY, threadGroupZ);
 		}
 	}
 }
diff --git a/Projects/Irrlicht/Source/CD3D11GPUCompute.h b/Projects/Irrlicht/Source/CD3D11GPUCompute.h
index 0004ed530..4c042f03a 100644
--- a/Projects/Irrlicht/Source/CD3D11GPUCompute.h
+++ b/Projects/Irrlicht/Source/CD3D11GPUCompute.h
@@ -32,6 +32,10 @@ namespace irr
 			ITexture *TextureSlot[NUM_PARAMS_SUPPORT];
 			IRWBuffer *BufferSlot[NUM_PARAMS_SUPPORT];
 
+			core::array<SShaderBuffer*> BufferArray;
+			core::array<SShaderVariable*> VariableArray;
+			SShaderVariable** VariableArrayPtr;
+
 		public:
 			CD3D11GPUCompute(CD3D11Driver *driver);
 
@@ -40,12 +44,24 @@ namespace irr
 			bool compile(const c8* computeShaderProgram,
 				const c8* computeShaderEntryPointName = "main",
 				E_COMPUTE_SHADER_TYPE csCompileTarget = ECST_CS_5_0);
+
+			virtual bool setVariable(s32 id, const f32* floats, int count);
+
+			virtual s32 getVariableID(const c8* name);
 
 			virtual void setTexture(int slot, ITexture *texture);
 
 			virtual void setBuffer(int slot, IRWBuffer *buffer);
 
 			virtual void dispatch(int threadGroupX, int threadGroupY, int threadGroupZ);
+
+		protected:
+
+			bool initConstant();
+
+			SShaderBuffer* createConstantBuffer(D3D11_SHADER_BUFFER_DESC& bufferDesc);
+
+			bool uploadVariableToGPU();
 		};
 	}
 }
diff --git a/Projects/Irrlicht/Source/CD3D11RWBuffer.cpp b/Projects/Irrlicht/Source/CD3D11RWBuffer.cpp
index 963359f23..93c42bce9 100644
--- a/Projects/Irrlicht/Source/CD3D11RWBuffer.cpp
+++ b/Projects/Irrlicht/Source/CD3D11RWBuffer.cpp
@@ -34,8 +34,11 @@ namespace irr
 			D3D11_BUFFER_DESC bufferDesc;
 			bufferDesc.ByteWidth = bytePerPixel * numElements;
 			bufferDesc.Usage = D3D11_USAGE_DEFAULT;
+			// NOTE(review): CPU access flags together with D3D11_USAGE_DEFAULT appear to be accepted
+			// only when the device reports MapOnDefaultBuffers; otherwise CreateBuffer fails - confirm
+			// the supported targets or read back through a D3D11_USAGE_STAGING copy.
+			bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE | D3D11_CPU_ACCESS_READ;
 			bufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
-			bufferDesc.CPUAccessFlags = 0;
 			bufferDesc.MiscFlags = 0;
 			bufferDesc.StructureByteStride = 0;
 
@@ -77,6 +80,36 @@ namespace irr
 			Context->Release();
 			Device->Release();
 		}
+
+		//! Lock function.
+		/** \param readOnly true maps the buffer for reading only, false for reading and writing.
+		\return Pointer to the mapped data, or 0 when there is no buffer or the mapping fails. */
+		void* CD3D11RWBuffer::lock(bool readOnly)
+		{
+			if (!Buffer)
+				return 0;
+
+			// D3D11_MAP is an enumeration, not a bit mask: use the dedicated read/write value
+			LastMapDirection = readOnly ? D3D11_MAP_READ : D3D11_MAP_READ_WRITE;
+
+			// Otherwise, map this buffer
+			D3D11_MAPPED_SUBRESOURCE mappedData;
+			HRESULT hr = Context->Map(Buffer, 0, LastMapDirection, 0, &mappedData);
+			if (FAILED(hr))
+				return 0;
+
+			return mappedData.pData;
+		}
+
+		//! Unlock function. Must be called after a lock() to the buffer.
+		void CD3D11RWBuffer::unlock()
+		{
+			if (!Buffer)
+				return;
+
+			// Otherwise, unmap this
+			Context->Unmap(Buffer, 0);
+		}
 	}
 }
 
diff --git a/Projects/Irrlicht/Source/CD3D11RWBuffer.h b/Projects/Irrlicht/Source/CD3D11RWBuffer.h
index 30d9e85b8..6e62b6b93 100644
--- a/Projects/Irrlicht/Source/CD3D11RWBuffer.h
+++ b/Projects/Irrlicht/Source/CD3D11RWBuffer.h
@@ -35,6 +35,10 @@ namespace irr
 				return UAView;
 			}
 
+			void* lock(bool readOnly);
+
+			void unlock();
+
 		protected:
 			CD3D11Driver *Driver;
 
@@ -47,6 +51,8 @@ namespace irr
 			ID3D11UnorderedAccessView* UAView;
 
 			DXGI_FORMAT D3DFormat;
+
+			D3D11_MAP LastMapDirection;
 		};
 	}
 }
diff --git a/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.cpp b/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.cpp
index 9c4d4c9b8..c370d0916 100644
--- a/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.cpp
+++ b/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.cpp
@@ -24,6 +24,9 @@ This file is part of the "Skylicht Engine".
 
 #include "pch.h"
 #include "CGPUBaker.h"
+#include "CLightmapper.h"
+
+#include "RenderPipeline/CBaseRP.h"
 
 namespace Skylicht
 {
@@ -31,21 +34,149 @@ namespace Skylicht
 	{
 		CGPUBaker::CGPUBaker()
 		{
-			m_shBuffer = getVideoDriver()->createRWBuffer(video::ECF_A32B32G32R32F, MAX_NUM_THREAD * 9);
+			IVideoDriver *driver = getVideoDriver();
+
+			// load compute shader
+			if (driver->getDriverType() == video::EDT_DIRECT3D11)
+			{
+				// output buffer
+				m_shBuffer = driver->createRWBuffer(video::ECF_A32B32G32R32F, MAX_NUM_THREAD * NUM_FACES * 9);
+
+				// gpu compute program
+				m_shCompute = driver->getGPUProgrammingServices()->createComputeProgramFromFile("BuiltIn/Shader/Compute/HLSL/IrradianceSH.hlsl");
+
+				// uniform matrix
+				m_tangentToSpaceData = new float[MAX_NUM_THREAD * NUM_FACES * 16];
+			}
+			else
+			{
+				m_shBuffer = NULL;
+				m_shCompute = NULL;
+				m_tangentToSpaceData = NULL;
+			}
 		}
 
 		CGPUBaker::~CGPUBaker()
 		{
 			if (m_shBuffer != NULL)
 				m_shBuffer->drop();
+
+			if (m_shCompute != NULL)
+				m_shCompute->drop();
+
+			// allocated with new[], so it must be released with delete[] (safe on NULL)
+			delete[] m_tangentToSpaceData;
 		}
 
 		bool CGPUBaker::canUseGPUBaker()
 		{
-			if (getVideoDriver()->getDriverType() == video::EDT_DIRECT3D11 && m_shBuffer != NULL)
+			if (getVideoDriver()->getDriverType() == video::EDT_DIRECT3D11 &&
+				m_shBuffer != NULL &&
+				m_shCompute != NULL)
 				return true;
 
 			return false;
 		}
+
+		//! Project the rendered radiance of count threads onto SH with the compute shader.
+		/** Dispatches the shader once per 16x16 tile of the hemisphere render target, reads the
+		per-face coefficients back after every dispatch and accumulates them into m_sh.
+		\param count Number of threads to process (thread groups along X).
+		\param numFace Number of faces per thread (thread groups along Y). */
+		void CGPUBaker::computeSH(int count, int numFace)
+		{
+			// clear sh value
+			for (int tid = 0; tid < count; tid++)
+				m_sh[tid].zero();
+
+			// nothing can be computed without the GPU resources created in the constructor
+			if (m_shCompute == NULL || m_shBuffer == NULL || count <= 0 || numFace <= 0)
+				return;
+
+			// render target size
+			const u32 rtSize = CLightmapper::getHemisphereBakeSize();
+
+			// must match RT_SIZE in IrradianceSH.hlsl
+			const int groupThreadSize = 16;
+			const int numCell = (int)rtSize / groupThreadSize;
+
+			// set radiance as texture0
+			m_shCompute->setTexture(0, m_radiance);
+
+			// set buffer
+			m_shCompute->setBuffer(0, m_shBuffer);
+
+			// the variable ids do not change between tiles, resolve them once
+			const s32 uToTangentSpace = m_shCompute->getVariableID("uToTangentSpace");
+			const s32 uPixelOffset = m_shCompute->getVariableID("uPixelOffset");
+			const s32 uFaceSize = m_shCompute->getVariableID("uFaceSize");
+
+			// set const buffer
+			if (uToTangentSpace >= 0)
+			{
+				for (int i = 0; i < MAX_NUM_THREAD * NUM_FACES; i++)
+					memcpy(&m_tangentToSpaceData[i * 16], m_toTangentSpace[i].pointer(), sizeof(float) * 16);
+
+				// update compute constance
+				m_shCompute->setVariable(uToTangentSpace, m_tangentToSpaceData, MAX_NUM_THREAD * NUM_FACES * 16);
+			}
+
+			if (uFaceSize >= 0)
+			{
+				core::vector2df faceSize;
+				faceSize.X = (float)rtSize;
+				faceSize.Y = (float)rtSize;
+				m_shCompute->setVariable(uFaceSize, &faceSize.X, 2);
+			}
+
+			for (int loopY = 0; loopY < numCell; loopY++)
+			{
+				for (int loopX = 0; loopX < numCell; loopX++)
+				{
+					if (uPixelOffset >= 0)
+					{
+						core::vector2df offset;
+						offset.X = (float)(loopX * groupThreadSize);
+						offset.Y = (float)(loopY * groupThreadSize);
+						m_shCompute->setVariable(uPixelOffset, &offset.X, 2);
+					}
+
+					// run thread
+					m_shCompute->dispatch(count, numFace, 1);
+
+					// The shader overwrites its output slots on every dispatch, so each
+					// tile has to be read back and accumulated before the next one runs.
+					const video::SVec4 *data = (const video::SVec4*)m_shBuffer->lock(true);
+					if (data == NULL)
+						continue;
+
+					for (int tid = 0; tid < count; tid++)
+					{
+						core::vector3df* shValue = m_sh[tid].getValue();
+
+						// sum sh each face
+						for (int fid = 0; fid < numFace; fid++)
+						{
+							const video::SVec4 *computeResult = &data[(tid * NUM_FACES + fid) * 9];
+
+							for (int i = 0; i < 9; i++)
+							{
+								shValue[i].X += computeResult[i].X;
+								shValue[i].Y += computeResult[i].Y;
+								shValue[i].Z += computeResult[i].Z;
+							}
+						}
+					}
+
+					m_shBuffer->unlock();
+				}
+			}
+
+			// finalWeight is weight for 1 pixel on Sphere
+			// S = 4 * PI * R^2
+			const float finalWeight = (4.0f * 3.14159f) / (m_weightSum * numFace);
+			for (int tid = 0; tid < count; tid++)
+				m_sh[tid] *= finalWeight;
+		}
 	}
 }
\ No newline at end of file
diff --git a/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.h b/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.h
index 2b2824499..8bbfdfea5 100644
--- a/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.h
+++ b/Projects/Skylicht/Lightmapper/Source/Lightmapper/CGPUBaker.h
@@ -38,14 +38,19 @@ namespace Skylicht
 		class CGPUBaker : public CMTBaker
 		{
 		protected:
+			IGPUCompute *m_shCompute;
+
 			IRWBuffer *m_shBuffer;
 
+			float *m_tangentToSpaceData;
 		public:
 			CGPUBaker();
 
 			virtual ~CGPUBaker();
 
 			bool canUseGPUBaker();
+
+			virtual void computeSH(int count, int numFace);
 		};
 	}
 }
\ No newline at end of file
diff --git a/Projects/Skylicht/Lightmapper/Source/Lightmapper/CMTBaker.cpp b/Projects/Skylicht/Lightmapper/Source/Lightmapper/CMTBaker.cpp
index 4033e51eb..62dd5b91f 100644
--- a/Projects/Skylicht/Lightmapper/Source/Lightmapper/CMTBaker.cpp
+++ b/Projects/Skylicht/Lightmapper/Source/Lightmapper/CMTBaker.cpp
@@ -208,7 +208,7 @@ namespace Skylicht
 				if (CDeferredRP::isEnableRenderIndirect() == true && test == true)
 				{
 					char filename[512];
-					sprintf(filename, "C:\\SVN\\test_%d.png", t);
+					sprintf(filename, "test_%d.png", t);
 					CBaseRP::saveFBOToFile(m_radiance, filename);
 					test = true;
 				}
diff --git a/Samples/Sponza/Source/CViewInit.cpp b/Samples/Sponza/Source/CViewInit.cpp
index 1c91cd469..b6f5fc036 100644
--- a/Samples/Sponza/Source/CViewInit.cpp
+++ b/Samples/Sponza/Source/CViewInit.cpp
@@ -163,7 +163,7 @@ void CViewInit::initProbes()
 
 	for (int i = 0; i < 7; i++)
 	{
-		float x = i * 5.6f - 6.0f * 5.6f;
+		float x = i * 5.6f - 3.0f * 5.6f;
 
 		// row 0
 		probesPosition.push_back(core::vector3df(x, 2.0f, -0.4f));