Skip to content

Commit

Permalink
feat: #98 Add prototype gpu compute SH lighting
Browse files Browse the repository at this point in the history
  • Loading branch information
ducphamhong committed Jul 27, 2020
1 parent a05cb2b commit 73765a0
Show file tree
Hide file tree
Showing 11 changed files with 644 additions and 8 deletions.
177 changes: 177 additions & 0 deletions Assets/BuiltIn/Shader/Compute/HLSL/IrradianceSH.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// Upper bound of groupID.x, i.e. the number of thread groups expected along X
// (one group per entry, not one GPU thread - see the dispatch notes above main)
#define MAX_NUM_THREAD 120

// Number of tangent-space matrices: MAX_NUM_THREAD * NUM_FACE (120 * 6)
#define TANGENT_COUNT 720

// Number of faces per entry; upper bound of groupID.y
#define NUM_FACE 6

// Width and height, in threads, of one thread group (used by numthreads on main)
#define RT_SIZE 16

// Uniform constants read by main
cbuffer cbConstants
{
// One matrix per (groupID.x, groupID.y) pair, indexed as groupID.x * NUM_FACE + groupID.y.
// Only the upper-left 3x3 part is used, to transform the texel direction.
float4x4 uToTangentSpace[TANGENT_COUNT];
// Offset in pixels added to the group-thread coordinates (selects the tile inside a face)
float2 uPixelOffset;
// Face size in pixels: used to step between faces in uRadianceMap and to
// normalize pixel coordinates to [-1, 1]
float2 uFaceSize;
}

// Input radiance texture, read with Load() at integer texel coordinates.
// Faces are addressed at x = faceID * uFaceSize.x and y = threadID * uFaceSize.y.
Texture2D uRadianceMap : register(t0);

// Output: 9 float4 values (rgb = SH coefficient, a = 0) per (groupID.x, groupID.y) pair,
// starting at index (groupID.x * NUM_FACE + groupID.y) * 9
RWBuffer<float4> OutputBuffer : register(u0);

// Per-group scratch memory: the 9 SH coefficients of every thread in the group,
// summed in place by the reduction in main (total ends up in ResultSH[0])
groupshared float3 ResultSH[RT_SIZE * RT_SIZE][9];

// Evaluates the 9 spherical-harmonics basis functions (bands 0 to 2) in
// direction n and scales each of them by color.
//   n     : direction the basis is evaluated in
//           (NOTE(review): presumably expected to be unit length - confirm with caller)
//   color : RGB value to project
//   sh    : receives the 9 RGB coefficients, ordered
//           band 0, band 1 (y, z, x), band 2 (xy, yz, 3z^2 - 1, xz, x^2 - y^2)
void ProjectOntoSH(in float3 n, in float3 color, out float3 sh[9])
{
	// Normalization constants of the basis functions
	const float kBand0 = 0.282095f;
	const float kBand1 = 0.488603f;
	const float kBand2 = 1.092548f;
	const float kBand2ZZ = 0.315392f;
	const float kBand2XXYY = 0.546274f;

	// Scalar basis values for bands 1 and 2
	const float b1 = kBand1 * n.y;
	const float b2 = kBand1 * n.z;
	const float b3 = kBand1 * n.x;
	const float b4 = kBand2 * n.x * n.y;
	const float b5 = kBand2 * n.y * n.z;
	const float b6 = kBand2ZZ * (3.0f * n.z * n.z - 1.0f);
	const float b7 = kBand2 * n.x * n.z;
	const float b8 = kBand2XXYY * (n.x * n.x - n.y * n.y);

	// Band 0 is constant over the sphere
	sh[0] = kBand0 * color;

	// Band 1
	sh[1] = b1 * color;
	sh[2] = b2 * color;
	sh[3] = b3 * color;

	// Band 2
	sh[4] = b4 * color;
	sh[5] = b5 * color;
	sh[6] = b6 * color;
	sh[7] = b7 * color;
	sh[8] = b8 * color;
}

// Compute-shader entry point: projects one RT_SIZE x RT_SIZE tile of a face
// of uRadianceMap onto 9 SH coefficients and stores the sum over the tile.
//
// Dispatch layout (one thread group per face, one thread per texel of the tile):
//   groupID.x        [0 -> MAX_NUM_THREAD]  entry index (row of faces in uRadianceMap)
//   groupID.y        [0 -> NUM_FACE]        face index (column in uRadianceMap)
//   groupThreadID.x  [0 -> RT_SIZE]         texel column inside the tile
//   groupThreadID.y  [0 -> RT_SIZE]         texel row inside the tile
//
// Output: 9 float4 values (rgb = coefficient, a = 0) written to OutputBuffer
// starting at (groupID.x * NUM_FACE + groupID.y) * 9.
//
// dispatchThreadID and groupIndex are part of the signature but not used.
//
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-attributes-numthreads
[numthreads(RT_SIZE, RT_SIZE, 1)]
void main(
	uint3 groupID : SV_GroupID,
	uint3 groupThreadID : SV_GroupThreadID,
	uint3 dispatchThreadID : SV_DispatchThreadID,
	uint groupIndex : SV_GroupIndex)
{
	// Number of SH coefficients (bands 0 to 2); matches the inner size of ResultSH
	const uint shCount = 9;

	const uint threadID = groupID.x;
	const uint faceID = groupID.y;

	const uint pixelX = groupThreadID.x;
	const uint pixelY = groupThreadID.y;

	// First of the 9 OutputBuffer slots owned by this group
	const uint outputBase = (threadID * NUM_FACE + faceID) * shCount;

	// Shared loop counter: declared once so the coefficient loops below do not
	// redeclare it (older HLSL compilers leak for-loop variables into the outer scope)
	uint k;

	// On the first tile (offset 0,0) the first thread of the group clears the output slots.
	// NOTE(review): the final store below uses '=', so these zeros are overwritten by
	// the same thread, and a dispatch with a non-zero uPixelOffset replaces the
	// previous tile's result instead of accumulating it - confirm that the host
	// reads the buffer back after every dispatch.
	if (uPixelOffset.x == 0 && uPixelOffset.y == 0 && pixelX == 0 && pixelY == 0)
	{
		const float4 zero = float4(0.0, 0.0, 0.0f, 0.0);

		[unroll]
		for (k = 0; k < shCount; ++k)
			OutputBuffer[outputBase + k] = zero;
	}

	GroupMemoryBarrierWithGroupSync();

	// Texel position inside the face (tile offset + position inside the tile)
	const int3 pixelLocation = int3(pixelX + (int)uPixelOffset.x, pixelY + (int)uPixelOffset.y, 0);

	// Texel position inside uRadianceMap: faces are laid out along X, entries along Y.
	// The third component is the mip level for Load().
	const int3 location = int3(
		pixelLocation.x + faceID * uFaceSize.x,
		pixelLocation.y + threadID * uFaceSize.y,
		0);

	// Gather RGB from the texel
	float3 radiance = uRadianceMap.Load(location).xyz;

	// Calculate the location in [-1, 1] texture space (v is flipped).
	// NOTE(review): this uses the texel corner, not the texel center (+0.5) - confirm intended.
	const float u = (pixelLocation.x / uFaceSize.x) * 2.0f - 1.0f;
	const float v = -((pixelLocation.y / uFaceSize.y) * 2.0f - 1.0f);

	// Weight the texel by 4 / (1 + u^2 + v^2)^(3/2)
	const float temp = 1.0f + u * u + v * v;
	const float weight = 4.0f / (sqrt(temp) * temp);
	radiance *= weight;

	// Direction through the texel, then transformed by this face's matrix
	const float3 dirVS = normalize(float3(u, v, 1.0f));
	const float3 dirTS = mul(dirVS, (float3x3)uToTangentSpace[threadID * NUM_FACE + faceID]);

	// Project onto SH
	float3 sh[9];
	ProjectOntoSH(dirTS, radiance, sh);

	// Publish this thread's coefficients to group-shared memory
	const uint pixel = pixelY * RT_SIZE + pixelX;

	[unroll]
	for (k = 0; k < shCount; ++k)
		ResultSH[pixel][k] = sh[k];

	GroupMemoryBarrierWithGroupSync();

	// Sum all RT_SIZE * RT_SIZE entries into ResultSH[0]: each pass adds the
	// upper half of the remaining range onto the lower half. The barrier is
	// outside the 'if' so every thread of the group reaches it.
	for (uint s = (RT_SIZE * RT_SIZE) / 2; s > 0; s >>= 1)
	{
		if (pixel < s)
		{
			[unroll]
			for (k = 0; k < shCount; ++k)
				ResultSH[pixel][k] += ResultSH[pixel + s][k];
		}

		GroupMemoryBarrierWithGroupSync();
	}

	// Write result on first group thread
	if (pixel == 0)
	{
		[unroll]
		for (k = 0; k < shCount; ++k)
			OutputBuffer[outputBase + k] = float4(ResultSH[0][k], 0.0);
	}
}
4 changes: 4 additions & 0 deletions Projects/Irrlicht/Include/IGPUCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ namespace irr

//! Returns the driver type stored in DriverType.
E_DRIVER_TYPE getDriverType() const { return DriverType; }

//! Sets the shader variable identified by id from an array of count floats.
/** NOTE(review): presumably id is a value returned by getVariableID() and the
return value reports success - confirm against the driver implementation. */
virtual bool setVariable(s32 id, const f32* floats, int count) = 0;

//! Looks up the id of the shader variable called name.
/** NOTE(review): the value returned for an unknown name is not visible here -
check the driver implementation before relying on it. */
virtual s32 getVariableID(const c8* name) = 0;

//! Assigns texture to the given slot.
/** NOTE(review): presumably slot maps to the shader's texture register index - confirm. */
virtual void setTexture(int slot, ITexture *texture) = 0;

//! Assigns a read/write buffer to the given slot.
/** NOTE(review): presumably slot maps to the shader's UAV register index - confirm. */
virtual void setBuffer(int slot, IRWBuffer *buffer) = 0;
Expand Down
4 changes: 4 additions & 0 deletions Projects/Irrlicht/Include/IRWBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ namespace irr

//! Returns the driver type stored in DriverType.
E_DRIVER_TYPE getDriverType() const { return DriverType; }

//! Gives access to the buffer's data through a raw pointer.
/** \param readOnly Whether the caller only intends to read the data.
NOTE(review): presumably every lock() must be paired with unlock() and the
pointer is only valid in between - confirm against the driver implementation. */
virtual void* lock(bool readOnly) = 0;

//! Ends the access started by lock().
/** NOTE(review): behaviour when called without a preceding lock() is not visible here. */
virtual void unlock() = 0;

protected:

//! Driver type reported by getDriverType().
E_DRIVER_TYPE DriverType;
Expand Down
Loading

0 comments on commit 73765a0

Please sign in to comment.