Skip to content

Commit

Permalink
Merge pull request #99 from skylicht-lab/feature/#98-compute-shader
Browse files Browse the repository at this point in the history
Feature/#98 compute shader
  • Loading branch information
ducphamhong authored Jul 28, 2020
2 parents 81d2b84 + 91664e7 commit 0817e47
Show file tree
Hide file tree
Showing 34 changed files with 1,505 additions and 147 deletions.
177 changes: 177 additions & 0 deletions Assets/BuiltIn/Shader/Compute/HLSL/IrradianceSH.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// Upper bound of groupID.x, i.e. the number of bake slots ("threadID" in main)
// that a single dispatch can address.
#define MAX_NUM_THREAD 128

// Number of tangent-space matrices: MAX_NUM_THREAD * NUM_FACE (128 * 6).
// Keep in sync by hand when either of those two values changes.
#define TANGENT_COUNT 768

// Number of faces per bake slot; groupID.y selects the face.
#define NUM_FACE 6

// Edge length of one thread group: main runs RT_SIZE x RT_SIZE threads per
// group, one thread per texel.
#define RT_SIZE 16

// Uniform constants
cbuffer cbConstants
{
// One matrix per (slot, face) pair, indexed as slot * NUM_FACE + face.
// Only the upper-left 3x3 part is used by main.
float4x4 uToTangentSpace[TANGENT_COUNT];
// Texel offset added to the in-group texel coordinate; (0, 0) also triggers
// the clear of OutputBuffer in main.
float2 uPixelOffset;
// Size of one face in texels; used both to locate the face inside
// uRadianceMap and to normalise texel coordinates to [-1, 1].
float2 uFaceSize;
}

// Radiance input. main reads the face at x = face * uFaceSize.x and
// y = slot * uFaceSize.y, i.e. faces side by side and slots stacked vertically.
Texture2D uRadianceMap : register(t0);

// Accumulated result: 9 float4 entries per (slot, face) pair starting at
// (slot * NUM_FACE + face) * 9. xyz hold the RGB SH coefficient, w is written as 0.
RWBuffer<float4> OutputBuffer : register(u0);

// Per-group scratch space: 9 SH coefficients for every thread of the group,
// summed by main into entry [0].
groupshared float3 ResultSH[RT_SIZE * RT_SIZE][9];

// Projects a colour sample onto spherical-harmonic bands 0, 1 and 2.
//   n     : direction of the sample (presumably unit length - TODO confirm at call sites)
//   color : RGB value to project
//   sh    : receives the 9 RGB coefficients (1 for band 0, 3 for band 1, 5 for band 2)
void ProjectOntoSH(in float3 n, in float3 color, out float3 sh[9])
{
	// Basis constants, one per distinct factor used below.
	const float kBand0 = 0.282095f;
	const float kBand1 = 0.488603f;
	const float kBand2Cross = 1.092548f;
	const float kBand2Z = 0.315392f;
	const float kBand2XY = 0.546274f;

	const float x = n.x;
	const float y = n.y;
	const float z = n.z;

	// Band 0
	sh[0] = kBand0 * color;

	// Band 1
	sh[1] = kBand1 * y * color;
	sh[2] = kBand1 * z * color;
	sh[3] = kBand1 * x * color;

	// Band 2
	sh[4] = kBand2Cross * x * y * color;
	sh[5] = kBand2Cross * y * z * color;
	sh[6] = kBand2Z * (3.0f * z * z - 1.0f) * color;
	sh[7] = kBand2Cross * x * z * color;
	sh[8] = kBand2XY * (x * x - y * y) * color;
}

// Compute-shader entry point. Each thread group projects one RT_SIZE x RT_SIZE
// tile of a radiance face onto 9 SH coefficients, sums the tile in group-shared
// memory and adds the sum to the group's 9 entries of OutputBuffer.
//
// Dispatch layout (one thread per texel of the tile):
//   groupID.x       [0 -> MAX_NUM_THREAD)  bake slot ("threadID")
//   groupID.y       [0 -> NUM_FACE)        face of that slot
//   groupThreadID.x [0 -> RT_SIZE)         texel column inside the tile
//   groupThreadID.y [0 -> RT_SIZE)         texel row inside the tile
//
// uPixelOffset selects the tile of the face covered by this dispatch. A dispatch
// with uPixelOffset == (0, 0) clears the group's accumulator before adding to it,
// so anything accumulated before such a dispatch is discarded.
// dispatchThreadID and groupIndex are part of the signature but are not read.
//
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-attributes-numthreads
[numthreads(RT_SIZE, RT_SIZE, 1)]
void main(
	uint3 groupID : SV_GroupID,
	uint3 groupThreadID : SV_GroupThreadID,
	uint3 dispatchThreadID : SV_DispatchThreadID,
	uint groupIndex : SV_GroupIndex)
{
	// Number of SH coefficients per texel; matches the second dimension of ResultSH.
	const uint shCount = 9;

	const uint threadID = groupID.x;
	const uint faceID = groupID.y;

	const uint pixelX = groupThreadID.x;
	const uint pixelY = groupThreadID.y;

	// Index of this (slot, face) pair; selects both the tangent-space matrix
	// and the 9-entry range of OutputBuffer owned by this group.
	const uint slot = threadID * NUM_FACE + faceID;
	const uint id = slot * shCount;

	// The first tile of a face resets the accumulator. Only the thread that
	// later writes the result touches OutputBuffer, so the clear and the
	// accumulation below happen in program order on the same thread.
	if (uPixelOffset.x == 0 && uPixelOffset.y == 0 && pixelX == 0 && pixelY == 0)
	{
		const float4 zero = float4(0.0f, 0.0f, 0.0f, 0.0f);

		[unroll]
		for (uint clearIdx = 0; clearIdx < shCount; ++clearIdx)
			OutputBuffer[id + clearIdx] = zero;
	}

	GroupMemoryBarrierWithGroupSync();

	// Texel coordinate inside the face.
	const int3 pixelLocation = int3(pixelX + (int)uPixelOffset.x, pixelY + (int)uPixelOffset.y, 0);

	// Texel coordinate inside the radiance map: faces are laid out along x,
	// slots along y.
	const int3 location = int3(
		pixelLocation.x + faceID * uFaceSize.x,
		pixelLocation.y + threadID * uFaceSize.y,
		0);

	// Gather RGB from the texel
	float3 radiance = uRadianceMap.Load(location).xyz;

	// Map the texel coordinate to [-1, 1] face space (v points up).
	// NOTE(review): no half-texel offset is applied, so texel 0 maps exactly to -1;
	// confirm this matches the reference implementation before changing it.
	const float u = (pixelLocation.x / float(uFaceSize.x)) * 2.0f - 1.0f;
	const float v = -((pixelLocation.y / float(uFaceSize.y)) * 2.0f - 1.0f);

	// Per-texel weight 4 / (1 + u^2 + v^2)^(3/2); presumably the solid-angle
	// term of the texel - TODO confirm.
	const float temp = 1.0f + u * u + v * v;
	const float weight = 4.0f / (sqrt(temp) * temp);
	radiance *= weight;

	// Direction through the texel, rotated by the 3x3 part of this slot's matrix.
	const float3 dirVS = normalize(float3(u, v, 1.0f));
	const float3 dirTS = mul(dirVS, (float3x3)uToTangentSpace[slot]);

	// Project onto SH
	float3 sh[9];
	ProjectOntoSH(dirTS, radiance, sh);

	// Publish this thread's coefficients to group-shared memory.
	const uint pixel = pixelY * RT_SIZE + pixelX;

	[unroll]
	for (uint storeIdx = 0; storeIdx < shCount; ++storeIdx)
		ResultSH[pixel][storeIdx] = sh[storeIdx];

	GroupMemoryBarrierWithGroupSync();

	// Sum all RT_SIZE * RT_SIZE entries into ResultSH[0]: every pass folds the
	// upper half of the live range onto the lower half, then halves the range.
	const uint totalSize = RT_SIZE * RT_SIZE;

	for (uint s = totalSize / 2; s > 0; s >>= 1)
	{
		if (pixel < s)
		{
			[unroll]
			for (uint sumIdx = 0; sumIdx < shCount; ++sumIdx)
				ResultSH[pixel][sumIdx] += ResultSH[pixel + s][sumIdx];
		}

		// Every thread must reach the barrier, including those that did not add.
		GroupMemoryBarrierWithGroupSync();
	}

	// The first thread of the group adds the tile sum to the accumulator.
	// NOTE(review): "+=" reads back a float4 typed UAV; confirm the compile
	// target / hardware in use permits typed loads for this format.
	if (pixel == 0)
	{
		[unroll]
		for (uint outIdx = 0; outIdx < shCount; ++outIdx)
			OutputBuffer[id + outIdx] += float4(ResultSH[0][outIdx], 0.0);
	}
}
2 changes: 1 addition & 1 deletion Assets/Sponza/Sponza.smesh
Git LFS file not shown
3 changes: 0 additions & 3 deletions Assets/Sponza/mesh_charts00.png

This file was deleted.

3 changes: 0 additions & 3 deletions Assets/Sponza/mesh_charts01.png

This file was deleted.

3 changes: 0 additions & 3 deletions Assets/Sponza/mesh_charts02.png

This file was deleted.

3 changes: 0 additions & 3 deletions Assets/Sponza/mesh_charts03.png

This file was deleted.

3 changes: 0 additions & 3 deletions Assets/Sponza/mesh_charts04.png

This file was deleted.

49 changes: 49 additions & 0 deletions Projects/Irrlicht/Include/IGPUCompute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (C) 2020 Pham Hong Duc
// This file is part of the "Skylicht Engine".
// For conditions of distribution and use, see copyright notice in irrlicht.h
// Add irrlicht compute shader feature

#ifndef __I_GPU_COMPUTE_H_INCLUDED__
#define __I_GPU_COMPUTE_H_INCLUDED__

#include "IrrCompileConfig.h"

#include "IReferenceCounted.h"
#include "EDriverTypes.h"

namespace irr
{
namespace video
{
class IRWBuffer;
class ITexture;

//! Interface of a compute program that is executed on the GPU.
/** All operations are pure virtual and are supplied by a driver-specific
subclass; this base only stores the driver type. */
class IGPUCompute : public virtual IReferenceCounted
{
public:
	//! Constructor. The driver type starts out as EDT_NULL.
	IGPUCompute() : DriverType(EDT_NULL) {}

	//! Returns the driver type stored in this object.
	E_DRIVER_TYPE getDriverType() const
	{
		return DriverType;
	}

	//! Sets a shader variable from an array of floats.
	/** \param id Variable id, presumably one returned by getVariableID() - TODO confirm.
	\param floats Pointer to the float data.
	\param count Amount of data at floats (presumably a number of floats - TODO confirm).
	\return Implementation defined; presumably true on success. */
	virtual bool setVariable(s32 id, const f32* floats, int count) = 0;

	//! Looks up the id of a shader variable by name.
	/** \param name Name of the variable.
	\return Id of the variable; the value used for "not found" is implementation defined. */
	virtual s32 getVariableID(const c8* name) = 0;

	//! Assigns a texture to the given slot.
	virtual void setTexture(int slot, ITexture* texture) = 0;

	//! Assigns a read/write buffer to the given slot.
	virtual void setBuffer(int slot, IRWBuffer* buffer) = 0;

	//! Runs the program with the given number of thread groups per dimension.
	virtual void dispatch(int threadGroupX, int threadGroupY, int threadGroupZ) = 0;

protected:
	//! Value returned by getDriverType().
	E_DRIVER_TYPE DriverType;
};
}
}

#endif
11 changes: 11 additions & 0 deletions Projects/Irrlicht/Include/IGPUProgrammingServices.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include "EPrimitiveTypes.h"
#include "path.h"

#include "IGPUCompute.h"

namespace irr
{

Expand All @@ -23,6 +25,7 @@ namespace video

class IVideoDriver;
class IShaderConstantSetCallBack;
class IGPUCompute;

//! Enumeration for different types of shading languages
enum E_GPU_SHADING_LANGUAGE
Expand Down Expand Up @@ -124,6 +127,14 @@ class IGPUProgrammingServices
callback, baseMaterial, userData, shadingLang);
}

//! Creates a compute program from a shader program passed as a string.
/** \param computeShaderProgram Compute shader program (presumably its source text - TODO confirm).
\param computeShaderEntryPointName Name of the entry function; "main" by default.
\param csCompileTarget Compute shader version the program is compiled for; ECST_CS_5_0 by default.
\return The compute program. The D3D11 driver returns NULL when compilation fails;
presumably the caller owns the returned reference and drops it when done - TODO confirm. */
virtual IGPUCompute* createComputeProgram(const c8* computeShaderProgram,
const c8* computeShaderEntryPointName = "main",
E_COMPUTE_SHADER_TYPE csCompileTarget = ECST_CS_5_0) = 0;

//! Creates a compute program from a shader file.
/** \param computeShaderFileName Path of the file holding the compute shader program.
\param computeShaderEntryPointName Name of the entry function; "main" by default.
\param csCompileTarget Compute shader version the program is compiled for; ECST_CS_5_0 by default.
\return The compute program; the failure value is implementation defined
(presumably NULL, like createComputeProgram - TODO confirm). */
virtual IGPUCompute* createComputeProgramFromFile(const io::path& computeShaderFileName,
const c8* computeShaderEntryPointName = "main",
E_COMPUTE_SHADER_TYPE csCompileTarget = ECST_CS_5_0) = 0;

//! convenience function for use with many defaults, without geometry shader
/** All shader names are set to "main" and compile targets are shader
type 1.1.
Expand Down
42 changes: 42 additions & 0 deletions Projects/Irrlicht/Include/IRWBuffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (C) 2020 Pham Hong Duc
// This file is part of the "Skylicht Engine"
// Upgrade GPU Compute Shader feature

#ifndef __IRR_IRW_BUFFER_H_INCLUDED__
#define __IRR_IRW_BUFFER_H_INCLUDED__

#include "IrrCompileConfig.h"

#include "IReferenceCounted.h"
#include "EDriverTypes.h"

namespace irr
{
namespace video
{
//! Interface of a read/write buffer created through the video driver.
/** The base class only records the element format, the element count and the
driver type; access to the data goes through lock()/unlock(), which are supplied
by a driver-specific subclass. */
class IRWBuffer : public virtual IReferenceCounted
{
public:
	//! Constructor.
	/** \param format Element format of the buffer.
	\param numElements Number of elements in the buffer.
	The driver type starts out as EDT_NULL. */
	IRWBuffer(ECOLOR_FORMAT format, u32 numElements)
		: DriverType(EDT_NULL), Format(format), NumElements(numElements)
	{
	}

	//! Returns the driver type stored in this object.
	E_DRIVER_TYPE getDriverType() const
	{
		return DriverType;
	}

	//! Gives access to the buffer data.
	/** \param readOnly Whether the caller only intends to read the data.
	\return Pointer to the data; validity and failure value are implementation
	defined (presumably valid until unlock() - TODO confirm). */
	virtual void* lock(bool readOnly) = 0;

	//! Ends the access started by lock().
	virtual void unlock() = 0;

protected:
	//! Value returned by getDriverType().
	E_DRIVER_TYPE DriverType;

	//! Element format passed to the constructor.
	ECOLOR_FORMAT Format;

	//! Element count passed to the constructor.
	u32 NumElements;
};
}
}

#endif
10 changes: 10 additions & 0 deletions Projects/Irrlicht/Include/IVideoDriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "SExposedVideoData.h"

#include "IHardwareBuffer.h"
#include "IRWBuffer.h"

namespace irr
{
Expand Down Expand Up @@ -1108,6 +1109,15 @@ namespace video
const core::position2d<s32>& pos,
const core::dimension2d<u32>& size) =0;

//! Creates a read/write buffer stored on the GPU.
/**
\param format Element format of the buffer.
\param numElements Number of elements in the buffer.
\param initialData Optional data for the new buffer; NULL by default.
NOTE(review): the expected size and layout of this data are not specified
here - confirm against the driver implementations.
\return The GPU buffer object.
If you no longer need the buffer, you should call IRWBuffer::drop().
See IReferenceCounted::drop() for more information. */
virtual IRWBuffer* createRWBuffer(video::ECOLOR_FORMAT format, u32 numElements, void *initialData = NULL) = 0;

//! Event handler for resize events. Only used by the engine internally.
/** Used to notify the driver that the window was resized.
Usually, there is no need to call this method. */
Expand Down
24 changes: 23 additions & 1 deletion Projects/Irrlicht/Source/CD3D11Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
#include "CD3D11Texture.h"
#include "CD3D11HardwareBuffer.h"
#include "CD3D11VideoRT.h"

#include "CD3D11RWBuffer.h"
#include "CD3D11GPUCompute.h"

inline void unpack_texureBlendFunc(irr::video::E_BLEND_FACTOR &srcFact, irr::video::E_BLEND_FACTOR &dstFact,
irr::video::E_MODULATE_FUNC &modulo, irr::u32& alphaSource, const irr::f32 param)
Expand Down Expand Up @@ -1412,6 +1413,12 @@ namespace irr
return new CD3D11TextureCube(this, "TextureCube", imageX1, imageX2, imageY1, imageY2, imageZ1, imageZ2);
}

//! Creates a buffer stored on the GPU, implemented by CD3D11RWBuffer.
/** NOTE(review): initialData is not used by this implementation; the buffer is
created from the format and the element count only. */
IRWBuffer* CD3D11Driver::createRWBuffer(video::ECOLOR_FORMAT format, u32 numElements, void *initialData)
{
	IRWBuffer* rwBuffer = new CD3D11RWBuffer(this, format, numElements);
	return rwBuffer;
}

void CD3D11Driver::setViewPort(const core::rect<s32>& area)
{
core::dimension2du size = getCurrentRenderTargetSize();
Expand Down Expand Up @@ -2801,6 +2808,21 @@ namespace irr
return id;
}

//! Creates a D3D11 compute program from the given shader program.
/** \param computeShaderProgram Shader program handed to CD3D11GPUCompute::compile().
\param computeShaderEntryPointName Name of the entry function.
\param csCompileTarget Compute shader version to compile for.
\return The compiled program, or NULL when compile() does not report success;
in that case the half-built object is dropped before returning. */
IGPUCompute* CD3D11Driver::createComputeProgram(const c8* computeShaderProgram,
	const c8* computeShaderEntryPointName,
	E_COMPUTE_SHADER_TYPE csCompileTarget)
{
	CD3D11GPUCompute* program = new CD3D11GPUCompute(this);

	const bool compiled =
		(program->compile(computeShaderProgram, computeShaderEntryPointName, csCompileTarget) == true);

	if (!compiled)
	{
		// Release the only reference so the failed program is destroyed.
		program->drop();
		return NULL;
	}

	return program;
}

//! Adds a new material renderer to the VideoDriver, using pixel and/or
//! vertex shaders to render geometry.
s32 CD3D11Driver::addShaderMaterial(const c8* vertexShaderProgram,
Expand Down
Loading

0 comments on commit 0817e47

Please sign in to comment.