I am trying to implement Forward+ rendering in my DX12 rendering engine, but for some reason my point lights are not being culled correctly. I currently have a depth pre-pass, a compute shader pass for light culling, and a final light calculation pass.
Here is my light culling compute shader:
#include "Lighting.hlsli"
// Scene lights; main() reads type, position and range from each entry.
StructuredBuffer<Light> lights : register(t0);
// Depth texture; main() loads the .r channel of mip 0 for each pixel.
// Presumably the output of the depth pre-pass - confirm against the caller.
Texture2D depthMap : register(t1);
// Output: per-tile lists of visible light indices. main() writes each tile's
// list starting at 1024 * tileIndex.
RWStructuredBuffer<uint> LightIndices : register(u0);
// Group-shared variables: one instance per thread group.
// Depth bounds of the group's pixels. main() stores the raw bit pattern of the
// float depth here so it can use InterlockedMin/InterlockedMax on them.
groupshared uint minDepth;
groupshared uint maxDepth;
// Number of lights accepted so far, and their indices into `lights`.
groupshared uint visibleLightCount;
groupshared uint visibleLightIndices[1024];
// Not read anywhere in the code shown here.
groupshared matrix viewProjection;
// Frustum whose planes 0..3 are filled in by thread 0 of the group in main().
groupshared Frustum frustum;
// Not referenced anywhere in the code shown here.
groupshared float4 frustumPlanes[6];
// Thread-group width and height (see [numthreads] on main()); also used as the
// tile edge length in pixels.
#define TILE_SIZE 16
// Constants bound at register b0.
cbuffer externalData : register(b0)
{
// Applied to light positions in main() to bring them into view space.
matrix view;
// Projection matrix.
matrix projection;
// Used by ClipToView / ConvertProjToView to go from clip space to view space.
// Presumably the inverse of `projection` - confirm on the CPU side.
matrix inverseProjection;
// Number of entries in `lights` that main() iterates over.
int lightCount;
};
// Transforms a clip-space position into view space by applying
// inverseProjection and then undoing the perspective divide.
float4 ClipToView(float4 clip)
{
    // Named `unprojected` rather than `view` so it does not shadow the
    // cbuffer's `view` matrix.
    const float4 unprojected = mul(clip, inverseProjection);
    return unprojected / unprojected.w;
}
// Transforms a screen-space position (xy in pixels, z = depth, w passed
// through) into view space. The screen size is hard-coded to 1280x720.
float4 ScreenToView(float4 screen)
{
    // Pixel coordinates -> [0, 1] texture coordinates.
    const float2 uv = screen.xy / float2(1280,720);

    // Texture coordinates -> clip space; Y is flipped before remapping to [-1, 1].
    float4 clip;
    clip.x = uv.x * 2.0f - 1.0f;
    clip.y = (1.0f - uv.y) * 2.0f - 1.0f;
    clip.z = screen.z;
    clip.w = screen.w;

    return ClipToView(clip);
}
// Returns the unit normal of the plane spanned by vectors b and c.
// Only a normal is returned (no distance term), so the plane is treated as
// passing through the origin.
float3 CreatePlaneEquation(float3 b, float3 c)
{
    const float3 normal = cross(b, c);
    return normalize(normal);
}
// Signed distance from point p to a plane given by its normal `eqn`.
// The general form is dot(eqn.xyz, p) + eqn.w; the w term is omitted because
// only a float3 normal is passed in (see CreatePlaneEquation).
float GetSignedDistanceFromPlane(float3 p, float3 eqn)
{
    const float signedDist = dot(eqn, p);
    return signedDist;
}
// Tests a sphere (center c, radius r) against four side planes.
// Returns true only when the signed distance from c to every plane is less
// than r, i.e. the sphere is inside or intersecting with respect to all four.
// Fix: the original used the HTML-escaped text "&amp;&amp;" where the logical
// AND operator was intended, which does not compile.
bool TestFrustumSides(float3 c, float r, float3 plane0, float3 plane1, float3 plane2, float3 plane3)
{
    const bool intersectingOrInside0 = GetSignedDistanceFromPlane(c, plane0) < r;
    const bool intersectingOrInside1 = GetSignedDistanceFromPlane(c, plane1) < r;
    const bool intersectingOrInside2 = GetSignedDistanceFromPlane(c, plane2) < r;
    const bool intersectingOrInside3 = GetSignedDistanceFromPlane(c, plane3) < r;

    return (intersectingOrInside0 && intersectingOrInside1 &&
            intersectingOrInside2 && intersectingOrInside3);
}
// Number of 16-pixel tiles needed to cover the 1280-pixel screen width,
// rounded up.
uint GetNumTilesX()
{
    const uint screenWidth = 1280;
    const uint tileSize = 16;
    return (screenWidth + tileSize - 1) / tileSize;
}
// Number of 16-pixel tiles needed to cover the 720-pixel screen height,
// rounded up.
uint GetNumTilesY()
{
    const uint screenHeight = 720;
    const uint tileSize = 16;
    return (screenHeight + tileSize - 1) / tileSize;
}
// Converts a post-projection point into view space: multiply by
// inverseProjection, then divide by the resulting w.
float4 ConvertProjToView(float4 p)
{
    const float4 unprojected = mul(p, inverseProjection);
    return unprojected / unprojected.w;
}
// Converts a post-projection depth value into a view-space depth using only
// the _34 and _44 elements of inverseProjection.
float ConvertProjDepthToView(float z)
{
    const float invW = z * inverseProjection._34 + inverseProjection._44;
    return 1.f / invW;
}
// Light-culling entry point. One thread group covers one TILE_SIZE x TILE_SIZE
// screen tile; each thread handles one pixel of the depth reduction and a
// strided subset of the lights.
//
// Steps:
//   1. Thread 0 resets the shared counters and builds the tile's side planes.
//   2. All threads reduce their pixel depth into the shared min/max.
//   3. Lights are tested against the tile; accepted indices go to
//      visibleLightIndices.
//   4. Thread 0 copies the list to LightIndices at 1024 * tileIndex and writes
//      0xffffffff as a terminator when the list is not full.
//
// The screen size is hard-coded to 1280x720.
[numthreads(TILE_SIZE, TILE_SIZE, 1)]
void main(int3 groupID : SV_GroupID, // 3D index of the thread group in the dispatch.
int3 groupThreadID : SV_GroupThreadID, // 3D index of local thread ID in a thread group.
int3 dispatchThreadID : SV_DispatchThreadID, // 3D index of global thread ID in the dispatch.
int groupIndex : SV_GroupIndex)
{
    // Must match the size of the groupshared visibleLightIndices array and the
    // per-tile stride the consumer of LightIndices expects.
    const uint maxLightsPerTile = 1024;

    const int2 pixel = dispatchThreadID.xy;
    const int2 tileID = groupID.xy;
    const int2 tileCount = int2(1280 / TILE_SIZE, 720 / TILE_SIZE);
    const int tileIndex = tileID.y * tileCount.x + tileID.x;

    // Non-negative float depths keep their ordering when reinterpreted as
    // uint, which lets the shared min/max use integer atomics.
    const float depth = depthMap.Load(int3(pixel, 0)).r;
    const uint udepth = asuint(depth);

    if (groupIndex == 0)
    {
        minDepth = 0xffffffff;
        maxDepth = 0;
        visibleLightCount = 0;

        // View space eye position is always at the origin.
        const float3 eyePos = float3(0, 0, 0);

        // Screen-space corners of THIS tile at depth 1.
        // Fix: the corners must be derived from the group (tile) index. The
        // original used dispatchThreadID * TILE_SIZE; since dispatchThreadID is
        // already groupID * TILE_SIZE for thread 0, the tile origin was scaled
        // by TILE_SIZE twice and the frustum landed in the wrong place.
        const float2 tileOrigin = float2(tileID) * TILE_SIZE;
        float4 screenSpace[4];
        screenSpace[0] = float4(tileOrigin, 1.0f, 1.0f);                                  // top-left
        screenSpace[1] = float4(tileOrigin + float2(TILE_SIZE, 0), 1.0f, 1.0f);           // top-right
        screenSpace[2] = float4(tileOrigin + float2(0, TILE_SIZE), 1.0f, 1.0f);           // bottom-left
        screenSpace[3] = float4(tileOrigin + float2(TILE_SIZE, TILE_SIZE), 1.0f, 1.0f);   // bottom-right

        float3 viewSpace[4];
        for (int corner = 0; corner < 4; corner++)
        {
            viewSpace[corner] = ScreenToView(screenSpace[corner]).xyz;
        }

        // Side planes through the eye and two tile corners each.
        // NOTE(review): the direction of each normal depends on ComputePlane in
        // Lighting.hlsli and on the view-space handedness - confirm the normals
        // face the way SphereInsideFrustum expects.
        frustum.frustumPlanes[0] = ComputePlane(eyePos, viewSpace[2], viewSpace[0]); // left
        frustum.frustumPlanes[1] = ComputePlane(eyePos, viewSpace[1], viewSpace[3]); // right
        frustum.frustumPlanes[2] = ComputePlane(eyePos, viewSpace[0], viewSpace[1]); // top
        frustum.frustumPlanes[3] = ComputePlane(eyePos, viewSpace[3], viewSpace[2]); // bottom
    }
    // Counters and frustum must be visible to every thread before use.
    GroupMemoryBarrierWithGroupSync();

    // Min and max depth of the tile.
    InterlockedMin(minDepth, udepth);
    InterlockedMax(maxDepth, udepth);
    GroupMemoryBarrierWithGroupSync();

    // Convert the depth bounds to view space.
    const float minDepthVS = ScreenToView(float4(0, 0, asfloat(minDepth), 1)).z;
    const float maxDepthVS = ScreenToView(float4(0, 0, asfloat(maxDepth), 1)).z;
    const float nearClipVS = ScreenToView(float4(0, 0, 0, 1)).z;

    // Clipping plane for the minimum depth value.
    Plane minPlane = { float3(0, 0, 1), minDepthVS };

    // Each thread tests lights groupIndex, groupIndex + 256, ...
    const uint threadCount = TILE_SIZE * TILE_SIZE;
    const uint totalLights = (uint) max(lightCount, 0); // guard against a negative count
    for (uint lightIndex = (uint) groupIndex; lightIndex < totalLights; lightIndex += threadCount)
    {
        bool visible = false;
        switch (lights[lightIndex].type)
        {
            // Directional and rect lights are never culled (rect: "for now").
            case LIGHT_TYPE_DIR:
            case LIGHT_TYPE_AREA_RECT:
                visible = true;
                break;

            case LIGHT_TYPE_POINT:
            {
                Sphere sphere;
                sphere.c = mul(float4(lights[lightIndex].position, 1.0), view).xyz;
                sphere.r = lights[lightIndex].range;
                // Fix: "&amp;&amp;" in the original was an HTML-escaped "&&".
                visible = SphereInsideFrustum(sphere, frustum, nearClipVS, maxDepthVS)
                       && !SphereInsidePlane(sphere, minPlane);
            }
            break;

            // Spot and disk lights (and unknown types) are not added to any
            // tile, matching the original behavior.
            default:
                break;
        }

        if (visible)
        {
            uint slot;
            InterlockedAdd(visibleLightCount, 1, slot);
            // Drop lights beyond the list capacity instead of writing past the
            // end of the groupshared array.
            if (slot < maxLightsPerTile)
            {
                visibleLightIndices[slot] = lightIndex;
            }
        }
    }
    GroupMemoryBarrierWithGroupSync();

    if (groupIndex == 0)
    {
        const uint base = maxLightsPerTile * (uint) tileIndex;
        // visibleLightCount can exceed the capacity when lights were dropped.
        const uint count = min(visibleLightCount, maxLightsPerTile);
        for (uint k = 0; k < count; k++)
        {
            LightIndices[base + k] = visibleLightIndices[k];
        }
        if (count != maxLightsPerTile)
        {
            // End-of-list marker (the original wrote -1 into the uint buffer).
            LightIndices[base + count] = 0xffffffff;
        }
    }
}