Advertisement

CSM works fine on AMD GPU not on NVIDIA GPU

Started by July 02, 2020 09:35 AM
2 comments, last by MJP 4 years, 6 months ago

I recently implemented cascaded shadow maps (CSM) in my engine. When I run it on my laptop (AMD Vega graphics, Windows 10), the shadows are rendered correctly. When I run the same code, without any modification, on my desktop (NVIDIA GPU, Windows 7), the shadows are not rendered correctly; in fact, they are missing entirely. I haven't had time to isolate the problem. Why does this happen? Here is some of the code.


void CSM_GetLightViewProjection(int Cascade_Index, Camera *ActiveCamera, XMMATRIX &LVP)
{
	XMFLOAT3 FrustumCorners[] = {	XMFLOAT3(-1,  1, 0 ),
									XMFLOAT3( 1,  1, 0 ),
									XMFLOAT3( 1, -1, 0 ),
									XMFLOAT3(-1, -1, 0 ),
									XMFLOAT3(-1,  1, 1 ),
									XMFLOAT3( 1,  1, 1 ),
									XMFLOAT3( 1, -1, 1 ),
									XMFLOAT3(-1, -1, 1 ) };

	XMMATRIX Camera_ViewProj = XMMatrixMultiply(ActiveCamera->mView, ActiveCamera->mProjection_Cascade[Cascade_Index]);

	XMMATRIX InverseCameraViewProjection;
	InverseCameraViewProjection = XMMatrixInverse(NULL, XMMatrixTranspose(Camera_ViewProj));

	for (int i=0; i<8; i++)
		FrustumCorners[i] = TransformTransposed(FrustumCorners[i], InverseCameraViewProjection);

	XMVECTOR FrustumCenter = XMVectorSet(0,0,0,1);
	for (int i=0; i<8; i++)
		FrustumCenter += XMLoadFloat3(&amp;amp;FrustumCorners[i]);
	FrustumCenter *= (1/8.0f);

	float Radius = XMVector3Length(XMLoadFloat3(&amp;amp;FrustumCorners[0]) - XMLoadFloat3(&amp;amp;FrustumCorners[6])).m128_f32[0] / 2.0f;
	float TexelPerUnit = (CSM_Sizes[Cascade_Index] / (2 * Radius));

	XMMATRIX Scalar = XMMatrixScaling(TexelPerUnit, TexelPerUnit, TexelPerUnit);
	XMVECTOR Zero = XMVectorSet(0,0,0,1);
	XMVECTOR UpDirection = XMVectorSet(0,1,0,0);
	XMVECTOR BaseLookAt = XMVectorSet(-LightDirection.m128_f32[0],-LightDirection.m128_f32[1],-LightDirection.m128_f32[2],1);

	XMMATRIX LookAt = XMMatrixLookAtLH(Zero, BaseLookAt, UpDirection);
	LookAt = XMMatrixMultiply(Scalar, LookAt);
	XMMATRIX InverseLookAt = XMMatrixInverse(NULL, LookAt);

	XMFLOAT3 mFC;
	XMStoreFloat3(&amp;amp;mFC, FrustumCenter);
	FrustumCenter = XMLoadFloat3(&amp;amp;Transform(mFC, LookAt));
	FrustumCenter.m128_f32[0] = floorf(FrustumCenter.m128_f32[0]);
	FrustumCenter.m128_f32[1] = floorf(FrustumCenter.m128_f32[1]);
	XMStoreFloat3(&amp;amp;mFC, FrustumCenter);
	FrustumCenter = XMLoadFloat3(&amp;amp;Transform(mFC, InverseLookAt));
	
	XMVECTOR Eye = FrustumCenter - (LightDirection * Radius * 2);
	mLightView = XMMatrixLookAtLH(Eye, FrustumCenter, UpDirection);
	mLightProjection = XMMatrixOrthographicOffCenterLH(-Radius, Radius, -Radius, Radius,-Radius * 6, Radius * 6);
	LVP = XMMatrixMultiply(mLightView, mLightProjection);
}

// Model colour texture, sampled with ssModel in PSMAIN.
Texture2D txModel : register( t0 );
// One depth map per shadow cascade (three cascades), bound starting at t1.
Texture2D DepthMapTextures[3] : register( t1 );
// Sampler used for txModel.
SamplerState ssModel : register( s0 );
// Sampler used by SampleDepthMap for the cascade depth maps.
// NOTE(review): declared as a plain SamplerState, so the sampler object the
// application binds to s1 must be a non-comparison sampler. If a comparison
// sampler is bound instead, this must become SamplerComparisonState and the
// depth lookups must use SampleCmp/SampleCmpLevelZero; a mismatch is not
// guaranteed to behave the same on every GPU vendor.
SamplerState ssModelCSM : register( s1 );
...
// Reads the red channel of one cascade depth map at the given coordinate.
//   texArr : the three cascade depth maps
//   index  : cascade to read (0, 1 or 2)
//   coord  : texture coordinate to sample at
// Returns the sampled value, or 0 when index is not 0, 1 or 2.
// Each array element is addressed with a literal index, selected by branching.
float SampleDepthMap(Texture2D texArr[3], int index, float2 coord)
{
    float sampledDepth = 0;

    if (index == 0)
    {
        sampledDepth = texArr[0].Sample(ssModelCSM, coord).r;
    }
    else if (index == 1)
    {
        sampledDepth = texArr[1].Sample(ssModelCSM, coord).r;
    }
    else if (index == 2)
    {
        sampledDepth = texArr[2].Sample(ssModelCSM, coord).r;
    }

    return sampledDepth;
}
// Pixel shader: shades the model with a constant ambient term plus a diffuse
// term that is only applied where the pixel passes the shadow-map depth test
// of the first cascade that contains it.
//   Input.PSPosition          : position used for the camera-depth test
//   Input.lightPosition1..3   : position in each cascade's light clip space
//   Input.WSNormal            : surface normal used for the diffuse term
//   Input.TextureCoords       : coordinates for txModel
// Returns the shaded colour with alpha 1.
float4 PSMAIN(DS_OUTPUT Input) : SV_Target
{
    float3 lightDir = -LightDirection;
    float shadowBias = 0.0005;
    float lightIntensity = 0;

    // Reciprocals of w for the perspective divides below.
    float inputPositionInv  = 1.0 / Input.PSPosition.w;
    float lightPositionInv1 = 1.0 / Input.lightPosition1.w;
    float lightPositionInv2 = 1.0 / Input.lightPosition2.w;
    float lightPositionInv3 = 1.0 / Input.lightPosition3.w;

    float depthTest = Input.PSPosition.z * inputPositionInv;

    // Light clip space [-1, 1] -> texture space [0, 1], with y flipped.
    float2 shadowCoords[3] = {
        float2( Input.lightPosition1.x * lightPositionInv1 * 0.5 + 0.5,  -Input.lightPosition1.y * lightPositionInv1 * 0.5 + 0.5),
        float2( Input.lightPosition2.x * lightPositionInv2 * 0.5 + 0.5,  -Input.lightPosition2.y * lightPositionInv2 * 0.5 + 0.5),
        float2( Input.lightPosition3.x * lightPositionInv3 * 0.5 + 0.5,  -Input.lightPosition3.y * lightPositionInv3 * 0.5 + 0.5)
    };

    // Depth of the pixel as seen from the light, per cascade.
    float lightDepthValues[3];
    lightDepthValues[0] = Input.lightPosition1.z * lightPositionInv1;
    lightDepthValues[1] = Input.lightPosition2.z * lightPositionInv2;
    lightDepthValues[2] = Input.lightPosition3.z * lightPositionInv3;

    // Pick the first cascade whose texture coordinates lie in [0, 1]
    // (saturate(x) == x) and whose DepthBounds test passes; 3 means "none".
    int shadowIndex = 3;

    if ((saturate(shadowCoords[0].x) == shadowCoords[0].x) && (saturate(shadowCoords[0].y) == shadowCoords[0].y) && (depthTest > (1.0f - (DepthBounds.x * inputPositionInv))))
    {
        shadowIndex = 0;
    }
    else if ((saturate(shadowCoords[1].x) == shadowCoords[1].x) && (saturate(shadowCoords[1].y) == shadowCoords[1].y) && (depthTest > (1.0f - (DepthBounds.y * inputPositionInv))))
    {
        shadowIndex = 1;
    }
    else if ((saturate(shadowCoords[2].x) == shadowCoords[2].x) && (saturate(shadowCoords[2].y) == shadowCoords[2].y) && (depthTest > (1.0f - (DepthBounds.z * inputPositionInv))))
    {
        shadowIndex = 2;
    }

    float3 FinalColor = 0, AmbientColor, mAmb = float3(.48f,.77f,.46f), lAmb = float3(.2f,.2f,.2f);
    AmbientColor = mAmb * lAmb;

    // Sampled once, outside the branches, instead of once per branch.
    float3 modelColor = txModel.Sample(ssModel, Input.TextureCoords).rgb;

    // NOTE(review): pixels that fall in no cascade keep FinalColor = 0 (black);
    // confirm that this is intended rather than a lit fallback.
    if (shadowIndex < 3) // we are in a shadow map
    {
        float depthVal = SampleDepthMap(DepthMapTextures, shadowIndex, shadowCoords[shadowIndex]);

        if ((lightDepthValues[shadowIndex] - shadowBias) <= depthVal)
        {
            // Not shadowed: ambient plus diffuse scaled by the sampled depth value.
            lightIntensity = saturate(dot(Input.WSNormal, normalize(lightDir)));
            FinalColor = saturate(depthVal * lightIntensity);
            FinalColor = modelColor * (AmbientColor + FinalColor);
        }
        else
        {
            // Shadowed: ambient only.
            FinalColor = modelColor * AmbientColor;
        }
    }

    return float4(FinalColor, 1.0f);
}

Solved the problem. The sampler state I was creating on the application side was a comparison sampler state, but I had forgotten to declare it as such in the shader: SamplerComparisonState ssModelCSM : register( s1 ); On the AMD GPU it worked without that declaration, but on the NVIDIA GPU the sampler has to be declared correctly.

Advertisement

FYI that sort of issue would get caught by the D3D debug layer, and I definitely recommend enabling it for your debug builds.

This topic is closed to new replies.

Advertisement