Hi all, this is my first time posting on these forums, so please let me know if there are any conventions I'm missing:
I have a compute shader that was working just fine until I quadrupled the size of one of the compute buffers from 262,144 bytes to 1,048,576 bytes. After that increase, the Unity performance profiler tells me that this shader takes about 70ms to complete its work, when previously the time was insignificant. This buffer is only set once, at launch, and is never read back into the main C# script, so I don't think it can be anything to do with data transfer. I can only assume that I've crossed some sort of subtle-but-important memory threshhold: some register is overflowing or something.
Shader memory management is a pretty arcane topic, so I don't know where to start. Can anyone give me a clue? Any help is appreciated.
--and here's the shader. The buffer in question is "imageLibrary:"
RWStructuredBuffer<uint> bugger;
RWStructuredBuffer<float> imageLibrary;
RWStructuredBuffer<uint> world;
RWStructuredBuffer<float2> cameraDimensions;
RWStructuredBuffer<float2> hereNow;
RWStructuredBuffer<float2> scale;
RWStructuredBuffer<float> output;
#pragma kernel action
[numthreads(1,1,1)]
void action (uint3 group : SV_GroupID, uint3 thread : SV_GroupThreadID) {
uint members, stride;
imageLibrary.GetDimensions(members, stride);
for (int i = 0; i < members; ++i) {
bugger[i] = world [i];
}
uint totalWidth = cameraDimensions[0].x;
float halfWidth = totalWidth / 2;
uint totalHeight = cameraDimensions[0].y;
float halfHeight = totalHeight / 2;
uint offset, nevermind;
world.GetDimensions(offset, nevermind);
offset = sqrt(offset * 4) / 2;
uint sectorLength = totalWidth / 8; //THE DIVISOR CHANGES IF THE THREAD-GROUP COUNT IS CHANGED
uint sectorHeight = totalHeight / 8; //THE DIVISOR CHANGES IF THE THREAD-GROUP COUNT IS CHANGED
uint xStart = group.x * sectorLength;
uint yStart = group.y * sectorHeight;
int k = 0;
for (uint i = 0; i < sectorHeight; ++i) {
for (uint j = 0; j < sectorLength; ++j) {
float worldX = hereNow[0].x + ((float)(xStart - halfWidth) + (float) j) * scale[0].x / halfWidth;
float worldY = hereNow[0].y + ((float)(yStart - halfHeight) + (float) i) * scale[0].y / halfHeight;
int xInSquare = floor(worldX);
int yInSquare = floor(worldY);
int index = ((xInSquare + offset) * offset * 2 + (yInSquare + offset));
int byteInFloat = index % 4;
index = index / 4;
int tileType = world[index];
tileType = tileType >> (byteInFloat * 8);
tileType = tileType & 0x000000ff;
int2 fromPixel = int2((float)(worldX - xInSquare) * 128, (float)(worldY - yInSquare) * 128);
int2 toPixel = int2 (j + xStart, i + yStart);
output[cameraDimensions[0].x * toPixel.y + toPixel.x] = imageLibrary.Load(16384 * tileType + 128 * fromPixel.y + fromPixel.x);
}
}
}