Replacing glCopyImageSubData

Shawn Halayka · 2022-11-25T18:46:29

I'm currently using glCopyImageSubData to copy from texture to texture. It works fine, but I'm trying to replace it with the following code. It doesn't work, failing on the copy back to the GPU. glCopyImageSubData(glowmap_tex, GL_TEXTURE_2D, 0, 0, 0, 0, last_frame_glowmap_tex, GL_TEXTURE_2D, 0, 0, 0, 0, win_x, win_y, 1); … vector<float> output_pixels(win_x* win_y * 4, 1.0f); glActiveTexture(GL_TEXTURE4); glBindTexture(GL_TEXTURE_2D, glowmap_tex); glBindImageTexture(GL_TEXTURE4, glowmap_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &output_pixels[0]); vector<float> last_frame_output_pixels(win_x* win_y * 4, 1.0f); glActiveTexture(GL_TEXTURE4); glBindTexture(GL_TEXTURE_2D, last_frame_glowmap_tex); glBindImageTexture(GL_TEXTURE4, last_frame_glowmap_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &last_frame_output_pixels[0]); vector<float> combined_output_pixels(win_x* win_y * 4, 1.0f); for (int x = 0; x < win_x; x++) { for (int y = 0; y < win_y; y++) { size_t index = 4 * ((y * win_x) + x); combined_output_pixels[index + 0] = output_pixels[index + 0];// +last_frame_output_pixels[imgIdx + 0]; combined_output_pixels[index + 1] = output_pixels[index + 1];// +last_frame_output_pixels[imgIdx + 1]; combined_output_pixels[index + 2] = output_pixels[index + 2];// +last_frame_output_pixels[imgIdx + 2]; combined_output_pixels[index + 3] = output_pixels[index + 3];// +last_frame_output_pixels[imgIdx + 3]; } } // The following doesn't work, and I don't know why glActiveTexture(GL_TEXTURE4); glBindTexture(GL_TEXTURE_2D, last_frame_glowmap_tex); glBindImageTexture(4, last_frame_glowmap_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, win_x, win_y, 0, GL_RGBA, GL_FLOAT, &combined_output_pixels[0]); Any ideas?

taby

Author

1,557

November 24, 2022 05:47 PM

I tried larger values, and it runs slower!

JoeJ

4,388

November 24, 2022 06:11 PM

taby said:
I tried larger values, and it runs slower!

Did you forget to adjust the dispatch as well? Guess it should be: (but not sure - these parameters can be confusing)

glDispatchCompute((GLuint)win_x / 8, (GLuint)win_y / 8, 1);

layout(local_size_x = 8, local_size_y = 8) in;

I still remember the case where I could not make your iso surface shader faster by increasing the work group size.
But at least it did not get slower.

Fact is: With a workgroup size of 1, only one out of 32 threads does work. The others do nothing, but still waste power and potential.
So you should be able to get a speedup.

Ofc. we're totally memory bound here, as there is no ALU work going on. But still - the speedup should be noticeable, for god's sake! /:O\

JoeJ

4,388

November 24, 2022 08:27 PM

Just saw this voxel game : )

taby

Author

1,557

November 24, 2022 08:54 PM

Holy cow, that game looks amazing! AAA

taby

Author

1,557

November 25, 2022 03:26 PM

Holy f**k… it’s working!

The C++ code is:

	// Combine glowmap_tex with last_frame_glowmap_tex on the GPU: the
	// glowmap_copier compute program reads both as images, writes its result
	// into a temporary texture, and that result is then copied into
	// last_frame_glowmap_tex.
	glUseProgram(glowmap_copier.get_program());


	// Create the temporary output texture: RGBA32F, win_x by win_y, allocated
	// with no initial data (NULL), then bound to image unit 0 as write-only.
	GLuint temp_tex;

	glGenTextures(1, &temp_tex);
	glActiveTexture(GL_TEXTURE0);
	glBindTexture(GL_TEXTURE_2D, temp_tex);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, win_x, win_y, 0, GL_RGBA, GL_FLOAT, NULL);
	glBindImageTexture(0, temp_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
	// Point the "output_image" uniform at image unit 0.
	glUniform1i(glGetUniformLocation(glowmap_copier.get_program(), "output_image"), 0);


	// Bind the two inputs as read-only RGBA32F images on image units 1 and 2
	// (they are also bound as ordinary 2D textures on texture units 1 and 2).
	glActiveTexture(GL_TEXTURE1);
	glBindTexture(GL_TEXTURE_2D, glowmap_tex);
	glBindImageTexture(1, glowmap_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);
	glUniform1i(glGetUniformLocation(glowmap_copier.get_program(), "inputa_image"), 1);

	glActiveTexture(GL_TEXTURE2);
	glBindTexture(GL_TEXTURE_2D, last_frame_glowmap_tex);
	glBindImageTexture(2, last_frame_glowmap_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);
	glUniform1i(glGetUniformLocation(glowmap_copier.get_program(), "inputb_image"), 2);

	// Launch win_x by win_y work groups. The accompanying compute shader
	// declares a 1x1 local size, so this is one work group per pixel.
	glDispatchCompute(win_x, win_y, 1);

	// Make the compute shader's image writes visible to the GL commands that
	// follow. This is a memory-ordering barrier, not a CPU-side wait.
	glMemoryBarrier(GL_ALL_BARRIER_BITS);

	// Copy the full win_x by win_y result from temp_tex into
	// last_frame_glowmap_tex (mip level 0, origin 0,0,0, depth 1).
	glCopyImageSubData(temp_tex, GL_TEXTURE_2D, 0, 0, 0, 0,
		last_frame_glowmap_tex, GL_TEXTURE_2D, 0, 0, 0, 0,
		win_x, win_y, 1);

	// debug -- read temp_tex back and save it to disk (disabled)
//	vector<float> output_pixels(win_x * win_y * 4);
//	glActiveTexture(GL_TEXTURE0);
//	glBindImageTexture(0, temp_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
//	glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &output_pixels[0]);
//	save_float_tex_to_disk(win_x, win_y, output_pixels, "temp_tex.tga");

	// debug -- intended to read last_frame_glowmap_tex back and save it (disabled)
	// NOTE(review): glGetTexImage reads the texture bound with glBindTexture on
	// the active texture unit, not the image binding. Texture unit 0 still has
	// temp_tex bound here, so this probably dumps temp_tex again -- confirm
	// before relying on it.
//	glActiveTexture(GL_TEXTURE0);
//	glBindImageTexture(0, last_frame_glowmap_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
//	glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &output_pixels[0]);
//	save_float_tex_to_disk(win_x, win_y, output_pixels, "last_frame_glowmap_tex.tga");

	// temp_tex is generated afresh each time this code runs, so release it here.
	glDeleteTextures(1, &temp_tex);

The glow shader is:

// Compute shaders require OpenGL 4.3 / GLSL 4.30
#version 430

// One invocation per work group in x and y.
layout(local_size_x = 1, local_size_y = 1) in;

// Image unit 0 receives the result; image units 1 and 2 supply the two inputs.
layout(binding = 0, rgba32f) writeonly uniform image2D output_image;
layout(binding = 1, rgba32f) readonly uniform image2D inputa_image;
layout(binding = 2, rgba32f) readonly uniform image2D inputb_image;


// Writes inputa + 0.5 * inputb (RGB only) to the output image at this
// invocation's texel, with alpha forced to 1.0.
void main()
{
	// The global invocation ID doubles as the texel coordinate
	const ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

	const vec3 colour_a = imageLoad(inputa_image, texel).rgb;
	const vec3 colour_b = imageLoad(inputb_image, texel).rgb;

	imageStore(output_image, texel, vec4(colour_a + 0.5*colour_b, 1.0));
}

And the compositing shader is:

#version 430

// 2D texture samplers for the compositing pass.
uniform sampler2D regular_tex;
uniform sampler2D upside_down_tex;
uniform sampler2D reflectance_tex;
uniform sampler2D upside_down_white_mask_tex;
uniform sampler2D glowmap_tex;
uniform sampler2D last_frame_glowmap_tex;


uniform sampler2D depth_tex;

// Texture coordinate used for the texture lookups in main().
in vec2 ftexcoord;

// Image dimensions; combined into img_size below.
uniform int img_width;
uniform int img_height;
// Multiplier applied to the blur radius in main().
uniform int cam_factor;

// (img_width, img_height) as a vec2; main() divides the blur size by it.
vec2 img_size = vec2(img_width, img_height);

// Fragment colour output, bound to output location 0.
layout(location = 0) out vec4 frag_colour;

void main()
{


    // for debug purposes
//frag_colour = texture(glowmap_tex, ftexcoord);
  //return;


   const float pi_times_2 = 6.28318530718; // Pi*2
    
    float directions = 16.0; // BLUR directions (Default 16.0 - More is better but slower)
    float quality = 4.0; // BLUR quality (Default 4.0 - More is better but slower)
    float size = 10.0; // BLUR size (radius)
    vec2 radius = vec2(size/img_size.x * cam_factor, size/img_size.y * cam_factor);




   int count = 0;

   vec4 glowmap_blurred_colour =  texture( last_frame_glowmap_tex, ftexcoord);
   count++;
   ...

JoeJ

4,388

November 25, 2022 04:11 PM

Nice.

But now, somebody needs to tell you about bad habits all gamedevs share: Once they figure out something new, they tend to overuse it.

For you that means too much blur from DOF. Gamers will call it ‘vaseline graphics’. :D

taby

Author

1,557

November 25, 2022 04:32 PM

Yeah, I’m not happy with the result of the DOF. I might just cut it out altogether, as well as the specular map.

JoeJ

4,388

November 25, 2022 04:48 PM

Subtlety is key. Usually people use DOF only for cinematic reasons. Like in cutscenes, to guide the focus of the player, or to do some eye candy / special effects.

Technically you still have the issue of a hard transition from DOF off to on. It seems the radius jumps from zero to some number like 5, but there are no gradual steps in between.
That's not acceptable imo, but otherwise it's nice.