I'm getting unexpected results with my new sprite renderer, which uses OpenGL ES 2.0. It performs much worse than my old sprite renderer from 5 years ago, which uses OpenGL ES 1.1 (no shaders). All I'm doing is displaying a 16x16 grid of quads and moving and zooming it around a little. You can see the difference in the video below:
Video to demonstrate the issue
Clearly, the fixed-function pipeline runs smoothly, but my supposedly fast one-draw-call shader program chugs (when I tried one draw call per quad, it was naturally even slower). This is not what I expected.
- How can I speed up my new sprite renderer?
- Is the fixed-function pipeline inherently better suited to vertex data that changes often (like a new VBO on every frame)?
- I could just re-write the new renderer in OpenGL ES 1.1 again, but then I will lose compatibility with desktop OpenGL. This is a bad idea, right?
- Can I emulate the fixed-function pipeline with shaders? Is there code out there that does this? What tricks did they use in it to get sprites to render so fast?
Old Fixed-Function Code:
// Draws every non-empty tile of levels 0..mTileEdit.mCurLevel as one flat-colored
// quad (two triangles) using the OpenGL ES 1.1 fixed-function pipeline.
//
// Changes versus the previous version:
//  * The direct vertex buffer is allocated once instead of once per tile.
//  * Exactly 6 vertices are drawn per tile. The old call asked for 6 * 3 = 18
//    vertices, the last 12 of which were never written (all zero) and only
//    produced degenerate triangles.
//  * The edit-mode lookup, the per-level alpha and the client-state toggling are
//    hoisted out of the inner loops.
{
    // NOTE(review): assumes the "name" entry does not change while this loop runs.
    final boolean fadeByDepth =
            Lozoware.getMP().get("name").equals("pixeledit")
            || Lozoware.getMP().get("name").equals("edit3d");

    // 6 vertices * 3 floats * 4 bytes per float.
    bb = ByteBuffer.allocateDirect(6 * 3 * 4);
    bb.order(ByteOrder.nativeOrder());
    final FloatBuffer buf = bb.asFloatBuffer();

    // CPU-side staging for one quad; the z component of every vertex stays 0.
    final float[] quad = new float[6 * 3];

    gl.glEnableClientState(GL10.GL_VERTEX_ARRAY);

    for (int z = 0; z <= mTileEdit.mCurLevel; z++) {
        // In the editor modes, deeper levels are drawn progressively more transparent.
        final float alpha = fadeByDepth
                ? 1.0f - ((float) z / (float) tm.mSizeZ)
                : 1.0f;

        for (int y = 0; y < tm.mSizeY; y++) {
            final float bottom = y * mGLTileSizeY;
            final float top = y * mGLTileSizeY + mGLTileSizeY;

            for (int x = 0; x < tm.mSizeX; x++) {
                final int t = tm.get(x, y, z);
                // Only palette indices 1..255 are drawable.
                if (t <= 0 || t >= 256) {
                    continue;
                }

                gl.glColor4f(tm.mPalette.mRed[t],
                        tm.mPalette.mGreen[t],
                        tm.mPalette.mBlue[t], alpha);

                final float left = x * mGLTileSizeX;
                final float right = x * mGLTileSizeX + mGLTileSizeX;

                // Triangle 1: top-left, bottom-right, bottom-left
                quad[0] = left;   quad[1] = top;
                quad[3] = right;  quad[4] = bottom;
                quad[6] = left;   quad[7] = bottom;
                // Triangle 2: top-left, top-right, bottom-right
                quad[9] = left;   quad[10] = top;
                quad[12] = right; quad[13] = top;
                quad[15] = right; quad[16] = bottom;

                buf.clear();
                buf.put(quad);
                buf.position(0);

                gl.glVertexPointer(3, GL10.GL_FLOAT, 0, buf);
                gl.glDrawArrays(GL10.GL_TRIANGLES, 0, 6);
            }
        }
    }

    gl.glDisableClientState(GL10.GL_VERTEX_ARRAY);
}
gl.glFlush();
New OpenGL ES 2.0 Code:
int numVerts = 0;
int numQuads = 0;
// Alloc enough data for all sprites
for (const auto & pair: objects) {
Object * obj = pair.second;
if (obj != nullptr && obj - > visible && obj - > type == OBJTYPE_SPRITE) {
numVerts += 6;
numQuads += 1;
}
}
int floatsPerVert = 26;
float * data = new float[numVerts * floatsPerVert];
int cursor = 0;
// Quad/sprite index
int q = 0;
// Fill data for all sprites
for (const auto & pair: objects) {
Object * obj = pair.second;
if (obj != nullptr && obj - > visible && obj - > type == OBJTYPE_SPRITE) {
// Add sprite
texAtlas.add(obj - > textureName);
if (texAtlas.getNeedsRefresh())
texAtlas.refresh();
// Set modelview matrix
glm::mat4 mvMatrix;
glm::mat4 scaleToNDC;
glm::mat4 cameraRotate;
glm::mat4 cameraTranslate;
glm::mat4 rotate;
#ifdef PLATFORM_OPENVR
scaleToNDC = glm::scale(glm::mat4(), glm::vec3(VRSCALE, VRSCALE, VRSCALE));#
else
scaleToNDC = glm::scale(glm::mat4(), glm::vec3(NDC_SCALE, NDC_SCALE, NDC_SCALE));#
endif
if (obj - > alwaysFacePlayer)
rotate = glm::rotate(glm::mat4(), glm::radians(-camera - > yaw), glm::vec3(0, 1, 0)) // Model yaw
*
glm::rotate(glm::mat4(), glm::radians(camera - > pitch), glm::vec3(1, 0, 0)); // Model pitch
else
rotate = glm::rotate(glm::mat4(), glm::radians(-obj - > yaw), glm::vec3(0, 1, 0)) // Model yaw
*
glm::rotate(glm::mat4(), glm::radians(-obj - > pitch), glm::vec3(1, 0, 0)); // Model pitch
cameraRotate = glm::rotate(glm::mat4(), glm::radians(camera - > roll), glm::vec3(0, 0, 1)) // Camera roll
*
glm::rotate(glm::mat4(), -glm::radians(camera - > pitch), glm::vec3(1, 0, 0)) // Camera pitch
*
glm::rotate(glm::mat4(), glm::radians(camera - > yaw), glm::vec3(0, 1, 0)); // Camera yaw
cameraTranslate = glm::translate(glm::mat4(), glm::vec3(-camera - > position.x, -camera - > position.y, -camera - > position.z)); // Camera translate
#ifdef PLATFORM_OPENVR
mvMatrix =
glm::make_mat4((const GLfloat * ) g_poseEyeMatrix.get()) *
scaleToNDC *
cameraRotate *
cameraTranslate *
glm::translate(glm::mat4(), glm::vec3(obj - > position.x, obj - > position.y, obj - > position.z)) // World translate
*
rotate *
glm::scale(glm::mat4(), obj - > scale / glm::vec3(2.0, 2.0, 2.0)); // Scale
#else
mvMatrix =
scaleToNDC *
cameraRotate *
cameraTranslate *
glm::translate(glm::mat4(), glm::vec3(obj - > position.x, obj - > position.y, obj - > position.z)) // World translate
*
rotate *
glm::scale(glm::mat4(), obj - > scale / glm::vec3(2.0, 2.0, 2.0)); // Scale
#endif
// ______
// |\\5 4|
// |0\\ |
// | \\ |
// | \\ |
// | \\3|
// |1__2_\\|
// Triangle 1
// Vertex 0
data[cursor + 0] = -1.0 f;
data[cursor + 1] = 1.0 f;
data[cursor + 2] = 0.0 f;
data[cursor + 3] = 1.0 f;
UV input;
input.u = 0.0 f;
input.v = 1.0 f;
UV output = texAtlas.getUV(obj - > textureName, input);
data[cursor + 4] = output.u;
data[cursor + 5] = output.v;
data[cursor + 6] = mvMatrix[0][0];
data[cursor + 7] = mvMatrix[0][1];
data[cursor + 8] = mvMatrix[0][2];
data[cursor + 9] = mvMatrix[0][3];
data[cursor + 10] = mvMatrix[1][0];
data[cursor + 11] = mvMatrix[1][1];
data[cursor + 12] = mvMatrix[1][2];
data[cursor + 13] = mvMatrix[1][3];
data[cursor + 14] = mvMatrix[2][0];
data[cursor + 15] = mvMatrix[2][1];
data[cursor + 16] = mvMatrix[2][2];
data[cursor + 17] = mvMatrix[2][3];
data[cursor + 18] = mvMatrix[3][0];
data[cursor + 19] = mvMatrix[3][1];
data[cursor + 20] = mvMatrix[3][2];
data[cursor + 21] = mvMatrix[3][3];
data[cursor + 22] = obj - > color.r;
data[cursor + 23] = obj - > color.g;
data[cursor + 24] = obj - > color.b;
data[cursor + 25] = obj - > color.a;
cursor += floatsPerVert;
// Vertex 1
data[cursor + 0] = -1.0 f;
data[cursor + 1] = -1.0 f;
data[cursor + 2] = 0.0 f;
data[cursor + 3] = 1.0 f;
input.u = 0.0 f;
input.v = 0.0 f;
output = texAtlas.getUV(obj - > textureName, input);
data[cursor + 4] = output.u;
data[cursor + 5] = output.v;
data[cursor + 6] = mvMatrix[0][0];
data[cursor + 7] = mvMatrix[0][1];
data[cursor + 8] = mvMatrix[0][2];
data[cursor + 9] = mvMatrix[0][3];
data[cursor + 10] = mvMatrix[1][0];
data[cursor + 11] = mvMatrix[1][1];
data[cursor + 12] = mvMatrix[1][2];
data[cursor + 13] = mvMatrix[1][3];
data[cursor + 14] = mvMatrix[2][0];
data[cursor + 15] = mvMatrix[2][1];
data[cursor + 16] = mvMatrix[2][2];
data[cursor + 17] = mvMatrix[2][3];
data[cursor + 18] = mvMatrix[3][0];
data[cursor + 19] = mvMatrix[3][1];
data[cursor + 20] = mvMatrix[3][2];
data[cursor + 21] = mvMatrix[3][3];
data[cursor + 22] = obj - > color.r;
data[cursor + 23] = obj - > color.g;
data[cursor + 24] = obj - > color.b;
data[cursor + 25] = obj - > color.a;
cursor += floatsPerVert;
// Vertex 2
data[cursor + 0] = 1.0 f;
data[cursor + 1] = -1.0 f;
data[cursor + 2] = 0.0 f;
data[cursor + 3] = 1.0 f;
input.u = 1.0 f;
input.v = 0.0 f;
output = texAtlas.getUV(obj - > textureName, input);
data[cursor + 4] = output.u;
data[cursor + 5] = output.v;
data[cursor + 6] = mvMatrix[0][0];
data[cursor + 7] = mvMatrix[0][1];
data[cursor + 8] = mvMatrix[0][2];
data[cursor + 9] = mvMatrix[0][3];
data[cursor + 10] = mvMatrix[1][0];
data[cursor + 11] = mvMatrix[1][1];
data[cursor + 12] = mvMatrix[1][2];
data[cursor + 13] = mvMatrix[1][3];
data[cursor + 14] = mvMatrix[2][0];
data[cursor + 15] = mvMatrix[2][1];
data[cursor + 16] = mvMatrix[2][2];
data[cursor + 17] = mvMatrix[2][3];
data[cursor + 18] = mvMatrix[3][0];
data[cursor + 19] = mvMatrix[3][1];
data[cursor + 20] = mvMatrix[3][2];
data[cursor + 21] = mvMatrix[3][3];
data[cursor + 22] = obj - > color.r;
data[cursor + 23] = obj - > color.g;
data[cursor + 24] = obj - > color.b;
data[cursor + 25] = obj - > color.a;
cursor += floatsPerVert;
// Triangle 2
// Vertex 3
data[cursor + 0] = 1.0 f;
data[cursor + 1] = -1.0 f;
data[cursor + 2] = 0.0 f;
data[cursor + 3] = 1.0 f;
input.u = 1.0 f;
input.v = 0.0 f;
output = texAtlas.getUV(obj - > textureName, input);
data[cursor + 4] = output.u;
data[cursor + 5] = output.v;
data[cursor + 6] = mvMatrix[0][0];
data[cursor + 7] = mvMatrix[0][1];
data[cursor + 8] = mvMatrix[0][2];
data[cursor + 9] = mvMatrix[0][3];
data[cursor + 10] = mvMatrix[1][0];
data[cursor + 11] = mvMatrix[1][1];
data[cursor + 12] = mvMatrix[1][2];
data[cursor + 13] = mvMatrix[1][3];
data[cursor + 14] = mvMatrix[2][0];
data[cursor + 15] = mvMatrix[2][1];
data[cursor + 16] = mvMatrix[2][2];
data[cursor + 17] = mvMatrix[2][3];
data[cursor + 18] = mvMatrix[3][0];
data[cursor + 19] = mvMatrix[3][1];
data[cursor + 20] = mvMatrix[3][2];
data[cursor + 21] = mvMatrix[3][3];
data[cursor + 22] = obj - > color.r;
data[cursor + 23] = obj - > color.g;
data[cursor + 24] = obj - > color.b;
data[cursor + 25] = obj - > color.a;
cursor += floatsPerVert;
// Vertex 4
data[cursor + 0] = 1.0 f;
data[cursor + 1] = 1.0 f;
data[cursor + 2] = 0.0 f;
data[cursor + 3] = 1.0 f;
input.u = 1.0 f;
input.v = 1.0 f;
output = texAtlas.getUV(obj - > textureName, input);
data[cursor + 4] = output.u;
data[cursor + 5] = output.v;
data[cursor + 6] = mvMatrix[0][0];
data[cursor + 7] = mvMatrix[0][1];
data[cursor + 8] = mvMatrix[0][2];
data[cursor + 9] = mvMatrix[0][3];
data[cursor + 10] = mvMatrix[1][0];
data[cursor + 11] = mvMatrix[1][1];
data[cursor + 12] = mvMatrix[1][2];
data[cursor + 13] = mvMatrix[1][3];
data[cursor + 14] = mvMatrix[2][0];
data[cursor + 15] = mvMatrix[2][1];
data[cursor + 16] = mvMatrix[2][2];
data[cursor + 17] = mvMatrix[2][3];
data[cursor + 18] = mvMatrix[3][0];
data[cursor + 19] = mvMatrix[3][1];
data[cursor + 20] = mvMatrix[3][2];
data[cursor + 21] = mvMatrix[3][3];
data[cursor + 22] = obj - > color.r;
data[cursor + 23] = obj - > color.g;
data[cursor + 24] = obj - > color.b;
data[cursor + 25] = obj - > color.a;
cursor += floatsPerVert;
// Vertex 5
data[cursor + 0] = -1.0 f;
data[cursor + 1] = 1.0 f;
data[cursor + 2] = 0.0 f;
data[cursor + 3] = 1.0 f;
input.u = 0.0 f;
input.v = 1.0 f;
output = texAtlas.getUV(obj - > textureName, input);
data[cursor + 4] = output.u;
data[cursor + 5] = output.v;
data[cursor + 6] = mvMatrix[0][0];
data[cursor + 7] = mvMatrix[0][1];
data[cursor + 8] = mvMatrix[0][2];
data[cursor + 9] = mvMatrix[0][3];
data[cursor + 10] = mvMatrix[1][0];
data[cursor + 11] = mvMatrix[1][1];
data[cursor + 12] = mvMatrix[1][2];
data[cursor + 13] = mvMatrix[1][3];
data[cursor + 14] = mvMatrix[2][0];
data[cursor + 15] = mvMatrix[2][1];
data[cursor + 16] = mvMatrix[2][2];
data[cursor + 17] = mvMatrix[2][3];
data[cursor + 18] = mvMatrix[3][0];
data[cursor + 19] = mvMatrix[3][1];
data[cursor + 20] = mvMatrix[3][2];
data[cursor + 21] = mvMatrix[3][3];
data[cursor + 22] = obj - > color.r;
data[cursor + 23] = obj - > color.g;
data[cursor + 24] = obj - > color.b;
data[cursor + 25] = obj - > color.a;
cursor += floatsPerVert;
q++;
}
}
#if defined PLATFORM_WINDOWS || defined PLATFORM_OSX
// Generate VAO
glGenVertexArrays(1, (GLuint * ) & vao);
checkGLError("glGenVertexArrays");
glBindVertexArray(vao);
checkGLError("glBindVertexArray");#
endif
// Generate VBO
glGenBuffers(1, (GLuint * ) & vbo);
checkGLError("glGenBuffers");
glBindBuffer(GL_ARRAY_BUFFER, vbo);
checkGLError("glBindBuffer");
// Load data into VBO
glBufferData(GL_ARRAY_BUFFER, sizeof(float) * 6 * floatsPerVert * q, data, GL_STATIC_DRAW);
checkGLError("glBufferData");
// Delete data
delete data;
// Get aspect
float width = PLAT_GetWindowWidth();
float height = PLAT_GetWindowHeight();#
ifdef PLATFORM_OPENVR
float aspect = 1.0;#
else
float aspect = width / height;#
endif
// DRAW
glEnable(GL_CULL_FACE);
checkGLError("glEnable");
glFrontFace(GL_CCW);
checkGLError("glFrontFace");
glCullFace(GL_BACK);
checkGLError("glCullFace");
glEnable(GL_BLEND);
checkGLError("ShapeRenderer glEnable");#
ifndef PLATFORM_ANDROID
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
checkGLError("ShapeRenderer glBlendFunc");#
endif
// Add program to OpenGL environment
int curProgram = -1;
curProgram = programMain;
glUseProgram(curProgram);
checkGLError("SpriteRenderer glUseProgram");
#if defined PLATFORM_WINDOWS || defined PLATFORM_OSX
// Bind the VAO
glBindVertexArray(vao);
checkGLError("glBindVertexArray");#
endif
// Bind the VBO
glBindBuffer(GL_ARRAY_BUFFER, vbo);
checkGLError("glBindBuffer");
// Set the projection matrix
glm::mat4 projMatrix;
#if defined PLATFORM_OPENVR
projMatrix = glm::make_mat4((const GLfloat * ) g_projectionMatrix.get());#
else
projMatrix = glm::perspective(VIEW_FOV, aspect, 0.001 f, 1000.0 f);#
endif
setMatrix(curProgram, "projectionMatrix", projMatrix);
setUniform4f(curProgram, "globalColor", globalColor.x, globalColor.y, globalColor.z, globalColor.w);
int t = texAtlas.getGlTexId();
glActiveTexture(GL_TEXTURE0);
checkGLError("glActiveTexture");
glBindTexture(GL_TEXTURE_2D, t);
setUniform2f(curProgram, "vTexSpan", 1.0, 1.0);
setUniform1f(curProgram, "useTexture", 1.0);
setUniform1f(curProgram, "fadeNear", 600.0 * NDC_SCALE);
setUniform1f(curProgram, "fadeFar", 900.0 * NDC_SCALE);
// Set attributes
setVertexAttrib(curProgram, "vPosition", 4, GL_FLOAT, false, floatsPerVert * sizeof(float), 0);
setVertexAttrib(curProgram, "vTexCoords", 2, GL_FLOAT, false, floatsPerVert * sizeof(float), 4);
setVertexAttrib(curProgram, "mvMatrixPt1", 4, GL_FLOAT, false, floatsPerVert * sizeof(float), 6);
setVertexAttrib(curProgram, "mvMatrixPt2", 4, GL_FLOAT, false, floatsPerVert * sizeof(float), 10);
setVertexAttrib(curProgram, "mvMatrixPt3", 4, GL_FLOAT, false, floatsPerVert * sizeof(float), 14);
setVertexAttrib(curProgram, "mvMatrixPt4", 4, GL_FLOAT, false, floatsPerVert * sizeof(float), 18);
setVertexAttrib(curProgram, "vColor", 4, GL_FLOAT, false, floatsPerVert * sizeof(float), 22);
// Draw
glDrawArrays(GL_TRIANGLES, 0, q * 6);
checkGLError("glDrawArrays");
#if defined PLATFORM_WINDOWS || defined PLATFORM_OSX
// Reset
glBindVertexArray(0);
glBindTexture(GL_TEXTURE_2D, 0);
glUseProgram(0);#
endif
// Delete VAO and VBO
glDeleteBuffers(1, (GLuint * ) & vbo);#
if defined PLATFORM_WINDOWS || defined PLATFORM_OSX
glDeleteVertexArrays(1, (GLuint * ) & vao);#
endif
Shader Code:
//
// VERTEX SHADER ES 2.0
//
// GLSL ES 1.00 vertex shader for the batched sprite renderer.
//
// The model-view matrix arrives as four vec4 attributes (one per column)
// because every sprite in the batch carries its own matrix.
//
// Changes versus the previous version:
//  * vTexCoordsOut and distToCamera are mediump instead of lowp. lowp is only
//    guaranteed to cover [-2, 2] in steps of 1/256, which is too coarse for
//    atlas coordinates and too small a range for a camera distance.
//  * The view-space position is computed once and reused. The old expression
//    `projectionMatrix * mvMatrix * vPosition` evaluated a full mat4 * mat4
//    product for every vertex.
//
// The vNormal, vVertexLight and oldmvMatrix inputs and their varyings are not
// used by main(); their declarations are kept unchanged.
const char * vertexShaderCodeES20 =
    "attribute vec4 vPosition;"
    "varying lowp vec4 posOut; "
    "attribute vec2 vTexCoords;"
    "varying mediump vec2 vTexCoordsOut; "
    "uniform vec2 vTexSpan;"
    "attribute vec4 vNormal;"
    "varying vec4 vNormalOut;"
    "attribute vec4 vVertexLight; "
    "varying vec4 vVertexLightOut; "
    "uniform mat4 projectionMatrix; "
    "varying mediump float distToCamera; "
    "attribute vec4 mvMatrixPt1; "
    "attribute vec4 mvMatrixPt2; "
    "attribute vec4 mvMatrixPt3; "
    "attribute vec4 mvMatrixPt4; "
    "attribute vec4 vColor; "
    "varying vec4 vColorOut;"
    "attribute mat4 oldmvMatrix; "
    "void main() {"
    // Reassemble the per-sprite model-view matrix from its four columns.
    "  mat4 mvMatrix = mat4(mvMatrixPt1, mvMatrixPt2, mvMatrixPt3, mvMatrixPt4); "
    // View-space position, shared by the projection and the distance fade.
    "  vec4 posBeforeProj = mvMatrix * vPosition; "
    "  gl_Position = projectionMatrix * posBeforeProj; "
    "  vTexCoordsOut = vTexCoords * vTexSpan; "
    "  posOut = gl_Position; "
    "  distToCamera = -posBeforeProj.z; "
    "  vColorOut = vColor; "
    "}\n";
//
// FRAGMENT SHADER ES 2.0
//
// GLSL ES 1.00 fragment shader for the batched sprite renderer.
//
// Discards fully transparent texels and fragments at or beyond fadeFar, fades
// alpha between fadeNear and fadeFar, then tints the result with the
// per-vertex color and globalColor.
//
// Changes versus the previous version:
//  * vTexCoordsOut, distToCamera, fadeNear, fadeFar and alpha are mediump
//    instead of lowp. lowp is only guaranteed to cover [-2, 2] in steps of
//    1/256, which is too coarse for atlas coordinates and too small a range
//    for distances.
//  * The atlas is sampled once; the texel already fetched for the alpha test
//    is reused for the final color instead of sampling the same coordinate again.
const char * fragmentShaderCodeES20 =
    "uniform sampler2D uTexture; "
    "uniform lowp vec4 vColor; "
    "uniform lowp vec4 globalColor; "
    "varying mediump vec2 vTexCoordsOut; "
    "varying lowp vec4 posOut; "
    "uniform lowp float useTexture; "
    "uniform mediump float fadeNear; "
    "uniform mediump float fadeFar; "
    "varying mediump float distToCamera; "
    "varying lowp vec4 vColorOut; "
    "void main() {"
    "  lowp vec4 f = texture2D(uTexture, vTexCoordsOut.st); "
    "  if (f.a == 0.0) "
    "    discard; "
    "  lowp float visibility = 1.0; "
    "  mediump float alpha = 1.0; "
    "  if (distToCamera >= fadeFar) discard; "
    "  if (distToCamera >= fadeNear) "
    "    alpha = 1.0 - (distToCamera - fadeNear) * 3.0; "
    "  if (useTexture == 1.0)"
    "  {"
    "    gl_FragColor = f * vColorOut * vec4(visibility, visibility, visibility, alpha) * globalColor; "
    "  }"
    "  else"
    "  {"
    "    gl_FragColor = vColorOut * vec4(visibility, visibility, visibility, alpha) * globalColor; "
    "  }"
    "}\n";
The rest of the new code is here: