gpu: add shader for tiles

This yields a measurable performance improvement on weaker GPUs, since there
are generally many tiles and each has only a few faces.
This commit is contained in:
Adam
2018-11-20 20:11:48 -05:00
parent c28f53cd4a
commit 67f0672799
2 changed files with 137 additions and 3 deletions

View File

@@ -139,6 +139,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private int glSmallComputeProgram;
private int glSmallComputeShader;
private int glUnorderedComputeProgram;
private int glUnorderedComputeShader;
private int vaoHandle;
private int interfaceTexture;
@@ -168,9 +171,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private GpuIntBuffer vertexBuffer;
private GpuFloatBuffer uvBuffer;
private GpuIntBuffer modelBufferUnordered;
private GpuIntBuffer modelBufferSmall;
private GpuIntBuffer modelBuffer;
private int unorderedModels;
/**
* number of models in small buffer
*/
@@ -223,9 +229,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
try
{
bufferId = uvBufferId = uniformBufferId = -1;
unorderedModels = smallModels = largeModels = 0;
vertexBuffer = new GpuIntBuffer();
uvBuffer = new GpuFloatBuffer();
modelBufferUnordered = new GpuIntBuffer();
modelBufferSmall = new GpuIntBuffer();
modelBuffer = new GpuIntBuffer();
@@ -389,8 +398,10 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
vertexBuffer = null;
uvBuffer = null;
modelBufferSmall = null;
modelBuffer = null;
modelBufferUnordered = null;
// force main buffer provider rebuild to turn off alpha channel
client.resizeCanvas();
@@ -431,6 +442,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
source = template.process(resourceLoader.apply("comp_small.glsl"));
GLUtil.loadComputeShader(gl, glSmallComputeProgram, glSmallComputeShader, source);
glUnorderedComputeProgram = gl.glCreateProgram();
glUnorderedComputeShader = gl.glCreateShader(gl.GL_COMPUTE_SHADER);
template = new Template(resourceLoader);
source = template.process(resourceLoader.apply("comp_unordered.glsl"));
GLUtil.loadComputeShader(gl, glUnorderedComputeProgram, glUnorderedComputeShader, source);
glUiProgram = gl.glCreateProgram();
glUiVertexShader = gl.glCreateShader(gl.GL_VERTEX_SHADER);
glUiFragmentShader = gl.glCreateShader(gl.GL_FRAGMENT_SHADER);
@@ -484,6 +501,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
gl.glDeleteProgram(glSmallComputeProgram);
glSmallComputeProgram = -1;
gl.glDeleteShader(glUnorderedComputeShader);
glUnorderedComputeShader = -1;
gl.glDeleteProgram(glUnorderedComputeProgram);
glUnorderedComputeProgram = -1;
///
gl.glDeleteShader(glUiVertexShader);
@@ -672,7 +695,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
y -= client.getCameraY2();
z -= client.getCameraZ2();
GpuIntBuffer b = bufferForTriangles(2);
GpuIntBuffer b = modelBufferUnordered;
++unorderedModels;
b.ensureCapacity(8);
IntBuffer buffer = b.getBuffer();
@@ -701,7 +725,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
y -= client.getCameraY2();
z -= client.getCameraZ2();
GpuIntBuffer b = bufferForTriangles(model.getBufferLen() / 3);
GpuIntBuffer b = modelBufferUnordered;
++unorderedModels;
b.ensureCapacity(8);
IntBuffer buffer = b.getBuffer();
@@ -777,16 +802,19 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
uvBuffer.flip();
modelBuffer.flip();
modelBufferSmall.flip();
modelBufferUnordered.flip();
int bufferId = glGenBuffers(gl); // temporary scene vertex buffer
int uvBufferId = glGenBuffers(gl); // temporary scene uv buffer
int modelBufferId = glGenBuffers(gl); // scene model buffer, large
int modelBufferSmallId = glGenBuffers(gl); // scene model buffer, small
int modelBufferUnorderedId = glGenBuffers(gl);
IntBuffer vertexBuffer = this.vertexBuffer.getBuffer();
FloatBuffer uvBuffer = this.uvBuffer.getBuffer();
IntBuffer modelBuffer = this.modelBuffer.getBuffer();
IntBuffer modelBufferSmall = this.modelBufferSmall.getBuffer();
IntBuffer modelBufferUnordered = this.modelBufferUnordered.getBuffer();
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, bufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_STREAM_DRAW);
@@ -800,6 +828,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, modelBufferSmallId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, gl.GL_STREAM_DRAW);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, modelBufferUnorderedId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, gl.GL_STREAM_DRAW);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0);
// allocate target vertex buffer for compute shaders
@@ -847,6 +878,19 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
* save on GPU resources. Small will sort <= 512 faces, large will do <= 4096.
*/
// unordered
gl.glUseProgram(glUnorderedComputeProgram);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, modelBufferUnorderedId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, bufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, outBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, outUvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, uvBufferId);
gl.glDispatchCompute(unorderedModels, 1, 1);
// small
gl.glUseProgram(glSmallComputeProgram);
@@ -943,9 +987,10 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
uvBuffer.clear();
modelBuffer.clear();
modelBufferSmall.clear();
modelBufferUnordered.clear();
targetBufferOffset = 0;
smallModels = largeModels = 0;
smallModels = largeModels = unorderedModels = 0;
tempOffset = 0;
tempUvOffset = 0;
@@ -953,6 +998,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
glDeleteBuffer(gl, uvBufferId);
glDeleteBuffer(gl, modelBufferId);
glDeleteBuffer(gl, modelBufferSmallId);
glDeleteBuffer(gl, modelBufferUnorderedId);
glDeleteBuffer(gl, outBufferId);
glDeleteBuffer(gl, outUvBufferId);

View File

@@ -0,0 +1,88 @@
/*
* Copyright (c) 2018, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#version 430 core
#include comp_common.glsl
layout(local_size_x = 6) in;
#include common.glsl
// Entry point of the unordered pass: copies one model's triangles into the
// output buffers without sorting them. The work group index selects the model
// descriptor (ol[gl_WorkGroupID.x]); the local invocation index selects which
// triangle of that model this invocation handles.
void main() {
  uint face = gl_LocalInvocationID.x;
  modelinfo info = ol[gl_WorkGroupID.x];

  // Invocations whose index is past the model's length have nothing to do.
  if (face >= info.length) {
    return;
  }

  int flags = info.flags;
  int orientation = flags & 0x7ff;  // low 11 bits of flags are passed to rotate()
  ivec4 pos = ivec4(info.x, info.y, info.z, 0);

  // A negative flags value selects vb/uv as the input buffers; otherwise the
  // tempvb/tempuv buffers are read.
  bool fromVb = flags < 0;

  // Each triangle occupies three consecutive entries in both input and output.
  uint inBase = info.offset + face * 3;
  uint outBase = info.idx + face * 3;

  // Rotate each vertex, offset it by the model position, and store it.
  for (int v = 0; v < 3; ++v) {
    ivec4 vert;
    if (fromVb) {
      vert = vb[inBase + v];
    } else {
      vert = tempvb[inBase + v];
    }
    vout[outBase + v] = pos + rotate(vert, orientation);
  }

  // A negative uvOffset means there are no uvs to copy: write zeros instead.
  int uvOffset = info.uvOffset;
  if (uvOffset < 0) {
    for (int v = 0; v < 3; ++v) {
      uvout[outBase + v] = vec4(0, 0, 0, 0);
    }
    return;
  }

  // Otherwise copy the three uv entries from the buffer matching the vertex source.
  uint uvBase = uvOffset + face * 3;
  if (fromVb) {
    for (int v = 0; v < 3; ++v) {
      uvout[outBase + v] = uv[uvBase + v];
    }
  } else {
    for (int v = 0; v < 3; ++v) {
      uvout[outBase + v] = tempuv[uvBase + v];
    }
  }
}