From 67f067279911253346b39aa089841a9f5dc7b656 Mon Sep 17 00:00:00 2001 From: Adam Date: Tue, 20 Nov 2018 20:11:48 -0500 Subject: [PATCH] gpu: add shader for tiles This has a measurable performance improvement on weaker GPUs since there are generally many tiles and they have few faces --- .../client/plugins/gpu/GpuPlugin.java | 52 ++++++++++- .../client/plugins/gpu/comp_unordered.glsl | 88 +++++++++++++++++++ 2 files changed, 137 insertions(+), 3 deletions(-) create mode 100644 runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.glsl diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java index e5dd9e717f..404b1687c5 100644 --- a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java +++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java @@ -139,6 +139,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks private int glSmallComputeProgram; private int glSmallComputeShader; + private int glUnorderedComputeProgram; + private int glUnorderedComputeShader; + private int vaoHandle; private int interfaceTexture; @@ -168,9 +171,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks private GpuIntBuffer vertexBuffer; private GpuFloatBuffer uvBuffer; + private GpuIntBuffer modelBufferUnordered; private GpuIntBuffer modelBufferSmall; private GpuIntBuffer modelBuffer; + private int unorderedModels; + /** * number of models in small buffer */ @@ -223,9 +229,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks try { bufferId = uvBufferId = uniformBufferId = -1; + unorderedModels = smallModels = largeModels = 0; vertexBuffer = new GpuIntBuffer(); uvBuffer = new GpuFloatBuffer(); + + modelBufferUnordered = new GpuIntBuffer(); modelBufferSmall = new GpuIntBuffer(); modelBuffer = new GpuIntBuffer(); @@ -389,8 +398,10 @@ public class GpuPlugin extends 
Plugin implements DrawCallbacks vertexBuffer = null; uvBuffer = null; + modelBufferSmall = null; modelBuffer = null; + modelBufferUnordered = null; // force main buffer provider rebuild to turn off alpha channel client.resizeCanvas(); @@ -431,6 +442,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks source = template.process(resourceLoader.apply("comp_small.glsl")); GLUtil.loadComputeShader(gl, glSmallComputeProgram, glSmallComputeShader, source); + glUnorderedComputeProgram = gl.glCreateProgram(); + glUnorderedComputeShader = gl.glCreateShader(gl.GL_COMPUTE_SHADER); + template = new Template(resourceLoader); + source = template.process(resourceLoader.apply("comp_unordered.glsl")); + GLUtil.loadComputeShader(gl, glUnorderedComputeProgram, glUnorderedComputeShader, source); + glUiProgram = gl.glCreateProgram(); glUiVertexShader = gl.glCreateShader(gl.GL_VERTEX_SHADER); glUiFragmentShader = gl.glCreateShader(gl.GL_FRAGMENT_SHADER); @@ -484,6 +501,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks gl.glDeleteProgram(glSmallComputeProgram); glSmallComputeProgram = -1; + gl.glDeleteShader(glUnorderedComputeShader); + glUnorderedComputeShader = -1; + + gl.glDeleteProgram(glUnorderedComputeProgram); + glUnorderedComputeProgram = -1; + /// gl.glDeleteShader(glUiVertexShader); @@ -672,7 +695,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks y -= client.getCameraY2(); z -= client.getCameraZ2(); - GpuIntBuffer b = bufferForTriangles(2); + GpuIntBuffer b = modelBufferUnordered; + ++unorderedModels; b.ensureCapacity(8); IntBuffer buffer = b.getBuffer(); @@ -701,7 +725,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks y -= client.getCameraY2(); z -= client.getCameraZ2(); - GpuIntBuffer b = bufferForTriangles(model.getBufferLen() / 3); + GpuIntBuffer b = modelBufferUnordered; + ++unorderedModels; b.ensureCapacity(8); IntBuffer buffer = b.getBuffer(); @@ -777,16 +802,19 @@ public class GpuPlugin extends Plugin 
implements DrawCallbacks uvBuffer.flip(); modelBuffer.flip(); modelBufferSmall.flip(); + modelBufferUnordered.flip(); int bufferId = glGenBuffers(gl); // temporary scene vertex buffer int uvBufferId = glGenBuffers(gl); // temporary scene uv buffer int modelBufferId = glGenBuffers(gl); // scene model buffer, large int modelBufferSmallId = glGenBuffers(gl); // scene model buffer, small + int modelBufferUnorderedId = glGenBuffers(gl); IntBuffer vertexBuffer = this.vertexBuffer.getBuffer(); FloatBuffer uvBuffer = this.uvBuffer.getBuffer(); IntBuffer modelBuffer = this.modelBuffer.getBuffer(); IntBuffer modelBufferSmall = this.modelBufferSmall.getBuffer(); + IntBuffer modelBufferUnordered = this.modelBufferUnordered.getBuffer(); gl.glBindBuffer(gl.GL_ARRAY_BUFFER, bufferId); gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_STREAM_DRAW); @@ -800,6 +828,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks gl.glBindBuffer(gl.GL_ARRAY_BUFFER, modelBufferSmallId); gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, gl.GL_STREAM_DRAW); + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, modelBufferUnorderedId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, gl.GL_STREAM_DRAW); + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0); // allocate target vertex buffer for compute shaders @@ -847,6 +878,19 @@ public class GpuPlugin extends Plugin implements DrawCallbacks * save on GPU resources. Small will sort <= 512 faces, large will do <= 4096. 
*/ + // unordered + gl.glUseProgram(glUnorderedComputeProgram); + + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, modelBufferUnorderedId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, bufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, outBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, outUvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, uvBufferId); + + gl.glDispatchCompute(unorderedModels, 1, 1); + // small gl.glUseProgram(glSmallComputeProgram); @@ -943,9 +987,10 @@ public class GpuPlugin extends Plugin implements DrawCallbacks uvBuffer.clear(); modelBuffer.clear(); modelBufferSmall.clear(); + modelBufferUnordered.clear(); targetBufferOffset = 0; - smallModels = largeModels = 0; + smallModels = largeModels = unorderedModels = 0; tempOffset = 0; tempUvOffset = 0; @@ -953,6 +998,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks glDeleteBuffer(gl, uvBufferId); glDeleteBuffer(gl, modelBufferId); glDeleteBuffer(gl, modelBufferSmallId); + glDeleteBuffer(gl, modelBufferUnorderedId); glDeleteBuffer(gl, outBufferId); glDeleteBuffer(gl, outUvBufferId); diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.glsl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.glsl new file mode 100644 index 0000000000..1abda2304f --- /dev/null +++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.glsl @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018, Adam + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#version 430 core
+
+#include comp_common.glsl
+
+layout(local_size_x = 6) in; // 6 invocations per work group; main() gives each invocation at most one triangle
+
+#include common.glsl
+// Copies one model's triangles to the output buffers in input order (no sorting): work group = model, invocation = triangle.
+void main() {
+  uint groupId = gl_WorkGroupID.x; // selects this model's entry in ol
+  uint localId = gl_LocalInvocationID.x; // selects the triangle this invocation handles
+  modelinfo minfo = ol[groupId]; // modelinfo/ol are not declared in this file; presumably provided by comp_common.glsl
+  // Unpack the model description into locals.
+  int offset = minfo.offset; // index of the model's first vertex in vb/tempvb
+  int length = minfo.length; // triangle count: three vertices are read per triangle index below
+  int outOffset = minfo.idx; // index of the model's first vertex in vout/uvout
+  int uvOffset = minfo.uvOffset; // index of the model's first uv in uv/tempuv; negative: zeros are written instead
+  int flags = minfo.flags; // sign selects the source buffers, low 11 bits are passed to rotate()
+  int orientation = flags & 0x7ff;
+  ivec4 pos = ivec4(minfo.x, minfo.y, minfo.z, 0); // added to every rotated vertex before it is written out
+  // Invocations at or beyond the model's triangle count have nothing to do.
+  if (localId >= length) {
+    return;
+  }
+
+  uint ssboOffset = localId;
+  ivec4 thisA, thisB, thisC;
+
+  // Read the triangle's three vertices: from vb when flags is negative, from tempvb otherwise
+  if (flags < 0) {
+    thisA = vb[offset + ssboOffset * 3    ];
+    thisB = vb[offset + ssboOffset * 3 + 1];
+    thisC = vb[offset + ssboOffset * 3 + 2];
+  } else {
+    thisA = tempvb[offset + ssboOffset * 3    ];
+    thisB = tempvb[offset + ssboOffset * 3 + 1];
+    thisC = tempvb[offset + ssboOffset * 3 + 2];
+  }
+  // Apply the model's orientation to each vertex; rotate() is not defined in this file (presumably from an include).
+  ivec4 thisrvA = rotate(thisA, orientation);
+  ivec4 thisrvB = rotate(thisB, orientation);
+  ivec4 thisrvC = rotate(thisC, orientation);
+
+  uint myOffset = localId; // output triangle index equals the input triangle index
+
+  // Offset the rotated vertices by pos and write them to the out buffer
+  vout[outOffset + myOffset * 3]     = pos + thisrvA;
+  vout[outOffset + myOffset * 3 + 1] = pos + thisrvB;
+  vout[outOffset + myOffset * 3 + 2] = pos + thisrvC;
+  // Write uvs at the same output indices: zeros if uvOffset < 0, else from tempuv (flags >= 0) or uv (flags < 0).
+  if (uvOffset < 0) {
+    uvout[outOffset + myOffset * 3]     = vec4(0, 0, 0, 0);
+    uvout[outOffset + myOffset * 3 + 1] = vec4(0, 0, 0, 0);
+    uvout[outOffset + myOffset * 3 + 2] = vec4(0, 0, 0, 0);
+  } else if (flags >= 0) {
+    uvout[outOffset + myOffset * 3]     = tempuv[uvOffset + localId * 3];
+    uvout[outOffset + myOffset * 3 + 1] = tempuv[uvOffset + localId * 3 + 1];
+    uvout[outOffset + myOffset * 3 + 2] = tempuv[uvOffset + localId * 3 + 2];
+  } else {
+    uvout[outOffset + myOffset * 3]     = uv[uvOffset + localId * 3];
+    uvout[outOffset + myOffset * 3 + 1] = uv[uvOffset + localId * 3 + 1];
+    uvout[outOffset + myOffset * 3 + 2] = uv[uvOffset + localId * 3 + 2];
+  }
+}