From 0fc1a940887a835df29f3615fbedde15684eed03 Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 28 Dec 2020 13:54:44 -0500 Subject: [PATCH] gpu: dispatch compute after scene draw This moves the compute shaders to run immediately after scene draw, instead of in the draw() callback, which happens much later. All models in the scene have been queued by that time, but since it is so early in the UI drawing, it gives a few more ms for the compute to run before the result needs to be used to draw the next frame. --- .../net/runelite/api/hooks/DrawCallbacks.java | 14 + .../client/plugins/gpu/GpuPlugin.java | 275 ++++++++++-------- 2 files changed, 170 insertions(+), 119 deletions(-) diff --git a/runelite-api/src/main/java/net/runelite/api/hooks/DrawCallbacks.java b/runelite-api/src/main/java/net/runelite/api/hooks/DrawCallbacks.java index 7cf7469f37..452480435f 100644 --- a/runelite-api/src/main/java/net/runelite/api/hooks/DrawCallbacks.java +++ b/runelite-api/src/main/java/net/runelite/api/hooks/DrawCallbacks.java @@ -52,7 +52,21 @@ public interface DrawCallbacks boolean drawFace(Model model, int face); + /** + * Called before the scene is drawn + * @param cameraX + * @param cameraY + * @param cameraZ + * @param cameraPitch + * @param cameraYaw + * @param plane + */ void drawScene(int cameraX, int cameraY, int cameraZ, int cameraPitch, int cameraYaw, int plane); + /** + * Called after the scene has been drawn + */ + void postDrawScene(); + void animate(Texture texture, int diff); } diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java index e82373edd5..7cbe5f31b0 100644 --- a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java +++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java @@ -242,8 +242,6 @@ public class GpuPlugin extends Plugin implements DrawCallbacks private AntiAliasingMode lastAntiAliasingMode; 
private int lastAnisotropicFilteringLevel = -1; - private int centerX; - private int centerY; private int yaw; private int pitch; // fields for non-compute draw @@ -776,13 +774,149 @@ public class GpuPlugin extends Plugin implements DrawCallbacks @Override public void drawScene(int cameraX, int cameraY, int cameraZ, int cameraPitch, int cameraYaw, int plane) { - centerX = client.getCenterX(); - centerY = client.getCenterY(); yaw = client.getCameraYaw(); pitch = client.getCameraPitch(); final Scene scene = client.getScene(); scene.setDrawDistance(getDrawDistance()); + + invokeOnMainThread(() -> + { + // UBO. Only the first 32 bytes get modified here, the rest is the constant sin/cos table. + gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId); + uniformBuffer.clear(); + uniformBuffer + .put(yaw) + .put(pitch) + .put(client.getCenterX()) + .put(client.getCenterY()) + .put(client.getScale()) + .put(client.getCameraX2()) + .put(client.getCameraY2()) + .put(client.getCameraZ2()); + uniformBuffer.flip(); + + gl.glBufferSubData(gl.GL_UNIFORM_BUFFER, 0, uniformBuffer.limit() * Integer.BYTES, uniformBuffer); + gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0); + + gl.glBindBufferBase(gl.GL_UNIFORM_BUFFER, 0, uniformBufferId); + }); + } + + @Override + public void postDrawScene() + { + invokeOnMainThread(this::postDraw); + } + + private void postDraw() + { + if (!useComputeShaders) + { + // Upload buffers + vertexBuffer.flip(); + uvBuffer.flip(); + + IntBuffer vertexBuffer = this.vertexBuffer.getBuffer(); + FloatBuffer uvBuffer = this.uvBuffer.getBuffer(); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW); + + return; + } + + // Upload buffers + vertexBuffer.flip(); + uvBuffer.flip(); + modelBuffer.flip(); + 
modelBufferSmall.flip(); + modelBufferUnordered.flip(); + + IntBuffer vertexBuffer = this.vertexBuffer.getBuffer(); + FloatBuffer uvBuffer = this.uvBuffer.getBuffer(); + IntBuffer modelBuffer = this.modelBuffer.getBuffer(); + IntBuffer modelBufferSmall = this.modelBufferSmall.getBuffer(); + IntBuffer modelBufferUnordered = this.modelBufferUnordered.getBuffer(); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, gl.GL_DYNAMIC_DRAW); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferSmallId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, gl.GL_DYNAMIC_DRAW); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferUnorderedId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, gl.GL_DYNAMIC_DRAW); + + // Output buffers + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutBufferId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, + targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes + null, + gl.GL_STREAM_DRAW); + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutUvBufferId); + gl.glBufferData(gl.GL_ARRAY_BUFFER, + targetBufferOffset * 16, + null, + gl.GL_STREAM_DRAW); + + // Bind UBO to compute programs + gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0); + gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0); + + /* + * Compute is split into three separate programs: 'unordered', 'small', and 'large' + * to save on GPU resources. Small will sort <= 512 faces, large will do <= 4096. 
+ */ + + // unordered + gl.glUseProgram(glUnorderedComputeProgram); + + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnorderedId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId); + + gl.glDispatchCompute(unorderedModels, 1, 1); + + // small + gl.glUseProgram(glSmallComputeProgram); + + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmallId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId); + + gl.glDispatchCompute(smallModels, 1, 1); + + // large + gl.glUseProgram(glComputeProgram); + + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId); + gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId); + + gl.glDispatchCompute(largeModels, 1, 1); } @Override @@ -955,121 +1089,10 @@ public class GpuPlugin extends Plugin implements DrawCallbacks gl.glClearColor((sky >> 16 & 0xFF) / 255f, (sky >> 8 & 0xFF) / 255f, (sky & 
0xFF) / 255f, 1f); gl.glClear(gl.GL_COLOR_BUFFER_BIT); - // Upload buffers - vertexBuffer.flip(); - uvBuffer.flip(); - modelBuffer.flip(); - modelBufferSmall.flip(); - modelBufferUnordered.flip(); - - IntBuffer vertexBuffer = this.vertexBuffer.getBuffer(); - FloatBuffer uvBuffer = this.uvBuffer.getBuffer(); - IntBuffer modelBuffer = this.modelBuffer.getBuffer(); - IntBuffer modelBufferSmall = this.modelBufferSmall.getBuffer(); - IntBuffer modelBufferUnordered = this.modelBufferUnordered.getBuffer(); - - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId); - gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW); - - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId); - gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW); - - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferId); - gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, gl.GL_DYNAMIC_DRAW); - - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferSmallId); - gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, gl.GL_DYNAMIC_DRAW); - - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferUnorderedId); - gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, gl.GL_DYNAMIC_DRAW); - - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutBufferId); - gl.glBufferData(gl.GL_ARRAY_BUFFER, - targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes - null, - gl.GL_STREAM_DRAW); - - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutUvBufferId); - gl.glBufferData(gl.GL_ARRAY_BUFFER, - targetBufferOffset * 16, - null, - gl.GL_STREAM_DRAW); - - // UBO. Only the first 32 bytes get modified here, the rest is the constant sin/cos table. 
- gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId); - uniformBuffer.clear(); - uniformBuffer - .put(yaw) - .put(pitch) - .put(centerX) - .put(centerY) - .put(client.getScale()) - .put(client.getCameraX2()) - .put(client.getCameraY2()) - .put(client.getCameraZ2()); - uniformBuffer.flip(); - - gl.glBufferSubData(gl.GL_UNIFORM_BUFFER, 0, uniformBuffer.limit() * Integer.BYTES, uniformBuffer); - gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0); - - gl.glBindBufferBase(gl.GL_UNIFORM_BUFFER, 0, uniformBufferId); - // Draw 3d scene final TextureProvider textureProvider = client.getTextureProvider(); if (textureProvider != null) { - if (useComputeShaders) - { - gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0); - gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0); - - /* - * Compute is split into two separate programs 'small' and 'large' to - * save on GPU resources. Small will sort <= 512 faces, large will do <= 4096. - */ - - // unordered - gl.glUseProgram(glUnorderedComputeProgram); - - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnorderedId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId); - - gl.glDispatchCompute(unorderedModels, 1, 1); - - // small - gl.glUseProgram(glSmallComputeProgram); - - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmallId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId); - 
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId); - - gl.glDispatchCompute(smallModels, 1, 1); - - // large - gl.glUseProgram(glComputeProgram); - - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId); - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId); - - gl.glDispatchCompute(largeModels, 1, 1); - - gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT); - } - if (textureArrayId == -1) { // lazy init textures as they may not be loaded at plugin start. @@ -1168,14 +1191,28 @@ public class GpuPlugin extends Plugin implements DrawCallbacks // Draw buffers gl.glBindVertexArray(vaoHandle); - // When using compute shaders, draw using the output buffer of the compute. Otherwise - // only use the temporary buffers, which will contain the full scene. + int vertexBuffer, uvBuffer; + if (useComputeShaders) + { + // Before reading the SSBOs written to from postDrawScene() we must insert a barrier + gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT); + // Draw using the output buffer of the compute + vertexBuffer = tmpOutBufferId; + uvBuffer = tmpOutUvBufferId; + } + else + { + // Only use the temporary buffers, which will contain the full scene + vertexBuffer = tmpBufferId; + uvBuffer = tmpUvBufferId; + } + gl.glEnableVertexAttribArray(0); - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, useComputeShaders ? 
tmpOutBufferId : tmpBufferId); + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vertexBuffer); gl.glVertexAttribIPointer(0, 4, gl.GL_INT, 0, 0); gl.glEnableVertexAttribArray(1); - gl.glBindBuffer(gl.GL_ARRAY_BUFFER, useComputeShaders ? tmpOutUvBufferId : tmpUvBufferId); + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, uvBuffer); gl.glVertexAttribPointer(1, 4, gl.GL_FLOAT, false, 0, 0); gl.glDrawArrays(gl.GL_TRIANGLES, 0, targetBufferOffset);