diff --git a/runelite-client/pom.xml b/runelite-client/pom.xml
index 10000e6125..48eb8825ce 100644
--- a/runelite-client/pom.xml
+++ b/runelite-client/pom.xml
@@ -177,6 +177,25 @@
2.4.0-rc-20210117
runtime
+
+ net.runelite.jocl
+ jocl
+ 1.0
+
+
+ net.runelite.jocl
+ jocl
+ 1.0
+ macos-x64
+ runtime
+
+
+ net.runelite.jocl
+ jocl
+ 1.0
+ macos-arm64
+ runtime
+
net.runelite
archive-patcher
diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GLBuffer.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GLBuffer.java
new file mode 100644
index 0000000000..cd3b7288ed
--- /dev/null
+++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GLBuffer.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021, Adam
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package net.runelite.client.plugins.gpu;
+
+import org.jocl.Pointer;
+import org.jocl.cl_mem;
+
+class GLBuffer
+{
+ int glBufferId = -1;
+ int size = -1;
+ cl_mem cl_mem;
+
+ Pointer ptr()
+ {
+ return cl_mem != null ? Pointer.to(cl_mem) : null;
+ }
+}
diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java
index a6be4e5872..8ba9db5a67 100644
--- a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java
+++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java
@@ -29,6 +29,11 @@ import com.google.inject.Provides;
import com.jogamp.nativewindow.awt.AWTGraphicsConfiguration;
import com.jogamp.nativewindow.awt.JAWTWindow;
import com.jogamp.opengl.GL;
+import static com.jogamp.opengl.GL.GL_ARRAY_BUFFER;
+import static com.jogamp.opengl.GL.GL_DYNAMIC_DRAW;
+import static com.jogamp.opengl.GL2ES2.GL_STREAM_DRAW;
+import static com.jogamp.opengl.GL2ES3.GL_STATIC_COPY;
+import static com.jogamp.opengl.GL2ES3.GL_UNIFORM_BUFFER;
import com.jogamp.opengl.GL4;
import com.jogamp.opengl.GLCapabilities;
import com.jogamp.opengl.GLContext;
@@ -45,6 +50,7 @@ import java.awt.Image;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferInt;
+import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
@@ -93,6 +99,10 @@ import net.runelite.client.plugins.gpu.config.UIScalingMode;
import net.runelite.client.plugins.gpu.template.Template;
import net.runelite.client.ui.DrawManager;
import net.runelite.client.util.OSType;
+import org.jocl.CL;
+import static org.jocl.CL.CL_MEM_READ_ONLY;
+import static org.jocl.CL.CL_MEM_WRITE_ONLY;
+import static org.jocl.CL.clCreateFromGLBuffer;
@PluginDescriptor(
name = "GPU",
@@ -105,8 +115,8 @@ import net.runelite.client.util.OSType;
public class GpuPlugin extends Plugin implements DrawCallbacks
{
// This is the maximum number of triangles the compute shaders support
- private static final int MAX_TRIANGLE = 4096;
- private static final int SMALL_TRIANGLE_COUNT = 512;
+ static final int MAX_TRIANGLE = 4096;
+ static final int SMALL_TRIANGLE_COUNT = 512;
private static final int FLAG_SCENE_BUFFER = Integer.MIN_VALUE;
private static final int DEFAULT_DISTANCE = 25;
static final int MAX_DISTANCE = 90;
@@ -115,6 +125,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Inject
private Client client;
+ @Inject
+ private OpenCLManager openCLManager;
+
@Inject
private ClientThread clientThread;
@@ -133,7 +146,14 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Inject
private PluginManager pluginManager;
- private boolean useComputeShaders;
+ enum ComputeMode
+ {
+ NONE,
+ OPENGL,
+ OPENCL
+ }
+
+ private ComputeMode computeMode = ComputeMode.NONE;
private Canvas canvas;
private JAWTWindow jawtWindow;
@@ -182,23 +202,22 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private int texSceneHandle;
private int rboSceneHandle;
- // scene vertex buffer id
- private int bufferId;
- // scene uv buffer id
- private int uvBufferId;
+ // scene vertex buffer
+ private final GLBuffer sceneVertexBuffer = new GLBuffer();
+ // scene uv buffer
+ private final GLBuffer sceneUvBuffer = new GLBuffer();
- private int tmpBufferId; // temporary scene vertex buffer
- private int tmpUvBufferId; // temporary scene uv buffer
- private int tmpModelBufferId; // scene model buffer, large
- private int tmpModelBufferSmallId; // scene model buffer, small
- private int tmpModelBufferUnorderedId;
- private int tmpOutBufferId; // target vertex buffer for compute shaders
- private int tmpOutUvBufferId; // target uv buffer for compute shaders
+ private final GLBuffer tmpVertexBuffer = new GLBuffer(); // temporary scene vertex buffer
+ private final GLBuffer tmpUvBuffer = new GLBuffer(); // temporary scene uv buffer
+ private final GLBuffer tmpModelBufferLarge = new GLBuffer(); // scene model buffer, large
+ private final GLBuffer tmpModelBufferSmall = new GLBuffer(); // scene model buffer, small
+ private final GLBuffer tmpModelBufferUnordered = new GLBuffer(); // scene model buffer, unordered
+ private final GLBuffer tmpOutBuffer = new GLBuffer(); // target vertex buffer for compute shaders
+ private final GLBuffer tmpOutUvBuffer = new GLBuffer(); // target uv buffer for compute shaders
private int textureArrayId;
- private int uniformBufferId;
- private final IntBuffer uniformBuffer = GpuIntBuffer.allocateDirect(5 + 3 + 2048 * 4);
+ private final GLBuffer uniformBuffer = new GLBuffer();
private final float[] textureOffsets = new float[128];
private GpuIntBuffer vertexBuffer;
@@ -278,7 +297,6 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
{
try
{
- bufferId = uvBufferId = uniformBufferId = tmpBufferId = tmpUvBufferId = tmpModelBufferId = tmpModelBufferSmallId = tmpModelBufferUnorderedId = tmpOutBufferId = tmpOutUvBufferId = -1;
texSceneHandle = fboSceneHandle = rboSceneHandle = -1; // AA FBO
unorderedModels = smallModels = largeModels = 0;
drawingModel = false;
@@ -290,8 +308,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
return false;
}
- // OSX supports up to OpenGL 4.1, however 4.3 is required for compute shaders
- useComputeShaders = config.useComputeShaders() && OSType.getOSType() != OSType.MacOS;
+ computeMode = config.useComputeShaders()
+ ? (OSType.getOSType() == OSType.MacOS ? ComputeMode.OPENCL : ComputeMode.OPENGL)
+ : ComputeMode.NONE;
canvas.setIgnoreRepaint(true);
@@ -397,7 +416,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
if (client.getGameState() == GameState.LOGGED_IN)
{
- uploadScene();
+ invokeOnMainThread(this::uploadScene);
}
}
catch (Throwable e)
@@ -433,6 +452,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
invokeOnMainThread(() ->
{
+ openCLManager.cleanup();
+
if (gl != null)
{
if (textureArrayId != -1)
@@ -441,11 +462,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
textureArrayId = -1;
}
- if (uniformBufferId != -1)
- {
- glDeleteBuffer(gl, uniformBufferId);
- uniformBufferId = -1;
- }
+ destroyGlBuffer(uniformBuffer);
shutdownBuffers();
shutdownInterfaceTexture();
@@ -519,12 +536,16 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
glProgram = PROGRAM.compile(gl, template);
glUiProgram = UI_PROGRAM.compile(gl, template);
- if (useComputeShaders)
+ if (computeMode == ComputeMode.OPENGL)
{
glComputeProgram = COMPUTE_PROGRAM.compile(gl, template);
glSmallComputeProgram = SMALL_COMPUTE_PROGRAM.compile(gl, template);
glUnorderedComputeProgram = UNORDERED_COMPUTE_PROGRAM.compile(gl, template);
}
+ else if (computeMode == ComputeMode.OPENCL)
+ {
+ openCLManager.init(gl);
+ }
initUniforms();
}
@@ -593,8 +614,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
-1f, 1f, 0.0f, 0.0f, 0f // top left
});
vboUiBuf.rewind();
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vboUiHandle);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, vboUiBuf.capacity() * Float.BYTES, vboUiBuf, gl.GL_STATIC_DRAW);
+ gl.glBindBuffer(GL_ARRAY_BUFFER, vboUiHandle);
+ gl.glBufferData(GL_ARRAY_BUFFER, vboUiBuf.capacity() * Float.BYTES, vboUiBuf, gl.GL_STATIC_DRAW);
// position attribute
gl.glVertexAttribPointer(0, 3, gl.GL_FLOAT, false, 5 * Float.BYTES, 0);
@@ -605,7 +626,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
gl.glEnableVertexAttribArray(1);
// unbind VBO
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0);
+ gl.glBindBuffer(GL_ARRAY_BUFFER, 0);
}
private void shutdownVao()
@@ -622,71 +643,49 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private void initBuffers()
{
- bufferId = glGenBuffers(gl);
- uvBufferId = glGenBuffers(gl);
- tmpBufferId = glGenBuffers(gl);
- tmpUvBufferId = glGenBuffers(gl);
- tmpModelBufferId = glGenBuffers(gl);
- tmpModelBufferSmallId = glGenBuffers(gl);
- tmpModelBufferUnorderedId = glGenBuffers(gl);
- tmpOutBufferId = glGenBuffers(gl);
- tmpOutUvBufferId = glGenBuffers(gl);
+ initGlBuffer(sceneVertexBuffer);
+ initGlBuffer(sceneUvBuffer);
+ initGlBuffer(tmpVertexBuffer);
+ initGlBuffer(tmpUvBuffer);
+ initGlBuffer(tmpModelBufferLarge);
+ initGlBuffer(tmpModelBufferSmall);
+ initGlBuffer(tmpModelBufferUnordered);
+ initGlBuffer(tmpOutBuffer);
+ initGlBuffer(tmpOutUvBuffer);
+ }
+
+ private void initGlBuffer(GLBuffer glBuffer)
+ {
+ glBuffer.glBufferId = glGenBuffers(gl);
}
private void shutdownBuffers()
{
- if (bufferId != -1)
- {
- glDeleteBuffer(gl, bufferId);
- bufferId = -1;
- }
+ destroyGlBuffer(sceneVertexBuffer);
+ destroyGlBuffer(sceneUvBuffer);
- if (uvBufferId != -1)
- {
- glDeleteBuffer(gl, uvBufferId);
- uvBufferId = -1;
- }
+ destroyGlBuffer(tmpVertexBuffer);
+ destroyGlBuffer(tmpUvBuffer);
+ destroyGlBuffer(tmpModelBufferLarge);
+ destroyGlBuffer(tmpModelBufferSmall);
+ destroyGlBuffer(tmpModelBufferUnordered);
+ destroyGlBuffer(tmpOutBuffer);
+ destroyGlBuffer(tmpOutUvBuffer);
+ }
- if (tmpBufferId != -1)
+ private void destroyGlBuffer(GLBuffer glBuffer)
+ {
+ if (glBuffer.glBufferId != -1)
{
- glDeleteBuffer(gl, tmpBufferId);
- tmpBufferId = -1;
+ glDeleteBuffer(gl, glBuffer.glBufferId);
+ glBuffer.glBufferId = -1;
}
+ glBuffer.size = -1;
- if (tmpUvBufferId != -1)
+ if (glBuffer.cl_mem != null)
{
- glDeleteBuffer(gl, tmpUvBufferId);
- tmpUvBufferId = -1;
- }
-
- if (tmpModelBufferId != -1)
- {
- glDeleteBuffer(gl, tmpModelBufferId);
- tmpModelBufferId = -1;
- }
-
- if (tmpModelBufferSmallId != -1)
- {
- glDeleteBuffer(gl, tmpModelBufferSmallId);
- tmpModelBufferSmallId = -1;
- }
-
- if (tmpModelBufferUnorderedId != -1)
- {
- glDeleteBuffer(gl, tmpModelBufferUnorderedId);
- tmpModelBufferUnorderedId = -1;
- }
-
- if (tmpOutBufferId != -1)
- {
- glDeleteBuffer(gl, tmpOutBufferId);
- tmpOutBufferId = -1;
- }
-
- if (tmpOutUvBufferId != -1)
- {
- glDeleteBuffer(gl, tmpOutUvBufferId);
- tmpOutUvBufferId = -1;
+ CL.clReleaseMemObject(glBuffer.cl_mem);
+ glBuffer.cl_mem = null;
}
}
@@ -709,21 +708,21 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private void initUniformBuffer()
{
- uniformBufferId = glGenBuffers(gl);
- gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId);
- uniformBuffer.clear();
- uniformBuffer.put(new int[8]);
+ initGlBuffer(uniformBuffer);
+
+ IntBuffer uniformBuf = GpuIntBuffer.allocateDirect(8 + 2048 * 4);
+ uniformBuf.put(new int[8]); // uniform block
final int[] pad = new int[2];
for (int i = 0; i < 2048; i++)
{
- uniformBuffer.put(Perspective.SINE[i]);
- uniformBuffer.put(Perspective.COSINE[i]);
- uniformBuffer.put(pad);
+ uniformBuf.put(Perspective.SINE[i]);
+ uniformBuf.put(Perspective.COSINE[i]);
+ uniformBuf.put(pad); // ivec2 alignment in std140 is 16 bytes
}
- uniformBuffer.flip();
+ uniformBuf.flip();
- gl.glBufferData(gl.GL_UNIFORM_BUFFER, uniformBuffer.limit() * Integer.BYTES, uniformBuffer, gl.GL_DYNAMIC_DRAW);
- gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0);
+ updateBuffer(uniformBuffer, GL_UNIFORM_BUFFER, uniformBuf.limit() * Integer.BYTES, uniformBuf, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+ gl.glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
private void initAAFbo(int width, int height, int aaSamples)
@@ -785,9 +784,11 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
invokeOnMainThread(() ->
{
// UBO. Only the first 32 bytes get modified here, the rest is the constant sin/cos table.
- gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId);
- uniformBuffer.clear();
- uniformBuffer
+ // We can reuse the vertex buffer since it isn't used yet.
+ vertexBuffer.clear();
+ vertexBuffer.ensureCapacity(32);
+ IntBuffer uniformBuf = vertexBuffer.getBuffer();
+ uniformBuf
.put(yaw)
.put(pitch)
.put(client.getCenterX())
@@ -796,12 +797,14 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
.put(cameraX)
.put(cameraY)
.put(cameraZ);
- uniformBuffer.flip();
+ uniformBuf.flip();
- gl.glBufferSubData(gl.GL_UNIFORM_BUFFER, 0, uniformBuffer.limit() * Integer.BYTES, uniformBuffer);
- gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0);
+ gl.glBindBuffer(GL_UNIFORM_BUFFER, uniformBuffer.glBufferId);
+ gl.glBufferSubData(GL_UNIFORM_BUFFER, 0, uniformBuf.limit() * Integer.BYTES, uniformBuf);
+ gl.glBindBuffer(GL_UNIFORM_BUFFER, 0);
- gl.glBindBufferBase(gl.GL_UNIFORM_BUFFER, 0, uniformBufferId);
+ gl.glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniformBuffer.glBufferId);
+ uniformBuf.clear();
});
}
@@ -813,7 +816,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private void postDraw()
{
- if (!useComputeShaders)
+ if (computeMode == ComputeMode.NONE)
{
// Upload buffers
vertexBuffer.flip();
@@ -822,12 +825,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
IntBuffer vertexBuffer = this.vertexBuffer.getBuffer();
FloatBuffer uvBuffer = this.uvBuffer.getBuffer();
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW);
-
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW);
-
+ updateBuffer(tmpVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_DYNAMIC_DRAW, 0L);
+ updateBuffer(tmpUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_DYNAMIC_DRAW, 0L);
return;
}
@@ -844,79 +843,91 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
IntBuffer modelBufferSmall = this.modelBufferSmall.getBuffer();
IntBuffer modelBufferUnordered = this.modelBufferUnordered.getBuffer();
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW);
+ // temp buffers
+ updateBuffer(tmpVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+ updateBuffer(tmpUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW);
-
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, gl.GL_DYNAMIC_DRAW);
-
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferSmallId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, gl.GL_DYNAMIC_DRAW);
-
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferUnorderedId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, gl.GL_DYNAMIC_DRAW);
+ // model buffers
+ updateBuffer(tmpModelBufferLarge, GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+ updateBuffer(tmpModelBufferSmall, GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+ updateBuffer(tmpModelBufferUnordered, GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
// Output buffers
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER,
+ updateBuffer(tmpOutBuffer,
+ GL_ARRAY_BUFFER,
targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes
null,
- gl.GL_STREAM_DRAW);
-
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutUvBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER,
- targetBufferOffset * 16,
+ GL_STREAM_DRAW,
+ CL_MEM_WRITE_ONLY);
+ updateBuffer(tmpOutUvBuffer,
+ GL_ARRAY_BUFFER,
+ targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes
null,
- gl.GL_STREAM_DRAW);
+ GL_STREAM_DRAW,
+ CL_MEM_WRITE_ONLY);
- // Bind UBO to compute programs
- gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0);
- gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0);
+ if (computeMode == ComputeMode.OPENCL)
+ {
+ // The docs for clEnqueueAcquireGLObjects say all pending GL operations must be completed before calling
+ // clEnqueueAcquireGLObjects, and recommends calling glFinish() as the only portable way to do that.
+ // However no issues have been observed from not calling it, and so will leave disabled for now.
+ // gl.glFinish();
+
+ openCLManager.compute(
+ unorderedModels, smallModels, largeModels,
+ sceneVertexBuffer, sceneUvBuffer,
+ tmpVertexBuffer, tmpUvBuffer,
+ tmpModelBufferUnordered, tmpModelBufferSmall, tmpModelBufferLarge,
+ tmpOutBuffer, tmpOutUvBuffer,
+ uniformBuffer);
+ return;
+ }
/*
* Compute is split into three separate programs: 'unordered', 'small', and 'large'
* to save on GPU resources. Small will sort <= 512 faces, large will do <= 4096.
*/
+ // Bind UBO to compute programs
+ gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0);
+ gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0);
+
// unordered
gl.glUseProgram(glUnorderedComputeProgram);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnorderedId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnordered.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
gl.glDispatchCompute(unorderedModels, 1, 1);
// small
gl.glUseProgram(glSmallComputeProgram);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmallId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmall.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
gl.glDispatchCompute(smallModels, 1, 1);
// large
gl.glUseProgram(glComputeProgram);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
- gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferLarge.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
+ gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
gl.glDispatchCompute(largeModels, 1, 1);
}
@@ -926,7 +937,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
SceneTilePaint paint, int tileZ, int tileX, int tileY,
int zoom, int centerX, int centerY)
{
- if (!useComputeShaders)
+ if (computeMode == ComputeMode.NONE)
{
targetBufferOffset += sceneUploader.upload(paint,
tileZ, tileX, tileY,
@@ -963,7 +974,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
SceneTileModel model, int tileZ, int tileX, int tileY,
int zoom, int centerX, int centerY)
{
- if (!useComputeShaders)
+ if (computeMode == ComputeMode.NONE)
{
targetBufferOffset += sceneUploader.upload(model,
tileX, tileY,
@@ -1131,7 +1142,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
// Ceil the sizes because even if the size is 599.1 we want to treat it as size 600 (i.e. render to the x=599 pixel).
renderViewportHeight = (int) Math.ceil(scaleFactorY * (renderViewportHeight)) + padding * 2;
- renderViewportWidth = (int) Math.ceil(scaleFactorX * (renderViewportWidth )) + padding * 2;
+ renderViewportWidth = (int) Math.ceil(scaleFactorX * (renderViewportWidth )) + padding * 2;
// Floor the offsets because even if the offset is 4.9, we want to render to the x=4 pixel anyway.
renderHeightOff = (int) Math.floor(scaleFactorY * (renderHeightOff)) - padding;
@@ -1195,27 +1206,36 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
gl.glBindVertexArray(vaoHandle);
int vertexBuffer, uvBuffer;
- if (useComputeShaders)
+ if (computeMode != ComputeMode.NONE)
{
- // Before reading the SSBOs written to from postDrawScene() we must insert a barrier
- gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT);
+ if (computeMode == ComputeMode.OPENGL)
+ {
+ // Before reading the SSBOs written to from postDrawScene() we must insert a barrier
+ gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT);
+ }
+ else
+ {
+ // Wait for the command queue to finish, so that we know the compute is done
+ openCLManager.finish();
+ }
+
// Draw using the output buffer of the compute
- vertexBuffer = tmpOutBufferId;
- uvBuffer = tmpOutUvBufferId;
+ vertexBuffer = tmpOutBuffer.glBufferId;
+ uvBuffer = tmpOutUvBuffer.glBufferId;
}
else
{
// Only use the temporary buffers, which will contain the full scene
- vertexBuffer = tmpBufferId;
- uvBuffer = tmpUvBufferId;
+ vertexBuffer = tmpVertexBuffer.glBufferId;
+ uvBuffer = tmpUvBuffer.glBufferId;
}
gl.glEnableVertexAttribArray(0);
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vertexBuffer);
+ gl.glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
gl.glVertexAttribIPointer(0, 4, gl.GL_INT, 0, 0);
gl.glEnableVertexAttribArray(1);
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, uvBuffer);
+ gl.glBindBuffer(GL_ARRAY_BUFFER, uvBuffer);
gl.glVertexAttribPointer(1, 4, gl.GL_FLOAT, false, 0, 0);
gl.glDrawArrays(gl.GL_TRIANGLES, 0, targetBufferOffset);
@@ -1400,12 +1420,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Subscribe
public void onGameStateChanged(GameStateChanged gameStateChanged)
{
- if (!useComputeShaders || gameStateChanged.getGameState() != GameState.LOGGED_IN)
+ if (computeMode == ComputeMode.NONE || gameStateChanged.getGameState() != GameState.LOGGED_IN)
{
return;
}
- uploadScene();
+ invokeOnMainThread(this::uploadScene);
}
private void uploadScene()
@@ -1421,13 +1441,10 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
IntBuffer vertexBuffer = this.vertexBuffer.getBuffer();
FloatBuffer uvBuffer = this.uvBuffer.getBuffer();
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, bufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_STATIC_COPY);
+ updateBuffer(sceneVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_STATIC_COPY, CL_MEM_READ_ONLY);
+ updateBuffer(sceneUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_STATIC_COPY, CL_MEM_READ_ONLY);
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, uvBufferId);
- gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_STATIC_COPY);
-
- gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0);
+ gl.glBindBuffer(GL_ARRAY_BUFFER, 0);
vertexBuffer.clear();
uvBuffer.clear();
@@ -1492,7 +1509,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Override
public void draw(Renderable renderable, int orientation, int pitchSin, int pitchCos, int yawSin, int yawCos, int x, int y, int z, long hash)
{
- if (!useComputeShaders)
+ if (computeMode == ComputeMode.NONE)
{
Model model = renderable instanceof Model ? (Model) renderable : renderable.getModel();
if (model != null)
@@ -1673,7 +1690,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private int getDrawDistance()
{
- final int limit = useComputeShaders ? MAX_DISTANCE : DEFAULT_DISTANCE;
+ final int limit = computeMode != ComputeMode.NONE ? MAX_DISTANCE : DEFAULT_DISTANCE;
return Ints.constrainToRange(config.drawDistance(), 0, limit);
}
@@ -1688,4 +1705,36 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
runnable.run();
}
}
+
+ private void updateBuffer(GLBuffer glBuffer, int target, int size, Buffer data, int usage, long clFlags)
+ {
+ gl.glBindBuffer(target, glBuffer.glBufferId);
+ if (size > glBuffer.size)
+ {
+ log.trace("Buffer resize: {} {} -> {}", glBuffer, glBuffer.size, size);
+
+ glBuffer.size = size;
+ gl.glBufferData(target, size, data, usage);
+
+ if (computeMode == ComputeMode.OPENCL)
+ {
+ if (glBuffer.cl_mem != null)
+ {
+ CL.clReleaseMemObject(glBuffer.cl_mem);
+ }
+ if (size == 0)
+ {
+ glBuffer.cl_mem = null;
+ }
+ else
+ {
+ glBuffer.cl_mem = clCreateFromGLBuffer(openCLManager.context, clFlags, glBuffer.glBufferId, null);
+ }
+ }
+ }
+ else if (data != null)
+ {
+ gl.glBufferSubData(target, 0, size, data);
+ }
+ }
}
diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/OpenCLManager.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/OpenCLManager.java
new file mode 100644
index 0000000000..77c2f9b575
--- /dev/null
+++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/OpenCLManager.java
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2021, Adam
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package net.runelite.client.plugins.gpu;
+
+import com.google.common.base.Charsets;
+import com.jogamp.nativewindow.NativeSurface;
+import com.jogamp.opengl.GL4;
+import com.jogamp.opengl.GLContext;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Objects;
+import javax.inject.Singleton;
+import jogamp.opengl.GLContextImpl;
+import jogamp.opengl.GLDrawableImpl;
+import jogamp.opengl.egl.EGLContext;
+import jogamp.opengl.macosx.cgl.CGL;
+import jogamp.opengl.windows.wgl.WindowsWGLContext;
+import jogamp.opengl.x11.glx.X11GLXContext;
+import lombok.extern.slf4j.Slf4j;
+import net.runelite.client.plugins.gpu.template.Template;
+import net.runelite.client.util.OSType;
+import org.jocl.CL;
+import static org.jocl.CL.*;
+import org.jocl.CLException;
+import org.jocl.Pointer;
+import org.jocl.Sizeof;
+import org.jocl.cl_command_queue;
+import org.jocl.cl_context;
+import org.jocl.cl_context_properties;
+import org.jocl.cl_device_id;
+import org.jocl.cl_event;
+import org.jocl.cl_kernel;
+import org.jocl.cl_mem;
+import org.jocl.cl_platform_id;
+import org.jocl.cl_program;
+
+@Singleton
+@Slf4j
+class OpenCLManager
+{
+ private static final String GL_SHARING_PLATFORM_EXT = "cl_khr_gl_sharing";
+
+ private static final String KERNEL_NAME_UNORDERED = "computeUnordered";
+ private static final String KERNEL_NAME_LARGE = "computeLarge";
+
+ private static final int MIN_WORK_GROUP_SIZE = 256;
+ private static final int SMALL_SIZE = GpuPlugin.SMALL_TRIANGLE_COUNT;
+ private static final int LARGE_SIZE = GpuPlugin.MAX_TRIANGLE;
+ // struct shared_data {
+ // int totalNum[12];
+ // int totalDistance[12];
+ // int totalMappedNum[18];
+ // int min10;
+ // int dfs[0];
+ // };
+ private static final int SHARED_SIZE = 12 + 12 + 18 + 1; // in ints
+
+ // The number of faces each worker processes in the two kernels
+ private int largeFaceCount;
+ private int smallFaceCount;
+
+ private cl_platform_id platform;
+ private cl_device_id device;
+ cl_context context;
+ private cl_command_queue commandQueue;
+
+ private cl_program programUnordered;
+ private cl_program programSmall;
+ private cl_program programLarge;
+
+ private cl_kernel kernelUnordered;
+ private cl_kernel kernelSmall;
+ private cl_kernel kernelLarge;
+
+ void init(GL4 gl)
+ {
+ CL.setExceptionsEnabled(true);
+
+ switch (OSType.getOSType())
+ {
+ case Windows:
+ case Linux:
+ initPlatform();
+ initDevice();
+ initContext(gl);
+ break;
+ case MacOS:
+ initMacOS(gl);
+ break;
+ default:
+ throw new RuntimeException("Unsupported OS Type " + OSType.getOSType().name());
+ }
+ ensureMinWorkGroupSize();
+ initQueue();
+ compilePrograms();
+ }
+
+ void cleanup()
+ {
+ if (programUnordered != null)
+ {
+ CL.clReleaseProgram(programUnordered);
+ programUnordered = null;
+ }
+
+ if (programSmall != null)
+ {
+ CL.clReleaseProgram(programSmall);
+ programSmall = null;
+ }
+
+ if (programLarge != null)
+ {
+ CL.clReleaseProgram(programLarge);
+ programLarge = null;
+ }
+
+ if (kernelUnordered != null)
+ {
+ CL.clReleaseKernel(kernelUnordered);
+ kernelUnordered = null;
+ }
+
+ if (kernelSmall != null)
+ {
+ CL.clReleaseKernel(kernelSmall);
+ kernelSmall = null;
+ }
+
+ if (kernelLarge != null)
+ {
+ CL.clReleaseKernel(kernelLarge);
+ kernelLarge = null;
+ }
+
+ if (commandQueue != null)
+ {
+ CL.clReleaseCommandQueue(commandQueue);
+ commandQueue = null;
+ }
+
+ if (context != null)
+ {
+ CL.clReleaseContext(context);
+ context = null;
+ }
+
+ if (device != null)
+ {
+ CL.clReleaseDevice(device);
+ device = null;
+ }
+ }
+
+ private String logPlatformInfo(cl_platform_id platform, int param)
+ {
+ long[] size = new long[1];
+ clGetPlatformInfo(platform, param, 0, null, size);
+
+ byte[] buffer = new byte[(int) size[0]];
+ clGetPlatformInfo(platform, param, buffer.length, Pointer.to(buffer), null);
+ String platformInfo = new String(buffer, Charsets.UTF_8);
+ log.debug("Platform: {}, {}", stringFor_cl_platform_info(param), platformInfo);
+ return platformInfo;
+ }
+
+ private void logBuildInfo(cl_program program, int param)
+ {
+ long[] size = new long[1];
+ clGetProgramBuildInfo(program, device, param, 0, null, size);
+
+ ByteBuffer buffer = ByteBuffer.allocateDirect((int) size[0]);
+ clGetProgramBuildInfo(program, device, param, buffer.limit(), Pointer.toBuffer(buffer), null);
+
+ switch (param)
+ {
+ case CL_PROGRAM_BUILD_STATUS:
+ log.debug("Build status: {}, {}", stringFor_cl_program_build_info(param), stringFor_cl_build_status(buffer.getInt()));
+ break;
+ case CL_PROGRAM_BINARY_TYPE:
+ log.debug("Binary type: {}, {}", stringFor_cl_program_build_info(param), stringFor_cl_program_binary_type(buffer.getInt()));
+ break;
+ case CL_PROGRAM_BUILD_LOG:
+ String buildLog = StandardCharsets.US_ASCII.decode(buffer).toString();
+ log.trace("Build log: {}, {}", stringFor_cl_program_build_info(param), buildLog);
+ break;
+ case CL_PROGRAM_BUILD_OPTIONS:
+ String message = StandardCharsets.US_ASCII.decode(buffer).toString();
+ log.debug("Build options: {}, {}", stringFor_cl_program_build_info(param), message);
+ break;
+ default:
+ throw new IllegalArgumentException();
+ }
+ }
+
+ private void initPlatform()
+ {
+ int[] platformCount = new int[1];
+ clGetPlatformIDs(0, null, platformCount);
+ if (platformCount[0] == 0)
+ {
+ throw new RuntimeException("No compute platforms found");
+ }
+
+ cl_platform_id[] platforms = new cl_platform_id[platformCount[0]];
+ clGetPlatformIDs(platforms.length, platforms, null);
+
+ for (cl_platform_id platform : platforms)
+ {
+ log.debug("Found cl_platform_id {}", platform);
+ logPlatformInfo(platform, CL_PLATFORM_PROFILE);
+ logPlatformInfo(platform, CL_PLATFORM_VERSION);
+ logPlatformInfo(platform, CL_PLATFORM_NAME);
+ logPlatformInfo(platform, CL_PLATFORM_VENDOR);
+ String[] extensions = logPlatformInfo(platform, CL_PLATFORM_EXTENSIONS).split(" ");
+ if (Arrays.stream(extensions).noneMatch(s -> s.equals(GL_SHARING_PLATFORM_EXT)))
+ {
+ throw new RuntimeException("Platform does not support OpenGL buffer sharing");
+ }
+ }
+
+ platform = platforms[0];
+ log.debug("Selected cl_platform_id {}", platform);
+ }
+
+ private void initDevice()
+ {
+ int[] deviceCount = new int[1];
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, null, deviceCount);
+ if (deviceCount[0] == 0)
+ {
+ throw new RuntimeException("No compute devices found");
+ }
+
+ cl_device_id[] devices = new cl_device_id[(int) deviceCount[0]];
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, devices.length, devices, null);
+
+ for (cl_device_id device : devices)
+ {
+ long[] size = new long[1];
+ clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, null, size);
+
+ byte[] devInfoBuf = new byte[(int) size[0]];
+ clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, devInfoBuf.length, Pointer.to(devInfoBuf), null);
+
+ log.debug("Found cl_device_id: {}", device);
+ log.debug("Device extensions: {}", new String(devInfoBuf, Charsets.UTF_8));
+ }
+
+ device = devices[0];
+ log.debug("Selected cl_device_id {}", device);
+ }
+
+ private void initContext(GL4 gl)
+ {
+ // set computation platform
+ cl_context_properties contextProps = new cl_context_properties();
+ contextProps.addProperty(CL_CONTEXT_PLATFORM, platform);
+
+ // pull gl context
+ GLContext glContext = gl.getContext();
+ log.debug("Got GLContext of type {}", glContext.getClass().getSimpleName());
+ if (!glContext.isCurrent())
+ {
+ throw new RuntimeException("Can't create OpenCL context from inactive GL Context");
+ }
+
+ // get correct props based on os
+ long glContextHandle = glContext.getHandle();
+ GLContextImpl glContextImpl = (GLContextImpl) glContext;
+ GLDrawableImpl glDrawableImpl = glContextImpl.getDrawableImpl();
+ NativeSurface nativeSurface = glDrawableImpl.getNativeSurface();
+
+ if (glContext instanceof X11GLXContext)
+ {
+ long displayHandle = nativeSurface.getDisplayHandle();
+ contextProps.addProperty(CL_GL_CONTEXT_KHR, glContextHandle);
+ contextProps.addProperty(CL_GLX_DISPLAY_KHR, displayHandle);
+ }
+ else if (glContext instanceof WindowsWGLContext)
+ {
+ long surfaceHandle = nativeSurface.getSurfaceHandle();
+ contextProps.addProperty(CL_GL_CONTEXT_KHR, glContextHandle);
+ contextProps.addProperty(CL_WGL_HDC_KHR, surfaceHandle);
+ }
+ else if (glContext instanceof EGLContext)
+ {
+ long displayHandle = nativeSurface.getDisplayHandle();
+ contextProps.addProperty(CL_GL_CONTEXT_KHR, glContextHandle);
+ contextProps.addProperty(CL_EGL_DISPLAY_KHR, displayHandle);
+ }
+
+ log.debug("Creating context with props: {}", contextProps);
+ context = clCreateContext(contextProps, 1, new cl_device_id[]{device}, null, null, null);
+ log.debug("Created compute context {}", context);
+ }
+
+ private void initMacOS(GL4 gl)
+ {
+ // get sharegroup from gl context
+ GLContext glContext = gl.getContext();
+ if (!glContext.isCurrent())
+ {
+ throw new RuntimeException("Can't create context from inactive GL");
+ }
+ long cglContext = CGL.CGLGetCurrentContext();
+ long cglShareGroup = CGL.CGLGetShareGroup(cglContext);
+
+ // build context props
+ cl_context_properties contextProps = new cl_context_properties();
+ contextProps.addProperty(CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, cglShareGroup);
+
+ // ask macos to make the context for us
+ log.debug("Creating context with props: {}", contextProps);
+ context = clCreateContext(contextProps, 0, null, null, null, null);
+
+ // pull the compute device out of the provided context
+ device = new cl_device_id();
+ clGetGLContextInfoAPPLE(context, cglContext, CL_CGL_DEVICE_FOR_CURRENT_VIRTUAL_SCREEN_APPLE, Sizeof.cl_device_id, Pointer.to(device), null);
+
+ log.debug("Got macOS CLGL compute device {}", device);
+ }
+
+ private void ensureMinWorkGroupSize()
+ {
+ long[] maxWorkGroupSize = new long[1];
+ clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, Sizeof.size_t, Pointer.to(maxWorkGroupSize), null);
+ log.debug("Device CL_DEVICE_MAX_WORK_GROUP_SIZE: {}", maxWorkGroupSize[0]);
+
+ if (maxWorkGroupSize[0] < MIN_WORK_GROUP_SIZE)
+ {
+ throw new RuntimeException("Compute device does not support min work group size " + MIN_WORK_GROUP_SIZE);
+ }
+
+ // Largest power of 2 less than or equal to maxWorkGroupSize
+ int groupSize = 0x80000000 >>> Integer.numberOfLeadingZeros((int) maxWorkGroupSize[0]);
+ largeFaceCount = LARGE_SIZE / (Math.min(groupSize, LARGE_SIZE));
+ smallFaceCount = SMALL_SIZE / (Math.min(groupSize, SMALL_SIZE));
+
+ log.debug("Face counts: small: {}, large: {}", smallFaceCount, largeFaceCount);
+ }
+
+ private void initQueue()
+ {
+ long[] l = new long[1];
+ clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, Sizeof.cl_long, Pointer.to(l), null);
+
+ commandQueue = clCreateCommandQueue(context, device, l[0] & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, null);
+ log.debug("Created command_queue {}, properties {}", commandQueue, l[0] & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
+ }
+
+ private cl_program compileProgram(String programSource)
+ {
+ log.trace("Compiling program:\n {}", programSource);
+ cl_program program = clCreateProgramWithSource(context, 1, new String[]{programSource}, null, null);
+
+ try
+ {
+ clBuildProgram(program, 0, null, null, null, null);
+ }
+ catch (CLException e)
+ {
+ logBuildInfo(program, CL_PROGRAM_BUILD_LOG);
+ throw e;
+ }
+
+ logBuildInfo(program, CL_PROGRAM_BUILD_STATUS);
+ logBuildInfo(program, CL_PROGRAM_BINARY_TYPE);
+ logBuildInfo(program, CL_PROGRAM_BUILD_OPTIONS);
+ logBuildInfo(program, CL_PROGRAM_BUILD_LOG);
+ return program;
+ }
+
+ private cl_kernel getKernel(cl_program program, String kernelName)
+ {
+ cl_kernel kernel = clCreateKernel(program, kernelName, null);
+ log.debug("Loaded kernel {} for program {}", kernelName, program);
+ return kernel;
+ }
+
+ private void compilePrograms()
+ {
+ Template templateSmall = new Template()
+ .addInclude(OpenCLManager.class)
+ .add(key -> key.equals("FACE_COUNT") ? ("#define FACE_COUNT " + smallFaceCount) : null);
+ Template templateLarge = new Template()
+ .addInclude(OpenCLManager.class)
+ .add(key -> key.equals("FACE_COUNT") ? ("#define FACE_COUNT " + largeFaceCount) : null);
+
+ String unordered = new Template()
+ .addInclude(OpenCLManager.class)
+ .load("comp_unordered.cl");
+ String small = templateSmall.load("comp.cl");
+ String large = templateLarge.load("comp.cl");
+
+ programUnordered = compileProgram(unordered);
+ programSmall = compileProgram(small);
+ programLarge = compileProgram(large);
+
+ kernelUnordered = getKernel(programUnordered, KERNEL_NAME_UNORDERED);
+ kernelSmall = getKernel(programSmall, KERNEL_NAME_LARGE);
+ kernelLarge = getKernel(programLarge, KERNEL_NAME_LARGE);
+ }
+
+ void compute(int unorderedModels, int smallModels, int largeModels,
+ GLBuffer sceneVertexBuffer,
+ GLBuffer sceneUvBuffer,
+ GLBuffer vertexBuffer,
+ GLBuffer uvBuffer,
+ GLBuffer unorderedBuffer,
+ GLBuffer smallBuffer,
+ GLBuffer largeBuffer,
+ GLBuffer outVertexBuffer,
+ GLBuffer outUvBuffer,
+ GLBuffer uniformBuffer
+ )
+ {
+ cl_mem[] glBuffersAll = {
+ sceneVertexBuffer.cl_mem,
+ sceneUvBuffer.cl_mem,
+ unorderedBuffer.cl_mem,
+ smallBuffer.cl_mem,
+ largeBuffer.cl_mem,
+ vertexBuffer.cl_mem,
+ uvBuffer.cl_mem,
+ outVertexBuffer.cl_mem,
+ outUvBuffer.cl_mem,
+ uniformBuffer.cl_mem,
+ };
+ cl_mem[] glBuffers = Arrays.stream(glBuffersAll)
+ .filter(Objects::nonNull)
+ .toArray(cl_mem[]::new);
+
+ cl_event acquireGLBuffers = new cl_event();
+ clEnqueueAcquireGLObjects(commandQueue, glBuffers.length, glBuffers, 0, null, acquireGLBuffers);
+
+ cl_event[] computeEvents = {
+ new cl_event(),
+ new cl_event(),
+ new cl_event()
+ };
+ int numComputeEvents = 0;
+
+ if (unorderedModels > 0)
+ {
+ clSetKernelArg(kernelUnordered, 0, Sizeof.cl_mem, unorderedBuffer.ptr());
+ clSetKernelArg(kernelUnordered, 1, Sizeof.cl_mem, sceneVertexBuffer.ptr());
+ clSetKernelArg(kernelUnordered, 2, Sizeof.cl_mem, vertexBuffer.ptr());
+ clSetKernelArg(kernelUnordered, 3, Sizeof.cl_mem, sceneUvBuffer.ptr());
+ clSetKernelArg(kernelUnordered, 4, Sizeof.cl_mem, uvBuffer.ptr());
+ clSetKernelArg(kernelUnordered, 5, Sizeof.cl_mem, outVertexBuffer.ptr());
+ clSetKernelArg(kernelUnordered, 6, Sizeof.cl_mem, outUvBuffer.ptr());
+
+ // queue compute call after acquireGLBuffers
+ clEnqueueNDRangeKernel(commandQueue, kernelUnordered, 1, null,
+ new long[]{unorderedModels * 6L}, new long[]{6}, 1, new cl_event[]{acquireGLBuffers}, computeEvents[numComputeEvents++]);
+ }
+
+ if (smallModels > 0)
+ {
+ clSetKernelArg(kernelSmall, 0, (SHARED_SIZE + SMALL_SIZE) * Integer.BYTES, null);
+ clSetKernelArg(kernelSmall, 1, Sizeof.cl_mem, smallBuffer.ptr());
+ clSetKernelArg(kernelSmall, 2, Sizeof.cl_mem, sceneVertexBuffer.ptr());
+ clSetKernelArg(kernelSmall, 3, Sizeof.cl_mem, vertexBuffer.ptr());
+ clSetKernelArg(kernelSmall, 4, Sizeof.cl_mem, sceneUvBuffer.ptr());
+ clSetKernelArg(kernelSmall, 5, Sizeof.cl_mem, uvBuffer.ptr());
+ clSetKernelArg(kernelSmall, 6, Sizeof.cl_mem, outVertexBuffer.ptr());
+ clSetKernelArg(kernelSmall, 7, Sizeof.cl_mem, outUvBuffer.ptr());
+ clSetKernelArg(kernelSmall, 8, Sizeof.cl_mem, uniformBuffer.ptr());
+
+ clEnqueueNDRangeKernel(commandQueue, kernelSmall, 1, null,
+ new long[]{smallModels * (SMALL_SIZE / smallFaceCount)}, new long[]{SMALL_SIZE / smallFaceCount}, 1, new cl_event[]{acquireGLBuffers}, computeEvents[numComputeEvents++]);
+ }
+
+ if (largeModels > 0)
+ {
+ clSetKernelArg(kernelLarge, 0, (SHARED_SIZE + LARGE_SIZE) * Integer.BYTES, null);
+ clSetKernelArg(kernelLarge, 1, Sizeof.cl_mem, largeBuffer.ptr());
+ clSetKernelArg(kernelLarge, 2, Sizeof.cl_mem, sceneVertexBuffer.ptr());
+ clSetKernelArg(kernelLarge, 3, Sizeof.cl_mem, vertexBuffer.ptr());
+ clSetKernelArg(kernelLarge, 4, Sizeof.cl_mem, sceneUvBuffer.ptr());
+ clSetKernelArg(kernelLarge, 5, Sizeof.cl_mem, uvBuffer.ptr());
+ clSetKernelArg(kernelLarge, 6, Sizeof.cl_mem, outVertexBuffer.ptr());
+ clSetKernelArg(kernelLarge, 7, Sizeof.cl_mem, outUvBuffer.ptr());
+ clSetKernelArg(kernelLarge, 8, Sizeof.cl_mem, uniformBuffer.ptr());
+
+ clEnqueueNDRangeKernel(commandQueue, kernelLarge, 1, null,
+ new long[]{(long) largeModels * (LARGE_SIZE / largeFaceCount)}, new long[]{LARGE_SIZE / largeFaceCount}, 1, new cl_event[]{acquireGLBuffers}, computeEvents[numComputeEvents++]);
+ }
+
+ clEnqueueReleaseGLObjects(commandQueue, glBuffers.length, glBuffers, numComputeEvents, computeEvents, null);
+ }
+
+ void finish()
+ {
+ clFinish(commandQueue);
+ }
+}
\ No newline at end of file
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/cl_types.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/cl_types.cl
new file mode 100644
index 0000000000..7f41acab49
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/cl_types.cl
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021, Adam
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+struct uniform {
+ int cameraYaw;
+ int cameraPitch;
+ int centerX;
+ int centerY;
+ int zoom;
+ int cameraX;
+ int cameraY;
+ int cameraZ;
+ int4 sinCosTable[2048];
+};
+
+struct shared_data {
+ int totalNum[12]; // number of faces with a given priority
+ int totalDistance[12]; // sum of distances to faces of a given priority
+ int totalMappedNum[18]; // number of faces with a given adjusted priority
+ int min10; // minimum distance to a face of priority 10
+ int dfs[0]; // packed face id and distance, size 512 for small, 4096 for large
+};
+
+struct modelinfo {
+ int offset; // offset into buffer
+ int uvOffset; // offset into uv buffer
+ int size; // length in faces
+ int idx; // write idx in target buffer
+ int flags; // radius, orientation
+ int x; // scene position x
+ int y; // scene position y
+ int z; // scene position z
+};
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/common.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/common.cl
new file mode 100644
index 0000000000..f499599cd2
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/common.cl
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021, Adam
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define PI 3.1415926535897932384626433832795f
+#define UNIT PI / 1024.0f
+
+float3 toScreen(int4 vertex, int cameraYaw, int cameraPitch, int centerX, int centerY, int zoom) {
+ float yawSin = sin(cameraYaw * UNIT);
+ float yawCos = cos(cameraYaw * UNIT);
+
+ float pitchSin = sin(cameraPitch * UNIT);
+ float pitchCos = cos(cameraPitch * UNIT);
+
+ float rotatedX = (vertex.z * yawSin) + (vertex.x * yawCos);
+ float rotatedZ = (vertex.z * yawCos) - (vertex.x * yawSin);
+
+ float var13 = (vertex.y * pitchCos) - (rotatedZ * pitchSin);
+ float var12 = (vertex.y * pitchSin) + (rotatedZ * pitchCos);
+
+ float x = rotatedX * zoom / var12 + centerX;
+ float y = var13 * zoom / var12 + centerY;
+ float z = -var12; // in OpenGL depth is negative
+
+ return (float3) (x, y, z);
+}
+
+/*
+ * Rotate a vertex by a given orientation in JAU
+ */
+int4 rotate_vertex(__constant struct uniform *uni, int4 vertex, int orientation) {
+ int4 sinCos = uni->sinCosTable[orientation];
+ int s = sinCos.x;
+ int c = sinCos.y;
+ int x = vertex.z * s + vertex.x * c >> 16;
+ int z = vertex.z * c - vertex.x * s >> 16;
+ return (int4)(x, vertex.y, z, vertex.w);
+}
+
+/*
+ * Calculate the distance to a vertex given the camera angle
+ */
+int vertex_distance(int4 vertex, int cameraYaw, int cameraPitch) {
+ int yawSin = (int)(65536.0f * sin(cameraYaw * UNIT));
+ int yawCos = (int)(65536.0f * cos(cameraYaw * UNIT));
+
+ int pitchSin = (int)(65536.0f * sin(cameraPitch * UNIT));
+ int pitchCos = (int)(65536.0f * cos(cameraPitch * UNIT));
+
+ int j = vertex.z * yawCos - vertex.x * yawSin >> 16;
+ int l = vertex.y * pitchSin + j * pitchCos >> 16;
+
+ return l;
+}
+
+/*
+ * Calculate the distance to a face
+ */
+int face_distance(int4 vA, int4 vB, int4 vC, int cameraYaw, int cameraPitch) {
+ int dvA = vertex_distance(vA, cameraYaw, cameraPitch);
+ int dvB = vertex_distance(vB, cameraYaw, cameraPitch);
+ int dvC = vertex_distance(vC, cameraYaw, cameraPitch);
+ int faceDistance = (dvA + dvB + dvC) / 3;
+ return faceDistance;
+}
+
+/*
+ * Test if a face is visible (not backward facing)
+ */
+bool face_visible(__constant struct uniform *uni, int4 vA, int4 vB, int4 vC, int4 position) {
+ // Move model to scene location, and account for camera offset
+ int4 cameraPos = (int4)(uni->cameraX, uni->cameraY, uni->cameraZ, 0);
+ vA += position - cameraPos;
+ vB += position - cameraPos;
+ vC += position - cameraPos;
+
+ float3 sA = toScreen(vA, uni->cameraYaw, uni->cameraPitch, uni->centerX, uni->centerY, uni->zoom);
+ float3 sB = toScreen(vB, uni->cameraYaw, uni->cameraPitch, uni->centerX, uni->centerY, uni->zoom);
+ float3 sC = toScreen(vC, uni->cameraYaw, uni->cameraPitch, uni->centerX, uni->centerY, uni->zoom);
+
+ return (sA.x - sB.x) * (sC.y - sB.y) - (sC.x - sB.x) * (sA.y - sB.y) > 0;
+}
+
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp.cl
new file mode 100644
index 0000000000..1212123f25
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp.cl
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2021, Adam
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include FACE_COUNT
+
+#include cl_types.cl
+#include to_screen.cl
+#include common.cl
+#include priority_render.cl
+
+__kernel
+__attribute__((work_group_size_hint(256, 1, 1)))
+void computeLarge(
+ __local struct shared_data *shared,
+ __global const struct modelinfo *ol,
+ __global const int4 *vb,
+ __global const int4 *tempvb,
+ __global const float4 *uv,
+ __global const float4 *tempuv,
+ __global int4 *vout,
+ __global float4 *uvout,
+ __constant struct uniform *uni) {
+
+ size_t groupId = get_group_id(0);
+ size_t localId = get_local_id(0) * FACE_COUNT;
+ struct modelinfo minfo = ol[groupId];
+ int4 pos = (int4)(minfo.x, minfo.y, minfo.z, 0);
+
+ if (localId == 0) {
+ shared->min10 = 1600;
+ for (int i = 0; i < 12; ++i) {
+ shared->totalNum[i] = 0;
+ shared->totalDistance[i] = 0;
+ }
+ for (int i = 0; i < 18; ++i) {
+ shared->totalMappedNum[i] = 0;
+ }
+ }
+
+ int prio[FACE_COUNT];
+ int dis[FACE_COUNT];
+ int4 v1[FACE_COUNT];
+ int4 v2[FACE_COUNT];
+ int4 v3[FACE_COUNT];
+
+ for (int i = 0; i < FACE_COUNT; i++) {
+ get_face(shared, uni, vb, tempvb, localId + i, minfo, uni->cameraYaw, uni->cameraPitch, &prio[i], &dis[i], &v1[i], &v2[i], &v3[i]);
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ for (int i = 0; i < FACE_COUNT; i++) {
+ add_face_prio_distance(shared, uni, localId + i, minfo, v1[i], v2[i], v3[i], prio[i], dis[i], pos);
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int prioAdj[FACE_COUNT];
+ int idx[FACE_COUNT];
+ for (int i = 0; i < FACE_COUNT; i++) {
+ idx[i] = map_face_priority(shared, localId + i, minfo, prio[i], dis[i], &prioAdj[i]);
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ for (int i = 0; i < FACE_COUNT; i++) {
+ insert_dfs(shared, localId + i, minfo, prioAdj[i], dis[i], idx[i]);
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ for (int i = 0; i < FACE_COUNT; i++) {
+ sort_and_insert(shared, uv, tempuv, vout, uvout, localId + i, minfo, prioAdj[i], dis[i], v1[i], v2[i], v3[i]);
+ }
+}
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.cl
new file mode 100644
index 0000000000..436f9a7d72
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.cl
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2021, Adam
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include cl_types.cl
+
+__kernel
+__attribute__((reqd_work_group_size(6, 1, 1)))
+void computeUnordered(__global const struct modelinfo *ol,
+ __global const int4 *vb,
+ __global const int4 *tempvb,
+ __global const float4 *uv,
+ __global const float4 *tempuv,
+ __global int4 *vout,
+ __global float4 *uvout) {
+ size_t groupId = get_group_id(0);
+ size_t localId = get_local_id(0);
+ struct modelinfo minfo = ol[groupId];
+
+ int offset = minfo.offset;
+ int size = minfo.size;
+ int outOffset = minfo.idx;
+ int uvOffset = minfo.uvOffset;
+ int flags = minfo.flags;
+ int4 pos = (int4)(minfo.x, minfo.y, minfo.z, 0);
+
+ if (localId >= size) {
+ return;
+ }
+
+ uint ssboOffset = localId;
+ int4 thisA, thisB, thisC;
+
+ // Grab triangle vertices from the correct buffer
+ if (flags < 0) {
+ thisA = vb[offset + ssboOffset * 3];
+ thisB = vb[offset + ssboOffset * 3 + 1];
+ thisC = vb[offset + ssboOffset * 3 + 2];
+ } else {
+ thisA = tempvb[offset + ssboOffset * 3];
+ thisB = tempvb[offset + ssboOffset * 3 + 1];
+ thisC = tempvb[offset + ssboOffset * 3 + 2];
+ }
+
+ uint myOffset = localId;
+
+ // position vertices in scene and write to out buffer
+ vout[outOffset + myOffset * 3] = pos + thisA;
+ vout[outOffset + myOffset * 3 + 1] = pos + thisB;
+ vout[outOffset + myOffset * 3 + 2] = pos + thisC;
+
+ if (uvOffset < 0) {
+ uvout[outOffset + myOffset * 3] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+ uvout[outOffset + myOffset * 3 + 1] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+ uvout[outOffset + myOffset * 3 + 2] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+ } else if (flags >= 0) {
+ uvout[outOffset + myOffset * 3] = tempuv[uvOffset + localId * 3];
+ uvout[outOffset + myOffset * 3 + 1] = tempuv[uvOffset + localId * 3 + 1];
+ uvout[outOffset + myOffset * 3 + 2] = tempuv[uvOffset + localId * 3 + 2];
+ } else {
+ uvout[outOffset + myOffset * 3] = uv[uvOffset + localId * 3];
+ uvout[outOffset + myOffset * 3 + 1] = uv[uvOffset + localId * 3 + 1];
+ uvout[outOffset + myOffset * 3 + 2] = uv[uvOffset + localId * 3 + 2];
+ }
+}
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl
new file mode 100644
index 0000000000..6f1a04470c
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2021, Adam
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Calculate adjusted priority for a face with a given priority, distance, and
+// model global min10 and face distance averages. This allows positioning faces
+// with priorities 10/11 into the correct 'slots' resulting in 18 possible
+// adjusted priorities
+int priority_map(int p, int distance, int _min10, int avg1, int avg2, int avg3) {
+ // (10, 11) 0 1 2 (10, 11) 3 4 (10, 11) 5 6 7 8 9 (10, 11)
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
+ switch (p) {
+ case 0: return 2;
+ case 1: return 3;
+ case 2: return 4;
+ case 3: return 7;
+ case 4: return 8;
+ case 5: return 11;
+ case 6: return 12;
+ case 7: return 13;
+ case 8: return 14;
+ case 9: return 15;
+ case 10:
+ if (distance > avg1) {
+ return 0;
+ } else if (distance > avg2) {
+ return 5;
+ } else if (distance > avg3) {
+ return 9;
+ } else {
+ return 16;
+ }
+ case 11:
+ if (distance > avg1 && _min10 > avg1) {
+ return 1;
+ } else if (distance > avg2 && (_min10 > avg1 || _min10 > avg2)) {
+ return 6;
+ } else if (distance > avg3 && (_min10 > avg1 || _min10 > avg2 || _min10 > avg3)) {
+ return 10;
+ } else {
+ return 17;
+ }
+ default:
+ return -1;
+ }
+}
+
+// calculate the number of faces with a lower adjusted priority than
+// the given adjusted priority
+int count_prio_offset(__local struct shared_data *shared, int priority) {
+ int total = 0;
+ switch (priority) {
+ case 17:
+ total += shared->totalMappedNum[16];
+ case 16:
+ total += shared->totalMappedNum[15];
+ case 15:
+ total += shared->totalMappedNum[14];
+ case 14:
+ total += shared->totalMappedNum[13];
+ case 13:
+ total += shared->totalMappedNum[12];
+ case 12:
+ total += shared->totalMappedNum[11];
+ case 11:
+ total += shared->totalMappedNum[10];
+ case 10:
+ total += shared->totalMappedNum[9];
+ case 9:
+ total += shared->totalMappedNum[8];
+ case 8:
+ total += shared->totalMappedNum[7];
+ case 7:
+ total += shared->totalMappedNum[6];
+ case 6:
+ total += shared->totalMappedNum[5];
+ case 5:
+ total += shared->totalMappedNum[4];
+ case 4:
+ total += shared->totalMappedNum[3];
+ case 3:
+ total += shared->totalMappedNum[2];
+ case 2:
+ total += shared->totalMappedNum[1];
+ case 1:
+ total += shared->totalMappedNum[0];
+ case 0:
+ return total;
+ }
+}
+
+void get_face(
+ __local struct shared_data *shared,
+ __constant struct uniform *uni,
+ __global const int4 *vb,
+ __global const int4 *tempvb,
+ uint localId, struct modelinfo minfo, int cameraYaw, int cameraPitch,
+ /* out */ int *prio, int *dis, int4 *o1, int4 *o2, int4 *o3) {
+ int size = minfo.size;
+ int offset = minfo.offset;
+ int flags = minfo.flags;
+ uint ssboOffset;
+
+ if (localId < size) {
+ ssboOffset = localId;
+ } else {
+ ssboOffset = 0;
+ }
+
+ int4 thisA;
+ int4 thisB;
+ int4 thisC;
+
+ // Grab triangle vertices from the correct buffer
+ if (flags < 0) {
+ thisA = vb[offset + ssboOffset * 3];
+ thisB = vb[offset + ssboOffset * 3 + 1];
+ thisC = vb[offset + ssboOffset * 3 + 2];
+ } else {
+ thisA = tempvb[offset + ssboOffset * 3];
+ thisB = tempvb[offset + ssboOffset * 3 + 1];
+ thisC = tempvb[offset + ssboOffset * 3 + 2];
+ }
+
+ if (localId < size) {
+ int radius = (flags & 0x7fffffff) >> 12;
+ int orientation = flags & 0x7ff;
+
+ // rotate for model orientation
+ int4 thisrvA = rotate_vertex(uni, thisA, orientation);
+ int4 thisrvB = rotate_vertex(uni, thisB, orientation);
+ int4 thisrvC = rotate_vertex(uni, thisC, orientation);
+
+ // calculate distance to face
+ int thisPriority = (thisA.w >> 16) & 0xff;// all vertices on the face have the same priority
+ int thisDistance;
+ if (radius == 0) {
+ thisDistance = 0;
+ } else {
+ thisDistance = face_distance(thisrvA, thisrvB, thisrvC, cameraYaw, cameraPitch) + radius;
+ }
+
+ *o1 = thisrvA;
+ *o2 = thisrvB;
+ *o3 = thisrvC;
+
+ *prio = thisPriority;
+ *dis = thisDistance;
+ } else {
+ *o1 = (int4)(0, 0, 0, 0);
+ *o2 = (int4)(0, 0, 0, 0);
+ *o3 = (int4)(0, 0, 0, 0);
+ *prio = 0;
+ *dis = 0;
+ }
+}
+
+void add_face_prio_distance(
+ __local struct shared_data *shared,
+ __constant struct uniform *uni,
+ uint localId, struct modelinfo minfo, int4 thisrvA, int4 thisrvB, int4 thisrvC, int thisPriority, int thisDistance, int4 pos) {
+ if (localId < minfo.size) {
+ // if the face is not culled, it is calculated into priority distance averages
+ if (face_visible(uni, thisrvA, thisrvB, thisrvC, pos)) {
+ atomic_add(&shared->totalNum[thisPriority], 1);
+ atomic_add(&shared->totalDistance[thisPriority], thisDistance);
+
+ // calculate minimum distance to any face of priority 10 for positioning the 11 faces later
+ if (thisPriority == 10) {
+ atomic_min(&shared->min10, thisDistance);
+ }
+ }
+ }
+}
+
+int map_face_priority(__local struct shared_data *shared, uint localId, struct modelinfo minfo, int thisPriority, int thisDistance, int *prio) {
+ int size = minfo.size;
+
+ // Compute average distances for 0/2, 3/4, and 6/8
+
+ if (localId < size) {
+ int avg1 = 0;
+ int avg2 = 0;
+ int avg3 = 0;
+
+ if (shared->totalNum[1] > 0 || shared->totalNum[2] > 0) {
+ avg1 = (shared->totalDistance[1] + shared->totalDistance[2]) / (shared->totalNum[1] + shared->totalNum[2]);
+ }
+
+ if (shared->totalNum[3] > 0 || shared->totalNum[4] > 0) {
+ avg2 = (shared->totalDistance[3] + shared->totalDistance[4]) / (shared->totalNum[3] + shared->totalNum[4]);
+ }
+
+ if (shared->totalNum[6] > 0 || shared->totalNum[8] > 0) {
+ avg3 = (shared->totalDistance[6] + shared->totalDistance[8]) / (shared->totalNum[6] + shared->totalNum[8]);
+ }
+
+ int adjPrio = priority_map(thisPriority, thisDistance, shared->min10, avg1, avg2, avg3);
+ int prioIdx = atomic_add(&shared->totalMappedNum[adjPrio], 1);
+
+ *prio = adjPrio;
+ return prioIdx;
+ }
+
+ *prio = 0;
+ return 0;
+}
+
+void insert_dfs(__local struct shared_data *shared, uint localId, struct modelinfo minfo, int adjPrio, int distance, int prioIdx) {
+ int size = minfo.size;
+
+ if (localId < size) {
+ // calculate base offset into dfs based on number of faces with a lower priority
+ int baseOff = count_prio_offset(shared, adjPrio);
+ // store into face array offset array by unique index
+ shared->dfs[baseOff + prioIdx] = ((int) localId << 16) | distance;
+ }
+}
+
+void sort_and_insert(
+ __local struct shared_data *shared,
+ __global const float4 *uv,
+ __global const float4 *tempuv,
+ __global int4 *vout,
+ __global float4 *uvout,
+ uint localId, struct modelinfo minfo, int thisPriority, int thisDistance, int4 thisrvA, int4 thisrvB, int4 thisrvC) {
+ /* compute face distance */
+ int size = minfo.size;
+
+ if (localId < size) {
+ int outOffset = minfo.idx;
+ int uvOffset = minfo.uvOffset;
+ int flags = minfo.flags;
+ int4 pos = (int4)(minfo.x, minfo.y, minfo.z, 0);
+
+ const int priorityOffset = count_prio_offset(shared, thisPriority);
+ const int numOfPriority = shared->totalMappedNum[thisPriority];
+ int start = priorityOffset; // index of first face with this priority
+ int end = priorityOffset + numOfPriority; // index of last face with this priority
+ int myOffset = priorityOffset;
+
+ // we only have to order faces against others of the same priority
+ // calculate position this face will be in
+ for (int i = start; i < end; ++i) {
+ int d1 = shared->dfs[i];
+ int theirId = d1 >> 16;
+ int theirDistance = d1 & 0xffff;
+
+ // the closest faces draw last, so have the highest index
+ // if two faces have the same distance, the one with the
+ // higher id draws last
+ if ((theirDistance > thisDistance)
+ || (theirDistance == thisDistance && theirId < localId)) {
+ ++myOffset;
+ }
+ }
+
+ // position vertices in scene and write to out buffer
+ vout[outOffset + myOffset * 3] = pos + thisrvA;
+ vout[outOffset + myOffset * 3 + 1] = pos + thisrvB;
+ vout[outOffset + myOffset * 3 + 2] = pos + thisrvC;
+
+ if (uvOffset < 0) {
+ uvout[outOffset + myOffset * 3] = (float4)(0, 0, 0, 0);
+ uvout[outOffset + myOffset * 3 + 1] = (float4)(0, 0, 0, 0);
+ uvout[outOffset + myOffset * 3 + 2] = (float4)(0, 0, 0, 0);
+ } else if (flags >= 0) {
+ uvout[outOffset + myOffset * 3] = tempuv[uvOffset + localId * 3];
+ uvout[outOffset + myOffset * 3 + 1] = tempuv[uvOffset + localId * 3 + 1];
+ uvout[outOffset + myOffset * 3 + 2] = tempuv[uvOffset + localId * 3 + 2];
+ } else {
+ uvout[outOffset + myOffset * 3] = uv[uvOffset + localId * 3];
+ uvout[outOffset + myOffset * 3 + 1] = uv[uvOffset + localId * 3 + 1];
+ uvout[outOffset + myOffset * 3 + 2] = uv[uvOffset + localId * 3 + 2];
+ }
+ }
+}