diff --git a/runelite-client/pom.xml b/runelite-client/pom.xml
index 10000e6125..48eb8825ce 100644
--- a/runelite-client/pom.xml
+++ b/runelite-client/pom.xml
@@ -177,6 +177,25 @@
 			<version>2.4.0-rc-20210117</version>
 			<scope>runtime</scope>
 		</dependency>
+		<dependency>
+			<groupId>net.runelite.jocl</groupId>
+			<artifactId>jocl</artifactId>
+			<version>1.0</version>
+		</dependency>
+		<dependency>
+			<groupId>net.runelite.jocl</groupId>
+			<artifactId>jocl</artifactId>
+			<version>1.0</version>
+			<classifier>macos-x64</classifier>
+			<scope>runtime</scope>
+		</dependency>
+		<dependency>
+			<groupId>net.runelite.jocl</groupId>
+			<artifactId>jocl</artifactId>
+			<version>1.0</version>
+			<classifier>macos-arm64</classifier>
+			<scope>runtime</scope>
+		</dependency>
 		<dependency>
 			<groupId>net.runelite</groupId>
 			<artifactId>archive-patcher</artifactId>
diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GLBuffer.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GLBuffer.java
new file mode 100644
index 0000000000..cd3b7288ed
--- /dev/null
+++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GLBuffer.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021, Adam <Adam@sigterm.info>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package net.runelite.client.plugins.gpu;
+
+import org.jocl.Pointer;
+import org.jocl.cl_mem;
+
+class GLBuffer
+{
+	int glBufferId = -1;
+	int size = -1;
+	cl_mem cl_mem;
+
+	Pointer ptr()
+	{
+		return cl_mem != null ? Pointer.to(cl_mem) : null;
+	}
+}
diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java
index a6be4e5872..8ba9db5a67 100644
--- a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java
+++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/GpuPlugin.java
@@ -29,6 +29,11 @@ import com.google.inject.Provides;
 import com.jogamp.nativewindow.awt.AWTGraphicsConfiguration;
 import com.jogamp.nativewindow.awt.JAWTWindow;
 import com.jogamp.opengl.GL;
+import static com.jogamp.opengl.GL.GL_ARRAY_BUFFER;
+import static com.jogamp.opengl.GL.GL_DYNAMIC_DRAW;
+import static com.jogamp.opengl.GL2ES2.GL_STREAM_DRAW;
+import static com.jogamp.opengl.GL2ES3.GL_STATIC_COPY;
+import static com.jogamp.opengl.GL2ES3.GL_UNIFORM_BUFFER;
 import com.jogamp.opengl.GL4;
 import com.jogamp.opengl.GLCapabilities;
 import com.jogamp.opengl.GLContext;
@@ -45,6 +50,7 @@ import java.awt.Image;
 import java.awt.geom.AffineTransform;
 import java.awt.image.BufferedImage;
 import java.awt.image.DataBufferInt;
+import java.nio.Buffer;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.FloatBuffer;
@@ -93,6 +99,10 @@ import net.runelite.client.plugins.gpu.config.UIScalingMode;
 import net.runelite.client.plugins.gpu.template.Template;
 import net.runelite.client.ui.DrawManager;
 import net.runelite.client.util.OSType;
+import org.jocl.CL;
+import static org.jocl.CL.CL_MEM_READ_ONLY;
+import static org.jocl.CL.CL_MEM_WRITE_ONLY;
+import static org.jocl.CL.clCreateFromGLBuffer;
 
 @PluginDescriptor(
 	name = "GPU",
@@ -105,8 +115,8 @@ import net.runelite.client.util.OSType;
 public class GpuPlugin extends Plugin implements DrawCallbacks
 {
 	// This is the maximum number of triangles the compute shaders support
-	private static final int MAX_TRIANGLE = 4096;
-	private static final int SMALL_TRIANGLE_COUNT = 512;
+	static final int MAX_TRIANGLE = 4096;
+	static final int SMALL_TRIANGLE_COUNT = 512;
 	private static final int FLAG_SCENE_BUFFER = Integer.MIN_VALUE;
 	private static final int DEFAULT_DISTANCE = 25;
 	static final int MAX_DISTANCE = 90;
@@ -115,6 +125,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 	@Inject
 	private Client client;
 
+	@Inject
+	private OpenCLManager openCLManager;
+
 	@Inject
 	private ClientThread clientThread;
 
@@ -133,7 +146,14 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 	@Inject
 	private PluginManager pluginManager;
 
-	private boolean useComputeShaders;
+	enum ComputeMode
+	{
+		NONE,
+		OPENGL,
+		OPENCL
+	}
+
+	private ComputeMode computeMode = ComputeMode.NONE;
 
 	private Canvas canvas;
 	private JAWTWindow jawtWindow;
@@ -182,23 +202,22 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 	private int texSceneHandle;
 	private int rboSceneHandle;
 
-	// scene vertex buffer id
-	private int bufferId;
-	// scene uv buffer id
-	private int uvBufferId;
+	// scene vertex buffer
+	private final GLBuffer sceneVertexBuffer = new GLBuffer();
+	// scene uv buffer
+	private final GLBuffer sceneUvBuffer = new GLBuffer();
 
-	private int tmpBufferId; // temporary scene vertex buffer
-	private int tmpUvBufferId; // temporary scene uv buffer
-	private int tmpModelBufferId; // scene model buffer, large
-	private int tmpModelBufferSmallId; // scene model buffer, small
-	private int tmpModelBufferUnorderedId;
-	private int tmpOutBufferId; // target vertex buffer for compute shaders
-	private int tmpOutUvBufferId; // target uv buffer for compute shaders
+	private final GLBuffer tmpVertexBuffer = new GLBuffer(); // temporary scene vertex buffer
+	private final GLBuffer tmpUvBuffer = new GLBuffer(); // temporary scene uv buffer
+	private final GLBuffer tmpModelBufferLarge = new GLBuffer(); // scene model buffer, large
+	private final GLBuffer tmpModelBufferSmall = new GLBuffer(); // scene model buffer, small
+	private final GLBuffer tmpModelBufferUnordered = new GLBuffer(); // scene model buffer, unordered
+	private final GLBuffer tmpOutBuffer = new GLBuffer(); // target vertex buffer for compute shaders
+	private final GLBuffer tmpOutUvBuffer = new GLBuffer(); // target uv buffer for compute shaders
 
 	private int textureArrayId;
 
-	private int uniformBufferId;
-	private final IntBuffer uniformBuffer = GpuIntBuffer.allocateDirect(5 + 3 + 2048 * 4);
+	private final GLBuffer uniformBuffer = new GLBuffer();
 	private final float[] textureOffsets = new float[128];
 
 	private GpuIntBuffer vertexBuffer;
@@ -278,7 +297,6 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 		{
 			try
 			{
-				bufferId = uvBufferId = uniformBufferId = tmpBufferId = tmpUvBufferId = tmpModelBufferId = tmpModelBufferSmallId = tmpModelBufferUnorderedId = tmpOutBufferId = tmpOutUvBufferId = -1;
 				texSceneHandle = fboSceneHandle = rboSceneHandle = -1; // AA FBO
 				unorderedModels = smallModels = largeModels = 0;
 				drawingModel = false;
@@ -290,8 +308,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 					return false;
 				}
 
-				// OSX supports up to OpenGL 4.1, however 4.3 is required for compute shaders
-				useComputeShaders = config.useComputeShaders() && OSType.getOSType() != OSType.MacOS;
+				computeMode = config.useComputeShaders()
+					? (OSType.getOSType() == OSType.MacOS ? ComputeMode.OPENCL : ComputeMode.OPENGL)
+					: ComputeMode.NONE;
 
 				canvas.setIgnoreRepaint(true);
 
@@ -397,7 +416,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 
 				if (client.getGameState() == GameState.LOGGED_IN)
 				{
-					uploadScene();
+					invokeOnMainThread(this::uploadScene);
 				}
 			}
 			catch (Throwable e)
@@ -433,6 +452,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 
 			invokeOnMainThread(() ->
 			{
+				openCLManager.cleanup();
+
 				if (gl != null)
 				{
 					if (textureArrayId != -1)
@@ -441,11 +462,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 						textureArrayId = -1;
 					}
 
-					if (uniformBufferId != -1)
-					{
-						glDeleteBuffer(gl, uniformBufferId);
-						uniformBufferId = -1;
-					}
+					destroyGlBuffer(uniformBuffer);
 
 					shutdownBuffers();
 					shutdownInterfaceTexture();
@@ -519,12 +536,16 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 		glProgram = PROGRAM.compile(gl, template);
 		glUiProgram = UI_PROGRAM.compile(gl, template);
 
-		if (useComputeShaders)
+		if (computeMode == ComputeMode.OPENGL)
 		{
 			glComputeProgram = COMPUTE_PROGRAM.compile(gl, template);
 			glSmallComputeProgram = SMALL_COMPUTE_PROGRAM.compile(gl, template);
 			glUnorderedComputeProgram = UNORDERED_COMPUTE_PROGRAM.compile(gl, template);
 		}
+		else if (computeMode == ComputeMode.OPENCL)
+		{
+			openCLManager.init(gl);
+		}
 
 		initUniforms();
 	}
@@ -593,8 +614,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 			-1f, 1f, 0.0f, 0.0f, 0f  // top left
 		});
 		vboUiBuf.rewind();
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vboUiHandle);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, vboUiBuf.capacity() * Float.BYTES, vboUiBuf, gl.GL_STATIC_DRAW);
+		gl.glBindBuffer(GL_ARRAY_BUFFER, vboUiHandle);
+		gl.glBufferData(GL_ARRAY_BUFFER, vboUiBuf.capacity() * Float.BYTES, vboUiBuf, gl.GL_STATIC_DRAW);
 
 		// position attribute
 		gl.glVertexAttribPointer(0, 3, gl.GL_FLOAT, false, 5 * Float.BYTES, 0);
@@ -605,7 +626,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 		gl.glEnableVertexAttribArray(1);
 
 		// unbind VBO
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0);
+		gl.glBindBuffer(GL_ARRAY_BUFFER, 0);
 	}
 
 	private void shutdownVao()
@@ -622,71 +643,49 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 
 	private void initBuffers()
 	{
-		bufferId = glGenBuffers(gl);
-		uvBufferId = glGenBuffers(gl);
-		tmpBufferId = glGenBuffers(gl);
-		tmpUvBufferId = glGenBuffers(gl);
-		tmpModelBufferId = glGenBuffers(gl);
-		tmpModelBufferSmallId = glGenBuffers(gl);
-		tmpModelBufferUnorderedId = glGenBuffers(gl);
-		tmpOutBufferId = glGenBuffers(gl);
-		tmpOutUvBufferId = glGenBuffers(gl);
+		initGlBuffer(sceneVertexBuffer);
+		initGlBuffer(sceneUvBuffer);
+		initGlBuffer(tmpVertexBuffer);
+		initGlBuffer(tmpUvBuffer);
+		initGlBuffer(tmpModelBufferLarge);
+		initGlBuffer(tmpModelBufferSmall);
+		initGlBuffer(tmpModelBufferUnordered);
+		initGlBuffer(tmpOutBuffer);
+		initGlBuffer(tmpOutUvBuffer);
+	}
+
+	private void initGlBuffer(GLBuffer glBuffer)
+	{
+		glBuffer.glBufferId = glGenBuffers(gl);
 	}
 
 	private void shutdownBuffers()
 	{
-		if (bufferId != -1)
-		{
-			glDeleteBuffer(gl, bufferId);
-			bufferId = -1;
-		}
+		destroyGlBuffer(sceneVertexBuffer);
+		destroyGlBuffer(sceneUvBuffer);
 
-		if (uvBufferId != -1)
-		{
-			glDeleteBuffer(gl, uvBufferId);
-			uvBufferId = -1;
-		}
+		destroyGlBuffer(tmpVertexBuffer);
+		destroyGlBuffer(tmpUvBuffer);
+		destroyGlBuffer(tmpModelBufferLarge);
+		destroyGlBuffer(tmpModelBufferSmall);
+		destroyGlBuffer(tmpModelBufferUnordered);
+		destroyGlBuffer(tmpOutBuffer);
+		destroyGlBuffer(tmpOutUvBuffer);
+	}
 
-		if (tmpBufferId != -1)
+	private void destroyGlBuffer(GLBuffer glBuffer)
+	{
+		if (glBuffer.glBufferId != -1)
 		{
-			glDeleteBuffer(gl, tmpBufferId);
-			tmpBufferId = -1;
+			glDeleteBuffer(gl, glBuffer.glBufferId);
+			glBuffer.glBufferId = -1;
 		}
+		glBuffer.size = -1;
 
-		if (tmpUvBufferId != -1)
+		if (glBuffer.cl_mem != null)
 		{
-			glDeleteBuffer(gl, tmpUvBufferId);
-			tmpUvBufferId = -1;
-		}
-
-		if (tmpModelBufferId != -1)
-		{
-			glDeleteBuffer(gl, tmpModelBufferId);
-			tmpModelBufferId = -1;
-		}
-
-		if (tmpModelBufferSmallId != -1)
-		{
-			glDeleteBuffer(gl, tmpModelBufferSmallId);
-			tmpModelBufferSmallId = -1;
-		}
-
-		if (tmpModelBufferUnorderedId != -1)
-		{
-			glDeleteBuffer(gl, tmpModelBufferUnorderedId);
-			tmpModelBufferUnorderedId = -1;
-		}
-
-		if (tmpOutBufferId != -1)
-		{
-			glDeleteBuffer(gl, tmpOutBufferId);
-			tmpOutBufferId = -1;
-		}
-
-		if (tmpOutUvBufferId != -1)
-		{
-			glDeleteBuffer(gl, tmpOutUvBufferId);
-			tmpOutUvBufferId = -1;
+			CL.clReleaseMemObject(glBuffer.cl_mem);
+			glBuffer.cl_mem = null;
 		}
 	}
 
@@ -709,21 +708,21 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 
 	private void initUniformBuffer()
 	{
-		uniformBufferId = glGenBuffers(gl);
-		gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId);
-		uniformBuffer.clear();
-		uniformBuffer.put(new int[8]);
+		initGlBuffer(uniformBuffer);
+
+		IntBuffer uniformBuf = GpuIntBuffer.allocateDirect(8 + 2048 * 4);
+		uniformBuf.put(new int[8]); // uniform block
 		final int[] pad = new int[2];
 		for (int i = 0; i < 2048; i++)
 		{
-			uniformBuffer.put(Perspective.SINE[i]);
-			uniformBuffer.put(Perspective.COSINE[i]);
-			uniformBuffer.put(pad);
+			uniformBuf.put(Perspective.SINE[i]);
+			uniformBuf.put(Perspective.COSINE[i]);
+			uniformBuf.put(pad); // ivec2 alignment in std140 is 16 bytes
 		}
-		uniformBuffer.flip();
+		uniformBuf.flip();
 
-		gl.glBufferData(gl.GL_UNIFORM_BUFFER, uniformBuffer.limit() * Integer.BYTES, uniformBuffer, gl.GL_DYNAMIC_DRAW);
-		gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0);
+		updateBuffer(uniformBuffer, GL_UNIFORM_BUFFER, uniformBuf.limit() * Integer.BYTES, uniformBuf, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+		gl.glBindBuffer(GL_UNIFORM_BUFFER, 0);
 	}
 
 	private void initAAFbo(int width, int height, int aaSamples)
@@ -785,9 +784,11 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 		invokeOnMainThread(() ->
 		{
 			// UBO. Only the first 32 bytes get modified here, the rest is the constant sin/cos table.
-			gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId);
-			uniformBuffer.clear();
-			uniformBuffer
+			// We can reuse the vertex buffer since it isn't used yet.
+			vertexBuffer.clear();
+			vertexBuffer.ensureCapacity(32);
+			IntBuffer uniformBuf = vertexBuffer.getBuffer();
+			uniformBuf
 				.put(yaw)
 				.put(pitch)
 				.put(client.getCenterX())
@@ -796,12 +797,14 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 				.put(cameraX)
 				.put(cameraY)
 				.put(cameraZ);
-			uniformBuffer.flip();
+			uniformBuf.flip();
 
-			gl.glBufferSubData(gl.GL_UNIFORM_BUFFER, 0, uniformBuffer.limit() * Integer.BYTES, uniformBuffer);
-			gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0);
+			gl.glBindBuffer(GL_UNIFORM_BUFFER, uniformBuffer.glBufferId);
+			gl.glBufferSubData(GL_UNIFORM_BUFFER, 0, uniformBuf.limit() * Integer.BYTES, uniformBuf);
+			gl.glBindBuffer(GL_UNIFORM_BUFFER, 0);
 
-			gl.glBindBufferBase(gl.GL_UNIFORM_BUFFER, 0, uniformBufferId);
+			gl.glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniformBuffer.glBufferId);
+			uniformBuf.clear();
 		});
 	}
 
@@ -813,7 +816,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 
 	private void postDraw()
 	{
-		if (!useComputeShaders)
+		if (computeMode == ComputeMode.NONE)
 		{
 			// Upload buffers
 			vertexBuffer.flip();
@@ -822,12 +825,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 			IntBuffer vertexBuffer = this.vertexBuffer.getBuffer();
 			FloatBuffer uvBuffer = this.uvBuffer.getBuffer();
 
-			gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId);
-			gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW);
-
-			gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId);
-			gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW);
-
+			updateBuffer(tmpVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_DYNAMIC_DRAW, 0L);
+			updateBuffer(tmpUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_DYNAMIC_DRAW, 0L);
 			return;
 		}
 
@@ -844,79 +843,91 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 		IntBuffer modelBufferSmall = this.modelBufferSmall.getBuffer();
 		IntBuffer modelBufferUnordered = this.modelBufferUnordered.getBuffer();
 
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW);
+		// temp buffers
+		updateBuffer(tmpVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+		updateBuffer(tmpUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
 
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW);
-
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, gl.GL_DYNAMIC_DRAW);
-
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferSmallId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, gl.GL_DYNAMIC_DRAW);
-
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferUnorderedId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, gl.GL_DYNAMIC_DRAW);
+		// model buffers
+		updateBuffer(tmpModelBufferLarge, GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+		updateBuffer(tmpModelBufferSmall, GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
+		updateBuffer(tmpModelBufferUnordered, GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
 
 		// Output buffers
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutBufferId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER,
+		updateBuffer(tmpOutBuffer,
+			GL_ARRAY_BUFFER,
 			targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes
 			null,
-			gl.GL_STREAM_DRAW);
-
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutUvBufferId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER,
-			targetBufferOffset * 16,
+			GL_STREAM_DRAW,
+			CL_MEM_WRITE_ONLY);
+		updateBuffer(tmpOutUvBuffer,
+			GL_ARRAY_BUFFER,
+			targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes
 			null,
-			gl.GL_STREAM_DRAW);
+			GL_STREAM_DRAW,
+			CL_MEM_WRITE_ONLY);
 
-		// Bind UBO to compute programs
-		gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0);
-		gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0);
+		if (computeMode == ComputeMode.OPENCL)
+		{
+			// The docs for clEnqueueAcquireGLObjects say all pending GL operations must be completed before calling
+			// clEnqueueAcquireGLObjects, and recommends calling glFinish() as the only portable way to do that.
+			// However no issues have been observed from not calling it, and so will leave disabled for now.
+			// gl.glFinish();
+
+			openCLManager.compute(
+				unorderedModels, smallModels, largeModels,
+				sceneVertexBuffer, sceneUvBuffer,
+				tmpVertexBuffer, tmpUvBuffer,
+				tmpModelBufferUnordered, tmpModelBufferSmall, tmpModelBufferLarge,
+				tmpOutBuffer, tmpOutUvBuffer,
+				uniformBuffer);
+			return;
+		}
 
 		/*
 		 * Compute is split into three separate programs: 'unordered', 'small', and 'large'
 		 * to save on GPU resources. Small will sort <= 512 faces, large will do <= 4096.
 		 */
 
+		// Bind UBO to compute programs
+		gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0);
+		gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0);
+
 		// unordered
 		gl.glUseProgram(glUnorderedComputeProgram);
 
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnorderedId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnordered.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
 
 		gl.glDispatchCompute(unorderedModels, 1, 1);
 
 		// small
 		gl.glUseProgram(glSmallComputeProgram);
 
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmallId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmall.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
 
 		gl.glDispatchCompute(smallModels, 1, 1);
 
 		// large
 		gl.glUseProgram(glComputeProgram);
 
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
-		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferLarge.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
+		gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
 
 		gl.glDispatchCompute(largeModels, 1, 1);
 	}
@@ -926,7 +937,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 							SceneTilePaint paint, int tileZ, int tileX, int tileY,
 							int zoom, int centerX, int centerY)
 	{
-		if (!useComputeShaders)
+		if (computeMode == ComputeMode.NONE)
 		{
 			targetBufferOffset += sceneUploader.upload(paint,
 				tileZ, tileX, tileY,
@@ -963,7 +974,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 							SceneTileModel model, int tileZ, int tileX, int tileY,
 							int zoom, int centerX, int centerY)
 	{
-		if (!useComputeShaders)
+		if (computeMode == ComputeMode.NONE)
 		{
 			targetBufferOffset += sceneUploader.upload(model,
 				tileX, tileY,
@@ -1131,7 +1142,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 
 				// Ceil the sizes because even if the size is 599.1 we want to treat it as size 600 (i.e. render to the x=599 pixel).
 				renderViewportHeight = (int) Math.ceil(scaleFactorY * (renderViewportHeight)) + padding * 2;
-				renderViewportWidth  = (int) Math.ceil(scaleFactorX * (renderViewportWidth )) + padding * 2;
+				renderViewportWidth  = (int) Math.ceil(scaleFactorX * (renderViewportWidth ))  + padding * 2;
 
 				// Floor the offsets because even if the offset is 4.9, we want to render to the x=4 pixel anyway.
 				renderHeightOff      = (int) Math.floor(scaleFactorY * (renderHeightOff)) - padding;
@@ -1195,27 +1206,36 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 			gl.glBindVertexArray(vaoHandle);
 
 			int vertexBuffer, uvBuffer;
-			if (useComputeShaders)
+			if (computeMode != ComputeMode.NONE)
 			{
-				// Before reading the SSBOs written to from postDrawScene() we must insert a barrier
-				gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT);
+				if (computeMode == ComputeMode.OPENGL)
+				{
+					// Before reading the SSBOs written to from postDrawScene() we must insert a barrier
+					gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT);
+				}
+				else
+				{
+					// Wait for the command queue to finish, so that we know the compute is done
+					openCLManager.finish();
+				}
+
 				// Draw using the output buffer of the compute
-				vertexBuffer = tmpOutBufferId;
-				uvBuffer = tmpOutUvBufferId;
+				vertexBuffer = tmpOutBuffer.glBufferId;
+				uvBuffer = tmpOutUvBuffer.glBufferId;
 			}
 			else
 			{
 				// Only use the temporary buffers, which will contain the full scene
-				vertexBuffer = tmpBufferId;
-				uvBuffer = tmpUvBufferId;
+				vertexBuffer = tmpVertexBuffer.glBufferId;
+				uvBuffer = tmpUvBuffer.glBufferId;
 			}
 
 			gl.glEnableVertexAttribArray(0);
-			gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vertexBuffer);
+			gl.glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
 			gl.glVertexAttribIPointer(0, 4, gl.GL_INT, 0, 0);
 
 			gl.glEnableVertexAttribArray(1);
-			gl.glBindBuffer(gl.GL_ARRAY_BUFFER, uvBuffer);
+			gl.glBindBuffer(GL_ARRAY_BUFFER, uvBuffer);
 			gl.glVertexAttribPointer(1, 4, gl.GL_FLOAT, false, 0, 0);
 
 			gl.glDrawArrays(gl.GL_TRIANGLES, 0, targetBufferOffset);
@@ -1400,12 +1420,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 	@Subscribe
 	public void onGameStateChanged(GameStateChanged gameStateChanged)
 	{
-		if (!useComputeShaders || gameStateChanged.getGameState() != GameState.LOGGED_IN)
+		if (computeMode == ComputeMode.NONE || gameStateChanged.getGameState() != GameState.LOGGED_IN)
 		{
 			return;
 		}
 
-		uploadScene();
+		invokeOnMainThread(this::uploadScene);
 	}
 
 	private void uploadScene()
@@ -1421,13 +1441,10 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 		IntBuffer vertexBuffer = this.vertexBuffer.getBuffer();
 		FloatBuffer uvBuffer = this.uvBuffer.getBuffer();
 
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, bufferId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_STATIC_COPY);
+		updateBuffer(sceneVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_STATIC_COPY, CL_MEM_READ_ONLY);
+		updateBuffer(sceneUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_STATIC_COPY, CL_MEM_READ_ONLY);
 
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, uvBufferId);
-		gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_STATIC_COPY);
-
-		gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0);
+		gl.glBindBuffer(GL_ARRAY_BUFFER, 0);
 
 		vertexBuffer.clear();
 		uvBuffer.clear();
@@ -1492,7 +1509,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 	@Override
 	public void draw(Renderable renderable, int orientation, int pitchSin, int pitchCos, int yawSin, int yawCos, int x, int y, int z, long hash)
 	{
-		if (!useComputeShaders)
+		if (computeMode == ComputeMode.NONE)
 		{
 			Model model = renderable instanceof Model ? (Model) renderable : renderable.getModel();
 			if (model != null)
@@ -1673,7 +1690,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 
 	private int getDrawDistance()
 	{
-		final int limit = useComputeShaders ? MAX_DISTANCE : DEFAULT_DISTANCE;
+		final int limit = computeMode != ComputeMode.NONE ? MAX_DISTANCE : DEFAULT_DISTANCE;
 		return Ints.constrainToRange(config.drawDistance(), 0, limit);
 	}
 
@@ -1688,4 +1705,36 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
 			runnable.run();
 		}
 	}
+
+	private void updateBuffer(GLBuffer glBuffer, int target, int size, Buffer data, int usage, long clFlags)
+	{
+		gl.glBindBuffer(target, glBuffer.glBufferId);
+		if (size > glBuffer.size)
+		{
+			log.trace("Buffer resize: {} {} -> {}", glBuffer, glBuffer.size, size);
+
+			glBuffer.size = size;
+			gl.glBufferData(target, size, data, usage);
+
+			if (computeMode == ComputeMode.OPENCL)
+			{
+				if (glBuffer.cl_mem != null)
+				{
+					CL.clReleaseMemObject(glBuffer.cl_mem);
+				}
+				if (size == 0)
+				{
+					glBuffer.cl_mem = null;
+				}
+				else
+				{
+					glBuffer.cl_mem = clCreateFromGLBuffer(openCLManager.context, clFlags, glBuffer.glBufferId, null);
+				}
+			}
+		}
+		else if (data != null)
+		{
+			gl.glBufferSubData(target, 0, size, data);
+		}
+	}
 }
diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/gpu/OpenCLManager.java b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/OpenCLManager.java
new file mode 100644
index 0000000000..77c2f9b575
--- /dev/null
+++ b/runelite-client/src/main/java/net/runelite/client/plugins/gpu/OpenCLManager.java
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2021, Adam <Adam@sigterm.info>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package net.runelite.client.plugins.gpu;
+
+import com.google.common.base.Charsets;
+import com.jogamp.nativewindow.NativeSurface;
+import com.jogamp.opengl.GL4;
+import com.jogamp.opengl.GLContext;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Objects;
+import javax.inject.Singleton;
+import jogamp.opengl.GLContextImpl;
+import jogamp.opengl.GLDrawableImpl;
+import jogamp.opengl.egl.EGLContext;
+import jogamp.opengl.macosx.cgl.CGL;
+import jogamp.opengl.windows.wgl.WindowsWGLContext;
+import jogamp.opengl.x11.glx.X11GLXContext;
+import lombok.extern.slf4j.Slf4j;
+import net.runelite.client.plugins.gpu.template.Template;
+import net.runelite.client.util.OSType;
+import org.jocl.CL;
+import static org.jocl.CL.*;
+import org.jocl.CLException;
+import org.jocl.Pointer;
+import org.jocl.Sizeof;
+import org.jocl.cl_command_queue;
+import org.jocl.cl_context;
+import org.jocl.cl_context_properties;
+import org.jocl.cl_device_id;
+import org.jocl.cl_event;
+import org.jocl.cl_kernel;
+import org.jocl.cl_mem;
+import org.jocl.cl_platform_id;
+import org.jocl.cl_program;
+
+@Singleton
+@Slf4j
+class OpenCLManager
+{
+	private static final String GL_SHARING_PLATFORM_EXT = "cl_khr_gl_sharing";
+
+	private static final String KERNEL_NAME_UNORDERED = "computeUnordered";
+	private static final String KERNEL_NAME_LARGE = "computeLarge";
+
+	private static final int MIN_WORK_GROUP_SIZE = 256;
+	private static final int SMALL_SIZE = GpuPlugin.SMALL_TRIANGLE_COUNT;
+	private static final int LARGE_SIZE = GpuPlugin.MAX_TRIANGLE;
+	//  struct shared_data {
+	//      int totalNum[12];
+	//      int totalDistance[12];
+	//      int totalMappedNum[18];
+	//      int min10;
+	//      int dfs[0];
+	//  };
+	private static final int SHARED_SIZE = 12 + 12 + 18 + 1; // in ints
+
+	// The number of faces each worker processes in the two kernels
+	private int largeFaceCount;
+	private int smallFaceCount;
+
+	private cl_platform_id platform;
+	private cl_device_id device;
+	cl_context context;
+	private cl_command_queue commandQueue;
+
+	private cl_program programUnordered;
+	private cl_program programSmall;
+	private cl_program programLarge;
+
+	private cl_kernel kernelUnordered;
+	private cl_kernel kernelSmall;
+	private cl_kernel kernelLarge;
+
+	void init(GL4 gl)
+	{
+		CL.setExceptionsEnabled(true);
+
+		switch (OSType.getOSType())
+		{
+			case Windows:
+			case Linux:
+				initPlatform();
+				initDevice();
+				initContext(gl);
+				break;
+			case MacOS:
+				initMacOS(gl);
+				break;
+			default:
+				throw new RuntimeException("Unsupported OS Type " + OSType.getOSType().name());
+		}
+		ensureMinWorkGroupSize();
+		initQueue();
+		compilePrograms();
+	}
+
+	void cleanup()
+	{
+		if (programUnordered != null)
+		{
+			CL.clReleaseProgram(programUnordered);
+			programUnordered = null;
+		}
+
+		if (programSmall != null)
+		{
+			CL.clReleaseProgram(programSmall);
+			programSmall = null;
+		}
+
+		if (programLarge != null)
+		{
+			CL.clReleaseProgram(programLarge);
+			programLarge = null;
+		}
+
+		if (kernelUnordered != null)
+		{
+			CL.clReleaseKernel(kernelUnordered);
+			kernelUnordered = null;
+		}
+
+		if (kernelSmall != null)
+		{
+			CL.clReleaseKernel(kernelSmall);
+			kernelSmall = null;
+		}
+
+		if (kernelLarge != null)
+		{
+			CL.clReleaseKernel(kernelLarge);
+			kernelLarge = null;
+		}
+
+		if (commandQueue != null)
+		{
+			CL.clReleaseCommandQueue(commandQueue);
+			commandQueue = null;
+		}
+
+		if (context != null)
+		{
+			CL.clReleaseContext(context);
+			context = null;
+		}
+
+		if (device != null)
+		{
+			CL.clReleaseDevice(device);
+			device = null;
+		}
+	}
+
+	private String logPlatformInfo(cl_platform_id platform, int param)
+	{
+		long[] size = new long[1];
+		clGetPlatformInfo(platform, param, 0, null, size);
+
+		byte[] buffer = new byte[(int) size[0]];
+		clGetPlatformInfo(platform, param, buffer.length, Pointer.to(buffer), null);
+		String platformInfo = new String(buffer, Charsets.UTF_8);
+		log.debug("Platform: {}, {}", stringFor_cl_platform_info(param), platformInfo);
+		return platformInfo;
+	}
+
+	private void logBuildInfo(cl_program program, int param)
+	{
+		long[] size = new long[1];
+		clGetProgramBuildInfo(program, device, param, 0, null, size);
+
+		ByteBuffer buffer = ByteBuffer.allocateDirect((int) size[0]);
+		clGetProgramBuildInfo(program, device, param, buffer.limit(), Pointer.toBuffer(buffer), null);
+
+		switch (param)
+		{
+			case CL_PROGRAM_BUILD_STATUS:
+				log.debug("Build status: {}, {}", stringFor_cl_program_build_info(param), stringFor_cl_build_status(buffer.getInt()));
+				break;
+			case CL_PROGRAM_BINARY_TYPE:
+				log.debug("Binary type: {}, {}", stringFor_cl_program_build_info(param), stringFor_cl_program_binary_type(buffer.getInt()));
+				break;
+			case CL_PROGRAM_BUILD_LOG:
+				String buildLog = StandardCharsets.US_ASCII.decode(buffer).toString();
+				log.trace("Build log: {}, {}", stringFor_cl_program_build_info(param), buildLog);
+				break;
+			case CL_PROGRAM_BUILD_OPTIONS:
+				String message = StandardCharsets.US_ASCII.decode(buffer).toString();
+				log.debug("Build options: {}, {}", stringFor_cl_program_build_info(param), message);
+				break;
+			default:
+				throw new IllegalArgumentException();
+		}
+	}
+
+	private void initPlatform()
+	{
+		int[] platformCount = new int[1];
+		clGetPlatformIDs(0, null, platformCount);
+		if (platformCount[0] == 0)
+		{
+			throw new RuntimeException("No compute platforms found");
+		}
+
+		cl_platform_id[] platforms = new cl_platform_id[platformCount[0]];
+		clGetPlatformIDs(platforms.length, platforms, null);
+
+		for (cl_platform_id platform : platforms)
+		{
+			log.debug("Found cl_platform_id {}", platform);
+			logPlatformInfo(platform, CL_PLATFORM_PROFILE);
+			logPlatformInfo(platform, CL_PLATFORM_VERSION);
+			logPlatformInfo(platform, CL_PLATFORM_NAME);
+			logPlatformInfo(platform, CL_PLATFORM_VENDOR);
+			String[] extensions = logPlatformInfo(platform, CL_PLATFORM_EXTENSIONS).split(" ");
+			if (Arrays.stream(extensions).noneMatch(s -> s.equals(GL_SHARING_PLATFORM_EXT)))
+			{
+				throw new RuntimeException("Platform does not support OpenGL buffer sharing");
+			}
+		}
+
+		platform = platforms[0];
+		log.debug("Selected cl_platform_id {}", platform);
+	}
+
+	private void initDevice()
+	{
+		int[] deviceCount = new int[1];
+		clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, null, deviceCount);
+		if (deviceCount[0] == 0)
+		{
+			throw new RuntimeException("No compute devices found");
+		}
+
+		cl_device_id[] devices = new cl_device_id[(int) deviceCount[0]];
+		clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, devices.length, devices, null);
+
+		for (cl_device_id device : devices)
+		{
+			long[] size = new long[1];
+			clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, null, size);
+
+			byte[] devInfoBuf = new byte[(int) size[0]];
+			clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, devInfoBuf.length, Pointer.to(devInfoBuf), null);
+
+			log.debug("Found cl_device_id: {}", device);
+			log.debug("Device extensions: {}", new String(devInfoBuf, Charsets.UTF_8));
+		}
+
+		device = devices[0];
+		log.debug("Selected cl_device_id {}", device);
+	}
+
+	private void initContext(GL4 gl)
+	{
+		// set computation platform
+		cl_context_properties contextProps = new cl_context_properties();
+		contextProps.addProperty(CL_CONTEXT_PLATFORM, platform);
+
+		// pull gl context
+		GLContext glContext = gl.getContext();
+		log.debug("Got GLContext of type {}", glContext.getClass().getSimpleName());
+		if (!glContext.isCurrent())
+		{
+			throw new RuntimeException("Can't create OpenCL context from inactive GL Context");
+		}
+
+		// get correct props based on os
+		long glContextHandle = glContext.getHandle();
+		GLContextImpl glContextImpl = (GLContextImpl) glContext;
+		GLDrawableImpl glDrawableImpl = glContextImpl.getDrawableImpl();
+		NativeSurface nativeSurface = glDrawableImpl.getNativeSurface();
+
+		if (glContext instanceof X11GLXContext)
+		{
+			long displayHandle = nativeSurface.getDisplayHandle();
+			contextProps.addProperty(CL_GL_CONTEXT_KHR, glContextHandle);
+			contextProps.addProperty(CL_GLX_DISPLAY_KHR, displayHandle);
+		}
+		else if (glContext instanceof WindowsWGLContext)
+		{
+			long surfaceHandle = nativeSurface.getSurfaceHandle();
+			contextProps.addProperty(CL_GL_CONTEXT_KHR, glContextHandle);
+			contextProps.addProperty(CL_WGL_HDC_KHR, surfaceHandle);
+		}
+		else if (glContext instanceof EGLContext)
+		{
+			long displayHandle = nativeSurface.getDisplayHandle();
+			contextProps.addProperty(CL_GL_CONTEXT_KHR, glContextHandle);
+			contextProps.addProperty(CL_EGL_DISPLAY_KHR, displayHandle);
+		}
+
+		log.debug("Creating context with props: {}", contextProps);
+		context = clCreateContext(contextProps, 1, new cl_device_id[]{device}, null, null, null);
+		log.debug("Created compute context {}", context);
+	}
+
+	private void initMacOS(GL4 gl)
+	{
+		// get sharegroup from gl context
+		GLContext glContext = gl.getContext();
+		if (!glContext.isCurrent())
+		{
+			throw new RuntimeException("Can't create context from inactive GL");
+		}
+		long cglContext = CGL.CGLGetCurrentContext();
+		long cglShareGroup = CGL.CGLGetShareGroup(cglContext);
+
+		// build context props
+		cl_context_properties contextProps = new cl_context_properties();
+		contextProps.addProperty(CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, cglShareGroup);
+
+		// ask macos to make the context for us
+		log.debug("Creating context with props: {}", contextProps);
+		context = clCreateContext(contextProps, 0, null, null, null, null);
+
+		// pull the compute device out of the provided context
+		device = new cl_device_id();
+		clGetGLContextInfoAPPLE(context, cglContext, CL_CGL_DEVICE_FOR_CURRENT_VIRTUAL_SCREEN_APPLE, Sizeof.cl_device_id, Pointer.to(device), null);
+
+		log.debug("Got macOS CLGL compute device {}", device);
+	}
+
+	private void ensureMinWorkGroupSize()
+	{
+		long[] maxWorkGroupSize = new long[1];
+		clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, Sizeof.size_t, Pointer.to(maxWorkGroupSize), null);
+		log.debug("Device CL_DEVICE_MAX_WORK_GROUP_SIZE: {}", maxWorkGroupSize[0]);
+
+		if (maxWorkGroupSize[0] < MIN_WORK_GROUP_SIZE)
+		{
+			throw new RuntimeException("Compute device does not support min work group size " + MIN_WORK_GROUP_SIZE);
+		}
+
+		// Largest power of 2 less than or equal to maxWorkGroupSize
+		int groupSize = 0x80000000 >>> Integer.numberOfLeadingZeros((int) maxWorkGroupSize[0]);
+		largeFaceCount = LARGE_SIZE / (Math.min(groupSize, LARGE_SIZE));
+		smallFaceCount = SMALL_SIZE / (Math.min(groupSize, SMALL_SIZE));
+
+		log.debug("Face counts: small: {}, large: {}", smallFaceCount, largeFaceCount);
+	}
+
+	private void initQueue()
+	{
+		long[] l = new long[1];
+		clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, Sizeof.cl_long, Pointer.to(l), null);
+
+		commandQueue = clCreateCommandQueue(context, device, l[0] & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, null);
+		log.debug("Created command_queue {}, properties {}", commandQueue, l[0] & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
+	}
+
+	private cl_program compileProgram(String programSource)
+	{
+		log.trace("Compiling program:\n {}", programSource);
+		cl_program program = clCreateProgramWithSource(context, 1, new String[]{programSource}, null, null);
+
+		try
+		{
+			clBuildProgram(program, 0, null, null, null, null);
+		}
+		catch (CLException e)
+		{
+			logBuildInfo(program, CL_PROGRAM_BUILD_LOG);
+			throw e;
+		}
+
+		logBuildInfo(program, CL_PROGRAM_BUILD_STATUS);
+		logBuildInfo(program, CL_PROGRAM_BINARY_TYPE);
+		logBuildInfo(program, CL_PROGRAM_BUILD_OPTIONS);
+		logBuildInfo(program, CL_PROGRAM_BUILD_LOG);
+		return program;
+	}
+
+	private cl_kernel getKernel(cl_program program, String kernelName)
+	{
+		cl_kernel kernel = clCreateKernel(program, kernelName, null);
+		log.debug("Loaded kernel {} for program {}", kernelName, program);
+		return kernel;
+	}
+
+	private void compilePrograms()
+	{
+		Template templateSmall = new Template()
+			.addInclude(OpenCLManager.class)
+			.add(key -> key.equals("FACE_COUNT") ? ("#define FACE_COUNT " + smallFaceCount) : null);
+		Template templateLarge = new Template()
+			.addInclude(OpenCLManager.class)
+			.add(key -> key.equals("FACE_COUNT") ? ("#define FACE_COUNT " + largeFaceCount) : null);
+
+		String unordered = new Template()
+			.addInclude(OpenCLManager.class)
+			.load("comp_unordered.cl");
+		String small = templateSmall.load("comp.cl");
+		String large = templateLarge.load("comp.cl");
+
+		programUnordered = compileProgram(unordered);
+		programSmall = compileProgram(small);
+		programLarge = compileProgram(large);
+
+		kernelUnordered = getKernel(programUnordered, KERNEL_NAME_UNORDERED);
+		kernelSmall = getKernel(programSmall, KERNEL_NAME_LARGE);
+		kernelLarge = getKernel(programLarge, KERNEL_NAME_LARGE);
+	}
+
+	void compute(int unorderedModels, int smallModels, int largeModels,
+		GLBuffer sceneVertexBuffer,
+		GLBuffer sceneUvBuffer,
+		GLBuffer vertexBuffer,
+		GLBuffer uvBuffer,
+		GLBuffer unorderedBuffer,
+		GLBuffer smallBuffer,
+		GLBuffer largeBuffer,
+		GLBuffer outVertexBuffer,
+		GLBuffer outUvBuffer,
+		GLBuffer uniformBuffer
+	)
+	{
+		cl_mem[] glBuffersAll = {
+			sceneVertexBuffer.cl_mem,
+			sceneUvBuffer.cl_mem,
+			unorderedBuffer.cl_mem,
+			smallBuffer.cl_mem,
+			largeBuffer.cl_mem,
+			vertexBuffer.cl_mem,
+			uvBuffer.cl_mem,
+			outVertexBuffer.cl_mem,
+			outUvBuffer.cl_mem,
+			uniformBuffer.cl_mem,
+		};
+		cl_mem[] glBuffers = Arrays.stream(glBuffersAll)
+			.filter(Objects::nonNull)
+			.toArray(cl_mem[]::new);
+
+		cl_event acquireGLBuffers = new cl_event();
+		clEnqueueAcquireGLObjects(commandQueue, glBuffers.length, glBuffers, 0, null, acquireGLBuffers);
+
+		cl_event[] computeEvents = {
+			new cl_event(),
+			new cl_event(),
+			new cl_event()
+		};
+		int numComputeEvents = 0;
+
+		if (unorderedModels > 0)
+		{
+			clSetKernelArg(kernelUnordered, 0, Sizeof.cl_mem, unorderedBuffer.ptr());
+			clSetKernelArg(kernelUnordered, 1, Sizeof.cl_mem, sceneVertexBuffer.ptr());
+			clSetKernelArg(kernelUnordered, 2, Sizeof.cl_mem, vertexBuffer.ptr());
+			clSetKernelArg(kernelUnordered, 3, Sizeof.cl_mem, sceneUvBuffer.ptr());
+			clSetKernelArg(kernelUnordered, 4, Sizeof.cl_mem, uvBuffer.ptr());
+			clSetKernelArg(kernelUnordered, 5, Sizeof.cl_mem, outVertexBuffer.ptr());
+			clSetKernelArg(kernelUnordered, 6, Sizeof.cl_mem, outUvBuffer.ptr());
+
+			// queue compute call after acquireGLBuffers
+			clEnqueueNDRangeKernel(commandQueue, kernelUnordered, 1, null,
+				new long[]{unorderedModels * 6L}, new long[]{6}, 1, new cl_event[]{acquireGLBuffers}, computeEvents[numComputeEvents++]);
+		}
+
+		if (smallModels > 0)
+		{
+			clSetKernelArg(kernelSmall, 0, (SHARED_SIZE + SMALL_SIZE) * Integer.BYTES, null);
+			clSetKernelArg(kernelSmall, 1, Sizeof.cl_mem, smallBuffer.ptr());
+			clSetKernelArg(kernelSmall, 2, Sizeof.cl_mem, sceneVertexBuffer.ptr());
+			clSetKernelArg(kernelSmall, 3, Sizeof.cl_mem, vertexBuffer.ptr());
+			clSetKernelArg(kernelSmall, 4, Sizeof.cl_mem, sceneUvBuffer.ptr());
+			clSetKernelArg(kernelSmall, 5, Sizeof.cl_mem, uvBuffer.ptr());
+			clSetKernelArg(kernelSmall, 6, Sizeof.cl_mem, outVertexBuffer.ptr());
+			clSetKernelArg(kernelSmall, 7, Sizeof.cl_mem, outUvBuffer.ptr());
+			clSetKernelArg(kernelSmall, 8, Sizeof.cl_mem, uniformBuffer.ptr());
+
+			clEnqueueNDRangeKernel(commandQueue, kernelSmall, 1, null,
+				new long[]{smallModels * (SMALL_SIZE / smallFaceCount)}, new long[]{SMALL_SIZE / smallFaceCount}, 1, new cl_event[]{acquireGLBuffers}, computeEvents[numComputeEvents++]);
+		}
+
+		if (largeModels > 0)
+		{
+			clSetKernelArg(kernelLarge, 0, (SHARED_SIZE + LARGE_SIZE) * Integer.BYTES, null);
+			clSetKernelArg(kernelLarge, 1, Sizeof.cl_mem, largeBuffer.ptr());
+			clSetKernelArg(kernelLarge, 2, Sizeof.cl_mem, sceneVertexBuffer.ptr());
+			clSetKernelArg(kernelLarge, 3, Sizeof.cl_mem, vertexBuffer.ptr());
+			clSetKernelArg(kernelLarge, 4, Sizeof.cl_mem, sceneUvBuffer.ptr());
+			clSetKernelArg(kernelLarge, 5, Sizeof.cl_mem, uvBuffer.ptr());
+			clSetKernelArg(kernelLarge, 6, Sizeof.cl_mem, outVertexBuffer.ptr());
+			clSetKernelArg(kernelLarge, 7, Sizeof.cl_mem, outUvBuffer.ptr());
+			clSetKernelArg(kernelLarge, 8, Sizeof.cl_mem, uniformBuffer.ptr());
+
+			clEnqueueNDRangeKernel(commandQueue, kernelLarge, 1, null,
+				new long[]{(long) largeModels * (LARGE_SIZE / largeFaceCount)}, new long[]{LARGE_SIZE / largeFaceCount}, 1, new cl_event[]{acquireGLBuffers}, computeEvents[numComputeEvents++]);
+		}
+
+		clEnqueueReleaseGLObjects(commandQueue, glBuffers.length, glBuffers, numComputeEvents, computeEvents, null);
+	}
+
+	void finish()
+	{
+		clFinish(commandQueue);
+	}
+}
\ No newline at end of file
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/cl_types.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/cl_types.cl
new file mode 100644
index 0000000000..7f41acab49
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/cl_types.cl
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021, Adam <Adam@sigterm.info>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+struct uniform {
+  int cameraYaw;
+  int cameraPitch;
+  int centerX;
+  int centerY;
+  int zoom;
+  int cameraX;
+  int cameraY;
+  int cameraZ;
+  int4 sinCosTable[2048];
+};
+
+struct shared_data {
+  int totalNum[12]; // number of faces with a given priority
+  int totalDistance[12]; // sum of distances to faces of a given priority
+  int totalMappedNum[18]; // number of faces with a given adjusted priority
+  int min10; // minimum distance to a face of priority 10
+  int dfs[0]; // packed face id and distance, size 512 for small, 4096 for large
+};
+
+struct modelinfo {
+  int offset;   // offset into buffer
+  int uvOffset; // offset into uv buffer
+  int size;     // length in faces
+  int idx;      // write idx in target buffer
+  int flags;    // radius, orientation
+  int x;        // scene position x
+  int y;        // scene position y
+  int z;        // scene position z
+};
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/common.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/common.cl
new file mode 100644
index 0000000000..f499599cd2
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/common.cl
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021, Adam <Adam@sigterm.info>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define PI 3.1415926535897932384626433832795f
+#define UNIT PI / 1024.0f
+
+float3 toScreen(int4 vertex, int cameraYaw, int cameraPitch, int centerX, int centerY, int zoom) {
+  float yawSin = sin(cameraYaw * UNIT);
+  float yawCos = cos(cameraYaw * UNIT);
+
+  float pitchSin = sin(cameraPitch * UNIT);
+  float pitchCos = cos(cameraPitch * UNIT);
+
+  float rotatedX = (vertex.z * yawSin) + (vertex.x * yawCos);
+  float rotatedZ = (vertex.z * yawCos) - (vertex.x * yawSin);
+
+  float var13 = (vertex.y * pitchCos) - (rotatedZ * pitchSin);
+  float var12 = (vertex.y * pitchSin) + (rotatedZ * pitchCos);
+
+  float x = rotatedX * zoom / var12 + centerX;
+  float y = var13 * zoom / var12 + centerY;
+  float z = -var12; // in OpenGL depth is negative
+
+  return (float3) (x, y, z);
+}
+
+/*
+ * Rotate a vertex by a given orientation in JAU
+ */
+int4 rotate_vertex(__constant struct uniform *uni, int4 vertex, int orientation) {
+  int4 sinCos = uni->sinCosTable[orientation];
+  int s = sinCos.x;
+  int c = sinCos.y;
+  int x = vertex.z * s + vertex.x * c >> 16;
+  int z = vertex.z * c - vertex.x * s >> 16;
+  return (int4)(x, vertex.y, z, vertex.w);
+}
+
+/*
+ * Calculate the distance to a vertex given the camera angle
+ */
+int vertex_distance(int4 vertex, int cameraYaw, int cameraPitch) {
+  int yawSin = (int)(65536.0f * sin(cameraYaw * UNIT));
+  int yawCos = (int)(65536.0f * cos(cameraYaw * UNIT));
+
+  int pitchSin = (int)(65536.0f * sin(cameraPitch * UNIT));
+  int pitchCos = (int)(65536.0f * cos(cameraPitch * UNIT));
+
+  int j = vertex.z * yawCos - vertex.x * yawSin >> 16;
+  int l = vertex.y * pitchSin + j * pitchCos >> 16;
+
+  return l;
+}
+
+/*
+ * Calculate the distance to a face
+ */
+int face_distance(int4 vA, int4 vB, int4 vC, int cameraYaw, int cameraPitch) {
+  int dvA = vertex_distance(vA, cameraYaw, cameraPitch);
+  int dvB = vertex_distance(vB, cameraYaw, cameraPitch);
+  int dvC = vertex_distance(vC, cameraYaw, cameraPitch);
+  int faceDistance = (dvA + dvB + dvC) / 3;
+  return faceDistance;
+}
+
+/*
+ * Test if a face is visible (not backward facing)
+ */
+bool face_visible(__constant struct uniform *uni, int4 vA, int4 vB, int4 vC, int4 position) {
+  // Move model to scene location, and account for camera offset
+  int4 cameraPos = (int4)(uni->cameraX, uni->cameraY, uni->cameraZ, 0);
+  vA += position - cameraPos;
+  vB += position - cameraPos;
+  vC += position - cameraPos;
+
+  float3 sA = toScreen(vA, uni->cameraYaw, uni->cameraPitch, uni->centerX, uni->centerY, uni->zoom);
+  float3 sB = toScreen(vB, uni->cameraYaw, uni->cameraPitch, uni->centerX, uni->centerY, uni->zoom);
+  float3 sC = toScreen(vC, uni->cameraYaw, uni->cameraPitch, uni->centerX, uni->centerY, uni->zoom);
+
+  return (sA.x - sB.x) * (sC.y - sB.y) - (sC.x - sB.x) * (sA.y - sB.y) > 0;
+}
+
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp.cl
new file mode 100644
index 0000000000..1212123f25
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp.cl
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2021, Adam <Adam@sigterm.info>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include FACE_COUNT
+
+#include cl_types.cl
+#include to_screen.cl
+#include common.cl
+#include priority_render.cl
+
+__kernel
+__attribute__((work_group_size_hint(256, 1, 1)))
+void computeLarge(
+  __local struct shared_data *shared,
+  __global const struct modelinfo *ol,
+  __global const int4 *vb,
+  __global const int4 *tempvb,
+  __global const float4 *uv,
+  __global const float4 *tempuv,
+  __global int4 *vout,
+  __global float4 *uvout,
+  __constant struct uniform *uni) {
+
+  size_t groupId = get_group_id(0);
+  size_t localId = get_local_id(0) * FACE_COUNT;
+  struct modelinfo minfo = ol[groupId];
+  int4 pos = (int4)(minfo.x, minfo.y, minfo.z, 0);
+
+  if (localId == 0) {
+    shared->min10 = 1600;
+    for (int i = 0; i < 12; ++i) {
+      shared->totalNum[i] = 0;
+      shared->totalDistance[i] = 0;
+    }
+    for (int i = 0; i < 18; ++i) {
+      shared->totalMappedNum[i] = 0;
+    }
+  }
+
+  int prio[FACE_COUNT];
+  int dis[FACE_COUNT];
+  int4 v1[FACE_COUNT];
+  int4 v2[FACE_COUNT];
+  int4 v3[FACE_COUNT];
+
+  for (int i = 0; i < FACE_COUNT; i++) {
+    get_face(shared, uni, vb, tempvb, localId + i, minfo, uni->cameraYaw, uni->cameraPitch, &prio[i], &dis[i], &v1[i], &v2[i], &v3[i]);
+  }
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int i = 0; i < FACE_COUNT; i++) {
+    add_face_prio_distance(shared, uni, localId + i, minfo, v1[i], v2[i], v3[i], prio[i], dis[i], pos);
+  }
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  int prioAdj[FACE_COUNT];
+  int idx[FACE_COUNT];
+  for (int i = 0; i < FACE_COUNT; i++) {
+    idx[i] = map_face_priority(shared, localId + i, minfo, prio[i], dis[i], &prioAdj[i]);
+  }
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int i = 0; i < FACE_COUNT; i++) {
+    insert_dfs(shared, localId + i, minfo, prioAdj[i], dis[i], idx[i]);
+  }
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int i = 0; i < FACE_COUNT; i++) {
+    sort_and_insert(shared, uv, tempuv, vout, uvout, localId + i, minfo, prioAdj[i], dis[i], v1[i], v2[i], v3[i]);
+  }
+}
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.cl
new file mode 100644
index 0000000000..436f9a7d72
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/comp_unordered.cl
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2021, Adam <Adam@sigterm.info>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include cl_types.cl
+
+__kernel
+__attribute__((reqd_work_group_size(6, 1, 1)))
+void computeUnordered(__global const struct modelinfo *ol,
+                      __global const int4 *vb,
+                      __global const int4 *tempvb,
+                      __global const float4 *uv,
+                      __global const float4 *tempuv,
+                      __global int4 *vout,
+                      __global float4 *uvout) {
+  size_t groupId = get_group_id(0);
+  size_t localId = get_local_id(0);
+  struct modelinfo minfo = ol[groupId];
+
+  int offset = minfo.offset;
+  int size = minfo.size;
+  int outOffset = minfo.idx;
+  int uvOffset = minfo.uvOffset;
+  int flags = minfo.flags;
+  int4 pos = (int4)(minfo.x, minfo.y, minfo.z, 0);
+
+  if (localId >= size) {
+    return;
+  }
+
+  uint ssboOffset = localId;
+  int4 thisA, thisB, thisC;
+
+  // Grab triangle vertices from the correct buffer
+  if (flags < 0) {
+    thisA = vb[offset + ssboOffset * 3];
+    thisB = vb[offset + ssboOffset * 3 + 1];
+    thisC = vb[offset + ssboOffset * 3 + 2];
+  } else {
+    thisA = tempvb[offset + ssboOffset * 3];
+    thisB = tempvb[offset + ssboOffset * 3 + 1];
+    thisC = tempvb[offset + ssboOffset * 3 + 2];
+  }
+
+  uint myOffset = localId;
+
+  // position vertices in scene and write to out buffer
+  vout[outOffset + myOffset * 3]     = pos + thisA;
+  vout[outOffset + myOffset * 3 + 1] = pos + thisB;
+  vout[outOffset + myOffset * 3 + 2] = pos + thisC;
+
+  if (uvOffset < 0) {
+    uvout[outOffset + myOffset * 3]     = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+    uvout[outOffset + myOffset * 3 + 1] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+    uvout[outOffset + myOffset * 3 + 2] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+  } else if (flags >= 0) {
+    uvout[outOffset + myOffset * 3]     = tempuv[uvOffset + localId * 3];
+    uvout[outOffset + myOffset * 3 + 1] = tempuv[uvOffset + localId * 3 + 1];
+    uvout[outOffset + myOffset * 3 + 2] = tempuv[uvOffset + localId * 3 + 2];
+  } else {
+    uvout[outOffset + myOffset * 3]     = uv[uvOffset + localId * 3];
+    uvout[outOffset + myOffset * 3 + 1] = uv[uvOffset + localId * 3 + 1];
+    uvout[outOffset + myOffset * 3 + 2] = uv[uvOffset + localId * 3 + 2];
+  }
+}
diff --git a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl
new file mode 100644
index 0000000000..6f1a04470c
--- /dev/null
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2021, Adam <Adam@sigterm.info>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Calculate adjusted priority for a face with a given priority, distance, and
+// model global min10 and face distance averages. This allows positioning faces
+// with priorities 10/11 into the correct 'slots' resulting in 18 possible
+// adjusted priorities
+int priority_map(int p, int distance, int _min10, int avg1, int avg2, int avg3) {
+  // (10, 11)  0  1  2  (10, 11)  3  4  (10, 11)  5  6  7  8  9  (10, 11)
+  //   0   1   2  3  4    5   6   7  8    9  10  11 12 13 14 15   16  17
+  switch (p) {
+    case 0: return 2;
+    case 1: return 3;
+    case 2: return 4;
+    case 3: return 7;
+    case 4: return 8;
+    case 5: return 11;
+    case 6: return 12;
+    case 7: return 13;
+    case 8: return 14;
+    case 9: return 15;
+    case 10:
+      if (distance > avg1) {
+        return 0;
+      } else if (distance > avg2) {
+        return 5;
+      } else if (distance > avg3) {
+        return 9;
+      } else {
+        return 16;
+      }
+    case 11:
+      if (distance > avg1 && _min10 > avg1) {
+        return 1;
+      } else if (distance > avg2 && (_min10 > avg1 || _min10 > avg2)) {
+        return 6;
+      } else if (distance > avg3 && (_min10 > avg1 || _min10 > avg2 || _min10 > avg3)) {
+        return 10;
+      } else {
+        return 17;
+      }
+    default:
+      return -1;
+  }
+}
+
+// calculate the number of faces with a lower adjusted priority than
+// the given adjusted priority
+int count_prio_offset(__local struct shared_data *shared, int priority) {
+  int total = 0;
+  switch (priority) {
+    case 17:
+      total += shared->totalMappedNum[16];
+    case 16:
+      total += shared->totalMappedNum[15];
+    case 15:
+      total += shared->totalMappedNum[14];
+    case 14:
+      total += shared->totalMappedNum[13];
+    case 13:
+      total += shared->totalMappedNum[12];
+    case 12:
+      total += shared->totalMappedNum[11];
+    case 11:
+      total += shared->totalMappedNum[10];
+    case 10:
+      total += shared->totalMappedNum[9];
+    case 9:
+      total += shared->totalMappedNum[8];
+    case 8:
+      total += shared->totalMappedNum[7];
+    case 7:
+      total += shared->totalMappedNum[6];
+    case 6:
+      total += shared->totalMappedNum[5];
+    case 5:
+      total += shared->totalMappedNum[4];
+    case 4:
+      total += shared->totalMappedNum[3];
+    case 3:
+      total += shared->totalMappedNum[2];
+    case 2:
+      total += shared->totalMappedNum[1];
+    case 1:
+      total += shared->totalMappedNum[0];
+    case 0:
+      return total;
+  }
+}
+
+void get_face(
+  __local struct shared_data *shared,
+  __constant struct uniform *uni,
+  __global const int4 *vb,
+  __global const int4 *tempvb,
+  uint localId, struct modelinfo minfo, int cameraYaw, int cameraPitch,
+  /* out */ int *prio, int *dis, int4 *o1, int4 *o2, int4 *o3) {
+  int size = minfo.size;
+  int offset = minfo.offset;
+  int flags = minfo.flags;
+  uint ssboOffset;
+
+  if (localId < size) {
+    ssboOffset = localId;
+  } else {
+    ssboOffset = 0;
+  }
+
+  int4 thisA;
+  int4 thisB;
+  int4 thisC;
+
+  // Grab triangle vertices from the correct buffer
+  if (flags < 0) {
+    thisA = vb[offset + ssboOffset * 3];
+    thisB = vb[offset + ssboOffset * 3 + 1];
+    thisC = vb[offset + ssboOffset * 3 + 2];
+  } else {
+    thisA = tempvb[offset + ssboOffset * 3];
+    thisB = tempvb[offset + ssboOffset * 3 + 1];
+    thisC = tempvb[offset + ssboOffset * 3 + 2];
+  }
+
+  if (localId < size) {
+    int radius = (flags & 0x7fffffff) >> 12;
+    int orientation = flags & 0x7ff;
+
+    // rotate for model orientation
+    int4 thisrvA = rotate_vertex(uni, thisA, orientation);
+    int4 thisrvB = rotate_vertex(uni, thisB, orientation);
+    int4 thisrvC = rotate_vertex(uni, thisC, orientation);
+
+    // calculate distance to face
+    int thisPriority = (thisA.w >> 16) & 0xff;// all vertices on the face have the same priority
+    int thisDistance;
+    if (radius == 0) {
+      thisDistance = 0;
+    } else {
+      thisDistance = face_distance(thisrvA, thisrvB, thisrvC, cameraYaw, cameraPitch) + radius;
+    }
+
+    *o1 = thisrvA;
+    *o2 = thisrvB;
+    *o3 = thisrvC;
+
+    *prio = thisPriority;
+    *dis = thisDistance;
+  } else {
+    *o1 = (int4)(0, 0, 0, 0);
+    *o2 = (int4)(0, 0, 0, 0);
+    *o3 = (int4)(0, 0, 0, 0);
+    *prio = 0;
+    *dis = 0;
+  }
+}
+
+void add_face_prio_distance(
+  __local struct shared_data *shared,
+  __constant struct uniform *uni,
+  uint localId, struct modelinfo minfo, int4 thisrvA, int4 thisrvB, int4 thisrvC, int thisPriority, int thisDistance, int4 pos) {
+  if (localId < minfo.size) {
+    // if the face is not culled, it is calculated into priority distance averages
+    if (face_visible(uni, thisrvA, thisrvB, thisrvC, pos)) {
+      atomic_add(&shared->totalNum[thisPriority], 1);
+      atomic_add(&shared->totalDistance[thisPriority], thisDistance);
+
+      // calculate minimum distance to any face of priority 10 for positioning the 11 faces later
+      if (thisPriority == 10) {
+        atomic_min(&shared->min10, thisDistance);
+      }
+    }
+  }
+}
+
+int map_face_priority(__local struct shared_data *shared, uint localId, struct modelinfo minfo, int thisPriority, int thisDistance, int *prio) {
+  int size = minfo.size;
+
+  // Compute average distances for 0/2, 3/4, and 6/8
+
+  if (localId < size) {
+    int avg1 = 0;
+    int avg2 = 0;
+    int avg3 = 0;
+
+    if (shared->totalNum[1] > 0 || shared->totalNum[2] > 0) {
+      avg1 = (shared->totalDistance[1] + shared->totalDistance[2]) / (shared->totalNum[1] + shared->totalNum[2]);
+    }
+
+    if (shared->totalNum[3] > 0 || shared->totalNum[4] > 0) {
+      avg2 = (shared->totalDistance[3] + shared->totalDistance[4]) / (shared->totalNum[3] + shared->totalNum[4]);
+    }
+
+    if (shared->totalNum[6] > 0 || shared->totalNum[8] > 0) {
+      avg3 = (shared->totalDistance[6] + shared->totalDistance[8]) / (shared->totalNum[6] + shared->totalNum[8]);
+    }
+
+    int adjPrio = priority_map(thisPriority, thisDistance, shared->min10, avg1, avg2, avg3);
+    int prioIdx = atomic_add(&shared->totalMappedNum[adjPrio], 1);
+
+    *prio = adjPrio;
+    return prioIdx;
+  }
+
+  *prio = 0;
+  return 0;
+}
+
+void insert_dfs(__local struct shared_data *shared, uint localId, struct modelinfo minfo, int adjPrio, int distance, int prioIdx) {
+  int size = minfo.size;
+
+  if (localId < size) {
+    // calculate base offset into dfs based on number of faces with a lower priority
+    int baseOff = count_prio_offset(shared, adjPrio);
+    // store into face array offset array by unique index
+    shared->dfs[baseOff + prioIdx] = ((int) localId << 16) | distance;
+  }
+}
+
+void sort_and_insert(
+  __local struct shared_data *shared,
+  __global const float4 *uv,
+  __global const float4 *tempuv,
+  __global int4 *vout,
+  __global float4 *uvout,
+  uint localId, struct modelinfo minfo, int thisPriority, int thisDistance, int4 thisrvA, int4 thisrvB, int4 thisrvC) {
+  /* compute face distance */
+  int size = minfo.size;
+
+  if (localId < size) {
+    int outOffset = minfo.idx;
+    int uvOffset = minfo.uvOffset;
+    int flags = minfo.flags;
+    int4 pos = (int4)(minfo.x, minfo.y, minfo.z, 0);
+
+    const int priorityOffset = count_prio_offset(shared, thisPriority);
+    const int numOfPriority = shared->totalMappedNum[thisPriority];
+    int start = priorityOffset; // index of first face with this priority
+    int end = priorityOffset + numOfPriority; // index of last face with this priority
+    int myOffset = priorityOffset;
+
+    // we only have to order faces against others of the same priority
+    // calculate position this face will be in
+    for (int i = start; i < end; ++i) {
+      int d1 = shared->dfs[i];
+      int theirId = d1 >> 16;
+      int theirDistance = d1 & 0xffff;
+
+      // the closest faces draw last, so have the highest index
+      // if two faces have the same distance, the one with the
+      // higher id draws last
+      if ((theirDistance > thisDistance)
+        || (theirDistance == thisDistance && theirId < localId)) {
+        ++myOffset;
+      }
+    }
+
+    // position vertices in scene and write to out buffer
+    vout[outOffset + myOffset * 3]     = pos + thisrvA;
+    vout[outOffset + myOffset * 3 + 1] = pos + thisrvB;
+    vout[outOffset + myOffset * 3 + 2] = pos + thisrvC;
+
+    if (uvOffset < 0) {
+      uvout[outOffset + myOffset * 3]     = (float4)(0, 0, 0, 0);
+      uvout[outOffset + myOffset * 3 + 1] = (float4)(0, 0, 0, 0);
+      uvout[outOffset + myOffset * 3 + 2] = (float4)(0, 0, 0, 0);
+    } else if (flags >= 0) {
+      uvout[outOffset + myOffset * 3]     = tempuv[uvOffset + localId * 3];
+      uvout[outOffset + myOffset * 3 + 1] = tempuv[uvOffset + localId * 3 + 1];
+      uvout[outOffset + myOffset * 3 + 2] = tempuv[uvOffset + localId * 3 + 2];
+    } else {
+      uvout[outOffset + myOffset * 3]     = uv[uvOffset + localId * 3];
+      uvout[outOffset + myOffset * 3 + 1] = uv[uvOffset + localId * 3 + 1];
+      uvout[outOffset + myOffset * 3 + 2] = uv[uvOffset + localId * 3 + 2];
+    }
+  }
+}