gpu: add opencl support for macos

This allows using opencl as an alternative to opengl compute shaders on
macos, which does not support compute shaders. Now, macos can finally
use the extended draw distance feature of the gpu plugin.

This also includes code for using opencl with Windows and Linux if we
want to enable that in the future. A copy of the existing compute
shaders has been checked in and ported to opencl, keeping support for
opengl compute shaders on Windows and Linux.

Co-authored-by: Paul Norton <napkinorton@gmail.com>
This commit is contained in:
Adam
2021-02-10 21:01:53 -05:00
parent 26f26308ab
commit 13efaa6a0c
9 changed files with 1445 additions and 175 deletions

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2021, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package net.runelite.client.plugins.gpu;
import org.jocl.Pointer;
import org.jocl.cl_mem;
/**
 * Bookkeeping for one OpenGL buffer object and, when OpenCL compute is in use,
 * the OpenCL memory object created from it.
 */
class GLBuffer
{
	/** OpenGL buffer name; -1 while no buffer has been generated. */
	int glBufferId = -1;

	/** Size in bytes of the currently allocated buffer storage; -1 while nothing has been allocated. */
	int size = -1;

	/** OpenCL memory object sharing this GL buffer, or null when none has been created. */
	cl_mem cl_mem;

	/**
	 * Wraps the OpenCL memory object in a JOCL pointer.
	 *
	 * @return a pointer to {@link #cl_mem}, or null when there is no OpenCL memory object
	 */
	Pointer ptr()
	{
		if (cl_mem == null)
		{
			return null;
		}

		return Pointer.to(cl_mem);
	}
}

View File

@@ -29,6 +29,11 @@ import com.google.inject.Provides;
import com.jogamp.nativewindow.awt.AWTGraphicsConfiguration;
import com.jogamp.nativewindow.awt.JAWTWindow;
import com.jogamp.opengl.GL;
import static com.jogamp.opengl.GL.GL_ARRAY_BUFFER;
import static com.jogamp.opengl.GL.GL_DYNAMIC_DRAW;
import static com.jogamp.opengl.GL2ES2.GL_STREAM_DRAW;
import static com.jogamp.opengl.GL2ES3.GL_STATIC_COPY;
import static com.jogamp.opengl.GL2ES3.GL_UNIFORM_BUFFER;
import com.jogamp.opengl.GL4;
import com.jogamp.opengl.GLCapabilities;
import com.jogamp.opengl.GLContext;
@@ -45,6 +50,7 @@ import java.awt.Image;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferInt;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
@@ -93,6 +99,10 @@ import net.runelite.client.plugins.gpu.config.UIScalingMode;
import net.runelite.client.plugins.gpu.template.Template;
import net.runelite.client.ui.DrawManager;
import net.runelite.client.util.OSType;
import org.jocl.CL;
import static org.jocl.CL.CL_MEM_READ_ONLY;
import static org.jocl.CL.CL_MEM_WRITE_ONLY;
import static org.jocl.CL.clCreateFromGLBuffer;
@PluginDescriptor(
name = "GPU",
@@ -105,8 +115,8 @@ import net.runelite.client.util.OSType;
public class GpuPlugin extends Plugin implements DrawCallbacks
{
// This is the maximum number of triangles the compute shaders support
private static final int MAX_TRIANGLE = 4096;
private static final int SMALL_TRIANGLE_COUNT = 512;
static final int MAX_TRIANGLE = 4096;
static final int SMALL_TRIANGLE_COUNT = 512;
private static final int FLAG_SCENE_BUFFER = Integer.MIN_VALUE;
private static final int DEFAULT_DISTANCE = 25;
static final int MAX_DISTANCE = 90;
@@ -115,6 +125,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Inject
private Client client;
@Inject
private OpenCLManager openCLManager;
@Inject
private ClientThread clientThread;
@@ -133,7 +146,14 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Inject
private PluginManager pluginManager;
private boolean useComputeShaders;
/**
 * Selects which backend, if any, performs the per-frame compute pass.
 */
enum ComputeMode
{
// No compute pass: selected when the compute shader config option is off
NONE,
// OpenGL compute shaders: selected on every OS other than macOS
OPENGL,
// OpenCL kernels: selected on macOS in place of OpenGL compute shaders
OPENCL
}
private ComputeMode computeMode = ComputeMode.NONE;
private Canvas canvas;
private JAWTWindow jawtWindow;
@@ -182,23 +202,22 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private int texSceneHandle;
private int rboSceneHandle;
// scene vertex buffer id
private int bufferId;
// scene uv buffer id
private int uvBufferId;
// scene vertex buffer
private final GLBuffer sceneVertexBuffer = new GLBuffer();
// scene uv buffer
private final GLBuffer sceneUvBuffer = new GLBuffer();
private int tmpBufferId; // temporary scene vertex buffer
private int tmpUvBufferId; // temporary scene uv buffer
private int tmpModelBufferId; // scene model buffer, large
private int tmpModelBufferSmallId; // scene model buffer, small
private int tmpModelBufferUnorderedId;
private int tmpOutBufferId; // target vertex buffer for compute shaders
private int tmpOutUvBufferId; // target uv buffer for compute shaders
private final GLBuffer tmpVertexBuffer = new GLBuffer(); // temporary scene vertex buffer
private final GLBuffer tmpUvBuffer = new GLBuffer(); // temporary scene uv buffer
private final GLBuffer tmpModelBufferLarge = new GLBuffer(); // scene model buffer, large
private final GLBuffer tmpModelBufferSmall = new GLBuffer(); // scene model buffer, small
private final GLBuffer tmpModelBufferUnordered = new GLBuffer(); // scene model buffer, unordered
private final GLBuffer tmpOutBuffer = new GLBuffer(); // target vertex buffer for compute shaders
private final GLBuffer tmpOutUvBuffer = new GLBuffer(); // target uv buffer for compute shaders
private int textureArrayId;
private int uniformBufferId;
private final IntBuffer uniformBuffer = GpuIntBuffer.allocateDirect(5 + 3 + 2048 * 4);
private final GLBuffer uniformBuffer = new GLBuffer();
private final float[] textureOffsets = new float[128];
private GpuIntBuffer vertexBuffer;
@@ -278,7 +297,6 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
{
try
{
bufferId = uvBufferId = uniformBufferId = tmpBufferId = tmpUvBufferId = tmpModelBufferId = tmpModelBufferSmallId = tmpModelBufferUnorderedId = tmpOutBufferId = tmpOutUvBufferId = -1;
texSceneHandle = fboSceneHandle = rboSceneHandle = -1; // AA FBO
unorderedModels = smallModels = largeModels = 0;
drawingModel = false;
@@ -290,8 +308,9 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
return false;
}
// OSX supports up to OpenGL 4.1, however 4.3 is required for compute shaders
useComputeShaders = config.useComputeShaders() && OSType.getOSType() != OSType.MacOS;
computeMode = config.useComputeShaders()
? (OSType.getOSType() == OSType.MacOS ? ComputeMode.OPENCL : ComputeMode.OPENGL)
: ComputeMode.NONE;
canvas.setIgnoreRepaint(true);
@@ -397,7 +416,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
if (client.getGameState() == GameState.LOGGED_IN)
{
uploadScene();
invokeOnMainThread(this::uploadScene);
}
}
catch (Throwable e)
@@ -433,6 +452,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
invokeOnMainThread(() ->
{
openCLManager.cleanup();
if (gl != null)
{
if (textureArrayId != -1)
@@ -441,11 +462,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
textureArrayId = -1;
}
if (uniformBufferId != -1)
{
glDeleteBuffer(gl, uniformBufferId);
uniformBufferId = -1;
}
destroyGlBuffer(uniformBuffer);
shutdownBuffers();
shutdownInterfaceTexture();
@@ -519,12 +536,16 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
glProgram = PROGRAM.compile(gl, template);
glUiProgram = UI_PROGRAM.compile(gl, template);
if (useComputeShaders)
if (computeMode == ComputeMode.OPENGL)
{
glComputeProgram = COMPUTE_PROGRAM.compile(gl, template);
glSmallComputeProgram = SMALL_COMPUTE_PROGRAM.compile(gl, template);
glUnorderedComputeProgram = UNORDERED_COMPUTE_PROGRAM.compile(gl, template);
}
else if (computeMode == ComputeMode.OPENCL)
{
openCLManager.init(gl);
}
initUniforms();
}
@@ -593,8 +614,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
-1f, 1f, 0.0f, 0.0f, 0f // top left
});
vboUiBuf.rewind();
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vboUiHandle);
gl.glBufferData(gl.GL_ARRAY_BUFFER, vboUiBuf.capacity() * Float.BYTES, vboUiBuf, gl.GL_STATIC_DRAW);
gl.glBindBuffer(GL_ARRAY_BUFFER, vboUiHandle);
gl.glBufferData(GL_ARRAY_BUFFER, vboUiBuf.capacity() * Float.BYTES, vboUiBuf, gl.GL_STATIC_DRAW);
// position attribute
gl.glVertexAttribPointer(0, 3, gl.GL_FLOAT, false, 5 * Float.BYTES, 0);
@@ -605,7 +626,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
gl.glEnableVertexAttribArray(1);
// unbind VBO
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0);
gl.glBindBuffer(GL_ARRAY_BUFFER, 0);
}
private void shutdownVao()
@@ -622,71 +643,49 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private void initBuffers()
{
bufferId = glGenBuffers(gl);
uvBufferId = glGenBuffers(gl);
tmpBufferId = glGenBuffers(gl);
tmpUvBufferId = glGenBuffers(gl);
tmpModelBufferId = glGenBuffers(gl);
tmpModelBufferSmallId = glGenBuffers(gl);
tmpModelBufferUnorderedId = glGenBuffers(gl);
tmpOutBufferId = glGenBuffers(gl);
tmpOutUvBufferId = glGenBuffers(gl);
initGlBuffer(sceneVertexBuffer);
initGlBuffer(sceneUvBuffer);
initGlBuffer(tmpVertexBuffer);
initGlBuffer(tmpUvBuffer);
initGlBuffer(tmpModelBufferLarge);
initGlBuffer(tmpModelBufferSmall);
initGlBuffer(tmpModelBufferUnordered);
initGlBuffer(tmpOutBuffer);
initGlBuffer(tmpOutUvBuffer);
}
/**
 * Generates a new OpenGL buffer name and stores it on the given wrapper.
 *
 * @param glBuffer the wrapper that receives the generated buffer id
 */
private void initGlBuffer(GLBuffer glBuffer)
{
	final int generatedId = glGenBuffers(gl);
	glBuffer.glBufferId = generatedId;
}
private void shutdownBuffers()
{
if (bufferId != -1)
{
glDeleteBuffer(gl, bufferId);
bufferId = -1;
}
destroyGlBuffer(sceneVertexBuffer);
destroyGlBuffer(sceneUvBuffer);
if (uvBufferId != -1)
{
glDeleteBuffer(gl, uvBufferId);
uvBufferId = -1;
}
destroyGlBuffer(tmpVertexBuffer);
destroyGlBuffer(tmpUvBuffer);
destroyGlBuffer(tmpModelBufferLarge);
destroyGlBuffer(tmpModelBufferSmall);
destroyGlBuffer(tmpModelBufferUnordered);
destroyGlBuffer(tmpOutBuffer);
destroyGlBuffer(tmpOutUvBuffer);
}
if (tmpBufferId != -1)
private void destroyGlBuffer(GLBuffer glBuffer)
{
if (glBuffer.glBufferId != -1)
{
glDeleteBuffer(gl, tmpBufferId);
tmpBufferId = -1;
glDeleteBuffer(gl, glBuffer.glBufferId);
glBuffer.glBufferId = -1;
}
glBuffer.size = -1;
if (tmpUvBufferId != -1)
if (glBuffer.cl_mem != null)
{
glDeleteBuffer(gl, tmpUvBufferId);
tmpUvBufferId = -1;
}
if (tmpModelBufferId != -1)
{
glDeleteBuffer(gl, tmpModelBufferId);
tmpModelBufferId = -1;
}
if (tmpModelBufferSmallId != -1)
{
glDeleteBuffer(gl, tmpModelBufferSmallId);
tmpModelBufferSmallId = -1;
}
if (tmpModelBufferUnorderedId != -1)
{
glDeleteBuffer(gl, tmpModelBufferUnorderedId);
tmpModelBufferUnorderedId = -1;
}
if (tmpOutBufferId != -1)
{
glDeleteBuffer(gl, tmpOutBufferId);
tmpOutBufferId = -1;
}
if (tmpOutUvBufferId != -1)
{
glDeleteBuffer(gl, tmpOutUvBufferId);
tmpOutUvBufferId = -1;
CL.clReleaseMemObject(glBuffer.cl_mem);
glBuffer.cl_mem = null;
}
}
@@ -709,21 +708,21 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private void initUniformBuffer()
{
uniformBufferId = glGenBuffers(gl);
gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId);
uniformBuffer.clear();
uniformBuffer.put(new int[8]);
initGlBuffer(uniformBuffer);
IntBuffer uniformBuf = GpuIntBuffer.allocateDirect(8 + 2048 * 4);
uniformBuf.put(new int[8]); // uniform block
final int[] pad = new int[2];
for (int i = 0; i < 2048; i++)
{
uniformBuffer.put(Perspective.SINE[i]);
uniformBuffer.put(Perspective.COSINE[i]);
uniformBuffer.put(pad);
uniformBuf.put(Perspective.SINE[i]);
uniformBuf.put(Perspective.COSINE[i]);
uniformBuf.put(pad); // ivec2 alignment in std140 is 16 bytes
}
uniformBuffer.flip();
uniformBuf.flip();
gl.glBufferData(gl.GL_UNIFORM_BUFFER, uniformBuffer.limit() * Integer.BYTES, uniformBuffer, gl.GL_DYNAMIC_DRAW);
gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0);
updateBuffer(uniformBuffer, GL_UNIFORM_BUFFER, uniformBuf.limit() * Integer.BYTES, uniformBuf, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
gl.glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
private void initAAFbo(int width, int height, int aaSamples)
@@ -785,9 +784,11 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
invokeOnMainThread(() ->
{
// UBO. Only the first 32 bytes get modified here, the rest is the constant sin/cos table.
gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, uniformBufferId);
uniformBuffer.clear();
uniformBuffer
// We can reuse the vertex buffer since it isn't used yet.
vertexBuffer.clear();
vertexBuffer.ensureCapacity(32);
IntBuffer uniformBuf = vertexBuffer.getBuffer();
uniformBuf
.put(yaw)
.put(pitch)
.put(client.getCenterX())
@@ -796,12 +797,14 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
.put(cameraX)
.put(cameraY)
.put(cameraZ);
uniformBuffer.flip();
uniformBuf.flip();
gl.glBufferSubData(gl.GL_UNIFORM_BUFFER, 0, uniformBuffer.limit() * Integer.BYTES, uniformBuffer);
gl.glBindBuffer(gl.GL_UNIFORM_BUFFER, 0);
gl.glBindBuffer(GL_UNIFORM_BUFFER, uniformBuffer.glBufferId);
gl.glBufferSubData(GL_UNIFORM_BUFFER, 0, uniformBuf.limit() * Integer.BYTES, uniformBuf);
gl.glBindBuffer(GL_UNIFORM_BUFFER, 0);
gl.glBindBufferBase(gl.GL_UNIFORM_BUFFER, 0, uniformBufferId);
gl.glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniformBuffer.glBufferId);
uniformBuf.clear();
});
}
@@ -813,7 +816,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private void postDraw()
{
if (!useComputeShaders)
if (computeMode == ComputeMode.NONE)
{
// Upload buffers
vertexBuffer.flip();
@@ -822,12 +825,8 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
IntBuffer vertexBuffer = this.vertexBuffer.getBuffer();
FloatBuffer uvBuffer = this.uvBuffer.getBuffer();
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW);
updateBuffer(tmpVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_DYNAMIC_DRAW, 0L);
updateBuffer(tmpUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_DYNAMIC_DRAW, 0L);
return;
}
@@ -844,79 +843,91 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
IntBuffer modelBufferSmall = this.modelBufferSmall.getBuffer();
IntBuffer modelBufferUnordered = this.modelBufferUnordered.getBuffer();
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_DYNAMIC_DRAW);
// temp buffers
updateBuffer(tmpVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
updateBuffer(tmpUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpUvBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_DYNAMIC_DRAW);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, gl.GL_DYNAMIC_DRAW);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferSmallId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, gl.GL_DYNAMIC_DRAW);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpModelBufferUnorderedId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, gl.GL_DYNAMIC_DRAW);
// model buffers
updateBuffer(tmpModelBufferLarge, GL_ARRAY_BUFFER, modelBuffer.limit() * Integer.BYTES, modelBuffer, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
updateBuffer(tmpModelBufferSmall, GL_ARRAY_BUFFER, modelBufferSmall.limit() * Integer.BYTES, modelBufferSmall, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
updateBuffer(tmpModelBufferUnordered, GL_ARRAY_BUFFER, modelBufferUnordered.limit() * Integer.BYTES, modelBufferUnordered, GL_DYNAMIC_DRAW, CL_MEM_READ_ONLY);
// Output buffers
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER,
updateBuffer(tmpOutBuffer,
GL_ARRAY_BUFFER,
targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes
null,
gl.GL_STREAM_DRAW);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, tmpOutUvBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER,
targetBufferOffset * 16,
GL_STREAM_DRAW,
CL_MEM_WRITE_ONLY);
updateBuffer(tmpOutUvBuffer,
GL_ARRAY_BUFFER,
targetBufferOffset * 16, // each vertex is an ivec4, which is 16 bytes
null,
gl.GL_STREAM_DRAW);
GL_STREAM_DRAW,
CL_MEM_WRITE_ONLY);
// Bind UBO to compute programs
gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0);
gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0);
if (computeMode == ComputeMode.OPENCL)
{
// The docs for clEnqueueAcquireGLObjects say all pending GL operations must be completed before calling
// clEnqueueAcquireGLObjects, and recommend calling glFinish() as the only portable way to do that.
// However, no issues have been observed from not calling it, so it is left disabled for now.
// gl.glFinish();
openCLManager.compute(
unorderedModels, smallModels, largeModels,
sceneVertexBuffer, sceneUvBuffer,
tmpVertexBuffer, tmpUvBuffer,
tmpModelBufferUnordered, tmpModelBufferSmall, tmpModelBufferLarge,
tmpOutBuffer, tmpOutUvBuffer,
uniformBuffer);
return;
}
/*
* Compute is split into three separate programs: 'unordered', 'small', and 'large'
* to save on GPU resources. Small will sort <= 512 faces, large will do <= 4096.
*/
// Bind UBO to compute programs
gl.glUniformBlockBinding(glSmallComputeProgram, uniBlockSmall, 0);
gl.glUniformBlockBinding(glComputeProgram, uniBlockLarge, 0);
// unordered
gl.glUseProgram(glUnorderedComputeProgram);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnorderedId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferUnordered.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
gl.glDispatchCompute(unorderedModels, 1, 1);
// small
gl.glUseProgram(glSmallComputeProgram);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmallId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferSmall.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
gl.glDispatchCompute(smallModels, 1, 1);
// large
gl.glUseProgram(glComputeProgram);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, this.bufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, this.uvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, tmpModelBufferLarge.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 1, sceneVertexBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 2, tmpVertexBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 3, tmpOutBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 4, tmpOutUvBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 5, sceneUvBuffer.glBufferId);
gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 6, tmpUvBuffer.glBufferId);
gl.glDispatchCompute(largeModels, 1, 1);
}
@@ -926,7 +937,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
SceneTilePaint paint, int tileZ, int tileX, int tileY,
int zoom, int centerX, int centerY)
{
if (!useComputeShaders)
if (computeMode == ComputeMode.NONE)
{
targetBufferOffset += sceneUploader.upload(paint,
tileZ, tileX, tileY,
@@ -963,7 +974,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
SceneTileModel model, int tileZ, int tileX, int tileY,
int zoom, int centerX, int centerY)
{
if (!useComputeShaders)
if (computeMode == ComputeMode.NONE)
{
targetBufferOffset += sceneUploader.upload(model,
tileX, tileY,
@@ -1131,7 +1142,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
// Ceil the sizes because even if the size is 599.1 we want to treat it as size 600 (i.e. render to the x=599 pixel).
renderViewportHeight = (int) Math.ceil(scaleFactorY * (renderViewportHeight)) + padding * 2;
renderViewportWidth = (int) Math.ceil(scaleFactorX * (renderViewportWidth )) + padding * 2;
renderViewportWidth = (int) Math.ceil(scaleFactorX * (renderViewportWidth )) + padding * 2;
// Floor the offsets because even if the offset is 4.9, we want to render to the x=4 pixel anyway.
renderHeightOff = (int) Math.floor(scaleFactorY * (renderHeightOff)) - padding;
@@ -1195,27 +1206,36 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
gl.glBindVertexArray(vaoHandle);
int vertexBuffer, uvBuffer;
if (useComputeShaders)
if (computeMode != ComputeMode.NONE)
{
// Before reading the SSBOs written to from postDrawScene() we must insert a barrier
gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT);
if (computeMode == ComputeMode.OPENGL)
{
// Before reading the SSBOs written to from postDrawScene() we must insert a barrier
gl.glMemoryBarrier(gl.GL_SHADER_STORAGE_BARRIER_BIT);
}
else
{
// Wait for the command queue to finish, so that we know the compute is done
openCLManager.finish();
}
// Draw using the output buffer of the compute
vertexBuffer = tmpOutBufferId;
uvBuffer = tmpOutUvBufferId;
vertexBuffer = tmpOutBuffer.glBufferId;
uvBuffer = tmpOutUvBuffer.glBufferId;
}
else
{
// Only use the temporary buffers, which will contain the full scene
vertexBuffer = tmpBufferId;
uvBuffer = tmpUvBufferId;
vertexBuffer = tmpVertexBuffer.glBufferId;
uvBuffer = tmpUvBuffer.glBufferId;
}
gl.glEnableVertexAttribArray(0);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vertexBuffer);
gl.glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
gl.glVertexAttribIPointer(0, 4, gl.GL_INT, 0, 0);
gl.glEnableVertexAttribArray(1);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, uvBuffer);
gl.glBindBuffer(GL_ARRAY_BUFFER, uvBuffer);
gl.glVertexAttribPointer(1, 4, gl.GL_FLOAT, false, 0, 0);
gl.glDrawArrays(gl.GL_TRIANGLES, 0, targetBufferOffset);
@@ -1400,12 +1420,12 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Subscribe
public void onGameStateChanged(GameStateChanged gameStateChanged)
{
if (!useComputeShaders || gameStateChanged.getGameState() != GameState.LOGGED_IN)
if (computeMode == ComputeMode.NONE || gameStateChanged.getGameState() != GameState.LOGGED_IN)
{
return;
}
uploadScene();
invokeOnMainThread(this::uploadScene);
}
private void uploadScene()
@@ -1421,13 +1441,10 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
IntBuffer vertexBuffer = this.vertexBuffer.getBuffer();
FloatBuffer uvBuffer = this.uvBuffer.getBuffer();
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, bufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, gl.GL_STATIC_COPY);
updateBuffer(sceneVertexBuffer, GL_ARRAY_BUFFER, vertexBuffer.limit() * Integer.BYTES, vertexBuffer, GL_STATIC_COPY, CL_MEM_READ_ONLY);
updateBuffer(sceneUvBuffer, GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, GL_STATIC_COPY, CL_MEM_READ_ONLY);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, uvBufferId);
gl.glBufferData(gl.GL_ARRAY_BUFFER, uvBuffer.limit() * Float.BYTES, uvBuffer, gl.GL_STATIC_COPY);
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0);
gl.glBindBuffer(GL_ARRAY_BUFFER, 0);
vertexBuffer.clear();
uvBuffer.clear();
@@ -1492,7 +1509,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
@Override
public void draw(Renderable renderable, int orientation, int pitchSin, int pitchCos, int yawSin, int yawCos, int x, int y, int z, long hash)
{
if (!useComputeShaders)
if (computeMode == ComputeMode.NONE)
{
Model model = renderable instanceof Model ? (Model) renderable : renderable.getModel();
if (model != null)
@@ -1673,7 +1690,7 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
private int getDrawDistance()
{
final int limit = useComputeShaders ? MAX_DISTANCE : DEFAULT_DISTANCE;
final int limit = computeMode != ComputeMode.NONE ? MAX_DISTANCE : DEFAULT_DISTANCE;
return Ints.constrainToRange(config.drawDistance(), 0, limit);
}
@@ -1688,4 +1705,36 @@ public class GpuPlugin extends Plugin implements DrawCallbacks
runnable.run();
}
}
/**
 * Binds the given buffer to {@code target} and uploads {@code data} into it.
 * <p>
 * When {@code size} exceeds the currently tracked size the storage is reallocated
 * with {@code glBufferData}; in OpenCL compute mode the OpenCL memory object
 * sharing the buffer is then released and re-created from the new storage.
 * Otherwise the existing storage is reused and, if {@code data} is non-null,
 * updated with {@code glBufferSubData}. The buffer is left bound to {@code target}.
 *
 * @param glBuffer the buffer to update
 * @param target   the GL binding target, e.g. {@code GL_ARRAY_BUFFER}
 * @param size     the size of the data in bytes
 * @param data     the data to upload, or null to only ensure capacity
 * @param usage    the GL usage hint applied when storage is (re)allocated
 * @param clFlags  the flags passed to {@code clCreateFromGLBuffer} in OpenCL mode
 */
private void updateBuffer(GLBuffer glBuffer, int target, int size, Buffer data, int usage, long clFlags)
{
	gl.glBindBuffer(target, glBuffer.glBufferId);

	if (size > glBuffer.size)
	{
		log.trace("Buffer resize: {} {} -> {}", glBuffer, glBuffer.size, size);

		glBuffer.size = size;
		gl.glBufferData(target, size, data, usage);

		if (computeMode == ComputeMode.OPENCL)
		{
			if (glBuffer.cl_mem != null)
			{
				CL.clReleaseMemObject(glBuffer.cl_mem);
				// Drop the handle right away so a failure while re-creating the memory
				// object below cannot leave a released handle behind to be released
				// a second time when the buffer is destroyed.
				glBuffer.cl_mem = null;
			}

			// An empty buffer gets no OpenCL memory object
			if (size > 0)
			{
				glBuffer.cl_mem = clCreateFromGLBuffer(openCLManager.context, clFlags, glBuffer.glBufferId, null);
			}
		}
	}
	else if (data != null)
	{
		gl.glBufferSubData(target, 0, size, data);
	}
}
}

View File

@@ -0,0 +1,521 @@
/*
* Copyright (c) 2021, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package net.runelite.client.plugins.gpu;
import com.google.common.base.Charsets;
import com.jogamp.nativewindow.NativeSurface;
import com.jogamp.opengl.GL4;
import com.jogamp.opengl.GLContext;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;
import javax.inject.Singleton;
import jogamp.opengl.GLContextImpl;
import jogamp.opengl.GLDrawableImpl;
import jogamp.opengl.egl.EGLContext;
import jogamp.opengl.macosx.cgl.CGL;
import jogamp.opengl.windows.wgl.WindowsWGLContext;
import jogamp.opengl.x11.glx.X11GLXContext;
import lombok.extern.slf4j.Slf4j;
import net.runelite.client.plugins.gpu.template.Template;
import net.runelite.client.util.OSType;
import org.jocl.CL;
import static org.jocl.CL.*;
import org.jocl.CLException;
import org.jocl.Pointer;
import org.jocl.Sizeof;
import org.jocl.cl_command_queue;
import org.jocl.cl_context;
import org.jocl.cl_context_properties;
import org.jocl.cl_device_id;
import org.jocl.cl_event;
import org.jocl.cl_kernel;
import org.jocl.cl_mem;
import org.jocl.cl_platform_id;
import org.jocl.cl_program;
@Singleton
@Slf4j
class OpenCLManager
{
private static final String GL_SHARING_PLATFORM_EXT = "cl_khr_gl_sharing";
private static final String KERNEL_NAME_UNORDERED = "computeUnordered";
private static final String KERNEL_NAME_LARGE = "computeLarge";
private static final int MIN_WORK_GROUP_SIZE = 256;
private static final int SMALL_SIZE = GpuPlugin.SMALL_TRIANGLE_COUNT;
private static final int LARGE_SIZE = GpuPlugin.MAX_TRIANGLE;
// struct shared_data {
// int totalNum[12];
// int totalDistance[12];
// int totalMappedNum[18];
// int min10;
// int dfs[0];
// };
private static final int SHARED_SIZE = 12 + 12 + 18 + 1; // in ints
// The number of faces each worker processes in the two kernels
private int largeFaceCount;
private int smallFaceCount;
private cl_platform_id platform;
private cl_device_id device;
cl_context context;
private cl_command_queue commandQueue;
private cl_program programUnordered;
private cl_program programSmall;
private cl_program programLarge;
private cl_kernel kernelUnordered;
private cl_kernel kernelSmall;
private cl_kernel kernelLarge;
/**
 * Sets up OpenCL for the current operating system, then checks the minimum
 * work group size, creates the command queue and compiles the programs.
 *
 * @param gl the OpenGL context handed to the OS-specific context setup
 * @throws RuntimeException if the operating system is not Windows, Linux or macOS
 */
void init(GL4 gl)
{
	// Enable JOCL exceptions
	CL.setExceptionsEnabled(true);

	final OSType osType = OSType.getOSType();
	switch (osType)
	{
		case MacOS:
			initMacOS(gl);
			break;
		case Windows:
		case Linux:
			initPlatform();
			initDevice();
			initContext(gl);
			break;
		default:
			throw new RuntimeException("Unsupported OS Type " + osType.name());
	}

	ensureMinWorkGroupSize();
	initQueue();
	compilePrograms();
}
/**
 * Releases every OpenCL object held by this manager that is currently set
 * (programs, kernels, command queue, context and device) and clears the
 * corresponding field, so calling it again releases nothing twice.
 */
void cleanup()
{
	// programs
	if (programUnordered != null)
	{
		clReleaseProgram(programUnordered);
		programUnordered = null;
	}

	if (programSmall != null)
	{
		clReleaseProgram(programSmall);
		programSmall = null;
	}

	if (programLarge != null)
	{
		clReleaseProgram(programLarge);
		programLarge = null;
	}

	// kernels
	if (kernelUnordered != null)
	{
		clReleaseKernel(kernelUnordered);
		kernelUnordered = null;
	}

	if (kernelSmall != null)
	{
		clReleaseKernel(kernelSmall);
		kernelSmall = null;
	}

	if (kernelLarge != null)
	{
		clReleaseKernel(kernelLarge);
		kernelLarge = null;
	}

	// queue, context and device
	if (commandQueue != null)
	{
		clReleaseCommandQueue(commandQueue);
		commandQueue = null;
	}

	if (context != null)
	{
		clReleaseContext(context);
		context = null;
	}

	if (device != null)
	{
		clReleaseDevice(device);
		device = null;
	}
}
/**
 * Queries a string-valued platform parameter, logs it at debug level and
 * returns it.
 *
 * @param platform the platform to query
 * @param param the cl_platform_info parameter to read
 * @return the parameter value without its trailing NUL terminator
 */
private String logPlatformInfo(cl_platform_id platform, int param)
{
	// First call only asks how many bytes the value needs
	long[] size = new long[1];
	clGetPlatformInfo(platform, param, 0, null, size);

	byte[] buffer = new byte[(int) size[0]];
	clGetPlatformInfo(platform, param, buffer.length, Pointer.to(buffer), null);

	// The reported size includes the C string terminator. Leaving it in the
	// Java string makes the last space-separated token end in '\0', which
	// breaks exact comparisons such as the extension check in initPlatform().
	int length = buffer.length;
	while (length > 0 && buffer[length - 1] == 0)
	{
		length--;
	}

	String platformInfo = new String(buffer, 0, length, Charsets.UTF_8);
	log.debug("Platform: {}, {}", stringFor_cl_platform_info(param), platformInfo);
	return platformInfo;
}
/**
 * Queries one build-info parameter of a program for the selected device and
 * logs it. The build log goes to trace level, everything else to debug.
 *
 * @param program the program to query
 * @param param one of CL_PROGRAM_BUILD_STATUS, CL_PROGRAM_BINARY_TYPE,
 *              CL_PROGRAM_BUILD_LOG or CL_PROGRAM_BUILD_OPTIONS
 * @throws IllegalArgumentException if param is none of the above
 */
private void logBuildInfo(cl_program program, int param)
{
	// First call only asks how many bytes the value needs
	long[] size = new long[1];
	clGetProgramBuildInfo(program, device, param, 0, null, size);

	// The native side writes values in host byte order, but a fresh ByteBuffer
	// is always BIG_ENDIAN. Without switching the order, getInt() below would
	// byte-swap the status / binary type on little-endian hosts.
	ByteBuffer buffer = ByteBuffer.allocateDirect((int) size[0])
		.order(java.nio.ByteOrder.nativeOrder());
	clGetProgramBuildInfo(program, device, param, buffer.limit(), Pointer.toBuffer(buffer), null);

	switch (param)
	{
		case CL_PROGRAM_BUILD_STATUS:
			log.debug("Build status: {}, {}", stringFor_cl_program_build_info(param), stringFor_cl_build_status(buffer.getInt()));
			break;
		case CL_PROGRAM_BINARY_TYPE:
			log.debug("Binary type: {}, {}", stringFor_cl_program_build_info(param), stringFor_cl_program_binary_type(buffer.getInt()));
			break;
		case CL_PROGRAM_BUILD_LOG:
			String buildLog = StandardCharsets.US_ASCII.decode(buffer).toString();
			log.trace("Build log: {}, {}", stringFor_cl_program_build_info(param), buildLog);
			break;
		case CL_PROGRAM_BUILD_OPTIONS:
			String message = StandardCharsets.US_ASCII.decode(buffer).toString();
			log.debug("Build options: {}, {}", stringFor_cl_program_build_info(param), message);
			break;
		default:
			throw new IllegalArgumentException("Unsupported program build info parameter: " + param);
	}
}
/**
 * Enumerates the OpenCL platforms, logs their details and selects the first
 * one into {@code platform}.
 *
 * Note that the GL sharing check runs for every enumerated platform, so a
 * single platform lacking the extension aborts initialisation even when it
 * is not the platform that would be selected.
 *
 * @throws RuntimeException if there are no platforms, or if any platform
 *                          does not list the GL sharing extension
 */
private void initPlatform()
{
	final int[] numPlatforms = new int[1];
	clGetPlatformIDs(0, null, numPlatforms);
	if (numPlatforms[0] == 0)
	{
		throw new RuntimeException("No compute platforms found");
	}

	final cl_platform_id[] available = new cl_platform_id[numPlatforms[0]];
	clGetPlatformIDs(available.length, available, null);

	for (cl_platform_id candidate : available)
	{
		log.debug("Found cl_platform_id {}", candidate);
		logPlatformInfo(candidate, CL_PLATFORM_PROFILE);
		logPlatformInfo(candidate, CL_PLATFORM_VERSION);
		logPlatformInfo(candidate, CL_PLATFORM_NAME);
		logPlatformInfo(candidate, CL_PLATFORM_VENDOR);

		// Extensions come back as one space separated list
		final String extensionList = logPlatformInfo(candidate, CL_PLATFORM_EXTENSIONS);
		boolean sharesWithGl = false;
		for (String extension : extensionList.split(" "))
		{
			if (extension.equals(GL_SHARING_PLATFORM_EXT))
			{
				sharesWithGl = true;
				break;
			}
		}

		if (!sharesWithGl)
		{
			throw new RuntimeException("Platform does not support OpenGL buffer sharing");
		}
	}

	platform = available[0];
	log.debug("Selected cl_platform_id {}", platform);
}
/**
 * Enumerates the GPU devices of the selected platform, logs the extensions
 * of each and selects the first one into {@code device}.
 *
 * @throws RuntimeException if the platform reports no GPU devices
 */
private void initDevice()
{
	final int[] numDevices = new int[1];
	clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, null, numDevices);
	if (numDevices[0] == 0)
	{
		throw new RuntimeException("No compute devices found");
	}

	final cl_device_id[] gpus = new cl_device_id[numDevices[0]];
	clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, gpus.length, gpus, null);

	for (cl_device_id gpu : gpus)
	{
		// Size query first, then the actual extension string
		final long[] extensionBytes = new long[1];
		clGetDeviceInfo(gpu, CL_DEVICE_EXTENSIONS, 0, null, extensionBytes);

		final byte[] extensions = new byte[(int) extensionBytes[0]];
		clGetDeviceInfo(gpu, CL_DEVICE_EXTENSIONS, extensions.length, Pointer.to(extensions), null);

		log.debug("Found cl_device_id: {}", gpu);
		log.debug("Device extensions: {}", new String(extensions, Charsets.UTF_8));
	}

	device = gpus[0];
	log.debug("Selected cl_device_id {}", device);
}
/**
 * Creates the OpenCL context for the selected platform and device, adding
 * the GL interop properties that match the windowing backend of the GL
 * context (GLX, WGL or EGL). For any other GL context type only the
 * platform property is set.
 *
 * @param gl the GL instance whose context must be current on this thread
 * @throws RuntimeException if the GL context is not current
 */
private void initContext(GL4 gl)
{
	// The platform is always part of the property list
	final cl_context_properties props = new cl_context_properties();
	props.addProperty(CL_CONTEXT_PLATFORM, platform);

	final GLContext glContext = gl.getContext();
	log.debug("Got GLContext of type {}", glContext.getClass().getSimpleName());
	if (!glContext.isCurrent())
	{
		throw new RuntimeException("Can't create OpenCL context from inactive GL Context");
	}

	// Native handles needed for the interop properties
	final long glHandle = glContext.getHandle();
	final NativeSurface surface = ((GLContextImpl) glContext).getDrawableImpl().getNativeSurface();

	if (glContext instanceof X11GLXContext)
	{
		final long display = surface.getDisplayHandle();
		props.addProperty(CL_GL_CONTEXT_KHR, glHandle);
		props.addProperty(CL_GLX_DISPLAY_KHR, display);
	}
	else if (glContext instanceof WindowsWGLContext)
	{
		// WGL wants the surface handle (HDC) rather than a display
		final long hdc = surface.getSurfaceHandle();
		props.addProperty(CL_GL_CONTEXT_KHR, glHandle);
		props.addProperty(CL_WGL_HDC_KHR, hdc);
	}
	else if (glContext instanceof EGLContext)
	{
		final long display = surface.getDisplayHandle();
		props.addProperty(CL_GL_CONTEXT_KHR, glHandle);
		props.addProperty(CL_EGL_DISPLAY_KHR, display);
	}

	log.debug("Creating context with props: {}", props);
	context = clCreateContext(props, 1, new cl_device_id[]{device}, null, null, null);
	log.debug("Created compute context {}", context);
}
/**
 * macOS initialisation: creates the OpenCL context from the CGL share group
 * of the current GL context, then asks that context which device drives the
 * current virtual screen and stores it in {@code device}.
 *
 * @param gl the GL instance whose context must be current on this thread
 * @throws RuntimeException if the GL context is not current
 */
private void initMacOS(GL4 gl)
{
	final GLContext current = gl.getContext();
	if (!current.isCurrent())
	{
		throw new RuntimeException("Can't create context from inactive GL");
	}

	// Share group of the CGL context that is current on this thread
	final long cglHandle = CGL.CGLGetCurrentContext();
	final long shareGroup = CGL.CGLGetShareGroup(cglHandle);

	final cl_context_properties props = new cl_context_properties();
	props.addProperty(CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, shareGroup);

	// No device list is passed; the context is built from the share group alone
	log.debug("Creating context with props: {}", props);
	context = clCreateContext(props, 0, null, null, null, null);

	// The device is read back from the freshly created context
	device = new cl_device_id();
	clGetGLContextInfoAPPLE(context, cglHandle, CL_CGL_DEVICE_FOR_CURRENT_VIRTUAL_SCREEN_APPLE,
		Sizeof.cl_device_id, Pointer.to(device), null);
	log.debug("Got macOS CLGL compute device {}", device);
}
/**
 * Verifies that the device supports work groups of at least
 * MIN_WORK_GROUP_SIZE items, then derives how many faces each worker of the
 * small and large kernels has to process.
 *
 * @throws RuntimeException if the device maximum is below MIN_WORK_GROUP_SIZE
 */
private void ensureMinWorkGroupSize()
{
	final long[] queried = new long[1];
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, Sizeof.size_t, Pointer.to(queried), null);
	log.debug("Device CL_DEVICE_MAX_WORK_GROUP_SIZE: {}", queried[0]);

	if (queried[0] < MIN_WORK_GROUP_SIZE)
	{
		throw new RuntimeException("Compute device does not support min work group size " + MIN_WORK_GROUP_SIZE);
	}

	// Round the device maximum down to a power of two
	final int groupSize = 0x80000000 >>> Integer.numberOfLeadingZeros((int) queried[0]);

	// A group never needs more workers than there are faces to process
	final int largeWorkers = Math.min(groupSize, LARGE_SIZE);
	largeFaceCount = LARGE_SIZE / largeWorkers;

	final int smallWorkers = Math.min(groupSize, SMALL_SIZE);
	smallFaceCount = SMALL_SIZE / smallWorkers;

	log.debug("Face counts: small: {}, large: {}", smallFaceCount, largeFaceCount);
}
/**
 * Creates the command queue on the selected device. Out-of-order execution
 * is requested only when the device reports support for it.
 */
private void initQueue()
{
	final long[] supported = new long[1];
	clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, Sizeof.cl_long, Pointer.to(supported), null);

	// Keep just the out-of-order bit from whatever the device supports
	final long queueProperties = supported[0] & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;

	commandQueue = clCreateCommandQueue(context, device, queueProperties, null);
	log.debug("Created command_queue {}, properties {}", commandQueue, queueProperties);
}
/**
 * Creates and builds an OpenCL program from source, logging the build
 * information afterwards.
 *
 * @param programSource the complete OpenCL C source of the program
 * @return the built program; the caller owns it and must release it
 * @throws CLException if the build fails; the build log is logged and the
 *                     half-built program is released before rethrowing
 */
private cl_program compileProgram(String programSource)
{
	log.trace("Compiling program:\n {}", programSource);
	cl_program program = clCreateProgramWithSource(context, 1, new String[]{programSource}, null, null);

	try
	{
		clBuildProgram(program, 0, null, null, null, null);
	}
	catch (CLException e)
	{
		// Best effort: a failure while fetching the log must not hide the build error
		try
		{
			logBuildInfo(program, CL_PROGRAM_BUILD_LOG);
		}
		catch (RuntimeException logFailure)
		{
			e.addSuppressed(logFailure);
		}

		// Nobody else holds this handle, so it would leak (and keep the
		// context referenced) if it were not released here
		try
		{
			CL.clReleaseProgram(program);
		}
		catch (RuntimeException releaseFailure)
		{
			e.addSuppressed(releaseFailure);
		}

		throw e;
	}

	logBuildInfo(program, CL_PROGRAM_BUILD_STATUS);
	logBuildInfo(program, CL_PROGRAM_BINARY_TYPE);
	logBuildInfo(program, CL_PROGRAM_BUILD_OPTIONS);
	logBuildInfo(program, CL_PROGRAM_BUILD_LOG);
	return program;
}
/**
 * Looks up a kernel entry point in a built program.
 *
 * @param program the built program containing the kernel
 * @param kernelName the name of the kernel function
 * @return the newly created kernel object
 */
private cl_kernel getKernel(cl_program program, String kernelName)
{
	final cl_kernel created = clCreateKernel(program, kernelName, null);
	log.debug("Loaded kernel {} for program {}", kernelName, program);
	return created;
}
/**
 * Loads the kernel sources through the template engine, builds the three
 * programs and fetches one kernel from each.
 *
 * The small and large programs are both generated from comp.cl and differ
 * only in the FACE_COUNT define, which is why both kernels are looked up
 * under KERNEL_NAME_LARGE.
 */
private void compilePrograms()
{
	// Source generation
	final String unorderedSource = new Template()
		.addInclude(OpenCLManager.class)
		.load("comp_unordered.cl");

	final String smallSource = new Template()
		.addInclude(OpenCLManager.class)
		.add(key ->
		{
			if (key.equals("FACE_COUNT"))
			{
				return "#define FACE_COUNT " + smallFaceCount;
			}
			return null;
		})
		.load("comp.cl");

	final String largeSource = new Template()
		.addInclude(OpenCLManager.class)
		.add(key ->
		{
			if (key.equals("FACE_COUNT"))
			{
				return "#define FACE_COUNT " + largeFaceCount;
			}
			return null;
		})
		.load("comp.cl");

	// Build
	programUnordered = compileProgram(unorderedSource);
	programSmall = compileProgram(smallSource);
	programLarge = compileProgram(largeSource);

	// Kernel lookup
	kernelUnordered = getKernel(programUnordered, KERNEL_NAME_UNORDERED);
	kernelSmall = getKernel(programSmall, KERNEL_NAME_LARGE);
	kernelLarge = getKernel(programLarge, KERNEL_NAME_LARGE);
}
/**
 * Enqueues one compute pass: acquires the shared GL buffers, enqueues the
 * kernel of every non-empty model category and finally hands the buffers
 * back to GL. Nothing is waited for here; see {@link #finish()}.
 *
 * @param unorderedModels number of models for the unordered kernel (6 work items per model)
 * @param smallModels number of models for the small kernel (one work group per model)
 * @param largeModels number of models for the large kernel (one work group per model)
 * @param sceneVertexBuffer bound as an input of all three kernels
 * @param sceneUvBuffer bound as an input of all three kernels
 * @param vertexBuffer bound as an input of all three kernels
 * @param uvBuffer bound as an input of all three kernels
 * @param unorderedBuffer first buffer argument of the unordered kernel
 * @param smallBuffer first buffer argument of the small kernel
 * @param largeBuffer first buffer argument of the large kernel
 * @param outVertexBuffer bound to all three kernels, presumably as vertex output
 * @param outUvBuffer bound to all three kernels, presumably as uv output
 * @param uniformBuffer bound to the small and large kernels only
 */
void compute(int unorderedModels, int smallModels, int largeModels,
	GLBuffer sceneVertexBuffer,
	GLBuffer sceneUvBuffer,
	GLBuffer vertexBuffer,
	GLBuffer uvBuffer,
	GLBuffer unorderedBuffer,
	GLBuffer smallBuffer,
	GLBuffer largeBuffer,
	GLBuffer outVertexBuffer,
	GLBuffer outUvBuffer,
	GLBuffer uniformBuffer
)
{
	cl_mem[] glBuffersAll = {
		sceneVertexBuffer.cl_mem,
		sceneUvBuffer.cl_mem,
		unorderedBuffer.cl_mem,
		smallBuffer.cl_mem,
		largeBuffer.cl_mem,
		vertexBuffer.cl_mem,
		uvBuffer.cl_mem,
		outVertexBuffer.cl_mem,
		outUvBuffer.cl_mem,
		uniformBuffer.cl_mem,
	};

	// Buffers that have no CL handle are left out of acquire/release
	cl_mem[] glBuffers = Arrays.stream(glBuffersAll)
		.filter(Objects::nonNull)
		.toArray(cl_mem[]::new);

	cl_event acquireGLBuffers = new cl_event();
	clEnqueueAcquireGLObjects(commandQueue, glBuffers.length, glBuffers, 0, null, acquireGLBuffers);

	// Every kernel waits for the acquire; the queue may be out-of-order
	cl_event[] afterAcquire = {acquireGLBuffers};

	cl_event[] computeEvents = {
		new cl_event(),
		new cl_event(),
		new cl_event()
	};
	int numComputeEvents = 0;

	if (unorderedModels > 0)
	{
		clSetKernelArg(kernelUnordered, 0, Sizeof.cl_mem, unorderedBuffer.ptr());
		clSetKernelArg(kernelUnordered, 1, Sizeof.cl_mem, sceneVertexBuffer.ptr());
		clSetKernelArg(kernelUnordered, 2, Sizeof.cl_mem, vertexBuffer.ptr());
		clSetKernelArg(kernelUnordered, 3, Sizeof.cl_mem, sceneUvBuffer.ptr());
		clSetKernelArg(kernelUnordered, 4, Sizeof.cl_mem, uvBuffer.ptr());
		clSetKernelArg(kernelUnordered, 5, Sizeof.cl_mem, outVertexBuffer.ptr());
		clSetKernelArg(kernelUnordered, 6, Sizeof.cl_mem, outUvBuffer.ptr());

		// queue compute call after acquireGLBuffers
		clEnqueueNDRangeKernel(commandQueue, kernelUnordered, 1, null,
			new long[]{unorderedModels * 6L}, new long[]{6}, 1, afterAcquire, computeEvents[numComputeEvents++]);
	}

	if (smallModels > 0)
	{
		// One work group per model, each worker handling smallFaceCount faces
		int smallWorkers = SMALL_SIZE / smallFaceCount;

		// Argument 0 is local memory: the shared_data header plus one int per face
		clSetKernelArg(kernelSmall, 0, (SHARED_SIZE + SMALL_SIZE) * Integer.BYTES, null);
		clSetKernelArg(kernelSmall, 1, Sizeof.cl_mem, smallBuffer.ptr());
		clSetKernelArg(kernelSmall, 2, Sizeof.cl_mem, sceneVertexBuffer.ptr());
		clSetKernelArg(kernelSmall, 3, Sizeof.cl_mem, vertexBuffer.ptr());
		clSetKernelArg(kernelSmall, 4, Sizeof.cl_mem, sceneUvBuffer.ptr());
		clSetKernelArg(kernelSmall, 5, Sizeof.cl_mem, uvBuffer.ptr());
		clSetKernelArg(kernelSmall, 6, Sizeof.cl_mem, outVertexBuffer.ptr());
		clSetKernelArg(kernelSmall, 7, Sizeof.cl_mem, outUvBuffer.ptr());
		clSetKernelArg(kernelSmall, 8, Sizeof.cl_mem, uniformBuffer.ptr());

		// Widen before multiplying, like the other two kernels, so the global size cannot overflow int
		clEnqueueNDRangeKernel(commandQueue, kernelSmall, 1, null,
			new long[]{(long) smallModels * smallWorkers}, new long[]{smallWorkers}, 1, afterAcquire, computeEvents[numComputeEvents++]);
	}

	if (largeModels > 0)
	{
		int largeWorkers = LARGE_SIZE / largeFaceCount;

		clSetKernelArg(kernelLarge, 0, (SHARED_SIZE + LARGE_SIZE) * Integer.BYTES, null);
		clSetKernelArg(kernelLarge, 1, Sizeof.cl_mem, largeBuffer.ptr());
		clSetKernelArg(kernelLarge, 2, Sizeof.cl_mem, sceneVertexBuffer.ptr());
		clSetKernelArg(kernelLarge, 3, Sizeof.cl_mem, vertexBuffer.ptr());
		clSetKernelArg(kernelLarge, 4, Sizeof.cl_mem, sceneUvBuffer.ptr());
		clSetKernelArg(kernelLarge, 5, Sizeof.cl_mem, uvBuffer.ptr());
		clSetKernelArg(kernelLarge, 6, Sizeof.cl_mem, outVertexBuffer.ptr());
		clSetKernelArg(kernelLarge, 7, Sizeof.cl_mem, outUvBuffer.ptr());
		clSetKernelArg(kernelLarge, 8, Sizeof.cl_mem, uniformBuffer.ptr());

		clEnqueueNDRangeKernel(commandQueue, kernelLarge, 1, null,
			new long[]{(long) largeModels * largeWorkers}, new long[]{largeWorkers}, 1, afterAcquire, computeEvents[numComputeEvents++]);
	}

	if (numComputeEvents > 0)
	{
		// Hand the buffers back to GL once every enqueued kernel has finished
		clEnqueueReleaseGLObjects(commandQueue, glBuffers.length, glBuffers, numComputeEvents, computeEvents, null);
	}
	else
	{
		// No kernel was enqueued. A non-null wait list with a count of zero is
		// rejected with CL_INVALID_EVENT_WAIT_LIST, so wait on the acquire instead.
		clEnqueueReleaseGLObjects(commandQueue, glBuffers.length, glBuffers, 1, afterAcquire, null);
	}

	// NOTE(review): the cl_event objects created here are never passed to
	// clReleaseEvent - confirm whether they need an explicit release per call.
}
/**
 * Calls clFinish on the command queue, which blocks until every command
 * previously enqueued on it has completed.
 */
void finish()
{
clFinish(commandQueue);
}
}