gpu: add opencl support for macos

This allows using opencl as an alternative to opengl compute shaders on
macos, which does not support compute shaders. Now, macos can finally
use the extended draw distance feature of the gpu plugin.

This also includes code for using opencl with Windows and Linux if we
want to enable that in the future. A copy of the existing compute
shaders has been checked in and ported to opencl, keeping support for
opengl compute shaders on Windows and Linux.

Co-authored-by: Paul Norton <napkinorton@gmail.com>
This commit is contained in:
Adam
2021-02-10 21:01:53 -05:00
parent 26f26308ab
commit 13efaa6a0c
9 changed files with 1445 additions and 175 deletions

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2021, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Camera and projection parameters read by the kernels through a
 * __constant pointer.
 * NOTE(review): the layout presumably has to match the buffer filled by the
 * host - confirm before reordering or resizing any field.
 */
struct uniform {
// camera yaw as an angle unit; the helpers multiply it by UNIT before sin/cos
int cameraYaw;
// camera pitch, same angle unit as cameraYaw
int cameraPitch;
// added to the projected x coordinate in toScreen
int centerX;
// added to the projected y coordinate in toScreen
int centerY;
// projection scale applied before the perspective divide in toScreen
int zoom;
// camera position; face_visible subtracts it from the scene-space vertices
int cameraX;
int cameraY;
int cameraZ;
// indexed by model orientation; rotate_vertex reads .x as sine and .y as
// cosine and shifts the products right by 16
int4 sinCosTable[2048];
};
/*
 * Per-work-group scratch data used while ordering the faces of one model.
 * The kernels access it through a __local pointer.
 */
struct shared_data {
int totalNum[12]; // number of faces with a given priority
int totalDistance[12]; // sum of distances to faces of a given priority
int totalMappedNum[18]; // number of faces with a given adjusted priority
int min10; // minimum distance to a face of priority 10
// Trailing storage, so the real length is decided by whoever sizes the
// allocation. Each entry is (face id << 16) | distance, see insert_dfs.
int dfs[0]; // packed face id and distance, size 512 for small, 4096 for large
};
/*
 * Description of one model to process; the kernels read one entry per
 * work-group (indexed by get_group_id(0)).
 */
struct modelinfo {
int offset; // offset into buffer
int uvOffset; // offset into uv buffer; negative means zero uvs are written
int size; // length in faces
int idx; // write idx in target buffer
// packed: sign bit selects the source buffers (negative -> vb/uv, otherwise
// tempvb/tempuv), (flags & 0x7fffffff) >> 12 is the radius, flags & 0x7ff is
// the orientation
int flags; // radius, orientation
int x; // scene position x
int y; // scene position y
int z; // scene position z
};

View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) 2021, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// pi as a single precision literal
#define PI 3.1415926535897932384626433832795f
// Radians per angle unit (2048 units make a full turn). The expansion is
// parenthesized so the macro behaves as a single operand in any expression,
// e.g. `x / UNIT`. For `x * UNIT` the result is unchanged because the
// division is by a power of two.
#define UNIT (PI / 1024.0f)
/*
 * Project a camera-relative vertex onto the screen.
 *
 * The vertex is rotated by the camera yaw and then by the camera pitch,
 * scaled by zoom, divided by its depth and offset by the screen center.
 * Returns (screen x, screen y, negated depth).
 */
float3 toScreen(int4 vertex, int cameraYaw, int cameraPitch, int centerX, int centerY, int zoom) {
    const float sinYaw = sin(cameraYaw * UNIT);
    const float cosYaw = cos(cameraYaw * UNIT);
    const float sinPitch = sin(cameraPitch * UNIT);
    const float cosPitch = cos(cameraPitch * UNIT);

    // rotate around the vertical axis (yaw)
    const float yawX = (vertex.z * sinYaw) + (vertex.x * cosYaw);
    const float yawZ = (vertex.z * cosYaw) - (vertex.x * sinYaw);

    // tilt by the pitch; depth is the distance along the view direction
    const float height = (vertex.y * cosPitch) - (yawZ * sinPitch);
    const float depth = (vertex.y * sinPitch) + (yawZ * cosPitch);

    // perspective divide; in OpenGL depth is negative
    return (float3) (yawX * zoom / depth + centerX, height * zoom / depth + centerY, -depth);
}
/*
 * Rotate a vertex around the y axis by a model orientation.
 *
 * orientation is used directly as an index into uni->sinCosTable; the entry
 * supplies the sine (.x) and cosine (.y), and the products are shifted right
 * by 16 bits. The y and w components are passed through unchanged.
 */
int4 rotate_vertex(__constant struct uniform *uni, int4 vertex, int orientation) {
    const int4 entry = uni->sinCosTable[orientation];
    const int sine = entry.x;
    const int cosine = entry.y;
    const int rotatedX = (vertex.z * sine + vertex.x * cosine) >> 16;
    const int rotatedZ = (vertex.z * cosine - vertex.x * sine) >> 16;
    return (int4)(rotatedX, vertex.y, rotatedZ, vertex.w);
}
/*
 * Distance of a vertex along the view direction for the given camera angles.
 *
 * The sines and cosines are scaled by 65536 and truncated to int, and each
 * product sum is shifted right by 16 bits, so the result is an integer.
 */
int vertex_distance(int4 vertex, int cameraYaw, int cameraPitch) {
    const int sinYaw = (int)(65536.0f * sin(cameraYaw * UNIT));
    const int cosYaw = (int)(65536.0f * cos(cameraYaw * UNIT));
    const int sinPitch = (int)(65536.0f * sin(cameraPitch * UNIT));
    const int cosPitch = (int)(65536.0f * cos(cameraPitch * UNIT));

    // z after the yaw rotation
    const int yawZ = (vertex.z * cosYaw - vertex.x * sinYaw) >> 16;
    // depth after the pitch rotation
    return (vertex.y * sinPitch + yawZ * cosPitch) >> 16;
}
/*
 * Distance to a face: the integer mean of the distances to its three
 * vertices, as computed by vertex_distance.
 */
int face_distance(int4 vA, int4 vB, int4 vC, int cameraYaw, int cameraPitch) {
    const int distA = vertex_distance(vA, cameraYaw, cameraPitch);
    const int distB = vertex_distance(vB, cameraYaw, cameraPitch);
    const int distC = vertex_distance(vC, cameraYaw, cameraPitch);
    return (distA + distB + distC) / 3;
}
/*
 * Test if a face is visible (not backward facing).
 *
 * The vertices are moved to the model's scene position and made relative to
 * the camera, projected with toScreen, and the face counts as visible when
 * the 2D cross product of the projected edges (A - B) and (C - B) is positive.
 */
bool face_visible(__constant struct uniform *uni, int4 vA, int4 vB, int4 vC, int4 position) {
    // offset from model space to camera-relative scene space
    const int4 toCamera = position - (int4)(uni->cameraX, uni->cameraY, uni->cameraZ, 0);

    const int yaw = uni->cameraYaw;
    const int pitch = uni->cameraPitch;
    const int cx = uni->centerX;
    const int cy = uni->centerY;
    const int zoom = uni->zoom;

    const float3 sA = toScreen(vA + toCamera, yaw, pitch, cx, cy, zoom);
    const float3 sB = toScreen(vB + toCamera, yaw, pitch, cx, cy, zoom);
    const float3 sC = toScreen(vC + toCamera, yaw, pitch, cx, cy, zoom);

    return (sA.x - sB.x) * (sC.y - sB.y) - (sC.x - sB.x) * (sA.y - sB.y) > 0;
}

View File

@@ -0,0 +1,97 @@
/*
* Copyright (c) 2021, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include FACE_COUNT
#include cl_types.cl
#include to_screen.cl
#include common.cl
#include priority_render.cl
/*
 * Order the faces of one model per work-group and write them to the output
 * buffers.
 *
 * The model is ol[get_group_id(0)]. Each work-item handles FACE_COUNT
 * consecutive faces starting at get_local_id(0) * FACE_COUNT. The work is
 * split into phases separated by local-memory barriers, because each phase
 * reads the totals accumulated in `shared` by the previous one.
 */
__kernel
__attribute__((work_group_size_hint(256, 1, 1)))
void computeLarge(
    __local struct shared_data *shared,
    __global const struct modelinfo *ol,
    __global const int4 *vb,
    __global const int4 *tempvb,
    __global const float4 *uv,
    __global const float4 *tempuv,
    __global int4 *vout,
    __global float4 *uvout,
    __constant struct uniform *uni) {
    size_t model = get_group_id(0);
    size_t firstFace = get_local_id(0) * FACE_COUNT;
    struct modelinfo minfo = ol[model];

    // the first work-item resets the shared counters
    if (firstFace == 0) {
        for (int slot = 0; slot < 18; ++slot) {
            if (slot < 12) {
                shared->totalNum[slot] = 0;
                shared->totalDistance[slot] = 0;
            }
            shared->totalMappedNum[slot] = 0;
        }
        shared->min10 = 1600;
    }

    // per-face state kept by this work-item across the phases
    int facePrio[FACE_COUNT];
    int faceDist[FACE_COUNT];
    int4 faceA[FACE_COUNT];
    int4 faceB[FACE_COUNT];
    int4 faceC[FACE_COUNT];
    int mappedPrio[FACE_COUNT];
    int prioSlot[FACE_COUNT];

    // phase 1: load and rotate the faces, compute priority and distance
    for (int f = 0; f < FACE_COUNT; f++) {
        get_face(shared, uni, vb, tempvb, firstFace + f, minfo, uni->cameraYaw, uni->cameraPitch,
            &facePrio[f], &faceDist[f], &faceA[f], &faceB[f], &faceC[f]);
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // phase 2: accumulate per-priority counts and distance sums
    int4 scenePos = (int4)(minfo.x, minfo.y, minfo.z, 0);
    for (int f = 0; f < FACE_COUNT; f++) {
        add_face_prio_distance(shared, uni, firstFace + f, minfo, faceA[f], faceB[f], faceC[f],
            facePrio[f], faceDist[f], scenePos);
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // phase 3: map to adjusted priorities and reserve a slot in each bucket
    for (int f = 0; f < FACE_COUNT; f++) {
        prioSlot[f] = map_face_priority(shared, firstFace + f, minfo, facePrio[f], faceDist[f], &mappedPrio[f]);
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // phase 4: publish packed face id / distance grouped by adjusted priority
    for (int f = 0; f < FACE_COUNT; f++) {
        insert_dfs(shared, firstFace + f, minfo, mappedPrio[f], faceDist[f], prioSlot[f]);
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // phase 5: find the final position of every face and write it out
    for (int f = 0; f < FACE_COUNT; f++) {
        sort_and_insert(shared, uv, tempuv, vout, uvout, firstFace + f, minfo, mappedPrio[f], faceDist[f],
            faceA[f], faceB[f], faceC[f]);
    }
}

View File

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2021, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include cl_types.cl
/*
 * Copy the faces of a model to the output buffers without reordering them.
 *
 * The model is ol[get_group_id(0)] and each work-item copies the face whose
 * index is its local id; work-items beyond the model's face count do nothing.
 * Vertices are translated by the model's scene position. UVs are zero when
 * uvOffset is negative. The sign of flags selects the source buffers:
 * negative reads vb/uv, otherwise tempvb/tempuv.
 */
__kernel
__attribute__((reqd_work_group_size(6, 1, 1)))
void computeUnordered(__global const struct modelinfo *ol,
    __global const int4 *vb,
    __global const int4 *tempvb,
    __global const float4 *uv,
    __global const float4 *tempuv,
    __global int4 *vout,
    __global float4 *uvout) {
    size_t localId = get_local_id(0);
    struct modelinfo minfo = ol[get_group_id(0)];

    if (localId >= minfo.size) {
        return;
    }

    const uint face = localId;
    const uint inBase = minfo.offset + face * 3;
    const uint outBase = minfo.idx + face * 3;
    const int4 scenePos = (int4)(minfo.x, minfo.y, minfo.z, 0);

    // Grab triangle vertices from the correct buffer, position them in the
    // scene and write them to the out buffer
    __global const int4 *vertices = (minfo.flags < 0) ? vb : tempvb;
    const int4 vertA = vertices[inBase];
    const int4 vertB = vertices[inBase + 1];
    const int4 vertC = vertices[inBase + 2];
    vout[outBase] = scenePos + vertA;
    vout[outBase + 1] = scenePos + vertB;
    vout[outBase + 2] = scenePos + vertC;

    if (minfo.uvOffset < 0) {
        const float4 noUv = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
        uvout[outBase] = noUv;
        uvout[outBase + 1] = noUv;
        uvout[outBase + 2] = noUv;
        return;
    }

    __global const float4 *uvs = (minfo.flags >= 0) ? tempuv : uv;
    const size_t uvBase = minfo.uvOffset + localId * 3;
    uvout[outBase] = uvs[uvBase];
    uvout[outBase + 1] = uvs[uvBase + 1];
    uvout[outBase + 2] = uvs[uvBase + 2];
}

View File

@@ -0,0 +1,298 @@
/*
* Copyright (c) 2021, Adam <Adam@sigterm.info>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Map a face priority (0-11) to one of 18 adjusted priorities.
//
// Priorities 0-9 map to fixed slots. Priorities 10 and 11 are placed into one
// of four slots each, chosen by comparing the face distance (and, for 11, the
// model-wide minimum distance of priority 10 faces) against the three
// distance averages. Any other priority yields -1.
//
//  (10, 11)  0  1  2  (10, 11)  3  4  (10, 11)  5  6  7  8  9  (10, 11)
//   0   1    2  3  4   5   6    7  8   9  10   11 12 13 14 15  16  17
int priority_map(int p, int distance, int _min10, int avg1, int avg2, int avg3) {
    if (p < 0 || p > 11) {
        return -1;
    }

    // fixed slots: 0-2 -> 2-4, 3-4 -> 7-8, 5-9 -> 11-15
    if (p <= 2) {
        return p + 2;
    }
    if (p <= 4) {
        return p + 4;
    }
    if (p <= 9) {
        return p + 6;
    }

    if (p == 10) {
        if (distance > avg1) {
            return 0;
        }
        if (distance > avg2) {
            return 5;
        }
        if (distance > avg3) {
            return 9;
        }
        return 16;
    }

    // p == 11: additionally requires the closest priority 10 face to be
    // beyond at least one of the averages considered so far
    const int beyond1 = _min10 > avg1;
    const int beyond2 = beyond1 || _min10 > avg2;
    const int beyond3 = beyond2 || _min10 > avg3;
    if (distance > avg1 && beyond1) {
        return 1;
    }
    if (distance > avg2 && beyond2) {
        return 6;
    }
    if (distance > avg3 && beyond3) {
        return 10;
    }
    return 17;
}
// Calculate the number of faces with a lower adjusted priority than the
// given adjusted priority, i.e. the sum of shared->totalMappedNum[0..priority-1].
//
// For priorities 0-17 this is the start offset of that priority's bucket.
// Out-of-range input is handled explicitly instead of running off the end of
// the function without a return value: a negative priority yields 0 and a
// priority above 17 yields the total of all 18 buckets.
int count_prio_offset(__local struct shared_data *shared, int priority) {
    // never read past the 18 entries of totalMappedNum
    const int limit = priority > 18 ? 18 : priority;
    int total = 0;
    for (int lower = 0; lower < limit; ++lower) {
        total += shared->totalMappedNum[lower];
    }
    return total;
}
/*
 * Load one face of a model, rotate it by the model orientation and compute
 * its priority and distance.
 *
 * localId is the face index within the model. For indices at or beyond
 * minfo.size all outputs are set to zero. The sign of minfo.flags selects the
 * vertex buffer (negative: vb, otherwise tempvb). The priority is taken from
 * bits 16-23 of the first vertex's w component. The distance is 0 when the
 * radius encoded in flags is 0, otherwise face_distance plus the radius.
 *
 * `shared` is not used by this function.
 */
void get_face(
    __local struct shared_data *shared,
    __constant struct uniform *uni,
    __global const int4 *vb,
    __global const int4 *tempvb,
    uint localId, struct modelinfo minfo, int cameraYaw, int cameraPitch,
    /* out */ int *prio, int *dis, int4 *o1, int4 *o2, int4 *o3) {
    const bool inModel = localId < minfo.size;

    // out-of-range work-items still read face 0, exactly like in-range ones
    const uint face = inModel ? localId : 0;
    const uint base = minfo.offset + face * 3;

    // Grab triangle vertices from the correct buffer
    __global const int4 *vertices = (minfo.flags < 0) ? vb : tempvb;
    const int4 rawA = vertices[base];
    const int4 rawB = vertices[base + 1];
    const int4 rawC = vertices[base + 2];

    if (!inModel) {
        const int4 none = (int4)(0, 0, 0, 0);
        *o1 = none;
        *o2 = none;
        *o3 = none;
        *prio = 0;
        *dis = 0;
        return;
    }

    const int radius = (minfo.flags & 0x7fffffff) >> 12;
    const int orientation = minfo.flags & 0x7ff;

    // rotate for model orientation
    const int4 rotA = rotate_vertex(uni, rawA, orientation);
    const int4 rotB = rotate_vertex(uni, rawB, orientation);
    const int4 rotC = rotate_vertex(uni, rawC, orientation);

    *o1 = rotA;
    *o2 = rotB;
    *o3 = rotC;
    // all vertices on the face have the same priority
    *prio = (rawA.w >> 16) & 0xff;
    // calculate distance to face
    *dis = (radius == 0) ? 0 : face_distance(rotA, rotB, rotC, cameraYaw, cameraPitch) + radius;
}
/*
 * Add a face to the shared per-priority statistics.
 *
 * Only faces inside the model (localId < minfo.size) that pass face_visible
 * are counted. The face's priority bucket gets its count and distance sum
 * updated atomically, and faces of priority 10 also lower shared->min10.
 */
void add_face_prio_distance(
    __local struct shared_data *shared,
    __constant struct uniform *uni,
    uint localId, struct modelinfo minfo, int4 thisrvA, int4 thisrvB, int4 thisrvC, int thisPriority, int thisDistance, int4 pos) {
    if (localId >= minfo.size) {
        return;
    }

    // culled faces do not take part in the priority distance averages
    if (!face_visible(uni, thisrvA, thisrvB, thisrvC, pos)) {
        return;
    }

    atomic_add(&shared->totalNum[thisPriority], 1);
    atomic_add(&shared->totalDistance[thisPriority], thisDistance);

    // minimum distance to any face of priority 10, used later to position
    // the priority 11 faces
    if (thisPriority == 10) {
        atomic_min(&shared->min10, thisDistance);
    }
}
/*
 * Compute the adjusted priority of a face and reserve its slot in that
 * adjusted priority's bucket.
 *
 * Stores the adjusted priority in *prio and returns the face's index within
 * the bucket (the value of the bucket counter before it was incremented).
 * For indices at or beyond minfo.size, *prio is set to 0 and 0 is returned.
 */
int map_face_priority(__local struct shared_data *shared, uint localId, struct modelinfo minfo, int thisPriority, int thisDistance, int *prio) {
    if (localId >= minfo.size) {
        *prio = 0;
        return 0;
    }

    // Average distances of the priority pairs 1/2, 3/4 and 6/8; an average
    // stays 0 when neither priority of the pair has any face
    int avg12 = 0;
    int avg34 = 0;
    int avg68 = 0;

    const int num1 = shared->totalNum[1];
    const int num2 = shared->totalNum[2];
    if (num1 > 0 || num2 > 0) {
        avg12 = (shared->totalDistance[1] + shared->totalDistance[2]) / (num1 + num2);
    }

    const int num3 = shared->totalNum[3];
    const int num4 = shared->totalNum[4];
    if (num3 > 0 || num4 > 0) {
        avg34 = (shared->totalDistance[3] + shared->totalDistance[4]) / (num3 + num4);
    }

    const int num6 = shared->totalNum[6];
    const int num8 = shared->totalNum[8];
    if (num6 > 0 || num8 > 0) {
        avg68 = (shared->totalDistance[6] + shared->totalDistance[8]) / (num6 + num8);
    }

    const int mapped = priority_map(thisPriority, thisDistance, shared->min10, avg12, avg34, avg68);
    *prio = mapped;
    return atomic_add(&shared->totalMappedNum[mapped], 1);
}
/*
 * Store a face in shared->dfs, grouped by adjusted priority.
 *
 * The entry goes to the start of the adjusted priority's bucket (the number
 * of faces with a lower adjusted priority) plus prioIdx, and is packed as
 * (face index << 16) | distance. Indices at or beyond minfo.size are ignored.
 */
void insert_dfs(__local struct shared_data *shared, uint localId, struct modelinfo minfo, int adjPrio, int distance, int prioIdx) {
    if (localId >= minfo.size) {
        return;
    }

    const int bucketStart = count_prio_offset(shared, adjPrio);
    const int packed = ((int) localId << 16) | distance;
    shared->dfs[bucketStart + prioIdx] = packed;
}
/*
 * Determine the final position of a face and write it to the out buffers.
 *
 * thisPriority indexes shared->totalMappedNum, so it is the adjusted
 * priority. The face starts at the first slot of its bucket and moves back
 * one slot for every face in the same bucket that must be drawn before it.
 * Vertices are translated by the model's scene position. UVs are zero when
 * minfo.uvOffset is negative, otherwise copied from tempuv (flags >= 0) or
 * uv (flags < 0). Indices at or beyond minfo.size are ignored.
 */
void sort_and_insert(
    __local struct shared_data *shared,
    __global const float4 *uv,
    __global const float4 *tempuv,
    __global int4 *vout,
    __global float4 *uvout,
    uint localId, struct modelinfo minfo, int thisPriority, int thisDistance, int4 thisrvA, int4 thisrvB, int4 thisrvC) {
    if (localId >= minfo.size) {
        return;
    }

    const int bucketStart = count_prio_offset(shared, thisPriority); // index of first face with this priority
    const int bucketCount = shared->totalMappedNum[thisPriority];    // number of faces with this priority

    // we only have to order faces against others of the same priority
    int slot = bucketStart;
    for (int n = 0; n < bucketCount; ++n) {
        const int packed = shared->dfs[bucketStart + n];
        const int otherId = packed >> 16;
        const int otherDistance = packed & 0xffff;

        // the closest faces draw last, so have the highest index
        // if two faces have the same distance, the one with the
        // higher id draws last
        const bool farther = otherDistance > thisDistance;
        const bool sameDistanceLowerId = otherDistance == thisDistance && otherId < localId;
        if (farther || sameDistanceLowerId) {
            ++slot;
        }
    }

    // position vertices in scene and write to out buffer
    const int outBase = minfo.idx + slot * 3;
    const int4 scenePos = (int4)(minfo.x, minfo.y, minfo.z, 0);
    vout[outBase] = scenePos + thisrvA;
    vout[outBase + 1] = scenePos + thisrvB;
    vout[outBase + 2] = scenePos + thisrvC;

    if (minfo.uvOffset < 0) {
        const float4 noUv = (float4)(0, 0, 0, 0);
        uvout[outBase] = noUv;
        uvout[outBase + 1] = noUv;
        uvout[outBase + 2] = noUv;
        return;
    }

    __global const float4 *uvs = (minfo.flags >= 0) ? tempuv : uv;
    const uint uvBase = minfo.uvOffset + localId * 3;
    uvout[outBase] = uvs[uvBase];
    uvout[outBase + 1] = uvs[uvBase + 1];
    uvout[outBase + 2] = uvs[uvBase + 2];
}