gpu: replace count_prio_offset switch with loop

Nvidia drivers seem to compile the switch with fallthrough as a 19-level nested if, inlining the code from the fallthrough cases into each branch. The hd devs have identified this as a potential source of the artifacting that happens on some cards, due to the number of nested branches. Replace the switch with a simple loop, which also compiles to about 7% of the generated code size of the switch.
2022-02-08 11:14:11 -05:00
parent 11f9454015
commit 8811756301
2 changed files with 14 additions and 76 deletions
--- a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.cl
@@ -62,52 +62,21 @@ int priority_map(int p, int distance, int _min10, int avg1, int avg2, int avg3)
        return 17;
      }
    default:
-      return -1;
+      // this can't happen unless an invalid priority is sent. just assume 0.
+      return 0;
  }
 }

 // calculate the number of faces with a lower adjusted priority than
 // the given adjusted priority
 int count_prio_offset(__local struct shared_data *shared, int priority) {
+  // this shouldn't ever be outside of (0, 17) because it is the return value from priority_map
+  priority = clamp(priority, 0, 17);
  int total = 0;
-  switch (priority) {
-    case 17:
-      total += shared->totalMappedNum[16];
-    case 16:
-      total += shared->totalMappedNum[15];
-    case 15:
-      total += shared->totalMappedNum[14];
-    case 14:
-      total += shared->totalMappedNum[13];
-    case 13:
-      total += shared->totalMappedNum[12];
-    case 12:
-      total += shared->totalMappedNum[11];
-    case 11:
-      total += shared->totalMappedNum[10];
-    case 10:
-      total += shared->totalMappedNum[9];
-    case 9:
-      total += shared->totalMappedNum[8];
-    case 8:
-      total += shared->totalMappedNum[7];
-    case 7:
-      total += shared->totalMappedNum[6];
-    case 6:
-      total += shared->totalMappedNum[5];
-    case 5:
-      total += shared->totalMappedNum[4];
-    case 4:
-      total += shared->totalMappedNum[3];
-    case 3:
-      total += shared->totalMappedNum[2];
-    case 2:
-      total += shared->totalMappedNum[1];
-    case 1:
-      total += shared->totalMappedNum[0];
-    case 0:
-      return total;
+  for (int i = 0; i < priority; i++) {
+    total += shared->totalMappedNum[i];
  }
+  return total;
 }

 void get_face(
--- a/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.glsl
+++ b/runelite-client/src/main/resources/net/runelite/client/plugins/gpu/priority_render.glsl
@@ -62,52 +62,21 @@ int priority_map(int p, int distance, int _min10, int avg1, int avg2, int avg3)
        return 17;
      }
    default:
-      return -1;
+      // this can't happen unless an invalid priority is sent. just assume 0.
+      return 0;
  }
 }

 // calculate the number of faces with a lower adjusted priority than
 // the given adjusted priority
 int count_prio_offset(int priority) {
+  // this shouldn't ever be outside of (0, 17) because it is the return value from priority_map
+  priority = clamp(priority, 0, 17);
  int total = 0;
-  switch (priority) {
-    case 17:
-      total += totalMappedNum[16];
-    case 16:
-      total += totalMappedNum[15];
-    case 15:
-      total += totalMappedNum[14];
-    case 14:
-      total += totalMappedNum[13];
-    case 13:
-      total += totalMappedNum[12];
-    case 12:
-      total += totalMappedNum[11];
-    case 11:
-      total += totalMappedNum[10];
-    case 10:
-      total += totalMappedNum[9];
-    case 9:
-      total += totalMappedNum[8];
-    case 8:
-      total += totalMappedNum[7];
-    case 7:
-      total += totalMappedNum[6];
-    case 6:
-      total += totalMappedNum[5];
-    case 5:
-      total += totalMappedNum[4];
-    case 4:
-      total += totalMappedNum[3];
-    case 3:
-      total += totalMappedNum[2];
-    case 2:
-      total += totalMappedNum[1];
-    case 1:
-      total += totalMappedNum[0];
-    case 0:
-      return total;
+  for (int i = 0; i < priority; i++) {
+      total += totalMappedNum[i];
  }
+  return total;
 }

 void get_face(uint localId, modelinfo minfo, int cameraYaw, int cameraPitch,