gpu: replace count_prio_offset switch with loop

Nvidia drivers seem to compile the switch with fallthrough as a 19-level
nested if, and inline the code from the fallthrough cases into each branch.

The hd devs have identified the number of nested branches this produces
as a potential source of the artifacting that happens on some cards.

Replace the switch with a simple loop, whose generated code is also only
about 7% of the size of the code generated for the switch.
This commit is contained in:
Adam
2022-02-08 11:14:11 -05:00
parent 11f9454015
commit 8811756301
2 changed files with 14 additions and 76 deletions

View File

@@ -62,52 +62,21 @@ int priority_map(int p, int distance, int _min10, int avg1, int avg2, int avg3)
return 17;
}
default:
return -1;
// this can't happen unless an invalid priority is sent. just assume 0.
return 0;
}
}
// Calculate the number of faces with a lower adjusted priority than the
// given adjusted priority, i.e. the sum of
// shared->totalMappedNum[0 .. priority - 1].
//
// shared:   work-group shared data holding the totalMappedNum counters.
// priority: adjusted priority, normally the return value of priority_map.
// returns:  the summed counters; 0 when priority is 0 (or lower).
int count_prio_offset(__local struct shared_data *shared, int priority) {
  // priority_map should only ever hand us values in [0, 17]; clamp anyway so
  // the loop below never indexes past totalMappedNum[16].
  priority = clamp(priority, 0, 17);

  // A plain loop replaces the former unrolled fall-through switch, which some
  // drivers reportedly compiled into deeply nested branches.
  int total = 0;
  for (int i = 0; i < priority; i++) {
    total += shared->totalMappedNum[i];
  }
  return total;
}
void get_face(

View File

@@ -62,52 +62,21 @@ int priority_map(int p, int distance, int _min10, int avg1, int avg2, int avg3)
return 17;
}
default:
return -1;
// this can't happen unless an invalid priority is sent. just assume 0.
return 0;
}
}
// Calculate the number of faces with a lower adjusted priority than the
// given adjusted priority, i.e. the sum of totalMappedNum[0 .. priority - 1].
//
// priority: adjusted priority, normally the return value of priority_map.
// returns:  the summed counters; 0 when priority is 0 (or lower).
int count_prio_offset(int priority) {
  // priority_map should only ever hand us values in [0, 17]; clamp anyway so
  // the loop below never indexes past totalMappedNum[16].
  priority = clamp(priority, 0, 17);

  // A plain loop replaces the former unrolled fall-through switch, which some
  // drivers reportedly compiled into deeply nested branches.
  int total = 0;
  for (int i = 0; i < priority; i++) {
    total += totalMappedNum[i];
  }
  return total;
}
void get_face(uint localId, modelinfo minfo, int cameraYaw, int cameraPitch,