-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathtraversal_init.comp.glsl
More file actions
257 lines (202 loc) · 8.71 KB
/
traversal_init.comp.glsl
File metadata and controls
257 lines (202 loc) · 8.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/*
* Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
/*
Shader Description
==================
This compute shader initializes the traversal queue with the
root nodes of the lod hierarchy of rendered instances.
A thread represents one instance.
NOT compatible with USE_BLAS_SHARING, see `traversal_init_blas_sharing.comp.glsl`
*/
#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_control_flow_attributes : require
#extension GL_KHR_shader_subgroup_vote : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_shuffle : require
#extension GL_KHR_shader_subgroup_basic : require
#extension GL_KHR_shader_subgroup_clustered : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#include "shaderio.h"
////////////////////////////////////////////
// Resource interface of this shader. All blocks use the scalar block layout
// (GL_EXT_scalar_block_layout); the struct types and BINDINGS_* constants are
// not defined in this file (presumably they come from "shaderio.h" - TODO confirm).

// Uniform block exposing a FrameConstants struct as `view`.
// main() below does not reference it directly; presumably it is consumed by
// the included culling/traversal code - TODO confirm.
layout(scalar, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
FrameConstants view;
};
// Storage block exposing a Readback struct as `readback`.
// Not referenced directly by main() below.
layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer
{
Readback readback;
};
// Array of all render instances; main() indexes it with the instance handled
// by the current invocation.
layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer
{
RenderInstance instances[];
};
// Array of render materials. Not referenced directly by main() below.
layout(scalar, binding = BINDINGS_RENDERMATERIALS_SSBO, set = 0) buffer renderMaterialsBuffer
{
RenderMaterial materials[];
};
// Array of geometries; main() indexes it with `instance.geometryID`.
layout(scalar, binding = BINDINGS_GEOMETRIES_SSBO, set = 0) buffer geometryBuffer
{
Geometry geometries[];
};
// Hi-Z sampler(s): an array of two samplers when two-pass culling is combined
// with rasterization, a single sampler otherwise.
// NOTE(review): presumably sampled by intersectHiz() from "culling.glsl" - confirm.
#if USE_TWO_PASS_CULLING && TARGETS_RASTERIZATION
layout(binding = BINDINGS_HIZ_TEX) uniform sampler2D texHizFar[2];
#else
layout(binding = BINDINGS_HIZ_TEX) uniform sampler2D texHizFar;
#endif
// The SceneBuilding struct is bound twice: `build` through a uniform block and
// `buildRW` through a storage block. main() reads its parameters and buffer
// references through `build` and uses `buildRW` only for the atomicAdd on
// `traversalNodeWriteCounter`.
// NOTE(review): presumably both bindings alias the same memory - confirm on the host side.
layout(scalar, binding = BINDINGS_SCENEBUILDING_UBO, set = 0) uniform buildBuffer
{
SceneBuilding build;
};
layout(scalar, binding = BINDINGS_SCENEBUILDING_SSBO, set = 0) buffer buildBufferRW
{
SceneBuilding buildRW;
};
////////////////////////////////////////////
// Workgroup size along x; y and z are left at their default of 1.
layout(local_size_x=TRAVERSAL_INIT_WORKGROUP) in;
// Included after the declarations above. main() calls helpers that are not
// defined in this file (e.g. intersectFrustum, testForTraversal, rasterBinning);
// presumably they are provided by these includes and rely on the bindings
// declared above - TODO confirm.
#include "culling.glsl"
#include "traversal.glsl"
////////////////////////////////////////////
// Entry point: one invocation handles one render instance.
//
// Steps:
//  1. Map the invocation to an instance (optionally through the sort-value
//     buffer) and load its instance and geometry records.
//  2. Run the frustum / size / Hi-Z tests to derive the visibility state.
//  3. For renderable instances, test whether the second-to-last LOD needs
//     traversal. If not, the lowest detail LOD is used directly (rasterization
//     bins its single cluster, ray tracing relies on the low detail BLAS) and
//     the root node is not enqueued.
//  4. Append the root nodes that still need traversal to the traversal queue,
//     using a single atomicAdd per subgroup.
//  5. Write the per-instance outputs (visibility, and for ray tracing the BLAS
//     build info and TLAS instance reference).
void main()
{
  // With zero instances `numRenderInstances - 1` below would wrap around to
  // 0xFFFFFFFF, the clamp would become a no-op and every invocation would be
  // treated as valid, reading and writing out of bounds. The condition is
  // uniform for the whole dispatch, so returning here keeps the subgroup
  // operations further down in uniform control flow.
  if (build.numRenderInstances == 0)
  {
    return;
  }

  uint instanceID = getGlobalInvocationIndex(gl_GlobalInvocationID);
  // Invocations past the end are clamped to the last instance so that they
  // still load in-range data and reach the subgroup operations below; they are
  // flagged as invalid and never write any output.
  uint instanceLoad = min(build.numRenderInstances - 1, instanceID);
  bool isValid      = instanceID == instanceLoad;

#if USE_SORTING
  // Redirect the invocation to the instance stored in the sort-value buffer.
  // `isValid` intentionally stays based on the original invocation index.
  instanceLoad = build.instanceSortValues.d[instanceLoad];
  instanceID   = instanceLoad;
#endif

  RenderInstance instance   = instances[instanceLoad];
  uint           geometryID = instance.geometryID;
  Geometry       geometry   = geometries[geometryID];

  uint blasBuildIndex = BLAS_BUILD_INDEX_LOWDETAIL;

  vec4 clipMin;
  vec4 clipMax;
  bool clipValid;

#if USE_TWO_PASS_CULLING && TARGETS_RASTERIZATION
  // the first pass (cullPass == 0) tests against the last frame's matrix,
  // the second pass against the current one
  bool inFrustum = intersectFrustum(build.cullPass == 0 ? build.cullViewProjMatrixLast : build.cullViewProjMatrix,
                                    geometry.bbox.lo, geometry.bbox.hi, instance.worldMatrix, clipMin, clipMax, clipValid);
  bool isVisible = inFrustum && (!clipValid || (intersectSize(clipMin, clipMax, 1.0) && intersectHiz(clipMin, clipMax, build.cullPass)));

  // if smallish and was already drawn, don't process again
  if (build.cullPass == 1 && isVisible && clipValid && !intersectSize(clipMin, clipMax, 8.0)
      && ((uint(build.instanceVisibility.d[instanceLoad]) & INSTANCE_VISIBLE_BIT) != 0))
  {
    isVisible = false;
  }
#else
  bool inFrustum = intersectFrustum(build.cullViewProjMatrixLast, geometry.bbox.lo, geometry.bbox.hi,
                                    instance.worldMatrix, clipMin, clipMax, clipValid);
  bool isVisible = inFrustum && (!clipValid || (intersectSize(clipMin, clipMax, 1.0) && intersectHiz(clipMin, clipMax, 0)));
#endif

  uint visibilityState = isVisible ? INSTANCE_VISIBLE_BIT : 0;

  // Visibility only gates rendering when culling is enabled for rasterization
  // or when invisible instances are forcibly culled; otherwise every valid
  // instance is renderable.
  bool isRenderable = isValid
#if USE_CULLING && (TARGETS_RASTERIZATION || USE_FORCED_INVISIBLE_CULLING)
                      && isVisible
#endif
      ;

  bool traverseRootNode = isRenderable;

  if (isRenderable)
  {
    // We test if we are only using the furthest lod.
    // If that is true, then we can skip lod traversal completely and
    // straight enqueue the lowest detail cluster directly.

    uint rootNodePacked     = geometry.nodes.d[0].packed;
    uint childOffset        = PACKED_GET(rootNodePacked, Node_packed_nodeChildOffset);
    uint childCountMinusOne = PACKED_GET(rootNodePacked, Node_packed_nodeChildCountMinusOne);

    // test if the second to last lod needs to be traversed
    uint childNodeIndex = (childCountMinusOne > 1 ? (childCountMinusOne - 1) : 0);
    Node childNode      = geometry.nodes.d[childOffset + childNodeIndex];

    TraversalMetric traversalMetric = childNode.traversalMetric;

    // `instance` already holds instances[instanceID]: on this path the
    // invocation is valid, hence instanceID == instanceLoad, so the matrix is
    // reused instead of being fetched from the buffer a second time.
    mat4x3 worldMatrix  = instance.worldMatrix;
    float  uniformScale = computeUniformScale(worldMatrix);
    float  errorScale   = 1.0;
#if USE_CULLING && TARGETS_RAY_TRACING
    // instances that failed the visibility test use the culled error scale
    if (visibilityState == 0) errorScale = build.culledErrorScale;
#endif

    mat4 transform = build.traversalViewMatrix * toMat4(worldMatrix);

    // if there is no need to traverse the penultimate lod level,
    // then just insert the last lod level node's cluster directly
    if (!testForTraversal(mat4x3(transform), uniformScale, traversalMetric, errorScale))
    {
#if TARGETS_RAY_TRACING
      // we don't need to add a cluster because we always add it
      // implicitly through the use of the low detail BLAS.
#elif TARGETS_RASTERIZATION
      // lowest detail lod is guaranteed to have only one cluster.
      bool useAlpha = false;
      bool useSW    = false;
#if HAS_ALPHA_TEST
      useAlpha = (uint(instance.lowDetailClusterStateBits) & CLUSTER_STATE_ALPHAMASKED) != 0;
#endif
#if USE_SW_RASTER
      // software rasterization is only picked when the clip-space bounds are
      // valid, lie within 0 < z < 1 and fail the size test against the
      // software raster threshold
      float relativeSize = geometry.bbox.longestEdge;
      if (isVisible && clipValid && clipMin.z > 0 && clipMax.z < 1
          && !intersectSize(clipMin, clipMax, build.swRasterThreshold, relativeSize))
      {
        useSW = true;
      }
#endif
      rasterBinning(geometry.lowDetailClusterID, instanceID, useAlpha, useSW, true);
#endif
      // we can skip adding the node for traversal
      traverseRootNode = false;
    }
  }

  // Subgroup-aggregated append: the elected invocation reserves a contiguous
  // range in the traversal queue for all voting invocations with one atomicAdd;
  // each voting invocation then takes the slot at base + number of voting
  // invocations with a lower subgroup index.
  uvec4 voteNodes = subgroupBallot(traverseRootNode);

  uint offsetNodes = 0;
  if (subgroupElect())
  {
    offsetNodes = atomicAdd(buildRW.traversalNodeWriteCounter, subgroupBallotBitCount(voteNodes));
  }

  offsetNodes = subgroupBroadcastFirst(offsetNodes);
  offsetNodes += subgroupBallotExclusiveBitCount(voteNodes);

  // The counter may run past the queue capacity; such entries are dropped
  // here instead of being written out of bounds.
  if (traverseRootNode && offsetNodes < build.maxTraversalInfos)
  {
    uint rootNodePacked = geometry.nodes.d[0].packed;

    TraversalInfo traversalInfo;
    traversalInfo.instanceID = instanceID;
    traversalInfo.packedNode = rootNodePacked;

    build.traversalNodeInfos.d[offsetNodes] = packTraversalInfo(traversalInfo);
  }

#if TARGETS_RAY_TRACING
  if (isValid)
  {
    build.instanceVisibility.d[instanceID]                      = uint8_t(visibilityState);
    build.instanceBuildInfos.d[instanceID].clusterReferencesCount = 0;
    build.instanceBuildInfos.d[instanceID].blasBuildIndex         = blasBuildIndex;

    // We might want to remove the instance completely if not visible, or just use the low detail blas
#if USE_CULLING && USE_FORCED_INVISIBLE_CULLING && FORCE_INVISIBLE_CULLED_REMOVES_INSTANCE
    if (!isVisible && build.frameIndex != 0)
    {
      // first frame must always have a valid BLAS due to TLAS BUILD, other frames are TLAS UPDATE
      build.tlasInstances.d[instanceID].blasReference = 0;
    }
    else
#endif
    {
      build.tlasInstances.d[instanceID].blasReference = geometry.lowDetailBlasAddress;
    }
  }
#elif USE_TWO_PASS_CULLING && TARGETS_RASTERIZATION
  // only the first culling pass records the instance visibility
  if (build.cullPass == 0 && isValid)
  {
    build.instanceVisibility.d[instanceID] = uint8_t(visibilityState);
  }
#endif
}