Skip to content

Commit c83c3fb

Browse files
authored
Combine draw commands to improve rendering performance (#2421)
1 parent aef6c33 commit c83c3fb

20 files changed

Lines changed: 470 additions & 245 deletions

common/src/main/java/net/caffeinemc/mods/sodium/client/gl/buffer/IndexedVertexData.java

Lines changed: 0 additions & 16 deletions
This file was deleted.

common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/DefaultChunkRenderer.java

Lines changed: 73 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package net.caffeinemc.mods.sodium.client.render.chunk;
22

33
import net.caffeinemc.mods.sodium.client.SodiumClientMod;
4-
import net.caffeinemc.mods.sodium.client.gl.attribute.GlVertexAttributeBinding;
54
import net.caffeinemc.mods.sodium.client.gl.device.CommandList;
65
import net.caffeinemc.mods.sodium.client.gl.device.DrawCommandList;
76
import net.caffeinemc.mods.sodium.client.gl.device.MultiDrawBatch;
@@ -16,7 +15,6 @@
1615
import net.caffeinemc.mods.sodium.client.render.chunk.lists.ChunkRenderList;
1716
import net.caffeinemc.mods.sodium.client.render.chunk.lists.ChunkRenderListIterable;
1817
import net.caffeinemc.mods.sodium.client.render.chunk.region.RenderRegion;
19-
import net.caffeinemc.mods.sodium.client.render.chunk.shader.ChunkShaderBindingPoints;
2018
import net.caffeinemc.mods.sodium.client.render.chunk.shader.ChunkShaderInterface;
2119
import net.caffeinemc.mods.sodium.client.render.chunk.terrain.TerrainRenderPass;
2220
import net.caffeinemc.mods.sodium.client.render.chunk.translucent_sorting.SortBehavior;
@@ -73,7 +71,7 @@ public void render(ChunkRenderMatrices matrices,
7371
continue;
7472
}
7573

76-
fillCommandBuffer(this.batch, region, storage, renderList, camera, renderPass, useBlockFaceCulling);
74+
fillCommandBuffer(this.batch, region, storage, renderList, camera, renderPass, useBlockFaceCulling, useIndexedTessellation);
7775

7876
if (this.batch.isEmpty()) {
7977
continue;
@@ -110,7 +108,8 @@ private static void fillCommandBuffer(MultiDrawBatch batch,
110108
ChunkRenderList renderList,
111109
CameraTransform camera,
112110
TerrainRenderPass pass,
113-
boolean useBlockFaceCulling) {
111+
boolean useBlockFaceCulling,
112+
boolean useIndexedTessellation) {
114113
batch.clear();
115114

116115
var iterator = renderList.sectionsWithGeometryIterator(pass.isTranslucent());
@@ -150,30 +149,48 @@ private static void fillCommandBuffer(MultiDrawBatch batch,
150149
continue;
151150
}
152151

153-
if (pass.isTranslucent()) {
154-
addIndexedDrawCommands(batch, pMeshData, slices);
152+
// it's sometimes necessary to not use the locally-indexed command generator even for indexed tessellations since
153+
// sometimes the index buffer is shared, but not globally shared. This means that translucent sections that
154+
// are sharing an index buffer amongst them need to use the shared index command generator since it sets the
155+
// same element offset for each draw command and doesn't increment it. Recall that in each draw command the indexing
156+
// of the elements needs to start at 0 and thus starting somewhere further into the shared index buffer is invalid.
157+
// there's also the optimization that draw commands can be combined when using a shared index buffer, be it
158+
// globally shared or just shared within the region, which isn't possible with the locally-indexed command generator.
159+
if (useIndexedTessellation && SectionRenderDataUnsafe.isLocalIndex(pMeshData)) {
160+
addLocalIndexedDrawCommands(batch, pMeshData, slices);
155161
} else {
156-
addNonIndexedDrawCommands(batch, pMeshData, slices);
162+
addSharedIndexedDrawCommands(batch, pMeshData, slices);
157163
}
158164
}
159165
}
160166

161167
/**
162-
* Generates the draw commands for a chunk's meshes using the shared index buffer.
168+
* Generates the draw commands for a chunk's meshes, where each mesh has a separate index buffer. This is used
169+
* when rendering translucent geometry, as each geometry set needs a sorted index buffer.
163170
*/
164171
@SuppressWarnings("IntegerMultiplicationImplicitCastToLong")
165-
private static void addNonIndexedDrawCommands(MultiDrawBatch batch, long pMeshData, int mask) {
172+
private static void addLocalIndexedDrawCommands(MultiDrawBatch batch, long pMeshData, int mask) {
166173
final var pElementPointer = batch.pElementPointer;
167174
final var pBaseVertex = batch.pBaseVertex;
168175
final var pElementCount = batch.pElementCount;
169176

170177
int size = batch.size;
171178

179+
long elementOffset = SectionRenderDataUnsafe.getBaseElement(pMeshData);
180+
long baseVertex = SectionRenderDataUnsafe.getBaseVertex(pMeshData);
181+
172182
for (int facing = 0; facing < ModelQuadFacing.COUNT; facing++) {
173-
// Uint32 -> Int32 cast is always safe and should be optimized away
174-
MemoryUtil.memPutInt(pBaseVertex + (size << 2), (int) SectionRenderDataUnsafe.getVertexOffset(pMeshData, facing));
175-
MemoryUtil.memPutInt(pElementCount + (size << 2), (int) SectionRenderDataUnsafe.getElementCount(pMeshData, facing));
176-
MemoryUtil.memPutAddress(pElementPointer + (size << Pointer.POINTER_SHIFT), 0 /* using a shared index buffer */);
183+
final long vertexCount = SectionRenderDataUnsafe.getVertexCount(pMeshData, facing);
184+
final long elementCount = (vertexCount >> 2) * 6;
185+
186+
MemoryUtil.memPutInt(pElementCount + (size << 2), UInt32.uncheckedDowncast(elementCount));
187+
MemoryUtil.memPutInt(pBaseVertex + (size << 2), UInt32.uncheckedDowncast(baseVertex));
188+
189+
// * 4 to convert to bytes (the index buffer contains integers)
190+
MemoryUtil.memPutAddress(pElementPointer + (size << Pointer.POINTER_SHIFT), elementOffset << 2);
191+
192+
baseVertex += vertexCount;
193+
elementOffset += elementCount;
177194

178195
size += (mask >> facing) & 1;
179196
}
@@ -182,34 +199,57 @@ private static void addNonIndexedDrawCommands(MultiDrawBatch batch, long pMeshDa
182199
}
183200

184201
/**
185-
* Generates the draw commands for a chunk's meshes, where each mesh has a separate index buffer. This is used
186-
* when rendering translucent geometry, as each geometry set needs a sorted index buffer.
202+
* Generates the draw commands for a chunk's meshes using the shared index buffer.
187203
*/
188204
@SuppressWarnings("IntegerMultiplicationImplicitCastToLong")
189-
private static void addIndexedDrawCommands(MultiDrawBatch batch, long pMeshData, int mask) {
205+
private static void addSharedIndexedDrawCommands(MultiDrawBatch batch, long pMeshData, int mask) {
190206
final var pElementPointer = batch.pElementPointer;
191207
final var pBaseVertex = batch.pBaseVertex;
192208
final var pElementCount = batch.pElementCount;
193209

194-
int size = batch.size;
195-
196-
long elementOffset = SectionRenderDataUnsafe.getBaseElement(pMeshData);
210+
// this is either zero (global shared index buffer) or the offset to the location of the shared element buffer (region shared index buffer)
211+
final var elementOffsetBytes = SectionRenderDataUnsafe.getBaseElement(pMeshData) << 2;
212+
final var facingList = SectionRenderDataUnsafe.getFacingList(pMeshData);
197213

198-
for (int facing = 0; facing < ModelQuadFacing.COUNT; facing++) {
199-
final long vertexOffset = SectionRenderDataUnsafe.getVertexOffset(pMeshData, facing);
200-
final long elementCount = SectionRenderDataUnsafe.getElementCount(pMeshData, facing);
201-
202-
// Uint32 -> Int32 cast is always safe and should be optimized away
203-
MemoryUtil.memPutInt(pBaseVertex + (size << 2), UInt32.uncheckedDowncast(vertexOffset));
204-
MemoryUtil.memPutInt(pElementCount + (size << 2), UInt32.uncheckedDowncast(elementCount));
214+
int size = batch.size;
215+
long groupVertexCount = 0;
216+
long baseVertex = SectionRenderDataUnsafe.getBaseVertex(pMeshData);
217+
int lastMaskBit = 0;
218+
219+
for (int i = 0; i <= ModelQuadFacing.COUNT; i++) {
220+
var maskBit = 0;
221+
long vertexCount = 0;
222+
if (i < ModelQuadFacing.COUNT) {
223+
vertexCount = SectionRenderDataUnsafe.getVertexCount(pMeshData, i);
224+
225+
// if there are no vertices, the mask bit is just 0
226+
if (vertexCount != 0) {
227+
var facing = (facingList >>> (i * 8)) & 0xFF;
228+
maskBit = (mask >>> facing) & 1;
229+
}
230+
}
205231

206-
// * 4 to convert to bytes (the index buffer contains integers)
207-
// the section render data storage for the indices stores the offset in indices (also called elements)
208-
MemoryUtil.memPutAddress(pElementPointer + (size << Pointer.POINTER_SHIFT), elementOffset << 2);
232+
if (maskBit == 0) {
233+
if (lastMaskBit == 1) {
234+
// delay writing out draw command if there's a zero-size group
235+
if (i < ModelQuadFacing.COUNT && vertexCount == 0) {
236+
continue;
237+
}
238+
239+
MemoryUtil.memPutInt(pElementCount + (size << 2), UInt32.uncheckedDowncast((groupVertexCount >> 2) * 6));
240+
MemoryUtil.memPutInt(pBaseVertex + (size << 2), UInt32.uncheckedDowncast(baseVertex));
241+
MemoryUtil.memPutAddress(pElementPointer + (size << Pointer.POINTER_SHIFT), elementOffsetBytes);
242+
size++;
243+
baseVertex += groupVertexCount;
244+
groupVertexCount = 0;
245+
}
246+
247+
baseVertex += vertexCount;
248+
} else {
249+
groupVertexCount += vertexCount;
250+
}
209251

210-
// adding the number of elements works because the index data has one index per element (which are the indices)
211-
elementOffset += elementCount;
212-
size += (mask >> facing) & 1;
252+
lastMaskBit = maskBit;
213253
}
214254

215255
batch.size = size;
@@ -224,7 +264,7 @@ private static void addIndexedDrawCommands(MultiDrawBatch batch, long pMeshData,
224264
private static final int MODEL_NEG_Y = ModelQuadFacing.NEG_Y.ordinal();
225265
private static final int MODEL_NEG_Z = ModelQuadFacing.NEG_Z.ordinal();
226266

227-
private static int getVisibleFaces(int originX, int originY, int originZ, int chunkX, int chunkY, int chunkZ) {
267+
public static int getVisibleFaces(int originX, int originY, int originZ, int chunkX, int chunkY, int chunkZ) {
228268
// This is carefully written so that we can keep everything branch-less.
229269
//
230270
// Normally, this would be a ridiculous way to handle the problem. But the Hotspot VM's

common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSectionManager.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,9 @@ private boolean processChunkBuildResults(ArrayList<BuilderTaskOutput> results) {
328328
result.render.setTranslucentData(chunkBuildOutput.translucentData);
329329
}
330330
} else if (result instanceof ChunkSortOutput sortOutput
331-
&& sortOutput.getTopoSorter() != null
331+
&& sortOutput.getDynamicSorter() != null
332332
&& result.render.getTranslucentData() instanceof DynamicTopoData data) {
333-
this.sortTriggering.applyTriggerChanges(data, sortOutput.getTopoSorter(), result.render.getPosition(), this.cameraPosition);
333+
this.sortTriggering.applyTriggerChanges(data, sortOutput.getDynamicSorter(), result.render.getPosition(), this.cameraPosition);
334334
}
335335

336336
var job = result.render.getTaskCancellationToken();
@@ -678,8 +678,10 @@ public Collection<String> getDebugStrings() {
678678

679679
int count = 0;
680680

681-
long deviceUsed = 0;
682-
long deviceAllocated = 0;
681+
long geometryDeviceUsed = 0;
682+
long geometryDeviceAllocated = 0;
683+
long indexDeviceUsed = 0;
684+
long indexDeviceAllocated = 0;
683685

684686
for (var region : this.regions.getLoadedRegions()) {
685687
var resources = region.getResources();
@@ -688,15 +690,20 @@ public Collection<String> getDebugStrings() {
688690
continue;
689691
}
690692

691-
var buffer = resources.getGeometryArena();
693+
var geometryArena = resources.getGeometryArena();
694+
geometryDeviceUsed += geometryArena.getDeviceUsedMemory();
695+
geometryDeviceAllocated += geometryArena.getDeviceAllocatedMemory();
692696

693-
deviceUsed += buffer.getDeviceUsedMemory();
694-
deviceAllocated += buffer.getDeviceAllocatedMemory();
697+
var indexArena = resources.getIndexArena();
698+
indexDeviceUsed += indexArena.getDeviceUsedMemory();
699+
indexDeviceAllocated += indexArena.getDeviceAllocatedMemory();
695700

696701
count++;
697702
}
698703

699-
list.add(String.format("Geometry Pool: %d/%d MiB (%d buffers)", MathUtil.toMib(deviceUsed), MathUtil.toMib(deviceAllocated), count));
704+
list.add(String.format("Pools: Geometry %d/%d MiB, Index %d/%d MiB (%d buffers)",
705+
MathUtil.toMib(geometryDeviceUsed), MathUtil.toMib(geometryDeviceAllocated),
706+
MathUtil.toMib(indexDeviceUsed), MathUtil.toMib(indexDeviceAllocated), count));
700707
list.add(String.format("Transfer Queue: %s", this.regions.getStagingBuffer().toString()));
701708

702709
list.add(String.format("Chunk Builder: Permits=%02d (E %03d) | Busy=%02d | Total=%02d",

common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/SharedQuadIndexBuffer.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import net.caffeinemc.mods.sodium.client.gl.device.CommandList;
88
import net.caffeinemc.mods.sodium.client.gl.tessellation.GlIndexType;
99
import net.caffeinemc.mods.sodium.client.gl.util.EnumBitField;
10+
import net.caffeinemc.mods.sodium.client.util.NativeBuffer;
1011

1112
import java.nio.ByteBuffer;
1213
import java.nio.IntBuffer;
@@ -55,6 +56,14 @@ private void grow(CommandList commandList, int primitiveCount) {
5556
this.maxPrimitives = primitiveCount;
5657
}
5758

59+
public static NativeBuffer createIndexBuffer(IndexType indexType, int primitiveCount) {
60+
var bufferSize = primitiveCount * indexType.getBytesPerElement() * ELEMENTS_PER_PRIMITIVE;
61+
var buffer = new NativeBuffer(bufferSize);
62+
63+
indexType.createIndexBuffer(buffer.getDirectBuffer(), primitiveCount);
64+
65+
return buffer;
66+
}
5867

5968
public GlBuffer getBufferObject() {
6069
return this.buffer;
@@ -64,14 +73,6 @@ public void delete(CommandList commandList) {
6473
commandList.deleteBuffer(this.buffer);
6574
}
6675

67-
public GlIndexType getIndexFormat() {
68-
return this.indexType.getFormat();
69-
}
70-
71-
public IndexType getIndexType() {
72-
return this.indexType;
73-
}
74-
7576
public enum IndexType {
7677
SHORT(GlIndexType.UNSIGNED_SHORT, 64 * 1024) {
7778
@Override

0 commit comments

Comments
 (0)