Skip to content

Commit 2d6bdae

Browse files
committed
Optimize TLSF allocator: flat native block storage, alignment mask, SkipLocalsInit
- Replace grouped Block[] arrays with flat native memory (Block*) for single-indirection block access on the hot path - Pre-compute alignment mask to avoid repeated subtraction in TryAllocate - Add [module: SkipLocalsInit] to eliminate unnecessary zero-initialization - Vectorize BinsDirectory.Initialize with Span.Fill instead of scalar loop - Implement IDisposable to properly free native block buffer - Make BlockLinks.Undefined a static readonly field - Enable AggressiveInlining on AlignHelper.AlignUpOffset - Remove artificial lock() from benchmark for accurate comparison
1 parent 75fdc3b commit 2d6bdae

File tree

4 files changed

+57
-46
lines changed

4 files changed

+57
-46
lines changed

src/XenoAtom.Allocators.Bench/BenchAllocator.cs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class BenchAllocator
1818
private Random _random = new Random();
1919

2020
private static int[] AllocSizes = [64, 96, 150, 200, 400, 1024, 4096];
21-
21+
2222
private const int AllocationCount = 2048;
2323

2424
[GlobalSetup]
@@ -46,19 +46,13 @@ public void Tlsf()
4646

4747
for (int i = 0; i < AllocationCount; i++)
4848
{
49-
lock (_tlsfAllocator) // Make it more fair to the libc benchmark
50-
{
51-
var allocate = _tlsfAllocator.Allocate(GetNextRandomSize());
52-
localList.Add(allocate);
53-
}
49+
var allocate = _tlsfAllocator.Allocate(GetNextRandomSize());
50+
localList.Add(allocate);
5451
}
5552

5653
for(int i = 0; i < localList.Count; i++)
5754
{
58-
lock (_tlsfAllocator) // Make it more fair to the libc benchmark
59-
{
60-
_tlsfAllocator.Free(localList[i]);
61-
}
55+
_tlsfAllocator.Free(localList[i]);
6256
}
6357
}
6458

@@ -84,7 +78,7 @@ private unsafe class BasicChunkAllocator : IMemoryChunkAllocator
8478
{
8579
private readonly Dictionary<int, MemoryChunk> _chunks = new Dictionary<int, MemoryChunk>();
8680
private const int ChunkSize = 65536;
87-
81+
8882
public bool TryAllocateChunk(MemorySize minSize, out MemoryChunk chunk)
8983
{
9084
var blockSize = (uint)Math.Max(ChunkSize, (int)minSize.Value);

src/XenoAtom.Allocators/AlignHelper.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public static uint AlignUp(uint value, uint alignment)
2424
return (value + alignment - 1) & ~(uint)(alignment - 1);
2525
}
2626

27-
//[MethodImpl(MethodImplOptions.AggressiveInlining)]
27+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
2828
public static uint AlignUpOffset(ulong value, uint alignment)
2929
{
3030
Debug.Assert(BitOperations.IsPow2(alignment));
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Copyright (c) Alexandre Mutel. All rights reserved.
2+
// Licensed under the BSD-Clause 2 license.
3+
// See license.txt file in the project root for full license information.
4+
5+
using System.Runtime.CompilerServices;
6+
7+
[module: SkipLocalsInit]

src/XenoAtom.Allocators/TlsfAllocator.cs

Lines changed: 44 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ namespace XenoAtom.Allocators;
1616

1717
/// <summary>
1818
/// This is a TLSF (Two-Level Segregated Fit) allocator following the paper http://www.gii.upv.es/tlsf/files/papers/ecrts04_tlsf.pdf
19-
///
19+
///
2020
/// But with the following modifications:
2121
/// - We are relying on a backend allocator for the chunks.
2222
/// - We are not storing the block headers in the allocated memory but in separate array as the memory allocated from chunks might not be accessible from CPU (e.g GPU).
23-
///
23+
///
2424
/// With its backend allocator, this allocator is dynamic and its size can grow as needed. This allocator doesn't allocate memory by itself,
2525
/// but use a backend allocator to allocate chunks of memory. It is agnostic of the backend allocator (that can allocate memory from RAM or GPU memory...etc.).
2626
/// </summary>
@@ -29,13 +29,15 @@ namespace XenoAtom.Allocators;
2929
/// Note that this class is not thread safe and should be guarded by a lock if used in a multi-threaded environment.
3030
/// The rationale is that this allocator can be used with Thread Local Storage (TLS) buffers that are not shared between threads and don't need locking.
3131
/// </remarks>
32-
public sealed unsafe class TlsfAllocator
32+
public sealed unsafe class TlsfAllocator : IDisposable
3333
{
3434
private readonly IMemoryChunkAllocator _context;
3535
private readonly uint _alignment;
36+
private readonly uint _alignmentMask;
3637
private UnsafeList<Chunk> _chunks;
37-
private UnsafeList<Block[]> _groupedBlocks;
38+
private Block* _blocks;
3839
private int _blockCount;
40+
private int _blockCapacity;
3941
private int _indexToFirstAvailableBlock;
4042
private BinsDirectory _bins;
4143

@@ -47,10 +49,6 @@ public sealed unsafe class TlsfAllocator
4749
private const int TotalBinCount = BinCount * SubBinCount;
4850
private const int MinAlignment = 1 << (BaseBin0Log2 - SubBinsLog2); // Minimum alignment is 64 bytes
4951

50-
private const int GroupedBlockCountLog2 = 10; // TODO: Should we make this configurable? (It might not help with optimizations in that case for indexing a block)
51-
private const int GroupedBlockCount = 1 << GroupedBlockCountLog2;
52-
private const int GroupedBlockMask = GroupedBlockCount - 1;
53-
5452
/// <summary>
5553
/// Creates a new instance of <see cref="TlsfAllocator"/>.
5654
/// </summary>
@@ -77,8 +75,8 @@ public TlsfAllocator(IMemoryChunkAllocator context, in TlsfAllocatorConfig confi
7775
}
7876
_context = context;
7977
_alignment = Math.Max(MinAlignment, alignment);
78+
_alignmentMask = _alignment - 1;
8079
_chunks = new UnsafeList<Chunk>((int)config.PreAllocatedChunkCount);
81-
_groupedBlocks = new UnsafeList<Block[]>();
8280
_indexToFirstAvailableBlock = -1;
8381
_bins = new BinsDirectory();
8482
}
@@ -111,7 +109,7 @@ public TlsfAllocation Allocate(uint size)
111109
public bool TryAllocate(MemorySize size, out TlsfAllocation allocation)
112110
{
113111
// We align the size to the alignment (so free blocks are always aligned)
114-
size = AlignHelper.AlignUp(size, _alignment);
112+
size = (size + _alignmentMask) & ~_alignmentMask;
115113

116114
var firstLevelIndex = Mapping(size, out int secondLevelIndex);
117115

@@ -128,7 +126,7 @@ public bool TryAllocate(MemorySize size, out TlsfAllocation allocation)
128126

129127
var offsetIntoChunk = freeBlock.OffsetIntoChunk;
130128
var newFreeBlockSize = freeBlock.Size - size;
131-
129+
132130
if (newFreeBlockSize > 0)
133131
{
134132
// we need to shrink the block and create a new block used
@@ -147,11 +145,11 @@ public bool TryAllocate(MemorySize size, out TlsfAllocation allocation)
147145
usedBlock.Size = size;
148146
usedBlock.IsUsed = true;
149147
usedBlock.FreeLink = BlockLinks.Undefined;
150-
148+
151149
// Insert the new block in the physical order
152150
usedBlock.PhysicalLink.Next = freeBlockIndex;
153151
usedBlock.PhysicalLink.Previous = freeBlock.PhysicalLink.Previous;
154-
152+
155153
if (freeBlock.PhysicalLink.Previous < 0)
156154
{
157155
// Relink the beginning of the chunk
@@ -162,7 +160,7 @@ public bool TryAllocate(MemorySize size, out TlsfAllocation allocation)
162160
ref var previousBlock = ref GetBlockAt(freeBlock.PhysicalLink.Previous);
163161
previousBlock.PhysicalLink.Next = usedBlockIndex;
164162
Debug.Assert(previousBlock.OffsetIntoChunk + previousBlock.Size == offsetIntoChunk);
165-
}
163+
}
166164

167165
Debug.Assert(usedBlock.OffsetIntoChunk + size == freeBlock.OffsetIntoChunk);
168166
freeBlock.PhysicalLink.Previous = usedBlockIndex;
@@ -299,6 +297,18 @@ public void Reset()
299297
_bins.Initialize();
300298
}
301299

300+
/// <inheritdoc />
301+
public void Dispose()
302+
{
303+
Reset();
304+
if (_blocks != null)
305+
{
306+
NativeMemory.Free(_blocks);
307+
_blocks = null;
308+
_blockCapacity = 0;
309+
}
310+
}
311+
302312
/// <summary>
303313
/// Dumps the internal state of this allocator to a string.
304314
/// </summary>
@@ -431,7 +441,7 @@ public void Dump(StringBuilder buffer)
431441
buffer.AppendLine($"{$"[{(length == 1 ? firstBlockAvailableIndex : $"{firstBlockAvailableIndex}-{_blockCount - 1}")}]",C1} {$"",C2} {"",C3} {"",C4} {"Avail",C5} {"",C6} {"",C7}");
432442
}
433443
}
434-
444+
435445
static string ToBin<T>(T number) where T : unmanaged, IBinaryInteger<T>
436446
{
437447
var builder = new StringBuilder();
@@ -475,31 +485,31 @@ private void MarkBlockAsAvailable(ref Block block, int blockIndex)
475485
private ref Block GetBlockAt(int index)
476486
{
477487
Debug.Assert(index >= 0 && index < _blockCount);
478-
479-
var groupIndex = index >> GroupedBlockCountLog2;
480-
var blocks = _groupedBlocks.UnsafeGetRefAt(groupIndex);
481-
var localIndex = index & GroupedBlockMask;
482-
return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(blocks), localIndex);
488+
return ref Unsafe.Add(ref Unsafe.AsRef<Block>(_blocks), index);
483489
}
484490

485491
[MethodImpl(MethodImplOptions.AggressiveInlining)]
486492
private ref Block GetOrCreateBlockAt(int index)
487493
{
488-
var groupIndex = index >> GroupedBlockCountLog2;
489-
490-
while (groupIndex >= _groupedBlocks.Count)
494+
if (index >= _blockCapacity)
491495
{
492-
_groupedBlocks.Add(new Block[GroupedBlockCount]);
496+
GrowBlocks(index);
493497
}
494498

495499
if (index >= _blockCount)
496500
{
497501
_blockCount = index + 1;
498502
}
499503

500-
var blocks = _groupedBlocks.UnsafeGetRefAt(groupIndex);
501-
var localIndex = index & GroupedBlockMask;
502-
return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(blocks), localIndex);
504+
return ref Unsafe.Add(ref Unsafe.AsRef<Block>(_blocks), index);
505+
}
506+
507+
[MethodImpl(MethodImplOptions.NoInlining)]
508+
private void GrowBlocks(int minIndex)
509+
{
510+
var newCapacity = (int)BitOperations.RoundUpToPowerOf2((uint)Math.Max(_blockCapacity * 2, minIndex + 1));
511+
_blocks = (Block*)NativeMemory.Realloc(_blocks, (nuint)(newCapacity * sizeof(Block)));
512+
_blockCapacity = newCapacity;
503513
}
504514

505515
private void RemoveBlockFromFreeList(ref Block block, int firstLevelIndex, int secondLevelIndex)
@@ -586,7 +596,7 @@ private ref Block TryFindSuitableBlock(uint size, ref int firstLevelIndex, ref i
586596
chunkEntry.FreeBlockCount++;
587597
chunkEntry.FirstBlockInPhysicalOrder = blockIndex;
588598
chunk = localChunk;
589-
599+
590600
Debug.Assert(BitOperations.IsPow2(chunk.Size));
591601
ref var block = ref GetOrCreateBlockAt(blockIndex);
592602
block.ChunkIndex = (uint)chunkIndex;
@@ -641,7 +651,7 @@ private ref Block TryFindSuitableBlock(uint size, ref int firstLevelIndex, ref i
641651
return ref block;
642652
}
643653
}
644-
654+
645655
[MethodImpl(MethodImplOptions.AggressiveInlining)]
646656
public static int Mapping(uint size, out int secondLevelIndex)
647657
{
@@ -669,17 +679,17 @@ private struct BinsDirectory
669679
public uint FirstLevelBitmap => _firstLevelBitmap;
670680

671681
public ushort GetSecondLevelBitmap(int index) => _secondLevelBitmap[index];
672-
682+
673683
public BinsDirectory()
674684
{
675685
Initialize();
676686
}
677687

678688
public void Initialize()
679689
{
680-
for (int i = 0; i < TotalBinCount; i++)
690+
fixed (int* ptr = _firstFreeBlockIndices)
681691
{
682-
_firstFreeBlockIndices[i] = -1;
692+
new Span<int>(ptr, TotalBinCount).Fill(-1);
683693
}
684694
}
685695

@@ -776,7 +786,7 @@ private string ToDebuggerDisplay()
776786
{
777787
return $"Offset: {OffsetIntoChunk}, Size: {Size}, End: {OffsetIntoChunk + Size}, Status: {(IsUsed ? "Used":IsAvailable?"Avail":"Free")}, FreeLink: {FreeLink.Previous}<->{FreeLink.Next}, PhysicalLink: {PhysicalLink.Previous} <-> {PhysicalLink.Next}";
778788
}
779-
789+
780790
public bool IsUsed
781791
{
782792
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -796,7 +806,7 @@ public bool IsAvailable
796806

797807
private struct BlockLinks
798808
{
799-
public static BlockLinks Undefined => Unsafe.BitCast<long, BlockLinks>(-1);
809+
public static readonly BlockLinks Undefined = Unsafe.BitCast<long, BlockLinks>(-1L);
800810

801811
public int Previous;
802812
public int Next;

0 commit comments

Comments
 (0)