@@ -1225,7 +1225,7 @@ public final class MLXEngine: InferenceEngineProtocol, @unchecked Sendable {
12251225
12261226 /// Estimate model weight size from config.json params or directory size.
12271227 /// Used by the pre-load memory gate to decide whether we need LRU eviction.
1228- static func estimateModelWeightSize( at url: URL ) -> UInt64 ? {
1228+ public static func estimateModelWeightSize( at url: URL ) -> UInt64 ? {
12291229 let configFile = url. appendingPathComponent ( " config.json " )
12301230 guard let data = try ? Data ( contentsOf: configFile) ,
12311231 let json = try ? JSONSerialization . jsonObject ( with: data) as? [ String : Any ] else {
@@ -1254,6 +1254,85 @@ public final class MLXEngine: InferenceEngineProtocol, @unchecked Sendable {
12541254 return UInt64 ( MLX . Memory. activeMemory) + neededBytes <= softLimitBytes
12551255 }
12561256
/// Pre-emptive memory feasibility check: could `modelId` be loaded right now?
///
/// The requirement is the estimated weight size plus a size-dependent safety margin.
/// It is compared first against the GPU's recommended working set and then, when a
/// memory enforcer is enabled with a non-zero soft limit, against that soft limit.
///
/// - Parameters:
///   - modelId: Identifier used to look up an already-loaded container.
///   - sizeBytes: Fallback size used when `estimateModelWeightSize(at:)` returns nil.
///   - localURL: Model location handed to `estimateModelWeightSize(at:)`.
/// - Returns: `nil` if the model is already loaded or no GPU budget is reported;
///   otherwise a `MemoryFeasibility` describing whether the load fits.
public func checkMemoryFeasibility(modelId: String, sizeBytes: UInt64, localURL: URL) async -> MemoryFeasibility? {
    let bytesPerMB: UInt64 = 1_048_576
    let bytesPerGB: UInt64 = 1_073_741_824

    // Already loaded — nothing to check.
    guard getContainer(for: modelId) == nil else { return nil }

    let maxGPU = GPU.maxRecommendedWorkingSetBytes().map { UInt64($0) } ?? 0
    guard maxGPU > 0 else { return nil }

    // Prefer the estimate derived from the model directory; fall back to the caller's size.
    let estimatedBytes = Self.estimateModelWeightSize(at: localURL) ?? sizeBytes
    let modelMB = estimatedBytes / bytesPerMB

    // Safety margin tiers, intended to match the pre-load gate:
    // flat 5 GiB above 30 GiB, 30% above 20 GiB, 20% otherwise.
    let safetyMargin: UInt64
    if estimatedBytes > 30 * bytesPerGB {
        safetyMargin = 5 * bytesPerGB
    } else if estimatedBytes > 20 * bytesPerGB {
        safetyMargin = UInt64(Double(estimatedBytes) * 0.3)
    } else {
        safetyMargin = UInt64(Double(estimatedBytes) * 0.2)
    }
    let neededBytes = estimatedBytes + safetyMargin
    let neededMB = neededBytes / bytesPerMB

    let currentBytes = UInt64(MLX.Memory.activeMemory)
    let available = currentBytes < maxGPU ? maxGPU - currentBytes : 0
    let gpuMB = maxGPU / bytesPerMB
    let availableMB = available / bytesPerMB

    // Hard failure: the model cannot fit in the GPU budget even with nothing else loaded.
    if neededBytes > maxGPU {
        // Clamp the subtraction: UInt64 arithmetic traps on underflow when the machine
        // reports less than 2 GiB of physical memory.
        let physicalMB = ProcessInfo.processInfo.physicalMemory / bytesPerMB
        let reservedMB: UInt64 = 2048
        let ceilingMB = physicalMB > reservedMB ? physicalMB - reservedMB : physicalMB
        let suggestedLimitMB = min(gpuMB + 30000, ceilingMB)
        return MemoryFeasibility(
            canLoad: false,
            modelSizeMB: modelMB,
            availableMB: availableMB,
            gpuBudgetMB: gpuMB,
            reason: "Model peak (\(neededMB)MB) exceeds GPU budget (\(gpuMB)MB). Try: sudo sysctl iogpu.wired_limit_mb=\(suggestedLimitMB)"
        )
    }

    // Check against the process soft limit if the enforcer is active.
    if let enforcer = memoryEnforcer {
        let status = await enforcer.status
        if status.enabled && status.softLimitBytes > 0 {
            let softLimit = status.softLimitBytes
            // Best case after a full LRU eviction is the whole soft limit.
            // NOTE(review): assumes all active MLX memory is evictable — confirm.
            let freeAfterEvict = softLimit / bytesPerMB

            if neededBytes > softLimit {
                // Even an empty process could not hold this model under the soft limit.
                return MemoryFeasibility(
                    canLoad: false,
                    modelSizeMB: modelMB,
                    availableMB: availableMB,
                    gpuBudgetMB: gpuMB,
                    reason: "Insufficient memory: need \(neededMB)MB, only \(freeAfterEvict)MB available after full LRU eviction"
                )
            }

            // Same test as `currentBytes + neededBytes > softLimit`, written as a
            // subtraction (safe because neededBytes <= softLimit here) so it cannot overflow.
            if currentBytes > softLimit - neededBytes {
                // Fits under the limit only once other models are evicted.
                return MemoryFeasibility(
                    canLoad: true,
                    modelSizeMB: modelMB,
                    availableMB: availableMB,
                    gpuBudgetMB: gpuMB,
                    reason: "Loadable after evicting other models (need \(neededMB)MB, \(freeAfterEvict)MB available after LRU eviction)"
                )
            }
        }
    }

    // Fits within both the GPU budget and (if enforced) the soft limit as things stand.
    return MemoryFeasibility(
        canLoad: true,
        modelSizeMB: modelMB,
        availableMB: availableMB,
        gpuBudgetMB: gpuMB
    )
}
1335+
12571336 public func preflightCheck( modelId: String , promptTokens: Int , maxTokens: Int ) async throws {
12581337 guard let maxGPU = GPU . maxRecommendedWorkingSetBytes ( ) , maxGPU > 0 else { return }
12591338
0 commit comments