-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathray.sub1b.classifier.json
More file actions
76 lines (76 loc) · 1.97 KB
/
ray.sub1b.classifier.json
File metadata and controls
76 lines (76 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "profile": "sub1b",
  "model": {
    "maxOutputTokens": 96,
    "operational": {
      "recommendedPromptFormat": "native-template",
      "supportsJsonMode": true,
      "tokensPerSecondTarget": 22,
      "memoryClassMiB": 4096,
      "preferredCtxSize": 2048,
      "chatTemplateKnown": true
    },
    "adapter": {
      "kind": "llama.cpp",
      "timeoutMs": 12000,
      "slotSnapshotTimeoutMs": 200,
      "promptScaffoldCacheEntries": 256,
      "warmupRequests": [
        {
          "templateId": "email.reply_classification.v1",
          "templateVariables": {
            "replyText": "Send pricing and implementation details."
          },
          "maxTokens": 64,
          "seed": 11
        }
      ],
      "launchProfile": {
        "preset": "single-vps-sub1b-cx23",
        "binaryPath": "/usr/local/bin/llama-server",
        "modelPath": "/var/lib/ray/models/qwen2.5-0.5b-instruct-q4_k_m.gguf",
        "host": "127.0.0.1",
        "port": 8081,
        "alias": "qwen2.5-0.5b-instruct-q4_k_m",
        "ctxSize": 2048,
        "parallel": 2,
        "threads": 2,
        "threadsHttp": 2,
        "batchSize": 256,
        "ubatchSize": 128,
        "cachePrompt": true,
        "cacheReuse": 192,
        "cacheRamMiB": 384,
        "continuousBatching": true,
        "enableMetrics": true,
        "exposeSlots": true,
        "warmup": true,
        "enableUnifiedKv": true,
        "cacheIdleSlots": true,
        "contextShift": true
      }
    }
  },
  "scheduler": {
    "concurrency": 2,
    "maxQueue": 96,
    "maxQueuedTokens": 16000,
    "maxInflightTokens": 2048,
    "requestTimeoutMs": 14000,
    "dedupeInflight": true,
    "batchWindowMs": 5,
    "affinityLookahead": 24,
    "shortJobMaxTokens": 64
  },
  "gracefulDegradation": {
    "queueDepthThreshold": 18,
    "maxPromptChars": 3500,
    "degradeToMaxTokens": 64
  },
  "tags": {
    "target": "sub1b",
    "modelRole": "classifier",
    "modelSize": "0.5b-class",
    "hardware": "hetzner-cx23-class"
  }
}