-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathray.vps.json
More file actions
52 lines (52 loc) · 1.08 KB
/
ray.vps.json
File metadata and controls
52 lines (52 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
{
"profile": "vps",
"server": {
"host": "127.0.0.1",
"port": 3000,
"requestBodyLimitBytes": 64000
},
"model": {
"id": "qwen2.5-3b-instruct-q4",
"family": "qwen2.5",
"quantization": "q4_k_m",
"contextWindow": 8192,
"warmOnBoot": true,
"maxOutputTokens": 384,
"adapter": {
"kind": "openai-compatible",
"baseUrl": "http://127.0.0.1:8081",
"modelRef": "qwen2.5-3b-instruct-q4_k_m",
"timeoutMs": 20000
}
},
"scheduler": {
"concurrency": 2,
"maxQueue": 96,
"requestTimeoutMs": 24000,
"dedupeInflight": true,
"batchWindowMs": 0
},
"cache": {
"enabled": true,
"maxEntries": 512,
"ttlMs": 120000,
"keyStrategy": "input+params"
},
"gracefulDegradation": {
"enabled": true,
"queueDepthThreshold": 24,
"maxPromptChars": 8000,
"degradeToMaxTokens": 192
},
"auth": {
"enabled": false,
"apiKeyEnv": "RAY_API_KEYS"
},
"rateLimit": {
"enabled": true,
"windowMs": 60000,
"maxRequests": 90,
"keyStrategy": "ip+api-key",
"trustProxyHeaders": true
}
}