速度 60 tokens/s,目前不支持多并发。这里用的是已合并到 llama.cpp 主线的 MTP 实现。
运行命令:
/workspace/llama-server -m /workspace/model/Qwen3.6-27B-MTP-UD-Q4_K_XL.gguf --host 0.0.0.0 --port 5001 -ngl 99 -t 8 --spec-type draft-mtp --spec-draft-n-max 2 -np 1 -c 131072 -ctk q8_0 -ctv q8_0 --reasoning off
openclaw 配置:
"agents": {
"defaults": {
"workspace": "/home/mls/.openclaw/workspace",
"model": {
"primary": "cnb/Qwen3.6-27B-Q4"
},
"models": {
"modelscope/ZhipuAI/GLM-5.1": {"alias": "GLM-5.1"},
"cnb/Qwen3.6-27B-UD-Q4_K_XL.gguf": {"alias": "Qwen3.6-27B-Q4"}
}
}
},
"models": {
"mode": "merge",
"providers": {
"cnb": {
"baseUrl": "https://vd1odtlvc7-8082.cnb.run/v1",
"api": "openai-completions",
"apiKey": "ss-",
"models": [
{
"id": "Qwen3.6-27B-UD-Q4_K_XL.gguf",
"name": "Qwen3.6-27B-Q4",
"contextWindow": 262144,
"maxTokens": 262144,
"input": ["text"],
"cost": {"input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0},
"reasoning": false
}
]
},
"modelscope": {
"baseUrl": "https://api-inference.modelscope.cn/v1",
"api": "openai-completions",
"apiKey": "ms-",
"models": [
{
"id": "ZhipuAI/GLM-5.1",
"name": "GLM-5.1",
"contextWindow": 202752,
"maxTokens": 202752,
"input": ["text"],
"cost": {"input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0},
"reasoning": false
}
]
}
}
}
apikey 随便填。关闭了视觉识别:个人感觉它目前对 openclaw 没啥用,还不完善,报错太多。