logo
0
0
WeChat Login

速度约 60 tokens/s,目前不支持多并发。这里使用的是已合并到 llama.cpp 主线的 MTP 推测解码。

运行命令

/workspace/llama-server -m /workspace/model/Qwen3.6-27B-MTP-UD-Q4_K_XL.gguf --host 0.0.0.0 --port 5001 -ngl 99 -t 8 --spec-type draft-mtp --spec-draft-n-max 2 -np 1 -c 131072 -ctk q8_0  -ctv q8_0 --reasoning off

openclaw config

  "agents": {
    "defaults": {
      "workspace": "/home/mls/.openclaw/workspace",
      "model": {
        "primary": "cnb/Qwen3.6-27B-Q4"
      },
      "models": {
        "modelscope/ZhipuAI/GLM-5.1": {"alias": "GLM-5.1"},
        "cnb/Qwen3.6-27B-UD-Q4_K_XL.gguf": {"alias": "Qwen3.6-27B-Q4"}
      }
    }
  },
  "models": {
    "mode": "merge",
    "providers": {
      "cnb": {
        "baseUrl": "https://vd1odtlvc7-8082.cnb.run/v1",
        "api": "openai-completions",
        "apiKey": "ss-",
        "models": [
          {
            "id": "Qwen3.6-27B-UD-Q4_K_XL.gguf",
            "name": "Qwen3.6-27B-Q4",
            "contextWindow": 262144,
            "maxTokens": 262144,
            "input": ["text"],
            "cost": {"input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0},
            "reasoning": false
          }
        ]
      },
      "modelscope": {
        "baseUrl": "https://api-inference.modelscope.cn/v1",
        "api": "openai-completions",
        "apiKey": "ms-",
        "models": [
          {
            "id": "ZhipuAI/GLM-5.1",
            "name": "GLM-5.1",
            "contextWindow": 202752,
            "maxTokens": 202752,
            "input": ["text"],
            "cost": {"input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0},
            "reasoning": false
          }
        ]
      }
    }
  }

apiKey 可以随便填。我关闭了视觉识别:个人感觉它目前对 openclaw 没什么用,还不完善,报错太多。