标签 Nvidia API 下的文章

故事背景

用 SolidVPS 75 刀的大机(没有 GPU)建了个 open-webui,知识库文件 3000+ 份。本地跑 bge-m3 时 CPU 负载 50% 且速度慢,因此转用外部 API 方案,期间顺便换成 qwen-embedding-8B 试了试。

遇到速率限制问题

无论 Nvidia 还是硅基流动,都有 TPM RPM 限制。而我没找到 open-webui 里对外部嵌入模型发起请求的速率限制,因此知识库重建索引时,Nvidia 和 硅基 都容易因为 TPM 返回失败,进而导致 open-webui 无法获取到正确的向量。

解决方案

既然建了站,服务器上自然已有 nginx(我用 nginx-ui 管理)。在 nginx 里加一层流控代理即可:


    # ======================================================
    # Zone 1: Nvidia (Embedding)
    # Limit: 40 RPM (40 requests per minute)
    # Policy: strict queuing
    # ======================================================
    # The literal string "global" is used as the key, so the limit is
    # shared across all clients instead of being applied per IP.
    limit_req_zone "global" zone=nvidia_limit:10m rate=40r/m;

    # ======================================================
    # Zone 2: SiliconFlow (Reranker)
    # Limit: 2000 RPM (2000 requests per minute)
    # Policy: allow bursts
    # ======================================================
    limit_req_zone "global" zone=silicon_limit:10m rate=2000r/m;
    
    # ------------------------------------------------------
    # Service 1: proxy for Nvidia (listens on 8090)
    # ------------------------------------------------------
    server {
        listen 8090;

        location / {
            # Allow up to 100 requests to queue as a burst; without
            # "nodelay", excess requests are delayed so the upstream
            # never sees more than the zone's 40r/m.
            limit_req zone=nvidia_limit burst=100;

            proxy_pass https://integrate.api.nvidia.com;
            proxy_ssl_server_name on;
            proxy_set_header Host integrate.api.nvidia.com;

            # Long read timeout so requests queued by the limiter
            # are not disconnected before they are served.
            proxy_read_timeout 600s;
        }
    }

    # ------------------------------------------------------
    # Service 2: proxy for SiliconFlow (listens on 8091)
    # ------------------------------------------------------
    server {
        listen 8091;

        location / {
            # Allow up to 500 requests to queue as a burst.
            # "nodelay" is deliberately omitted: instead of forwarding
            # bursts instantly, excess requests are smoothed out to the
            # zone's 2000r/m rate, keeping the upstream load steady.
            limit_req zone=silicon_limit burst=500;

            proxy_pass https://api.siliconflow.cn;
            proxy_ssl_server_name on;
            proxy_set_header Host api.siliconflow.cn;

            # Long read timeout so requests queued by the limiter
            # are not disconnected before they are served.
            proxy_read_timeout 600s;
        }
    }

# Server 2: origin-pull only (origin-chat.example.com)
# Verifies a secret header from the fronting CDN; on mismatch the
# connection is closed with 444 (no response is sent at all).
server {
    listen 2083 ssl;
    listen [::]:2083 ssl;
    http2 on;
    server_name origin-chat.example.com;
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers 'TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:ECDHE-RSA-AES128-GCM-SHA256';
    ssl_prefer_server_ciphers off;
    ssl_certificate /etc/nginx/ssl/cf_origin_server_2048/fullchain.cer;
    ssl_certificate_key /etc/nginx/ssl/cf_origin_server_2048/private.key;
    # --------------------------------------------------------
    # [Core] Request-header check: only the authenticated fronting
    # CDN is allowed to pull from this origin.
    # --------------------------------------------------------
    # Assume the header is X-Origin-Verify with value strict-token-123456.
    # If it does not match (!=), return 444: nginx drops the
    # connection without sending any response.
    # if ($http_x_origin_verify != "strict-token-123456" ) {
    # return 444;
    # }
    # --------------------------------------------------------
    # Business routes (kept identical to the main server block so the
    # backend behaves the same whichever path a request arrives by).
    location /.well-known/acme-challenge {
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        # X-Forwarded-For carries client IPs only; a ":port" suffix is
        # not part of the header's format and breaks downstream parsers.
        proxy_set_header X-Forwarded-For $remote_addr;
        proxy_pass http://127.0.0.1:9180;
    }
    location ~* ^/(auth|api|oauth|admin|signin|signup|signout|login|logout|sso)/ {
        proxy_pass http://openwebui:8080;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Still pass the public chat domain to the backend so the
        # application recognizes its canonical host.
        proxy_set_header Host chat.quarkmed.com;
        proxy_read_timeout 10m;
        proxy_buffering off;
        client_max_body_size 20M;
        proxy_no_cache 1;
        proxy_cache_bypass 1;
        add_header Cache-Control "no-store, no-cache, must-revalidate, proxy-revalidate, max-age=0" always;
        add_header Pragma "no-cache" always;
        expires -1;
    }
    location ~* \.(css|jpg|jpeg|png|gif|ico|svg|woff|woff2|ttf|eot)$ {
        proxy_pass http://openwebui:8080;
        proxy_http_version 1.1;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Host chat.quarkmed.com;
        expires 7d;
        add_header Cache-Control "public, immutable";
    }
    location / {
        proxy_pass http://openwebui:8080;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Host chat.quarkmed.com;
        proxy_read_timeout 10m;
        proxy_buffering off;
        client_max_body_size 20M;
        add_header Cache-Control "public, max-age=300, must-revalidate";
    }
}

最后在 open-webui 中,把外部嵌入/重排 API 的地址改为 nginx 代理地址(如 http://服务器IP:8090 和 http://服务器IP:8091),即可由 nginx 平滑控制请求速率。


📌 转载信息
原作者:
lekai
转载时间:
2026/1/14 17:47:23