API 流控小技巧:把请求压在 40 RPM 以内使用 Nvidia API(适用于 open-webui 知识库重建索引等场景)
故事背景
用 SolidVPS 75 刀的大鸡(没有 GPU)建了个 open-webui,知识库文件 3000+ 份。本地跑 bge-m3 时 CPU 负载 50%,而且速度慢,因此转向外部 API 方案。期间顺便换到 qwen-embedding-8B 试了试。
遇到速率限制问题
无论 Nvidia 还是硅基流动,都有 TPM、RPM 限制。而我没找到 open-webui 里针对外部嵌入模型请求的速率限制选项,因此知识库重建索引时,Nvidia 和硅基流动都容易因为触发限额(TPM/RPM)而返回失败,进而导致 open-webui 无法获取到正确的向量。
解决方案
既然建了站,自然已经有 nginx(我用 nginx-ui 进行管理),在 nginx 里加一层流控代理即可,配置如下:
# ------------------------------------------------------
# Shared rate-limit zones
#
# Both zones use the constant key "global" instead of a
# per-client variable, so all requests checked against a
# zone share one counter: the limit is global, not per IP.
# ------------------------------------------------------

# Zone 1 - Nvidia (embedding): 40 requests per minute.
limit_req_zone global zone=nvidia_limit:10m rate=40r/m;

# Zone 2 - SiliconFlow (reranker): 2000 requests per minute.
limit_req_zone global zone=silicon_limit:10m rate=2000r/m;
# ------------------------------------------------------
# Service 1: rate-limited reverse proxy to the Nvidia API
# (listens on 8090)
# ------------------------------------------------------
server {
    listen 8090;

    location / {
        # Up to 100 requests above the zone rate may wait in the queue.
        # Without "nodelay" they are released at the zone's pace
        # (40 r/m = one every 1.5 s), so a full queue drains in ~150 s.
        limit_req zone=nvidia_limit burst=100;

        # Requests that do not fit in the queue are rejected. Answer
        # 429 (Too Many Requests) instead of nginx's default 503 so the
        # client sees a rate-limit error, not an apparent upstream outage.
        limit_req_status 429;

        proxy_pass https://integrate.api.nvidia.com;
        # Send SNI so the TLS handshake with the upstream selects the
        # right certificate.
        proxy_ssl_server_name on;
        proxy_set_header Host integrate.api.nvidia.com;

        # Tolerate slow upstream responses (up to 600 s between reads).
        # This timer only covers reading the upstream response; time a
        # request spends queued by limit_req is limited by the client's
        # own request timeout instead.
        proxy_read_timeout 600s;
    }
}
# ------------------------------------------------------
# Service 2: rate-limited reverse proxy to SiliconFlow
# (listens on 8091)
# ------------------------------------------------------
server {
    listen 8091;

    location / {
        # Up to 500 requests above the zone rate may wait in the queue.
        # "nodelay" is deliberately NOT set: queued requests are released
        # at the zone's pace (2000 r/m = one every 30 ms), which keeps
        # the outgoing traffic smooth. With "nodelay" the burst would be
        # forwarded immediately and anything beyond it rejected outright.
        limit_req zone=silicon_limit burst=500;

        # Requests that do not fit in the queue are rejected. Answer
        # 429 (Too Many Requests) instead of nginx's default 503 so the
        # client sees a rate-limit error, not an apparent upstream outage.
        limit_req_status 429;

        proxy_pass https://api.siliconflow.cn;
        # Send SNI so the TLS handshake with the upstream selects the
        # right certificate.
        proxy_ssl_server_name on;
        proxy_set_header Host api.siliconflow.cn;

        # Tolerate slow upstream responses (up to 600 s between reads).
        # This timer only covers reading the upstream response; time a
        # request spends queued by limit_req is limited by the client's
        # own request timeout instead.
        proxy_read_timeout 600s;
    }
}
# Server 2: origin endpoint for CDN back-to-origin traffic
# (origin-chat.example.com, port 2083).
# Intended to verify a secret request header and drop the connection
# (444) on mismatch. NOTE: that check is currently commented out below,
# so this server accepts any client that can reach the port.
server {
    listen 2083 ssl;
    listen [::]:2083 ssl;
    http2 on;
    server_name origin-chat.example.com;

    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers 'TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:ECDHE-RSA-AES128-GCM-SHA256';
    ssl_prefer_server_ciphers off;
    ssl_certificate /etc/nginx/ssl/cf_origin_server_2048/fullchain.cer;
    ssl_certificate_key /etc/nginx/ssl/cf_origin_server_2048/private.key;

    # --------------------------------------------------------
    # [Core] Request-header verification: only the authenticated
    # front-end CDN should be allowed to reach the origin.
    # --------------------------------------------------------
    # Example: header X-Origin-Verify must equal strict-token-123456.
    # On mismatch the connection is closed without a response (444).
    # DISABLED: uncomment and replace the placeholder token with the
    # value the CDN actually sends to enforce the check.
    # if ($http_x_origin_verify != "strict-token-123456" ) {
    #     return 444;
    # }
    # --------------------------------------------------------

    # Application routing. Per the original note this is meant to mirror
    # the main site's server block (not shown here) so the backend
    # handles requests identically.

    # ACME HTTP-01 challenge responder.
    location /.well-known/acme-challenge {
        proxy_set_header Host $host;
        # Header name uses hyphens, matching the other locations.
        proxy_set_header X-Real-IP $remote_addr;
        # Append the client address to any existing X-Forwarded-For chain
        # (an "ip:port" value is not a valid entry for this header).
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_pass http://127.0.0.1:9180;
    }

    # Auth / API style endpoints: never cached, unbuffered.
    location ~* ^/(auth|api|oauth|admin|signin|signup|signout|login|logout|sso)/ {
        proxy_pass http://openwebui:8080;
        proxy_http_version 1.1;
        # WebSocket upgrade support.
        # NOTE(review): Connection is always "upgrade", even for plain
        # requests; nginx's WebSocket guide derives it from $http_upgrade
        # with a map - consider switching if the backend misbehaves.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Pass the public chat domain (not the origin hostname) so the
        # application sees its main site domain.
        proxy_set_header Host chat.quarkmed.com;
        proxy_read_timeout 10m;
        # Stream responses to the client as they arrive.
        proxy_buffering off;
        client_max_body_size 20M;
        # Disable caching at every layer.
        proxy_no_cache 1;
        proxy_cache_bypass 1;
        add_header Cache-Control "no-store, no-cache, must-revalidate, proxy-revalidate, max-age=0" always;
        add_header Pragma "no-cache" always;
        expires -1;
    }

    # Static assets: long-lived, publicly cacheable.
    location ~* \.(css|jpg|jpeg|png|gif|ico|svg|woff|woff2|ttf|eot)$ {
        proxy_pass http://openwebui:8080;
        proxy_http_version 1.1;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Host chat.quarkmed.com;
        expires 7d;
        add_header Cache-Control "public, immutable";
    }

    # Everything else.
    location / {
        proxy_pass http://openwebui:8080;
        proxy_http_version 1.1;
        # WebSocket upgrade support (same caveat as the API location:
        # Connection is unconditionally "upgrade").
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Host chat.quarkmed.com;
        proxy_read_timeout 10m;
        proxy_buffering off;
        client_max_body_size 20M;
        # NOTE(review): this marks every response not matched by the
        # locations above as publicly cacheable for 5 minutes; confirm no
        # per-user or authenticated content is served through this path.
        add_header Cache-Control "public, max-age=300, must-revalidate";
    }
}
最后,把 open-webui 里对应的 API 地址改为 nginx 的地址加端口,即 http://<nginx 地址>:<端口>(本例中 Nvidia 走 8090,SiliconFlow 走 8091),请求就会先经过上面的流控再转发到上游。
