.env.example

# 建议使用WebUI进行配置，配置后可导出.env文件

LOG_GENERAL=true
LOG_OPENAI=true

CACHE_GENERAL=true
CACHE_OPENAI=true

CHAT_COMPLETION_ROUTE=/v1/chat/completions
# `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
CACHE_BACKEND=LMDB
CACHE_ROOT_PATH_OR_URL=./FLAXKV_DB
DEFAULT_REQUEST_CACHING_VALUE=false

BENCHMARK_MODE=true

# `FORWARD_CONFIG` 下面的配置将转发https://api.openai.com 至根路由`/`，转发 https://generativelanguage.googleapis.com至 `/gemini`路由:
FORWARD_CONFIG=[{"base_url":"https://api.openai.com","route":"/","type":"openai"},{"base_url":"https://generativelanguage.googleapis.com","route":"/gemini","type":"general"}]

#LEVEL_MODELS={"1": ["gpt-4"], "2": ["gpt-3.5-turbo"]}
#OPENAI_API_KEY_CONFIG={"sk-xxx": [0], "sk-xxx": [1], "sk-xxx": [1,2]}
#FORWARD_KEY_CONFIG={"fk-0": 0, "fk-1": 1, "fk-2": 2}

# `REQ_RATE_LIMIT`: 指定路由的请求速率限制（区分用户）
# `REQ_RATE_LIMIT`: i.e., Request rate limit for specified routes, user specific
# format: {route: ratelimit-string}
# ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation
REQ_RATE_LIMIT='{
"/openai/v1/chat/completions": "1/10seconds",
"/localai/v1/chat/completions": "2/second"
}'

# rate limit后端: [memory, redis, Memcached, ...] :ref: https://limits.readthedocs.io/en/stable/storage.html#
# Backend for rate limiting: [memory, redis, memcached, ...] :ref: https://limits.readthedocs.io/en/stable/storage.html#
# Assuming your Redis service is at localhost:6379
REQ_RATE_LIMIT_BACKEND="redis://localhost:6379"

# `GLOBAL_RATE_LIMIT`: 限制所有`REQ_RATE_LIMIT`没有指定的路由. 不填默认无限制
GLOBAL_RATE_LIMIT=inf

#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html
# `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost.
RATE_LIMIT_STRATEGY=fixed-window

# `PROXY` http代理
PROXY=http://localhost:7890

# `TOKEN_RATE_LIMIT` 对每一份流式返回的token速率限制 (注：这里的token并不严格等于gpt中定义的token，而是SSE的chunk)
# Rate limit for returned tokens
TOKEN_RATE_LIMIT={"/v1/chat/completions":"20/second", "/benchmark/v1/chat/completions":"500/second"}

# TCP connection timeout duration (in seconds)
TIMEOUT=10

ITER_CHUNK_TYPE=efficiency
#ITER_CHUNK_TYPE=one-by-one

WEBUI_RESTART_PORT=15555
WEBUI_LOG_PORT=15556

DEFAULT_STREAM_RESPONSE=true
# Set timezone
TZ=Asia/Shanghai