update example config

Cyberes 2023-08-30 18:59:29 -06:00
parent 2816c01902
commit 41b8232499
1 changed file with 45 additions and 35 deletions

@@ -1,45 +1,55 @@
-## Important stuff
-# The base URL of your backend API
-backend_url: http://x.x.x.x:5000
-# Mode to run the proxy in
-mode: oobabooga
-# How many concurrent generation requests will be processed at the same time.
-# Oobabooga only supports one. If you're using Oobabooga, you MUST set this to 1
-# or else your estimated wait time will be incorrect.
-concurrent_gens: 1
-# The configured token limit of your backend.
-# This number is shown to clients and on the home page. (may be important later)
-token_limit: 7777
+## Important
+backend_url: https://10.0.0.86:8183
+mode: hf-textgen
+concurrent_gens: 3
+token_limit: 8192
 # How many requests a single IP is allowed to put in the queue.
 # If an IP tries to put more than this their request will be rejected
 # until the other(s) are completed.
-ip_in_queue_max: 1
+ip_in_queue_max: 2
+llm_middleware_name: Local LLM Proxy
 ## Optional
-# Log request and response content.
+max_new_tokens: 500
 log_prompts: false
-# Python request has issues with self-signed certs.
-verify_ssl: false
-# Reject all requests if they aren't authenticated with a token.
+verify_ssl: false # Python request has issues with self-signed certs
 auth_required: false
-# Name of your proxy, shown to clients.
-llm_middleware_name: Local LLM Proxy
+# TODO: reject any prompts with a message
+# TODO: tokens with a 0 priority are excluded
+# TODO: add this value to the stats page
+max_queued_prompts_per_ip: 1
 # JS tracking code to add to the home page.
 # analytics_tracking_code: |
 #   alert("hello");
 # HTML to add under the "Estimated Wait Time" line.
 # info_html: |
-#   <a href="https://chub-archive.evulid.cc/#/proxy-stats.html?proxy=proxy_chub_archive_evulid">Historical Stats</a>
+#   some interesting info
+## Tuneables ##
+# Path that is shown to users for them to connect to
+frontend_api_client: /api
+# Path to the database, relative to the directory of server.py
+database_path: ./proxy-server.db
+# How to calculate the average generation time.
+# Valid options: database, minute
+# "database" calculates average from historical data in the database, with the more recent data weighted more.
+# "minute" calculates it from the last minute of data.
+average_generation_time_mode: database
 ## STATS ##
@@ -49,13 +59,13 @@ show_num_prompts: true
 # Display the uptime item on the stats screen.
 show_uptime: true
+show_total_output_tokens: true
+show_backend_info: true
 # Load the number of prompts from the database to display on the stats page.
 load_num_prompts: true
-# Path that is shown to users for them to connect to
-frontend_api_client: /api
-# Relative paths are mapped to the directory of the server
-database_path: ./proxy-server.db
-average_generation_time_mode: database
+## NETDATA ##
+# netdata_root: http://172.0.2.140:19999
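
For reference, the updated example config assembled from the added and unchanged lines above. This is a sketch, not the verbatim file: blank-line placement is assumed, and the lines that fall between the two hunks are elided.

```yaml
## Important

backend_url: https://10.0.0.86:8183
mode: hf-textgen
concurrent_gens: 3
token_limit: 8192

# How many requests a single IP is allowed to put in the queue.
# If an IP tries to put more than this their request will be rejected
# until the other(s) are completed.
ip_in_queue_max: 2

llm_middleware_name: Local LLM Proxy

## Optional

max_new_tokens: 500
log_prompts: false
verify_ssl: false # Python request has issues with self-signed certs
auth_required: false

# TODO: reject any prompts with a message
# TODO: tokens with a 0 priority are excluded
# TODO: add this value to the stats page
max_queued_prompts_per_ip: 1

# JS tracking code to add to the home page.
# analytics_tracking_code: |
#   alert("hello");

# HTML to add under the "Estimated Wait Time" line.
# info_html: |
#   some interesting info

## Tuneables ##

# Path that is shown to users for them to connect to
frontend_api_client: /api

# Path to the database, relative to the directory of server.py
database_path: ./proxy-server.db

# How to calculate the average generation time.
# Valid options: database, minute
# "database" calculates average from historical data in the database, with the more recent data weighted more.
# "minute" calculates it from the last minute of data.
average_generation_time_mode: database

## STATS ##

# ... (lines between the two hunks are not shown in this diff) ...
show_num_prompts: true

# Display the uptime item on the stats screen.
show_uptime: true
show_total_output_tokens: true
show_backend_info: true

# Load the number of prompts from the database to display on the stats page.
load_num_prompts: true

## NETDATA ##

# netdata_root: http://172.0.2.140:19999
```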