## Important

backend_url: https://10.0.0.50:8283

mode: vllm
concurrent_gens: 3
token_limit: 8192

# How many requests a single IP is allowed to have in the queue at once.
# If an IP tries to queue more than this, the new request is rejected
# until the earlier one(s) have completed.
simultaneous_requests_per_ip: 2
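
# For example: with the limit of 2 above, a third request from the same IP that
# arrives while two are still pending is rejected rather than queued. (The exact
# rejection response isn't specified here; check server.py.)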

## Optional

max_new_tokens: 500

enable_streaming: false

log_prompts: false

verify_ssl: false  # the Python requests library has issues with self-signed certs

auth_required: false

max_queued_prompts_per_ip: 1

# Name of your proxy, shown to clients.
llm_middleware_name: local-llm-server

# Set the name of the model shown to clients.
# manual_model_name: testing123

# JS tracking code to add to the home page.
# analytics_tracking_code: |
#   alert("hello");

# HTML to add under the "Estimated Wait Time" line.
# info_html: |
#   bla bla whatever

enable_openi_compatible_backend: true
# openai_api_key:
expose_openai_system_prompt: true
# openai_system_prompt: |
#   You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n
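
# Example client call once the OpenAI-compatible backend is enabled. This is a
# sketch: "your-proxy-host" is a placeholder and the route is an assumption —
# check your deployment for the actual path the proxy serves.
#   curl http://your-proxy-host/api/openai/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello!"}]}'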

## Tuneables ##

# Path shown to users so they know where to connect.
# TODO: set this based on mode; instead, have this be the path to the API.
frontend_api_client: /api
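
# e.g. if this proxy is reachable at https://proxy.example.com (a placeholder
# hostname), users would be pointed to https://proxy.example.com/api.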

# Path to the database, relative to the directory of server.py.
database_path: ./proxy-server.db

# How to calculate the average generation time.
# Valid options: database, minute
# "database" calculates the average from historical data in the database, with more recent data weighted more heavily.
# "minute" calculates it from the last minute of data.
average_generation_time_mode: database
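
# Example: under "minute", generations taking 8, 10, and 12 seconds within the
# last minute give an estimated average of 10 seconds (assuming a simple mean).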

## STATS ##

# Display the total_proompts item on the stats screen.
show_num_prompts: true

# Display the uptime item on the stats screen.
show_uptime: true

show_total_output_tokens: true

show_backend_info: true

# Load the number of prompts from the database to display on the stats page.
load_num_prompts: true

## NETDATA ##

# Base URL of the Netdata instance the proxy reads system stats from.
netdata_root: http://10.0.0.50:19999