## Important

backend_url: https://10.0.0.50:8283

mode: vllm
concurrent_gens: 3
token_limit: 8192

# How many requests a single IP is allowed to have in the queue at once.
# If an IP tries to queue more than this, the new request is rejected
# until the earlier one(s) have completed.
simultaneous_requests_per_ip: 2
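
# For example: with the limit of 2 above, a third request from the same IP that
# arrives while two are still pending is rejected rather than queued. (The exact
# rejection response isn't specified here; check server.py.)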

## Optional

max_new_tokens: 500

enable_streaming: false

log_prompts: false

verify_ssl: false  # the Python requests library has issues with self-signed certs

auth_required: false

max_queued_prompts_per_ip: 1

# Name of your proxy, shown to clients.
llm_middleware_name: local-llm-server

# Set the name of the model shown to clients.
# manual_model_name: testing123

# JS tracking code to add to the home page.
# analytics_tracking_code: |
#   alert("hello");

# HTML to add under the "Estimated Wait Time" line.
# info_html: |
#   bla bla whatever

enable_openi_compatible_backend: true
# openai_api_key:
expose_openai_system_prompt: true
# openai_system_prompt: |
#   You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n
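
# Example client call once the OpenAI-compatible backend is enabled. This is a
# sketch: "your-proxy-host" is a placeholder and the route is an assumption —
# check your deployment for the actual path the proxy serves.
#   curl http://your-proxy-host/api/openai/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello!"}]}'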

## Tuneables ##

# Path shown to users so they know where to connect.
# TODO: set this based on mode; instead, have this be the path to the API.
frontend_api_client: /api
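
# e.g. if this proxy is reachable at https://proxy.example.com (a placeholder
# hostname), users would be pointed to https://proxy.example.com/api.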

# Path to the database, relative to the directory of server.py.
database_path: ./proxy-server.db

# How to calculate the average generation time.
# Valid options: database, minute
# "database" calculates the average from historical data in the database, with more recent data weighted more heavily.
# "minute" calculates it from the last minute of data.
average_generation_time_mode: database
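
# Example: under "minute", generations taking 8, 10, and 12 seconds within the
# last minute give an estimated average of 10 seconds (assuming a simple mean).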

## STATS ##

# Display the total_proompts item on the stats screen.
show_num_prompts: true

# Display the uptime item on the stats screen.
show_uptime: true

show_total_output_tokens: true

show_backend_info: true

# Load the number of prompts from the database to display on the stats page.
load_num_prompts: true

## NETDATA ##

# Base URL of the Netdata instance the proxy reads system stats from.
netdata_root: http://10.0.0.50:19999