# local-llm-server/config/config.yml.sample

## Important

# URL of the backend (presumably the inference server this proxy forwards
# to; confirm against the config loader). Quoted so the embedded colons
# are unambiguous to every parser.
backend_url: 'https://10.0.0.86:8183'

# Backend mode identifier.
# NOTE(review): the set of accepted values is not visible in this file.
mode: 'hf-textgen'

# NOTE(review): presumably the number of generations run at once; confirm.
concurrent_gens: 3

# NOTE(review): presumably a token-count ceiling; confirm what it bounds.
token_limit: 8192

# Per-IP cap on queued requests. When an IP tries to queue more than this,
# the extra request is rejected until its earlier one(s) have completed.
ip_in_queue_max: 2

## Optional

# NOTE(review): presumably an upper bound on tokens generated per request;
# confirm.
max_new_tokens: 500

# NOTE(review): presumably toggles whether prompt text is logged; confirm.
log_prompts: false

# Disabled in this sample because Python requests has issues with
# self-signed certs.
verify_ssl: false

# NOTE(review): presumably toggles whether clients must authenticate;
# confirm.
auth_required: false

# TODO: reject any prompts with a message
# TODO: tokens with a 0 priority are excluded
# TODO: add this value to the stats page
# NOTE(review): looks similar in purpose to `ip_in_queue_max`; confirm which
# key the server actually reads.
max_queued_prompts_per_ip: 1

# Display name of this proxy, as shown to clients.
llm_middleware_name: 'Local LLM Proxy'

# JS tracking code to add to the home page.
# analytics_tracking_code:      |
#  alert("hello");

# HTML to add under the "Estimated Wait Time" line.
# info_html:                    |
#   some interesting info

## Tunables ##

# API path displayed to users as the endpoint they should connect to.
frontend_api_client: '/api'

# Location of the database, resolved relative to the directory that
# contains server.py.
database_path: './proxy-server.db'

# Strategy for computing the average generation time. Valid options:
#   database - average over historical data in the database, with more
#              recent data weighted more heavily.
#   minute   - average over the last minute of data only.
average_generation_time_mode: 'database'

## STATS ##

# Whether the stats screen includes the total_proompts item.
show_num_prompts: true

# Whether the stats screen includes the uptime item.
show_uptime: true

# NOTE(review): presumably toggles a total-output-tokens item on the stats
# screen, like the flags above; confirm.
show_total_output_tokens: true

# NOTE(review): presumably toggles a backend-info item on the stats screen;
# confirm.
show_backend_info: true

# Whether the number of prompts displayed on the stats page is loaded from
# the database.
load_num_prompts: true

## NETDATA ##

# netdata_root:                 http://172.0.2.140:19999