update example config

2023-08-30 18:59:29 -06:00 · 2023-08-30 18:59:29 -06:00 · 41b8232499
parent 2816c01902
commit 41b8232499
1 changed files with 45 additions and 35 deletions
--- a/config/config.yml.sample
+++ b/config/config.yml.sample
@@ -1,45 +1,55 @@
-## Important stuff
+## Important

-# The base URL of your backend API
-backend_url: http://x.x.x.x:5000
+backend_url:                 https://10.0.0.86:8183

-# Mode to run the proxy in
-mode: oobabooga
-
-# How many concurrent generation requests will be processed at the same time.
-# Oobabooga only supports one. If you're using Oobabooga, you MUST set this to 1
-# or else your estimated wait time will be incorrect.
-concurrent_gens: 1
-
-# The configured token limit of your backend.
-# This number is shown to clients and on the home page. (may be important later)
-token_limit: 7777
+mode:                         hf-textgen
+concurrent_gens:              3
+token_limit:                  8192

 # How many requests a single IP is allowed to put in the queue.
 # If an IP tries to put more than this their request will be rejected
 # until the other(s) are completed.
-ip_in_queue_max: 1
-
-llm_middleware_name: Local LLM Proxy
+ip_in_queue_max:              2

 ## Optional

-# Log request and response content.
+max_new_tokens:               500
+
 log_prompts:                  false

-# Python request has issues with self-signed certs.
-verify_ssl: false
+verify_ssl:                   false # Python requests has issues with self-signed certs

-# Reject all requests if they aren't authenticated with a token.
 auth_required:                false

+# TODO: reject any prompts with a message
+# TODO: tokens with a 0 priority are excluded
+# TODO: add this value to the stats page
+max_queued_prompts_per_ip:    1
+
+# Name of your proxy, shown to clients.
+llm_middleware_name:          Local LLM Proxy
+
 # JS tracking code to add to the home page.
 # analytics_tracking_code:      |
 #  alert("hello");

 # HTML to add under the "Estimated Wait Time" line.
 # info_html:                    |
-#  <a href="https://chub-archive.evulid.cc/#/proxy-stats.html?proxy=proxy_chub_archive_evulid">Historical Stats</a>
+#   some interesting info
+
+## Tunables ##
+
+# Path that is shown to users for them to connect to
+frontend_api_client:          /api
+
+# Path to the database, relative to the directory of server.py
+database_path:                ./proxy-server.db
+
+# How to calculate the average generation time.
+# Valid options: database, minute
+# "database" calculates average from historical data in the database, with the more recent data weighted more.
+# "minute" calculates it from the last minute of data.
+average_generation_time_mode: database

 ## STATS ##

@@ -49,13 +59,13 @@ show_num_prompts: true
 # Display the uptime item on the stats screen.
 show_uptime:                  true

+show_total_output_tokens:     true
+
+show_backend_info:            true
+
 # Load the number of prompts from the database to display on the stats page.
 load_num_prompts:             true

-# Path that is shown to users for them to connect to
-frontend_api_client: /api
+## NETDATA ##

-# Relative paths are mapped to the directory of the server
-database_path: ./proxy-server.db
-
-average_generation_time_mode: database
+# netdata_root:                 http://172.0.2.140:19999