## Important

backend_url: https://10.0.0.86:8183

mode: hf-textgen

concurrent_gens: 3

token_limit: 8192

# How many requests a single IP is allowed to put in the queue.
# If an IP tries to put more than this, their requests will be rejected
# until the other(s) are completed.
ip_in_queue_max: 2

## Optional

max_new_tokens: 500

log_prompts: false

verify_ssl: false  # the Python requests library has issues with self-signed certs

auth_required: false

# TODO: reject any prompts with a message
# TODO: tokens with a 0 priority are excluded
# TODO: add this value to the stats page
max_queued_prompts_per_ip: 1

# Name of your proxy, shown to clients.
llm_middleware_name: Local LLM Proxy

# JS tracking code to add to the home page.
# analytics_tracking_code: |
#   alert("hello");

# HTML to add under the "Estimated Wait Time" line.
# info_html: |
#   some interesting info

## Tuneables ##

# Path shown to users for connecting to the API.
frontend_api_client: /api

# Path to the database, relative to the directory of server.py.
database_path: ./proxy-server.db

# How to calculate the average generation time.
# Valid options: database, minute
# "database" calculates the average from historical data in the database, with more recent data weighted more heavily.
# "minute" calculates it from the last minute of data.
average_generation_time_mode: database

## STATS ##

# Display the total_proompts item on the stats screen.
show_num_prompts: true

# Display the uptime item on the stats screen.
show_uptime: true

show_total_output_tokens: true

show_backend_info: true

# Load the number of prompts from the database to display on the stats page.
load_num_prompts: true

## NETDATA ##

# netdata_root: http://172.0.2.140:19999
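# Note: if uncommented, netdata_root should point at the host's Netdata dashboard
# (19999 is Netdata's default port); presumably used to surface system stats
# alongside the proxy's own stats page.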