This repository has been archived on 2024-10-27. You can view files and clone it, but cannot push or open issues or pull requests.
local-llm-server/config/config.yml.sample

80 lines
2.7 KiB
Plaintext
Raw Normal View History

2023-08-30 18:59:29 -06:00
## Important
2023-08-24 00:09:57 -06:00
backend_url: https://10.0.0.50:8283
2023-08-24 00:09:57 -06:00
mode: vllm
concurrent_gens: 3
token_limit: 8192
2023-08-24 00:09:57 -06:00
# How many requests a single IP is allowed to put in the queue.
# If an IP tries to put in more than this, their requests will be rejected
# until the other(s) are completed.
simultaneous_requests_per_ip: 2
2023-08-24 00:09:57 -06:00
## Optional
max_new_tokens: 500
2023-08-30 18:59:29 -06:00
enable_streaming: false
2023-08-24 00:09:57 -06:00
log_prompts: false
2023-08-24 00:09:57 -06:00
verify_ssl: false  # the Python requests library has issues with self-signed certs
2023-08-30 18:59:29 -06:00
auth_required: false
max_queued_prompts_per_ip: 1
2023-08-30 18:59:29 -06:00
# Name of your proxy, shown to clients.
llm_middleware_name: local-llm-server
# Set the name of the model shown to clients
# manual_model_name: testing123
2023-08-24 00:09:57 -06:00
2023-08-24 12:19:59 -06:00
# JS tracking code to add to the home page.
# analytics_tracking_code: |
#   alert("hello");
2023-08-24 00:09:57 -06:00
# HTML to add under the "Estimated Wait Time" line.
# info_html: |
#   bla bla whatever
# NOTE(review): key is spelled "openi", not "openai" — confirm this matches the
# key name the server actually reads before renaming it here.
enable_openi_compatible_backend: true
# openai_api_key:
expose_openai_system_prompt: true
# openai_system_prompt: |
#   You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n
2023-08-30 18:59:29 -06:00
## TUNEABLES ##
# Path that is shown to users for them to connect to
# TODO: set this based on mode. Instead, have this be the path to the API
frontend_api_client: /api
2023-08-30 18:59:29 -06:00
# Path to the database, relative to the directory of server.py
database_path: ./proxy-server.db
2023-08-30 18:59:29 -06:00
# How to calculate the average generation time.
# Valid options: database, minute
# "database" calculates average from historical data in the database, with the more recent data weighted more.
# "minute" calculates it from the last minute of data.
average_generation_time_mode: database
2023-08-24 00:09:57 -06:00
## STATS ##
# Display the total_proompts item on the stats screen.
show_num_prompts: true
2023-08-24 00:09:57 -06:00
# Display the uptime item on the stats screen.
show_uptime: true
2023-08-24 00:09:57 -06:00
show_total_output_tokens: true
2023-08-24 00:09:57 -06:00
show_backend_info: true
2023-08-30 18:59:29 -06:00
# Load the number of prompts from the database to display on the stats page.
load_num_prompts: true
2023-08-24 00:09:57 -06:00
2023-08-30 18:59:29 -06:00
## NETDATA ##
netdata_root: http://10.0.0.50:19999