diff --git a/README.md b/README.md
index d2dce41..77b73cb 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@
 An example systemctl service file is provided in `other/local-llm.service`.
 
 First, set up your LLM backend. Currently, only [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) is supported, but eventually [huggingface/text-generation-inference](https://github.com/huggingface/text-generation-inference) will be the default.
 
-Then, configure this server. The config file is located at `config/config.yml`.
+Then, configure this server. A sample config file is provided at `config/config.yml.sample`; copy it to `config/config.yml` and edit it.
diff --git a/config/config.yml b/config/config.yml
deleted file mode 100644
index f9a0ba4..0000000
--- a/config/config.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-# TODO: add this file to gitignore and add a .sample.yml
-
-## Important
-
-backend_url: https://10.0.0.86:8083
-
-mode: oobabooga
-concurrent_gens: 3
-token_limit: 7777
-
-## Optional
-
-log_prompts: true
-verify_ssl: false # Python request has issues with self-signed certs
-
-auth_required: false
-
-llm_middleware_name: proxy.chub-archive.evulid.cc
-analytics_tracking_code: |
-  var _paq = window._paq = window._paq || [];
-  _paq.push(['trackPageView']);
-  _paq.push(['enableLinkTracking']);
-  (function () {
-    var u = "https://mato.evulid.cc/";
-    _paq.push(['setTrackerUrl', u + 'matomo.php']);
-    _paq.push(['setSiteId', '10']);
-    var d = document,
-      g = d.createElement('script'),
-      s = d.getElementsByTagName('script')[0];
-    g.async = true;
-    g.src = u + 'matomo.js';
-    s.parentNode.insertBefore(g, s);
-  })();
-
-## STATS ##
-
-# Display the total_proompts item on the stats screen.
-show_num_prompts: true
-
-# Display the uptime item on the stats screen.
-show_uptime: true
-
-# Load the number of prompts from the database to display on the stats page.
-load_num_prompts: true
-
-# Path that is shown to users for them to connect to
-frontend_api_client: /api
-
-database_path: ./proxy-server.db
\ No newline at end of file
diff --git a/config/config.yml.sample b/config/config.yml.sample
new file mode 100644
index 0000000..a20682a
--- /dev/null
+++ b/config/config.yml.sample
@@ -0,0 +1,48 @@
+## Important stuff
+
+# The base URL of your backend API
+backend_url: http://x.x.x.x:5000
+
+# Mode to run the proxy in
+mode: oobabooga
+
+# How many concurrent generation requests will be processed at the same time.
+# Oobabooga only supports one.
+concurrent_gens: 3
+
+# The configured token limit of your backend.
+# This number is shown to clients and on the home page. (may be important later)
+token_limit: 7777
+
+llm_middleware_name: Local LLM Proxy
+
+## Optional
+
+# Log request and response content.
+log_prompts: false
+
+# Python's requests library has issues with self-signed certs.
+verify_ssl: false
+
+# Reject all requests that aren't authenticated with a token.
+auth_required: false
+
+#analytics_tracking_code: |
+#  alert("hello");
+
+## STATS ##
+
+# Display the total_proompts item on the stats screen.
+show_num_prompts: true
+
+# Display the uptime item on the stats screen.
+show_uptime: true
+
+# Load the number of prompts from the database to display on the stats page.
+load_num_prompts: true
+
+# Path that is shown to users for them to connect to
+frontend_api_client: /api
+
+# Relative paths are mapped to the directory of the server
+database_path: ./proxy-server.db
\ No newline at end of file
diff --git a/llm_server/routes/v1/generate.py b/llm_server/routes/v1/generate.py
index b0ad48a..80fd4ae 100644
--- a/llm_server/routes/v1/generate.py
+++ b/llm_server/routes/v1/generate.py
@@ -51,7 +51,6 @@ def generate():
     else:
         print(f'Token {token} was given priority {priority}.')
 
-    # success, response, error_msg = generator(request_json_body)
     event = priority_queue.put((request_json_body, client_ip, token, parameters), priority)
     event.wait()
     success, response, error_msg = event.data
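
The `generate.py` hunk above drops the commented-out direct `generator(...)` call in favor of an event-backed priority queue: the route enqueues the request, blocks on `event.wait()`, and reads `(success, response, error_msg)` from `event.data` once a worker has processed it. The queue itself is not part of this diff, so the following is only a minimal sketch of how a `put()` that returns a waitable event might be built; the names `GenerationEvent`, `EventPriorityQueue`, and `worker` are illustrative, not the project's actual API.

```python
import heapq
import itertools
import threading


class GenerationEvent:
    """Carries the result of one queued generation request (illustrative)."""

    def __init__(self):
        self._done = threading.Event()
        self.data = None  # becomes (success, response, error_msg)

    def set_result(self, data):
        self.data = data
        self._done.set()

    def wait(self, timeout=None):
        return self._done.wait(timeout)


class EventPriorityQueue:
    """put() enqueues an item and hands back an event the caller can block on."""

    def __init__(self):
        self._heap = []
        self._cond = threading.Condition()
        self._counter = itertools.count()  # tie-breaker keeps FIFO order within a priority

    def put(self, item, priority):
        event = GenerationEvent()
        with self._cond:
            # heapq is a min-heap, so a lower number means higher priority.
            heapq.heappush(self._heap, (priority, next(self._counter), item, event))
            self._cond.notify()
        return event

    def get(self):
        with self._cond:
            while not self._heap:
                self._cond.wait()
            _priority, _count, item, event = heapq.heappop(self._heap)
        return item, event


def worker(queue, generator):
    """Background worker: drain the queue and resolve each request's event."""
    while True:
        (request_json_body, _client_ip, _token, _parameters), event = queue.get()
        event.set_result(generator(request_json_body))
```

With a pattern like this, a pool of `concurrent_gens` worker threads (one for Oobabooga) pulls requests off the queue in priority order while each Flask route handler simply waits on its own event.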
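For context on the new `config/config.yml.sample`, a server like this would typically load the copied `config/config.yml` with PyYAML and sanity-check it at startup. The snippet below is a hypothetical illustration: the key names come from the sample above, but the `load_config` helper, the defaults, and the required-key list are assumptions for this sketch, not the project's actual config loader.

```python
from pathlib import Path

import yaml  # PyYAML


# Optional keys from config.yml.sample with assumed fallback values.
DEFAULTS = {
    'log_prompts': False,
    'verify_ssl': True,
    'auth_required': False,
    'show_num_prompts': True,
    'show_uptime': True,
    'load_num_prompts': True,
    'frontend_api_client': '/api',
    'database_path': './proxy-server.db',
}

# Keys under "Important stuff" that have no sensible default.
REQUIRED = ('backend_url', 'mode', 'concurrent_gens', 'token_limit', 'llm_middleware_name')


def load_config(path='config/config.yml', server_dir='.'):
    config = {**DEFAULTS, **(yaml.safe_load(Path(path).read_text()) or {})}
    missing = [key for key in REQUIRED if key not in config]
    if missing:
        raise ValueError(f'config.yml is missing required keys: {missing}')
    # The sample notes that relative paths are mapped to the server's directory.
    config['database_path'] = str((Path(server_dir) / config['database_path']).resolve())
    return config
```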