update readme
parent bf1842f434
commit afc138c743
@@ -26,7 +26,7 @@ An example systemctl service file is provided in `other/local-llm.service`.
 
 First, set up your LLM backend. Currently, only [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) is supported, but eventually [huggingface/text-generation-inference](https://github.com/huggingface/text-generation-inference) will be the default.
 
-Then, configure this server. The config file is located at `config/config.yml`.
+Then, configure this server. A sample config file is provided at `config/config.yml.sample`; copy it to `config/config.yml`.
 
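The copy step the new README text describes can also be scripted so a fresh checkout seeds its own config. A minimal sketch using only the standard library; the `bootstrap_config` helper is illustrative and not part of this repository:

```python
import shutil
from pathlib import Path

def bootstrap_config(config_dir: str = "config") -> Path:
    """Copy config.yml.sample to config.yml unless a config already exists."""
    sample = Path(config_dir) / "config.yml.sample"
    config = Path(config_dir) / "config.yml"
    if not config.exists():
        # Never overwrite an edited config; only seed it from the sample.
        shutil.copyfile(sample, config)
    return config
```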
config/config.yml
@@ -1,49 +0,0 @@
-# TODO: add this file to gitignore and add a .sample.yml
-
-## Important
-
-backend_url: https://10.0.0.86:8083
-
-mode: oobabooga
-concurrent_gens: 3
-token_limit: 7777
-
-## Optional
-
-log_prompts: true
-verify_ssl: false # Python request has issues with self-signed certs
-
-auth_required: false
-
-llm_middleware_name: proxy.chub-archive.evulid.cc
-analytics_tracking_code: |
-  var _paq = window._paq = window._paq || [];
-  _paq.push(['trackPageView']);
-  _paq.push(['enableLinkTracking']);
-  (function () {
-    var u = "https://mato.evulid.cc/";
-    _paq.push(['setTrackerUrl', u + 'matomo.php']);
-    _paq.push(['setSiteId', '10']);
-    var d = document,
-      g = d.createElement('script'),
-      s = d.getElementsByTagName('script')[0];
-    g.async = true;
-    g.src = u + 'matomo.js';
-    s.parentNode.insertBefore(g, s);
-  })();
-
-## STATS ##
-
-# Display the total_proompts item on the stats screen.
-show_num_prompts: true
-
-# Display the uptime item on the stats screen.
-show_uptime: true
-
-# Load the number of prompts from the database to display on the stats page.
-load_num_prompts: true
-
-# Path that is shown to users for them to connect to
-frontend_api_client: /api
-
-database_path: ./proxy-server.db
-
config/config.yml.sample
@@ -0,0 +1,48 @@
+## Important stuff
+
+# The base URL of your backend API
+backend_url: http://x.x.x.x:5000
+
+# Mode to run the proxy in
+mode: oobabooga
+
+# How many concurrent generation requests will be processed at the same time.
+# Oobabooga only supports one.
+concurrent_gens: 3
+
+# The configured token limit of your backend.
+# This number is shown to clients and on the home page. (may be important later)
+token_limit: 7777
+
+llm_middleware_name: Local LLM Proxy
+
+## Optional
+
+# Log request and response content.
+log_prompts: false
+
+# Python's requests library has issues with self-signed certs.
+verify_ssl: false
+
+# Reject all requests if they aren't authenticated with a token.
+auth_required: false
+
+#analytics_tracking_code: |
+#  alert("hello");
+
+## STATS ##
+
+# Display the total_proompts item on the stats screen.
+show_num_prompts: true
+
+# Display the uptime item on the stats screen.
+show_uptime: true
+
+# Load the number of prompts from the database to display on the stats page.
+load_num_prompts: true
+
+# Path that is shown to users for them to connect to
+frontend_api_client: /api
+
+# Relative paths are mapped to the directory of the server
+database_path: ./proxy-server.db
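One way the server might consume this file, sketched with PyYAML; the `load_config` function, `REQUIRED` tuple, and `DEFAULTS` dict are names assumed for illustration, and only the keys and default values come from the sample above:

```python
import yaml  # PyYAML: pip install pyyaml

# Keys under "Important stuff" have no safe fallback.
REQUIRED = ("backend_url", "mode", "concurrent_gens", "token_limit", "llm_middleware_name")

# Optional keys fall back to the values shown in the sample.
DEFAULTS = {
    "log_prompts": False,
    "verify_ssl": False,  # self-signed backend certs trip up requests
    "auth_required": False,
    "show_num_prompts": True,
    "show_uptime": True,
    "load_num_prompts": True,
    "frontend_api_client": "/api",
    "database_path": "./proxy-server.db",
}

def load_config(path: str = "config/config.yml") -> dict:
    with open(path) as f:
        loaded = yaml.safe_load(f) or {}
    missing = [key for key in REQUIRED if key not in loaded]
    if missing:
        raise ValueError(f"config is missing required keys: {missing}")
    return {**DEFAULTS, **loaded}
```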
@@ -51,7 +51,6 @@ def generate():
     else:
         print(f'Token {token} was given priority {priority}.')
 
-    # success, response, error_msg = generator(request_json_body)
     event = priority_queue.put((request_json_body, client_ip, token, parameters), priority)
     event.wait()
     success, response, error_msg = event.data
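The surviving lines show the request path: `put()` hands back an event, the handler blocks on it, and a worker later attaches `(success, response, error_msg)` and wakes it. A rough reconstruction of that pattern with the standard library, where the class and method names are guesses rather than the project's actual implementation:

```python
import itertools
import queue
import threading

class DataEvent(threading.Event):
    """A threading.Event that also carries the worker's result."""
    def __init__(self):
        super().__init__()
        self.data = None

class EventPriorityQueue:
    def __init__(self):
        self._queue = queue.PriorityQueue()
        # Tie-breaker so equal priorities never compare the (unorderable) payloads.
        self._count = itertools.count()

    def put(self, item, priority: int) -> DataEvent:
        event = DataEvent()
        self._queue.put((priority, next(self._count), item, event))
        return event

    def get(self):
        _, _, item, event = self._queue.get()
        return item, event

def worker(q: EventPriorityQueue):
    # Drain the queue forever: generate, publish the result, wake the handler.
    while True:
        item, event = q.get()
        event.data = (True, {"text": "..."}, None)  # (success, response, error_msg)
        event.set()
```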