update readme

Cyberes 2023-08-24 12:19:59 -06:00
parent 1cb6389a8a
commit 21174750ea
4 changed files with 19 additions and 6 deletions

View File

@@ -50,3 +50,9 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
 `expire`: UNIX timestamp of when this token expires and is no longer valid.
 `disabled`: mark the token as disabled.
+
+### To Do
+- Implement streaming
+- Add `huggingface/text-generation-inference`
+- Convince Oobabooga to implement concurrent generation
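
Adding a token is just a row insert against that table. A minimal sketch in Python, assuming the database file is `llm_server.db` and the table has a `token` column holding the key itself (both names are assumptions; the section above only documents `expire` and `disabled`):

```python
import sqlite3
import time

# Sketch only: the database filename and the `token` column are assumptions.
conn = sqlite3.connect('llm_server.db')
conn.execute(
    'INSERT INTO token_auth (token, expire, disabled) VALUES (?, ?, ?)',
    ('my-secret-token', int(time.time()) + 30 * 86400, 0),  # valid 30 days, not disabled
)
conn.commit()
conn.close()
```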

View File

@@ -7,8 +7,9 @@ backend_url: http://x.x.x.x:5000
 mode: oobabooga
 # How many concurrent generation requests will be processed at the same time.
-# Oobabooga only supports one.
-concurrent_gens: 3
+# Oobabooga only supports one. If you're using Oobabooga, you MUST set this to 1
+# or else your estimated wait time will be incorrect.
+concurrent_gens: 1
 # The configured token limit of your backend.
 # This number is shown to clients and on the home page. (may be important later)
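
That MUST ties into the wait-time formula changed later in this commit: `estimated_wait_sec` divides by `concurrent_gens`. A quick illustration with made-up numbers:

```python
# Made-up numbers: 10 s average generation time, 4 prompts waiting.
average_generation_time = 10
proompters_in_queue = 4

# Oobabooga serves one request at a time, so concurrent_gens: 3 understates the wait.
print(int((average_generation_time * proompters_in_queue) / 3))  # 13 -- too optimistic
print(int((average_generation_time * proompters_in_queue) / 1))  # 40 -- realistic
```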
@@ -27,6 +28,7 @@ verify_ssl: false
 # Reject all requests if they aren't authenticated with a token.
 auth_required: false
+# JS tracking code to add to the home page.
 #analytics_tracking_code: |
 #  alert("hello");

View File

@@ -29,6 +29,8 @@ generation_elapsed_lock = Lock()
 def calculate_avg_gen_time():
+    # TODO: calculate the average from the database. Have this be set by an option in the config
     # Get the average generation time from Redis
     average_generation_time = redis.get('average_generation_time')
     if average_generation_time is None:
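
The TODO wants the average computed from the database rather than Redis. One possible shape for that, heavily hedged: the `prompts` table and `generation_time`/`id` columns below are assumptions, not the project's actual schema:

```python
import sqlite3

def calculate_avg_gen_time_from_db(db_path='llm_server.db', window=100):
    # Sketch of the TODO above; table/column names and the window size are assumptions.
    conn = sqlite3.connect(db_path)
    row = conn.execute(
        'SELECT AVG(generation_time) FROM '
        '(SELECT generation_time FROM prompts ORDER BY id DESC LIMIT ?)',
        (window,),
    ).fetchone()
    conn.close()
    return row[0] or 0
```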

View File

@@ -1,5 +1,5 @@
-from datetime import datetime
 import time
+from datetime import datetime
 from llm_server import opts
 from llm_server.llm.info import get_running_model
@@ -23,9 +23,11 @@ def generate_stats():
     average_generation_time = int(calculate_avg_gen_time())
     proompters_in_queue = len(priority_queue) + get_active_gen_workers()
+    # TODO: https://stackoverflow.com/questions/22721579/sorting-a-nested-ordereddict-by-key-recursively
     return {
         'stats': {
-            'prompts_in_queue': proompters_in_queue,
+            'proompts_in_queue': proompters_in_queue,
             'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
             'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
             'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
@@ -37,12 +39,13 @@ def generate_stats():
         'endpoints': {
             'blocking': opts.full_client_api,
         },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
+        'estimated_wait_sec': int((average_generation_time * proompters_in_queue) / opts.concurrent_gens),
         'timestamp': int(time.time()),
         'openaiKeys': '',
         'anthropicKeys': '',
         'config': {
             'gatekeeper': 'none' if opts.auth_required is False else 'token',
             'context_size': opts.context_size,
+            'queue_size': opts.concurrent_gens,
         }
     }
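
The Stack Overflow TODO in the first hunk is about recursively sorting the nested stats dict by key. A minimal sketch of that idea (not the project's code):

```python
def sort_dict_recursively(d):
    # Recursively sort a nested dict by key; dicts keep insertion order in
    # Python 3.7+, so rebuilding from sorted items is enough. Sketch only.
    return {
        k: sort_dict_recursively(v) if isinstance(v, dict) else v
        for k, v in sorted(d.items())
    }
```

Wrapping the returned stats dict in a helper like this before serializing would give clients a stable key order.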