update readme
parent 1cb6389a8a
commit 21174750ea
@@ -50,3 +50,9 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
 `expire`: UNIX timestamp of when this token expires and is no longer valid.
 `disabled`: mark the token as disabled.
 
+### To Do
+
+- Implement streaming
+- Add `huggingface/text-generation-inference`
+- Convince Oobabooga to implement concurrent generation
+
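For reference, adding a token row by hand might look like the sketch below. Only the `expire` and `disabled` columns are documented in the README; the database path and the `token` column name are assumptions for illustration.

```python
import sqlite3
import time

# Hypothetical example: the database path and the `token` column name are
# assumptions -- only `expire` and `disabled` are documented above.
conn = sqlite3.connect('llm_server.db')
conn.execute(
    'INSERT INTO token_auth (token, expire, disabled) VALUES (?, ?, ?)',
    ('my-secret-token', int(time.time()) + 86400, 0),  # valid 24 h, not disabled
)
conn.commit()
conn.close()
```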
@@ -7,8 +7,9 @@ backend_url: http://x.x.x.x:5000
 mode: oobabooga
 
 # How many concurrent generation requests will be processed at the same time.
-# Oobabooga only supports one.
-concurrent_gens: 3
+# Oobabooga only supports one. If you're using Oobabooga, you MUST set this to 1
+# or else your estimated wait time will be incorrect.
+concurrent_gens: 1
 
 # The configured token limit of your backend.
 # This number is shown to clients and on the home page. (may be important later)
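A quick sanity check of why this matters, using the wait-time formula this commit introduces later (`average_generation_time * proompters_in_queue / concurrent_gens`) with made-up numbers:

```python
# Hypothetical numbers: 30 s per generation, 6 proompters waiting.
average_generation_time = 30
proompters_in_queue = 6

# With concurrent_gens: 3 the server would report a 60 s wait...
print(int((average_generation_time * proompters_in_queue) / 3))  # 60

# ...but Oobabooga runs one generation at a time, so the real wait is
# closer to 180 s. Hence concurrent_gens MUST be 1 for Oobabooga.
print(int((average_generation_time * proompters_in_queue) / 1))  # 180
```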
@@ -27,6 +28,7 @@ verify_ssl: false
 # Reject all requests if they aren't authenticated with a token.
 auth_required: false
 
+# JS tracking code to add to the home page.
 #analytics_tracking_code: |
 #   alert("hello");
 
@@ -29,6 +29,8 @@ generation_elapsed_lock = Lock()
 
 
 def calculate_avg_gen_time():
+    # TODO: calculate the average from the database. Have this be set by an option in the config
+
     # Get the average generation time from Redis
    average_generation_time = redis.get('average_generation_time')
     if average_generation_time is None:
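The hunk cuts off at the `None` check. A minimal sketch of how the fallback might continue, assuming `redis` is the client already used in this module and the stored value is a string:

```python
def calculate_avg_gen_time():
    # Get the average generation time from Redis.
    average_generation_time = redis.get('average_generation_time')
    if average_generation_time is None:
        # Nothing recorded yet; report zero rather than guessing.
        return 0
    return float(average_generation_time)
```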
@@ -1,5 +1,5 @@
-from datetime import datetime
 import time
+from datetime import datetime
 
 from llm_server import opts
 from llm_server.llm.info import get_running_model
@@ -23,9 +23,11 @@ def generate_stats():
 
     average_generation_time = int(calculate_avg_gen_time())
     proompters_in_queue = len(priority_queue) + get_active_gen_workers()
+
+    # TODO: https://stackoverflow.com/questions/22721579/sorting-a-nested-ordereddict-by-key-recursively
     return {
         'stats': {
-            'prompts_in_queue': proompters_in_queue,
+            'proompts_in_queue': proompters_in_queue,
             'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
             'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
             'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
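The new TODO links to a StackOverflow question on sorting a nested OrderedDict recursively by key. On Python 3.7+, where plain dicts preserve insertion order, a hypothetical helper (not part of this commit) could look like:

```python
def sort_dict_recursively(d: dict) -> dict:
    # Sort keys at every nesting level; leave non-dict values untouched.
    return {k: sort_dict_recursively(v) if isinstance(v, dict) else v
            for k, v in sorted(d.items())}
```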
@@ -37,12 +39,13 @@ def generate_stats():
         'endpoints': {
             'blocking': opts.full_client_api,
         },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
+        'estimated_wait_sec': int((average_generation_time * proompters_in_queue) / opts.concurrent_gens),
         'timestamp': int(time.time()),
         'openaiKeys': '∞',
         'anthropicKeys': '∞',
         'config': {
             'gatekeeper': 'none' if opts.auth_required is False else 'token',
             'context_size': opts.context_size,
+            'queue_size': opts.concurrent_gens,
         }
     }