update readme

This commit is contained in:
parent 1cb6389a8a
commit 21174750ea
@@ -49,4 +49,10 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
 `expire`: UNIX timestamp of when this token expires and is no longer valid.

 `disabled`: mark the token as disabled.
+
+### To Do
+
+- Implement streaming
+- Add `huggingface/text-generation-inference`
+- Convince Oobabooga to implement concurrent generation
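For illustration, a minimal sketch of adding a token row using Python's built-in `sqlite3` module. The `token_auth` table and the `expire` and `disabled` columns come from the README text above; the `token` column name and the database filename are assumptions and may differ from the actual schema.

```python
import sqlite3
import time

# Hypothetical database path and token column name -- check the real schema first.
conn = sqlite3.connect('llm-server.db')
conn.execute(
    'INSERT INTO token_auth (token, expire, disabled) VALUES (?, ?, ?)',
    (
        'my-secret-token',         # assumed name of the column holding the token itself
        int(time.time()) + 86400,  # expire: UNIX timestamp, here 24 hours from now
        0,                         # disabled: 0 = active, 1 = disabled
    ),
)
conn.commit()
conn.close()
```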
@@ -7,8 +7,9 @@ backend_url: http://x.x.x.x:5000
 mode: oobabooga

 # How many concurrent generation requests will be processed at the same time.
-# Oobabooga only supports one.
-concurrent_gens: 3
+# Oobabooga only supports one. If you're using Oobabooga, you MUST set this to 1
+# or else your estimated wait time will be incorrect.
+concurrent_gens: 1

 # The configured token limit of your backend.
 # This number is shown to clients and on the home page. (may be important later)
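The reason for the MUST: the server divides by `concurrent_gens` when estimating wait time (see the `estimated_wait_sec` change in the last hunk below). A quick sketch of that arithmetic, with illustrative numbers:

```python
average_generation_time = 10  # seconds per generation (illustrative value)
proompters_in_queue = 6

# Old default of 3 against a backend that really only runs one request
# at a time: the estimate comes out 3x too optimistic.
int((average_generation_time * proompters_in_queue) / 3)  # -> 20 seconds (wrong)

# With concurrent_gens matching Oobabooga's actual capacity of 1:
int((average_generation_time * proompters_in_queue) / 1)  # -> 60 seconds (right)
```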
@@ -27,6 +28,7 @@ verify_ssl: false
 # Reject all requests if they aren't authenticated with a token.
 auth_required: false

+# JS tracking code to add to the home page.
 #analytics_tracking_code: |
 #  alert("hello");
@@ -29,6 +29,8 @@ generation_elapsed_lock = Lock()


 def calculate_avg_gen_time():
+    # TODO: calculate the average from the database. Have this be set by an option in the config
+
     # Get the average generation time from Redis
     average_generation_time = redis.get('average_generation_time')
     if average_generation_time is None:
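For context, a sketch of how a Redis-backed average like this can work. The `average_generation_time` key and the `None` check are taken from the diff; the writer side, the fallback value, and the exponential moving average are assumptions about one plausible implementation, not this repo's actual code.

```python
import redis as redis_lib

redis = redis_lib.Redis()  # matches the bare `redis` client used in the diff

def record_gen_time(elapsed: float, alpha: float = 0.2) -> None:
    # Hypothetical writer: fold each new generation time into an
    # exponential moving average stored under the same Redis key.
    prev = redis.get('average_generation_time')
    avg = elapsed if prev is None else alpha * elapsed + (1 - alpha) * float(prev)
    redis.set('average_generation_time', avg)

def calculate_avg_gen_time() -> float:
    # Reader: same shape as the function shown in the diff.
    average_generation_time = redis.get('average_generation_time')
    if average_generation_time is None:
        return 0.0  # assumed fallback when nothing has been recorded yet
    return float(average_generation_time)
```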
@@ -1,5 +1,5 @@
-from datetime import datetime
 import time
+from datetime import datetime

 from llm_server import opts
 from llm_server.llm.info import get_running_model
@@ -23,9 +23,11 @@ def generate_stats():
     average_generation_time = int(calculate_avg_gen_time())
     proompters_in_queue = len(priority_queue) + get_active_gen_workers()

+    # TODO: https://stackoverflow.com/questions/22721579/sorting-a-nested-ordereddict-by-key-recursively
+
     return {
         'stats': {
-            'prompts_in_queue': proompters_in_queue,
+            'proompts_in_queue': proompters_in_queue,
             'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
             'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
             'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
@@ -37,12 +39,13 @@ def generate_stats():
         'endpoints': {
             'blocking': opts.full_client_api,
         },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
+        'estimated_wait_sec': int((average_generation_time * proompters_in_queue) / opts.concurrent_gens),
         'timestamp': int(time.time()),
         'openaiKeys': '∞',
         'anthropicKeys': '∞',
         'config': {
             'gatekeeper': 'none' if opts.auth_required is False else 'token',
             'context_size': opts.context_size,
+            'queue_size': opts.concurrent_gens,
         }
     }
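Putting the pieces together, a populated stats payload after this commit might look roughly like the following. The keys are the ones visible in this diff; every value is invented for illustration, and the `blocking` URL shape in particular is an assumption.

```python
# Illustrative generate_stats() output after this commit (values made up):
example_stats = {
    'stats': {
        'proompts_in_queue': 6,
        'proompters_1_min': 4,
        'total_proompts': 1024,
        'uptime': 3600,
    },
    'endpoints': {
        'blocking': 'http://x.x.x.x:5000/api/v1/generate',  # assumed URL shape
    },
    'estimated_wait_sec': 60,  # (10 s avg * 6 in queue) / concurrent_gens=1
    'timestamp': 1693526400,
    'openaiKeys': '∞',
    'anthropicKeys': '∞',
    'config': {
        'gatekeeper': 'none',
        'context_size': 2048,
        'queue_size': 1,
    },
}
```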