fix stats for real
This commit is contained in:
parent
3bb27d6900
commit
33190e3cfe
|
@ -5,7 +5,7 @@ log_prompts: true
|
||||||
mode: oobabooga
|
mode: oobabooga
|
||||||
auth_required: false
|
auth_required: false
|
||||||
concurrent_gens: 3
|
concurrent_gens: 3
|
||||||
token_limit: 5555
|
token_limit: 7777
|
||||||
|
|
||||||
backend_url: http://172.0.0.2:9104
|
backend_url: http://172.0.0.2:9104
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
from flask_caching import Cache
|
from flask_caching import Cache
|
||||||
|
from redis import Redis
|
||||||
|
|
||||||
cache = Cache(config={'CACHE_TYPE': 'RedisCache', 'CACHE_REDIS_URL': 'redis://localhost:6379/0', 'CACHE_KEY_PREFIX': 'local-llm'})
|
cache = Cache(config={'CACHE_TYPE': 'RedisCache', 'CACHE_REDIS_URL': 'redis://localhost:6379/0', 'CACHE_KEY_PREFIX': 'local-llm'})
|
||||||
|
redis = Redis()
|
||||||
|
|
|
@ -6,13 +6,22 @@ from threading import Semaphore, Thread
|
||||||
from llm_server import opts
|
from llm_server import opts
|
||||||
from llm_server.integer import ThreadSafeInteger
|
from llm_server.integer import ThreadSafeInteger
|
||||||
from llm_server.opts import concurrent_gens
|
from llm_server.opts import concurrent_gens
|
||||||
|
from llm_server.routes.cache import redis
|
||||||
|
|
||||||
# proompters_1_min = 0
|
# proompters_1_min = 0
|
||||||
concurrent_semaphore = Semaphore(concurrent_gens)
|
concurrent_semaphore = Semaphore(concurrent_gens)
|
||||||
proompts = ThreadSafeInteger(0)
|
|
||||||
start_time = datetime.now()
|
start_time = datetime.now()
|
||||||
|
|
||||||
|
|
||||||
|
def get_count():
    """Return the prompt counter stored in Redis as an int.

    Reads the 'proompts' key from the module-level ``redis`` client.
    When the key is absent (``redis.get`` returns None) the count is 0;
    otherwise the stored value is converted with ``int()``.
    """
    stored = redis.get('proompts')
    # A missing key means nothing has been counted yet.
    return 0 if stored is None else int(stored)
|
||||||
|
|
||||||
|
|
||||||
class SemaphoreCheckerThread(Thread):
|
class SemaphoreCheckerThread(Thread):
|
||||||
proompters_1_min = 0
|
proompters_1_min = 0
|
||||||
recent_prompters = {}
|
recent_prompters = {}
|
||||||
|
|
|
@ -2,8 +2,9 @@ import time
|
||||||
|
|
||||||
from flask import jsonify, request
|
from flask import jsonify, request
|
||||||
|
|
||||||
from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore, proompts
|
from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore
|
||||||
from . import bp
|
from . import bp
|
||||||
|
from ..cache import redis
|
||||||
from ..helpers.client import format_sillytavern_err
|
from ..helpers.client import format_sillytavern_err
|
||||||
from ..helpers.http import cache_control, validate_json
|
from ..helpers.http import cache_control, validate_json
|
||||||
from ... import opts
|
from ... import opts
|
||||||
|
@ -65,7 +66,7 @@ def generate():
|
||||||
}), 200
|
}), 200
|
||||||
response_valid_json, response_json_body = validate_json(response)
|
response_valid_json, response_json_body = validate_json(response)
|
||||||
if response_valid_json:
|
if response_valid_json:
|
||||||
proompts.increment()
|
redis.incr('proompts')
|
||||||
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
|
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
|
||||||
if not backend_response:
|
if not backend_response:
|
||||||
if opts.mode == 'oobabooga':
|
if opts.mode == 'oobabooga':
|
||||||
|
|
|
@ -15,7 +15,6 @@ from ...llm.info import get_running_model
|
||||||
|
|
||||||
@bp.route('/stats', methods=['GET'])
|
@bp.route('/stats', methods=['GET'])
|
||||||
@cache.cached(timeout=5, query_string=True)
|
@cache.cached(timeout=5, query_string=True)
|
||||||
@cache_control(5)
|
|
||||||
def get_stats():
|
def get_stats():
|
||||||
model_list = get_running_model() # will return False when the fetch fails
|
model_list = get_running_model() # will return False when the fetch fails
|
||||||
if isinstance(model_list, bool):
|
if isinstance(model_list, bool):
|
||||||
|
@ -27,7 +26,7 @@ def get_stats():
|
||||||
'stats': {
|
'stats': {
|
||||||
'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
|
'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
|
||||||
'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
|
'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
|
||||||
'total_proompts': stats.proompts.value,
|
'total_proompts': stats.get_count(),
|
||||||
'uptime': int((datetime.now() - stats.start_time).total_seconds()),
|
'uptime': int((datetime.now() - stats.start_time).total_seconds()),
|
||||||
},
|
},
|
||||||
'online': online,
|
'online': online,
|
||||||
|
|
|
@ -7,7 +7,9 @@ After=basic.target network.target
|
||||||
User=server
|
User=server
|
||||||
Group=server
|
Group=server
|
||||||
WorkingDirectory=/srv/server/local-llm-server
|
WorkingDirectory=/srv/server/local-llm-server
|
||||||
ExecStart=/srv/server/local-llm-server/venv/bin/gunicorn --workers 3 --bind 0.0.0.0:5000 server:app
|
# Need a lot of workers since we have long-running requests
|
||||||
|
# Takes about 3.5G memory
|
||||||
|
ExecStart=/srv/server/local-llm-server/venv/bin/gunicorn --workers 20 --bind 0.0.0.0:5000 server:app --timeout 60 --worker-class gevent
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=2
|
RestartSec=2
|
||||||
|
|
||||||
|
|
|
@ -5,4 +5,5 @@ flask_caching
|
||||||
requests
|
requests
|
||||||
tiktoken
|
tiktoken
|
||||||
gunicorn
|
gunicorn
|
||||||
redis
|
redis
|
||||||
|
gevent
|
Reference in New Issue