fix stats for real

This commit is contained in:
Cyberes 2023-08-23 01:14:19 -06:00
parent 3bb27d6900
commit 33190e3cfe
7 changed files with 22 additions and 8 deletions

View File

@ -5,7 +5,7 @@ log_prompts: true
mode: oobabooga
auth_required: false
concurrent_gens: 3
token_limit: 5555
token_limit: 7777
backend_url: http://172.0.0.2:9104

View File

@ -1,3 +1,5 @@
from flask_caching import Cache
from redis import Redis
# Flask-Caching instance configured for the Redis backend at
# redis://localhost:6379/0; cache keys are prefixed with 'local-llm'.
cache = Cache(config={'CACHE_TYPE': 'RedisCache', 'CACHE_REDIS_URL': 'redis://localhost:6379/0', 'CACHE_KEY_PREFIX': 'local-llm'})
# Direct redis-py client, created with the library's default connection
# arguments. Other modules import it from here to read and increment the
# 'proompts' counter.
redis = Redis()

View File

@ -6,13 +6,22 @@ from threading import Semaphore, Thread
from llm_server import opts
from llm_server.integer import ThreadSafeInteger
from llm_server.opts import concurrent_gens
from llm_server.routes.cache import redis
# proompters_1_min = 0
# Semaphore sized by the configured concurrent_gens value. The stats route
# reports 'proompters_now' as opts.concurrent_gens minus this semaphore's
# current value.
concurrent_semaphore = Semaphore(concurrent_gens)
# In-process prompt counter, starting at 0.
# NOTE(review): the generate and stats routes appear to use the Redis
# 'proompts' key instead of this object - confirm whether it is still needed.
proompts = ThreadSafeInteger(0)
# Captured once when this module is imported; the stats route subtracts it
# from datetime.now() to report 'uptime' in seconds.
start_time = datetime.now()
def get_count():
    """Return the total prompt count stored in Redis under 'proompts'.

    The value is read with ``redis.get``; when that call returns ``None``
    (no value stored for the key) the count is reported as 0, otherwise
    the stored value is converted with ``int``.
    """
    stored = redis.get('proompts')
    # Guard clause: nothing stored yet means no prompts have been counted.
    if stored is None:
        return 0
    return int(stored)
class SemaphoreCheckerThread(Thread):
proompters_1_min = 0
recent_prompters = {}

View File

@ -2,8 +2,9 @@ import time
from flask import jsonify, request
from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore, proompts
from llm_server.routes.stats import SemaphoreCheckerThread, concurrent_semaphore
from . import bp
from ..cache import redis
from ..helpers.client import format_sillytavern_err
from ..helpers.http import cache_control, validate_json
from ... import opts
@ -65,7 +66,7 @@ def generate():
}), 200
response_valid_json, response_json_body = validate_json(response)
if response_valid_json:
proompts.increment()
redis.incr('proompts')
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
if not backend_response:
if opts.mode == 'oobabooga':

View File

@ -15,7 +15,6 @@ from ...llm.info import get_running_model
@bp.route('/stats', methods=['GET'])
@cache.cached(timeout=5, query_string=True)
@cache_control(5)
def get_stats():
model_list = get_running_model() # will return False when the fetch fails
if isinstance(model_list, bool):
@ -27,7 +26,7 @@ def get_stats():
'stats': {
'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
'total_proompts': stats.proompts.value,
'total_proompts': stats.get_count(),
'uptime': int((datetime.now() - stats.start_time).total_seconds()),
},
'online': online,

View File

@ -7,7 +7,9 @@ After=basic.target network.target
User=server
Group=server
WorkingDirectory=/srv/server/local-llm-server
ExecStart=/srv/server/local-llm-server/venv/bin/gunicorn --workers 3 --bind 0.0.0.0:5000 server:app
# Need a lot of workers since we have long-running requests
# Takes about 3.5G memory
ExecStart=/srv/server/local-llm-server/venv/bin/gunicorn --workers 20 --bind 0.0.0.0:5000 server:app --timeout 60 --worker-class gevent
Restart=always
RestartSec=2

View File

@ -5,4 +5,5 @@ flask_caching
requests
tiktoken
gunicorn
redis
redis
gevent