add home template

Cyberes 2023-08-23 23:11:12 -06:00
parent dca2dc570f
commit f3fe514c11
9 changed files with 200 additions and 46 deletions


@@ -10,6 +10,8 @@ token_limit: 7777
 backend_url: https://10.0.0.86:8083
+llm_middleware_name: proxy.chub-archive.evulid.cc
 
 ## STATS ##
 # Display the total_proompts item on the stats screen.


@@ -10,7 +10,12 @@ config_default_vars = {
     'show_num_prompts': True,
     'show_uptime': True,
 }
-config_required_vars = ['token_limit', 'concurrent_gens', 'mode']
+config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middleware_name']
+
+mode_ui_names = {
+    'oobabooga': 'Text Gen WebUI (ooba)',
+    'hf-textgen': 'UNDEFINED',
+}
 
 
 class ConfigLoader:
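
Note that llm_middleware_name is now a required config key, so older configs without it will fail validation at startup, and the new mode_ui_names mapping is what the home page later uses to show a human-readable backend name (the 'hf-textgen' label is still a placeholder). A minimal sketch of the lookup, with the mode value hard-coded purely for illustration:

    from llm_server.config import mode_ui_names

    mode = 'oobabooga'                  # would normally come from opts.mode
    print(mode_ui_names[mode])          # -> 'Text Gen WebUI (ooba)'
    print(mode_ui_names['hf-textgen'])  # -> 'UNDEFINED' (placeholder for now)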


@@ -1,4 +1,4 @@
-# Global settings that never change after startup/init
+# Read-only global variables
 
 running_model = 'none'
 concurrent_gens = 3
@@ -9,6 +9,7 @@ database_path = './proxy-server.db'
 auth_required = False
 log_prompts = False
 frontend_api_client = ''
+full_client_api = None
 http_host = None
 verify_ssl = True
 show_num_prompts = True


@@ -12,6 +12,8 @@ bp = Blueprint('v1', __name__)
 def before_request():
     if not opts.http_host:
         opts.http_host = request.headers.get("Host")
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
     if request.endpoint != 'v1.get_stats':
         response = require_api_key()
         if response is not None:
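
This hook now derives the public-facing API base once, from the first request's Host header, and caches it on opts. A rough illustration of the resulting value, using hypothetical Host and frontend_api_client settings:

    # Hypothetical values, for illustration only.
    host = 'proxy.example.com'              # request.headers.get("Host")
    frontend_api_client = '/api'            # opts.frontend_api_client
    full_client_api = f'https://{host}/{frontend_api_client.strip("/")}'
    print(full_client_api)                  # -> 'https://proxy.example.com/api'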


@@ -0,0 +1,48 @@
+from datetime import datetime
+import time
+
+from llm_server import opts
+from llm_server.llm.info import get_running_model
+from llm_server.routes.queue import priority_queue
+from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time
+
+
+def generate_stats():
+    model_list, error = get_running_model()  # will return False when the fetch fails
+    if isinstance(model_list, bool):
+        online = False
+    else:
+        online = True
+
+    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
+    # if len(t) == 0:
+    #     estimated_wait = 0
+    # else:
+    #     waits = [elapsed for end, elapsed in t]
+    #     estimated_wait = int(sum(waits) / len(waits))
+
+    average_generation_time = int(calculate_avg_gen_time())
+    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
+
+    return {
+        'stats': {
+            'prompts_in_queue': proompters_in_queue,
+            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
+            'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
+            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
+            'average_generation_elapsed_sec': average_generation_time,
+        },
+        'online': online,
+        'mode': opts.mode,
+        'model': model_list,
+        'endpoints': {
+            'blocking': opts.full_client_api,
+        },
+        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
+        'timestamp': int(time.time()),
+        'openaiKeys': '',
+        'anthropicKeys': '',
+        'config': {
+            'gatekeeper': 'none' if opts.auth_required is False else 'token',
+            'context_size': opts.context_size,
+        }
+    }
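
Splitting this into its own module lets the /api/v1/stats endpoint and the new home page share one stats payload, so the JSON and the rendered page cannot drift apart. A rough usage sketch, with the field names taken from the dict above:

    from llm_server.routes.v1.generate_stats import generate_stats

    stats = generate_stats()
    if stats['online']:
        print(f"model: {stats['model']}, est. wait: {stats['estimated_wait_sec']}s")
    else:
        print('backend offline')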


@@ -5,51 +5,15 @@ from flask import jsonify, request
 from llm_server import opts
 from . import bp
+from .generate_stats import generate_stats
 from .. import stats
+from ..cache import cache
 from ..queue import priority_queue
 from ..stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers
 from ...llm.info import get_running_model
 
 
 @bp.route('/stats', methods=['GET'])
-# @cache.cached(timeout=5, query_string=True)
+@cache.cached(timeout=5, query_string=True)
 def get_stats():
-    model_list, error = get_running_model()  # will return False when the fetch fails
-    if isinstance(model_list, bool):
-        online = False
-    else:
-        online = True
-
-    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
-    # if len(t) == 0:
-    #     estimated_wait = 0
-    # else:
-    #     waits = [elapsed for end, elapsed in t]
-    #     estimated_wait = int(sum(waits) / len(waits))
-
-    average_generation_time = int(calculate_avg_gen_time())
-    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
-
-    return jsonify({
-        'stats': {
-            'prompts_in_queue': proompters_in_queue,
-            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
-            'total_proompts': stats.get_total_proompts() if opts.show_num_prompts else None,
-            'uptime': int((datetime.now() - stats.server_start_time).total_seconds()) if opts.show_uptime else None,
-            'average_generation_elapsed_sec': average_generation_time,
-        },
-        'online': online,
-        'mode': opts.mode,
-        'model': model_list,
-        'endpoints': {
-            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
-        },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
-        'timestamp': int(time.time()),
-        'openaiKeys': '',
-        'anthropicKeys': '',
-        'config': {
-            'gatekeeper': 'none' if opts.auth_required is False else 'token',
-            'context_size': opts.context_size,
-        }
-    }), 200
+    return jsonify(generate_stats()), 200
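
With the handler reduced to a call to generate_stats() and the @cache.cached decorator re-enabled, identical requests within the 5-second window are served from the Flask cache instead of recomputing the stats. A quick way to poke at the endpoint, assuming a purely hypothetical deployment URL:

    import requests

    # verify=False only because the example backend above uses a self-signed certificate.
    r = requests.get('https://proxy.example.com/api/v1/stats', timeout=5, verify=False)
    print(r.status_code, r.json().get('estimated_wait_sec'))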

llm_server/threads.py (new file, 32 lines)

@@ -0,0 +1,32 @@
+import time
+from threading import Thread
+
+import requests
+
+from llm_server import opts
+from llm_server.routes.cache import redis
+
+
+class BackendHealthCheck(Thread):
+    backend_online = False
+
+    def __init__(self):
+        Thread.__init__(self)
+        self.daemon = True
+
+    def run(self):
+        while True:
+            if opts.mode == 'oobabooga':
+                try:
+                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
+                    opts.running_model = r.json()['result']
+                    redis.set('backend_online', 1)
+                except Exception as e:
+                    redis.set('backend_online', 0)
+                    # TODO: handle error
+                    print(e)
+            elif opts.mode == 'hf-textgen':
+                pass
+            else:
+                raise Exception
+            time.sleep(1)
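
The thread refreshes two pieces of shared state roughly once a second: opts.running_model and a backend_online flag in Redis, which server.py checks before showing a wait estimate (the hf-textgen branch is still a no-op). A sketch of a consumer, hedged because the exact return type of this redis wrapper isn't shown in the diff:

    from llm_server.routes.cache import redis

    # Mirrors the check used in server.py's home() route; whether a stored '0' is
    # treated as falsy depends on how the redis wrapper deserializes values (assumption).
    if bool(redis.get('backend_online')):
        print('backend reported healthy on the last poll')
    else:
        print('backend offline (or not yet checked)')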


@@ -1,13 +1,14 @@
+import json
 import os
 import sys
 from pathlib import Path
 from threading import Thread
 
-import config
-from flask import Flask, jsonify
+from flask import Flask, jsonify, render_template, request
 
+import config
 from llm_server import opts
-from llm_server.config import ConfigLoader, config_default_vars, config_required_vars
+from llm_server.config import ConfigLoader, config_default_vars, config_required_vars, mode_ui_names
 from llm_server.database import get_number_of_rows, init_db
 from llm_server.helpers import resolve_path
 from llm_server.routes.cache import cache, redis
@@ -15,6 +16,8 @@ from llm_server.routes.helpers.http import cache_control
 from llm_server.routes.queue import start_workers
 from llm_server.routes.stats import SemaphoreCheckerThread, process_avg_gen_time
 from llm_server.routes.v1 import bp
+from llm_server.routes.v1.generate_stats import generate_stats
+from llm_server.threads import BackendHealthCheck
 
 script_path = os.path.dirname(os.path.realpath(__file__))
@@ -57,6 +60,7 @@ if not opts.verify_ssl:
 flushed_keys = redis.flush()
 print('Flushed', len(flushed_keys), 'keys from Redis.')
+redis.set('backend_online', 0)
 
 if config['load_num_prompts']:
     redis.set('proompts', get_number_of_rows('prompts'))
@@ -70,6 +74,7 @@ start_workers(opts.concurrent_gens)
 process_avg_gen_time_background_thread = Thread(target=process_avg_gen_time)
 process_avg_gen_time_background_thread.daemon = True
 process_avg_gen_time_background_thread.start()
+BackendHealthCheck().start()
 SemaphoreCheckerThread().start()
 
 app = Flask(__name__)
@@ -84,9 +89,32 @@ app.register_blueprint(bp, url_prefix='/api/v1/')
 @app.route('/')
+@app.route('/api')
+@cache.cached(timeout=5, query_string=True)
+def home():
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
+    stats = generate_stats()
+    if not bool(redis.get('backend_online')) or not stats['online']:
+        running_model = estimated_wait_sec = 'offline'
+    else:
+        running_model = opts.running_model
+        estimated_wait_sec = f"{stats['estimated_wait_sec']} seconds"
+    return render_template('home.html',
+                           llm_middleware_name=config['llm_middleware_name'],
+                           current_model=running_model,
+                           client_api=opts.full_client_api,
+                           estimated_wait=estimated_wait_sec,
+                           mode_name=mode_ui_names[opts.mode],
+                           context_size=opts.context_size,
+                           stats_json=json.dumps(stats, indent=4, ensure_ascii=False)
+                           )
+
+
 @app.route('/<first>')
 @app.route('/<first>/<path:rest>')
+@cache_control(-1)
 def fallback(first=None, rest=None):
     return jsonify({
         'error': 404,

templates/home.html (new file, 72 lines)

@@ -0,0 +1,72 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta content="width=device-width, initial-scale=1" name="viewport"/>
+    <script>
+        var _paq = window._paq = window._paq || [];
+        _paq.push(['trackPageView']);
+        _paq.push(['enableLinkTracking']);
+        (function () {
+            var u = "https://mato.evulid.cc/";
+            _paq.push(['setTrackerUrl', u + 'matomo.php']);
+            _paq.push(['setSiteId', '10']);
+            var d = document,
+                g = d.createElement('script'),
+                s = d.getElementsByTagName('script')[0];
+            g.async = true;
+            g.src = u + 'matomo.js';
+            s.parentNode.insertBefore(g, s);
+        })();
+    </script>
+
+    <style>
+        .container {
+            padding: 1em 3em;
+        }
+
+        #json {
+            background-color: #ffb6c16e;
+            padding: 1em;
+            display: inline-block;
+        }
+
+        @media only screen and (max-width: 600px) {
+            .container {
+                padding: 1em;
+            }
+
+            h1 {
+                font-size: 1.5em;
+            }
+        }
+    </style>
+</head>
+<body>
+<div class="container">
+    <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>
+
+    <p><strong>Current Model:</strong> <span id="model">{{ current_model }}</span></p>
+    <p><strong>Client API URL:</strong> {{ client_api }}</p>
+    <p><strong>Estimated Wait Time:</strong> <span id="estimatedWait">{{ estimated_wait }}</span></p>
+    <br>
+
+    <div id="oobabooga">
+        <strong>Instructions:</strong>
+        <ol>
+            <li>Set your API type to <kbd>{{ mode_name }}</kbd></li>
+            <li>Enter <kbd>{{ client_api }}</kbd> in the <kbd>Blocking API url</kbd> textbox.</li>
+            <li>Click <kbd>Connect</kbd> to test the connection.</li>
+            <li>Open your preset config and set <kbd>Context Size</kbd> to {{ context_size }}.</li>
+            <li>Follow this guide to get set up: <a href="https://rentry.org/freellamas" target="_blank">rentry.org/freellamas</a></li>
+        </ol>
+    </div>
+
+    <br><br>
+    <pre id="json">{{ stats_json }}</pre>
+</div>
+</body>
+</html>