add home template

Cyberes 2023-08-23 23:11:12 -06:00
parent dca2dc570f
commit f3fe514c11
9 changed files with 200 additions and 46 deletions

View File

@@ -10,6 +10,8 @@ token_limit: 7777
backend_url: https://10.0.0.86:8083
llm_middleware_name: proxy.chub-archive.evulid.cc
## STATS ##
# Display the total_proompts item on the stats screen.

View File

@@ -10,7 +10,12 @@ config_default_vars = {
    'show_num_prompts': True,
    'show_uptime': True,
}
config_required_vars = ['token_limit', 'concurrent_gens', 'mode']
config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middleware_name']

mode_ui_names = {
    'oobabooga': 'Text Gen WebUI (ooba)',
    'hf-textgen': 'UNDEFINED',
}


class ConfigLoader:

View File

@@ -1,4 +1,4 @@
# Global settings that never change after startup/init
# Read-only global variables
running_model = 'none'
concurrent_gens = 3
@@ -9,6 +9,7 @@ database_path = './proxy-server.db'
auth_required = False
log_prompts = False
frontend_api_client = ''
full_client_api = None
http_host = None
verify_ssl = True
show_num_prompts = True

View File

@@ -12,6 +12,8 @@ bp = Blueprint('v1', __name__)
def before_request():
    if not opts.http_host:
        opts.http_host = request.headers.get("Host")
    if not opts.full_client_api:
        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
    if request.endpoint != 'v1.get_stats':
        response = require_api_key()
        if response is not None:

View File

@@ -0,0 +1,48 @@
from datetime import datetime
import time

from llm_server import opts
from llm_server.llm.info import get_running_model
from llm_server.routes.queue import priority_queue
from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time


def generate_stats():
    model_list, error = get_running_model()  # will return False when the fetch fails
    if isinstance(model_list, bool):
        online = False
    else:
        online = True

    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
    # if len(t) == 0:
    #     estimated_wait = 0
    # else:
    #     waits = [elapsed for end, elapsed in t]
    #     estimated_wait = int(sum(waits) / len(waits))
    average_generation_time = int(calculate_avg_gen_time())
    proompters_in_queue = len(priority_queue) + get_active_gen_workers()

    return {
        'stats': {
            'prompts_in_queue': proompters_in_queue,
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
            'average_generation_elapsed_sec': average_generation_time,
        },
        'online': online,
        'mode': opts.mode,
        'model': model_list,
        'endpoints': {
            'blocking': opts.full_client_api,
        },
        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
        'timestamp': int(time.time()),
        'openaiKeys': '',
        'anthropicKeys': '',
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'context_size': opts.context_size,
        }
    }
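
For orientation: the reported wait is simply the average generation time multiplied by the number of prompts queued or currently generating. A hedged usage sketch with made-up numbers follows.

# Sketch only -- calling this outside the running server assumes the
# database, Redis, and backend are all reachable.
from llm_server.routes.v1.generate_stats import generate_stats

stats = generate_stats()
# e.g. average_generation_elapsed_sec = 12 with 3 prompts queued or generating
# gives estimated_wait_sec = 12 * 3 = 36
print(stats['estimated_wait_sec'])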

View File

@@ -5,51 +5,15 @@ from flask import jsonify, request
from llm_server import opts
from . import bp
from .generate_stats import generate_stats
from .. import stats
from ..cache import cache
from ..queue import priority_queue
from ..stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers
from ...llm.info import get_running_model
@bp.route('/stats', methods=['GET'])
# @cache.cached(timeout=5, query_string=True)
@cache.cached(timeout=5, query_string=True)
def get_stats():
    model_list, error = get_running_model()  # will return False when the fetch fails
    if isinstance(model_list, bool):
        online = False
    else:
        online = True

    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
    # if len(t) == 0:
    #     estimated_wait = 0
    # else:
    #     waits = [elapsed for end, elapsed in t]
    #     estimated_wait = int(sum(waits) / len(waits))
    average_generation_time = int(calculate_avg_gen_time())
    proompters_in_queue = len(priority_queue) + get_active_gen_workers()

    return jsonify({
        'stats': {
            'prompts_in_queue': proompters_in_queue,
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'total_proompts': stats.get_total_proompts() if opts.show_num_prompts else None,
            'uptime': int((datetime.now() - stats.server_start_time).total_seconds()) if opts.show_uptime else None,
            'average_generation_elapsed_sec': average_generation_time,
        },
        'online': online,
        'mode': opts.mode,
        'model': model_list,
        'endpoints': {
            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
        },
        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
        'timestamp': int(time.time()),
        'openaiKeys': '',
        'anthropicKeys': '',
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'context_size': opts.context_size,
        }
    }), 200
    return jsonify(generate_stats()), 200
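
With the old body collapsed into a single call, clients poll the endpoint exactly as before. A rough client-side example, assuming the blueprint stays mounted under /api/v1/ (as in server.py below) and the proxy answers at the hostname used in the example config:

# Illustrative poll of the stats route; substitute your own host.
import requests

r = requests.get('https://proxy.chub-archive.evulid.cc/api/v1/stats', timeout=5)
data = r.json()
print(data['online'], data['stats']['prompts_in_queue'], data['estimated_wait_sec'])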

llm_server/threads.py Normal file (32 lines)
View File

@@ -0,0 +1,32 @@
import time
from threading import Thread

import requests

from llm_server import opts
from llm_server.routes.cache import redis


class BackendHealthCheck(Thread):
    backend_online = False

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True

    def run(self):
        while True:
            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    redis.set('backend_online', 0)
                    # TODO: handle error
                    print(e)
            elif opts.mode == 'hf-textgen':
                pass
            else:
                raise Exception
            time.sleep(1)
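
A minimal sketch of how the new thread gets used and what it expects back from the backend; the response shape is inferred from the r.json()['result'] access above and is not spelled out in this commit:

# Sketch only.
from llm_server.threads import BackendHealthCheck

BackendHealthCheck().start()  # daemon thread; polls the backend once per second

# Assumed shape of the oobabooga reply to GET {backend_url}/api/v1/model:
#   {"result": "<currently loaded model name>"}
# On success the name lands in opts.running_model and Redis gets backend_online = 1;
# any exception flips backend_online back to 0.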

View File

@@ -1,13 +1,14 @@
import json
import os
import sys
from pathlib import Path
from threading import Thread
import config
from flask import Flask, jsonify
from flask import Flask, jsonify, render_template, request
import config
from llm_server import opts
from llm_server.config import ConfigLoader, config_default_vars, config_required_vars
from llm_server.config import ConfigLoader, config_default_vars, config_required_vars, mode_ui_names
from llm_server.database import get_number_of_rows, init_db
from llm_server.helpers import resolve_path
from llm_server.routes.cache import cache, redis
@@ -15,6 +16,8 @@ from llm_server.routes.helpers.http import cache_control
from llm_server.routes.queue import start_workers
from llm_server.routes.stats import SemaphoreCheckerThread, process_avg_gen_time
from llm_server.routes.v1 import bp
from llm_server.routes.v1.generate_stats import generate_stats
from llm_server.threads import BackendHealthCheck
script_path = os.path.dirname(os.path.realpath(__file__))
@@ -57,6 +60,7 @@ if not opts.verify_ssl:
flushed_keys = redis.flush()
print('Flushed', len(flushed_keys), 'keys from Redis.')
redis.set('backend_online', 0)

if config['load_num_prompts']:
    redis.set('proompts', get_number_of_rows('prompts'))
@@ -70,6 +74,7 @@ start_workers(opts.concurrent_gens)
process_avg_gen_time_background_thread = Thread(target=process_avg_gen_time)
process_avg_gen_time_background_thread.daemon = True
process_avg_gen_time_background_thread.start()
BackendHealthCheck().start()
SemaphoreCheckerThread().start()
app = Flask(__name__)
@@ -84,9 +89,32 @@ app.register_blueprint(bp, url_prefix='/api/v1/')
@app.route('/')
@app.route('/api')
@cache.cached(timeout=5, query_string=True)
def home():
    if not opts.full_client_api:
        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
    stats = generate_stats()

    if not bool(redis.get('backend_online')) or not stats['online']:
        running_model = estimated_wait_sec = 'offline'
    else:
        running_model = opts.running_model
        estimated_wait_sec = f"{stats['estimated_wait_sec']} seconds"

    return render_template('home.html',
                           llm_middleware_name=config['llm_middleware_name'],
                           current_model=running_model,
                           client_api=opts.full_client_api,
                           estimated_wait=estimated_wait_sec,
                           mode_name=mode_ui_names[opts.mode],
                           context_size=opts.context_size,
                           stats_json=json.dumps(stats, indent=4, ensure_ascii=False)
                           )


@app.route('/<first>')
@app.route('/<first>/<path:rest>')
@cache_control(-1)
def fallback(first=None, rest=None):
    return jsonify({
        'error': 404,

templates/home.html Normal file (72 lines)
View File

@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>
<head>
    <meta content="width=device-width, initial-scale=1" name="viewport"/>
    <script>
        var _paq = window._paq = window._paq || [];
        _paq.push(['trackPageView']);
        _paq.push(['enableLinkTracking']);
        (function () {
            var u = "https://mato.evulid.cc/";
            _paq.push(['setTrackerUrl', u + 'matomo.php']);
            _paq.push(['setSiteId', '10']);
            var d = document,
                g = d.createElement('script'),
                s = d.getElementsByTagName('script')[0];
            g.async = true;
            g.src = u + 'matomo.js';
            s.parentNode.insertBefore(g, s);
        })();
    </script>

    <style>
        .container {
            padding: 1em 3em;
        }

        #json {
            background-color: #ffb6c16e;
            padding: 1em;
            display: inline-block;
        }

        @media only screen and (max-width: 600px) {
            .container {
                padding: 1em;
            }

            h1 {
                font-size: 1.5em;
            }
        }
    </style>
</head>

<body>
<div class="container">
    <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>

    <p><strong>Current Model:</strong> <span id="model">{{ current_model }}</span></p>
    <p><strong>Client API URL:</strong> {{ client_api }}</p>
    <p><strong>Estimated Wait Time:</strong> <span id="estimatedWait">{{ estimated_wait }}</span></p>
    <br>

    <div id="oobabooga">
        <strong>Instructions:</strong>
        <ol>
            <li>Set your API type to <kbd>{{ mode_name }}</kbd></li>
            <li>Enter <kbd>{{ client_api }}</kbd> in the <kbd>Blocking API url</kbd> textbox.</li>
            <li>Click <kbd>Connect</kbd> to test the connection.</li>
            <li>Open your preset config and set <kbd>Context Size</kbd> to {{ context_size }}.</li>
            <li>Follow this guide to get set up: <a href="https://rentry.org/freellamas" target="_blank">rentry.org/freellamas</a></li>
        </ol>
    </div>

    <br><br>
    <pre id="json">{{ stats_json }}</pre>
</div>
</body>
</html>