add home template
Commit f3fe514c11 (parent dca2dc570f)
@@ -10,6 +10,8 @@ token_limit: 7777

backend_url: https://10.0.0.86:8083

+llm_middleware_name: proxy.chub-archive.evulid.cc

## STATS ##

# Display the total_proompts item on the stats screen.
@@ -10,7 +10,12 @@ config_default_vars = {
    'show_num_prompts': True,
    'show_uptime': True,
}
-config_required_vars = ['token_limit', 'concurrent_gens', 'mode']
+config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middleware_name']
+
+mode_ui_names = {
+    'oobabooga': 'Text Gen WebUI (ooba)',
+    'hf-textgen': 'UNDEFINED',
+}


class ConfigLoader:
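As a rough illustration (hypothetical helper names, not part of this commit; the project's actual validation presumably lives in ConfigLoader), the required-variable list is meant to fail fast on missing keys, and the new mode_ui_names mapping turns the configured backend mode into the label shown on the home page:

# Hypothetical sketch of how the required/default variable lists could be applied.
def apply_config_rules(user_config: dict, required: list, defaults: dict) -> dict:
    missing = [key for key in required if key not in user_config]
    if missing:
        raise ValueError(f'Missing required config variables: {missing}')
    return {**defaults, **user_config}  # user values override the defaults

mode_ui_names = {'oobabooga': 'Text Gen WebUI (ooba)', 'hf-textgen': 'UNDEFINED'}
print(mode_ui_names['oobabooga'])  # -> Text Gen WebUI (ooba), as rendered on the home page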
@@ -1,4 +1,4 @@
-# Global settings that never change after startup/init
+# Read-only global variables

running_model = 'none'
concurrent_gens = 3
@@ -9,6 +9,7 @@ database_path = './proxy-server.db'
auth_required = False
log_prompts = False
frontend_api_client = ''
+full_client_api = None
http_host = None
verify_ssl = True
show_num_prompts = True
@@ -12,6 +12,8 @@ bp = Blueprint('v1', __name__)
def before_request():
    if not opts.http_host:
        opts.http_host = request.headers.get("Host")
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
    if request.endpoint != 'v1.get_stats':
        response = require_api_key()
        if response is not None:
@@ -0,0 +1,48 @@
from datetime import datetime
import time

from llm_server import opts
from llm_server.llm.info import get_running_model
from llm_server.routes.queue import priority_queue
from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time


def generate_stats():
    model_list, error = get_running_model()  # will return False when the fetch fails
    if isinstance(model_list, bool):
        online = False
    else:
        online = True

    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
    # if len(t) == 0:
    #     estimated_wait = 0
    # else:
    #     waits = [elapsed for end, elapsed in t]
    #     estimated_wait = int(sum(waits) / len(waits))

    average_generation_time = int(calculate_avg_gen_time())
    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
    return {
        'stats': {
            'prompts_in_queue': proompters_in_queue,
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
            'average_generation_elapsed_sec': average_generation_time,
        },
        'online': online,
        'mode': opts.mode,
        'model': model_list,
        'endpoints': {
            'blocking': opts.full_client_api,
        },
        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
        'timestamp': int(time.time()),
        'openaiKeys': '∞',
        'anthropicKeys': '∞',
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'context_size': opts.context_size,
        }
    }
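For a rough sense of the wait estimate returned above, the figure is just the average generation time multiplied by the number of prompts queued or currently generating (the numbers below are made up, for illustration only):

# Made-up example values.
average_generation_time = 12   # seconds, as returned by calculate_avg_gen_time()
proompters_in_queue = 3        # queued prompts plus active generation workers
estimated_wait_sec = int(average_generation_time * proompters_in_queue)
assert estimated_wait_sec == 36  # what 'estimated_wait_sec' would report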
@@ -5,51 +5,15 @@ from flask import jsonify, request

from llm_server import opts
from . import bp
+from .generate_stats import generate_stats
-from .. import stats
from ..cache import cache
-from ..queue import priority_queue
-from ..stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers
-from ...llm.info import get_running_model


@bp.route('/stats', methods=['GET'])
-# @cache.cached(timeout=5, query_string=True)
+@cache.cached(timeout=5, query_string=True)
def get_stats():
-    model_list, error = get_running_model()  # will return False when the fetch fails
-    if isinstance(model_list, bool):
-        online = False
-    else:
-        online = True
-
-    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
-    # if len(t) == 0:
-    #     estimated_wait = 0
-    # else:
-    #     waits = [elapsed for end, elapsed in t]
-    #     estimated_wait = int(sum(waits) / len(waits))
-
-    average_generation_time = int(calculate_avg_gen_time())
-    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
-
-    return jsonify({
-        'stats': {
-            'prompts_in_queue': proompters_in_queue,
-            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
-            'total_proompts': stats.get_total_proompts() if opts.show_num_prompts else None,
-            'uptime': int((datetime.now() - stats.server_start_time).total_seconds()) if opts.show_uptime else None,
-            'average_generation_elapsed_sec': average_generation_time,
-        },
-        'online': online,
-        'mode': opts.mode,
-        'model': model_list,
-        'endpoints': {
-            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
-        },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
-        'timestamp': int(time.time()),
-        'openaiKeys': '∞',
-        'anthropicKeys': '∞',
-        'config': {
-            'gatekeeper': 'none' if opts.auth_required is False else 'token',
-            'context_size': opts.context_size,
-        }
-    }), 200
+    return jsonify(generate_stats()), 200
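A hypothetical client-side call against the slimmed-down stats route (the hostname is a placeholder; the v1 blueprint is mounted under /api/v1/ in server.py):

import requests

# Placeholder host; substitute the real proxy hostname.
resp = requests.get('https://proxy.example.org/api/v1/stats', timeout=5)
data = resp.json()
print(data['online'], data['stats']['prompts_in_queue'], data['estimated_wait_sec'])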
@@ -0,0 +1,32 @@
import time
from threading import Thread

import requests

from llm_server import opts
from llm_server.routes.cache import redis


class BackendHealthCheck(Thread):
    backend_online = False

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True

    def run(self):
        while True:
            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    redis.set('backend_online', 0)
                    # TODO: handle error
                    print(e)
            elif opts.mode == 'hf-textgen':
                pass
            else:
                raise Exception
            time.sleep(1)
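A minimal sketch, not part of the commit, of reading back the backend_online flag this thread maintains; it assumes a redis-py style client where get() returns bytes or None:

def is_backend_online(redis_client) -> bool:
    # Assumes get() returns bytes (b'1'/b'0') or None when the key is unset.
    value = redis_client.get('backend_online')
    return value in (b'1', '1', 1)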
server.py
@@ -1,13 +1,14 @@
import json
import os
import sys
from pathlib import Path
from threading import Thread

-import config
-from flask import Flask, jsonify
+from flask import Flask, jsonify, render_template, request
+
+import config
from llm_server import opts
-from llm_server.config import ConfigLoader, config_default_vars, config_required_vars
+from llm_server.config import ConfigLoader, config_default_vars, config_required_vars, mode_ui_names
from llm_server.database import get_number_of_rows, init_db
from llm_server.helpers import resolve_path
from llm_server.routes.cache import cache, redis
@@ -15,6 +16,8 @@ from llm_server.routes.helpers.http import cache_control
from llm_server.routes.queue import start_workers
from llm_server.routes.stats import SemaphoreCheckerThread, process_avg_gen_time
from llm_server.routes.v1 import bp
+from llm_server.routes.v1.generate_stats import generate_stats
+from llm_server.threads import BackendHealthCheck

script_path = os.path.dirname(os.path.realpath(__file__))
@@ -57,6 +60,7 @@ if not opts.verify_ssl:

flushed_keys = redis.flush()
print('Flushed', len(flushed_keys), 'keys from Redis.')
+redis.set('backend_online', 0)

if config['load_num_prompts']:
    redis.set('proompts', get_number_of_rows('prompts'))
@@ -70,6 +74,7 @@ start_workers(opts.concurrent_gens)
process_avg_gen_time_background_thread = Thread(target=process_avg_gen_time)
process_avg_gen_time_background_thread.daemon = True
process_avg_gen_time_background_thread.start()
+BackendHealthCheck().start()
SemaphoreCheckerThread().start()

app = Flask(__name__)
@@ -84,9 +89,32 @@ app.register_blueprint(bp, url_prefix='/api/v1/')


+@app.route('/')
+@app.route('/api')
+@cache.cached(timeout=5, query_string=True)
+def home():
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
+    stats = generate_stats()
+
+    if not bool(redis.get('backend_online')) or not stats['online']:
+        running_model = estimated_wait_sec = 'offline'
+    else:
+        running_model = opts.running_model
+        estimated_wait_sec = f"{stats['estimated_wait_sec']} seconds"
+
+    return render_template('home.html',
+                           llm_middleware_name=config['llm_middleware_name'],
+                           current_model=running_model,
+                           client_api=opts.full_client_api,
+                           estimated_wait=estimated_wait_sec,
+                           mode_name=mode_ui_names[opts.mode],
+                           context_size=opts.context_size,
+                           stats_json=json.dumps(stats, indent=4, ensure_ascii=False)
+                           )


@app.route('/<first>')
@app.route('/<first>/<path:rest>')
@cache_control(-1)
def fallback(first=None, rest=None):
    return jsonify({
        'error': 404,
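A hypothetical smoke test of the new home page using Flask's test client; it assumes server.py is importable as a module named server and that its startup requirements (config file, Redis, database, backend) are available:

# Hypothetical test; the Host header value is a placeholder.
from server import app

with app.test_client() as client:
    resp = client.get('/', headers={'Host': 'proxy.example.org'})
    assert resp.status_code == 200
    assert b'Client API URL' in resp.data  # text rendered by templates/home.html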
@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>

<head>
    <meta content="width=device-width, initial-scale=1" name="viewport"/>
    <script>
        var _paq = window._paq = window._paq || [];
        _paq.push(['trackPageView']);
        _paq.push(['enableLinkTracking']);
        (function () {
            var u = "https://mato.evulid.cc/";
            _paq.push(['setTrackerUrl', u + 'matomo.php']);
            _paq.push(['setSiteId', '10']);
            var d = document,
                g = d.createElement('script'),
                s = d.getElementsByTagName('script')[0];
            g.async = true;
            g.src = u + 'matomo.js';
            s.parentNode.insertBefore(g, s);
        })();
    </script>
    <style>
        .container {
            padding: 1em 3em;
        }

        #json {
            background-color: #ffb6c16e;
            padding: 1em;
            display: inline-block;
        }

        @media only screen and (max-width: 600px) {
            .container {
                padding: 1em;
            }

            h1 {
                font-size: 1.5em;
            }
        }
    </style>
</head>

<body>
<div class="container">
    <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>

    <p><strong>Current Model:</strong> <span id="model">{{ current_model }}</span></p>
    <p><strong>Client API URL:</strong> {{ client_api }}</p>
    <p><strong>Estimated Wait Time:</strong> <span id="estimatedWait">{{ estimated_wait }}</span></p>

    <br>

    <div id="oobabooga">
        <strong>Instructions:</strong>
        <ol>
            <li>Set your API type to <kbd>{{ mode_name }}</kbd></li>
            <li>Enter <kbd>{{ client_api }}</kbd> in the <kbd>Blocking API url</kbd> textbox.</li>
            <li>Click <kbd>Connect</kbd> to test the connection.</li>
            <li>Open your preset config and set <kbd>Context Size</kbd> to {{ context_size }}.</li>
            <li>Follow this guide to get set up: <a href="https://rentry.org/freellamas" target="_blank">rentry.org/freellamas</a></li>
        </ol>
    </div>

    <br><br>

    <pre id="json">{{ stats_json }}</pre>
</div>
</body>

</html>