add home template

parent dca2dc570f
commit f3fe514c11
@@ -10,6 +10,8 @@ token_limit: 7777
 backend_url: https://10.0.0.86:8083

+llm_middleware_name: proxy.chub-archive.evulid.cc
+

 ## STATS ##

 # Display the total_proompts item on the stats screen.
@@ -10,7 +10,12 @@ config_default_vars = {
     'show_num_prompts': True,
     'show_uptime': True,
 }
-config_required_vars = ['token_limit', 'concurrent_gens', 'mode']
+config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middleware_name']
+
+mode_ui_names = {
+    'oobabooga': 'Text Gen WebUI (ooba)',
+    'hf-textgen': 'UNDEFINED',
+}


 class ConfigLoader:
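Note: ConfigLoader itself is not touched by this commit, so the enforcement of the new 'llm_middleware_name' entry is not visible here. A minimal sketch of what a loader that honours config_default_vars and config_required_vars might look like (load_config below is hypothetical, not code from this repository):

    # Hypothetical sketch, assuming it sits next to config_default_vars / config_required_vars.
    import yaml

    def load_config(path):
        with open(path) as f:
            config = yaml.safe_load(f) or {}
        for key, default in config_default_vars.items():
            config.setdefault(key, default)  # optional keys fall back to their defaults
        missing = [k for k in config_required_vars if k not in config]
        if missing:
            raise ValueError(f'missing required config variables: {missing}')
        return config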
@@ -1,4 +1,4 @@
-# Global settings that never change after startup/init
+# Read-only global variables

 running_model = 'none'
 concurrent_gens = 3
@@ -9,6 +9,7 @@ database_path = './proxy-server.db'
 auth_required = False
 log_prompts = False
 frontend_api_client = ''
+full_client_api = None
 http_host = None
 verify_ssl = True
 show_num_prompts = True
@@ -12,6 +12,8 @@ bp = Blueprint('v1', __name__)
 def before_request():
     if not opts.http_host:
         opts.http_host = request.headers.get("Host")
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
     if request.endpoint != 'v1.get_stats':
         response = require_api_key()
         if response is not None:
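Worked example of the new lines above: if the incoming request carries Host: proxy.chub-archive.evulid.cc and opts.frontend_api_client is set to '/api' (an assumed value, not shown in this diff), opts.full_client_api becomes https://proxy.chub-archive.evulid.cc/api and is reused unchanged for every later request.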
@@ -0,0 +1,48 @@
+from datetime import datetime
+import time
+
+from llm_server import opts
+from llm_server.llm.info import get_running_model
+from llm_server.routes.queue import priority_queue
+from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time
+
+
+def generate_stats():
+    model_list, error = get_running_model()  # will return False when the fetch fails
+    if isinstance(model_list, bool):
+        online = False
+    else:
+        online = True
+
+    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
+    # if len(t) == 0:
+    #     estimated_wait = 0
+    # else:
+    #     waits = [elapsed for end, elapsed in t]
+    #     estimated_wait = int(sum(waits) / len(waits))
+
+    average_generation_time = int(calculate_avg_gen_time())
+    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
+    return {
+        'stats': {
+            'prompts_in_queue': proompters_in_queue,
+            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
+            'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
+            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
+            'average_generation_elapsed_sec': average_generation_time,
+        },
+        'online': online,
+        'mode': opts.mode,
+        'model': model_list,
+        'endpoints': {
+            'blocking': opts.full_client_api,
+        },
+        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
+        'timestamp': int(time.time()),
+        'openaiKeys': '∞',
+        'anthropicKeys': '∞',
+        'config': {
+            'gatekeeper': 'none' if opts.auth_required is False else 'token',
+            'context_size': opts.context_size,
+        }
+    }
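For reference, generate_stats() returns a plain dict shaped roughly like the following (all values are illustrative placeholders, not real measurements):

    {
        'stats': {
            'prompts_in_queue': 2,
            'proompters_1_min': 5,
            'total_proompts': 1234,
            'uptime': 3600,
            'average_generation_elapsed_sec': 12,
        },
        'online': True,
        'mode': 'oobabooga',
        'model': 'example-13b',  # whatever get_running_model() reported
        'endpoints': {'blocking': 'https://proxy.chub-archive.evulid.cc/api'},
        'estimated_wait_sec': 24,
        'timestamp': 1693000000,
        'openaiKeys': '∞',
        'anthropicKeys': '∞',
        'config': {'gatekeeper': 'none', 'context_size': 4096},
    }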
@@ -5,51 +5,15 @@ from flask import jsonify, request

 from llm_server import opts
 from . import bp
+from .generate_stats import generate_stats
 from .. import stats
+from ..cache import cache
 from ..queue import priority_queue
 from ..stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers
 from ...llm.info import get_running_model


 @bp.route('/stats', methods=['GET'])
-# @cache.cached(timeout=5, query_string=True)
+@cache.cached(timeout=5, query_string=True)
 def get_stats():
-    model_list, error = get_running_model()  # will return False when the fetch fails
-    if isinstance(model_list, bool):
-        online = False
-    else:
-        online = True
-
-    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
-    # if len(t) == 0:
-    #     estimated_wait = 0
-    # else:
-    #     waits = [elapsed for end, elapsed in t]
-    #     estimated_wait = int(sum(waits) / len(waits))
-
-    average_generation_time = int(calculate_avg_gen_time())
-    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
-
-    return jsonify({
-        'stats': {
-            'prompts_in_queue': proompters_in_queue,
-            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
-            'total_proompts': stats.get_total_proompts() if opts.show_num_prompts else None,
-            'uptime': int((datetime.now() - stats.server_start_time).total_seconds()) if opts.show_uptime else None,
-            'average_generation_elapsed_sec': average_generation_time,
-        },
-        'online': online,
-        'mode': opts.mode,
-        'model': model_list,
-        'endpoints': {
-            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
-        },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
-        'timestamp': int(time.time()),
-        'openaiKeys': '∞',
-        'anthropicKeys': '∞',
-        'config': {
-            'gatekeeper': 'none' if opts.auth_required is False else 'token',
-            'context_size': opts.context_size,
-        }
-    }), 200
+    return jsonify(generate_stats()), 200
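Since the v1 blueprint is registered under /api/v1/ in server.py (below), the slimmed-down endpoint is reachable at /api/v1/stats and is now cached for 5 seconds. An illustrative client call (hostname is a placeholder):

    import requests

    # Poll the cached stats endpoint; the response body is the generate_stats() dict shown above.
    r = requests.get('https://proxy.chub-archive.evulid.cc/api/v1/stats', timeout=5)
    print(r.json()['stats']['prompts_in_queue'])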
@@ -0,0 +1,32 @@
+import time
+from threading import Thread
+
+import requests
+
+from llm_server import opts
+from llm_server.routes.cache import redis
+
+
+class BackendHealthCheck(Thread):
+    backend_online = False
+
+    def __init__(self):
+        Thread.__init__(self)
+        self.daemon = True
+
+    def run(self):
+        while True:
+            if opts.mode == 'oobabooga':
+                try:
+                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
+                    opts.running_model = r.json()['result']
+                    redis.set('backend_online', 1)
+                except Exception as e:
+                    redis.set('backend_online', 0)
+                    # TODO: handle error
+                    print(e)
+            elif opts.mode == 'hf-textgen':
+                pass
+            else:
+                raise Exception
+            time.sleep(1)
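BackendHealthCheck publishes the backend state through the shared redis wrapper rather than its backend_online class attribute, so any part of the server can read it. A hedged sketch of a consumer (the exact return type of this project's redis wrapper is not shown in the diff, hence the defensive comparison):

    from llm_server.routes.cache import redis

    def backend_is_online():
        # The health-check thread stores 1 when the oobabooga backend answered, 0 otherwise.
        return redis.get('backend_online') in (1, '1', b'1')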
server.py (36 changed lines)
@@ -1,13 +1,14 @@
+import json
 import os
 import sys
 from pathlib import Path
 from threading import Thread

-import config
-from flask import Flask, jsonify
+from flask import Flask, jsonify, render_template, request

+import config
 from llm_server import opts
-from llm_server.config import ConfigLoader, config_default_vars, config_required_vars
+from llm_server.config import ConfigLoader, config_default_vars, config_required_vars, mode_ui_names
 from llm_server.database import get_number_of_rows, init_db
 from llm_server.helpers import resolve_path
 from llm_server.routes.cache import cache, redis

@@ -15,6 +16,8 @@ from llm_server.routes.helpers.http import cache_control
 from llm_server.routes.queue import start_workers
 from llm_server.routes.stats import SemaphoreCheckerThread, process_avg_gen_time
 from llm_server.routes.v1 import bp
+from llm_server.routes.v1.generate_stats import generate_stats
+from llm_server.threads import BackendHealthCheck

 script_path = os.path.dirname(os.path.realpath(__file__))
@@ -57,6 +60,7 @@ if not opts.verify_ssl:

 flushed_keys = redis.flush()
 print('Flushed', len(flushed_keys), 'keys from Redis.')
+redis.set('backend_online', 0)

 if config['load_num_prompts']:
     redis.set('proompts', get_number_of_rows('prompts'))
@@ -70,6 +74,7 @@ start_workers(opts.concurrent_gens)
 process_avg_gen_time_background_thread = Thread(target=process_avg_gen_time)
 process_avg_gen_time_background_thread.daemon = True
 process_avg_gen_time_background_thread.start()
+BackendHealthCheck().start()
 SemaphoreCheckerThread().start()

 app = Flask(__name__)
@@ -84,9 +89,32 @@ app.register_blueprint(bp, url_prefix='/api/v1/')


 @app.route('/')
+@app.route('/api')
+@cache.cached(timeout=5, query_string=True)
+def home():
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
+    stats = generate_stats()
+
+    if not bool(redis.get('backend_online')) or not stats['online']:
+        running_model = estimated_wait_sec = 'offline'
+    else:
+        running_model = opts.running_model
+        estimated_wait_sec = f"{stats['estimated_wait_sec']} seconds"
+
+    return render_template('home.html',
+                           llm_middleware_name=config['llm_middleware_name'],
+                           current_model=running_model,
+                           client_api=opts.full_client_api,
+                           estimated_wait=estimated_wait_sec,
+                           mode_name=mode_ui_names[opts.mode],
+                           context_size=opts.context_size,
+                           stats_json=json.dumps(stats, indent=4, ensure_ascii=False)
+                           )
+
+
 @app.route('/<first>')
 @app.route('/<first>/<path:rest>')
-@cache_control(-1)
 def fallback(first=None, rest=None):
     return jsonify({
         'error': 404,
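The new home() view is registered on both / and /api, cached for 5 seconds, and feeds the generate_stats() output plus the config-driven names into the new home.html template. A rough smoke test using Flask's built-in test client (illustrative only, not part of this commit):

    with app.test_client() as client:
        resp = client.get('/', headers={'Host': 'proxy.chub-archive.evulid.cc'})
        assert resp.status_code == 200
        assert b'Current Model' in resp.data  # heading rendered from the template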
@@ -0,0 +1,72 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+    <meta content="width=device-width, initial-scale=1" name="viewport"/>
+    <script>
+        var _paq = window._paq = window._paq || [];
+        _paq.push(['trackPageView']);
+        _paq.push(['enableLinkTracking']);
+        (function () {
+            var u = "https://mato.evulid.cc/";
+            _paq.push(['setTrackerUrl', u + 'matomo.php']);
+            _paq.push(['setSiteId', '10']);
+            var d = document,
+                g = d.createElement('script'),
+                s = d.getElementsByTagName('script')[0];
+            g.async = true;
+            g.src = u + 'matomo.js';
+            s.parentNode.insertBefore(g, s);
+        })();
+    </script>
+    <style>
+        .container {
+            padding: 1em 3em;
+        }
+
+        #json {
+            background-color: #ffb6c16e;
+            padding: 1em;
+            display: inline-block;
+        }
+
+        @media only screen and (max-width: 600px) {
+            .container {
+                padding: 1em;
+            }
+
+            h1 {
+                font-size: 1.5em;
+            }
+        }
+    </style>
+</head>
+
+<body>
+<div class="container">
+    <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>
+
+    <p><strong>Current Model:</strong> <span id="model">{{ current_model }}</span></p>
+    <p><strong>Client API URL:</strong> {{ client_api }}</p>
+    <p><strong>Estimated Wait Time:</strong> <span id="estimatedWait">{{ estimated_wait }}</span></p>
+
+    <br>
+
+    <div id="oobabooga">
+        <strong>Instructions:</strong>
+        <ol>
+            <li>Set your API type to <kbd>{{ mode_name }}</kbd></li>
+            <li>Enter <kbd>{{ client_api }}</kbd> in the <kbd>Blocking API url</kbd> textbox.</li>
+            <li>Click <kbd>Connect</kbd> to test the connection.</li>
+            <li>Open your preset config and set <kbd>Context Size</kbd> to {{ context_size }}.</li>
+            <li>Follow this guide to get set up: <a href="https://rentry.org/freellamas" target="_blank">rentry.org/freellamas</a></li>
+        </ol>
+    </div>
+
+    <br><br>
+
+    <pre id="json">{{ stats_json }}</pre>
+</div>
+</body>
+
+</html>