add home template
Commit f3fe514c11 (parent dca2dc570f)
@@ -10,6 +10,8 @@ token_limit: 7777

backend_url: https://10.0.0.86:8083

+llm_middleware_name: proxy.chub-archive.evulid.cc

## STATS ##

# Display the total_proompts item on the stats screen.
@@ -10,7 +10,12 @@ config_default_vars = {
    'show_num_prompts': True,
    'show_uptime': True,
}
-config_required_vars = ['token_limit', 'concurrent_gens', 'mode']
+config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middleware_name']
+
+mode_ui_names = {
+    'oobabooga': 'Text Gen WebUI (ooba)',
+    'hf-textgen': 'UNDEFINED',
+}


class ConfigLoader:
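As a rough illustration (hypothetical helper names, not part of this commit; the project's actual validation presumably lives in ConfigLoader), the required-variable list is meant to fail fast on missing keys, and the new mode_ui_names mapping turns the configured backend mode into the label shown on the home page:

# Hypothetical sketch of how the required/default variable lists could be applied.
def apply_config_rules(user_config: dict, required: list, defaults: dict) -> dict:
    missing = [key for key in required if key not in user_config]
    if missing:
        raise ValueError(f'Missing required config variables: {missing}')
    return {**defaults, **user_config}  # user values override the defaults

mode_ui_names = {'oobabooga': 'Text Gen WebUI (ooba)', 'hf-textgen': 'UNDEFINED'}
print(mode_ui_names['oobabooga'])  # -> Text Gen WebUI (ooba), as rendered on the home page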
@@ -1,4 +1,4 @@
-# Global settings that never change after startup/init
+# Read-only global variables

running_model = 'none'
concurrent_gens = 3
@@ -9,6 +9,7 @@ database_path = './proxy-server.db'
auth_required = False
log_prompts = False
frontend_api_client = ''
+full_client_api = None
http_host = None
verify_ssl = True
show_num_prompts = True
@@ -12,6 +12,8 @@ bp = Blueprint('v1', __name__)
def before_request():
    if not opts.http_host:
        opts.http_host = request.headers.get("Host")
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
    if request.endpoint != 'v1.get_stats':
        response = require_api_key()
        if response is not None:
@@ -0,0 +1,48 @@
from datetime import datetime
import time

from llm_server import opts
from llm_server.llm.info import get_running_model
from llm_server.routes.queue import priority_queue
from llm_server.routes.stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers, get_total_proompts, server_start_time


def generate_stats():
    model_list, error = get_running_model()  # will return False when the fetch fails
    if isinstance(model_list, bool):
        online = False
    else:
        online = True

    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
    # if len(t) == 0:
    #     estimated_wait = 0
    # else:
    #     waits = [elapsed for end, elapsed in t]
    #     estimated_wait = int(sum(waits) / len(waits))

    average_generation_time = int(calculate_avg_gen_time())
    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
    return {
        'stats': {
            'prompts_in_queue': proompters_in_queue,
            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
            'total_proompts': get_total_proompts() if opts.show_num_prompts else None,
            'uptime': int((datetime.now() - server_start_time).total_seconds()) if opts.show_uptime else None,
            'average_generation_elapsed_sec': average_generation_time,
        },
        'online': online,
        'mode': opts.mode,
        'model': model_list,
        'endpoints': {
            'blocking': opts.full_client_api,
        },
        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
        'timestamp': int(time.time()),
        'openaiKeys': '∞',
        'anthropicKeys': '∞',
        'config': {
            'gatekeeper': 'none' if opts.auth_required is False else 'token',
            'context_size': opts.context_size,
        }
    }
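For a rough sense of the wait estimate returned above, the figure is just the average generation time multiplied by the number of prompts queued or currently generating (the numbers below are made up, for illustration only):

# Made-up example values.
average_generation_time = 12   # seconds, as returned by calculate_avg_gen_time()
proompters_in_queue = 3        # queued prompts plus active generation workers
estimated_wait_sec = int(average_generation_time * proompters_in_queue)
assert estimated_wait_sec == 36  # what 'estimated_wait_sec' would report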
@@ -5,51 +5,15 @@ from flask import jsonify, request

from llm_server import opts
from . import bp
+from .generate_stats import generate_stats
-from .. import stats
from ..cache import cache
-from ..queue import priority_queue
-from ..stats import SemaphoreCheckerThread, calculate_avg_gen_time, get_active_gen_workers
-from ...llm.info import get_running_model


@bp.route('/stats', methods=['GET'])
-# @cache.cached(timeout=5, query_string=True)
+@cache.cached(timeout=5, query_string=True)
def get_stats():
-    model_list, error = get_running_model()  # will return False when the fetch fails
-    if isinstance(model_list, bool):
-        online = False
-    else:
-        online = True
-
-    # t = elapsed_times.copy()  # copy since we do multiple operations and don't want it to change
-    # if len(t) == 0:
-    #     estimated_wait = 0
-    # else:
-    #     waits = [elapsed for end, elapsed in t]
-    #     estimated_wait = int(sum(waits) / len(waits))
-
-    average_generation_time = int(calculate_avg_gen_time())
-    proompters_in_queue = len(priority_queue) + get_active_gen_workers()
-
-    return jsonify({
-        'stats': {
-            'prompts_in_queue': proompters_in_queue,
-            'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
-            'total_proompts': stats.get_total_proompts() if opts.show_num_prompts else None,
-            'uptime': int((datetime.now() - stats.server_start_time).total_seconds()) if opts.show_uptime else None,
-            'average_generation_elapsed_sec': average_generation_time,
-        },
-        'online': online,
-        'mode': opts.mode,
-        'model': model_list,
-        'endpoints': {
-            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
-        },
-        'estimated_wait_sec': int(average_generation_time * proompters_in_queue),
-        'timestamp': int(time.time()),
-        'openaiKeys': '∞',
-        'anthropicKeys': '∞',
-        'config': {
-            'gatekeeper': 'none' if opts.auth_required is False else 'token',
-            'context_size': opts.context_size,
-        }
-    }), 200
+    return jsonify(generate_stats()), 200
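A hypothetical client-side call against the slimmed-down stats route (the hostname is a placeholder; the v1 blueprint is mounted under /api/v1/ in server.py):

import requests

# Placeholder host; substitute the real proxy hostname.
resp = requests.get('https://proxy.example.org/api/v1/stats', timeout=5)
data = resp.json()
print(data['online'], data['stats']['prompts_in_queue'], data['estimated_wait_sec'])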
@@ -0,0 +1,32 @@
import time
from threading import Thread

import requests

from llm_server import opts
from llm_server.routes.cache import redis


class BackendHealthCheck(Thread):
    backend_online = False

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True

    def run(self):
        while True:
            if opts.mode == 'oobabooga':
                try:
                    r = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
                    opts.running_model = r.json()['result']
                    redis.set('backend_online', 1)
                except Exception as e:
                    redis.set('backend_online', 0)
                    # TODO: handle error
                    print(e)
            elif opts.mode == 'hf-textgen':
                pass
            else:
                raise Exception
            time.sleep(1)
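A minimal sketch, not part of the commit, of reading back the backend_online flag this thread maintains; it assumes a redis-py style client where get() returns bytes or None:

def is_backend_online(redis_client) -> bool:
    # Assumes get() returns bytes (b'1'/b'0') or None when the key is unset.
    value = redis_client.get('backend_online')
    return value in (b'1', '1', 1)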
server.py
@@ -1,13 +1,14 @@
import json
import os
import sys
from pathlib import Path
from threading import Thread

-import config
-from flask import Flask, jsonify
+from flask import Flask, jsonify, render_template, request
+
+import config
from llm_server import opts
-from llm_server.config import ConfigLoader, config_default_vars, config_required_vars
+from llm_server.config import ConfigLoader, config_default_vars, config_required_vars, mode_ui_names
from llm_server.database import get_number_of_rows, init_db
from llm_server.helpers import resolve_path
from llm_server.routes.cache import cache, redis
@@ -15,6 +16,8 @@ from llm_server.routes.helpers.http import cache_control
from llm_server.routes.queue import start_workers
from llm_server.routes.stats import SemaphoreCheckerThread, process_avg_gen_time
from llm_server.routes.v1 import bp
+from llm_server.routes.v1.generate_stats import generate_stats
+from llm_server.threads import BackendHealthCheck

script_path = os.path.dirname(os.path.realpath(__file__))
@@ -57,6 +60,7 @@ if not opts.verify_ssl:

flushed_keys = redis.flush()
print('Flushed', len(flushed_keys), 'keys from Redis.')
+redis.set('backend_online', 0)

if config['load_num_prompts']:
    redis.set('proompts', get_number_of_rows('prompts'))
@@ -70,6 +74,7 @@ start_workers(opts.concurrent_gens)
process_avg_gen_time_background_thread = Thread(target=process_avg_gen_time)
process_avg_gen_time_background_thread.daemon = True
process_avg_gen_time_background_thread.start()
+BackendHealthCheck().start()
SemaphoreCheckerThread().start()

app = Flask(__name__)
@@ -84,9 +89,32 @@ app.register_blueprint(bp, url_prefix='/api/v1/')


+@app.route('/')
+@app.route('/api')
+@cache.cached(timeout=5, query_string=True)
+def home():
+    if not opts.full_client_api:
+        opts.full_client_api = f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}'
+    stats = generate_stats()
+
+    if not bool(redis.get('backend_online')) or not stats['online']:
+        running_model = estimated_wait_sec = 'offline'
+    else:
+        running_model = opts.running_model
+        estimated_wait_sec = f"{stats['estimated_wait_sec']} seconds"
+
+    return render_template('home.html',
+                           llm_middleware_name=config['llm_middleware_name'],
+                           current_model=running_model,
+                           client_api=opts.full_client_api,
+                           estimated_wait=estimated_wait_sec,
+                           mode_name=mode_ui_names[opts.mode],
+                           context_size=opts.context_size,
+                           stats_json=json.dumps(stats, indent=4, ensure_ascii=False)
+                           )


@app.route('/<first>')
@app.route('/<first>/<path:rest>')
@cache_control(-1)
def fallback(first=None, rest=None):
    return jsonify({
        'error': 404,
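A hypothetical smoke test of the new home page using Flask's test client; it assumes server.py is importable as a module named server and that its startup requirements (config file, Redis, database, backend) are available:

# Hypothetical test; the Host header value is a placeholder.
from server import app

with app.test_client() as client:
    resp = client.get('/', headers={'Host': 'proxy.example.org'})
    assert resp.status_code == 200
    assert b'Client API URL' in resp.data  # text rendered by templates/home.html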
@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>

<head>
    <meta content="width=device-width, initial-scale=1" name="viewport"/>
    <script>
        var _paq = window._paq = window._paq || [];
        _paq.push(['trackPageView']);
        _paq.push(['enableLinkTracking']);
        (function () {
            var u = "https://mato.evulid.cc/";
            _paq.push(['setTrackerUrl', u + 'matomo.php']);
            _paq.push(['setSiteId', '10']);
            var d = document,
                g = d.createElement('script'),
                s = d.getElementsByTagName('script')[0];
            g.async = true;
            g.src = u + 'matomo.js';
            s.parentNode.insertBefore(g, s);
        })();
    </script>
    <style>
        .container {
            padding: 1em 3em;
        }

        #json {
            background-color: #ffb6c16e;
            padding: 1em;
            display: inline-block;
        }

        @media only screen and (max-width: 600px) {
            .container {
                padding: 1em;
            }

            h1 {
                font-size: 1.5em;
            }
        }
    </style>
</head>

<body>
<div class="container">
    <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>

    <p><strong>Current Model:</strong> <span id="model">{{ current_model }}</span></p>
    <p><strong>Client API URL:</strong> {{ client_api }}</p>
    <p><strong>Estimated Wait Time:</strong> <span id="estimatedWait">{{ estimated_wait }}</span></p>

    <br>

    <div id="oobabooga">
        <strong>Instructions:</strong>
        <ol>
            <li>Set your API type to <kbd>{{ mode_name }}</kbd></li>
            <li>Enter <kbd>{{ client_api }}</kbd> in the <kbd>Blocking API url</kbd> textbox.</li>
            <li>Click <kbd>Connect</kbd> to test the connection.</li>
            <li>Open your preset config and set <kbd>Context Size</kbd> to {{ context_size }}.</li>
            <li>Follow this guide to get set up: <a href="https://rentry.org/freellamas" target="_blank">rentry.org/freellamas</a></li>
        </ol>
    </div>

    <br><br>

    <pre id="json">{{ stats_json }}</pre>
</div>
</body>

</html>