From 354ad8192dd2ee82414dae2118a48336868fdbc9 Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Sat, 16 Sep 2023 17:37:43 -0600
Subject: [PATCH] fix division by 0, prettify /stats json, add js var to home

---
 llm_server/database.py        | 6 +++++-
 llm_server/helpers.py         | 10 ++++++++++
 llm_server/llm/llm_backend.py | 1 -
 llm_server/routes/v1/proxy.py | 3 ++-
 other/vllm/README.md          | 9 +++++++--
 templates/home.html           | 3 +++
 6 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/llm_server/database.py b/llm_server/database.py
index f5c2537..41c3d0d 100644
--- a/llm_server/database.py
+++ b/llm_server/database.py
@@ -160,7 +160,11 @@ def weighted_average_column_for_model(table_name, column_name, model_name, backe
 
     # if total_weight == 0:
     #     continue
 
-    calculated_avg = weighted_sum / total_weight
+    if total_weight > 0:
+        # Avoid division by zero
+        calculated_avg = weighted_sum / total_weight
+    else:
+        calculated_avg = 0
 
     conn.close()
diff --git a/llm_server/helpers.py b/llm_server/helpers.py
index 40dd81c..55df351 100644
--- a/llm_server/helpers.py
+++ b/llm_server/helpers.py
@@ -2,6 +2,8 @@ import json
 from collections import OrderedDict
 from pathlib import Path
 
+from flask import make_response
+
 
 def resolve_path(*p: str):
     return Path(*p).expanduser().resolve().absolute()
@@ -47,3 +49,11 @@ def indefinite_article(word):
         return 'an'
     else:
         return 'a'
+
+
+def jsonify_pretty(json_dict: dict, status=200, indent=4, sort_keys=True):
+    response = make_response(json.dumps(json_dict, indent=indent, sort_keys=sort_keys))
+    response.headers['Content-Type'] = 'application/json; charset=utf-8'
+    response.headers['mimetype'] = 'application/json'
+    response.status_code = status
+    return response
diff --git a/llm_server/llm/llm_backend.py b/llm_server/llm/llm_backend.py
index 53f43c9..d6a1f25 100644
--- a/llm_server/llm/llm_backend.py
+++ b/llm_server/llm/llm_backend.py
@@ -33,7 +33,6 @@ class LLMBackend:
     @staticmethod
     def validate_prompt(prompt: str) -> Tuple[bool, Union[str, None]]:
         prompt_len = len(tokenizer.encode(prompt))
-        print(prompt_len, opts.context_size)
         if prompt_len > opts.context_size - 10:  # Our tokenizer isn't 100% accurate so we cut it down a bit. TODO: add a tokenizer endpoint to VLLM
             return False, f'Token indices sequence length is longer than the specified maximum sequence length for this model ({prompt_len} > {opts.context_size}). Please lower your context size'
         return True, None
diff --git a/llm_server/routes/v1/proxy.py b/llm_server/routes/v1/proxy.py
index acd0797..05bb534 100644
--- a/llm_server/routes/v1/proxy.py
+++ b/llm_server/routes/v1/proxy.py
@@ -3,9 +3,10 @@ from flask import jsonify
 from . import bp
 from .generate_stats import generate_stats
 from ..cache import cache
+from ...helpers import jsonify_pretty
 
 
 @bp.route('/stats', methods=['GET'])
 @cache.cached(timeout=5, query_string=True)
 def get_stats():
-    return jsonify(generate_stats()), 200
+    return jsonify_pretty(generate_stats())
diff --git a/other/vllm/README.md b/other/vllm/README.md
index eb59ae8..ce50a19 100644
--- a/other/vllm/README.md
+++ b/other/vllm/README.md
@@ -1,9 +1,14 @@
 ### Nginx
 
-1. Make sure your proxies all have a long timeout:
+Make sure your proxies all have a long timeout:
 ```
 proxy_read_timeout 300;
 proxy_connect_timeout 300;
 proxy_send_timeout 300;
 ```
-The LLM middleware has a request timeout of 120 so this longer timeout is to avoid any issues.
\ No newline at end of file
+
+The LLM middleware has a request timeout of 95 seconds, so this longer timeout avoids any issues.
+
+### Model Preparation
+
+Make sure your model's `tokenizer_config.json` has its maximum sequence length (for example `4096`) set equal to or greater than your token limit.
diff --git a/templates/home.html b/templates/home.html
index ffebc11..4b9c153 100644
--- a/templates/home.html
+++ b/templates/home.html
@@ -69,6 +69,9 @@
+
+
+{{ llm_middleware_name }}
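
For reference, here is a minimal standalone sketch of how a pretty-printing helper like the `jsonify_pretty` added in this patch can serve a stats route. The Flask app, route path, and sample stats dict below are illustrative placeholders, not the project's actual layout (the real project builds the payload in `generate_stats()` and registers the route on a blueprint).

```python
import json

from flask import Flask, make_response

app = Flask(__name__)


def jsonify_pretty(json_dict: dict, status=200, indent=4, sort_keys=True):
    # Serialize with indentation and sorted keys so the JSON output is human-readable,
    # then wrap it in a response with an explicit JSON content type.
    response = make_response(json.dumps(json_dict, indent=indent, sort_keys=sort_keys))
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    response.status_code = status
    return response


@app.route('/api/v1/stats', methods=['GET'])
def get_stats():
    # Placeholder stats dict; the real payload comes from generate_stats().
    return jsonify_pretty({'uptime': 123, 'proxy': {'queued': 0, 'processing': 1}})


if __name__ == '__main__':
    app.run()
```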