fix division by 0, prettify /stats json, add js var to home

2023-09-16 17:37:43 -06:00 · 2023-09-16 17:37:43 -06:00 · 354ad8192d
parent 77edbe779c
commit 354ad8192d
6 changed files with 27 additions and 5 deletions
--- a/llm_server/database.py
+++ b/llm_server/database.py
@ -160,7 +160,11 @@ def weighted_average_column_for_model(table_name, column_name, model_name, backe
    # if total_weight == 0:
    #     continue
    if total_weight > 0:
        # Avoid division by zero
        calculated_avg = weighted_sum / total_weight
    else:
        calculated_avg = 0
    conn.close()
--- a/llm_server/helpers.py
+++ b/llm_server/helpers.py
@ -2,6 +2,8 @@ import json
 from collections import OrderedDict
 from pathlib import Path
 from flask import make_response
 def resolve_path(*p: str):
    """Join the given path segments and return the absolute, resolved path.

    A leading ``~`` is expanded to the user's home directory before the
    path is resolved.
    """
    joined = Path(*p)
    expanded = joined.expanduser()
    resolved = expanded.resolve()
    return resolved.absolute()
@ -47,3 +49,11 @@ def indefinite_article(word):
        return 'an'
    else:
        return 'a'
 def jsonify_pretty(json_dict: dict, status=200, indent=4, sort_keys=True):
    """Build a Flask response containing ``json_dict`` as indented JSON.

    :param json_dict: the data to serialize with ``json.dumps``.
    :param status: HTTP status code assigned to the response.
    :param indent: indentation width passed to ``json.dumps``.
    :param sort_keys: whether ``json.dumps`` should sort the keys.
    :return: the response object created by ``make_response``.
    """
    body = json.dumps(json_dict, indent=indent, sort_keys=sort_keys)
    response = make_response(body)
    # The MIME type is carried by Content-Type. A header literally named
    # "mimetype" is not a real HTTP header, so none is emitted.
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    response.status_code = status
    return response
--- a/llm_server/llm/llm_backend.py
+++ b/llm_server/llm/llm_backend.py
@ -33,7 +33,6 @@ class LLMBackend:
    @staticmethod
    def validate_prompt(prompt: str) -> Tuple[bool, Union[str, None]]:
        """Check that ``prompt`` fits within the configured context size.

        The prompt is encoded with the module's ``tokenizer`` and its token
        count is compared against ``opts.context_size`` minus a margin of 10.

        :param prompt: the prompt text to validate.
        :return: ``(True, None)`` when the prompt fits, otherwise
            ``(False, message)`` where ``message`` explains the overflow.
        """
        prompt_len = len(tokenizer.encode(prompt))
        if prompt_len > opts.context_size - 10:  # Our tokenizer isn't 100% accurate so we cut it down a bit. TODO: add a tokenizer endpoint to VLLM
            return False, f'Token indices sequence length is longer than the specified maximum sequence length for this model ({prompt_len} > {opts.context_size}). Please lower your context size'
        return True, None
--- a/llm_server/routes/v1/proxy.py
+++ b/llm_server/routes/v1/proxy.py
@ -3,9 +3,10 @@ from flask import jsonify
 from . import bp
 from .generate_stats import generate_stats
 from ..cache import cache
 from ...helpers import jsonify_pretty
@bp.route('/stats', methods=['GET'])
@cache.cached(timeout=5, query_string=True)
 def get_stats():
-    return jsonify(generate_stats()), 200
+    return jsonify_pretty(generate_stats())
--- a/other/vllm/README.md
+++ b/other/vllm/README.md
@ -1,9 +1,14 @@
 ### Nginx
-1. Make sure your proxies all have a long timeout:
+Make sure your proxies all have a long timeout:
 ```
 proxy_read_timeout 300;
 proxy_connect_timeout 300;
 proxy_send_timeout 300;
 ```
-The LLM middleware has a request timeout of 120 so this longer timeout is to avoid any issues.
+
 The LLM middleware has a request timeout of 95 so this longer timeout is to avoid any issues.
 ### Model Preparation
 Make sure your model's `tokenizer_config.json` has `model_max_length` set equal to or greater than your token limit (for example, `4096`).
--- a/templates/home.html
+++ b/templates/home.html
@ -69,6 +69,9 @@
 </head>
 <body>
 <script>
    const backend_online = {% if current_model != 'offline' %}true{% else %}false{% endif %};
 </script>
 <div class="container">
    <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>