fix division by 0, prettify /stats json, add js var to home

Cyberes 2023-09-16 17:37:43 -06:00
parent 77edbe779c
commit 354ad8192d
6 changed files with 27 additions and 5 deletions

View File

@ -160,7 +160,11 @@ def weighted_average_column_for_model(table_name, column_name, model_name, backe
  # if total_weight == 0:
  # continue
- calculated_avg = weighted_sum / total_weight
+ if total_weight > 0:
+     # Avoid division by zero
+     calculated_avg = weighted_sum / total_weight
+ else:
+     calculated_avg = 0
  conn.close()
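
For reference, a minimal self-contained sketch of the behavior this hunk changes, using hypothetical (value, weight) rows rather than the real database query: with an all-zero weight column the old code raised `ZeroDivisionError`, while the new branch reports 0.

```
# Hypothetical rows standing in for the query results: (value, weight) pairs.
rows = [(12.5, 0), (7.0, 0)]  # every weight is zero, the case that used to crash

weighted_sum = sum(value * weight for value, weight in rows)
total_weight = sum(weight for _, weight in rows)

if total_weight > 0:
    # Normal case: ordinary weighted average.
    calculated_avg = weighted_sum / total_weight
else:
    # Zero total weight: fall back to 0 instead of raising ZeroDivisionError.
    calculated_avg = 0

print(calculated_avg)  # 0
```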

View File

@ -2,6 +2,8 @@ import json
  from collections import OrderedDict
  from pathlib import Path
+ from flask import make_response

  def resolve_path(*p: str):
      return Path(*p).expanduser().resolve().absolute()
@ -47,3 +49,11 @@ def indefinite_article(word):
          return 'an'
      else:
          return 'a'
+
+
+ def jsonify_pretty(json_dict: dict, status=200, indent=4, sort_keys=True):
+     response = make_response(json.dumps(json_dict, indent=indent, sort_keys=sort_keys))
+     response.headers['Content-Type'] = 'application/json; charset=utf-8'
+     response.headers['mimetype'] = 'application/json'
+     response.status_code = status
+     return response
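
A quick sketch of the formatting difference (the `stats` dict below is made up, not the real `generate_stats()` output): Flask's `jsonify` emits a compact body, while `json.dumps` with `indent` and `sort_keys`, which `jsonify_pretty` wraps in a response, emits an indented, key-sorted one.

```
import json

# Made-up stand-in for the real generate_stats() payload.
stats = {'queue': {'processing': 1, 'queued': 0}, 'model': 'example-model'}

# Compact form, roughly what flask.jsonify returns by default:
print(json.dumps(stats))
# {"queue": {"processing": 1, "queued": 0}, "model": "example-model"}

# Pretty form, as produced inside jsonify_pretty:
print(json.dumps(stats, indent=4, sort_keys=True))
# {
#     "model": "example-model",
#     "queue": {
#         "processing": 1,
#         "queued": 0
#     }
# }
```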

View File

@ -33,7 +33,6 @@ class LLMBackend:
      @staticmethod
      def validate_prompt(prompt: str) -> Tuple[bool, Union[str, None]]:
          prompt_len = len(tokenizer.encode(prompt))
-         print(prompt_len, opts.context_size)
          if prompt_len > opts.context_size - 10: # Our tokenizer isn't 100% accurate so we cut it down a bit. TODO: add a tokenizer endpoint to VLLM
              return False, f'Token indices sequence length is longer than the specified maximum sequence length for this model ({prompt_len} > {opts.context_size}). Please lower your context size'
          return True, None
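
A self-contained sketch of the same length check, with a whitespace-split stand-in for `tokenizer.encode` and an assumed context size, to show how the 10-token safety margin behaves:

```
CONTEXT_SIZE = 4096  # assumed value standing in for opts.context_size

def count_tokens(prompt: str) -> int:
    # Placeholder for tokenizer.encode(); the real tokenizer is only approximate,
    # which is why validate_prompt keeps a 10-token margin below the limit.
    return len(prompt.split())

def validate_prompt(prompt: str):
    prompt_len = count_tokens(prompt)
    if prompt_len > CONTEXT_SIZE - 10:
        return False, f'Prompt is too long ({prompt_len} > {CONTEXT_SIZE}). Please lower your context size'
    return True, None

print(validate_prompt('hello world'))      # (True, None)
print(validate_prompt('word ' * 5000)[0])  # False
```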

View File

@ -3,9 +3,10 @@ from flask import jsonify
  from . import bp
  from .generate_stats import generate_stats
  from ..cache import cache
+ from ...helpers import jsonify_pretty

  @bp.route('/stats', methods=['GET'])
  @cache.cached(timeout=5, query_string=True)
  def get_stats():
-     return jsonify(generate_stats()), 200
+     return jsonify_pretty(generate_stats())

View File

@ -1,9 +1,14 @@
  ### Nginx
- 1. Make sure your proxies all have a long timeout:
+ Make sure your proxies all have a long timeout:
  ```
  proxy_read_timeout 300;
  proxy_connect_timeout 300;
  proxy_send_timeout 300;
  ```
- The LLM middleware has a request timeout of 120 so this longer timeout is to avoid any issues.
+ The LLM middleware has a request timeout of 95 so this longer timeout is to avoid any issues.
+
+ ### Model Preperation
+ Make sure your model's `tokenizer_config.json` has `4096` set equal to or greater than your token limit.
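
A small sketch of how that could be checked; it assumes the limit is stored under the `model_max_length` key, as in typical Hugging Face `tokenizer_config.json` files, which this README change does not spell out:

```
import json

TOKEN_LIMIT = 4096  # the context size you intend to serve

with open('tokenizer_config.json') as f:
    config = json.load(f)

# `model_max_length` is an assumption about where the limit lives in the config.
if config.get('model_max_length', 0) < TOKEN_LIMIT:
    print('Warning: tokenizer_config.json allows fewer tokens than your configured limit')
```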

View File

@ -69,6 +69,9 @@
  </head>
  <body>
+ <script>
+     const backend_online = {% if current_model != 'offline' %}true{% else %}false{% endif %};
+ </script>
  <div class="container">
      <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>