fix division by 0, prettify /stats json, add js var to home
parent 77edbe779c
commit 354ad8192d
@@ -160,7 +160,11 @@ def weighted_average_column_for_model(table_name, column_name, model_name, backe
         # if total_weight == 0:
         #     continue
 
-        calculated_avg = weighted_sum / total_weight
+        if total_weight > 0:
+            # Avoid division by zero
+            calculated_avg = weighted_sum / total_weight
+        else:
+            calculated_avg = 0
 
     conn.close()
 
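For context, a minimal standalone sketch of what the guard accomplishes; the `rows` input and function shape are illustrative assumptions, not the repo's actual `weighted_average_column_for_model`:

```python
def weighted_average(rows):
    """sum(value * weight) / sum(weight), or 0 when nothing carries weight."""
    weighted_sum = sum(value * weight for value, weight in rows)
    total_weight = sum(weight for _, weight in rows)
    if total_weight > 0:
        # Avoid division by zero when all weights are zero or there are no rows
        return weighted_sum / total_weight
    return 0


print(weighted_average([(10, 2), (20, 1)]))  # 13.33...
print(weighted_average([]))                  # 0 instead of ZeroDivisionError
```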
@@ -2,6 +2,8 @@ import json
 from collections import OrderedDict
 from pathlib import Path
 
+from flask import make_response
+
 
 def resolve_path(*p: str):
     return Path(*p).expanduser().resolve().absolute()
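A quick, hypothetical illustration of what `resolve_path` does with its arguments (the path used here does not exist in the repo):

```python
from pathlib import Path


def resolve_path(*p: str):
    return Path(*p).expanduser().resolve().absolute()


# "~" is expanded and the result is normalized to an absolute path,
# e.g. /home/user/llm-proxy/config.yml
print(resolve_path('~', 'llm-proxy', 'config.yml'))
```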
@@ -47,3 +49,11 @@ def indefinite_article(word):
         return 'an'
     else:
         return 'a'
+
+
+def jsonify_pretty(json_dict: dict, status=200, indent=4, sort_keys=True):
+    response = make_response(json.dumps(json_dict, indent=indent, sort_keys=sort_keys))
+    response.headers['Content-Type'] = 'application/json; charset=utf-8'
+    response.headers['mimetype'] = 'application/json'
+    response.status_code = status
+    return response
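A hedged usage sketch of the new helper; the route name and payload are made up, and the helper is restated inline so the snippet runs on its own:

```python
import json

from flask import Flask, make_response

app = Flask(__name__)


def jsonify_pretty(json_dict: dict, status=200, indent=4, sort_keys=True):
    response = make_response(json.dumps(json_dict, indent=indent, sort_keys=sort_keys))
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    response.status_code = status
    return response


@app.route('/stats-example')
def stats_example():
    # Indented, key-sorted body instead of Flask's compact jsonify() output
    return jsonify_pretty({'uptime': 123, 'backend_online': True})
```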
@@ -33,7 +33,6 @@ class LLMBackend:
     @staticmethod
     def validate_prompt(prompt: str) -> Tuple[bool, Union[str, None]]:
         prompt_len = len(tokenizer.encode(prompt))
-        print(prompt_len, opts.context_size)
         if prompt_len > opts.context_size - 10:  # Our tokenizer isn't 100% accurate so we cut it down a bit. TODO: add a tokenizer endpoint to VLLM
             return False, f'Token indices sequence length is longer than the specified maximum sequence length for this model ({prompt_len} > {opts.context_size}). Please lower your context size'
         return True, None
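A hedged sketch of how a caller might consume the `(ok, error)` tuple this validator returns; `backend.generate` and the response shape are hypothetical, not taken from the repo:

```python
def handle_generate(prompt: str, backend) -> dict:
    # validate_prompt() returns (True, None) on success or (False, message) when the
    # prompt exceeds the configured context size
    ok, error = backend.validate_prompt(prompt)
    if not ok:
        return {'success': False, 'error': error}
    return {'success': True, 'response': backend.generate(prompt)}
```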
@@ -3,9 +3,10 @@ from flask import jsonify
 from . import bp
 from .generate_stats import generate_stats
 from ..cache import cache
+from ...helpers import jsonify_pretty
 
 
 @bp.route('/stats', methods=['GET'])
 @cache.cached(timeout=5, query_string=True)
 def get_stats():
-    return jsonify(generate_stats()), 200
+    return jsonify_pretty(generate_stats())
@@ -1,9 +1,14 @@
 ### Nginx
 
-1. Make sure your proxies all have a long timeout:
+Make sure your proxies all have a long timeout:
 ```
 proxy_read_timeout 300;
 proxy_connect_timeout 300;
 proxy_send_timeout 300;
 ```
-The LLM middleware has a request timeout of 120 so this longer timeout is to avoid any issues.
+The LLM middleware has a request timeout of 95 seconds, so this longer proxy timeout avoids any issues.
+
+### Model Preparation
+
+Make sure the `4096` value in your model's `tokenizer_config.json` is set equal to or greater than your token limit.
+
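A hedged sketch of checking that limit programmatically; it assumes the limit is stored under the usual Hugging Face `model_max_length` key, which may not match every model's config:

```python
import json
from pathlib import Path

TOKEN_LIMIT = 4096  # whatever context size the middleware is configured for

config = json.loads(Path('tokenizer_config.json').read_text())
# Hugging Face tokenizer configs usually keep the maximum sequence length here
max_len = int(config.get('model_max_length', 0))

if max_len < TOKEN_LIMIT:
    raise SystemExit(f'tokenizer allows only {max_len} tokens, need >= {TOKEN_LIMIT}')
print(f'OK: tokenizer allows {max_len} tokens')
```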
@@ -69,6 +69,9 @@
 </head>
 
 <body>
+    <script>
+        const backend_online = {% if current_model != 'offline' %}true{% else %}false{% endif %};
+    </script>
     <div class="container">
         <h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>
 
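For context, a hedged sketch of the server side that would feed `current_model` into this template; the route, template name, and hard-coded values are assumptions for illustration only:

```python
from flask import Flask, render_template

app = Flask(__name__)


@app.route('/')
def home():
    # In the real app this would reflect the backend's status; hard-coded here.
    # 'offline' makes the template emit `const backend_online = false;`.
    current_model = 'offline'
    return render_template('home.html',
                           llm_middleware_name='my-llm-middleware',
                           current_model=current_model)
```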