fix division by 0, prettify /stats json, add js var to home
This commit is contained in:
parent
77edbe779c
commit
354ad8192d
|
@ -160,7 +160,11 @@ def weighted_average_column_for_model(table_name, column_name, model_name, backe
|
||||||
# if total_weight == 0:
|
# if total_weight == 0:
|
||||||
# continue
|
# continue
|
||||||
|
|
||||||
|
if total_weight > 0:
|
||||||
|
# Avoid division by zero
|
||||||
calculated_avg = weighted_sum / total_weight
|
calculated_avg = weighted_sum / total_weight
|
||||||
|
else:
|
||||||
|
calculated_avg = 0
|
||||||
|
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,8 @@ import json
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from flask import make_response
|
||||||
|
|
||||||
|
|
||||||
def resolve_path(*p: str):
|
def resolve_path(*p: str):
|
||||||
return Path(*p).expanduser().resolve().absolute()
|
return Path(*p).expanduser().resolve().absolute()
|
||||||
|
@ -47,3 +49,11 @@ def indefinite_article(word):
|
||||||
return 'an'
|
return 'an'
|
||||||
else:
|
else:
|
||||||
return 'a'
|
return 'a'
|
||||||
|
|
||||||
|
|
||||||
|
def jsonify_pretty(json_dict: dict, status=200, indent=4, sort_keys=True):
|
||||||
|
response = make_response(json.dumps(json_dict, indent=indent, sort_keys=sort_keys))
|
||||||
|
response.headers['Content-Type'] = 'application/json; charset=utf-8'
|
||||||
|
response.headers['mimetype'] = 'application/json'
|
||||||
|
response.status_code = status
|
||||||
|
return response
|
||||||
|
|
|
@ -33,7 +33,6 @@ class LLMBackend:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def validate_prompt(prompt: str) -> Tuple[bool, Union[str, None]]:
|
def validate_prompt(prompt: str) -> Tuple[bool, Union[str, None]]:
|
||||||
prompt_len = len(tokenizer.encode(prompt))
|
prompt_len = len(tokenizer.encode(prompt))
|
||||||
print(prompt_len, opts.context_size)
|
|
||||||
if prompt_len > opts.context_size - 10: # Our tokenizer isn't 100% accurate so we cut it down a bit. TODO: add a tokenizer endpoint to VLLM
|
if prompt_len > opts.context_size - 10: # Our tokenizer isn't 100% accurate so we cut it down a bit. TODO: add a tokenizer endpoint to VLLM
|
||||||
return False, f'Token indices sequence length is longer than the specified maximum sequence length for this model ({prompt_len} > {opts.context_size}). Please lower your context size'
|
return False, f'Token indices sequence length is longer than the specified maximum sequence length for this model ({prompt_len} > {opts.context_size}). Please lower your context size'
|
||||||
return True, None
|
return True, None
|
||||||
|
|
|
@ -3,9 +3,10 @@ from flask import jsonify
|
||||||
from . import bp
|
from . import bp
|
||||||
from .generate_stats import generate_stats
|
from .generate_stats import generate_stats
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
|
from ...helpers import jsonify_pretty
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/stats', methods=['GET'])
|
@bp.route('/stats', methods=['GET'])
|
||||||
@cache.cached(timeout=5, query_string=True)
|
@cache.cached(timeout=5, query_string=True)
|
||||||
def get_stats():
|
def get_stats():
|
||||||
return jsonify(generate_stats()), 200
|
return jsonify_pretty(generate_stats())
|
||||||
|
|
|
@ -1,9 +1,14 @@
|
||||||
### Nginx
|
### Nginx
|
||||||
|
|
||||||
1. Make sure your proxies all have a long timeout:
|
Make sure your proxies all have a long timeout:
|
||||||
```
|
```
|
||||||
proxy_read_timeout 300;
|
proxy_read_timeout 300;
|
||||||
proxy_connect_timeout 300;
|
proxy_connect_timeout 300;
|
||||||
proxy_send_timeout 300;
|
proxy_send_timeout 300;
|
||||||
```
|
```
|
||||||
The LLM middleware has a request timeout of 120 so this longer timeout is to avoid any issues.
|
|
||||||
|
The LLM middleware has a request timeout of 95 seconds, so this longer proxy timeout is there to avoid any issues.
|
||||||
|
|
||||||
|
### Model Preparation
|
||||||
|
|
||||||
|
Make sure your model's `tokenizer_config.json` has `model_max_length` set equal to or greater than your token limit (for example, `4096`).
|
||||||
|
|
|
@ -69,6 +69,9 @@
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
|
<script>
|
||||||
|
const backend_online = {% if current_model != 'offline' %}true{% else %}false{% endif %};
|
||||||
|
</script>
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>
|
<h1 style="text-align: center;margin-top: 0;">{{ llm_middleware_name }}</h1>
|
||||||
|
|
||||||
|
|
Reference in New Issue