rename, more stats

commit a525093c75
parent a9b7a7a2c7
@@ -18,7 +18,7 @@ def prepare_json(json_data: dict):
     return {
         'inputs': json_data.get('prompt', ''),
         'parameters': {
-            'max_new_tokens': opts.token_limit - token_count,
+            'max_new_tokens': opts.context_size - token_count,
             'repetition_penalty': json_data.get('repetition_penalty', None),
             'seed': seed,
             'stop': json_data.get('stopping_strings', []),
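The renamed option feeds directly into the request payload. As a rough illustration (hypothetical values, not part of the commit): with the default context_size of 5555 and a 55-token prompt, prepare_json() would produce something like:

# Hypothetical payload sketch; key names come from the hunk above, values are invented.
payload = {
    'inputs': 'Once upon a time',
    'parameters': {
        'max_new_tokens': 5500,  # opts.context_size (5555) - token_count (55)
        'repetition_penalty': 1.1,
        'seed': None,
        'stop': ['\nUser:'],
    },
}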
@@ -4,7 +4,7 @@ running_model = 'none'
 concurrent_gens = 3
 mode = 'oobabooga'
 backend_url = None
-token_limit = 5555
+context_size = 5555
 database_path = './proxy-server.db'
 auth_required = False
 log_prompts = False
@@ -64,7 +64,7 @@ def generate():
     backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
     if not backend_response:
         if opts.mode == 'oobabooga':
-            backend_response = format_sillytavern_err(f'Backend returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.token_limit}.', 'error')
+            backend_response = format_sillytavern_err(f'Backend returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}.', 'error')
             response_json_body['results'][0]['text'] = backend_response
         else:
             raise Exception
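safe_list_get() is not defined anywhere in this diff; a helper by that name is usually the standard "list.get with a default" recipe, along these lines (an assumption about the repo, not taken from the commit):

# Assumed sketch of the safe_list_get() helper referenced above.
def safe_list_get(lst: list, idx: int, default):
    try:
        return lst[idx]
    except IndexError:
        return default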
@@ -17,21 +17,30 @@ from ...llm.info import get_running_model
 @cache.cached(timeout=60, query_string=True)
 @cache_control(60)
 def get_stats():
-    model_list = get_running_model()
+    model_list = get_running_model()  # will return False when the fetch fails
     if isinstance(model_list, bool):
-        # get_running_model() will return False when the fetch fails
         online = False
     else:
         online = True

     return jsonify({
         'stats': {
             'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
             'proompters_1_min': proompters_1_min,
             'total_proompts': stats.proompts.value,
             'uptime': int((datetime.now() - stats.start_time).total_seconds()),
         },
         'online': online,
         'mode': opts.mode,
         'model': get_running_model(),
-        'client': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
-        'timestamp': int(time.time())
+        'endpoints': {
+            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
+        },
+        'timestamp': int(time.time()),
+        'openaiKeys': '∞',
+        'anthropicKeys': '∞',
+        'config': {
+            'gatekeeper': 'none' if opts.auth_required is False else 'token',
+            'context_size': opts.context_size,
+        }
     }), 200
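Taken together, the reshaped stats endpoint would return JSON roughly like the following (all values invented for illustration; "∞" is the literal string served for the key counts):

{
  "stats": {
    "proompters_now": 2,
    "proompters_1_min": 14,
    "total_proompts": 1337,
    "uptime": 86400
  },
  "online": true,
  "mode": "oobabooga",
  "model": "llama-13b",
  "endpoints": {
    "blocking": "https://example.com/api/v1"
  },
  "timestamp": 1692000000,
  "openaiKeys": "∞",
  "anthropicKeys": "∞",
  "config": {
    "gatekeeper": "none",
    "context_size": 5555
  }
}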
@@ -46,7 +46,7 @@ opts.auth_required = config['auth_required']
 opts.log_prompts = config['log_prompts']
 opts.concurrent_gens = config['concurrent_gens']
 opts.frontend_api_client = config['frontend_api_client']
-opts.token_limit = config['token_limit']
+opts.context_size = config['token_limit']

 app = Flask(__name__)
 cache.init_app(app)
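Note that only the attribute on opts is renamed here; the value is still read from the old token_limit config key, so existing config files keep working unchanged. A minimal config covering the keys this block reads might look like this (key names from the hunk, values illustrative; frontend_api_client is a guess):

# Hypothetical config dict consumed by the block above.
config = {
    'auth_required': False,
    'log_prompts': False,
    'concurrent_gens': 3,
    'frontend_api_client': '/api/v1',
    'token_limit': 5555,  # old key name, now mapped onto opts.context_size
}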