rename, more stats
This commit is contained in:
parent
a9b7a7a2c7
commit
a525093c75
|
@ -18,7 +18,7 @@ def prepare_json(json_data: dict):
|
||||||
return {
|
return {
|
||||||
'inputs': json_data.get('prompt', ''),
|
'inputs': json_data.get('prompt', ''),
|
||||||
'parameters': {
|
'parameters': {
|
||||||
'max_new_tokens': opts.token_limit - token_count,
|
'max_new_tokens': opts.context_size - token_count,
|
||||||
'repetition_penalty': json_data.get('repetition_penalty', None),
|
'repetition_penalty': json_data.get('repetition_penalty', None),
|
||||||
'seed': seed,
|
'seed': seed,
|
||||||
'stop': json_data.get('stopping_strings', []),
|
'stop': json_data.get('stopping_strings', []),
|
||||||
|
|
|
@ -4,7 +4,7 @@ running_model = 'none'
|
||||||
concurrent_gens = 3
|
concurrent_gens = 3
|
||||||
mode = 'oobabooga'
|
mode = 'oobabooga'
|
||||||
backend_url = None
|
backend_url = None
|
||||||
token_limit = 5555
|
context_size = 5555
|
||||||
database_path = './proxy-server.db'
|
database_path = './proxy-server.db'
|
||||||
auth_required = False
|
auth_required = False
|
||||||
log_prompts = False
|
log_prompts = False
|
||||||
|
|
|
@ -64,7 +64,7 @@ def generate():
|
||||||
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
|
backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
|
||||||
if not backend_response:
|
if not backend_response:
|
||||||
if opts.mode == 'oobabooga':
|
if opts.mode == 'oobabooga':
|
||||||
backend_response = format_sillytavern_err(f'Backend returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.token_limit}.', 'error')
|
backend_response = format_sillytavern_err(f'Backend returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}.', 'error')
|
||||||
response_json_body['results'][0]['text'] = backend_response
|
response_json_body['results'][0]['text'] = backend_response
|
||||||
else:
|
else:
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
|
@ -17,21 +17,30 @@ from ...llm.info import get_running_model
|
||||||
@cache.cached(timeout=60, query_string=True)
|
@cache.cached(timeout=60, query_string=True)
|
||||||
@cache_control(60)
|
@cache_control(60)
|
||||||
def get_stats():
|
def get_stats():
|
||||||
model_list = get_running_model()
|
model_list = get_running_model() # will return False when the fetch fails
|
||||||
if isinstance(model_list, bool):
|
if isinstance(model_list, bool):
|
||||||
# get_running_model() will return False when the fetch fails
|
|
||||||
online = False
|
online = False
|
||||||
else:
|
else:
|
||||||
online = True
|
online = True
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
|
'stats': {
|
||||||
'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
|
'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
|
||||||
'proompters_1_min': proompters_1_min,
|
'proompters_1_min': proompters_1_min,
|
||||||
'total_proompts': stats.proompts.value,
|
'total_proompts': stats.proompts.value,
|
||||||
'uptime': int((datetime.now() - stats.start_time).total_seconds()),
|
'uptime': int((datetime.now() - stats.start_time).total_seconds()),
|
||||||
|
},
|
||||||
'online': online,
|
'online': online,
|
||||||
'mode': opts.mode,
|
'mode': opts.mode,
|
||||||
'model': get_running_model(),
|
'model': get_running_model(),
|
||||||
'client': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
|
'endpoints': {
|
||||||
'timestamp': int(time.time())
|
'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
|
||||||
|
},
|
||||||
|
'timestamp': int(time.time()),
|
||||||
|
'openaiKeys': '∞',
|
||||||
|
'anthropicKeys': '∞',
|
||||||
|
'config': {
|
||||||
|
'gatekeeper': 'none' if opts.auth_required is False else 'token',
|
||||||
|
'context_size': opts.context_size,
|
||||||
|
}
|
||||||
}), 200
|
}), 200
|
||||||
|
|
|
@ -46,7 +46,7 @@ opts.auth_required = config['auth_required']
|
||||||
opts.log_prompts = config['log_prompts']
|
opts.log_prompts = config['log_prompts']
|
||||||
opts.concurrent_gens = config['concurrent_gens']
|
opts.concurrent_gens = config['concurrent_gens']
|
||||||
opts.frontend_api_client = config['frontend_api_client']
|
opts.frontend_api_client = config['frontend_api_client']
|
||||||
opts.token_limit = config['token_limit']
|
opts.context_size = config['token_limit']
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
cache.init_app(app)
|
cache.init_app(app)
|
||||||
|
|
Reference in New Issue