rename, more stats

author Cyberes
date 2023-08-22 20:42:38 -06:00
parent a9b7a7a2c7
commit a525093c75
5 changed files with 21 additions and 12 deletions

View File

@@ -18,7 +18,7 @@ def prepare_json(json_data: dict):
     return {
         'inputs': json_data.get('prompt', ''),
         'parameters': {
-            'max_new_tokens': opts.token_limit - token_count,
+            'max_new_tokens': opts.context_size - token_count,
             'repetition_penalty': json_data.get('repetition_penalty', None),
             'seed': seed,
             'stop': json_data.get('stopping_strings', []),
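The renamed setting still caps generation at whatever room is left in the context window after the prompt. A rough illustration of the arithmetic (the numbers are hypothetical, not taken from the repo):

    # Hypothetical numbers, just to illustrate the max_new_tokens calculation above.
    context_size = 5555   # opts.context_size (default shown in opts.py below)
    token_count = 1200    # tokens already consumed by the prompt
    max_new_tokens = context_size - token_count   # 4355 tokens left for the completion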

View File

@@ -4,7 +4,7 @@ running_model = 'none'
 concurrent_gens = 3
 mode = 'oobabooga'
 backend_url = None
-token_limit = 5555
+context_size = 5555
 database_path = './proxy-server.db'
 auth_required = False
 log_prompts = False
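These are module-level defaults in what appears to be the shared settings module; other files import it and read the attributes directly. A minimal sketch of how the renamed attribute is consumed elsewhere (the import path is an assumption, not taken from the repo):

    import opts  # the settings module shown above; the real import path may differ

    def remaining_tokens(prompt_token_count: int) -> int:
        # Mirrors the prepare_json() change: room left in the context window.
        return opts.context_size - prompt_token_count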

View File

@@ -64,7 +64,7 @@ def generate():
     backend_response = safe_list_get(response_json_body.get('results', []), 0, {}).get('text')
     if not backend_response:
         if opts.mode == 'oobabooga':
-            backend_response = format_sillytavern_err(f'Backend returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.token_limit}.', 'error')
+            backend_response = format_sillytavern_err(f'Backend returned an empty string. This can happen when your parameters are incorrect. Make sure your context size is no greater than {opts.context_size}.', 'error')
             response_json_body['results'][0]['text'] = backend_response
         else:
             raise Exception
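safe_list_get() is defined elsewhere in the repo; a plausible sketch of such a helper (an assumption about its behavior, not the actual implementation) is an index lookup that returns a default instead of raising IndexError:

    def safe_list_get(lst, idx, default=None):
        # Return lst[idx] when it exists, otherwise the default value.
        try:
            return lst[idx]
        except IndexError:
            return default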

View File

@@ -17,21 +17,30 @@ from ...llm.info import get_running_model
 @cache.cached(timeout=60, query_string=True)
 @cache_control(60)
 def get_stats():
-    model_list = get_running_model()
+    model_list = get_running_model()  # will return False when the fetch fails
     if isinstance(model_list, bool):
-        # get_running_model() will return False when the fetch fails
         online = False
     else:
         online = True
     return jsonify({
-        'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
-        'proompters_1_min': proompters_1_min,
-        'total_proompts': stats.proompts.value,
-        'uptime': int((datetime.now() - stats.start_time).total_seconds()),
+        'stats': {
+            'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
+            'proompters_1_min': proompters_1_min,
+            'total_proompts': stats.proompts.value,
+            'uptime': int((datetime.now() - stats.start_time).total_seconds()),
+        },
         'online': online,
         'mode': opts.mode,
         'model': get_running_model(),
-        'client': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
-        'timestamp': int(time.time())
+        'endpoints': {
+            'blocking': f'https://{request.headers.get("Host")}/{opts.frontend_api_client.strip("/")}',
+        },
+        'timestamp': int(time.time()),
+        'openaiKeys': '',
+        'anthropicKeys': '',
+        'config': {
+            'gatekeeper': 'none' if opts.auth_required is False else 'token',
+            'context_size': opts.context_size,
+        }
     }), 200
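The counters the endpoint used to return at the top level are now nested under 'stats', and the payload also advertises the blocking endpoint URL, empty key placeholders, and a small config block. An illustrative example of the new response shape, written as a Python dict (every value below is made up):

    example_response = {
        'stats': {
            'proompters_now': 1,
            'proompters_1_min': 4,
            'total_proompts': 1234,
            'uptime': 86400,
        },
        'online': True,
        'mode': 'oobabooga',
        'model': 'example-13b',
        'endpoints': {
            'blocking': 'https://proxy.example.com/api',
        },
        'timestamp': 1692758558,
        'openaiKeys': '',
        'anthropicKeys': '',
        'config': {
            'gatekeeper': 'none',
            'context_size': 5555,
        },
    }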

View File

@@ -46,7 +46,7 @@ opts.auth_required = config['auth_required']
 opts.log_prompts = config['log_prompts']
 opts.concurrent_gens = config['concurrent_gens']
 opts.frontend_api_client = config['frontend_api_client']
-opts.token_limit = config['token_limit']
+opts.context_size = config['token_limit']
 app = Flask(__name__)
 cache.init_app(app)
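Note that only the runtime attribute is renamed: the config file still uses the old token_limit key, which the loader maps onto opts.context_size. If the key were ever renamed on disk as well, a hypothetical transition shim (not part of this commit; assumes config is a plain dict) could accept either name:

    # Hypothetical: prefer the new key name, fall back to the old one, then the default.
    opts.context_size = config.get('context_size', config.get('token_limit', 5555))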