clean up streaming

This commit is contained in:
Cyberes 2023-10-03 14:10:50 -06:00
parent e16f415749
commit 33b4b8404b
2 changed files with 18 additions and 58 deletions

View File

@ -159,23 +159,10 @@ def do_stream(ws, model_name):
}))
except:
# The client has closed the stream.
if request:
if response:
# Cancel the backend?
request.close()
end_time = time.time()
elapsed_time = end_time - start_time
log_prompt(ip=handler.client_ip,
token=handler.token,
prompt=input_prompt,
response=generated_text,
gen_time=elapsed_time,
parameters=handler.parameters,
headers=r_headers,
backend_response_code=response_status_code,
request_url=r_url,
backend_url=handler.backend_url,
response_tokens=None
)
response.close()
# used to log here
return
message_num += 1
@ -184,22 +171,9 @@ def do_stream(ws, model_name):
# If there is no more data, break the loop
if not chunk:
break
end_time = time.time()
elapsed_time = end_time - start_time
log_prompt(ip=handler.client_ip,
token=handler.token,
prompt=input_prompt,
response=generated_text,
gen_time=elapsed_time,
parameters=handler.parameters,
headers=r_headers,
backend_response_code=response_status_code,
request_url=r_url,
backend_url=handler.backend_url,
response_tokens=None,
is_error=not response
)
if response:
response.close()
# used to log here
except:
traceback.print_exc()
generated_text = generated_text + '\n\n' + handler.handle_error('Encountered error while streaming.', 'exception')[0].json['results'][0]['text']
@ -208,33 +182,19 @@ def do_stream(ws, model_name):
'message_num': message_num,
'text': generated_text
}))
if request:
request.close()
log_prompt(ip=handler.client_ip,
token=handler.token,
prompt=input_prompt,
response=generated_text,
gen_time=None,
parameters=handler.parameters,
headers=r_headers,
backend_response_code=response_status_code,
request_url=r_url,
backend_url=handler.backend_url,
response_tokens=None,
is_error=True
)
return
# used to log here
finally:
# The worker incremented it, we'll decrement it.
decrement_ip_count(handler.client_ip, 'processing_ips')
decr_active_workers(handler.selected_model, handler.backend_url)
try:
ws.send(json.dumps({
'event': 'stream_end',
'message_num': message_num
}))
except:
# The client closed the stream.
try:
ws.send(json.dumps({
'event': 'stream_end',
'message_num': message_num
}))
except:
# The client closed the stream.
pass
end_time = time.time()
elapsed_time = end_time - start_time
log_prompt(ip=handler.client_ip,
@ -246,8 +206,7 @@ def do_stream(ws, model_name):
headers=r_headers,
backend_response_code=response_status_code,
request_url=r_url,
backend_url=handler.backend_url,
response_tokens=None
backend_url=handler.backend_url
)
finally:
try:

View File

@ -24,7 +24,8 @@ from llm_server.routes.server_error import handle_server_error
from llm_server.routes.v1 import bp
from llm_server.sock import init_socketio
# TODO: make sure system tokens are excluded from 5/24 hr proompters
# TODO: redis SCAN vs KEYS??
# TODO: implement blind RRD controlled via header and only used when there is a queue on the primary backend(s)
# TODO: is frequency penalty the same as ooba repetition penalty???
# TODO: make sure openai_moderation_enabled works on websockets, completions, and chat completions