clean up streaming

Cyberes 2023-10-03 14:10:50 -06:00
parent e16f415749
commit 33b4b8404b
2 changed files with 18 additions and 58 deletions


@@ -159,23 +159,10 @@ def do_stream(ws, model_name):
                         }))
                     except:
                         # The client has closed the stream.
-                        if request:
+                        if response:
                             # Cancel the backend?
-                            request.close()
-                        end_time = time.time()
-                        elapsed_time = end_time - start_time
-                        log_prompt(ip=handler.client_ip,
-                                   token=handler.token,
-                                   prompt=input_prompt,
-                                   response=generated_text,
-                                   gen_time=elapsed_time,
-                                   parameters=handler.parameters,
-                                   headers=r_headers,
-                                   backend_response_code=response_status_code,
-                                   request_url=r_url,
-                                   backend_url=handler.backend_url,
-                                   response_tokens=None
-                                   )
+                            response.close()
+                        # used to log here
                         return
                     message_num += 1
@@ -184,22 +171,9 @@ def do_stream(ws, model_name):
                 # If there is no more data, break the loop
                 if not chunk:
                     break
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            log_prompt(ip=handler.client_ip,
-                       token=handler.token,
-                       prompt=input_prompt,
-                       response=generated_text,
-                       gen_time=elapsed_time,
-                       parameters=handler.parameters,
-                       headers=r_headers,
-                       backend_response_code=response_status_code,
-                       request_url=r_url,
-                       backend_url=handler.backend_url,
-                       response_tokens=None,
-                       is_error=not response
-                       )
+            if response:
+                response.close()
+            # used to log here
         except:
             traceback.print_exc()
             generated_text = generated_text + '\n\n' + handler.handle_error('Encountered error while streaming.', 'exception')[0].json['results'][0]['text']
@@ -208,33 +182,19 @@ def do_stream(ws, model_name):
                 'message_num': message_num,
                 'text': generated_text
             }))
-            if request:
-                request.close()
-            log_prompt(ip=handler.client_ip,
-                       token=handler.token,
-                       prompt=input_prompt,
-                       response=generated_text,
-                       gen_time=None,
-                       parameters=handler.parameters,
-                       headers=r_headers,
-                       backend_response_code=response_status_code,
-                       request_url=r_url,
-                       backend_url=handler.backend_url,
-                       response_tokens=None,
-                       is_error=True
-                       )
-            return
+            # used to log here
         finally:
             # The worker incremented it, we'll decrement it.
             decrement_ip_count(handler.client_ip, 'processing_ips')
             decr_active_workers(handler.selected_model, handler.backend_url)
         try:
             ws.send(json.dumps({
                 'event': 'stream_end',
                 'message_num': message_num
             }))
         except:
             # The client closed the stream.
+            pass
         end_time = time.time()
         elapsed_time = end_time - start_time
         log_prompt(ip=handler.client_ip,
@@ -246,8 +206,7 @@ def do_stream(ws, model_name):
                    headers=r_headers,
                    backend_response_code=response_status_code,
                    request_url=r_url,
-                   backend_url=handler.backend_url,
-                   response_tokens=None
+                   backend_url=handler.backend_url
                    )
     finally:
         try:
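
Taken together, the changes in this file drop the duplicated per-branch log_prompt(...) calls (marked "# used to log here") and, when the client disconnects, close the backend response object rather than request. Below is a minimal, self-contained sketch of that general pattern; stream_to_client, send_to_client, backend_chunks, and record_prompt are hypothetical stand-ins for this project's do_stream, ws.send, backend response, and log_prompt, and the single logging point is an illustration of the idea rather than the commit's exact control flow.

import time

def record_prompt(response_text, gen_time, is_error=False):
    # Hypothetical stand-in for log_prompt(): invoked from exactly one place.
    print(f'logged: {len(response_text)} chars in {gen_time:.2f}s, error={is_error}')

def stream_to_client(backend_chunks, send_to_client, close_backend):
    # Push chunks to the client; if the client disconnects, cancel the backend
    # instead of logging inline on every exit path.
    start_time = time.time()
    generated_text = ''
    error = False
    try:
        for chunk in backend_chunks:
            generated_text += chunk
            try:
                send_to_client(chunk)
            except Exception:
                # The client has closed the stream. Cancel the backend and stop.
                close_backend()
                return generated_text
    except Exception:
        error = True
        raise
    finally:
        # One logging point instead of a copy of the logging call in each branch.
        record_prompt(generated_text, time.time() - start_time, is_error=error)
    return generated_text

if __name__ == '__main__':
    stream_to_client(['Hello', ', ', 'world'], send_to_client=print, close_backend=lambda: None)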


@@ -24,7 +24,8 @@ from llm_server.routes.server_error import handle_server_error
 from llm_server.routes.v1 import bp
 from llm_server.sock import init_socketio
+# TODO: make sure system tokens are excluded from 5/24 hr proompters
+# TODO: redis SCAN vs KEYS??
 # TODO: implement blind RRD controlled via header and only used when there is a queue on the primary backend(s)
 # TODO: is frequency penalty the same as ooba repetition penalty???
 # TODO: make sure openai_moderation_enabled works on websockets, completions, and chat completions
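
On the "redis SCAN vs KEYS??" question: KEYS walks the entire keyspace in a single blocking command, while SCAN iterates it in small batches, so SCAN is normally the safer choice on a live server. A short sketch with redis-py follows; the token:* pattern and connection settings are illustrative only and not taken from this project.

import redis

r = redis.Redis(host='localhost', port=6379, db=0)

# KEYS returns every match at once and blocks the server while it scans.
blocking_matches = r.keys('token:*')

# SCAN (via redis-py's scan_iter) walks the keyspace incrementally in batches,
# letting other commands interleave between iterations.
incremental_matches = list(r.scan_iter(match='token:*', count=100))

print(len(blocking_matches), len(incremental_matches))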