fix streaming slowdown?
This commit is contained in:
parent
c3c053e071
commit
9e3cbc9d2e
|
@ -157,8 +157,8 @@ def openai_chat_completions(model_name=None):
|
|||
traceback.print_exc()
|
||||
yield 'data: [DONE]\n\n'
|
||||
finally:
|
||||
# if event:
|
||||
# redis.lpush(f'notifications:{event.event_id}', 'canceled')
|
||||
if event:
|
||||
redis.publish(f'notifications:{event.event_id}', 'canceled')
|
||||
stream_redis.delete(stream_name)
|
||||
|
||||
return Response(generate(), mimetype='text/event-stream')
|
||||
|
|
|
@ -205,7 +205,7 @@ def openai_completions(model_name=None):
|
|||
yield 'data: [DONE]\n\n'
|
||||
finally:
|
||||
if event:
|
||||
redis.lpush(f'notifications:{event.event_id}', 'canceled')
|
||||
redis.publish(f'notifications:{event.event_id}', 'canceled')
|
||||
stream_redis.delete(stream_name)
|
||||
|
||||
return Response(generate(), mimetype='text/event-stream')
|
||||
|
|
|
@ -185,8 +185,8 @@ def do_stream(ws, model_name):
|
|||
backend_url=handler.backend_url
|
||||
)
|
||||
finally:
|
||||
# if event_id:
|
||||
# redis.lpush(f'notifications:{event_id}', 'canceled')
|
||||
if event_id:
|
||||
redis.publish(f'notifications:{event_id}', 'canceled')
|
||||
try:
|
||||
# Must close the connection or greenlets will complain.
|
||||
ws.close()
|
||||
|
|
|
@ -23,7 +23,8 @@ def get_stream_name(name: str):
|
|||
def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str, event_id: str):
|
||||
prompt = msg_to_backend['prompt']
|
||||
stream_name = get_stream_name(stream_name)
|
||||
redis.delete(f'notifications:{event_id}')
|
||||
pubsub = redis.pubsub()
|
||||
pubsub.subscribe(f'notifications:{event_id}')
|
||||
stream_redis.delete(get_stream_name(stream_name)) # be extra sure
|
||||
try:
|
||||
response = generator(msg_to_backend, backend_url)
|
||||
|
@ -33,11 +34,11 @@ def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str
|
|||
# If there is no more data, break the loop
|
||||
if not chunk:
|
||||
break
|
||||
# message = redis.lpop(f'notifications:{event_id}')
|
||||
# if message and message.decode('utf-8') == 'canceled':
|
||||
# print('Client canceled generation')
|
||||
# response.close()
|
||||
# return
|
||||
message = pubsub.get_message()
|
||||
if message and message['data'] == b'canceled':
|
||||
print('Client canceled generation')
|
||||
response.close()
|
||||
return
|
||||
|
||||
partial_response += chunk
|
||||
if partial_response.endswith(b'\x00'):
|
||||
|
|
Reference in New Issue