diff --git a/llm_server/routes/openai/chat_completions.py b/llm_server/routes/openai/chat_completions.py
index 5e5921a..fcad3a0 100644
--- a/llm_server/routes/openai/chat_completions.py
+++ b/llm_server/routes/openai/chat_completions.py
@@ -157,8 +157,8 @@ def openai_chat_completions(model_name=None):
                     traceback.print_exc()
                     yield 'data: [DONE]\n\n'
                 finally:
-                    if event:
-                        redis.lpush(f'notifications:{event.event_id}', 'canceled')
+                    # if event:
+                    #     redis.lpush(f'notifications:{event.event_id}', 'canceled')
                     stream_redis.delete(stream_name)
 
             return Response(generate(), mimetype='text/event-stream')
diff --git a/llm_server/routes/v1/generate_stream.py b/llm_server/routes/v1/generate_stream.py
index 29eb281..e329cd8 100644
--- a/llm_server/routes/v1/generate_stream.py
+++ b/llm_server/routes/v1/generate_stream.py
@@ -185,8 +185,8 @@ def do_stream(ws, model_name):
                         backend_url=handler.backend_url
                     )
     finally:
-        if event_id:
-            redis.lpush(f'notifications:{event_id}', 'canceled')
+        # if event_id:
+        #     redis.lpush(f'notifications:{event_id}', 'canceled')
         try:
             # Must close the connection or greenlets will complain.
             ws.close()
diff --git a/llm_server/workers/inferencer.py b/llm_server/workers/inferencer.py
index b6e94dc..3d05dc2 100644
--- a/llm_server/workers/inferencer.py
+++ b/llm_server/workers/inferencer.py
@@ -33,11 +33,11 @@ def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str
             # If there is no more data, break the loop
             if not chunk:
                 break
-            message = redis.lpop(f'notifications:{event_id}')
-            if message and message.decode('utf-8') == 'canceled':
-                print('Client canceled generation')
-                response.close()
-                return
+            # message = redis.lpop(f'notifications:{event_id}')
+            # if message and message.decode('utf-8') == 'canceled':
+            #     print('Client canceled generation')
+            #     response.close()
+            #     return
             partial_response += chunk
             if partial_response.endswith(b'\x00'):
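For context, the hunks above disable a Redis-list cancellation channel: when a client disconnects, the route handlers push 'canceled' onto notifications:{event_id}, and the inference worker pops that list between streamed chunks and aborts the backend request if it sees the message. A minimal sketch of that pattern, assuming a redis-py client; the event_id plumbing and the streaming loop are illustrative, not the project's actual code:

# Sketch of the cancellation channel the patch comments out (redis-py assumed).
import redis

r = redis.Redis()

def notify_canceled(event_id: str) -> None:
    # Route side: the client went away, signal the worker to stop.
    r.lpush(f'notifications:{event_id}', 'canceled')

def should_stop(event_id: str) -> bool:
    # Worker side: non-blocking check between chunks.
    message = r.lpop(f'notifications:{event_id}')
    return bool(message) and message.decode('utf-8') == 'canceled'

With these calls commented out, the worker no longer sees a cancel signal and keeps streaming from the backend after the client disconnects; only the stream_redis.delete(stream_name) cleanup in the finally blocks remains.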