From 2ed0e01db6bbcfd7784542f029ff8ff8f90a6ac8 Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Mon, 16 Oct 2023 23:44:11 -0600
Subject: [PATCH] background thread

---
 llm_server/workers/inferencer.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/llm_server/workers/inferencer.py b/llm_server/workers/inferencer.py
index 4bc1472..b4bff23 100644
--- a/llm_server/workers/inferencer.py
+++ b/llm_server/workers/inferencer.py
@@ -1,5 +1,6 @@
 import json
 import threading
+import time
 import traceback
 from uuid import uuid4
 
@@ -16,6 +17,23 @@ stream_redis = Redis(db=8)
 STREAM_NAME_PREFIX = 'stream'
 
 
+def check_cancellation(event, event_id):
+    """
+    This thread checks the pub/sub channel in the background so the main process
+    isn't bogged down with Redis calls.
+    :param event:
+    :param event_id:
+    :return:
+    """
+    pubsub = redis.pubsub()
+    pubsub.subscribe(f'notifications:{event_id}')
+    while not event.is_set():
+        message = pubsub.get_message()
+        if message and message['data'] == b'canceled':
+            event.set()
+        time.sleep(0.5)  # check every half second
+
+
 def get_stream_name(name: str):
     return f'{STREAM_NAME_PREFIX}:{name}'
 
@@ -23,9 +41,10 @@ def get_stream_name(name: str):
 def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str, event_id: str):
     prompt = msg_to_backend['prompt']
     stream_name = get_stream_name(stream_name)
-    pubsub = redis.pubsub()
-    pubsub.subscribe(f'notifications:{event_id}')
     stream_redis.delete(get_stream_name(stream_name))  # be extra sure
+    event = threading.Event()
+    t = threading.Thread(target=check_cancellation, args=(event, event_id))
+    t.start()
     try:
         response = generator(msg_to_backend, backend_url)
         generated_text = ''
@@ -34,8 +53,7 @@ def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str
             # If there is no more data, break the loop
             if not chunk:
                 break
-            message = pubsub.get_message(timeout=0.001)
-            if message and message['data'] == b'canceled':
+            if event.is_set():
                 print('Client canceled generation')
                 response.close()
                 return
@@ -59,6 +77,7 @@ def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str
     finally:
         # Publish final message to Redis stream
        stream_redis.xadd(stream_name, {'data': ujson.dumps({'new': None, 'completed': True, 'error': None})})
+        event.set()  # stop the cancellation checking thread
 
 
 def worker(backend_url):
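
Below is a minimal sketch of the other half of this cancellation channel: the code that publishes the 'canceled' notification that the new check_cancellation() thread polls for. The patch itself only adds the subscriber side, so the cancel_generation() helper, the bare Redis() connection, and the event_id value shown here are assumptions for illustration, not part of the commit.

    from redis import Redis

    # Assumed to point at the same Redis instance/db that the `redis` object in
    # inferencer.py subscribes through.
    redis = Redis()


    def cancel_generation(event_id: str):
        # Hypothetical helper: publishing 'canceled' on this channel is what the
        # background check_cancellation() thread watches for. Once it sees the
        # message it sets the threading.Event, and inference_do_stream() closes
        # the backend response on the next streamed chunk.
        redis.publish(f'notifications:{event_id}', 'canceled')


    # Hypothetical usage: event_id is whatever ID the request handler passed
    # into inference_do_stream() for this stream.
    cancel_generation('example-event-id')

The point of the change, per the new docstring, is that checking an in-process threading.Event on every chunk is much cheaper for the streaming loop than calling pubsub.get_message() on every chunk; the half-second polling now happens in the background thread instead.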