Don't pickle streaming data; serialize Redis stream messages as JSON (ujson) instead

This commit is contained in:
Cyberes 2023-10-16 18:35:10 -06:00
parent 81baf9616f
commit 806e522d16
6 changed files with 20 additions and 18 deletions

View File

@ -37,4 +37,5 @@ show_backends = True
background_homepage_cacher = True
openai_moderation_timeout = 5
prioritize_by_size = False
cluster_workers = 0
cluster_workers = 0
redis_stream_timeout = 25000

View File

@ -1,5 +1,5 @@
import json
import pickle
import ujson
import time
import traceback
@ -104,15 +104,15 @@ def openai_chat_completions(model_name=None):
try:
last_id = '0-0'
while True:
stream_data = stream_redis.xread({stream_name: last_id}, block=30000)
stream_data = stream_redis.xread({stream_name: last_id}, block=opts.redis_stream_timeout)
if not stream_data:
print("No message received in 30 seconds, closing stream.")
print(f"No message received in {opts.redis_stream_timeout / 1000} seconds, closing stream.")
yield 'data: [DONE]\n\n'
else:
for stream_index, item in stream_data[0][1]:
last_id = stream_index
timestamp = int(stream_index.decode('utf-8').split('-')[0])
data = pickle.loads(item[b'data'])
data = ujson.loads(item[b'data'])
if data['error']:
yield 'data: [DONE]\n\n'
return

View File

@ -1,8 +1,8 @@
import pickle
import time
import traceback
import simplejson as json
import ujson
from flask import Response, jsonify, request
from redis import Redis
@ -150,15 +150,15 @@ def openai_completions(model_name=None):
try:
last_id = '0-0'
while True:
stream_data = stream_redis.xread({stream_name: last_id}, block=30000)
stream_data = stream_redis.xread({stream_name: last_id}, block=opts.redis_stream_timeout)
if not stream_data:
print("No message received in 30 seconds, closing stream.")
print(f"No message received in {opts.redis_stream_timeout / 1000} seconds, closing stream.")
yield 'data: [DONE]\n\n'
else:
for stream_index, item in stream_data[0][1]:
last_id = stream_index
timestamp = int(stream_index.decode('utf-8').split('-')[0])
data = pickle.loads(item[b'data'])
data = ujson.loads(item[b'data'])
if data['error']:
yield 'data: [DONE]\n\n'
return

View File

@ -1,8 +1,8 @@
import json
import pickle
import time
import traceback
import ujson
from flask import request
from redis import Redis
@ -136,14 +136,14 @@ def do_stream(ws, model_name):
try:
last_id = '0-0' # The ID of the last entry we read.
while True:
stream_data = stream_redis.xread({stream_name: last_id}, block=30000)
stream_data = stream_redis.xread({stream_name: last_id}, block=opts.redis_stream_timeout)
if not stream_data:
print("No message received in 30 seconds, closing stream.")
print(f"No message received in {opts.redis_stream_timeout / 1000} seconds, closing stream.")
return
else:
for stream_index, item in stream_data[0][1]:
last_id = stream_index
data = pickle.loads(item[b'data'])
data = ujson.loads(item[b'data'])
if data['error']:
print(data['error'])
send_err_and_quit('Encountered exception while streaming.')

View File

@ -1,9 +1,9 @@
import json
import pickle
import threading
import traceback
from uuid import uuid4
import ujson
from redis import Redis
from llm_server.cluster.cluster_config import cluster_config
@ -51,13 +51,13 @@ def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str
except IndexError:
# ????
continue
stream_redis.xadd(stream_name, {'data': pickle.dumps({'new': new, 'completed': False, 'error': None})})
stream_redis.xadd(stream_name, {'data': ujson.dumps({'new': new, 'completed': False, 'error': None})})
except Exception as e:
stream_redis.xadd(stream_name, {'data': pickle.dumps({'new': None, 'completed': True, 'error': f'{e.__class__.__name__}: {e}'})})
stream_redis.xadd(stream_name, {'data': ujson.dumps({'new': None, 'completed': True, 'error': f'{e.__class__.__name__}: {e}'})})
traceback.print_exc()
finally:
# Publish final message to Redis stream
stream_redis.xadd(stream_name, {'data': pickle.dumps({'new': None, 'completed': True, 'error': None})})
stream_redis.xadd(stream_name, {'data': ujson.dumps({'new': None, 'completed': True, 'error': None})})
def worker(backend_url):

View File

@ -13,4 +13,5 @@ openai~=0.28.0
urllib3~=2.0.4
flask-sock==0.6.0
gunicorn==21.2.0
redis==5.0.1
redis==5.0.1
ujson==5.8.0