Merge cluster to master #3
|
@ -37,4 +37,5 @@ show_backends = True
|
|||
background_homepage_cacher = True
|
||||
openai_moderation_timeout = 5
|
||||
prioritize_by_size = False
|
||||
cluster_workers = 0
|
||||
cluster_workers = 0
|
||||
redis_stream_timeout = 25000
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import json
|
||||
import pickle
|
||||
import ujson
|
||||
import time
|
||||
import traceback
|
||||
|
||||
|
@ -104,15 +104,15 @@ def openai_chat_completions(model_name=None):
|
|||
try:
|
||||
last_id = '0-0'
|
||||
while True:
|
||||
stream_data = stream_redis.xread({stream_name: last_id}, block=30000)
|
||||
stream_data = stream_redis.xread({stream_name: last_id}, block=opts.redis_stream_timeout)
|
||||
if not stream_data:
|
||||
print("No message received in 30 seconds, closing stream.")
|
||||
print(f"No message received in {opts.redis_stream_timeout / 1000} seconds, closing stream.")
|
||||
yield 'data: [DONE]\n\n'
|
||||
else:
|
||||
for stream_index, item in stream_data[0][1]:
|
||||
last_id = stream_index
|
||||
timestamp = int(stream_index.decode('utf-8').split('-')[0])
|
||||
data = pickle.loads(item[b'data'])
|
||||
data = ujson.loads(item[b'data'])
|
||||
if data['error']:
|
||||
yield 'data: [DONE]\n\n'
|
||||
return
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import pickle
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import simplejson as json
|
||||
import ujson
|
||||
from flask import Response, jsonify, request
|
||||
from redis import Redis
|
||||
|
||||
|
@ -150,15 +150,15 @@ def openai_completions(model_name=None):
|
|||
try:
|
||||
last_id = '0-0'
|
||||
while True:
|
||||
stream_data = stream_redis.xread({stream_name: last_id}, block=30000)
|
||||
stream_data = stream_redis.xread({stream_name: last_id}, block=opts.redis_stream_timeout)
|
||||
if not stream_data:
|
||||
print("No message received in 30 seconds, closing stream.")
|
||||
print(f"No message received in {opts.redis_stream_timeout / 1000} seconds, closing stream.")
|
||||
yield 'data: [DONE]\n\n'
|
||||
else:
|
||||
for stream_index, item in stream_data[0][1]:
|
||||
last_id = stream_index
|
||||
timestamp = int(stream_index.decode('utf-8').split('-')[0])
|
||||
data = pickle.loads(item[b'data'])
|
||||
data = ujson.loads(item[b'data'])
|
||||
if data['error']:
|
||||
yield 'data: [DONE]\n\n'
|
||||
return
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import json
|
||||
import pickle
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import ujson
|
||||
from flask import request
|
||||
from redis import Redis
|
||||
|
||||
|
@ -136,14 +136,14 @@ def do_stream(ws, model_name):
|
|||
try:
|
||||
last_id = '0-0' # The ID of the last entry we read.
|
||||
while True:
|
||||
stream_data = stream_redis.xread({stream_name: last_id}, block=30000)
|
||||
stream_data = stream_redis.xread({stream_name: last_id}, block=opts.redis_stream_timeout)
|
||||
if not stream_data:
|
||||
print("No message received in 30 seconds, closing stream.")
|
||||
print(f"No message received in {opts.redis_stream_timeout / 1000} seconds, closing stream.")
|
||||
return
|
||||
else:
|
||||
for stream_index, item in stream_data[0][1]:
|
||||
last_id = stream_index
|
||||
data = pickle.loads(item[b'data'])
|
||||
data = ujson.loads(item[b'data'])
|
||||
if data['error']:
|
||||
print(data['error'])
|
||||
send_err_and_quit('Encountered exception while streaming.')
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import json
|
||||
import pickle
|
||||
import threading
|
||||
import traceback
|
||||
from uuid import uuid4
|
||||
|
||||
import ujson
|
||||
from redis import Redis
|
||||
|
||||
from llm_server.cluster.cluster_config import cluster_config
|
||||
|
@ -51,13 +51,13 @@ def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str
|
|||
except IndexError:
|
||||
# ????
|
||||
continue
|
||||
stream_redis.xadd(stream_name, {'data': pickle.dumps({'new': new, 'completed': False, 'error': None})})
|
||||
stream_redis.xadd(stream_name, {'data': ujson.dumps({'new': new, 'completed': False, 'error': None})})
|
||||
except Exception as e:
|
||||
stream_redis.xadd(stream_name, {'data': pickle.dumps({'new': None, 'completed': True, 'error': f'{e.__class__.__name__}: {e}'})})
|
||||
stream_redis.xadd(stream_name, {'data': ujson.dumps({'new': None, 'completed': True, 'error': f'{e.__class__.__name__}: {e}'})})
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# Publish final message to Redis stream
|
||||
stream_redis.xadd(stream_name, {'data': pickle.dumps({'new': None, 'completed': True, 'error': None})})
|
||||
stream_redis.xadd(stream_name, {'data': ujson.dumps({'new': None, 'completed': True, 'error': None})})
|
||||
|
||||
|
||||
def worker(backend_url):
|
||||
|
|
|
@ -13,4 +13,5 @@ openai~=0.28.0
|
|||
urllib3~=2.0.4
|
||||
flask-sock==0.6.0
|
||||
gunicorn==21.2.0
|
||||
redis==5.0.1
|
||||
redis==5.0.1
|
||||
ujson==5.8.0
|
Reference in New Issue