Merge cluster to master #3
|
@ -43,6 +43,8 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
|
|||
|
||||
### Use
|
||||
|
||||
Flask may raise unusual errors when the server is started directly with `python server.py`. These appear to come from Flask-Socket. Running the server under Gunicorn seems to fix the issue: `gunicorn -b :5000 --worker-class gevent server:app`
|
||||
|
||||
|
||||
|
||||
### To Do
|
||||
|
|
|
@ -73,7 +73,7 @@ def get_model_choices(regen: bool = False):
|
|||
default_estimated_wait_sec = calculate_wait_time(default_average_generation_elapsed_sec, default_proompters_in_queue, default_backend_info['concurrent_gens'], default_active_gen_workers)
|
||||
|
||||
default_backend_dict = {
|
||||
'client_api': f'https://{base_client_api}/v1',
|
||||
'client_api': f'https://{base_client_api}',
|
||||
'ws_client_api': f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else None,
|
||||
'openai_client_api': f'https://{base_client_api}/openai' if opts.enable_openi_compatible_backend else 'disabled',
|
||||
'estimated_wait': default_estimated_wait_sec,
|
||||
|
|
|
@ -18,12 +18,13 @@ from ...sock import sock
|
|||
# Stacking multiple @sock.route() decorators raises a TypeError on the /v1/stream endpoint.
|
||||
# We solve this by splitting the routes into separate handler functions.
|
||||
|
||||
@bp.route('/stream')
|
||||
def stream():
|
||||
@bp.route('/v1/stream')
|
||||
@bp.route('/<model_name>/v1/stream')
|
||||
def stream(model_name=None):
|
||||
return 'This is a websocket endpoint.', 400
|
||||
|
||||
|
||||
@sock.route('/stream', bp=bp)
|
||||
@sock.route('/v1/stream', bp=bp)
|
||||
def stream_without_model(ws):
|
||||
do_stream(ws, model_name=None)
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
flask~=2.3.3
|
||||
pyyaml~=6.0.1
|
||||
flask_caching
|
||||
Flask-Caching==2.0.2
|
||||
requests~=2.31.0
|
||||
tiktoken~=0.5.0
|
||||
gevent~=23.9.0.post1
|
||||
|
|
|
@ -68,9 +68,9 @@ from llm_server.llm import redis
|
|||
from llm_server.routes.v1.generate_stats import generate_stats
|
||||
|
||||
app = Flask(__name__)
|
||||
init_socketio(app)
|
||||
app.register_blueprint(bp, url_prefix='/api/')
|
||||
app.register_blueprint(openai_bp, url_prefix='/api/openai/v1/')
|
||||
init_socketio(app)
|
||||
flask_cache.init_app(app)
|
||||
flask_cache.clear()
|
||||
|
||||
|
@ -131,7 +131,7 @@ def home():
|
|||
default_active_gen_workers=default_backend_info['processing'],
|
||||
default_proompters_in_queue=default_backend_info['queued'],
|
||||
current_model=opts.manual_model_name if opts.manual_model_name else None, # else running_model,
|
||||
client_api=f'https://{base_client_api}/v1',
|
||||
client_api=f'https://{base_client_api}',
|
||||
ws_client_api=f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else 'disabled',
|
||||
default_estimated_wait=default_estimated_wait_sec,
|
||||
mode_name=mode_ui_names[opts.mode][0],
|
||||
|
|
Reference in New Issue