Merge cluster to master #3
|
@ -43,6 +43,8 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
|
||||||
|
|
||||||
### Use
|
### Use
|
||||||
|
|
||||||
|
Flask may give unusual errors when running `python server.py`. These appear to come from Flask-Socket. Running the server with Gunicorn seems to fix the issue: `gunicorn -b :5000 --worker-class gevent server:app`
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### To Do
|
### To Do
|
||||||
|
|
|
@ -73,7 +73,7 @@ def get_model_choices(regen: bool = False):
|
||||||
default_estimated_wait_sec = calculate_wait_time(default_average_generation_elapsed_sec, default_proompters_in_queue, default_backend_info['concurrent_gens'], default_active_gen_workers)
|
default_estimated_wait_sec = calculate_wait_time(default_average_generation_elapsed_sec, default_proompters_in_queue, default_backend_info['concurrent_gens'], default_active_gen_workers)
|
||||||
|
|
||||||
default_backend_dict = {
|
default_backend_dict = {
|
||||||
'client_api': f'https://{base_client_api}/v1',
|
'client_api': f'https://{base_client_api}',
|
||||||
'ws_client_api': f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else None,
|
'ws_client_api': f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else None,
|
||||||
'openai_client_api': f'https://{base_client_api}/openai' if opts.enable_openi_compatible_backend else 'disabled',
|
'openai_client_api': f'https://{base_client_api}/openai' if opts.enable_openi_compatible_backend else 'disabled',
|
||||||
'estimated_wait': default_estimated_wait_sec,
|
'estimated_wait': default_estimated_wait_sec,
|
||||||
|
|
|
@ -18,12 +18,13 @@ from ...sock import sock
|
||||||
# Stacking the @sock.route() decorators causes a TypeError on the /v1/stream endpoint.
|
# Stacking the @sock.route() decorators causes a TypeError on the /v1/stream endpoint.
|
||||||
# We solve this by splitting the routes
|
# We solve this by splitting the routes
|
||||||
|
|
||||||
@bp.route('/stream')
|
@bp.route('/v1/stream')
|
||||||
def stream():
|
@bp.route('/<model_name>/v1/stream')
|
||||||
|
def stream(model_name=None):
|
||||||
return 'This is a websocket endpoint.', 400
|
return 'This is a websocket endpoint.', 400
|
||||||
|
|
||||||
|
|
||||||
@sock.route('/stream', bp=bp)
|
@sock.route('/v1/stream', bp=bp)
|
||||||
def stream_without_model(ws):
|
def stream_without_model(ws):
|
||||||
do_stream(ws, model_name=None)
|
do_stream(ws, model_name=None)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
flask~=2.3.3
|
flask~=2.3.3
|
||||||
pyyaml~=6.0.1
|
pyyaml~=6.0.1
|
||||||
flask_caching
|
Flask-Caching==2.0.2
|
||||||
requests~=2.31.0
|
requests~=2.31.0
|
||||||
tiktoken~=0.5.0
|
tiktoken~=0.5.0
|
||||||
gevent~=23.9.0.post1
|
gevent~=23.9.0.post1
|
||||||
|
|
|
@ -68,9 +68,9 @@ from llm_server.llm import redis
|
||||||
from llm_server.routes.v1.generate_stats import generate_stats
|
from llm_server.routes.v1.generate_stats import generate_stats
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
init_socketio(app)
|
|
||||||
app.register_blueprint(bp, url_prefix='/api/')
|
app.register_blueprint(bp, url_prefix='/api/')
|
||||||
app.register_blueprint(openai_bp, url_prefix='/api/openai/v1/')
|
app.register_blueprint(openai_bp, url_prefix='/api/openai/v1/')
|
||||||
|
init_socketio(app)
|
||||||
flask_cache.init_app(app)
|
flask_cache.init_app(app)
|
||||||
flask_cache.clear()
|
flask_cache.clear()
|
||||||
|
|
||||||
|
@ -131,7 +131,7 @@ def home():
|
||||||
default_active_gen_workers=default_backend_info['processing'],
|
default_active_gen_workers=default_backend_info['processing'],
|
||||||
default_proompters_in_queue=default_backend_info['queued'],
|
default_proompters_in_queue=default_backend_info['queued'],
|
||||||
current_model=opts.manual_model_name if opts.manual_model_name else None, # else running_model,
|
current_model=opts.manual_model_name if opts.manual_model_name else None, # else running_model,
|
||||||
client_api=f'https://{base_client_api}/v1',
|
client_api=f'https://{base_client_api}',
|
||||||
ws_client_api=f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else 'disabled',
|
ws_client_api=f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else 'disabled',
|
||||||
default_estimated_wait=default_estimated_wait_sec,
|
default_estimated_wait=default_estimated_wait_sec,
|
||||||
mode_name=mode_ui_names[opts.mode][0],
|
mode_name=mode_ui_names[opts.mode][0],
|
||||||
|
|
Reference in New Issue