Merge cluster to master #3

Merged
cyberes merged 163 commits from cluster into master on 2023-10-27 19:19:22 -06:00
5 changed files with 10 additions and 7 deletions
Showing only changes of commit d203973e80


@@ -43,6 +43,8 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
 ### Use
+Flask may give unusual errors when running `python server.py`. I think this is coming from Flask-Socket. Running with Gunicorn seems to fix the issue: `gunicorn -b :5000 --worker-class gevent server:app`
+
 ### To Do
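For reference, a minimal sketch of what `server:app` in that Gunicorn command points at; this is an illustration, not the repo's actual server.py:

# server.py -- illustrative sketch only. "server:app" in the Gunicorn
# command means: import the module `server` and use its `app` attribute
# as the WSGI application.
from flask import Flask

app = Flask(__name__)

if __name__ == '__main__':
    # `python server.py` launches Flask's built-in dev server, which is
    # where the errors mentioned above reportedly appear; Gunicorn's
    # gevent worker avoids them.
    app.run(port=5000)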


@@ -73,7 +73,7 @@ def get_model_choices(regen: bool = False):
     default_estimated_wait_sec = calculate_wait_time(default_average_generation_elapsed_sec, default_proompters_in_queue, default_backend_info['concurrent_gens'], default_active_gen_workers)
     default_backend_dict = {
-        'client_api': f'https://{base_client_api}/v1',
+        'client_api': f'https://{base_client_api}',
         'ws_client_api': f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else None,
         'openai_client_api': f'https://{base_client_api}/openai' if opts.enable_openi_compatible_backend else 'disabled',
         'estimated_wait': default_estimated_wait_sec,
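The only change in this hunk drops the `/v1` suffix from `client_api`. A quick illustration, using a hypothetical hostname (base_client_api actually comes from the proxy's config):

# Hypothetical value for illustration only.
base_client_api = 'proxy.example.com/api'
f'https://{base_client_api}/v1'  # before: https://proxy.example.com/api/v1
f'https://{base_client_api}'     # after:  https://proxy.example.com/api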


@@ -18,12 +18,13 @@ from ...sock import sock
 # Stacking the @sock.route() creates a TypeError error on the /v1/stream endpoint.
 # We solve this by splitting the routes
-@bp.route('/stream')
-def stream():
+@bp.route('/v1/stream')
+@bp.route('/<model_name>/v1/stream')
+def stream(model_name=None):
     return 'This is a websocket endpoint.', 400

-@sock.route('/stream', bp=bp)
+@sock.route('/v1/stream', bp=bp)
 def stream_without_model(ws):
     do_stream(ws, model_name=None)
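A self-contained sketch of the split-route workaround those comments describe, assuming Flask-Sock; do_stream() is a stand-in here, and the with-model handler is a hypothetical counterpart shown only to illustrate the pattern:

from flask import Blueprint
from flask_sock import Sock

bp = Blueprint('v1', __name__)
sock = Sock()

def do_stream(ws, model_name):
    # Stand-in for the repo's real streaming logic.
    ws.send(f'streaming from model: {model_name}')

# One handler per path: stacking two @sock.route() decorators on a single
# function is what triggered the TypeError mentioned above.
@sock.route('/v1/stream', bp=bp)
def stream_without_model(ws):
    do_stream(ws, model_name=None)

@sock.route('/<model_name>/v1/stream', bp=bp)  # hypothetical counterpart
def stream_with_model(ws, model_name):
    do_stream(ws, model_name=model_name)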


@@ -1,6 +1,6 @@
 flask~=2.3.3
 pyyaml~=6.0.1
-flask_caching
+Flask-Caching==2.0.2
 requests~=2.31.0
 tiktoken~=0.5.0
 gevent~=23.9.0.post1
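This pins the previously floating flask_caching entry to an exact version. For reference, a minimal sketch of how Flask-Caching is typically wired up, matching the flask_cache.init_app(app) and flask_cache.clear() calls in server.py below; the SimpleCache backend is an assumption for this sketch:

from flask import Flask
from flask_caching import Cache

app = Flask(__name__)
# CACHE_TYPE is an assumption; the repo may configure a different backend.
flask_cache = Cache(app, config={'CACHE_TYPE': 'SimpleCache'})

with app.app_context():
    flask_cache.clear()  # mirrors the flask_cache.clear() call in server.py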


@@ -68,9 +68,9 @@ from llm_server.llm import redis
 from llm_server.routes.v1.generate_stats import generate_stats

 app = Flask(__name__)
-init_socketio(app)
 app.register_blueprint(bp, url_prefix='/api/')
 app.register_blueprint(openai_bp, url_prefix='/api/openai/v1/')
+init_socketio(app)
 flask_cache.init_app(app)
 flask_cache.clear()
@@ -131,7 +131,7 @@ def home():
         default_active_gen_workers=default_backend_info['processing'],
         default_proompters_in_queue=default_backend_info['queued'],
         current_model=opts.manual_model_name if opts.manual_model_name else None,  # else running_model,
-        client_api=f'https://{base_client_api}/v1',
+        client_api=f'https://{base_client_api}',
         ws_client_api=f'wss://{base_client_api}/v1/stream' if opts.enable_streaming else 'disabled',
         default_estimated_wait=default_estimated_wait_sec,
         mode_name=mode_ui_names[opts.mode][0],