Merge cluster to master #3
|
@ -5,20 +5,20 @@ class DatabaseConnection:
|
||||||
host: str = None
|
host: str = None
|
||||||
username: str = None
|
username: str = None
|
||||||
password: str = None
|
password: str = None
|
||||||
database: str = None
|
database_name: str = None
|
||||||
|
|
||||||
def init_db(self, host, username, password, database):
|
def init_db(self, host, username, password, database_name):
|
||||||
self.host = host
|
self.host = host
|
||||||
self.username = username
|
self.username = username
|
||||||
self.password = password
|
self.password = password
|
||||||
self.database = database
|
self.database_name = database_name
|
||||||
|
|
||||||
def cursor(self):
|
def cursor(self):
|
||||||
db = pymysql.connect(
|
db = pymysql.connect(
|
||||||
host=self.host,
|
host=self.host,
|
||||||
user=self.username,
|
user=self.username,
|
||||||
password=self.password,
|
password=self.password,
|
||||||
database=self.database,
|
database=self.database_name,
|
||||||
charset='utf8mb4',
|
charset='utf8mb4',
|
||||||
autocommit=True,
|
autocommit=True,
|
||||||
)
|
)
|
||||||
|
|
|
@ -4,7 +4,6 @@ import flask
|
||||||
from flask import jsonify, request
|
from flask import jsonify, request
|
||||||
|
|
||||||
from llm_server import opts
|
from llm_server import opts
|
||||||
from llm_server.database.database import do_db_log
|
|
||||||
from llm_server.database.log_to_db import log_to_db
|
from llm_server.database.log_to_db import log_to_db
|
||||||
from llm_server.routes.helpers.client import format_sillytavern_err
|
from llm_server.routes.helpers.client import format_sillytavern_err
|
||||||
from llm_server.routes.request_handler import RequestHandler
|
from llm_server.routes.request_handler import RequestHandler
|
||||||
|
|
|
@ -35,6 +35,7 @@ def worker(backend_url):
|
||||||
redis.publish(event_id, 'begin')
|
redis.publish(event_id, 'begin')
|
||||||
for item in pubsub.listen():
|
for item in pubsub.listen():
|
||||||
if item['type'] == 'message' and item['data'].decode('utf-8') == 'finished':
|
if item['type'] == 'message' and item['data'].decode('utf-8') == 'finished':
|
||||||
|
# The streaming endpoint has said that it has finished
|
||||||
break
|
break
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
else:
|
else:
|
||||||
|
@ -47,6 +48,7 @@ def worker(backend_url):
|
||||||
finally:
|
finally:
|
||||||
decrement_ip_count(client_ip, 'processing_ips')
|
decrement_ip_count(client_ip, 'processing_ips')
|
||||||
decr_active_workers(selected_model, backend_url)
|
decr_active_workers(selected_model, backend_url)
|
||||||
|
print('Worker finished processing for', client_ip)
|
||||||
|
|
||||||
|
|
||||||
def start_workers(cluster: dict):
|
def start_workers(cluster: dict):
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import pickle
|
import pickle
|
||||||
|
import traceback
|
||||||
|
|
||||||
import redis
|
import redis
|
||||||
|
|
||||||
|
@ -17,6 +18,7 @@ def db_logger():
|
||||||
p.subscribe('database-logger')
|
p.subscribe('database-logger')
|
||||||
|
|
||||||
for message in p.listen():
|
for message in p.listen():
|
||||||
|
try:
|
||||||
if message['type'] == 'message':
|
if message['type'] == 'message':
|
||||||
data = pickle.loads(message['data'])
|
data = pickle.loads(message['data'])
|
||||||
function_name = data['function']
|
function_name = data['function']
|
||||||
|
@ -25,3 +27,5 @@ def db_logger():
|
||||||
|
|
||||||
if function_name == 'log_prompt':
|
if function_name == 'log_prompt':
|
||||||
do_db_log(*args, **kwargs)
|
do_db_log(*args, **kwargs)
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
|
@ -25,4 +25,4 @@ def console_printer():
|
||||||
processing_count += redis.get(k, default=0, dtype=int)
|
processing_count += redis.get(k, default=0, dtype=int)
|
||||||
backends = [k for k, v in cluster_config.all().items() if v['online']]
|
backends = [k for k, v in cluster_config.all().items() if v['online']]
|
||||||
logger.info(f'REQUEST QUEUE -> Processing: {processing_count} | Queued: {len(priority_queue)} | Backends Online: {len(backends)}')
|
logger.info(f'REQUEST QUEUE -> Processing: {processing_count} | Queued: {len(priority_queue)} | Backends Online: {len(backends)}')
|
||||||
time.sleep(1)
|
time.sleep(10)
|
||||||
|
|
|
@ -75,7 +75,8 @@ def stream_response(prompt, history):
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0,
|
temperature=0,
|
||||||
max_tokens=300,
|
max_tokens=300,
|
||||||
stream=True
|
stream=True,
|
||||||
|
headers={'LLM-Source': 'huggingface-demo'}
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
raise gr.Error("Failed to reach inference endpoint.")
|
raise gr.Error("Failed to reach inference endpoint.")
|
||||||
|
|
Reference in New Issue