Merge cluster to master #3
|
@ -222,7 +222,7 @@ class RequestHandler:
|
||||||
processing_ip = 0
|
processing_ip = 0
|
||||||
|
|
||||||
if queued_ip_count + processing_ip >= self.token_simultaneous_ip:
|
if queued_ip_count + processing_ip >= self.token_simultaneous_ip:
|
||||||
print(f'Rejecting request from {self.client_ip} - {queued_ip_count} queued, {processing_ip} processing.')
|
print(f'Rejecting request from {self.client_ip} - {processing_ip} processing, {queued_ip_count} queued')
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -25,6 +25,7 @@ def console_printer():
|
||||||
for k in processing:
|
for k in processing:
|
||||||
processing_count += redis.get(k, default=0, dtype=int)
|
processing_count += redis.get(k, default=0, dtype=int)
|
||||||
backends = [k for k, v in cluster_config.all().items() if v['online']]
|
backends = [k for k, v in cluster_config.all().items() if v['online']]
|
||||||
|
activity = priority_queue.activity()
|
||||||
|
|
||||||
# Calculate the queue size the same way it's done on the stats.
|
# Calculate the queue size the same way it's done on the stats.
|
||||||
queue_size = 0
|
queue_size = 0
|
||||||
|
@ -32,8 +33,6 @@ def console_printer():
|
||||||
for model in running_models:
|
for model in running_models:
|
||||||
queue_size += priority_queue.len(model)
|
queue_size += priority_queue.len(model)
|
||||||
|
|
||||||
activity = priority_queue.activity()
|
|
||||||
|
|
||||||
# Active Workers and Processing should read the same. If not, that's an issue.
|
# Active Workers and Processing should read the same. If not, that's an issue.
|
||||||
logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {queue_size} | Backends Online: {len(backends)}')
|
logger.info(f'REQUEST QUEUE -> Active Workers: {len([i for i in activity if i[1]])} | Processing: {processing_count} | Queued: {queue_size} | Backends Online: {len(backends)}')
|
||||||
time.sleep(2)
|
time.sleep(10)
|
||||||
|
|
|
@ -44,21 +44,21 @@ rows=$(echo "sqrt($num_windows)" | bc)
|
||||||
columns=$(echo "($num_windows + $rows - 1) / $rows" | bc)
|
columns=$(echo "($num_windows + $rows - 1) / $rows" | bc)
|
||||||
|
|
||||||
# Create a new tmux session
|
# Create a new tmux session
|
||||||
tmux new-session -d -s my_session "$command -p 0"
|
tmux new-session -d -s llm_tester "$command -p 0"
|
||||||
|
|
||||||
# Create the remaining windows
|
# Create the remaining windows
|
||||||
for ((i = 1; i < $num_windows; i++)); do
|
for ((i = 1; i < $num_windows; i++)); do
|
||||||
if ((i % $columns == 0)); then
|
if ((i % $columns == 0)); then
|
||||||
tmux select-layout -t my_session:0 tiled
|
tmux select-layout -t llm_tester:0 tiled
|
||||||
tmux select-pane -t 0
|
tmux select-pane -t 0
|
||||||
tmux split-window -t my_session:0 -v "$command -p $i"
|
tmux split-window -t llm_tester:0 -v "$command -p $i"
|
||||||
else
|
else
|
||||||
tmux split-window -t my_session:0 -h "$command -p $i"
|
tmux split-window -t llm_tester:0 -h "$command -p $i"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Balance the windows
|
# Balance the windows
|
||||||
tmux select-layout -t my_session:0 tiled
|
tmux select-layout -t llm_tester:0 tiled
|
||||||
|
|
||||||
# Attach to the session
|
# Attach to the session
|
||||||
tmux attach-session -t my_session
|
tmux attach-session -t llm_tester
|
||||||
|
|
Reference in New Issue