local-llm-server/llm_server/routes/stats.py

from datetime import datetime

from llm_server.custom_redis import redis
from llm_server.helpers import round_up_base

# Timestamp captured once, when this module is first imported. datetime.now() is called
# without a tz argument, so this is a naive datetime in the host's local time.
# NOTE(review): presumably used as the reference point for uptime stats -- confirm against callers.
server_start_time = datetime.now()


def get_total_proompts():
    """
    Read the 'proompts' counter from Redis.

    :return: the stored value converted with int(), or 0 when the key has no value (None).
    """
    raw_count = redis.get('proompts')
    return 0 if raw_count is None else int(raw_count)


def get_active_gen_workers_model(selected_model: str = None):
    """
    Look up the active generation worker count stored in Redis for one model.

    :param selected_model: model name used as the suffix of the Redis key.
    :return: whatever redis.get() yields for the key when asked for dtype=int with a default of 0.
    """
    # NOTE: if selected_model is left as None, the key is literally 'active_gen_workers:None'.
    worker_count_key = f'active_gen_workers:{selected_model}'
    return redis.get(worker_count_key, dtype=int, default=0)


def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, active_gen_workers):
    """
    Estimate how long it will take to complete the currently running generations
    and the queued requests.

    :param gen_time_calc: calculated time for a single generation.
    :param proompters_in_queue: number of requests waiting in the queue.
    :param concurrent_gens: how many generations can be processed concurrently.
    :param active_gen_workers: number of workers currently generating.
    :return: 0 when a worker slot is free or nothing is running; otherwise the queue
        wait time plus one gen_time_calc for the generations already in flight.
    :raises ZeroDivisionError: if concurrent_gens is 0 while active_gen_workers is
        positive, since the queue cannot be split across zero slots.
    """
    if active_gen_workers < concurrent_gens:
        # A worker slot is free, so there is nothing to wait for.
        return 0

    if active_gen_workers <= 0:
        # Nothing is running. This check must come before the division below so that
        # concurrent_gens == 0 with no active workers yields 0 instead of raising.
        return 0

    # All slots are busy.
    # If the queue fits into a single round of concurrent generations, just use the
    # calculated generation time. Otherwise, scale the generation time by how many
    # rounds the queue needs and round that up to a multiple of gen_time_calc
    # (e.g. with gen_time_calc 8, a raw value of 11.6 becomes 16 -- assuming
    # round_up_base rounds up to the next multiple of `base`).
    queue_rounds = proompters_in_queue / concurrent_gens
    if queue_rounds <= 1:
        queue_wait_time = gen_time_calc
    else:
        queue_wait_time = round_up_base(queue_rounds * gen_time_calc, base=gen_time_calc)

    # Add gen_time_calc to account for the currently running generations.
    # This assumes that all active workers will finish at the same time, which is unlikely.
    # Regardless, this is the most accurate estimate we can get without tracking worker elapsed times.
    return queue_wait_time + gen_time_calc
MVP 2023-08-21 21:28:52 -06:00			`from datetime import datetime`

set up cluster config and basic background workers 2023-09-28 18:40:24 -06:00			`from llm_server.custom_redis import redis`
functional 2023-09-30 19:41:50 -06:00			`from llm_server.helpers import round_up_base`
MVP 2023-08-21 21:28:52 -06:00
add estimated wait time and other time tracking stats 2023-08-23 21:33:52 -06:00			`server_start_time = datetime.now()`


fix some stuff related to gunicorn workers 2023-08-23 22:01:06 -06:00			`def get_total_proompts():`
fix stats for real 2023-08-23 01:14:19 -06:00			`count = redis.get('proompts')`
			`if count is None:`
			`count = 0`
			`else:`
			`count = int(count)`
			`return count`


fix processing not being decremented on streaming, fix confusion over queue, adjust stop sequences 2023-10-02 20:53:08 -06:00			`def get_active_gen_workers_model(selected_model: str = None):`
			`return redis.get(f'active_gen_workers:{selected_model}', dtype=int, default=0)`
functional 2023-09-30 19:41:50 -06:00

			`def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, active_gen_workers):`
			`if active_gen_workers < concurrent_gens:`
			`return 0`
			`elif active_gen_workers >= concurrent_gens:`
			`# Calculate how long it will take to complete the currently running gens and the queued requests.`
			`# If the proompters in the queue are equal to the number of workers, just use the calculated generation time.`
			`# Otherwise, use how many requests we can process concurrently times the calculated generation time. Then, round`
			`# that number up to the nearest base gen_time_calc (ie. if gen_time_calc is 8 and the calculated number is 11.6, we will get 18). Finally,`
			`# Add gen_time_calc to the time to account for the currently running generations.`
			`# This assumes that all active workers will finish at the same time, which is unlikely.`
			`# Regardless, this is the most accurate estimate we can get without tracking worker elapsed times.`
			`proompters_in_queue_wait_time = gen_time_calc if (proompters_in_queue / concurrent_gens) <= 1 \`
			`else round_up_base(((proompters_in_queue / concurrent_gens) * gen_time_calc), base=gen_time_calc)`
			`return proompters_in_queue_wait_time + gen_time_calc if active_gen_workers > 0 else 0`
			`elif proompters_in_queue == 0 and active_gen_workers == 0:`
			`# No queue, no workers`
			`return 0`
			`else:`
			`# No queue`
			`return gen_time_calc`