local-llm-server/llm_server/routes/stats.py

import collections
import time
from datetime import datetime
from threading import Semaphore, Thread

from llm_server import opts
from llm_server.integer import ThreadSafeInteger
from llm_server.opts import concurrent_gens

# Rolling count of seconds in the last minute during which at least one
# generation was in progress. Updated by SemaphoreCheckerThread below.
proompters_1_min = 0

# Limits the number of concurrent generations to opts.concurrent_gens.
concurrent_semaphore = Semaphore(concurrent_gens)

# Thread-safe running count of prompts.
proompts = ThreadSafeInteger(0)

# Timestamp of when this module was loaded (server start).
start_time = datetime.now()
class SemaphoreCheckerThread(Thread):
    """Background thread that samples the semaphore once per second to
    estimate recent prompter activity."""

    def __init__(self, semaphore):
        Thread.__init__(self)
        self.semaphore = semaphore
        # One sample per second; maxlen=60 keeps a rolling one-minute window.
        self.values = collections.deque(maxlen=60)
        self.daemon = True

    def run(self):
        global proompters_1_min
        while True:
            # If the semaphore's internal counter is below the configured maximum,
            # a generation slot is in use, i.e. a prompter has sent a prompt.
            # Note: _value is a private attribute of threading.Semaphore.
            if opts.concurrent_gens > self.semaphore._value:
                self.values.append(1)
            else:
                self.values.append(0)
            proompters_1_min = sum(self.values)
            time.sleep(1)


thread = SemaphoreCheckerThread(concurrent_semaphore)
thread.start()
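
A hypothetical usage sketch (not part of this file): for the one-minute activity check in SemaphoreCheckerThread to be meaningful, request handlers need to hold concurrent_semaphore while a generation is running. The handler name, the backend call, and the ThreadSafeInteger.increment() method are assumptions for illustration.

from llm_server.routes.stats import concurrent_semaphore, proompts

def handle_generate(prompt: str) -> str:
    # Block until a generation slot is free. While this block is held, the
    # semaphore's internal counter drops below opts.concurrent_gens, which
    # SemaphoreCheckerThread observes on its next one-second sample.
    with concurrent_semaphore:
        proompts.increment()  # assumed ThreadSafeInteger API
        return backend_generate(prompt)

def backend_generate(prompt: str) -> str:
    # Stand-in for the real model backend call.
    return f"generated text for: {prompt!r}"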