From 4b3e0671c6819f8807ade06ad5b1df97ee562aa4 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Wed, 10 Jan 2024 15:01:26 -0700 Subject: [PATCH] clean some stuff up, bump VLLM version --- README.md | 1 + llm_server/cluster/backend.py | 41 ++++++++++++++++++++-------- llm_server/cluster/cluster_config.py | 18 ++++++++++-- llm_server/cluster/redis_cycle.py | 6 ++++ llm_server/cluster/stores.py | 4 +++ llm_server/cluster/worker.py | 13 ++++++++- llm_server/custom_redis.py | 18 ++++++++++-- llm_server/llm/vllm/vllm_backend.py | 10 ++++++- llm_server/logging.py | 8 +++--- llm_server/routes/openai/models.py | 2 +- llm_server/routes/request_handler.py | 2 +- llm_server/routes/v1/info.py | 2 +- llm_server/routes/v1/proxy.py | 2 +- llm_server/workers/mainer.py | 2 +- requirements.txt | 2 +- 15 files changed, 101 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index fdaa7a9..78e1559 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ The purpose of this server is to abstract your LLM backend from your frontend AP ### Install also need to create /var/log/localllm +chown -R server:adm /var/log/localllm/ 1. `sudo apt install redis` 2. `python3 -m venv venv` diff --git a/llm_server/cluster/backend.py b/llm_server/cluster/backend.py index eb695d2..1114439 100644 --- a/llm_server/cluster/backend.py +++ b/llm_server/cluster/backend.py @@ -1,7 +1,7 @@ import numpy as np from llm_server import opts -from llm_server.cluster.cluster_config import cluster_config, get_a_cluster_backend +from llm_server.cluster.cluster_config import get_a_cluster_backend, cluster_config from llm_server.cluster.stores import redis_running_models from llm_server.custom_redis import redis from llm_server.llm.generator import generator @@ -12,26 +12,38 @@ from llm_server.routes.stats import calculate_wait_time, get_active_gen_workers_ def get_backends_from_model(model_name: str): + """ + Get the backends that are running a specific model. This is the inverse of `get_model_choices()`. 
+ :param model_name: + :return: + """ return [x.decode('utf-8') for x in redis_running_models.smembers(model_name)] def get_running_models(): - return redis_running_models.keys() + """ + Get all the models that are in the cluster. + :return: + """ + return list(redis_running_models.keys()) -def purge_backend_from_running_models(backend_url: str): - keys = redis_running_models.keys() - pipeline = redis_running_models.pipeline() - for model in keys: - pipeline.srem(model, backend_url) - pipeline.execute() - - -def is_valid_model(model_name: str): +def is_valid_model(model_name: str) -> bool: + """ + Is this a model that is being hosted in the cluster? + :param model_name: + :return: + """ return redis_running_models.exists(model_name) def test_backend(backend_url: str, test_prompt: bool = False): + """ + Test (using a test prompt) a backend to check if it is online. + :param backend_url: + :param test_prompt: + :return: + """ backend_info = cluster_config.get_backend(backend_url) if test_prompt: handler = VLLMBackend(backend_url) @@ -56,7 +68,12 @@ def test_backend(backend_url: str, test_prompt: bool = False): return True, i -def get_model_choices(regen: bool = False): +def get_model_choices(regen: bool = False) -> tuple[dict, dict]: + """ + Get the info and stats of the models hosted in the cluster. + :param regen: + :return: + """ if not regen: c = redis.getp('model_choices') if c: diff --git a/llm_server/cluster/cluster_config.py b/llm_server/cluster/cluster_config.py index 891dfc1..277cdb1 100644 --- a/llm_server/cluster/cluster_config.py +++ b/llm_server/cluster/cluster_config.py @@ -9,7 +9,13 @@ from llm_server.custom_redis import RedisCustom from llm_server.routes.helpers.model import estimate_model_size +# Don't try to reorganize this file or else you'll run into circular imports. + class RedisClusterStore: + """ + A class used to store the cluster state in Redis. 
+ """ + def __init__(self, name: str, **kwargs): self.name = name self.config_redis = RedisCustom(name, **kwargs) @@ -52,7 +58,8 @@ class RedisClusterStore: def validate_backend(self, backend_url: str): """ - Returns the backend URL that was given, or a new one if that was offline. + Returns the backend URL that was given. + If that backend is offline, it will select a new one. This fallback behavior does NOT take the selected model into account. :param backend_url: :return: """ @@ -68,6 +75,11 @@ cluster_config = RedisClusterStore('cluster_config') def get_backends(): + """ + Get all the backends in the cluster, sorted by priority. + The first tuple is the online ones, second is the ones that are offline. + :return: + """ backends = cluster_config.all() result = {} for k, v in backends.items(): @@ -103,13 +115,13 @@ def get_backends(): def get_a_cluster_backend(model=None): """ Get a backend from Redis. If there are no online backends, return None. - If `model` is not supplied, we will pick one ourself. + If `model` is not supplied, we will pick one ourselves. """ if model: # First, determine if there are multiple backends hosting the same model. backends_hosting_model = [i.decode('utf-8') for i in redis_running_models.smembers(model)] - # If so, create an iterator for those backends + # If so, create a Redis "cycle" iterator for those backends. if len(backends_hosting_model): add_backend_cycler(model, backends_hosting_model) cycled = redis_cycle(model) diff --git a/llm_server/cluster/redis_cycle.py b/llm_server/cluster/redis_cycle.py index 266241d..9d0ba3c 100644 --- a/llm_server/cluster/redis_cycle.py +++ b/llm_server/cluster/redis_cycle.py @@ -22,6 +22,12 @@ def redis_cycle(list_name): def add_backend_cycler(list_name: str, new_elements: list): + """ + Create a `redis_cycle()` iterator in Redis. 
+ :param list_name: + :param new_elements: + :return: + """ existing_elements = [i.decode('utf-8') for i in redis_cycler_db.lrange(list_name, 0, -1)] existing_set = set(existing_elements) diff --git a/llm_server/cluster/stores.py b/llm_server/cluster/stores.py index c0cbdcc..6ec5814 100644 --- a/llm_server/cluster/stores.py +++ b/llm_server/cluster/stores.py @@ -1,3 +1,7 @@ from llm_server.custom_redis import RedisCustom +""" +Global variables for accessing Redis stores. +""" + redis_running_models = RedisCustom('running_models') diff --git a/llm_server/cluster/worker.py b/llm_server/cluster/worker.py index 9652db9..fff2381 100644 --- a/llm_server/cluster/worker.py +++ b/llm_server/cluster/worker.py @@ -2,8 +2,12 @@ import time from threading import Thread from llm_server.cluster.backend import test_backend -from llm_server.cluster.cluster_config import cluster_config from llm_server.cluster.stores import redis_running_models +from llm_server.cluster.cluster_config import cluster_config + +""" +The definition for the cluster worker used to test the backends. +""" def cluster_worker(): @@ -25,6 +29,13 @@ def cluster_worker(): def check_backend(n, v, test_prompt): + """ + The function run by the worker to test a backend. + :param n: I don't remember. + :param v: I don't remember. 
+ :param test_prompt: + :return: + """ online, backend_info = test_backend(v['backend_url'], test_prompt=test_prompt) if online: running_model = backend_info['model'] diff --git a/llm_server/custom_redis.py b/llm_server/custom_redis.py index a055537..00abb06 100644 --- a/llm_server/custom_redis.py +++ b/llm_server/custom_redis.py @@ -7,7 +7,7 @@ import redis as redis_pkg import simplejson as json from flask_caching import Cache from redis import Redis -from redis.typing import AnyKeyT, EncodableT, ExpiryT, FieldT, KeyT, PatternT, ZScoreBoundT +from redis.typing import AnyKeyT, EncodableT, ExpiryT, FieldT, KeyT, PatternT, ZScoreBoundT, AbsExpiryT flask_cache = Cache(config={'CACHE_TYPE': 'RedisCache', 'CACHE_REDIS_URL': 'redis://localhost:6379/15', 'CACHE_KEY_PREFIX': 'local_llm_flask'}) @@ -17,9 +17,11 @@ ONE_MONTH_SECONDS = 2678000 class RedisCustom(Redis): """ A simple wrapper class for Redis to create a "namespace" within a DB, - which simplyifies key management. + which simplifies key management. """ + # TODO: is there a better way to do this instead of overriding every single method? 
+ def __init__(self, prefix, **kwargs): super().__init__() self.redis = Redis(**kwargs) @@ -34,7 +36,17 @@ class RedisCustom(Redis): def _key(self, key): return f"{self.prefix}:{key}" - def set(self, key, value, ex: Union[ExpiryT, None] = None): + def set(self, key: KeyT, + value: EncodableT, + ex: Union[ExpiryT, None] = None, + px: Union[ExpiryT, None] = None, + nx: bool = False, + xx: bool = False, + keepttl: bool = False, + get: bool = False, + exat: Union[AbsExpiryT, None] = None, + pxat: Union[AbsExpiryT, None] = None + ): return self.redis.set(self._key(key), value, ex=ex) def get(self, key, default=None, dtype=None): diff --git a/llm_server/llm/vllm/vllm_backend.py b/llm_server/llm/vllm/vllm_backend.py index 5c12b45..df2ad2c 100644 --- a/llm_server/llm/vllm/vllm_backend.py +++ b/llm_server/llm/vllm/vllm_backend.py @@ -24,12 +24,19 @@ class VLLMBackend(LLMBackend): return jsonify({'results': [{'text': backend_response}]}), 200 def get_parameters(self, parameters) -> Tuple[dict | None, str | None]: + """ + Convert the Oobabooga parameters to VLLM and validate them. + :param parameters: + :return: + """ try: # top_k == -1 means disabled top_k = parameters.get('top_k', self._default_params['top_k']) if top_k <= 0: top_k = -1 + # We call the internal VLLM `SamplingParams` class to validate the input parameters. + # Parameters from Oobabooga don't line up here exactly, so we have to shuffle some things around. # TODO: support more params sampling_params = SamplingParams( temperature=parameters.get('temperature', self._default_params['temperature']), @@ -45,9 +52,10 @@ class VLLMBackend(LLMBackend): early_stopping=parameters.get('early_stopping', self._default_params['early_stopping']) ) except ValueError as e: + # `SamplingParams` will return a pretty error message. Send that back to the caller. return None, str(e).strip('.') - # We use max_new_tokens throughout the server. + # We use max_new_tokens throughout this program, so rename the variable. 
result = vars(sampling_params) result['max_new_tokens'] = result.pop('max_tokens') diff --git a/llm_server/logging.py b/llm_server/logging.py index 594f609..53f6d72 100644 --- a/llm_server/logging.py +++ b/llm_server/logging.py @@ -1,6 +1,6 @@ import logging -from pathlib import Path import sys +from pathlib import Path import coloredlogs @@ -33,9 +33,10 @@ logging_info = LoggingInfo() LOG_DIRECTORY = None -def init_logging(filepath:Path=None): +def init_logging(filepath: Path = None): """ - Set up the parent logger. + Set up the parent logger. Ensures this logger and all children log to a file. + This is only called by `server.py` since there is weirdness with Gunicorn. The daemon doesn't need this. :return: """ logger = logging.getLogger('llm_server') @@ -53,7 +54,6 @@ def init_logging(filepath:Path=None): logger.addHandler(handler) - def create_logger(name): logger = logging.getLogger('llm_server').getChild(name) logger.setLevel(logging_info.level) diff --git a/llm_server/routes/openai/models.py b/llm_server/routes/openai/models.py index 2ff0629..d0e7701 100644 --- a/llm_server/routes/openai/models.py +++ b/llm_server/routes/openai/models.py @@ -7,7 +7,7 @@ from llm_server.custom_redis import ONE_MONTH_SECONDS, flask_cache, redis from . import openai_bp from ..stats import server_start_time from ... 
import opts -from ...cluster.cluster_config import cluster_config, get_a_cluster_backend +from ...cluster.cluster_config import get_a_cluster_backend, cluster_config from ...helpers import jsonify_pretty from ...llm.openai.transform import generate_oai_string diff --git a/llm_server/routes/request_handler.py b/llm_server/routes/request_handler.py index f4abfa6..aadb443 100644 --- a/llm_server/routes/request_handler.py +++ b/llm_server/routes/request_handler.py @@ -5,7 +5,7 @@ import flask from flask import Response, request from llm_server import opts -from llm_server.cluster.cluster_config import cluster_config, get_a_cluster_backend +from llm_server.cluster.cluster_config import get_a_cluster_backend, cluster_config from llm_server.custom_redis import redis from llm_server.database.database import get_token_ratelimit from llm_server.database.log_to_db import log_to_db diff --git a/llm_server/routes/v1/info.py b/llm_server/routes/v1/info.py index 342921e..e83ac62 100644 --- a/llm_server/routes/v1/info.py +++ b/llm_server/routes/v1/info.py @@ -6,7 +6,7 @@ from llm_server.custom_redis import flask_cache from . import bp from ... import opts from ...cluster.backend import get_backends_from_model, is_valid_model -from ...cluster.cluster_config import cluster_config, get_a_cluster_backend +from ...cluster.cluster_config import get_a_cluster_backend, cluster_config @bp.route('/v1/model', methods=['GET']) diff --git a/llm_server/routes/v1/proxy.py b/llm_server/routes/v1/proxy.py index 6e3708e..79222b3 100644 --- a/llm_server/routes/v1/proxy.py +++ b/llm_server/routes/v1/proxy.py @@ -4,7 +4,7 @@ from llm_server.custom_redis import flask_cache from . 
import bp from .generate_stats import generate_stats from ..auth import requires_auth -from ...cluster.cluster_config import cluster_config, get_backends +from ...cluster.cluster_config import get_backends, cluster_config from ...helpers import jsonify_pretty diff --git a/llm_server/workers/mainer.py b/llm_server/workers/mainer.py index d342f4b..fb1f3b0 100644 --- a/llm_server/workers/mainer.py +++ b/llm_server/workers/mainer.py @@ -3,7 +3,7 @@ import time import requests from llm_server import opts -from llm_server.cluster.cluster_config import cluster_config, get_backends +from llm_server.cluster.cluster_config import get_backends, cluster_config from llm_server.custom_redis import redis from llm_server.database.database import weighted_average_column_for_model from llm_server.llm.info import get_info diff --git a/requirements.txt b/requirements.txt index 89f4be7..139cbc6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,6 @@ flask-sock==0.6.0 gunicorn==21.2.0 redis==5.0.1 ujson==5.8.0 -vllm==0.2.1.post1 +vllm==0.2.7 gradio~=3.46.1 coloredlogs~=15.0.1 \ No newline at end of file