clean some stuff up, bump VLLM version

This commit is contained in:
Cyberes 2024-01-10 15:01:26 -07:00
parent 0e7f04ab2d
commit 4b3e0671c6
15 changed files with 101 additions and 30 deletions

View File

@ -7,6 +7,7 @@ The purpose of this server is to abstract your LLM backend from your frontend AP
### Install ### Install
also need to create /var/log/localllm also need to create /var/log/localllm
chown -R server:adm /var/log/localllm/
1. `sudo apt install redis` 1. `sudo apt install redis`
2. `python3 -m venv venv` 2. `python3 -m venv venv`

View File

@ -1,7 +1,7 @@
import numpy as np import numpy as np
from llm_server import opts from llm_server import opts
from llm_server.cluster.cluster_config import cluster_config, get_a_cluster_backend from llm_server.cluster.cluster_config import get_a_cluster_backend, cluster_config
from llm_server.cluster.stores import redis_running_models from llm_server.cluster.stores import redis_running_models
from llm_server.custom_redis import redis from llm_server.custom_redis import redis
from llm_server.llm.generator import generator from llm_server.llm.generator import generator
@ -12,26 +12,38 @@ from llm_server.routes.stats import calculate_wait_time, get_active_gen_workers_
def get_backends_from_model(model_name: str): def get_backends_from_model(model_name: str):
"""
Get the backends that are running a specific model. This is the inverse of `get_model_choices()`.
:param model_name:
:return:
"""
return [x.decode('utf-8') for x in redis_running_models.smembers(model_name)] return [x.decode('utf-8') for x in redis_running_models.smembers(model_name)]
def get_running_models(): def get_running_models():
return redis_running_models.keys() """
Get all the models that are in the cluster.
:return:
"""
return list(redis_running_models.keys())
def purge_backend_from_running_models(backend_url: str): def is_valid_model(model_name: str) -> bool:
keys = redis_running_models.keys() """
pipeline = redis_running_models.pipeline() Is this a model that is being hosted in the cluster?
for model in keys: :param model_name:
pipeline.srem(model, backend_url) :return:
pipeline.execute() """
def is_valid_model(model_name: str):
return redis_running_models.exists(model_name) return redis_running_models.exists(model_name)
def test_backend(backend_url: str, test_prompt: bool = False): def test_backend(backend_url: str, test_prompt: bool = False):
"""
Test (using a test prompt) a backend to check if it is online.
:param backend_url:
:param test_prompt:
:return:
"""
backend_info = cluster_config.get_backend(backend_url) backend_info = cluster_config.get_backend(backend_url)
if test_prompt: if test_prompt:
handler = VLLMBackend(backend_url) handler = VLLMBackend(backend_url)
@ -56,7 +68,12 @@ def test_backend(backend_url: str, test_prompt: bool = False):
return True, i return True, i
def get_model_choices(regen: bool = False): def get_model_choices(regen: bool = False) -> tuple[dict, dict]:
"""
Get the info and stats of the models hosted in the cluster.
:param regen:
:return:
"""
if not regen: if not regen:
c = redis.getp('model_choices') c = redis.getp('model_choices')
if c: if c:

View File

@ -9,7 +9,13 @@ from llm_server.custom_redis import RedisCustom
from llm_server.routes.helpers.model import estimate_model_size from llm_server.routes.helpers.model import estimate_model_size
# Don't try to reorganize this file or else you'll run into circular imports.
class RedisClusterStore: class RedisClusterStore:
"""
A class used to store the cluster state in Redis.
"""
def __init__(self, name: str, **kwargs): def __init__(self, name: str, **kwargs):
self.name = name self.name = name
self.config_redis = RedisCustom(name, **kwargs) self.config_redis = RedisCustom(name, **kwargs)
@ -52,7 +58,8 @@ class RedisClusterStore:
def validate_backend(self, backend_url: str): def validate_backend(self, backend_url: str):
""" """
Returns the backend URL that was given, or a new one if that was offline. Returns the backend URL that was given.
If that backend is offline, it will select a new one. This fallback behavior does NOT take the selected model into account.
:param backend_url: :param backend_url:
:return: :return:
""" """
@ -68,6 +75,11 @@ cluster_config = RedisClusterStore('cluster_config')
def get_backends(): def get_backends():
"""
Get all the backends in the cluster, sorted by priority.
The first tuple is the online ones, second is the ones that are offline.
:return:
"""
backends = cluster_config.all() backends = cluster_config.all()
result = {} result = {}
for k, v in backends.items(): for k, v in backends.items():
@ -103,13 +115,13 @@ def get_backends():
def get_a_cluster_backend(model=None): def get_a_cluster_backend(model=None):
""" """
Get a backend from Redis. If there are no online backends, return None. Get a backend from Redis. If there are no online backends, return None.
If `model` is not supplied, we will pick one ourself. If `model` is not supplied, we will pick one ourselves.
""" """
if model: if model:
# First, determine if there are multiple backends hosting the same model. # First, determine if there are multiple backends hosting the same model.
backends_hosting_model = [i.decode('utf-8') for i in redis_running_models.smembers(model)] backends_hosting_model = [i.decode('utf-8') for i in redis_running_models.smembers(model)]
# If so, create an iterator for those backends # If so, create a Redis "cycle" iterator for those backends.
if len(backends_hosting_model): if len(backends_hosting_model):
add_backend_cycler(model, backends_hosting_model) add_backend_cycler(model, backends_hosting_model)
cycled = redis_cycle(model) cycled = redis_cycle(model)

View File

@ -22,6 +22,12 @@ def redis_cycle(list_name):
def add_backend_cycler(list_name: str, new_elements: list): def add_backend_cycler(list_name: str, new_elements: list):
"""
Create a `redis_cycle()` iterator in Redis.
:param list_name:
:param new_elements:
:return:
"""
existing_elements = [i.decode('utf-8') for i in redis_cycler_db.lrange(list_name, 0, -1)] existing_elements = [i.decode('utf-8') for i in redis_cycler_db.lrange(list_name, 0, -1)]
existing_set = set(existing_elements) existing_set = set(existing_elements)

View File

@ -1,3 +1,7 @@
from llm_server.custom_redis import RedisCustom from llm_server.custom_redis import RedisCustom
"""
Global variables for accessing Redis stores.
"""
redis_running_models = RedisCustom('running_models') redis_running_models = RedisCustom('running_models')

View File

@ -2,8 +2,12 @@ import time
from threading import Thread from threading import Thread
from llm_server.cluster.backend import test_backend from llm_server.cluster.backend import test_backend
from llm_server.cluster.cluster_config import cluster_config
from llm_server.cluster.stores import redis_running_models from llm_server.cluster.stores import redis_running_models
from llm_server.cluster.cluster_config import cluster_config
"""
The definition for the cluster worker used to test the backends.
"""
def cluster_worker(): def cluster_worker():
@ -25,6 +29,13 @@ def cluster_worker():
def check_backend(n, v, test_prompt): def check_backend(n, v, test_prompt):
"""
The function run by the worker to test a backend.
:param n: I don't remember.
:param v: I don't remember.
:param test_prompt:
:return:
"""
online, backend_info = test_backend(v['backend_url'], test_prompt=test_prompt) online, backend_info = test_backend(v['backend_url'], test_prompt=test_prompt)
if online: if online:
running_model = backend_info['model'] running_model = backend_info['model']

View File

@ -7,7 +7,7 @@ import redis as redis_pkg
import simplejson as json import simplejson as json
from flask_caching import Cache from flask_caching import Cache
from redis import Redis from redis import Redis
from redis.typing import AnyKeyT, EncodableT, ExpiryT, FieldT, KeyT, PatternT, ZScoreBoundT from redis.typing import AnyKeyT, EncodableT, ExpiryT, FieldT, KeyT, PatternT, ZScoreBoundT, AbsExpiryT
flask_cache = Cache(config={'CACHE_TYPE': 'RedisCache', 'CACHE_REDIS_URL': 'redis://localhost:6379/15', 'CACHE_KEY_PREFIX': 'local_llm_flask'}) flask_cache = Cache(config={'CACHE_TYPE': 'RedisCache', 'CACHE_REDIS_URL': 'redis://localhost:6379/15', 'CACHE_KEY_PREFIX': 'local_llm_flask'})
@ -17,9 +17,11 @@ ONE_MONTH_SECONDS = 2678000
class RedisCustom(Redis): class RedisCustom(Redis):
""" """
A simple wrapper class for Redis to create a "namespace" within a DB, A simple wrapper class for Redis to create a "namespace" within a DB,
which simplyifies key management. which simplifies key management.
""" """
# TODO: is there a better way to do this instead of overriding every single method?
def __init__(self, prefix, **kwargs): def __init__(self, prefix, **kwargs):
super().__init__() super().__init__()
self.redis = Redis(**kwargs) self.redis = Redis(**kwargs)
@ -34,7 +36,17 @@ class RedisCustom(Redis):
def _key(self, key): def _key(self, key):
return f"{self.prefix}:{key}" return f"{self.prefix}:{key}"
def set(self, key, value, ex: Union[ExpiryT, None] = None): def set(self, key: KeyT,
value: EncodableT,
ex: Union[ExpiryT, None] = None,
px: Union[ExpiryT, None] = None,
nx: bool = False,
xx: bool = False,
keepttl: bool = False,
get: bool = False,
exat: Union[AbsExpiryT, None] = None,
pxat: Union[AbsExpiryT, None] = None
):
return self.redis.set(self._key(key), value, ex=ex) return self.redis.set(self._key(key), value, ex=ex)
def get(self, key, default=None, dtype=None): def get(self, key, default=None, dtype=None):

View File

@ -24,12 +24,19 @@ class VLLMBackend(LLMBackend):
return jsonify({'results': [{'text': backend_response}]}), 200 return jsonify({'results': [{'text': backend_response}]}), 200
def get_parameters(self, parameters) -> Tuple[dict | None, str | None]: def get_parameters(self, parameters) -> Tuple[dict | None, str | None]:
"""
Convert the Oobabooga parameters to VLLM and validate them.
:param parameters:
:return:
"""
try: try:
# top_k == -1 means disabled # top_k == -1 means disabled
top_k = parameters.get('top_k', self._default_params['top_k']) top_k = parameters.get('top_k', self._default_params['top_k'])
if top_k <= 0: if top_k <= 0:
top_k = -1 top_k = -1
# We call the internal VLLM `SamplingParams` class to validate the input parameters.
# Parameters from Oobabooga don't line up here exactly, so we have to shuffle some things around.
# TODO: support more params # TODO: support more params
sampling_params = SamplingParams( sampling_params = SamplingParams(
temperature=parameters.get('temperature', self._default_params['temperature']), temperature=parameters.get('temperature', self._default_params['temperature']),
@ -45,9 +52,10 @@ class VLLMBackend(LLMBackend):
early_stopping=parameters.get('early_stopping', self._default_params['early_stopping']) early_stopping=parameters.get('early_stopping', self._default_params['early_stopping'])
) )
except ValueError as e: except ValueError as e:
# `SamplingParams` will return a pretty error message. Send that back to the caller.
return None, str(e).strip('.') return None, str(e).strip('.')
# We use max_new_tokens throughout the server. # We use max_new_tokens throughout this program, so rename the variable.
result = vars(sampling_params) result = vars(sampling_params)
result['max_new_tokens'] = result.pop('max_tokens') result['max_new_tokens'] = result.pop('max_tokens')

View File

@ -1,6 +1,6 @@
import logging import logging
from pathlib import Path
import sys import sys
from pathlib import Path
import coloredlogs import coloredlogs
@ -33,9 +33,10 @@ logging_info = LoggingInfo()
LOG_DIRECTORY = None LOG_DIRECTORY = None
def init_logging(filepath:Path=None): def init_logging(filepath: Path = None):
""" """
Set up the parent logger. Set up the parent logger. Ensures this logger and all its children log to a file.
This is only called by `server.py` since there is weirdness with Gunicorn. The daemon doesn't need this.
:return: :return:
""" """
logger = logging.getLogger('llm_server') logger = logging.getLogger('llm_server')
@ -53,7 +54,6 @@ def init_logging(filepath:Path=None):
logger.addHandler(handler) logger.addHandler(handler)
def create_logger(name): def create_logger(name):
logger = logging.getLogger('llm_server').getChild(name) logger = logging.getLogger('llm_server').getChild(name)
logger.setLevel(logging_info.level) logger.setLevel(logging_info.level)

View File

@ -7,7 +7,7 @@ from llm_server.custom_redis import ONE_MONTH_SECONDS, flask_cache, redis
from . import openai_bp from . import openai_bp
from ..stats import server_start_time from ..stats import server_start_time
from ... import opts from ... import opts
from ...cluster.cluster_config import cluster_config, get_a_cluster_backend from ...cluster.cluster_config import get_a_cluster_backend, cluster_config
from ...helpers import jsonify_pretty from ...helpers import jsonify_pretty
from ...llm.openai.transform import generate_oai_string from ...llm.openai.transform import generate_oai_string

View File

@ -5,7 +5,7 @@ import flask
from flask import Response, request from flask import Response, request
from llm_server import opts from llm_server import opts
from llm_server.cluster.cluster_config import cluster_config, get_a_cluster_backend from llm_server.cluster.cluster_config import get_a_cluster_backend, cluster_config
from llm_server.custom_redis import redis from llm_server.custom_redis import redis
from llm_server.database.database import get_token_ratelimit from llm_server.database.database import get_token_ratelimit
from llm_server.database.log_to_db import log_to_db from llm_server.database.log_to_db import log_to_db

View File

@ -6,7 +6,7 @@ from llm_server.custom_redis import flask_cache
from . import bp from . import bp
from ... import opts from ... import opts
from ...cluster.backend import get_backends_from_model, is_valid_model from ...cluster.backend import get_backends_from_model, is_valid_model
from ...cluster.cluster_config import cluster_config, get_a_cluster_backend from ...cluster.cluster_config import get_a_cluster_backend, cluster_config
@bp.route('/v1/model', methods=['GET']) @bp.route('/v1/model', methods=['GET'])

View File

@ -4,7 +4,7 @@ from llm_server.custom_redis import flask_cache
from . import bp from . import bp
from .generate_stats import generate_stats from .generate_stats import generate_stats
from ..auth import requires_auth from ..auth import requires_auth
from ...cluster.cluster_config import cluster_config, get_backends from ...cluster.cluster_config import get_backends, cluster_config
from ...helpers import jsonify_pretty from ...helpers import jsonify_pretty

View File

@ -3,7 +3,7 @@ import time
import requests import requests
from llm_server import opts from llm_server import opts
from llm_server.cluster.cluster_config import cluster_config, get_backends from llm_server.cluster.cluster_config import get_backends, cluster_config
from llm_server.custom_redis import redis from llm_server.custom_redis import redis
from llm_server.database.database import weighted_average_column_for_model from llm_server.database.database import weighted_average_column_for_model
from llm_server.llm.info import get_info from llm_server.llm.info import get_info

View File

@ -13,6 +13,6 @@ flask-sock==0.6.0
gunicorn==21.2.0 gunicorn==21.2.0
redis==5.0.1 redis==5.0.1
ujson==5.8.0 ujson==5.8.0
vllm==0.2.1.post1 vllm==0.2.7
gradio~=3.46.1 gradio~=3.46.1
coloredlogs~=15.0.1 coloredlogs~=15.0.1