# local-llm-server/llm_server/config/model.py

from enum import Enum
from typing import Union, List

from pydantic import BaseModel


# Supported cluster backend implementations.
class ConfigClusterMode(str, Enum):
    vllm = 'vllm'


# Settings for a single backend in the cluster.
class ConfigCluser(BaseModel):
    backend_url: str
    concurrent_gens: int
    mode: ConfigClusterMode
    priority: int


# Frontend API compatibility modes.
class ConfigFrontendApiModes(str, Enum):
    ooba = 'ooba'


# MySQL connection settings.
class ConfigMysql(BaseModel):
    host: str
    username: str
    password: str
    database: str
    maxconn: int


# How the average generation time is computed.
class ConfigAvgGenTimeModes(str, Enum):
    database = 'database'
    minute = 'minute'


# Top-level config schema for the server.
class ConfigModel(BaseModel):
    frontend_api_mode: ConfigFrontendApiModes
    cluster: List[ConfigCluser]
    prioritize_by_size: bool
    admin_token: Union[str, None]
    mysql: ConfigMysql
    http_host: str
    webserver_log_directory: str
    include_system_tokens_in_stats: bool
    background_homepage_cacher: bool
    max_new_tokens: int
    enable_streaming: int
    show_backends: bool
    log_prompts: bool
    verify_ssl: bool
    auth_required: bool
    simultaneous_requests_per_ip: int
    max_queued_prompts_per_ip: int
    llm_middleware_name: str
    analytics_tracking_code: Union[str, None]
    info_html: Union[str, None]
    enable_openi_compatible_backend: bool
    openai_api_key: Union[str, None]
    expose_openai_system_prompt: bool
    openai_expose_our_model: bool
    openai_force_no_hashes: bool
    openai_moderation_enabled: bool
    openai_moderation_timeout: int
    openai_moderation_scan_last_n: int
    openai_org_name: str
    openai_silent_trim: bool
    frontend_api_client: str
    average_generation_time_mode: ConfigAvgGenTimeModes
    show_num_prompts: bool
    show_uptime: bool
    show_total_output_tokens: bool
    show_backend_info: bool
    load_num_prompts: bool
    manual_model_name: Union[str, None]
    backend_request_timeout: int
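

# --- Illustrative usage sketch (not part of the original file) ---
# A minimal, hedged example of how these pydantic models validate input:
# constructing one cluster entry by hand. The URL and numeric values below
# are made-up placeholders for illustration, not values from the real
# project configuration.
if __name__ == '__main__':
    example_cluster = ConfigCluser(
        backend_url='http://127.0.0.1:7000',  # placeholder backend address
        concurrent_gens=3,
        mode=ConfigClusterMode.vllm,
        priority=100,
    )
    print(example_cluster)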