@openai_bp.errorhandler(500)
@openai_model_bp.errorhandler(500)
def handle_error(e):
    """
    Turn any HTTP 500 raised inside the OpenAI blueprints into an
    OpenAI-style JSON error body.

    Found Codes:
    "auth_subrequest_error"

    :param e: the exception object handed to the 500 error handler.
    :return: a ``(response, status)`` tuple: the JSON error payload and 500.
    """
    import traceback

    # When the 500 comes from an unhandled exception, Flask wraps it in an
    # InternalServerError and exposes the real failure as
    # ``original_exception``. Printing only ``e`` would hide where the error
    # was raised, so log the full traceback of the underlying cause instead.
    cause = getattr(e, 'original_exception', None) or e
    traceback.print_exception(type(cause), cause, cause.__traceback__)

    # The client only ever sees this generic payload; internal details stay
    # in the server log above.
    return jsonify({
        "error": {
            "message": "Internal server error",
            "type": "auth_subrequest_error",
            "param": None,
            "code": "internal_error"
        }
    }), 500
do + echo "--> START <--" + + DATA=$( + cat < DONE <--\n" + sleep $SLEEP_TIME +done diff --git a/other/tests/oai-chat-completion.sh b/other/tests/oai-chat-completion.sh new file mode 100755 index 0000000..5355a8a --- /dev/null +++ b/other/tests/oai-chat-completion.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +DO_STREAM=false +SLEEP_TIME=2 + +while getopts p:t:s flag; do + case "${flag}" in + s) DO_STREAM=true ;; + p) PROXY_CHOICE=${OPTARG} ;; + t) SLEEP_TIME=${OPTARG} ;; + *) ;; + esac +done + +SOURCE=${BASH_SOURCE[0]} +while [ -L "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink + DIR=$(cd -P "$(dirname "$SOURCE")" >/dev/null 2>&1 && pwd) + SOURCE=$(readlink "$SOURCE") + [[ $SOURCE != /* ]] && SOURCE=$DIR/$SOURCE # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located +done +DIR=$(cd -P "$(dirname "$SOURCE")" >/dev/null 2>&1 && pwd) + +source "$DIR/config.sh" + +if [ ! -z "$PROXY_CHOICE" ]; then + our_proxy_server="${PROXY_SERVERS[$PROXY_CHOICE]}" + echo "Using $our_proxy_server" +else + our_proxy_server="" +fi + +while true; do + echo "--> START <--" + + DATA=$( + cat < DONE <--\n" + sleep $SLEEP_TIME +done diff --git a/other/tests/oai-completion.sh b/other/tests/oai-completion.sh new file mode 100755 index 0000000..f89d1c8 --- /dev/null +++ b/other/tests/oai-completion.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +DO_STREAM=false +SLEEP_TIME=2 + +while getopts p:t:s flag; do + case "${flag}" in + s) DO_STREAM=true ;; + p) PROXY_CHOICE=${OPTARG} ;; + t) SLEEP_TIME=${OPTARG} ;; + *) ;; + esac +done + +SOURCE=${BASH_SOURCE[0]} +while [ -L "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink + DIR=$(cd -P "$(dirname "$SOURCE")" >/dev/null 2>&1 && pwd) + SOURCE=$(readlink "$SOURCE") + [[ $SOURCE != /* ]] && SOURCE=$DIR/$SOURCE # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located +done +DIR=$(cd -P "$(dirname "$SOURCE")" 
#!/bin/bash
#
# Start N copies of a command as panes of a single tiled tmux window.
# Pane number i (0-based) runs "<command> -p <i>", then the session is
# attached to the current terminal.

# Name of the tmux session that holds all panes.
session="my_session"

# Print usage and exit.
#   $1 - exit status to use (defaults to 1, i.e. "called because of an error").
function display_help {
  echo "Usage: $0 -n num_windows -c command"
  echo
  echo " -n Number of windows to create"
  echo " -c Command to run in each window"
  echo " -h Show this help message"
  echo
  exit "${1:-1}"
}

# Parse command line arguments.
# The leading ':' puts getopts in silent mode: that is what makes the '\?' and
# ':' branches receive the offending option letter in $OPTARG.
while getopts ":n:c:h" opt; do
  case ${opt} in
  n)
    num_windows=${OPTARG}
    ;;
  c)
    command=${OPTARG}
    ;;
  h)
    # Asking for help is not an error.
    display_help 0
    ;;
  \?)
    echo "Invalid option: -$OPTARG" 1>&2
    display_help
    ;;
  :)
    echo "Option -$OPTARG requires an argument." 1>&2
    display_help
    ;;
  esac
done

# Check if number of windows and command are provided
if [ -z "$num_windows" ] || [ -z "$command" ]; then
  echo "Both number of windows and command are required." 1>&2
  display_help
fi

# Zero or non-numeric input would otherwise end in a division by zero below.
if ! [[ "$num_windows" =~ ^[1-9][0-9]*$ ]]; then
  echo "Number of windows must be a positive integer, got: $num_windows" 1>&2
  display_help
fi

# Calculate rows and columns: rows = floor(sqrt(n)), columns = ceil(n / rows).
rows=1
while (((rows + 1) * (rows + 1) <= num_windows)); do
  rows=$((rows + 1))
done
columns=$(((num_windows + rows - 1) / rows))

# Create a new tmux session; pane 0 runs the first copy of the command.
# Stop here on failure (e.g. the session name is already taken) so the panes
# below are not added to some unrelated, pre-existing session.
if ! tmux new-session -d -s "$session" "$command -p 0"; then
  echo "Could not create tmux session '$session'." 1>&2
  exit 1
fi

# Create the remaining windows
for ((i = 1; i < num_windows; i++)); do
  if ((i % columns == 0)); then
    # Re-tile before starting a new row so there is room left to split.
    tmux select-layout -t "$session:0" tiled
    # NOTE(review): "-t 0" is not qualified with the session name - confirm it
    # resolves to this session's first pane when other sessions exist.
    tmux select-pane -t 0
    tmux split-window -t "$session:0" -v "$command -p $i"
  else
    tmux split-window -t "$session:0" -h "$command -p $i"
  fi
done

# Balance the windows
tmux select-layout -t "$session:0" tiled

# Attach to the session
tmux attach-session -t "$session"
def parse_bash_config(file_path):
    """
    Read simple ``KEY=value`` assignments from a bash config file.

    Supported value forms:

    * ``KEY="text"``        -> ``'text'`` (surrounding double quotes removed)
    * ``KEY=text``          -> ``'text'``
    * ``KEY=("a" "b")``     -> ``['a', 'b']``
    * an array spread over several lines, opened by ``KEY=(`` and closed by a
      line ending in ``)`` -> list of all whitespace-separated items

    Blank lines, comment lines (also indented ones) and lines without ``=``
    are ignored. No shell expansion of any kind is performed.

    :param file_path: path of the config file (``str`` or ``pathlib.Path``).
    :return: dict mapping each variable name to a ``str`` or a ``list`` of ``str``.
    """
    config = {}
    array_key = None  # name of the multi-line array being collected, if any
    array_items = []

    with open(file_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()

            if array_key is not None:
                # Inside "KEY=( ... )": every line contributes items until a
                # line ending in ')' closes the array.
                if line.startswith('#'):
                    continue
                closes = line.endswith(')')
                body = line[:-1] if closes else line
                array_items.extend(v.strip('"') for v in body.split())
                if closes:
                    config[array_key] = array_items
                    array_key, array_items = None, []
                continue

            # Test the stripped line so indented comments are skipped as well.
            if line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            if value.startswith('"') and value.endswith('"'):
                value = value[1:-1]
            elif value.startswith('('):
                if value.endswith(')'):
                    value = [v.strip('"') for v in value[1:-1].split()]
                else:
                    # Array continues on the following lines.
                    array_key = key
                    array_items = [v.strip('"') for v in value[1:].split()]
                    continue
            config[key] = value

    # Best effort for an array that is never closed: keep what was collected.
    if array_key is not None:
        config[array_key] = array_items
    return config
async def print_response_stream(prompt):
    """
    Echo everything yielded by ``run(prompt)`` to stdout as it arrives.

    Any exception raised while streaming is printed instead of propagated,
    so the caller's loop keeps going after a failed request.
    """
    try:
        async for chunk in run(prompt):
            # Flush on every chunk, otherwise tokens do not show up in realtime.
            print(chunk, end='', flush=True)
    except Exception as exc:
        print(exc)


if __name__ == '__main__':
    prompt = "Write a 300 word description of how an apple tree grows.\n\n"
    # Repeat the same request forever, pausing two seconds between rounds.
    while True:
        print('--> START <--')
        asyncio.run(print_response_stream(prompt))
        print('--> DONE <--')
        time.sleep(2)