diff --git a/llm_server/routes/openai_request_handler.py b/llm_server/routes/openai_request_handler.py
index 9716eb9..bc5c6f5 100644
--- a/llm_server/routes/openai_request_handler.py
+++ b/llm_server/routes/openai_request_handler.py
@@ -67,7 +67,6 @@ class OpenAIRequestHandler(RequestHandler):
         llm_request = {**self.parameters, 'prompt': self.prompt}
         (success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
-        model = self.request_json_body.get('model')
 
         if success:
@@ -98,6 +97,7 @@ class OpenAIRequestHandler(RequestHandler):
         return response, 429
 
     def handle_error(self, error_msg: str, error_type: str = 'error') -> Tuple[flask.Response, int]:
+        print(error_msg)
         return jsonify({
             "error": {
                 "message": "Invalid request, check your parameters and try again.",
diff --git a/other/gradio/gradio_chat.py b/other/gradio/gradio_chat.py
new file mode 100644
index 0000000..e8e54b2
--- /dev/null
+++ b/other/gradio/gradio_chat.py
@@ -0,0 +1,62 @@
+import os
+import sys
+import warnings
+
+import gradio as gr
+import openai
+
+warnings.filterwarnings("ignore")
+
+API_BASE = os.getenv('API_BASE')
+if not API_BASE:
+    print('Must set the secret variable API_BASE to your https://your-site/api/openai/v1')
+    sys.exit(1)
+
+# A system prompt can be injected into the very first spot in the context.
+# If the user sends a message that contains the CONTEXT_TRIGGER_PHRASE,
+# the content in CONTEXT_TRIGGER_INJECTION will be injected.
+# Setting CONTEXT_TRIGGER_PHRASE will also add it to the selectable examples.
+CONTEXT_TRIGGER_PHRASE = os.getenv('CONTEXT_TRIGGER_PHRASE')
+CONTEXT_TRIGGER_INJECTION = os.getenv('CONTEXT_TRIGGER_INJECTION')
+
+openai.api_key = 'null'
+openai.api_base = API_BASE
+
+
+def stream_response(prompt, history):
+    messages = []
+    do_injection = False
+    for human, assistant in history:
+        messages.append({'role': 'user', 'content': str(human)})
+        messages.append({'role': 'assistant', 'content': str(assistant)})
+
+        if CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in human:
+            do_injection = True
+    messages.append({'role': 'user', 'content': prompt})
+
+    if do_injection or (CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in prompt):
+        messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})
+
+    try:
+        response = openai.ChatCompletion.create(
+            model='0',
+            messages=messages,
+            temperature=0,
+            max_tokens=300,
+            stream=True
+        )
+    except Exception:
+        raise gr.Error("Failed to reach inference endpoint.")
+
+    message = ''
+    for chunk in response:
+        if len(chunk['choices'][0]['delta']) != 0:
+            message += chunk['choices'][0]['delta']['content']
+            yield message
+
+
+examples = ["hello", "hola", "merhaba"]
+if CONTEXT_TRIGGER_PHRASE:
+    examples.insert(0, CONTEXT_TRIGGER_PHRASE)
+
+gr.ChatInterface(stream_response, examples=examples, title="Chatbot Demo", analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=3).launch()
diff --git a/other/gradio_chat.py b/other/gradio_chat.py
deleted file mode 100644
index eb10d26..0000000
--- a/other/gradio_chat.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import warnings
-
-import gradio as gr
-import openai
-
-warnings.filterwarnings("ignore")
-
-openai.api_key = 'null'
-openai.api_base = 'http://localhost:5000/api/openai/v1'
-
-
-def stream_response(prompt, history):
-    messages = []
-    for x in history:
-        messages.append({'role': 'user', 'content': x[0]})
-        messages.append({'role': 'assistant', 'content': x[1]})
-    messages.append({'role': 'user', 'content': prompt})
-
-    response = openai.ChatCompletion.create(
-        model='0',
-        messages=messages,
-        temperature=0,
-        max_tokens=300,
-        stream=True
-    )
-
-    message = ''
-    for chunk in response:
-        message += chunk['choices'][0]['delta']['content']
-        yield message
-
-
-gr.ChatInterface(stream_response, examples=["hello", "hola", "merhaba"], title="Chatbot Demo", analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue().launch()
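
Note (not part of the patch): a minimal sketch of the new trigger-phrase injection, reproducing the message-building logic from stream_response in other/gradio/gradio_chat.py so it can be checked without a running backend. The trigger phrase and injection text below are hypothetical stand-ins for the CONTEXT_TRIGGER_PHRASE / CONTEXT_TRIGGER_INJECTION environment variables.

    # Hypothetical example values; the real script reads these from the environment.
    CONTEXT_TRIGGER_PHRASE = 'activate-lore'
    CONTEXT_TRIGGER_INJECTION = 'You are a storyteller. Stay in character.'

    def build_messages(prompt, history):
        # Mirrors the history -> messages conversion in stream_response.
        messages = []
        do_injection = False
        for human, assistant in history:
            messages.append({'role': 'user', 'content': str(human)})
            messages.append({'role': 'assistant', 'content': str(assistant)})
            # A trigger anywhere in an earlier user turn keeps the injection active.
            if CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in human:
                do_injection = True
        messages.append({'role': 'user', 'content': prompt})
        if do_injection or (CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in prompt):
            # The system prompt lands in the very first slot of the context.
            messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})
        return messages

    # The current prompt contains the trigger, so a system message is prepended.
    print(build_messages('activate-lore: tell me a story', [('hi', 'hello!')]))

Because both past user turns and the current prompt are checked, the injected persona persists for the rest of the conversation once the phrase has been sent.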