From 5f7bf4faca6aeadfa52fde42252498a2a2e07062 Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Mon, 9 Oct 2023 18:12:12 -0600
Subject: [PATCH] misc changes

---
 llm_server/routes/openai_request_handler.py |  2 +-
 other/gradio/gradio_chat.py                 | 62 +++++++++++++++++++++
 other/gradio_chat.py                        | 33 -----------
 3 files changed, 63 insertions(+), 34 deletions(-)
 create mode 100644 other/gradio/gradio_chat.py
 delete mode 100644 other/gradio_chat.py

diff --git a/llm_server/routes/openai_request_handler.py b/llm_server/routes/openai_request_handler.py
index 9716eb9..bc5c6f5 100644
--- a/llm_server/routes/openai_request_handler.py
+++ b/llm_server/routes/openai_request_handler.py
@@ -67,7 +67,6 @@ class OpenAIRequestHandler(RequestHandler):
         llm_request = {**self.parameters, 'prompt': self.prompt}
         (success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
-        model = self.request_json_body.get('model')
 
         if success:
@@ -98,6 +97,7 @@ class OpenAIRequestHandler(RequestHandler):
         return response, 429
 
     def handle_error(self, error_msg: str, error_type: str = 'error') -> Tuple[flask.Response, int]:
+        print(error_msg)
         return jsonify({
             "error": {
                 "message": "Invalid request, check your parameters and try again.",
diff --git a/other/gradio/gradio_chat.py b/other/gradio/gradio_chat.py
new file mode 100644
index 0000000..e8e54b2
--- /dev/null
+++ b/other/gradio/gradio_chat.py
@@ -0,0 +1,62 @@
+import os
+import sys
+import warnings
+
+import gradio as gr
+import openai
+
+warnings.filterwarnings("ignore")
+
+API_BASE = os.getenv('API_BASE')
+if not API_BASE:
+    print('Must set the secret variable API_BASE to your endpoint, e.g. https://your-site/api/openai/v1')
+    sys.exit(1)
+
+# A system prompt can be injected into the very first slot of the context.
+# If the user sends a message that contains CONTEXT_TRIGGER_PHRASE, the
+# contents of CONTEXT_TRIGGER_INJECTION will be injected as a system message.
+# Setting CONTEXT_TRIGGER_PHRASE also adds it to the selectable examples.
+CONTEXT_TRIGGER_PHRASE = os.getenv('CONTEXT_TRIGGER_PHRASE')
+CONTEXT_TRIGGER_INJECTION = os.getenv('CONTEXT_TRIGGER_INJECTION')
+
+openai.api_key = 'null'
+openai.api_base = API_BASE
+
+
+def stream_response(prompt, history):
+    messages = []
+    do_injection = False
+    for human, assistant in history:
+        messages.append({'role': 'user', 'content': str(human)})
+        messages.append({'role': 'assistant', 'content': str(assistant)})
+
+        if CONTEXT_TRIGGER_PHRASE and CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in human:
+            do_injection = True
+    messages.append({'role': 'user', 'content': prompt})
+
+    if do_injection or (CONTEXT_TRIGGER_PHRASE and CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in prompt):
+        messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})
+
+    try:
+        response = openai.ChatCompletion.create(
+            model='0',
+            messages=messages,
+            temperature=0,
+            max_tokens=300,
+            stream=True
+        )
+    except Exception:
+        raise gr.Error("Failed to reach inference endpoint.")
+
+    message = ''
+    for chunk in response:
+        if chunk['choices'][0]['delta'].get('content'):
+            message += chunk['choices'][0]['delta']['content']
+            yield message
+
+
+examples = ["hello", "hola", "merhaba"]
+if CONTEXT_TRIGGER_PHRASE:
+    examples.insert(0, CONTEXT_TRIGGER_PHRASE)
+
+gr.ChatInterface(stream_response, examples=examples, title="Chatbot Demo", analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=3).launch()
diff --git a/other/gradio_chat.py b/other/gradio_chat.py
deleted file mode 100644
index eb10d26..0000000
--- a/other/gradio_chat.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import warnings
-
-import gradio as gr
-import openai
-
-warnings.filterwarnings("ignore")
-
-openai.api_key = 'null'
-openai.api_base = 'http://localhost:5000/api/openai/v1'
-
-
-def stream_response(prompt, history):
-    messages = []
-    for x in history:
-        messages.append({'role': 'user', 'content': x[0]})
-        messages.append({'role': 'assistant', 'content': x[1]})
-    messages.append({'role': 'user', 'content': prompt})
-
-    response = openai.ChatCompletion.create(
-        model='0',
-        messages=messages,
-        temperature=0,
-        max_tokens=300,
-        stream=True
-    )
-
-    message = ''
-    for chunk in response:
-        message += chunk['choices'][0]['delta']['content']
-        yield message
-
-
-gr.ChatInterface(stream_response, examples=["hello", "hola", "merhaba"], title="Chatbot Demo", analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue().launch()
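For reference, here is a minimal standalone sketch of the trigger-phrase injection behavior the new gradio_chat.py implements. The trigger_phrase and injection values below are made-up placeholders standing in for the CONTEXT_TRIGGER_PHRASE and CONTEXT_TRIGGER_INJECTION secrets:

# Minimal sketch of the system-prompt injection logic, assuming
# hypothetical values for the two environment variables.
trigger_phrase = '!persona'        # stands in for CONTEXT_TRIGGER_PHRASE
injection = 'You are a pirate.'    # stands in for CONTEXT_TRIGGER_INJECTION

history = [('hello', 'hi there'), ('!persona on', 'aye')]
prompt = 'tell me a joke'

messages = []
do_injection = False
for human, assistant in history:
    messages.append({'role': 'user', 'content': human})
    messages.append({'role': 'assistant', 'content': assistant})
    if trigger_phrase in human:
        do_injection = True
messages.append({'role': 'user', 'content': prompt})

# The system prompt goes into the very first slot of the context.
if do_injection or trigger_phrase in prompt:
    messages.insert(0, {'role': 'system', 'content': injection})

print(messages[0])  # {'role': 'system', 'content': 'You are a pirate.'}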