2023-10-27 19:19:22 -06:00
4 changed files with 28 additions and 15 deletions
--- a/daemon.py
+++ b/daemon.py
@@ -20,8 +20,8 @@ else:
    config_path = Path(script_path, 'config', 'config.yml')

 if __name__ == "__main__":
-    Redis().flushall()
-    print('Flushed Redis.')
+    # Redis().flushall()
+    # print('Flushed Redis.')

    success, config, msg = load_config(config_path)
    if not success:
--- a/llm_server/workers/inferencer.py
+++ b/llm_server/workers/inferencer.py
@@ -19,6 +19,8 @@ def worker(backend_url):
        increment_ip_count(client_ip, 'processing_ips')
        incr_active_workers(selected_model, backend_url)

+        print('Worker starting processing for', client_ip)
+
        try:
            if not request_json_body:
                # This was a dummy request from the streaming handlers.
--- a/llm_server/workers/threader.py
+++ b/llm_server/workers/threader.py
@@ -44,7 +44,6 @@ def start_background():
    t.start()
    print('Started the console printer.')

-    redis_running_models.flush()
    t = Thread(target=cluster_worker)
    t.daemon = True
    t.start()
--- a/other/gradio/gradio_chat.py
+++ b/other/gradio/gradio_chat.py
@@ -12,6 +12,12 @@ if not API_BASE:
    print('Must set the secret variable API_BASE to your https://your-site/api/openai/v1')
    sys.exit(1)

+BACKUP_API_BASE = os.getenv('BACKUP_API_BASE')
+if BACKUP_API_BASE:
+    print('Using BACKUP_API_BASE:', BACKUP_API_BASE)
+
+APP_TITLE = os.getenv('APP_TITLE')
+
 # A system prompt can be injected into the very first spot in the context.
 # If the user sends a message that contains the CONTEXT_TRIGGER_PHRASE,
 # the content in CONTEXT_TRIGGER_INJECTION will be injected.
@@ -37,16 +43,22 @@ def stream_response(prompt, history):
    if do_injection or (CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in prompt):
        messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})

-    try:
-        response = openai.ChatCompletion.create(
-            model='0',
-            messages=messages,
-            temperature=0,
-            max_tokens=300,
-            stream=True
-        )
-    except Exception:
-        raise gr.Error("Failed to reach inference endpoint.")
+    for _ in range(2):
+        try:
+            response = openai.ChatCompletion.create(
+                model='0', messages=messages, temperature=0, max_tokens=300, stream=True
+            )
+            break
+        except Exception:
+            # Switch endpoints and let the loop retry; raising here would skip the backup.
+            openai.api_base = BACKUP_API_BASE or API_BASE
+    else:
+        # Both attempts failed: restore the default endpoint before reporting the error.
+        openai.api_base = API_BASE
+        raise gr.Error("Failed to reach inference endpoint.")
+
+    # Go back to the default endpoint
+    openai.api_base = API_BASE

    message = ''
    for chunk in response:
@@ -55,8 +67,8 @@ def stream_response(prompt, history):
            yield message


-examples = ["hello", "hola", "merhaba"]
+examples = ["hello"]
 if CONTEXT_TRIGGER_PHRASE:
    examples.insert(0, CONTEXT_TRIGGER_PHRASE)

-gr.ChatInterface(stream_response, examples=examples, title="Chatbot Demo", analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=3).launch()
+gr.ChatInterface(stream_response, examples=examples, title=APP_TITLE, analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=1, api_open=False).launch(show_api=False)