local-llm-server/other/gradio/gradio_chat.py

import os
import sys
import warnings

import gradio as gr
import openai

# Suppress every warning category for the lifetime of the process.
warnings.filterwarnings("ignore")

# Primary endpoint, read from the environment. Startup aborts without it.
API_BASE = os.environ.get('API_BASE')
if not API_BASE:
    print('Must set the secret variable API_BASE to your https://your-site/api/openai/v1')
    sys.exit(1)

# Optional secondary endpoint; announced on startup when it is configured.
BACKUP_API_BASE = os.environ.get('BACKUP_API_BASE')
if BACKUP_API_BASE:
    print('Using BACKUP_API_BASE:', BACKUP_API_BASE)

# Optional title passed to the chat interface (None when unset).
APP_TITLE = os.environ.get('APP_TITLE')

# Optional system-prompt injection:
#   * CONTEXT_TRIGGER_PHRASE    - text to look for in the user's messages.
#   * CONTEXT_TRIGGER_INJECTION - system prompt placed in the very first slot
#                                 of the context once the phrase has been seen.
# When CONTEXT_TRIGGER_PHRASE is set it is also offered as a selectable example.
CONTEXT_TRIGGER_PHRASE = os.environ.get('CONTEXT_TRIGGER_PHRASE')
CONTEXT_TRIGGER_INJECTION = os.environ.get('CONTEXT_TRIGGER_INJECTION')

# Point the openai client at the configured endpoint, using a placeholder key.
openai.api_base = API_BASE
openai.api_key = 'null'


def stream_response(prompt, history):
    """Stream a chat completion for ``prompt``, yielding the growing reply.

    The conversation in ``history`` is replayed as alternating user/assistant
    messages, followed by ``prompt``. When both CONTEXT_TRIGGER_PHRASE and
    CONTEXT_TRIGGER_INJECTION are configured and the phrase occurs in any user
    message (past or current), CONTEXT_TRIGGER_INJECTION is inserted as a
    system message at the front of the context.

    The request is sent to API_BASE first. If creating the completion fails
    and BACKUP_API_BASE is configured, it is retried once against the backup.
    Errors raised later, while iterating the stream, are not retried.

    Args:
        prompt: The user's newest message.
        history: Iterable of ``(human, assistant)`` pairs from earlier turns.

    Yields:
        The accumulated assistant reply after each streamed content chunk.

    Raises:
        gr.Error: If no configured endpoint accepted the request.
    """
    # Injection needs both settings; checking both avoids `phrase in text`
    # raising TypeError when only the injection text is configured.
    can_inject = bool(CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE)

    messages = []
    do_injection = False
    for human, assistant in history:
        messages.append({'role': 'user', 'content': str(human)})
        messages.append({'role': 'assistant', 'content': str(assistant)})

        # Search the same string form that is sent as the message content.
        if can_inject and CONTEXT_TRIGGER_PHRASE in str(human):
            do_injection = True
    messages.append({'role': 'user', 'content': prompt})

    if do_injection or (can_inject and CONTEXT_TRIGGER_PHRASE in prompt):
        messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})

    # Try the primary endpoint, then the backup only if one is configured.
    endpoints = [API_BASE]
    if BACKUP_API_BASE:
        endpoints.append(BACKUP_API_BASE)

    response = None
    try:
        for endpoint in endpoints:
            openai.api_base = endpoint
            try:
                response = openai.ChatCompletion.create(
                    model='0',
                    messages=messages,
                    temperature=0,
                    max_tokens=300,
                    stream=True
                )
                break
            except Exception:
                # Deliberately broad: any failure moves on to the next
                # endpoint. The user-facing error is raised after the loop.
                continue
    finally:
        # Always go back to the default endpoint, even when every attempt
        # failed, so the next call does not start on the backup.
        openai.api_base = API_BASE

    if response is None:
        raise gr.Error("Failed to reach inference endpoint.")

    message = ''
    for chunk in response:
        choices = chunk.get('choices')
        if not choices:
            continue
        # A delta may carry no 'content' key (e.g. a role-only chunk);
        # skip it instead of raising KeyError.
        content = choices[0].get('delta', {}).get('content')
        if content is not None:
            message += content
            yield message


# Selectable example prompts; the trigger phrase, when configured, comes first.
examples = [CONTEXT_TRIGGER_PHRASE, "hello"] if CONTEXT_TRIGGER_PHRASE else ["hello"]

# Build the chat UI around stream_response, queue requests with a
# concurrency count of 1, and launch with the API page hidden.
(
    gr.ChatInterface(
        stream_response,
        examples=examples,
        title=APP_TITLE,
        analytics_enabled=False,
        cache_examples=False,
        css='#component-0{height:100%!important}',
    )
    .queue(concurrency_count=1, api_open=False)
    .launch(show_api=False)
)
misc changes 2023-10-09 18:12:12 -06:00			`import os`
			`import sys`
			`import warnings`

			`import gradio as gr`
			`import openai`

			`warnings.filterwarnings("ignore")`

			`API_BASE = os.getenv('API_BASE')`
			`if not API_BASE:`
			`print('Must set the secret variable API_BASE to your https://your-site/api/openai/v1')`
			`sys.exit(1)`

test 2023-10-11 09:09:41 -06:00			`BACKUP_API_BASE = os.getenv('BACKUP_API_BASE')`
			`if BACKUP_API_BASE:`
			`print('Using BACKUP_API_BASE:', BACKUP_API_BASE)`

			`APP_TITLE = os.getenv('APP_TITLE')`

misc changes 2023-10-09 18:12:12 -06:00			`# A system prompt can be injected into the very first spot in the context.`
			`# If the user sends a message that contains the CONTEXT_TRIGGER_PHRASE,`
			`# the content in CONTEXT_TRIGGER_INJECTION will be injected.`
			`# Setting CONTEXT_TRIGGER_PHRASE will also add it to the selectable examples.`
			`CONTEXT_TRIGGER_PHRASE = os.getenv('CONTEXT_TRIGGER_PHRASE')`
			`CONTEXT_TRIGGER_INJECTION = os.getenv('CONTEXT_TRIGGER_INJECTION')`

			`openai.api_key = 'null'`
			`openai.api_base = API_BASE`


			`def stream_response(prompt, history):`
			`messages = []`
			`do_injection = False`
			`for human, assistant in history:`
			`messages.append({'role': 'user', 'content': str(human)})`
			`messages.append({'role': 'assistant', 'content': str(assistant)})`

			`if CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in human:`
			`do_injection = True`
			`messages.append({'role': 'user', 'content': prompt})`

			`if do_injection or (CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in prompt):`
			`messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})`

test 2023-10-11 09:09:41 -06:00			`for _ in range(2):`
			`try:`
			`response = openai.ChatCompletion.create(`
			`model='0',`
			`messages=messages,`
			`temperature=0,`
			`max_tokens=300,`
			`stream=True`
			`)`
			`break`
			`except Exception:`
			`openai.api_base = BACKUP_API_BASE`
			`raise gr.Error("Failed to reach inference endpoint.")`

			`# Go back to the default endpoint`
			`openai.api_base = API_BASE`
misc changes 2023-10-09 18:12:12 -06:00
			`message = ''`
			`for chunk in response:`
			`if len(chunk['choices'][0]['delta']) != 0:`
			`message += chunk['choices'][0]['delta']['content']`
			`yield message`


test 2023-10-11 09:09:41 -06:00			`examples = ["hello"]`
misc changes 2023-10-09 18:12:12 -06:00			`if CONTEXT_TRIGGER_PHRASE:`
			`examples.insert(0, CONTEXT_TRIGGER_PHRASE)`

test 2023-10-11 09:09:41 -06:00			`gr.ChatInterface(stream_response, examples=examples, title=APP_TITLE, analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=1, api_open=False).launch(show_api=False)`