Merge cluster to master #3
|
@ -67,7 +67,6 @@ class OpenAIRequestHandler(RequestHandler):
|
||||||
|
|
||||||
llm_request = {**self.parameters, 'prompt': self.prompt}
|
llm_request = {**self.parameters, 'prompt': self.prompt}
|
||||||
(success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
|
(success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
|
||||||
|
|
||||||
model = self.request_json_body.get('model')
|
model = self.request_json_body.get('model')
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
|
@ -98,6 +97,7 @@ class OpenAIRequestHandler(RequestHandler):
|
||||||
return response, 429
|
return response, 429
|
||||||
|
|
||||||
def handle_error(self, error_msg: str, error_type: str = 'error') -> Tuple[flask.Response, int]:
|
def handle_error(self, error_msg: str, error_type: str = 'error') -> Tuple[flask.Response, int]:
|
||||||
|
print(error_msg)
|
||||||
return jsonify({
|
return jsonify({
|
||||||
"error": {
|
"error": {
|
||||||
"message": "Invalid request, check your parameters and try again.",
|
"message": "Invalid request, check your parameters and try again.",
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
import gradio as gr
|
||||||
|
import openai
|
||||||
|
|
||||||
|
# Suppress every warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# Base URL handed to the openai client below; startup is aborted without it.
API_BASE = os.environ.get('API_BASE')
if not API_BASE:
    print('Must set the secret variable API_BASE to your https://your-site/api/openai/v1')
    sys.exit(1)

# Optional system-prompt injection.
# When a user message contains CONTEXT_TRIGGER_PHRASE, the text stored in
# CONTEXT_TRIGGER_INJECTION is placed at the very start of the context as a
# system message. A configured CONTEXT_TRIGGER_PHRASE is also offered as one
# of the selectable examples.
CONTEXT_TRIGGER_PHRASE = os.environ.get('CONTEXT_TRIGGER_PHRASE')
CONTEXT_TRIGGER_INJECTION = os.environ.get('CONTEXT_TRIGGER_INJECTION')

# Point the openai client at the configured endpoint.
# NOTE(review): 'null' looks like a placeholder key — confirm the backend ignores it.
openai.api_base = API_BASE
openai.api_key = 'null'
|
||||||
|
|
||||||
|
|
||||||
|
def stream_response(prompt, history):
    """Stream a chat completion for ``prompt`` given the prior ``history``.

    Args:
        prompt: The newest user message.
        history: Iterable of ``(user_message, assistant_message)`` pairs from
            earlier turns; each is converted with ``str()`` before sending.

    Yields:
        The assistant reply accumulated so far, once per received chunk that
        carries content.

    Raises:
        gr.Error: If the completion request fails or the stream breaks
            while it is being read.
    """
    messages = []
    for human, assistant in history:
        messages.append({'role': 'user', 'content': str(human)})
        messages.append({'role': 'assistant', 'content': str(assistant)})
    messages.append({'role': 'user', 'content': prompt})

    # Injection needs both settings. Guarding against an unset phrase avoids
    # the TypeError that `None in "<text>"` would raise.
    injection_enabled = bool(CONTEXT_TRIGGER_INJECTION) and CONTEXT_TRIGGER_PHRASE is not None
    if injection_enabled:
        # Look at every user turn (past and current), using the same str()
        # conversion that is applied to the message content above.
        user_texts = [str(human) for human, _ in history]
        user_texts.append(str(prompt))
        if any(CONTEXT_TRIGGER_PHRASE in text for text in user_texts):
            messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})

    try:
        response = openai.ChatCompletion.create(
            model='0',
            messages=messages,
            temperature=0,
            max_tokens=300,
            stream=True,
        )
    except Exception as err:
        raise gr.Error("Failed to reach inference endpoint.") from err

    message = ''
    try:
        for chunk in response:
            # A delta may be empty or carry keys other than 'content'
            # (e.g. a role-only chunk); skip those instead of raising KeyError.
            piece = chunk['choices'][0]['delta'].get('content')
            if piece is None:
                continue
            message += piece
            yield message
    except Exception as err:
        # The stream can fail after the request was accepted; report it the
        # same way as a failed request.
        raise gr.Error("Failed to reach inference endpoint.") from err
|
||||||
|
|
||||||
|
|
||||||
|
# Sample prompts shown in the UI; a configured trigger phrase is listed first.
examples = ([CONTEXT_TRIGGER_PHRASE] if CONTEXT_TRIGGER_PHRASE else []) + ["hello", "hola", "merhaba"]

# Build the chat UI, enable queuing with a concurrency count of 3, and start serving.
gr.ChatInterface(
    stream_response,
    examples=examples,
    title="Chatbot Demo",
    analytics_enabled=False,
    cache_examples=False,
    css='#component-0{height:100%!important}',
).queue(concurrency_count=3).launch()
|
|
@ -1,33 +0,0 @@
|
||||||
import warnings
|
|
||||||
|
|
||||||
import gradio as gr
|
|
||||||
import openai
|
|
||||||
|
|
||||||
# Suppress every warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# Point the openai client at the hard-coded local endpoint.
# NOTE(review): 'null' looks like a placeholder key — confirm the backend ignores it.
openai.api_base = 'http://localhost:5000/api/openai/v1'
openai.api_key = 'null'
|
|
||||||
|
|
||||||
|
|
||||||
def stream_response(prompt, history):
    """Stream a chat completion for ``prompt`` given the prior ``history``.

    Args:
        prompt: The newest user message.
        history: Iterable of ``(user_message, assistant_message)`` pairs from
            earlier turns.

    Yields:
        The assistant reply accumulated so far, once per received chunk that
        carries content.
    """
    messages = []
    for human, assistant in history:
        messages.append({'role': 'user', 'content': human})
        messages.append({'role': 'assistant', 'content': assistant})
    messages.append({'role': 'user', 'content': prompt})

    response = openai.ChatCompletion.create(
        model='0',
        messages=messages,
        temperature=0,
        max_tokens=300,
        stream=True,
    )

    message = ''
    for chunk in response:
        # A delta may be empty or lack 'content' (e.g. a role-only or final
        # chunk); skip those instead of raising KeyError.
        piece = chunk['choices'][0]['delta'].get('content')
        if piece is None:
            continue
        message += piece
        yield message
|
|
||||||
|
|
||||||
|
|
||||||
# Build the chat UI with three sample prompts, enable queuing, and start serving.
gr.ChatInterface(
    stream_response,
    examples=["hello", "hola", "merhaba"],
    title="Chatbot Demo",
    analytics_enabled=False,
    cache_examples=False,
    css='#component-0{height:100%!important}',
).queue().launch()
|
|
Reference in New Issue