Merge cluster to master #3
|
@ -20,8 +20,8 @@ else:
|
|||
config_path = Path(script_path, 'config', 'config.yml')
|
||||
|
||||
if __name__ == "__main__":
|
||||
Redis().flushall()
|
||||
print('Flushed Redis.')
|
||||
# Redis().flushall()
|
||||
# print('Flushed Redis.')
|
||||
|
||||
success, config, msg = load_config(config_path)
|
||||
if not success:
|
||||
|
|
|
@ -19,6 +19,8 @@ def worker(backend_url):
|
|||
increment_ip_count(client_ip, 'processing_ips')
|
||||
incr_active_workers(selected_model, backend_url)
|
||||
|
||||
print('Worker starting processing for', client_ip)
|
||||
|
||||
try:
|
||||
if not request_json_body:
|
||||
# This was a dummy request from the streaming handlers.
|
||||
|
|
|
@ -44,7 +44,6 @@ def start_background():
|
|||
t.start()
|
||||
print('Started the console printer.')
|
||||
|
||||
redis_running_models.flush()
|
||||
t = Thread(target=cluster_worker)
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
|
|
@ -12,6 +12,12 @@ if not API_BASE:
|
|||
print('Must set the secret variable API_BASE to your https://your-site/api/openai/v1')
|
||||
sys.exit(1)
|
||||
|
||||
BACKUP_API_BASE = os.getenv('BACKUP_API_BASE')
|
||||
if BACKUP_API_BASE:
|
||||
print('Using BACKUP_API_BASE:', BACKUP_API_BASE)
|
||||
|
||||
APP_TITLE = os.getenv('APP_TITLE')
|
||||
|
||||
# A system prompt can be injected into the very first spot in the context.
|
||||
# If the user sends a message that contains the CONTEXT_TRIGGER_PHRASE,
|
||||
# the content in CONTEXT_TRIGGER_INJECTION will be injected.
|
||||
|
@ -37,16 +43,22 @@ def stream_response(prompt, history):
|
|||
if do_injection or (CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in prompt):
|
||||
messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})
|
||||
|
||||
try:
|
||||
response = openai.ChatCompletion.create(
|
||||
model='0',
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
max_tokens=300,
|
||||
stream=True
|
||||
)
|
||||
except Exception:
|
||||
raise gr.Error("Failed to reach inference endpoint.")
|
||||
for _ in range(2):
|
||||
try:
|
||||
response = openai.ChatCompletion.create(
|
||||
model='0',
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
max_tokens=300,
|
||||
stream=True
|
||||
)
|
||||
break
|
||||
except Exception:
|
||||
openai.api_base = BACKUP_API_BASE
|
||||
raise gr.Error("Failed to reach inference endpoint.")
|
||||
|
||||
# Go back to the default endpoint
|
||||
openai.api_base = API_BASE
|
||||
|
||||
message = ''
|
||||
for chunk in response:
|
||||
|
@ -55,8 +67,8 @@ def stream_response(prompt, history):
|
|||
yield message
|
||||
|
||||
|
||||
examples = ["hello", "hola", "merhaba"]
|
||||
examples = ["hello"]
|
||||
if CONTEXT_TRIGGER_PHRASE:
|
||||
examples.insert(0, CONTEXT_TRIGGER_PHRASE)
|
||||
|
||||
gr.ChatInterface(stream_response, examples=examples, title="Chatbot Demo", analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=3).launch()
|
||||
gr.ChatInterface(stream_response, examples=examples, title=APP_TITLE, analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=1, api_open=False).launch(show_api=False)
|
||||
|
|
Reference in New Issue