Merge cluster to master #3

Merged
cyberes merged 163 commits from cluster into master 2023-10-27 19:19:22 -06:00
4 changed files with 28 additions and 15 deletions
Showing only changes of commit f4e5b5275d - Show all commits

View File

@@ -20,8 +20,8 @@ else:
config_path = Path(script_path, 'config', 'config.yml')
if __name__ == "__main__":
Redis().flushall()
print('Flushed Redis.')
# Redis().flushall()
# print('Flushed Redis.')
success, config, msg = load_config(config_path)
if not success:

View File

@@ -19,6 +19,8 @@ def worker(backend_url):
increment_ip_count(client_ip, 'processing_ips')
incr_active_workers(selected_model, backend_url)
print('Worker starting processing for', client_ip)
try:
if not request_json_body:
# This was a dummy request from the streaming handlers.

View File

@@ -44,7 +44,6 @@ def start_background():
t.start()
print('Started the console printer.')
redis_running_models.flush()
t = Thread(target=cluster_worker)
t.daemon = True
t.start()

View File

@@ -12,6 +12,12 @@ if not API_BASE:
print('Must set the secret variable API_BASE to your https://your-site/api/openai/v1')
sys.exit(1)
BACKUP_API_BASE = os.getenv('BACKUP_API_BASE')
if BACKUP_API_BASE:
print('Using BACKUP_API_BASE:', BACKUP_API_BASE)
APP_TITLE = os.getenv('APP_TITLE')
# A system prompt can be injected into the very first spot in the context.
# If the user sends a message that contains the CONTEXT_TRIGGER_PHRASE,
# the content in CONTEXT_TRIGGER_INJECTION will be injected.
@@ -37,16 +43,22 @@ def stream_response(prompt, history):
if do_injection or (CONTEXT_TRIGGER_INJECTION and CONTEXT_TRIGGER_PHRASE in prompt):
messages.insert(0, {'role': 'system', 'content': CONTEXT_TRIGGER_INJECTION})
try:
response = openai.ChatCompletion.create(
model='0',
messages=messages,
temperature=0,
max_tokens=300,
stream=True
)
except Exception:
raise gr.Error("Failed to reach inference endpoint.")
for _ in range(2):
try:
response = openai.ChatCompletion.create(
model='0',
messages=messages,
temperature=0,
max_tokens=300,
stream=True
)
break
except Exception:
openai.api_base = BACKUP_API_BASE
raise gr.Error("Failed to reach inference endpoint.")
# Go back to the default endpoint
openai.api_base = API_BASE
message = ''
for chunk in response:
@@ -55,8 +67,8 @@ def stream_response(prompt, history):
yield message
examples = ["hello", "hola", "merhaba"]
examples = ["hello"]
if CONTEXT_TRIGGER_PHRASE:
examples.insert(0, CONTEXT_TRIGGER_PHRASE)
gr.ChatInterface(stream_response, examples=examples, title="Chatbot Demo", analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=3).launch()
gr.ChatInterface(stream_response, examples=examples, title=APP_TITLE, analytics_enabled=False, cache_examples=False, css='#component-0{height:100%!important}').queue(concurrency_count=1, api_open=False).launch(show_api=False)