Merge cluster to master #3
|
@ -57,7 +57,6 @@ def openai_chat_completions():
|
||||||
else:
|
else:
|
||||||
handler.prompt = transform_messages_to_prompt(handler.request.json['messages'])
|
handler.prompt = transform_messages_to_prompt(handler.request.json['messages'])
|
||||||
|
|
||||||
generated_text = ''
|
|
||||||
response_status_code = 0
|
response_status_code = 0
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
|
@ -98,6 +97,7 @@ def openai_chat_completions():
|
||||||
oai_string = generate_oai_string(30)
|
oai_string = generate_oai_string(30)
|
||||||
|
|
||||||
def generate():
|
def generate():
|
||||||
|
try:
|
||||||
response = generator(msg_to_backend, handler.backend_url)
|
response = generator(msg_to_backend, handler.backend_url)
|
||||||
generated_text = ''
|
generated_text = ''
|
||||||
partial_response = b''
|
partial_response = b''
|
||||||
|
@ -146,13 +146,13 @@ def openai_chat_completions():
|
||||||
r_url,
|
r_url,
|
||||||
handler.backend_url,
|
handler.backend_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
return Response(generate(), mimetype='text/event-stream')
|
|
||||||
except Exception:
|
|
||||||
traceback.print_exc()
|
|
||||||
return 'INTERNAL SERVER', 500
|
|
||||||
finally:
|
finally:
|
||||||
# The worker incremented it, we'll decrement it.
|
# The worker incremented it, we'll decrement it.
|
||||||
decrement_ip_count(handler.client_ip, 'processing_ips')
|
decrement_ip_count(handler.client_ip, 'processing_ips')
|
||||||
decr_active_workers(handler.selected_model, handler.backend_url)
|
decr_active_workers(handler.selected_model, handler.backend_url)
|
||||||
print(len(generated_text))
|
print(len(generated_text))
|
||||||
|
|
||||||
|
return Response(generate(), mimetype='text/event-stream')
|
||||||
|
except Exception:
|
||||||
|
traceback.print_exc()
|
||||||
|
return 'INTERNAL SERVER', 500
|
||||||
|
|
Reference in New Issue