Merge cluster to master #3
|
@ -3,7 +3,6 @@ import time
|
|||
import traceback
|
||||
|
||||
from flask import Response, jsonify, request
|
||||
from redis import Redis
|
||||
|
||||
from llm_server.custom_redis import redis
|
||||
from . import openai_bp
|
||||
|
@ -98,7 +97,6 @@ def openai_chat_completions():
|
|||
oai_string = generate_oai_string(30)
|
||||
|
||||
def generate():
|
||||
try:
|
||||
response = generator(msg_to_backend, handler.backend_url)
|
||||
generated_text = ''
|
||||
partial_response = b''
|
||||
|
@ -147,13 +145,12 @@ def openai_chat_completions():
|
|||
r_url,
|
||||
handler.backend_url,
|
||||
)
|
||||
finally:
|
||||
# The worker incremented it, we'll decrement it.
|
||||
decrement_ip_count(handler.client_ip, 'processing_ips')
|
||||
decr_active_workers(handler.selected_model, handler.backend_url)
|
||||
print('cleaned up')
|
||||
|
||||
return Response(generate(), mimetype='text/event-stream')
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
return 'INTERNAL SERVER', 500
|
||||
finally:
|
||||
# The worker incremented it, we'll decrement it.
|
||||
decrement_ip_count(handler.client_ip, 'processing_ips')
|
||||
decr_active_workers(handler.selected_model, handler.backend_url)
|
||||
|
|
Reference in New Issue