Merge cluster to master #3

Merged
cyberes merged 163 commits from cluster into master 2023-10-27 19:19:22 -06:00
1 changed file with 51 additions and 54 deletions
Showing only changes of commit 0718f10eb9


@@ -3,7 +3,6 @@ import time
 import traceback
 from flask import Response, jsonify, request
-from redis import Redis
 from llm_server.custom_redis import redis
 from . import openai_bp
@@ -98,62 +97,60 @@ def openai_chat_completions():
         oai_string = generate_oai_string(30)

         def generate():
-            try:
-                response = generator(msg_to_backend, handler.backend_url)
+            response = generator(msg_to_backend, handler.backend_url)
             generated_text = ''
             partial_response = b''
             for chunk in response.iter_content(chunk_size=1):
                 partial_response += chunk
                 if partial_response.endswith(b'\x00'):
                     json_strs = partial_response.split(b'\x00')
                     for json_str in json_strs:
                         if json_str:
                             try:
                                 json_obj = json.loads(json_str.decode())
                                 new = json_obj['text'][0].split(handler.prompt + generated_text)[1]
                                 generated_text = generated_text + new
                             except IndexError:
                                 # ????
                                 continue
                             data = {
                                 "id": f"chatcmpl-{oai_string}",
                                 "object": "chat.completion.chunk",
                                 "created": int(time.time()),
                                 "model": model,
                                 "choices": [
                                     {
                                         "index": 0,
                                         "delta": {
                                             "content": new
                                         },
                                         "finish_reason": None
                                     }
                                 ]
                             }
                             yield f'data: {json.dumps(data)}\n\n'
             yield 'data: [DONE]\n\n'
             end_time = time.time()
             elapsed_time = end_time - start_time
             log_to_db(
                 handler.client_ip,
                 handler.token,
                 handler.prompt,
                 generated_text,
                 elapsed_time,
                 handler.parameters,
                 r_headers,
                 response_status_code,
                 r_url,
                 handler.backend_url,
             )
-            finally:
-                # The worker incremented it, we'll decrement it.
-                decrement_ip_count(handler.client_ip, 'processing_ips')
-                decr_active_workers(handler.selected_model, handler.backend_url)
-                print('cleaned up')

         return Response(generate(), mimetype='text/event-stream')
     except Exception:
         traceback.print_exc()
         return 'INTERNAL SERVER', 500
+    finally:
+        # The worker incremented it, we'll decrement it.
+        decrement_ip_count(handler.client_ip, 'processing_ips')
+        decr_active_workers(handler.selected_model, handler.backend_url)
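
For context on what the streaming branch in this hunk is doing: the backend returns a stream of NUL-terminated JSON records, each containing the full text generated so far, and generate() compares that against the prompt plus what it has already emitted to get the next delta for the SSE response. Below is a rough, self-contained sketch of that parsing pattern, not the repo's code: parse_nul_delimited_stream, chunks, and prompt are illustrative names, and this version slices off the previously seen text and resets the buffer instead of using the str.split / IndexError approach shown in the diff.

import json

def parse_nul_delimited_stream(chunks, prompt):
    """Yield only the newly generated text from each NUL-terminated JSON record."""
    generated_text = ''
    partial = b''
    for chunk in chunks:                      # chunks: iterable of raw bytes from the backend
        partial += chunk
        if partial.endswith(b'\x00'):
            for record in partial.split(b'\x00'):
                if not record:
                    continue                  # skip the empty piece after the trailing delimiter
                obj = json.loads(record.decode())
                full_text = obj['text'][0]    # prompt plus everything generated so far
                new = full_text[len(prompt) + len(generated_text):]
                generated_text += new
                yield new
            partial = b''                     # buffer fully consumed; start a fresh record

# Example with a fake backend stream of two records (empty prompt for brevity):
fake = [json.dumps({'text': ['Hello wor']}).encode() + b'\x00',
        json.dumps({'text': ['Hello world!']}).encode() + b'\x00']
print(list(parse_nul_delimited_stream(fake, prompt='')))   # ['Hello wor', 'ld!']

The actual route then wraps each delta in the chat.completion.chunk payload shown above and emits it as a data: line on the event stream.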