fix flask "working outside of request context" exception in stream logging

Cyberes 2023-09-25 17:22:28 -06:00
parent 135bd743bb
commit 30282479a0
2 changed files with 3 additions and 2 deletions


@@ -104,10 +104,12 @@ def stream(ws):
     end_time = time.time()
     elapsed_time = end_time - start_time
+    r_headers = dict(request.headers)
+    r_url = request.url
     def background_task():
         generated_tokens = tokenize(generated_text)
-        log_prompt(handler.client_ip, handler.token, input_prompt, generated_text, elapsed_time, handler.parameters, dict(request.headers), response_status_code, request.url, response_tokens=generated_tokens)
+        log_prompt(handler.client_ip, handler.token, input_prompt, generated_text, elapsed_time, handler.parameters, r_headers, response_status_code, r_url, response_tokens=generated_tokens)
     # TODO: use async/await instead of threads
     threading.Thread(target=background_task).start()
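
For context: Flask's request object is a context-local proxy that is only bound while the originating request is being handled. The old code read dict(request.headers) and request.url inside background_task, which runs on a separate thread after the handler has moved on, so Flask raises RuntimeError: Working outside of request context. The fix copies the needed values into plain locals before the thread starts. A minimal sketch of the same pattern (the route, names, and logging body below are hypothetical stand-ins, not the project's actual code):

    import threading
    import time

    from flask import Flask, request

    app = Flask(__name__)

    @app.route('/stream')
    def stream():
        start_time = time.time()
        generated_text = 'example output'  # stand-in for the real generation step
        elapsed_time = time.time() - start_time

        # Copy request data into plain values *while the request context is
        # still active*; the background thread must never touch `request`.
        r_headers = dict(request.headers)
        r_url = request.url

        def background_task():
            # Safe: only captured locals are used here, not the request proxy.
            print(r_url, r_headers.get('User-Agent'), elapsed_time)

        threading.Thread(target=background_task).start()
        return 'OK'

An alternative is Flask's copy_current_request_context decorator, which pushes a copy of the request context inside the wrapped function; copying just the two values, as this commit does, is lighter and keeps the thread independent of the request's lifetime.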


@@ -20,7 +20,6 @@ from llm_server.routes.server_error import handle_server_error
 # TODO: add more excluding to SYSTEM__ tokens
 # TODO: make sure the OpenAI moderation endpoint scans the last n messages rather than only the last one (make that threaded)
 # TODO: support turbo-instruct on openai endpoint
-# TODO: show requested model (not actual LLM backend model) in OpenAI responses
 try:
     import vllm