fix flask exception

parent 135bd743bb
commit 30282479a0
@@ -104,10 +104,12 @@ def stream(ws):
     end_time = time.time()
     elapsed_time = end_time - start_time
+    r_headers = dict(request.headers)
+    r_url = request.url

     def background_task():
         generated_tokens = tokenize(generated_text)
-        log_prompt(handler.client_ip, handler.token, input_prompt, generated_text, elapsed_time, handler.parameters, dict(request.headers), response_status_code, request.url, response_tokens=generated_tokens)
+        log_prompt(handler.client_ip, handler.token, input_prompt, generated_text, elapsed_time, handler.parameters, r_headers, response_status_code, r_url, response_tokens=generated_tokens)

     # TODO: use async/await instead of threads
     threading.Thread(target=background_task).start()
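The assignments added above follow the usual workaround for background work in Flask: flask.request is a context-local proxy, and a thread started with threading.Thread does not inherit the request context, so reading the proxy from that thread raises "RuntimeError: Working outside of request context" (presumably the Flask exception this commit fixes). Below is a minimal, self-contained sketch of the pattern; the route names and the log_later helper are illustrative, not taken from the repo.

import threading

from flask import Flask, jsonify, request

app = Flask(__name__)

@app.route('/broken')
def broken():
    def log_later():
        # Raises "RuntimeError: Working outside of request context":
        # the new thread has no request context of its own.
        print(request.url, dict(request.headers))

    threading.Thread(target=log_later).start()
    return jsonify(ok=True)

@app.route('/fixed')
def fixed():
    # Copy everything needed out of the request proxy while the context
    # is still live, mirroring the r_headers / r_url assignments above.
    r_url = request.url
    r_headers = dict(request.headers)

    def log_later():
        print(r_url, r_headers)  # plain locals, safe to use in any thread

    threading.Thread(target=log_later).start()
    return jsonify(ok=True)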
@@ -20,7 +20,6 @@ from llm_server.routes.server_error import handle_server_error
 # TODO: add more excluding to SYSTEM__ tokens
 # TODO: make sure the OpenAI moderation endpoint scans the last n messages rather than only the last one (make that threaded)
 # TODO: support turbo-instruct on openai endpoint
 # TODO: show requested model (not actual LLM backend model) in OpenAI responses

 try:
     import vllm