test
This commit is contained in:
parent
aed5db4968
commit
cd325216e2
|
@ -26,6 +26,7 @@ def oai_to_vllm(request_json_body, hashes: bool, mode):
|
|||
|
||||
|
||||
def format_oai_err(err_msg):
|
||||
print('OAI ERROR MESSAGE:', err_msg)
|
||||
return jsonify({
|
||||
"error": {
|
||||
"message": err_msg,
|
||||
|
|
|
@ -25,6 +25,7 @@ class OpenAIRequestHandler(RequestHandler):
|
|||
self.prompt = None
|
||||
|
||||
def handle_request(self) -> Tuple[flask.Response, int]:
|
||||
print('recieved request')
|
||||
assert not self.used
|
||||
|
||||
if opts.openai_silent_trim:
|
||||
|
@ -66,11 +67,13 @@ class OpenAIRequestHandler(RequestHandler):
|
|||
model = self.request_json_body.get('model')
|
||||
|
||||
if success:
|
||||
print('sent success response')
|
||||
return self.build_openai_response(self.prompt, backend_response.json['results'][0]['text'], model=model), backend_response_status_code
|
||||
else:
|
||||
return backend_response, backend_response_status_code
|
||||
|
||||
def handle_ratelimited(self, do_log: bool = True):
|
||||
print('OAI ratelimited:', self.client_ip, self.request.headers)
|
||||
_, default_backend_info = get_model_choices()
|
||||
w = int(default_backend_info['estimated_wait']) if default_backend_info['estimated_wait'] > 0 else 2
|
||||
response = jsonify({
|
||||
|
|
Reference in New Issue