Merge cluster to master #3
|
@ -25,7 +25,6 @@ class OpenAIRequestHandler(RequestHandler):
|
||||||
self.prompt = None
|
self.prompt = None
|
||||||
|
|
||||||
def handle_request(self) -> Tuple[flask.Response, int]:
|
def handle_request(self) -> Tuple[flask.Response, int]:
|
||||||
print('recieved request')
|
|
||||||
assert not self.used
|
assert not self.used
|
||||||
|
|
||||||
if opts.openai_silent_trim:
|
if opts.openai_silent_trim:
|
||||||
|
@ -39,7 +38,6 @@ class OpenAIRequestHandler(RequestHandler):
|
||||||
return invalid_response
|
return invalid_response
|
||||||
|
|
||||||
if opts.openai_moderation_enabled and opts.openai_api_key and is_api_key_moderated(self.token):
|
if opts.openai_moderation_enabled and opts.openai_api_key and is_api_key_moderated(self.token):
|
||||||
print('moderating', self.token)
|
|
||||||
try:
|
try:
|
||||||
# Gather the last message from the user and all preceding system messages
|
# Gather the last message from the user and all preceding system messages
|
||||||
msg_l = self.request.json['messages'].copy()
|
msg_l = self.request.json['messages'].copy()
|
||||||
|
@ -60,17 +58,14 @@ class OpenAIRequestHandler(RequestHandler):
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
# TODO: support Ooba
|
# TODO: support Ooba
|
||||||
print('converting to vllm')
|
|
||||||
self.parameters = oai_to_vllm(self.parameters, hashes=True, mode=self.cluster_backend_info['mode'])
|
self.parameters = oai_to_vllm(self.parameters, hashes=True, mode=self.cluster_backend_info['mode'])
|
||||||
|
|
||||||
print('generating')
|
|
||||||
llm_request = {**self.parameters, 'prompt': self.prompt}
|
llm_request = {**self.parameters, 'prompt': self.prompt}
|
||||||
(success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
|
(success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
|
||||||
|
|
||||||
model = self.request_json_body.get('model')
|
model = self.request_json_body.get('model')
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
print('sent success response')
|
|
||||||
return self.build_openai_response(self.prompt, backend_response.json['results'][0]['text'], model=model), backend_response_status_code
|
return self.build_openai_response(self.prompt, backend_response.json['results'][0]['text'], model=model), backend_response_status_code
|
||||||
else:
|
else:
|
||||||
return backend_response, backend_response_status_code
|
return backend_response, backend_response_status_code
|
||||||
|
|
Reference in New Issue