add moderation endpoint to openai api, update config
parent 8f4f17166e
commit a89295193f
@@ -1,49 +1,58 @@
 ## Important
 
-backend_url: https://10.0.0.86:8183
+backend_url: https://10.0.0.50:8283
 
-mode: hf-textgen
+mode: vllm
 concurrent_gens: 3
 token_limit: 8192
 
 # How many requests a single IP is allowed to put in the queue.
 # If an IP tries to put more than this their request will be rejected
 # until the other(s) are completed.
 simultaneous_requests_per_ip: 2
 
 ## Optional
 
 max_new_tokens: 500
 
-log_prompts: false
-verify_ssl: false # Python request has issues with self-signed certs
-auth_required: false
-# TODO: reject any prompts with a message
-# TODO: tokens with a 0 priority are excluded
-# TODO: add this value to the stats page
-max_queued_prompts_per_ip: 1
+enable_streaming: false
+log_prompts: false
+verify_ssl: false # Python request has issues with self-signed certs
+auth_required: false
+max_queued_prompts_per_ip: 1
 
 # Name of your proxy, shown to clients.
-llm_middleware_name: Local LLM Proxy
+llm_middleware_name: local-llm-server
 
+# Set the name of the model shown to clients
+# manual_model_name: testing123
 
 # JS tracking code to add to the home page.
 # analytics_tracking_code: |
 # alert("hello");
 
 # HTML to add under the "Estimated Wait Time" line.
 # info_html: |
-# some interesing info
+# bla bla whatever
 
+enable_openi_compatible_backend: true
+# openai_api_key:
+expose_openai_system_prompt: true
+#openai_system_prompt: |
+# You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n
 
 ### Tuneables ##
 
 # Path that is shown to users for them to connect to
-frontend_api_client: /api
+# TODO: set this based on mode. Instead, have this be the path to the API
+frontend_api_client: /api
 
 # Path to the database, relative to the directory of server.py
 database_path: ./proxy-server.db
 
 # How to calculate the average generation time.
 # Valid options: database, minute
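The hunk above introduces the OpenAI-compatible settings (`enable_openi_compatible_backend`, the commented-out `openai_api_key`, `expose_openai_system_prompt`, `#openai_system_prompt`). A minimal sketch of how such a YAML sample parses with PyYAML; the file name and loader below are assumptions, not the repo's actual config code:

    import yaml

    # Hypothetical path; the project's own loader may resolve this differently.
    with open('config.yml') as f:
        config = yaml.safe_load(f)

    # Commented-out keys like '# openai_api_key:' never reach the dict,
    # which is why the config_default_vars hunk below supplies None as the default.
    api_key = config.get('openai_api_key')  # None until the key is uncommented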
@@ -54,18 +63,18 @@ average_generation_time_mode: database
 ## STATS ##
 
 # Display the total_proompts item on the stats screen.
 show_num_prompts: true
 
 # Display the uptime item on the stats screen.
 show_uptime: true
 
 show_total_output_tokens: true
 
 show_backend_info: true
 
 # Load the number of prompts from the database to display on the stats page.
 load_num_prompts: true
 
 ## NETDATA ##
 
-# netdata_root: http://172.0.2.140:19999
+netdata_root: http://10.0.0.50:19999
@@ -19,6 +19,7 @@ config_default_vars = {
     'manual_model_name': False,
     'enable_streaming': True,
     'enable_openi_compatible_backend': True,
+    'openai_api_key': None,
     'expose_openai_system_prompt': True,
     'openai_system_prompt': """You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n""",
 }
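`config_default_vars` is a defaults table for optional config keys. A sketch of the merge pattern such a dict usually feeds (an assumed shape, the actual merge lives elsewhere in the repo):

    # Fill in any key the user's YAML left out.
    for key, default in config_default_vars.items():
        config.setdefault(key, default)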
@@ -28,6 +28,6 @@ enable_openi_compatible_backend = True
 openai_system_prompt = """You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n"""
 expose_openai_system_prompt = True
 enable_streaming = True
+openai_api_key = None
 backend_request_timeout = 30
 backend_generate_request_timeout = 120
@@ -19,6 +19,6 @@ def openai_chat_completions():
         return OpenAIRequestHandler(request).handle_request()
     except Exception as e:
         print(f'EXCEPTION on {request.url}!!!', f'{e.__class__.__name__}: {e}')
-        print(print(traceback.format_exc()))
+        print(traceback.format_exc())
         print(request.data)
-        return build_openai_response('', format_sillytavern_err(f'Server encountered exception', 'error')), 200
+        return build_openai_response('', format_sillytavern_err(f'Server encountered exception.', 'error')), 200
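The `print(print(...))` fix deserves a note: the inner `print` writes the traceback and returns `None`, so the old code printed a stray `None` after every exception. A quick REPL check (illustrative, not from the repo):

    >>> print(print('traceback text'))
    traceback text
    None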
@@ -1,9 +1,12 @@
+import json
 import re
 import time
+import traceback
 from typing import Tuple
 from uuid import uuid4
 
 import flask
+import requests
 import tiktoken
 from flask import jsonify
@@ -35,6 +38,19 @@ class OpenAIRequestHandler(RequestHandler):
         llm_request = {**self.parameters, 'prompt': self.prompt}
 
         _, (backend_response, backend_response_status_code) = self.generate_response(llm_request)
 
+        if opts.openai_api_key:
+            try:
+                flagged = check_moderation_endpoint(self.request.json['messages'][-1]['content'])
+                if flagged:
+                    mod_msg = f"The user's message does not comply with {opts.llm_middleware_name} policies. Offending categories: {json.dumps(flagged['categories'])}"
+                    self.request.json['messages'].insert((len(self.request.json['messages'])), {'role': 'system', 'content': mod_msg})
+                    self.prompt = self.transform_messages_to_prompt()
+                    # print(json.dumps(self.request.json['messages'], indent=4))
+            except Exception as e:
+                print(f'OpenAI moderation endpoint failed:', f'{e.__class__.__name__}: {e}')
+                print(traceback.format_exc())
+
         return build_openai_response(self.prompt, backend_response.json['results'][0]['text']), backend_response_status_code
 
     def handle_ratelimited(self):
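One observation on the insertion above: `messages.insert(len(messages), item)` is equivalent to `messages.append(item)`, so the moderation notice always lands as the final message before `transform_messages_to_prompt()` rebuilds the prompt. Illustrative only:

    messages = [{'role': 'user', 'content': 'hi'}]
    messages.insert(len(messages), {'role': 'system', 'content': 'policy notice'})
    # identical effect: messages.append({'role': 'system', 'content': 'policy notice'})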
@@ -56,14 +72,30 @@ class OpenAIRequestHandler(RequestHandler):
                     prompt += f'### ASSISTANT: {msg["content"]}\n\n'
                 else:
                     return False
-        except:
-            return False
+        except Exception as e:
+            # TODO: use logging
+            print(f'Failed to transform OpenAI to prompt:', f'{e.__class__.__name__}: {e}')
+            print(traceback.format_exc())
+            return ''
 
         prompt = prompt.strip(' ').strip('\n').strip('\n\n') # TODO: this is really lazy
         prompt += '\n\n### RESPONSE: '
         return prompt
 
 
+def check_moderation_endpoint(prompt: str):
+    headers = {
+        'Content-Type': 'application/json',
+        'Authorization': f"Bearer {opts.openai_api_key}",
+    }
+    response = requests.post('https://api.openai.com/v1/moderations', headers=headers, json={"input": prompt}).json()
+    offending_categories = []
+    for k, v in response['results'][0]['categories'].items():
+        if v:
+            offending_categories.append(k)
+    return {'flagged': response['results'][0]['flagged'], 'categories': offending_categories}
+
+
 def build_openai_response(prompt, response):
     # Seperate the user's prompt from the context
     x = prompt.split('### USER:')
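For reference, `check_moderation_endpoint` flattens the shape of OpenAI's /v1/moderations response into a small dict. A sketch with an illustrative payload (the field layout matches the public moderation API; the values are made up):

    # Illustrative response body, not captured from a real call.
    response = {
        'results': [{
            'flagged': True,
            'categories': {'hate': False, 'self-harm': False, 'violence': True},
        }]
    }
    # The helper reduces this to:
    # {'flagged': True, 'categories': ['violence']}

Note the helper returns a dict whether or not anything was flagged, so the `if flagged:` guard in `handle_request` is truthy on every successful call; `if flagged['flagged']:` appears to be the intent.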
@@ -18,6 +18,6 @@ def generate():
         return OobaRequestHandler(request).handle_request()
     except Exception as e:
         print(f'EXCEPTION on {request.url}!!!', f'{e.__class__.__name__}: {e}')
-        print(print(traceback.format_exc()))
+        print(traceback.format_exc())
         print(request.data)
-        return format_sillytavern_err(f'Server encountered exception', 'error'), 200
+        return format_sillytavern_err(f'Server encountered exception.', 'error'), 200
@@ -74,6 +74,7 @@ opts.enable_openi_compatible_backend = config['enable_openi_compatible_backend']
 opts.openai_system_prompt = config['openai_system_prompt']
 opts.expose_openai_system_prompt = config['expose_openai_system_prompt']
 opts.enable_streaming = config['enable_streaming']
+opts.openai_api_key = config['openai_api_key']
 
 opts.verify_ssl = config['verify_ssl']
 if not opts.verify_ssl:
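The hunk cuts off just before the body of `if not opts.verify_ssl:`. The conventional pattern for that branch (a guess at the elided code, not taken from the repo) silences urllib3's self-signed-certificate warning:

    import urllib3

    if not opts.verify_ssl:
        # Suppress InsecureRequestWarning spam when verification is disabled.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)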