add moderation endpoint to openai api, update config

This commit is contained in:
Cyberes 2023-09-14 15:07:17 -06:00
parent 8f4f17166e
commit a89295193f
7 changed files with 76 additions and 33 deletions

View File

@ -1,8 +1,8 @@
## Important ## Important
backend_url: https://10.0.0.86:8183 backend_url: https://10.0.0.50:8283
mode: hf-textgen mode: vllm
concurrent_gens: 3 concurrent_gens: 3
token_limit: 8192 token_limit: 8192
@ -15,19 +15,21 @@ simultaneous_requests_per_ip: 2
max_new_tokens: 500 max_new_tokens: 500
enable_streaming: false
log_prompts: false log_prompts: false
verify_ssl: false # Python request has issues with self-signed certs verify_ssl: false # Python request has issues with self-signed certs
auth_required: false auth_required: false
# TODO: reject any prompts with a message
# TODO: tokens with a 0 priority are excluded
# TODO: add this value to the stats page
max_queued_prompts_per_ip: 1 max_queued_prompts_per_ip: 1
# Name of your proxy, shown to clients. # Name of your proxy, shown to clients.
llm_middleware_name: Local LLM Proxy llm_middleware_name: local-llm-server
# Set the name of the model shown to clients
# manual_model_name: testing123
# JS tracking code to add to the home page. # JS tracking code to add to the home page.
# analytics_tracking_code: | # analytics_tracking_code: |
@ -35,11 +37,18 @@ llm_middleware_name: Local LLM Proxy
# HTML to add under the "Estimated Wait Time" line. # HTML to add under the "Estimated Wait Time" line.
# info_html: | # info_html: |
# some interesting info # bla bla whatever
enable_openi_compatible_backend: true
# openai_api_key:
expose_openai_system_prompt: true
#openai_system_prompt: |
# You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n
### Tuneables ## ### Tuneables ##
# Path that is shown to users for them to connect to # Path that is shown to users for them to connect to
# TODO: set this based on mode. Instead, have this be the path to the API
frontend_api_client: /api frontend_api_client: /api
# Path to the database, relative to the directory of server.py # Path to the database, relative to the directory of server.py
@ -68,4 +77,4 @@ load_num_prompts: true
## NETDATA ## ## NETDATA ##
# netdata_root: http://172.0.2.140:19999 netdata_root: http://10.0.0.50:19999

View File

@ -19,6 +19,7 @@ config_default_vars = {
'manual_model_name': False, 'manual_model_name': False,
'enable_streaming': True, 'enable_streaming': True,
'enable_openi_compatible_backend': True, 'enable_openi_compatible_backend': True,
'openai_api_key': None,
'expose_openai_system_prompt': True, 'expose_openai_system_prompt': True,
'openai_system_prompt': """You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n""", 'openai_system_prompt': """You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n""",
} }

View File

@ -28,6 +28,6 @@ enable_openi_compatible_backend = True
openai_system_prompt = """You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n""" openai_system_prompt = """You are an assistant chatbot. Your main function is to provide accurate and helpful responses to the user's queries. You should always be polite, respectful, and patient. You should not provide any personal opinions or advice unless specifically asked by the user. You should not make any assumptions about the user's knowledge or abilities. You should always strive to provide clear and concise answers. If you do not understand a user's query, ask for clarification. If you cannot provide an answer, apologize and suggest the user seek help elsewhere.\nLines that start with "### ASSISTANT" were messages you sent previously.\nLines that start with "### USER" were messages sent by the user you are chatting with.\nYou will respond to the "### RESPONSE:" prompt as the assistant and follow the instructions given by the user.\n\n"""
expose_openai_system_prompt = True expose_openai_system_prompt = True
enable_streaming = True enable_streaming = True
openai_api_key = None
backend_request_timeout = 30 backend_request_timeout = 30
backend_generate_request_timeout = 120 backend_generate_request_timeout = 120

View File

@ -19,6 +19,6 @@ def openai_chat_completions():
return OpenAIRequestHandler(request).handle_request() return OpenAIRequestHandler(request).handle_request()
except Exception as e: except Exception as e:
print(f'EXCEPTION on {request.url}!!!', f'{e.__class__.__name__}: {e}') print(f'EXCEPTION on {request.url}!!!', f'{e.__class__.__name__}: {e}')
print(print(traceback.format_exc())) print(traceback.format_exc())
print(request.data) print(request.data)
return build_openai_response('', format_sillytavern_err(f'Server encountered exception', 'error')), 200 return build_openai_response('', format_sillytavern_err(f'Server encountered exception.', 'error')), 200

View File

@ -1,9 +1,12 @@
import json
import re import re
import time import time
import traceback
from typing import Tuple from typing import Tuple
from uuid import uuid4 from uuid import uuid4
import flask import flask
import requests
import tiktoken import tiktoken
from flask import jsonify from flask import jsonify
@ -35,6 +38,19 @@ class OpenAIRequestHandler(RequestHandler):
llm_request = {**self.parameters, 'prompt': self.prompt} llm_request = {**self.parameters, 'prompt': self.prompt}
_, (backend_response, backend_response_status_code) = self.generate_response(llm_request) _, (backend_response, backend_response_status_code) = self.generate_response(llm_request)
if opts.openai_api_key:
try:
flagged = check_moderation_endpoint(self.request.json['messages'][-1]['content'])
if flagged:
mod_msg = f"The user's message does not comply with {opts.llm_middleware_name} policies. Offending categories: {json.dumps(flagged['categories'])}"
self.request.json['messages'].insert((len(self.request.json['messages'])), {'role': 'system', 'content': mod_msg})
self.prompt = self.transform_messages_to_prompt()
# print(json.dumps(self.request.json['messages'], indent=4))
except Exception as e:
print(f'OpenAI moderation endpoint failed:', f'{e.__class__.__name__}: {e}')
print(traceback.format_exc())
return build_openai_response(self.prompt, backend_response.json['results'][0]['text']), backend_response_status_code return build_openai_response(self.prompt, backend_response.json['results'][0]['text']), backend_response_status_code
def handle_ratelimited(self): def handle_ratelimited(self):
@ -56,14 +72,30 @@ class OpenAIRequestHandler(RequestHandler):
prompt += f'### ASSISTANT: {msg["content"]}\n\n' prompt += f'### ASSISTANT: {msg["content"]}\n\n'
else: else:
return False return False
except: except Exception as e:
return False # TODO: use logging
print(f'Failed to transform OpenAI to prompt:', f'{e.__class__.__name__}: {e}')
print(traceback.format_exc())
return ''
prompt = prompt.strip(' ').strip('\n').strip('\n\n') # TODO: this is really lazy prompt = prompt.strip(' ').strip('\n').strip('\n\n') # TODO: this is really lazy
prompt += '\n\n### RESPONSE: ' prompt += '\n\n### RESPONSE: '
return prompt return prompt
def check_moderation_endpoint(prompt: str):
    """Check `prompt` against the OpenAI moderation endpoint.

    Returns a dict with:
      - 'flagged': bool, True when OpenAI flagged the prompt.
      - 'categories': list of category names that were triggered.

    Raises `requests.RequestException` on network/HTTP errors; the caller
    wraps this call in a try/except and logs, so failures are best-effort.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f"Bearer {opts.openai_api_key}",
    }
    # A timeout prevents a hung moderation call from stalling the request handler forever.
    resp = requests.post('https://api.openai.com/v1/moderations', headers=headers, json={"input": prompt}, timeout=10)
    # Surface HTTP errors explicitly instead of a confusing KeyError on the JSON below.
    resp.raise_for_status()
    result = resp.json()['results'][0]
    offending_categories = [category for category, triggered in result['categories'].items() if triggered]
    return {'flagged': result['flagged'], 'categories': offending_categories}
def build_openai_response(prompt, response): def build_openai_response(prompt, response):
# Separate the user's prompt from the context # Separate the user's prompt from the context
x = prompt.split('### USER:') x = prompt.split('### USER:')

View File

@ -18,6 +18,6 @@ def generate():
return OobaRequestHandler(request).handle_request() return OobaRequestHandler(request).handle_request()
except Exception as e: except Exception as e:
print(f'EXCEPTION on {request.url}!!!', f'{e.__class__.__name__}: {e}') print(f'EXCEPTION on {request.url}!!!', f'{e.__class__.__name__}: {e}')
print(print(traceback.format_exc())) print(traceback.format_exc())
print(request.data) print(request.data)
return format_sillytavern_err(f'Server encountered exception', 'error'), 200 return format_sillytavern_err(f'Server encountered exception.', 'error'), 200

View File

@ -74,6 +74,7 @@ opts.enable_openi_compatible_backend = config['enable_openi_compatible_backend']
opts.openai_system_prompt = config['openai_system_prompt'] opts.openai_system_prompt = config['openai_system_prompt']
opts.expose_openai_system_prompt = config['expose_openai_system_prompt'] opts.expose_openai_system_prompt = config['expose_openai_system_prompt']
opts.enable_streaming = config['enable_streaming'] opts.enable_streaming = config['enable_streaming']
opts.openai_api_key = config['openai_api_key']
opts.verify_ssl = config['verify_ssl'] opts.verify_ssl = config['verify_ssl']
if not opts.verify_ssl: if not opts.verify_ssl: