add vision

Cyberes 2024-04-09 19:26:44 -06:00
parent bcb564fc36
commit 7b20e71d65
17 changed files with 178 additions and 42 deletions

.gitignore vendored
View File

@@ -1,7 +1,7 @@
.idea
*-store/
*.yaml
!config.sample.yaml
bot-store/
config.yaml
config.yml
# ---> Python
# Byte-compiled / optimized / DLL files

View File

@@ -5,6 +5,8 @@ _Chatbots for Matrix._
This bot supports OpenAI, Anthropic, and locally hosted models that use an OpenAI-compatible endpoint. It can run multiple models using
different triggers, such as `!c4` for GPT-4 and `!ca` for Anthropic, all through the same bot.
Vision is supported for both OpenAI and Anthropic.
<br>
## Install

View File

@@ -77,6 +77,10 @@ command:
# Custom OpenAI endpoint. OpenAI only.
# api_base: https://example.com/openai/v1
# Enable vision on this model.
# The bot can only view images that are inside threads. Threads cannot be started with an image.
# vision: false
# Bot's description, shown when running `!matrixgpt`.
# help: A helpful assistant.

View File

@@ -11,7 +11,7 @@ from pathlib import Path
from aiohttp import ClientConnectionError, ServerDisconnectedError
from bison.errors import SchemeValidationError
from nio import InviteMemberEvent, JoinResponse, MegolmEvent, RoomMessageText, UnknownEvent
from nio import InviteMemberEvent, JoinResponse, MegolmEvent, RoomMessageText, UnknownEvent, RoomMessageImage
from matrix_gpt import MatrixClientHelper
from matrix_gpt.callbacks import MatrixBotCallbacks
@@ -79,7 +79,7 @@ async def main(args):
# Set up event callbacks
callbacks = MatrixBotCallbacks(client=client_helper)
client.add_event_callback(callbacks.handle_message, RoomMessageText)
client.add_event_callback(callbacks.handle_message, (RoomMessageText, RoomMessageImage))
client.add_event_callback(callbacks.handle_invite, InviteMemberEvent)
client.add_event_callback(callbacks.decryption_failure, MegolmEvent)
client.add_event_callback(callbacks.unknown, UnknownEvent)

View File

@@ -1,5 +1,6 @@
import logging
from matrix_gpt import MatrixClientHelper
from matrix_gpt.config import global_config
from matrix_gpt.generate_clients.anthropic import AnthropicApiClient
from matrix_gpt.generate_clients.openai import OpenAIClient
@@ -23,30 +24,32 @@ class ApiClientManager:
self._openai_api_key = global_config['openai'].get('api_key', 'MatrixGPT')
self._anth_api_key = global_config['anthropic'].get('api_key')
def get_client(self, mode: str):
def get_client(self, mode: str, client_helper: MatrixClientHelper):
if mode == 'openai':
return self.openai_client()
return self.openai_client(client_helper)
elif mode == 'anth':
return self.anth_client()
return self.anth_client(client_helper)
else:
raise Exception
def openai_client(self):
def openai_client(self, client_helper: MatrixClientHelper):
self._set_from_config()
if not self._openai_api_key:
self.logger.error('Missing an OpenAI API key!')
return None
return OpenAIClient(
api_key=self._openai_api_key,
client_helper=client_helper
)
def anth_client(self):
def anth_client(self, client_helper: MatrixClientHelper):
self._set_from_config()
if not self._anth_api_key:
self.logger.error('Missing an Anthropic API key!')
return None
return AnthropicApiClient(
api_key=self._anth_api_key,
client_helper=client_helper
)

View File

@@ -1,8 +1,9 @@
import asyncio
import logging
import time
from typing import Union
from nio import (AsyncClient, InviteMemberEvent, MatrixRoom, MegolmEvent, RoomMessageText, UnknownEvent)
from nio import (AsyncClient, InviteMemberEvent, MatrixRoom, MegolmEvent, RoomMessageText, UnknownEvent, RoomMessageImage)
from .chat_functions import check_authorized, is_thread, check_command_prefix
from .config import global_config
@@ -18,12 +19,11 @@ class MatrixBotCallbacks:
self.startup_ts = time.time() * 1000
self.seen_messages = set()
async def handle_message(self, room: MatrixRoom, requestor_event: RoomMessageText) -> None:
async def handle_message(self, room: MatrixRoom, requestor_event: Union[RoomMessageText, RoomMessageImage]) -> None:
"""
Callback for when a message event is received.
"""
# Mark all messages as read.
mark_read_task = asyncio.create_task(self.client.room_read_markers(room.room_id, requestor_event.event_id, requestor_event.event_id))
mark_read_task = asyncio.create_task(self.client.room_read_markers(room.room_id, requestor_event.event_id, requestor_event.event_id)) # Mark all messages as read.
msg = requestor_event.body.strip().strip('\n')
if msg == "** Unable to decrypt: The sender's device has not sent us the keys for this message. **":
self.logger.debug(f'Unable to decrypt event "{requestor_event.event_id}" in room {room.room_id}')
@@ -37,6 +37,7 @@ class MatrixBotCallbacks:
await sound_off(room, requestor_event, self.client_helper)
return
if requestor_event.event_id in self.seen_messages:
# Need to track messages manually because the sync background thread may trigger the callback.
return
self.seen_messages.add(requestor_event.event_id)
command_activated, sent_command_prefix, command_info = check_command_prefix(msg)
@@ -46,8 +47,8 @@
self.logger.debug(f'Message from {requestor_event.sender} in {room.room_id} --> "{msg}"')
# Start the task in the background and don't wait for it here or else we'll block everything.
task = asyncio.create_task(do_reply_threaded_msg(self.client_helper, room, requestor_event))
elif command_activated and not is_thread(requestor_event):
# Everything else
elif isinstance(requestor_event, RoomMessageText) and command_activated and not is_thread(requestor_event):
# Everything else. Images do not start threads.
self.logger.debug(f'Message from {requestor_event.sender} in {room.room_id} --> "{msg}"')
allowed_to_chat = command_info.allowed_to_chat + global_config['allowed_to_chat']
if not check_authorized(requestor_event.sender, allowed_to_chat):

View File

@@ -1,5 +1,6 @@
import logging
from typing import List, Tuple
from urllib.parse import urlparse
from nio import AsyncClient, Event, MatrixRoom, RoomGetEventResponse, RoomMessageText
@@ -36,19 +37,26 @@ async def is_this_our_thread(client: AsyncClient, room: MatrixRoom, event: RoomM
async def get_thread_content(client: AsyncClient, room: MatrixRoom, base_event: RoomMessageText) -> List[Event]:
messages = []
# This is the event of the message that was just sent.
new_event = (await client.room_get_event(room.room_id, base_event.event_id)).event
while True:
if new_event.source['content'].get('m.relates_to', {}).get('rel_type') == 'm.thread':
# Put the event in the messages list only if it's related to the thread we're parsing.
messages.append(new_event)
else:
break
# Fetch the next event.
new_event = (await client.room_get_event(
room.room_id,
new_event.source['content']['m.relates_to']['m.in_reply_to']['event_id'])
).event
# Put the root event in the array.
messages.append((await client.room_get_event(
room.room_id, base_event.source['content']['m.relates_to']['event_id'])
).event) # put the root event in the array
).event)
messages.reverse()
return messages
@@ -77,3 +85,12 @@ def check_authorized(string, to_check):
return output
else:
raise Exception
async def download_mxc(url: str, client: AsyncClient) -> bytes:
mxc = urlparse(url)
response = await client.download(mxc.netloc, mxc.path.strip("/"))
if hasattr(response, "body"):
return response.body
else:
return b''
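
For reference, a minimal sketch of how `urlparse` splits an `mxc://` content URI into the server name and media ID that `download_mxc` passes to `client.download` (the URL below is a made-up example):

```python
from urllib.parse import urlparse

# Hypothetical mxc:// content URI, as carried by a RoomMessageImage event.
mxc = urlparse("mxc://matrix.example.org/abcDEF123")
print(mxc.netloc)           # matrix.example.org -> server name
print(mxc.path.strip("/"))  # abcDEF123 -> media ID
```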

View File

@@ -31,6 +31,7 @@ config_scheme = bison.Scheme(
bison.Option('system_prompt', field_type=str, default=None),
bison.Option('injected_system_prompt', field_type=str, default=None),
bison.Option('api_base', field_type=[str, NoneType], default=None),
bison.Option('vision', field_type=bool, default=False),
bison.Option('help', field_type=[str, NoneType], default=None),
)),
bison.DictOption('openai', scheme=bison.Scheme(
@@ -83,7 +84,15 @@ class ConfigManager:
if not self._config.config['openai']['api_key'] and not self._config.config['anthropic']['api_key']:
raise SchemeValidationError('You need an OpenAI or Anthropic API key')
self._parsed_config = self._merge_in_list_defaults()
# TODO: make sure there aren't duplicate triggers
# Make sure there aren't duplicate triggers
existing_triggers = []
for item in self._config.config['command']:
trigger = item['trigger']
if trigger in existing_triggers:
raise SchemeValidationError(f'Duplicate trigger {trigger}')
existing_triggers.append(trigger)
self._command_prefixes = self._generate_command_prefixes()
def _merge_in_list_defaults(self):

View File

@@ -30,7 +30,7 @@ async def generate_ai_response(
try:
await client.room_typing(room.room_id, typing_state=True, timeout=global_config['response_timeout'] * 1000)
api_client = api_client_helper.get_client(command_info.api_type)
api_client = api_client_helper.get_client(command_info.api_type, client_helper)
messages = api_client.assemble_context(msg, system_prompt=command_info.system_prompt, injected_system_prompt=command_info.injected_system_prompt)
response = None
@@ -77,9 +77,18 @@ async def generate_ai_response(
# Logging
if global_config['logging']['log_full_response']:
logger.debug(
{'event_id': event.event_id, 'room': room.room_id, 'messages': messages, 'response': response}
)
data = {'event_id': event.event_id, 'room': room.room_id, 'messages': messages, 'response': response}
# Remove images from the logged data.
for i in range(len(data['messages'])):
if isinstance(data['messages'][i]['content'], list):
# Images are always sent as lists
if data['messages'][i]['content'][0].get('source', {}).get('media_type'):
# Anthropic
data['messages'][i]['content'][0]['source']['data'] = '...'
elif data['messages'][i]['content'][0].get('image_url'):
# OpenAI
data['messages'][i]['content'][0]['image_url']['url'] = '...'
logger.debug(data)
z = text_response.replace("\n", "\\n")
logger.info(f'Reply to {event.event_id} --> {command_info.model} responded with "{z}"')
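
For context, a sketch of the two image-message shapes the redaction loop above looks for, matching the `append_img` implementations later in this commit (placeholder data, not a real request):

```python
# Anthropic-style image message: base64 payload under content[0]['source']['data'].
anthropic_msg = {
    "role": "user",
    "content": [{
        "type": "image",
        "source": {"type": "base64", "media_type": "image/png", "data": "<base64 omitted>"},
    }],
}

# OpenAI-style image message: data URL under content[0]['image_url']['url'].
openai_msg = {
    "role": "user",
    "content": [{
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,<base64 omitted>", "detail": "low"},
    }],
}
```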

View File

@@ -1,14 +1,17 @@
from typing import Union
from anthropic import AsyncAnthropic
from nio import RoomMessageImage
from matrix_gpt.chat_functions import download_mxc
from matrix_gpt.generate_clients.api_client import ApiClient
from matrix_gpt.generate_clients.command_info import CommandInfo
from matrix_gpt.image import process_image
class AnthropicApiClient(ApiClient):
def __init__(self, api_key: str):
super().__init__(api_key)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _create_client(self, base_url: str = None):
return AsyncAnthropic(
@@ -23,11 +26,46 @@ class AnthropicApiClient(ApiClient):
self._context = messages
return messages
def verify_context(self):
"""
Verify that the context alternates between the human and assistant, inserting the opposite
user type if it does not alternate correctly.
"""
i = 0
while i < len(self._context) - 1:
if self._context[i]['role'] == self._context[i + 1]['role']:
dummy = self.generate_text_msg(f'<{self._BOT_NAME} did not respond>', self._BOT_NAME) if self._context[i]['role'] == self._HUMAN_NAME else self.generate_text_msg(f'<{self._HUMAN_NAME} did not respond>', self._HUMAN_NAME)
self._context.insert(i + 1, dummy)
i += 1
# if self._context[-1]['role'] == self._HUMAN_NAME:
# self._context.append(self.generate_text_msg(f'<{self._BOT_NAME} did not respond>', self._BOT_NAME))
def generate_text_msg(self, content: str, role: str):
assert role in [self._HUMAN_NAME, self._BOT_NAME]
return {"role": role, "content": [{"type": "text", "text": str(content)}]}
def append_msg(self, content: str, role: str):
assert role in [self._HUMAN_NAME, self._BOT_NAME]
self._context.append({"role": role, "content": [{"type": "text", "text": str(content)}]})
self._context.append(self.generate_text_msg(content, role))
async def append_img(self, img_event: RoomMessageImage, role: str):
assert role in [self._HUMAN_NAME, self._BOT_NAME]
img_bytes = await download_mxc(img_event.url, self.client_helper.client)
encoded_image = process_image(img_bytes, resize_px=784)
self._context.append({
"role": role,
'content': [{
'type': 'image',
'source': {
'type': 'base64',
'media_type': 'image/png',
'data': encoded_image
}
}]
})
async def generate(self, command_info: CommandInfo):
self.verify_context()
r = await self._create_client().messages.create(
model=command_info.model,
max_tokens=None if command_info.max_tokens == 0 else command_info.max_tokens,
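
A rough illustration of the repair `verify_context` above performs when two messages from the same role appear back to back, as can happen when the bot fails to answer in a thread (the message text is a placeholder):

```python
# Before: two consecutive 'user' turns.
context = [
    {"role": "user", "content": [{"type": "text", "text": "first question"}]},
    {"role": "user", "content": [{"type": "text", "text": "second question"}]},
]

# After verify_context(): a dummy assistant turn is inserted so the roles strictly alternate.
repaired = [
    {"role": "user", "content": [{"type": "text", "text": "first question"}]},
    {"role": "assistant", "content": [{"type": "text", "text": "<assistant did not respond>"}]},
    {"role": "user", "content": [{"type": "text", "text": "second question"}]},
]
```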

View File

@@ -1,5 +1,8 @@
from typing import Union
from nio import RoomMessageImage
from matrix_gpt import MatrixClientHelper
from matrix_gpt.generate_clients.command_info import CommandInfo
@@ -7,8 +10,9 @@ class ApiClient:
_HUMAN_NAME = 'user'
_BOT_NAME = 'assistant'
def __init__(self, api_key: str):
def __init__(self, api_key: str, client_helper: MatrixClientHelper):
self.api_key = api_key
self.client_helper = client_helper
self._context = []
def _create_client(self, base_url: str = None):
@@ -17,9 +21,15 @@ class ApiClient:
def assemble_context(self, messages: Union[str, list], system_prompt: str = None, injected_system_prompt: str = None):
raise NotImplementedError
def generate_text_msg(self, content: str, role: str):
raise NotImplementedError
def append_msg(self, content: str, role: str):
raise NotImplementedError
async def append_img(self, img_event: RoomMessageImage, role: str):
raise NotImplementedError
async def generate(self, command_info: CommandInfo):
raise NotImplementedError

View File

@@ -2,7 +2,7 @@ from matrix_gpt.config import global_config
class CommandInfo:
def __init__(self, trigger: str, api_type: str, model: str, max_tokens: int, temperature: float, allowed_to_chat: list, allowed_to_thread: list, allowed_to_invite: list, system_prompt: str, injected_system_prompt: str, api_base: str = None, help: str = None):
def __init__(self, trigger: str, api_type: str, model: str, max_tokens: int, temperature: float, allowed_to_chat: list, allowed_to_thread: list, allowed_to_invite: list, system_prompt: str, injected_system_prompt: str, api_base: str = None, vision: bool = False, help: str = None):
self.trigger = trigger
assert api_type in ['openai', 'anth']
self.api_type = api_type
@@ -12,6 +12,7 @@ class CommandInfo:
self.system_prompt = system_prompt
self.injected_system_prompt = injected_system_prompt
self.api_base = api_base
self.vision = vision
self.help = help
self.allowed_to_chat = allowed_to_chat
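
A hedged sketch of what a vision-enabled command might look like once parsed into `CommandInfo` (every value below is a placeholder, not taken from the sample config):

```python
info = CommandInfo(
    trigger="!c4",
    api_type="openai",
    model="gpt-4-turbo",        # placeholder model name
    max_tokens=0,
    temperature=0.5,
    allowed_to_chat=["all"],
    allowed_to_thread=["all"],
    allowed_to_invite=["all"],
    system_prompt=None,
    injected_system_prompt=None,
    vision=True,                # the flag added by this commit
    help="A helpful assistant.",
)
assert info.vision
```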

View File

@@ -1,15 +1,18 @@
from typing import Union
from nio import RoomMessageImage
from openai import AsyncOpenAI
from matrix_gpt.chat_functions import download_mxc
from matrix_gpt.config import global_config
from matrix_gpt.generate_clients.api_client import ApiClient
from matrix_gpt.generate_clients.command_info import CommandInfo
from matrix_gpt.image import process_image
class OpenAIClient(ApiClient):
def __init__(self, api_key: str):
super().__init__(api_key)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _create_client(self, api_base: str = None):
return AsyncOpenAI(
@@ -21,6 +24,21 @@ class OpenAIClient(ApiClient):
assert role in [self._HUMAN_NAME, self._BOT_NAME]
self._context.append({'role': role, 'content': content})
async def append_img(self, img_event: RoomMessageImage, role: str):
assert role in [self._HUMAN_NAME, self._BOT_NAME]
img_bytes = await download_mxc(img_event.url, self.client_helper.client)
encoded_image = process_image(img_bytes, resize_px=512)
self._context.append({
"role": role,
'content': [{
'type': 'image_url',
'image_url': {
'url': f"data:image/png;base64,{encoded_image}",
'detail': 'low'
}
}]
})
def assemble_context(self, messages: Union[str, list], system_prompt: str = None, injected_system_prompt: str = None):
if isinstance(messages, list):
messages = messages

View File

@@ -50,7 +50,7 @@ async def do_reply_threaded_msg(client_helper: MatrixClientHelper, room: MatrixR
await client.room_typing(room.room_id, typing_state=True, timeout=30000)
thread_content = await get_thread_content(client, room, requestor_event)
api_client = api_client_helper.get_client(command_info.api_type)
api_client = api_client_helper.get_client(command_info.api_type, client_helper)
for event in thread_content:
if isinstance(event, MegolmEvent):
await client_helper.send_text_to_room(
@@ -64,11 +64,15 @@ async def do_reply_threaded_msg(client_helper: MatrixClientHelper, room: MatrixR
await client.room_typing(room.room_id, typing_state=False, timeout=1000)
return
else:
thread_msg = event.body.strip().strip('\n')
api_client.append_msg(
role=api_client.BOT_NAME if event.sender == client.user_id else api_client.HUMAN_NAME,
content=thread_msg if not check_command_prefix(thread_msg)[0] else thread_msg[len(sent_command_prefix):].strip()
)
role = api_client.BOT_NAME if event.sender == client.user_id else api_client.HUMAN_NAME
if isinstance(event, RoomMessageText):
thread_msg = event.body.strip().strip('\n')
api_client.append_msg(
role=role,
content=thread_msg if not check_command_prefix(thread_msg)[0] else thread_msg[len(sent_command_prefix):].strip(),
)
elif command_info.vision:
await api_client.append_img(event, role)
await generate_ai_response(
client_helper=client_helper,

matrix_gpt/image.py Normal file
View File

@@ -0,0 +1,23 @@
import base64
import io
from PIL import Image
def process_image(source_bytes: bytes, resize_px: int):
image = Image.open(io.BytesIO(source_bytes))
width, height = image.size
if min(width, height) > resize_px:
if width < height:
new_width = resize_px
new_height = int((height / width) * new_width)
else:
new_height = resize_px
new_width = int((width / height) * new_height)
image = image.resize((new_width, new_height))
byte_arr = io.BytesIO()
image.save(byte_arr, format='PNG')
image_bytes = byte_arr.getvalue()
return base64.b64encode(image_bytes).decode('utf-8')
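
A quick usage sketch of `process_image` with a synthetic Pillow image, assuming the function above is in scope (the dimensions are arbitrary):

```python
import base64
import io

from PIL import Image

# Build a synthetic 2000x1000 image so the resize branch triggers.
buf = io.BytesIO()
Image.new("RGB", (2000, 1000), color="white").save(buf, format="PNG")

encoded = process_image(buf.getvalue(), resize_px=784)

# The shorter side is scaled down to 784 px and the PNG is returned base64-encoded.
decoded = Image.open(io.BytesIO(base64.b64decode(encoded)))
print(decoded.size)  # (1568, 784)
```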

View File

@@ -29,7 +29,6 @@ class MatrixClientHelper:
self.store_path = Path(store_path).absolute().expanduser().resolve()
self.store_path.mkdir(parents=True, exist_ok=True)
self.auth_file = self.store_path / (device_id.lower() + '.json')
self.device_name = device_id
self.client: AsyncClient = AsyncClient(homeserver=self.homeserver, user=self.user_id, config=self.client_config, device_id=device_id)
self.logger = logging.getLogger('MatrixGPT').getChild('MatrixClientHelper')
@@ -85,8 +84,8 @@
def _read_details_from_disk(self):
if not self.auth_file.exists():
return {}
with open(self.auth_file, "r") as f:
return {'auth': {}, 'extra': {}}
with open(self.auth_file, 'r') as f:
return json.load(f)
def _write_details_to_disk(self, resp: LoginResponse = None, extra_data: dict = None) -> None:
@@ -165,7 +164,6 @@
}
}
# TODO: don't force this to string. what if we want to send an array?
content["m.matrixgpt"] = {
"error": str(extra_error),
"msg": str(extra_msg),

View File

@@ -1,7 +1,6 @@
matrix-nio[e2e]==0.24.0
pyyaml==6.0.1
matrix-nio==0.24.0
markdown==3.6
python-olm==3.2.16
openai==1.16.2
anthropic==0.23.1
pillow==10.3.0
git+https://git.evulid.cc/cyberes/bison.git