add bing copilot support, other minor changes

This commit is contained in:
Cyberes 2024-04-10 16:42:52 -06:00
parent 5f4fa86480
commit ad986c3e0b
11 changed files with 156 additions and 24 deletions

View File

@ -2,10 +2,17 @@
_Chatbots for Matrix._
This bot supports OpenAI, Anthropic, and locally hosted models that use an OpenAI-compatible endpoint. It can run multiple different models using
This bot supports OpenAI, Anthropic, and locally hosted models that use an OpenAI-compatible endpoint. It can run
multiple different models using
different triggers, such as `!c4` for GPT4 and `!ca` for Anthropic, all through the same bot.
OpenAI and Anthropic vision is supported.
**Supported Services**
- OpenAI (+ vision)
- Anthropic (+ vision)
- Anthropic
- Bing Copilot
- ~~Dalle-3~~ (TODO)
<br>
@ -16,18 +23,19 @@ OpenAI and Anthropic vision is supported.
sudo apt install libolm-dev gcc python3-dev
pip install -r requirements.txt
```
2. Copy `config.sample.yaml` to `config.yaml` and fill it out with the bot's Matrix authentication and your OpenAI and/or Anthropic API keys.
2. Copy `config.sample.yaml` to `config.yaml` and fill it out with the bot's Matrix authentication and your OpenAI
and/or Anthropic API keys.
3. Start the bot with `python3 main.py`
[Pantalaimon](https://github.com/matrix-org/pantalaimon) is **required** for the bot to be able to talk in encrypted rooms.
[Pantalaimon](https://github.com/matrix-org/pantalaimon) is **required** for the bot to be able to talk in encrypted
rooms.
I included a sample Systemd service (`matrixgpt.service`).
## Use
First, invite your bot to a room. Then you can start a chat by prefixing your message with your trigger (for example, `!c hello!`). The bot will create a thread when it replies. You don't need to use the trigger in the thread.
First, invite your bot to a room. Then you can start a chat by prefixing your message with your trigger (for
example, `!c hello!`). The bot will create a thread when it replies. You don't need to use the trigger in the thread.
Use `!matrixgpt` to view the bot's help. The bot also responds to `!bots`.

View File

@ -84,9 +84,6 @@ async def main(args):
client.add_event_callback(callbacks.decryption_failure, MegolmEvent)
client.add_event_callback(callbacks.unknown, UnknownEvent)
# TODO: multimedia mode?
# RoomMessageImage
# Keep trying to reconnect on failure (with some time in-between)
while True:
try:

View File

@ -3,6 +3,7 @@ import logging
from matrix_gpt import MatrixClientHelper
from matrix_gpt.config import global_config
from matrix_gpt.generate_clients.anthropic import AnthropicApiClient
from matrix_gpt.generate_clients.copilot import CopilotClient
from matrix_gpt.generate_clients.openai import OpenAIClient
"""
@ -23,12 +24,15 @@ class ApiClientManager:
"""
self._openai_api_key = global_config['openai'].get('api_key', 'MatrixGPT')
self._anth_api_key = global_config['anthropic'].get('api_key')
self._copilot_cookie = global_config['copilot'].get('api_key')
def get_client(self, mode: str, client_helper: MatrixClientHelper):
    """Return the API client instance matching ``mode``.

    :param mode: a command's ``api_type`` — one of ``'openai'``,
        ``'anthropic'``, or ``'copilot'``.
    :param client_helper: MatrixClientHelper forwarded to the client factory.
    :return: the matching API client (or ``None`` if its key is missing).
    :raises ValueError: if ``mode`` is not a recognized API type.
    """
    # The stale pre-rename branch (`mode == 'anth'`) left over from the diff
    # is dropped; only the renamed 'anthropic' value is dispatched.
    if mode == 'openai':
        return self.openai_client(client_helper)
    elif mode == 'anthropic':
        return self.anth_client(client_helper)
    elif mode == 'copilot':
        return self.copilot_client(client_helper)
    else:
        # A bare `raise Exception` gave callers no clue what went wrong.
        raise ValueError(f'Unknown API type: "{mode}"')
@ -52,5 +56,15 @@ class ApiClientManager:
client_helper=client_helper
)
def copilot_client(self, client_helper):
    """Build a CopilotClient, or return None when no Bing cookie is configured.

    :param client_helper: MatrixClientHelper passed through to the API client.
    :return: a CopilotClient, or None if the `copilot.api_key` config value
        is unset (an error is logged in that case).
    """
    # Presumably refreshes cached credentials from the global config before
    # checking them — mirrors the other *_client factory methods.
    self._set_from_config()
    if not self._copilot_cookie:
        # NOTE(review): the config key is named `api_key` but it actually
        # holds Bing cookies (see CopilotClient/SydneyClient) — confirm.
        self.logger.error('Missing a Copilot API key!')
        return None
    return CopilotClient(
        api_key=self._copilot_cookie,
        client_helper=client_helper,
    )
api_client_helper = ApiClientManager()

View File

@ -5,6 +5,8 @@ from types import NoneType
import bison
from bison.errors import SchemeValidationError
VALID_API_TYPES = ['openai', 'anthropic', 'copilot']
config_scheme = bison.Scheme(
bison.Option('store_path', default='bot-store/', field_type=str),
bison.DictOption('auth', scheme=bison.Scheme(
@ -21,7 +23,7 @@ config_scheme = bison.Scheme(
bison.Option('response_timeout', default=120, field_type=int),
bison.ListOption('command', required=True, member_scheme=bison.Scheme(
bison.Option('trigger', field_type=str, required=True),
bison.Option('api_type', field_type=str, choices=['openai', 'anth'], required=True),
bison.Option('api_type', field_type=str, choices=VALID_API_TYPES, required=True),
bison.Option('model', field_type=str, required=True),
bison.Option('max_tokens', field_type=int, default=0),
bison.Option('temperature', field_type=[int, float], default=0.5),
@ -40,6 +42,9 @@ config_scheme = bison.Scheme(
bison.DictOption('anthropic', scheme=bison.Scheme(
bison.Option('api_key', field_type=[str, NoneType], required=False, default=None),
)),
bison.DictOption('copilot', scheme=bison.Scheme(
bison.Option('api_key', field_type=[str, NoneType], required=False, default=None),
)),
bison.DictOption('logging', scheme=bison.Scheme(
bison.Option('log_level', field_type=str, default='info'),
bison.Option('log_full_response', field_type=bool, default=True),
@ -82,10 +87,18 @@ class ConfigManager:
def validate(self):
assert not self._validated
self._config.validate()
if not self._config.config['openai']['api_key'] and not self._config.config['anthropic']['api_key']:
raise SchemeValidationError('You need an OpenAI or Anthropic API key')
config_api_keys = 0
for api in VALID_API_TYPES:
if self._config.config[api].get('api_key'):
config_api_keys += 1
if config_api_keys < 1:
raise SchemeValidationError('You need an API key')
self._parsed_config = self._merge_in_list_defaults()
for item in self._config.config['command']:
if item['api_type'] == 'copilot' and item['model'] != 'copilot':
raise SchemeValidationError('The Copilot model type must be set to `copilot`')
# Make sure there aren't duplicate triggers
existing_triggers = []
for item in self._config.config['command']:
@ -119,7 +132,7 @@ class ConfigManager:
command_prefixes = {}
for item in self._parsed_config['command']:
command_prefixes[item['trigger']] = item
if item['api_type'] == 'anth' and item.get('max_tokens', 0) < 1:
if item['api_type'] == 'anthropic' and item.get('max_tokens', 0) < 1:
raise SchemeValidationError(f'Anthropic requires `max_tokens`. See <https://support.anthropic.com/en/articles/7996856-what-is-the-maximum-prompt-length>')
return command_prefixes

View File

@ -33,10 +33,16 @@ async def generate_ai_response(
api_client = api_client_helper.get_client(command_info.api_type, client_helper)
messages = api_client.assemble_context(msg, system_prompt=command_info.system_prompt, injected_system_prompt=command_info.injected_system_prompt)
if api_client.check_ignore_request():
logger.debug(f'Reply to {event.event_id} was ignored by the model "{command_info.model}".')
await client.room_typing(room.room_id, typing_state=False, timeout=1000)
return
response = None
try:
task = asyncio.create_task(api_client.generate(command_info))
for task in asyncio.as_completed([task], timeout=global_config['response_timeout']):
# TODO: add a while loop and heartbeat the background thread
try:
response = await task
break

View File

@ -15,7 +15,7 @@ class AnthropicApiClient(ApiClient):
def _create_client(self, base_url: str = None):
return AsyncAnthropic(
api_key=self.api_key
api_key=self._api_key
)
def assemble_context(self, messages: Union[str, list], system_prompt: str = None, injected_system_prompt: str = None):
@ -50,7 +50,7 @@ class AnthropicApiClient(ApiClient):
async def append_img(self, img_event: RoomMessageImage, role: str):
assert role in [self._HUMAN_NAME, self._BOT_NAME]
img_bytes = await download_mxc(img_event.url, self.client_helper.client)
img_bytes = await download_mxc(img_event.url, self._client_helper.client)
encoded_image = process_image(img_bytes, resize_px=784)
self._context.append({
"role": role,

View File

@ -11,13 +11,16 @@ class ApiClient:
_BOT_NAME = 'assistant'
def __init__(self, api_key: str, client_helper: MatrixClientHelper):
    """Store the credentials and Matrix helper shared by all API clients.

    :param api_key: provider credential (an API key, or Bing cookies for
        the Copilot client).
    :param client_helper: MatrixClientHelper used for Matrix-side operations
        (e.g. downloading image events).
    """
    # This commit renamed the attributes to private form; the pre-rename
    # public assignments (self.api_key / self.client_helper) were diff
    # residue and are dropped so there is a single source of truth.
    self._api_key = api_key
    self._client_helper = client_helper
    self._context = []  # accumulated chat messages for the current request
def _create_client(self, base_url: str = None):
    """Construct the provider SDK client; implemented by subclasses."""
    raise NotImplementedError
def check_ignore_request(self):
    # Default: never ignore a request. Subclasses may override — e.g. the
    # Copilot client ignores requests with more than one context message.
    return False
def assemble_context(self, messages: Union[str, list], system_prompt: str = None, injected_system_prompt: str = None):
    """Build the provider-specific message context; implemented by subclasses."""
    raise NotImplementedError

View File

@ -1,10 +1,10 @@
from matrix_gpt.config import global_config
from matrix_gpt.config import global_config, VALID_API_TYPES
class CommandInfo:
def __init__(self, trigger: str, api_type: str, model: str, max_tokens: int, temperature: float, allowed_to_chat: list, allowed_to_thread: list, allowed_to_invite: list, system_prompt: str, injected_system_prompt: str, api_base: str = None, vision: bool = False, help: str = None):
self.trigger = trigger
assert api_type in ['openai', 'anth']
assert api_type in VALID_API_TYPES
self.api_type = api_type
self.model = model
self.max_tokens = max_tokens

View File

@ -0,0 +1,85 @@
import re
from typing import Union
from urllib.parse import urlparse
from nio import RoomMessageImage
from sydney import SydneyClient
from matrix_gpt.generate_clients.api_client import ApiClient
from matrix_gpt.generate_clients.command_info import CommandInfo
# Matches a Markdown-style citation definition line emitted by Copilot, e.g.
# `[1]: https://example.com/ ""` — group(1) is the citation number and
# group(2) is the URL.
_REGEX_ATTR_RE_STR = r'^\[(\d*)]:\s(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)\s*(\"\")*'
_REGEX_ATTR_RE = re.compile(_REGEX_ATTR_RE_STR)
# Matches inline citation markers of the form `[^n^][m]`; the two halves are
# joined with `\d*` to allow any digits between the bracket groups.
_REGEX_ATTR_LINK_RE_STR = [r'\[\^\d*\^]\[', r']']
_REGEX_ATTR_LINK_RE = re.compile(r'\d*'.join(_REGEX_ATTR_LINK_RE_STR))
class CopilotClient(ApiClient):
    """API client backed by Bing Copilot via the `sydney` library.

    Copilot has no conversation-context mechanism in this integration, so
    only the first message of a thread is answered (see
    `check_ignore_request`).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _create_client(self, api_base: str = None):
        # No persistent SDK client: a SydneyClient is created per-request
        # inside generate().
        return None

    def append_msg(self, content: str, role: str):
        """Record a plain-text message in the pending context."""
        assert role in [self._HUMAN_NAME, self._BOT_NAME]
        self._context.append({'role': role, 'content': content})

    async def append_img(self, img_event: RoomMessageImage, role: str):
        # Vision is not supported for Copilot.
        raise NotImplementedError

    def check_ignore_request(self):
        # Ignore anything past the first message: Copilot cannot continue a
        # conversation here, so thread replies are silently skipped.
        if len(self._context) > 1:
            return True
        return False

    def assemble_context(self, messages: Union[str, list], system_prompt: str = None, injected_system_prompt: str = None):
        """Normalize `messages` into the internal context list.

        The system-prompt arguments are accepted for interface compatibility
        with the other clients but are not used by Copilot.
        """
        if isinstance(messages, list):
            messages = messages  # already in context form; used as-is
        else:
            messages = [{'role': self._HUMAN_NAME, 'content': messages}]
        self._context = messages
        return messages

    async def generate(self, command_info: CommandInfo):
        """Ask Bing Copilot and return its answer with rewritten citations.

        :param command_info: command config (accepted for interface
            compatibility; not read here).
        :return: Markdown response text with a trailing citation list and a
            fixed disclaimer appended.
        """
        # `bing_cookies` receives the raw cookie string stored under the
        # `copilot.api_key` config value.
        async with SydneyClient(bing_cookies=self._api_key) as sydney:
            # raw=True yields Bing's full JSON payload so we can dig out the
            # adaptive cards ourselves.
            response = dict(await sydney.ask(self._context[0]['content'], citations=True, raw=True))
            # NOTE(review): assumes the last message holds the bot's answer
            # and that the first adaptive card's body contains a TextBlock —
            # confirm against the sydney/Bing response schema.
            bot_response = response['item']['messages'][-1]
            text_card = {}
            for msg in bot_response['adaptiveCards'][0]['body']:
                if msg.get('type') == 'TextBlock':
                    text_card = msg
                    break
            response_text = text_card.get('text', '')

            # Parse the attribution links ("[n]: <url>" lines at the top).
            attributions_strs = []
            for line in response_text.split('\n'):
                m = re.match(_REGEX_ATTR_RE, line)
                if m:
                    i = int(m.group(1))
                    # Citation numbers are 1-based; insert() past the end of
                    # the list appends, so sequential citations land in order.
                    attributions_strs.insert(i, m.group(2))
            if len(attributions_strs):
                # Remove the original attribution lines from the text by
                # splitting off the first len(attributions)+1 lines.
                response_text = response_text.split("\n", len(attributions_strs) + 1)[len(attributions_strs) + 1]
                # Add a list of attributions at the bottom of the response.
                response_text += '\n\nCitations:'
                for i in range(len(attributions_strs)):
                    url = attributions_strs[i]
                    domain = urlparse(url).netloc
                    response_text += f'\n\n{i + 1}. [{domain}]({url})'
                # Rewrite the inline attribution markers (e.g. "[^1^][1]")
                # into Markdown links pointing at the cited URLs.
                for match in re.findall(_REGEX_ATTR_LINK_RE, response_text):
                    match_clean = re.sub(r'\[\^\d*\^]', '', match)
                    i = int(re.match(r'\[(\d*)]', match_clean).group(1))
                    assert i - 1 >= 0
                    new_str = f'[[{i}]]({attributions_strs[i - 1]})'
                    n = response_text.replace(match, new_str)
                    response_text = n
            response_text += "\n\n*Copilot lacks a context mechanism so the bot cannot respond past the first message. Conversations with Copilot are not private.*"
            return response_text

View File

@ -16,7 +16,7 @@ class OpenAIClient(ApiClient):
def _create_client(self, api_base: str = None):
return AsyncOpenAI(
api_key=self.api_key,
api_key=self._api_key,
base_url=api_base
)
@ -26,7 +26,7 @@ class OpenAIClient(ApiClient):
async def append_img(self, img_event: RoomMessageImage, role: str):
assert role in [self._HUMAN_NAME, self._BOT_NAME]
img_bytes = await download_mxc(img_event.url, self.client_helper.client)
img_bytes = await download_mxc(img_event.url, self._client_helper.client)
encoded_image = process_image(img_bytes, resize_px=512)
self._context.append({
"role": role,

View File

@ -122,7 +122,13 @@ async def sound_off(room: MatrixRoom, event: RoomMessageText, client_helper: Mat
injected_system_prompt_text = f" Injected system prompt: yes." if command['injected_system_prompt'] else ''
help_text = f" ***{command['help'].strip('.')}.***" if command['help'] else ''
vision_text = ' Vision: yes.' if command['vision'] else ''
text_response = text_response + f"`{command['trigger']}` - Model: {command['model']}. Temperature: {command['temperature']}.{max_tokens}{vision_text}{system_prompt_text}{injected_system_prompt_text}{help_text}\n\n"
if command['model'] != 'copilot':
text_response = text_response + f"`{command['trigger']}` - Model: {command['model']}. Temperature: {command['temperature']}.{max_tokens}{vision_text}{system_prompt_text}{injected_system_prompt_text}{help_text}\n\n"
else:
# Copilot is very basic.
# TODO: make sure to update this if Copilot gets vision support.
text_response = text_response + f"`{command['trigger']}` - Model: {command['model']}.{help_text}\n\n"
return await client_helper.send_text_to_room(
room.room_id,
text_response,