MatrixGPT/matrix_gpt/generate_clients/copilot.py

97 lines
3.9 KiB
Python

import re
from typing import Union
from urllib.parse import urlparse
from nio import RoomMessageImage
from sydney import SydneyClient
from matrix_gpt.generate_clients.api_client import ApiClient
from matrix_gpt.generate_clients.command_info import CommandInfo
"""
This was written with sydney.py==0.20.4 but requirements.txt has not locked in a version because Bing's API may change.
"""
# Matches one attribution definition line, e.g. `[1]: https://example.com/page ""`.
# Group 1 is the citation number (it may be empty because the pattern uses `\d*`),
# group 2 is the http(s) URL, and group 3 is the optional trailing `""`.
_REGEX_ATTR_RE_STR = r'^\[(\d*)]:\s(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)\s*(\"\")*'
_REGEX_ATTR_RE = re.compile(_REGEX_ATTR_RE_STR)
# The two halves below are joined with `\d*`, producing the pattern `\[\^\d*\^]\[\d*]`.
# That pattern matches an inline citation marker such as `[^1^][1]`; it has no capture groups.
_REGEX_ATTR_LINK_RE_STR = [r'\[\^\d*\^]\[', r']']
_REGEX_ATTR_LINK_RE = re.compile(r'\d*'.join(_REGEX_ATTR_LINK_RE_STR))
"""
To implement context, could we maybe pickle the `sydney` object and track state via requester event ID?
Probably best not to store it in memory, but maybe a sqlite database in /tmp?
But might have to store it in memory because of async issues and not being able to restore the state of an old async loop.
"""
class CopilotClient(ApiClient):
    """
    `ApiClient` implementation that answers a prompt through `sydney.py`'s `SydneyClient`.

    Only the first message of the stored context is sent (see `generate`); no conversation
    state is carried between requests.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _create_client(self, api_base: Union[str, None] = None):
        """
        Return `None`: no long-lived client object is kept.

        `generate` opens a fresh `SydneyClient` for every request instead. `api_base` is ignored.
        """
        return None

    def append_msg(self, content: str, role: str):
        """
        Append one message to the stored context.

        :param content: the message text.
        :param role: must be either `self._HUMAN_NAME` or `self._BOT_NAME`.
        """
        # NOTE: this check is stripped under `python -O`. It is kept as an assert so the
        # exception type seen by existing callers does not change.
        assert role in [self._HUMAN_NAME, self._BOT_NAME]
        self._context.append({'role': role, 'content': content})

    async def append_img(self, img_event: RoomMessageImage, role: str):
        """Images are not supported by this client; always raises `NotImplementedError`."""
        raise NotImplementedError

    def check_ignore_request(self):
        """
        Return `True` when the stored context holds more than one message.

        `generate` only ever sends the first message, so presumably callers use this to skip
        follow-up messages in a thread (verify against the caller).
        """
        return len(self._context) > 1

    def assemble_context(self, messages: Union[str, list], system_prompt: str = None, injected_system_prompt: str = None):
        """
        Store `messages` as the context and return it.

        A list is stored as-is; a plain string is wrapped into a single human message.
        `system_prompt` and `injected_system_prompt` are accepted but not used by this client.
        """
        if not isinstance(messages, list):
            messages = [{'role': self._HUMAN_NAME, 'content': messages}]
        self._context = messages
        return messages

    @staticmethod
    def _link_citations(response_text: str) -> str:
        """
        Turn attribution definitions and inline citation markers into Markdown links.

        Lines matching `_REGEX_ATTR_RE` (e.g. `[1]: https://example.com ""`) are removed from the
        text and re-emitted as a numbered `Citations:` list at the bottom. Inline markers matching
        `_REGEX_ATTR_LINK_RE` (e.g. `[^1^][1]`) become `[[1]](url)`.

        The text is returned unchanged when it contains no attribution definitions.
        """
        # Key the URLs by their citation number rather than by list position so that missing,
        # repeated or out-of-order numbers cannot shift the numbering.
        attributions = {}
        body_lines = []
        for line in response_text.split('\n'):
            definition = _REGEX_ATTR_RE.match(line)
            # The number group is `\d*`, so it can be empty; such a line is left in the body.
            if definition and definition.group(1):
                attributions.setdefault(int(definition.group(1)), definition.group(2))
            else:
                body_lines.append(line)

        if not attributions:
            return response_text

        # Drop only the lines that really were attribution definitions, plus the blank
        # separator line(s) that followed them at the top of the text.
        body = '\n'.join(body_lines).lstrip('\n')

        def _inline_link(marker_match):
            # `[^1^][1]` -> `[1]`
            plain = re.sub(r'\[\^\d*\^]', '', marker_match.group(0))
            number_text = plain[1:-1]
            url = attributions.get(int(number_text)) if number_text else None
            if url is None:
                # No definition for this marker: fall back to the plain `[n]` text instead of crashing.
                return plain
            return f'[[{int(number_text)}]]({url})'

        body = _REGEX_ATTR_LINK_RE.sub(_inline_link, body)

        footer = ''.join(
            f'\n\n{number}. [{urlparse(url).netloc}]({url})'
            for number, url in sorted(attributions.items())
        )
        return f'{body}\n\nCitations:{footer}'

    async def generate(self, command_info: CommandInfo):
        """
        Send the first context message to Copilot and return the formatted reply text.

        The raw response is expected to provide `['item']['messages'][-1]['adaptiveCards'][0]['body']`;
        the text of the first `TextBlock` entry in that body is used (empty string if there is none).
        Citations are converted to Markdown links and a fixed disclaimer is appended.

        :param command_info: not used by this client.
        :return: the reply as a Markdown string.
        """
        # Hold the client open only for the request itself; the rest is pure string processing.
        async with SydneyClient(bing_cookies=self._api_key) as sydney:
            response = dict(await sydney.ask(self._context[0]['content'], citations=True, raw=True))

        bot_response = response['item']['messages'][-1]
        card_body = bot_response['adaptiveCards'][0]['body']
        text_card = next((card for card in card_body if card.get('type') == 'TextBlock'), {})

        response_text = self._link_citations(text_card.get('text', ''))
        response_text += "\n\n*Copilot lacks a context mechanism so the bot cannot respond past the first message. Conversations with Copilot are not private.*"
        return response_text