add files

This commit is contained in:
Cyberes 2024-04-03 22:57:54 -06:00
parent 86a4024446
commit dcdcc15b53
11 changed files with 514 additions and 1 deletions

4
.gitignore vendored
View File

@ -1,3 +1,7 @@
.idea
config.yml
bots/
# ---> Python # ---> Python
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

View File

@ -1,3 +1,27 @@
# matrix-room-exporter # matrix-room-exporter
Script to export Matrix a room _Export a Matrix a room._
This is a simple bot that can export a Matrix room and upload the compressed archive to a Cloudflare R2 bucket for the
room to download.
### Install
1. `pip install -r requirements.txt`
2. `cp config.yml.sample config.yml`
3. [Create a Cloudflare R2 bucket](https://developers.cloudflare.com/r2/get-started/#2-create-a-bucket).
4. Enable [public access](https://developers.cloudflare.com/r2/buckets/public-buckets/).
5. Create an [access key](https://developers.cloudflare.com/r2/api/s3/tokens/).
6. Set up an [object lifecycle rule](https://developers.cloudflare.com/r2/buckets/object-lifecycles/) to auto-delete old
exports from your bucket. 24 hours is a good choice.
7. Configure `config.yml` with your R2 credentials.
8. Set up a [matrix-org/pantalaimon](https://github.com/matrix-org/pantalaimon) for encrypted sync.
9. Create a new Matrix user and configure it in `config.yml`.
10. Start the bot with `python3 main.py`
### Use
1. Invite the new bot to the room you want to export.
2. Send the message `!export` to start the export process.
The bot will upload the exported room to your R2 bucket and share the link with the room.

20
config.yml.sample Normal file
View File

@ -0,0 +1,20 @@
auth:
username: exportbot
password: password1234
homeserver: https://pantalaimon.example.com
# Where to cache the bot's login info.
store_path: ./bots
command_prefix: '!export'
# How often a room is allowed to be exported.
# If an export failed, the interval will be half of this value.
export_interval: 3600
r2:
bucket_name: export-bot
accountid: 12345
access_key_id: 67890
access_key_secret: abc1234
pub_url: https://pub-example12345.r2.dev

0
exporter/__init__.py Normal file
View File

202
exporter/callbacks.py Normal file
View File

@ -0,0 +1,202 @@
import copy
import json
import logging
import shutil
import tempfile
import time
import traceback
from datetime import datetime, timedelta
from pathlib import Path
import aiofiles
import aiofiles.os as aos
from nio import (AsyncClient, InviteMemberEvent, JoinError, MatrixRoom, RoomMessageText, SyncResponse, RoomMessageMedia, MessageDirection)
from exporter.export import download_mxc, zip_directory, upload_to_r2, trim_filename, fetch_events
class RoomExport:
def __init__(self, room_id: str, allowed_again: datetime):
self.room_id = room_id
self.allowed_again = allowed_again
class ExportTracker:
def __init__(self):
self._exported_rooms = {}
def add_export(self, room_id: str, seconds_elapsed_allowed: int) -> None:
allowed_again = datetime.now() + timedelta(seconds=seconds_elapsed_allowed)
self._exported_rooms[room_id] = RoomExport(room_id, allowed_again)
def check_allowed(self, room_id):
if self.get_export(room_id):
return datetime.now() >= self._exported_rooms[room_id].allowed_again
return True
def get_export(self, room_id: str) -> RoomExport | None:
return copy.deepcopy(self._exported_rooms.get(room_id))
class MatrixBotCallbacks:
def __init__(self, client: AsyncClient, config: dict):
self.client = client
self.config = config
self.logger = logging.getLogger('MatrixGPT').getChild('MatrixBotCallbacks')
self.startup_ts = time.time() * 1000
self.seen_events = {}
self.exports = ExportTracker()
self.zip_temp_dir = Path(tempfile.mkdtemp())
async def handle_invite(self, room: MatrixRoom, event: InviteMemberEvent) -> None:
"""
Since the InviteMemberEvent is fired for every m.room.member state received
in a sync response's `rooms.invite` section, we will receive some that are
not actually our own invite event (such as the inviter's membership).
This makes sure we only call `callbacks.invite` with our own invite events.
"""
# event.source.get('origin_server_ts') > self.startup_time
if event.state_key == self.client.user_id:
self.logger.info(f"Got invite to {room.room_id} from {event.sender}.")
# Attempt to join 3 times before giving up
for attempt in range(3):
result = await self.client.join(room.room_id)
if isinstance(result, JoinError):
self.logger.error(f"Error joining room {room.room_id} (attempt {attempt}): {result.message}")
else:
self.logger.info(f"Joined via invite: {room.room_id}")
return
else:
self.logger.error("Unable to join room: %s", room.room_id)
async def handle_message(self, room: MatrixRoom, event: RoomMessageText) -> None:
# Extract the message text
await self.client.room_read_markers(room.room_id, event.event_id, event.event_id)
# Ignore messages from ourselves
if event.sender == self.client.user_id:
return
if event.server_timestamp < self.startup_ts:
self.logger.debug(f'Skipping event as it was sent before startup time: {event.event_id}')
return
if event.event_id in list(self.seen_events.keys()):
self.logger.debug(f'Skipping seen event: {event.event_id}')
return
msg = event.body.strip().strip('\n')
if msg == self.config['command_prefix']:
self.logger.info(f"Export for {room.room_id} requested by {event.sender}")
if not self.exports.check_allowed(room.room_id):
last_export = self.exports.get_export(room.room_id)
time_diff = last_export.allowed_again - datetime.now()
minutes_until_future = time_diff.total_seconds() // 60
content = {
"body": f'Cannot export again for {minutes_until_future} minutes.',
"m.relates_to": {
"m.in_reply_to": {
"event_id": event.event_id
}
},
"msgtype": "m.text"
}
await self.client.room_send(
room.room_id,
'm.room.message',
content
)
self.logger.info(f"Rejected export in {room.room_id}, {minutes_until_future} minutes remaining.")
return
zipfile_name = f'{room.room_id.replace(":", "_").replace("!", "").replace(".", "")}-{int(time.time())}.zip'
start_msg = await self.client.room_send(
room_id=room.room_id,
message_type="m.room.message",
content={"msgtype": "m.text", "format": "org.matrix.custom.html", "body": 'Exporting room...', 'formatted_body': '<i>Exporting room...</i>'},
)
try:
sync_response = await self.client.sync(full_state=True, sync_filter={"room": {"timeline": {"limit": 1}}})
if not isinstance(sync_response, SyncResponse):
self.logger.error(f'Failed to sync room "{room.room_id}": {sync_response}')
raise
start_token = sync_response.rooms.join[room.room_id].timeline.prev_batch
room_events = [
await fetch_events(self.client, room.room_id, start_token, MessageDirection.back),
await fetch_events(self.client, room.room_id, start_token, MessageDirection.front),
]
temp_dir = Path(tempfile.mkdtemp())
export_data = []
self.logger.debug(f'Writing export for {room.room_id} to {temp_dir}')
for direction in room_events:
for event in direction.all():
if isinstance(event, RoomMessageMedia):
media_data = await download_mxc(self.client, event.url)
filename = trim_filename(f'{int(event.server_timestamp / 1000)} -- {event.body}')
event.source["_file_path"] = filename
async with aiofiles.open(temp_dir / filename, "wb") as f_media:
await f_media.write(media_data)
export_data.extend(direction.jsonify())
async with aiofiles.open(temp_dir / 'data.json', 'w', encoding='utf-8') as f:
await f.write(json.dumps(export_data, ensure_ascii=False, indent=4))
zipfile_path = self.zip_temp_dir / zipfile_name
self.logger.debug(f'Creating zip: {zipfile_path}')
await zip_directory(temp_dir, zipfile_path)
shutil.rmtree(temp_dir)
self.logger.debug(f'Uploading: {zipfile_path}')
r2_upload = await upload_to_r2(
zipfile_path, zipfile_name,
self.config['r2']['bucket_name'],
self.config['r2']['accountid'],
self.config['r2']['access_key_id'],
self.config['r2']['access_key_secret']
)
await aos.remove(zipfile_path)
self.logger.info(f'Export for {room.room_id} completed.')
self.exports.add_export(room.room_id, self.config['export_interval'])
pub_url = f'{self.config["r2"]["pub_url"]}/{zipfile_name}'
formatted_body = f'<strong>Export complete!</strong><br><a href="{pub_url}">{pub_url}</a><br>This file will be deleted after 24 hours.'
content = {
"body": f'Export complete!\n{pub_url}\nThis file will be deleted after 24 hours.',
"formatted_body": formatted_body,
"format": "org.matrix.custom.html",
"m.relates_to": {
"m.in_reply_to": {
"event_id": start_msg.event_id
}
},
"msgtype": "m.text"
}
await self.client.room_send(
room.room_id,
'm.room.message',
content
)
except Exception as e:
self.logger.error(f'Export failed for {room.room_id}: {traceback.format_exc()}')
self.exports.add_export(room.room_id, self.config['export_interval'] // 2)
content = {
"body": f'❌ Failed to export room!',
"m.relates_to": {
"m.in_reply_to": {
"event_id": start_msg.event_id
}
},
"msgtype": "m.text"
}
await self.client.room_send(
room.room_id,
'm.room.message',
content
)

16
exporter/config.py Normal file
View File

@ -0,0 +1,16 @@
DEFAULT_CONFIG = {
'auth': {
'username': None,
'password': None,
'homeserver': None,
},
'command_prefix': '!export',
'export_interval': 3600,
'r2': {
'bucket_name': None,
'accountid': None,
'access_key_id': None,
'access_key_secret': None,
'pub_url': None,
}
}

115
exporter/export.py Normal file
View File

@ -0,0 +1,115 @@
import asyncio
import copy
import os
import zipfile
from pathlib import Path
from typing import Union, List
from urllib.parse import urlparse
import aioboto3
import aiofiles
from nio import (
RedactedEvent,
RoomMessageFormatted,
RoomMessageMedia,
Event,
RoomMessagesError,
AsyncClient,
MessageDirection
)
"""
Inspired by https://github.com/russelldavies/matrix-archive/blob/master/matrix-archive.py
"""
class RoomEvents:
def __init__(self, room_id: str):
self._room_id = room_id
self._events: List[Event] = []
def add_event(self, event):
self._events.append(event)
def all(self):
return copy.deepcopy(self._events)
def jsonify(self):
dump = [x.source for x in self._events]
return dump
def is_valid_event(event: Event):
return isinstance(event, (RoomMessageFormatted, RedactedEvent, RoomMessageMedia))
async def download_mxc(client: AsyncClient, url: str) -> bytes:
mxc = urlparse(url)
response = await client.download(mxc.netloc, mxc.path.strip("/"))
if hasattr(response, "body"):
return response.body
else:
return b''
async def zip_directory(directory_path, zip_path):
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, _zip_directory, directory_path, zip_path)
def _zip_directory(directory_path, zip_path):
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
for root, dirs, files in os.walk(directory_path):
for file in files:
file_path = str(os.path.join(root, file))
zip_file.write(file_path, os.path.relpath(file_path, directory_path))
async def upload_to_r2(source_file: Union[str, Path], destination_file: str, bucket_name: str, accountid: str, access_key_id: str, access_key_secret: str):
session = aioboto3.Session(
aws_access_key_id=access_key_id,
aws_secret_access_key=access_key_secret,
)
async with session.resource(
service_name="s3",
endpoint_url=f'https://{accountid}.r2.cloudflarestorage.com',
) as s3:
bucket = await s3.Bucket(bucket_name)
async with aiofiles.open(source_file, 'rb') as f:
return await bucket.upload_fileobj(f, destination_file)
def trim_filename(filename, max_length=255):
extension = os.path.splitext(filename)[1]
max_length -= len(extension)
if len(filename) > max_length:
filename = filename[:max_length]
filename += extension
return filename
async def fetch_events(client: AsyncClient, room_id: str, start_token: str, direction: MessageDirection) -> RoomEvents:
room_events = RoomEvents(room_id)
events = []
while True:
response = await client.room_messages(
room_id,
start=start_token,
limit=1000,
direction=direction
)
if isinstance(response, RoomMessagesError):
raise Exception(f'Failed to read room "{room_id}": {response.message}')
if len(response.chunk) == 0:
break
events.extend(event for event in response.chunk)
start_token = response.end
if direction == MessageDirection.back:
events = reversed(events)
for event in events:
room_events.add_event(event)
return room_events

71
exporter/matrix.py Normal file
View File

@ -0,0 +1,71 @@
import json
import logging
import os
from pathlib import Path
from nio import AsyncClient, AsyncClientConfig, LoginError
from nio import LoginResponse
class MatrixClientHelper:
"""
A simple wrapper class for common matrix-nio actions.
"""
# Encryption is disabled because it's handled by Pantalaimon.
client_config = AsyncClientConfig(max_limit_exceeded=0, max_timeouts=0, store_sync_tokens=True, encryption_enabled=False)
def __init__(self, user_id: str, passwd: str, homeserver: str, store_path: str, device_name: str = 'MatrixGPT'):
self.user_id = user_id
self.passwd = passwd
self.homeserver = homeserver
if not (self.homeserver.startswith("https://") or self.homeserver.startswith("http://")):
self.homeserver = "https://" + self.homeserver
self.store_path = Path(store_path).absolute().expanduser().resolve()
self.store_path.mkdir(parents=True, exist_ok=True)
self.auth_file = self.store_path / (device_name.lower() + '.json')
self.device_name = device_name
self.client = AsyncClient(homeserver=self.homeserver, user=self.user_id, config=self.client_config, device_id=device_name)
self.logger = logging.getLogger('MatrixGPT').getChild('MatrixClientHelper')
async def login(self) -> tuple[bool, LoginError] | tuple[bool, LoginResponse | None]:
try:
# If there are no previously-saved credentials, we'll use the password.
if not os.path.exists(self.auth_file):
self.logger.info('Using username/password.')
resp = await self.client.login(self.passwd, device_name=self.device_name)
# Check that we logged in successfully.
if isinstance(resp, LoginResponse):
self.write_details_to_disk(resp)
return True, resp
else:
return False, resp
else:
# Otherwise the config file exists, so we'll use the stored credentials.
self.logger.info('Using cached credentials.')
with open(self.auth_file, "r") as f:
config = json.load(f)
client = AsyncClient(config["homeserver"])
client.access_token = config["access_token"]
client.user_id = config["user_id"]
client.device_id = config["device_id"]
resp = await self.client.login(self.passwd, device_name=self.device_name)
if isinstance(resp, LoginResponse):
self.write_details_to_disk(resp)
return True, resp
else:
return False, resp
except Exception:
return False, None
def write_details_to_disk(self, resp: LoginResponse) -> None:
with open(self.auth_file, "w") as f:
json.dump({"homeserver": self.homeserver,
"user_id": resp.user_id,
"device_id": resp.device_id,
"access_token": resp.access_token,
}, f)

58
main.py Normal file
View File

@ -0,0 +1,58 @@
import argparse
import asyncio
import logging
import os
from pathlib import Path
import yaml
from nio import InviteMemberEvent, RoomMessageText
from exporter.callbacks import MatrixBotCallbacks
from exporter.config import DEFAULT_CONFIG
from exporter.matrix import MatrixClientHelper
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
async def main(args):
logging.basicConfig()
logger = logging.getLogger('MatrixGPT')
with open(args.config) as stream:
try:
yaml_config = yaml.safe_load(stream)
except yaml.YAMLError as exc:
logger.critical(f'Failed to load config: {exc}')
quit(1)
config = DEFAULT_CONFIG | yaml_config
logger.setLevel(logging.DEBUG)
matrix_helper = MatrixClientHelper(
user_id=config['auth']['username'],
passwd=config['auth']['password'],
homeserver=config['auth']['homeserver'],
store_path=config['store_path'],
)
client = matrix_helper.client
login_success, login_response = await matrix_helper.login()
if not login_success:
print(login_response)
quit(1)
logger.info(f'Logged in as {client.user_id}')
callbacks = MatrixBotCallbacks(client, config)
client.add_event_callback(callbacks.handle_invite, InviteMemberEvent)
client.add_event_callback(callbacks.handle_message, RoomMessageText)
await client.sync_forever(timeout=30000)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Export a Matrix room.')
parser.add_argument('--config', default=Path(SCRIPT_PATH) / 'config.yml')
args = parser.parse_args()
asyncio.run(main(args))

View File

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
matrix-nio==0.24.0
aioboto3==12.3.0
pyyaml==6.0.1