imap-archiver/archiver.py

82 lines
3.1 KiB
Python
Raw Normal View History

2024-03-05 23:57:38 -07:00
#!/usr/bin/env python3
import argparse
import logging
import sys
from datetime import datetime
from pathlib import Path
import humanize
import yaml
from iarchiver.database import EmailDatabase
from iarchiver.email import unix_timestamp_since_to_imap_timestamp
2024-03-06 10:30:13 -07:00
from iarchiver.mail_conn import MailConnection
2024-03-05 23:57:38 -07:00
def main(args):
    """Sync every non-excluded IMAP folder into the local email database.

    Reads the YAML config at ``args.config``, connects to the mail server and
    passes each fetched message to ``database.insert_email``. When the
    database reports a previous full sync, only messages matching an IMAP
    ``SINCE`` search for that date are requested; otherwise ``ALL`` messages
    are requested. The outcome is recorded with ``database.finish_sync``.

    Args:
        args: Parsed command-line namespace; only ``args.config`` (path to
            the YAML config file) is read.

    Exits the process with status 1 when the config is not a mapping or when
    any required key is missing or empty.
    """
    logging.basicConfig()
    logger = logging.getLogger('iarchiver')
    logger.setLevel(logging.INFO)

    with open(args.config, encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # Validate before touching the config: an empty YAML file loads as None,
    # which would otherwise fail with AttributeError on the first .get().
    required_keys = ('server', 'username', 'password', 'database_path', 'attachments_path')
    if not isinstance(config, dict) or not all(config.get(key) for key in required_keys):
        logger.critical('Bad config file.')
        sys.exit(1)

    # `exclude_folders:` with no value loads as None; treat it as "nothing excluded".
    exclude_folders = config.get('exclude_folders') or []

    attachments_dir = Path(config['attachments_path'])
    attachments_dir.mkdir(parents=True, exist_ok=True)

    database = EmailDatabase(Path(config['database_path']))
    mail = MailConnection(config['server'], config['username'], config['password'], attachments_dir)
    mail.load_folders()

    if config['server'] == 'imap.gmail.com':
        # '[Gmail]/All Mail' is archived instead of 'INBOX'. The bare '[Gmail]'
        # entry is dropped too (presumably a non-selectable parent - confirm).
        # Guarded so an account lacking either name does not abort the run.
        for skipped_name in ('INBOX', '[Gmail]'):
            if skipped_name in mail.folder_structure:
                mail.folder_structure.remove(skipped_name)

    # Resolve exclusions up front so the logged count matches what is synced.
    folders_to_sync = [name for name in mail.folder_structure if name not in exclude_folders]
    num_folders_to_sync = len(folders_to_sync)
    logger.info(f'Syncing {num_folders_to_sync} folders...')

    # The search criterion is the same for every folder, so build it once.
    last_refresh = database.have_we_done_a_full_sync_at_all()
    if last_refresh:
        last_refresh_imap_date = unix_timestamp_since_to_imap_timestamp(last_refresh)
        logger.info(f'Syncing emails received after {last_refresh_imap_date}')
        criterion = '(SINCE "' + last_refresh_imap_date + '")'
    else:
        criterion = 'ALL'

    new_emails = 0
    new_attachments = 0
    did_full_sync = False
    sync_start_time = datetime.now()
    for folder_name in folders_to_sync:
        logger.info(folder_name)
        if not last_refresh:
            # Only report a full sync once a folder is actually fetched with
            # ALL; a run that syncs no folders stays a 'refresh'.
            did_full_sync = True
        # A fresh list per call, in case fetch_folder mutates its argument.
        for message in mail.fetch_folder(folder_name, search_criterion=[criterion]):
            timestamp, to_email, from_email, subject, raw_content, attachments = message
            is_new_email = database.insert_email(folder_name, timestamp, subject, raw_content, to_email, from_email, attachments)
            if is_new_email:
                new_emails += 1
                # Count every attachment of a newly stored email, not just
                # whether the email had any.
                new_attachments += len(attachments)

    elapsed = datetime.now() - sync_start_time
    sync_type = 'full' if did_full_sync else 'refresh'
    database.finish_sync(sync_type, new_emails, new_attachments, int(elapsed.total_seconds()))
    logger.info(f'Finished email {"sync" if did_full_sync else "refresh"} in {humanize.naturaldelta(elapsed)} and added {new_emails} new emails and {new_attachments} attachments.')
2024-03-05 23:57:38 -07:00
if __name__ == '__main__':
    # Script entry point: parse the command line and hand the result to main().
    parser = argparse.ArgumentParser(description='Sync and archive your IMAP server.')
    parser.add_argument(
        '--config',
        default='config.yml',
        help='Path to config file.',
    )
    main(parser.parse_args())