imap-archiver/archiver.py

90 lines
3.5 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import logging
import sys
from datetime import datetime
from pathlib import Path
import humanize
import yaml
from iarchiver.database import EmailDatabase
from iarchiver.email import unix_timestamp_since_to_imap_timestamp
from iarchiver.mail_conn import MailConnection
def main(args: argparse.Namespace) -> None:
    """Archive the configured IMAP account into the local email database.

    Loads the YAML config named by ``args.config``, connects to the mail
    server and inserts every fetched email into ``EmailDatabase``.  Unless
    ``args.full`` is set, the database is asked for a previous sync marker;
    when one exists only emails matching an IMAP ``SINCE`` search for that
    date are fetched, otherwise ``ALL`` emails are fetched.

    Args:
        args: Parsed command-line arguments providing ``config`` (path to the
            YAML config file) and ``full`` (force a full sync).

    Exits the process with status 1 when the config file is empty, is not a
    mapping, or lacks any of the required settings.
    """
    logging.basicConfig()
    logger = logging.getLogger('iarchiver')
    logger.setLevel(logging.INFO)

    with open(args.config, encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # An empty YAML document loads as None and a scalar/list document is not a
    # mapping; treat both as a bad config instead of crashing on ``.get``.
    required_keys = ('server', 'username', 'password', 'database_path', 'attachments_path')
    if not isinstance(config, dict) or not all(config.get(key) for key in required_keys):
        logger.critical('Bad config file.')
        sys.exit(1)

    # ``exclude_folders:`` with no value loads as None; normalise it to an
    # empty list so the membership test below cannot fail.
    exclude_folders = config.get('exclude_folders') or []

    save_inline_attachments = config.get('save_inline_attachments', False)
    if save_inline_attachments:
        logger.info('Saving inline attachments as well')

    attachments_dir = Path(config['attachments_path'])
    attachments_dir.mkdir(parents=True, exist_ok=True)

    database = EmailDatabase(Path(config['database_path']))
    mail = MailConnection(
        config['server'],
        config['username'],
        config['password'],
        attachments_dir,
        save_inline_attachments=save_inline_attachments,
    )
    mail.load_folders()

    if config['server'] == 'imap.gmail.com':
        # 'INBOX' is dropped because '[Gmail]/All Mail' is used instead, and
        # the bare '[Gmail]' entry is dropped as well.  Only remove entries
        # that are actually present so a missing one does not abort the sync.
        for redundant_folder in ('INBOX', '[Gmail]'):
            if redundant_folder in mail.folder_structure:
                mail.folder_structure.remove(redundant_folder)

    num_folders_to_sync = len(mail.folder_structure)
    logger.info('Syncing %s folders...', num_folders_to_sync)

    last_refresh = None if args.full else database.have_we_done_a_full_sync_at_all()

    # The search criterion is identical for every folder, so build it once.
    if last_refresh:
        last_refresh_imap_date = unix_timestamp_since_to_imap_timestamp(last_refresh)
        logger.info('Syncing emails received after %s', last_refresh_imap_date)
        criterion = '(SINCE "' + last_refresh_imap_date + '")'
    else:
        criterion = 'ALL'

    new_emails = 0
    new_attachments = 0
    did_full_sync = False
    sync_start_time = datetime.now()

    for folder_name in mail.folder_structure:
        if folder_name in exclude_folders:
            continue
        logger.info(folder_name)

        # Only report a full sync once at least one folder was really fetched
        # with the 'ALL' criterion.
        if not last_refresh:
            did_full_sync = True

        # A fresh list per call, in case fetch_folder modifies its argument.
        for email in mail.fetch_folder(folder_name, search_criterion=[criterion]):
            timestamp, to_email, from_email, subject, raw_content, attachments = email
            is_new_email = database.insert_email(
                folder_name, timestamp, subject, raw_content, to_email, from_email, attachments
            )
            if is_new_email:
                new_emails += 1
                # Count every attachment, not just "this email had some",
                # since the value is reported as a number of attachments.
                new_attachments += len(attachments)

    elapsed = datetime.now() - sync_start_time
    database.finish_sync(
        'full' if did_full_sync else 'refresh',
        new_emails,
        new_attachments,
        int(elapsed.total_seconds()),
    )
    logger.info(
        'Finished email %s in %s and added %s new emails and %s attachments.',
        'sync' if did_full_sync else 'refresh',
        humanize.naturaldelta(elapsed),
        new_emails,
        new_attachments,
    )
if __name__ == '__main__':
    # Command-line entry point: build the CLI, parse it, and run the sync.
    cli = argparse.ArgumentParser(description='Sync and archive your IMAP server.')
    cli.add_argument(
        '--config',
        default='config.yml',
        help='Path to config file.',
    )
    cli.add_argument(
        '--full',
        action='store_true',
        help='Perform a full sync.',
    )
    main(cli.parse_args())