#!/usr/bin/env python3
"""Command-line entry point: sync and archive an IMAP server into the local database."""
import argparse
import logging
import sys
from datetime import datetime
from pathlib import Path

import humanize
import yaml

from iarchiver.database import EmailDatabase
from iarchiver.email import unix_timestamp_since_to_imap_timestamp
from iarchiver.mail_conn import MailConnection

# Config keys that must be present and non-empty before a sync can start.
_REQUIRED_CONFIG_KEYS = ('server', 'username', 'password', 'database_path', 'attachments_path')


def _load_config(config_path, logger):
    """Load and validate the YAML config file.

    Args:
        config_path: Path of the YAML file to read.
        logger: Logger used to report why the config was rejected.

    Returns:
        The parsed config mapping; every key in ``_REQUIRED_CONFIG_KEYS`` is
        present and truthy.

    Raises:
        SystemExit: With status 1 when the file cannot be read or parsed, is
            not a mapping, or lacks a required key.
    """
    try:
        with open(config_path, encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except (OSError, yaml.YAMLError) as err:
        logger.critical(f'Could not read config file {config_path}: {err}')
        sys.exit(1)

    # safe_load() returns None for an empty document and may return a scalar or
    # list, so confirm we have a mapping before calling .get() on it.
    if not isinstance(config, dict) or not all(config.get(key) for key in _REQUIRED_CONFIG_KEYS):
        logger.critical('Bad config file.')
        sys.exit(1)
    return config


def main(args):
    """Run one sync pass over every non-excluded folder of the configured server.

    Messages are fetched with an IMAP ``SINCE`` criterion when the database
    reports a previous full sync, otherwise with ``ALL``. Each fetched message
    is handed to the database, and a summary of the run is recorded and logged.

    Args:
        args: Parsed command-line namespace; only ``args.config`` (path to the
            YAML config file) is read.

    Raises:
        SystemExit: With status 1 when the config file is unusable.
    """
    logging.basicConfig()
    logger = logging.getLogger('iarchiver')
    logger.setLevel(logging.INFO)

    config = _load_config(args.config, logger)

    # An `exclude_folders:` key with no value parses as None; a bare string
    # would otherwise be substring-matched against folder names.
    exclude_folders = config.get('exclude_folders') or []
    if isinstance(exclude_folders, str):
        exclude_folders = [exclude_folders]

    save_inline_attachments = config.get('save_inline_attachments', False)
    if save_inline_attachments:
        logger.info('Saving inline attachments as well')

    attachments_dir = Path(config['attachments_path'])
    attachments_dir.mkdir(parents=True, exist_ok=True)

    database = EmailDatabase(Path(config['database_path']))
    mail = MailConnection(
        config['server'],
        config['username'],
        config['password'],
        attachments_dir,
        save_inline_attachments=save_inline_attachments,
    )
    mail.load_folders()

    if config['server'] == 'imap.gmail.com':
        # 'INBOX' is dropped because '[Gmail]/All Mail' is used instead.
        # '[Gmail]' is dropped too (presumably a non-selectable parent folder
        # -- TODO confirm). Guarded so a missing entry does not abort the sync.
        for gmail_folder in ('INBOX', '[Gmail]'):
            if gmail_folder in mail.folder_structure:
                mail.folder_structure.remove(gmail_folder)

    # Filter up front so the logged count matches what is actually synced.
    folders_to_sync = [name for name in mail.folder_structure if name not in exclude_folders]
    logger.info(f'Syncing {len(folders_to_sync)} folders...')

    last_refresh = database.have_we_done_a_full_sync_at_all()
    if last_refresh:
        last_refresh_imap_date = unix_timestamp_since_to_imap_timestamp(last_refresh)
        logger.info(f'Syncing emails received after {last_refresh_imap_date}')
        criterion = f'(SINCE "{last_refresh_imap_date}")'
    else:
        criterion = 'ALL'

    # A run only counts as a full sync if at least one folder was actually
    # fetched with 'ALL'; with nothing to sync it is recorded as a refresh.
    did_full_sync = not last_refresh and bool(folders_to_sync)

    new_emails = 0
    new_attachments = 0
    sync_start_time = datetime.now()

    for folder_name in folders_to_sync:
        logger.info(folder_name)
        # A fresh list per call, in case fetch_folder() mutates its argument.
        for message in mail.fetch_folder(folder_name, search_criterion=[criterion]):
            timestamp, to_email, from_email, subject, raw_content, attachments = message
            is_new_email = database.insert_email(folder_name, timestamp, subject, raw_content, to_email, from_email, attachments)
            if is_new_email:
                new_emails += 1
                # NOTE(review): this counts new emails that carry attachments,
                # not individual attachment files -- confirm that is the
                # statistic wanted. Nesting under `is_new_email` is assumed.
                if attachments:
                    new_attachments += 1

    elapsed = datetime.now() - sync_start_time
    database.finish_sync('refresh' if not did_full_sync else 'full', new_emails, new_attachments, int(elapsed.total_seconds()))
    logger.info(f'Finished email {"refresh" if not did_full_sync else "sync"} in {humanize.naturaldelta(elapsed)} and added {new_emails} new emails and {new_attachments} attachments.')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Sync and archive your IMAP server.')
    parser.add_argument('--config', default='config.yml', help='Path to config file.')
    args = parser.parse_args()
    main(args)