2024-03-05 23:57:38 -07:00
#!/usr/bin/env python3
import argparse
import logging
import sys
from datetime import datetime
from pathlib import Path
import humanize
import yaml
from iarchiver . database import EmailDatabase
from iarchiver . email import unix_timestamp_since_to_imap_timestamp
2024-03-06 10:30:13 -07:00
from iarchiver . mail_conn import MailConnection
2024-03-05 23:57:38 -07:00
def main(args):
    """Sync the configured IMAP account into the local email archive.

    Loads the YAML config named by ``args.config``, connects to the mail
    server, walks every folder not listed in ``exclude_folders`` and inserts
    each fetched message into the database. When the database reports an
    earlier sync, only messages matching an IMAP ``SINCE`` criterion built
    from that timestamp are requested; otherwise every message is requested.
    A summary of the run is recorded through ``database.finish_sync`` and
    logged.

    Args:
        args: Parsed command-line namespace. Only ``args.config`` (path to
            the YAML config file) is read.

    Raises:
        SystemExit: With status 1 when the config file is empty, is not a
            mapping, or is missing any of the required keys.
    """
    logging.basicConfig()
    logger = logging.getLogger('iarchiver')
    logger.setLevel(logging.INFO)

    with open(args.config, encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # Validate before touching any key: an empty YAML file loads as None and
    # a scalar/list document is not a mapping, so ``config.get`` would fail.
    required_keys = ('server', 'username', 'password', 'database_path', 'attachments_path')
    if not isinstance(config, dict) or not all(config.get(key) for key in required_keys):
        logger.critical('Bad config file.')
        sys.exit(1)

    # ``exclude_folders:`` with no value loads as None; treat it as "nothing excluded".
    exclude_folders = config.get('exclude_folders') or []

    attachments_dir = Path(config['attachments_path'])
    attachments_dir.mkdir(parents=True, exist_ok=True)

    database = EmailDatabase(Path(config['database_path']))
    mail = MailConnection(config['server'], config['username'], config['password'], attachments_dir)
    mail.load_folders()

    if config['server'] == 'imap.gmail.com':
        # We will use "[Gmail]/All Mail" instead of INBOX.
        # NOTE(review): "[Gmail]" is presumably the non-selectable parent
        # container -- confirm. Removal is guarded because the folder list
        # comes from the server and may not contain these exact names.
        for skipped_folder in ('INBOX', '[Gmail]'):
            if skipped_folder in mail.folder_structure:
                mail.folder_structure.remove(skipped_folder)

    num_folders_to_sync = len(mail.folder_structure)
    logger.info('Syncing %s folders...', num_folders_to_sync)

    # A falsy result means no previous sync is recorded, so everything is fetched.
    last_refresh = database.have_we_done_a_full_sync_at_all()
    if last_refresh:
        last_refresh_imap_date = unix_timestamp_since_to_imap_timestamp(last_refresh)
        logger.info('Syncing emails received after %s', last_refresh_imap_date)
        search_criterion = [f'(SINCE "{last_refresh_imap_date}")']
    else:
        search_criterion = ['ALL']

    new_emails = 0
    new_attachments = 0
    did_full_sync = False
    sync_start_time = datetime.now()

    for folder_name in mail.folder_structure:
        if folder_name in exclude_folders:
            # Exclude folder
            continue
        logger.info(folder_name)
        if not last_refresh:
            # Only flagged once a folder is actually walked with ALL, so a
            # run that skips every folder is still recorded as a refresh.
            did_full_sync = True
        for email in mail.fetch_folder(folder_name, search_criterion=search_criterion):
            timestamp, to_email, from_email, subject, raw_content, attachments = email
            is_new_email = database.insert_email(
                folder_name, timestamp, subject, raw_content, to_email, from_email, attachments
            )
            if is_new_email:
                new_emails += 1
                # Count every attachment, not just "this email had some":
                # the summary below reports this figure as a number of attachments.
                new_attachments += len(attachments)

    elapsed = datetime.now() - sync_start_time
    database.finish_sync(
        'full' if did_full_sync else 'refresh',
        new_emails,
        new_attachments,
        int(elapsed.total_seconds()),
    )
    logger.info(
        'Finished email %s in %s and added %s new emails and %s attachments.',
        'sync' if did_full_sync else 'refresh',
        humanize.naturaldelta(elapsed),
        new_emails,
        new_attachments,
    )
2024-03-05 23:57:38 -07:00
if __name__ == '__main__':
    # Script entry point: parse the command line and run a single sync pass.
    # The guard literal must be exactly '__main__' (no padding), otherwise the
    # comparison with __name__ is never true and the script silently does nothing.
    parser = argparse.ArgumentParser(description='Sync and archive your IMAP server.')
    parser.add_argument('--config', default='config.yml', help='Path to config file.')
    args = parser.parse_args()
    main(args)