2024-03-05 23:57:38 -07:00
|
|
|
import json
|
|
|
|
import re
|
|
|
|
import sqlite3
|
|
|
|
import time
|
|
|
|
from pathlib import Path
|
|
|
|
from typing import List
|
|
|
|
|
2024-03-06 12:08:12 -07:00
|
|
|
from iarchiver.hash import murmur3_chunked
|
2024-03-06 10:30:13 -07:00
|
|
|
from iarchiver.mail_conn import FileAttachment, FileAttachmentEncoder
|
2024-03-05 23:57:38 -07:00
|
|
|
|
|
|
|
|
|
|
|
def is_valid_table_name(table_name):
    """Return True if *table_name* is a plain ASCII identifier.

    A valid name starts with an ASCII letter or underscore and continues
    with ASCII letters, digits or underscores only. The empty string is
    not valid.
    """
    # fullmatch rather than match with a '$' anchor: '$' also matches just
    # before a trailing newline, which would let 'name\n' pass as valid.
    return re.fullmatch(r'[a-zA-Z_][a-zA-Z0-9_]*', table_name) is not None
|
|
|
|
|
|
|
|
|
|
|
|
def sanitize_table_name(name):
    """Turn a folder name into a string usable as a table name.

    Slashes become underscores, every remaining non-word character is
    dropped, and a result that begins with a digit gets a leading
    underscore. The result is empty when nothing survives the stripping.
    """
    # Slashes are converted first so they survive the stripping step below.
    cleaned = re.sub(r'\W+', '', name.replace('/', '_'))
    # Slicing keeps the digit test safe on an empty string.
    return '_' + cleaned if cleaned[:1].isdigit() else cleaned
|
|
|
|
|
|
|
|
|
|
|
|
class EmailDatabase:
    """SQLite-backed store for archived emails, one table per mail folder.

    Besides the per-folder tables, two bookkeeping tables are maintained:
    ``folders_mapping`` (original folder name -> sanitized table name) and
    ``syncs`` (one row per recorded sync run).
    """

    # Bookkeeping table names; a folder must never be mapped onto them.
    __restricted_strings = ['folders_mapping', 'syncs']

    def __init__(self, filepath: Path):
        """Open the database at *filepath* and ensure the bookkeeping tables exist.

        The path is user-expanded and resolved before connecting.
        """
        filepath = filepath.expanduser().absolute().resolve()
        self.conn = sqlite3.connect(filepath)
        cursor = self.conn.cursor()
        try:
            # The connection context manager commits on success and rolls
            # back if a statement raises.
            with self.conn:
                cursor.execute('CREATE TABLE IF NOT EXISTS folders_mapping (name TEXT UNIQUE, table_name TEXT UNIQUE)')
                cursor.execute('CREATE TABLE IF NOT EXISTS syncs (timestamp INTEGER UNIQUE, type TEXT, new_emails INTEGER, new_attachments INTEGER, new_folders INTEGER, duration INTEGER)')
        finally:
            cursor.close()

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __create_table(self, table_name: str):
        """Ensure the table for folder *table_name* exists and is registered.

        Returns the double-quoted SQL identifier of the folder's table so
        callers can interpolate it without sanitizing the name again.

        Raises:
            ValueError: if the sanitized name is empty or collides with one
                of the bookkeeping tables.

        NOTE: two folder names that sanitize to the same table name (for
        example ``a/b`` and ``a_b``) share one table; the second mapping row
        is silently skipped by ``INSERT OR IGNORE``.
        """
        sanitized_table_name = sanitize_table_name(table_name)
        if not sanitized_table_name:
            raise ValueError(f'Invalid table name, nothing left after sanitizing: {table_name}')
        # SQLite identifiers are case-insensitive, so 'Syncs' would resolve
        # to the bookkeeping table 'syncs'; compare case-insensitively.
        if sanitized_table_name.lower() in self.__restricted_strings:
            raise ValueError(f'Invalid table name, conflicts with system tables: {table_name}')
        # Quote the identifier so folders named after SQL keywords (Order,
        # Index, ...) work. The sanitized name contains only word characters,
        # so it cannot contain a double quote.
        quoted_table_name = f'"{sanitized_table_name}"'
        cursor = self.conn.cursor()
        try:
            with self.conn:
                cursor.execute(f'CREATE TABLE IF NOT EXISTS {quoted_table_name} (timestamp INTEGER, to_email TEXT, from_email TEXT, subject TEXT, raw_content TEXT, raw_content_hash TEXT, attachments TEXT, id INTEGER PRIMARY KEY AUTOINCREMENT)')
                cursor.execute('INSERT OR IGNORE INTO folders_mapping (name, table_name) VALUES (?, ?)', (table_name, sanitized_table_name))
        finally:
            cursor.close()
        return quoted_table_name

    def insert_email(self, folder: str, timestamp: int, subject: str, raw_content: str, to_email: str, from_email: str, attachments: List[FileAttachment]):
        """Store one email in *folder*'s table unless it is already there.

        An email counts as already stored when a row with the same
        ``timestamp`` and the same hash of ``raw_content`` exists in the
        folder's table. ``attachments`` is serialized to JSON with
        ``FileAttachmentEncoder``.

        Returns:
            True if a new row was inserted, False if the email was already
            present.

        Raises:
            ValueError: if *folder* cannot be mapped to a usable table name.
        """
        raw_content_hash = murmur3_chunked(raw_content.encode())
        quoted_table_name = self.__create_table(folder)
        cursor = self.conn.cursor()
        try:
            # Existence check only: select a constant instead of the whole
            # row so the stored raw content is not read back.
            cursor.execute(
                f"SELECT 1 FROM {quoted_table_name} WHERE timestamp = ? AND raw_content_hash = ? LIMIT 1",
                (timestamp, raw_content_hash),
            )
            if cursor.fetchone() is not None:
                return False
            with self.conn:
                cursor.execute(
                    f"INSERT INTO {quoted_table_name} (timestamp, to_email, from_email, subject, raw_content, raw_content_hash, attachments) VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (timestamp, to_email, from_email, subject, raw_content, raw_content_hash, json.dumps(attachments, cls=FileAttachmentEncoder)),
                )
            return True
        finally:
            cursor.close()

    def finish_sync(self, sync_type: str, new_emails: int, new_attachments: int, duration: int):
        """Record a finished sync run and return its timestamp.

        The row is stamped with the current Unix time truncated to whole
        seconds. The ``new_folders`` column is not written here and stays
        NULL.

        Raises:
            sqlite3.IntegrityError: if a sync was already recorded with the
                same timestamp (the column is UNIQUE).
        """
        now = int(time.time())
        cursor = self.conn.cursor()
        try:
            with self.conn:
                cursor.execute('INSERT INTO syncs (timestamp, type, new_emails, new_attachments, duration) VALUES (?, ?, ?, ?, ?)', (now, sync_type, new_emails, new_attachments, duration))
        finally:
            cursor.close()
        return now

    def have_we_done_a_full_sync_at_all(self):
        """Return the timestamp of the earliest recorded sync, or None if there is none.

        NOTE(review): despite the name, the query does not filter on the
        sync ``type`` column, so any recorded sync counts - confirm whether
        only full syncs should be considered.
        """
        cursor = self.conn.cursor()
        try:
            # Only the timestamp is needed; it was the first column of the
            # previous SELECT *.
            cursor.execute("SELECT timestamp FROM syncs ORDER BY timestamp LIMIT 1")
            row = cursor.fetchone()
        finally:
            cursor.close()
        return None if row is None else row[0]
|