add viewer server
This commit is contained in:
parent
be1083e9b3
commit
94aadf1246
|
@ -2,6 +2,7 @@ import concurrent.futures
|
|||
import email
|
||||
import imaplib
|
||||
import logging
|
||||
import traceback
|
||||
from email.header import decode_header
|
||||
from email.utils import parsedate_to_datetime
|
||||
from json import JSONEncoder
|
||||
|
@ -15,12 +16,13 @@ from iarchiver.hash import murmur3_chunked
|
|||
|
||||
|
||||
class FileAttachment:
|
||||
def __init__(self, file_name: str, file_hash: str):
|
||||
def __init__(self, file_name: str, file_hash: str, content_type: str):
|
||||
self.filename = file_name
|
||||
self.hash = file_hash
|
||||
self.content_type = content_type
|
||||
|
||||
def to_dict(self):
|
||||
return {'filename': self.filename, 'hash': self.hash}
|
||||
return {'filename': self.filename, 'hash': self.hash, 'content_type': self.content_type}
|
||||
|
||||
|
||||
class FileAttachmentEncoder(JSONEncoder):
|
||||
|
@ -44,79 +46,82 @@ class MailConnection:
|
|||
return self.folder_structure
|
||||
|
||||
def __fetch_email(self, i):
|
||||
result, data = self.mail.uid('fetch', str(i), '(BODY[])') # fetch the raw email
|
||||
if data[0] is None:
|
||||
return
|
||||
raw_email_bytes = data[0][1]
|
||||
try:
|
||||
detected = chardet.detect(raw_email_bytes)
|
||||
except TypeError as e:
|
||||
self.logger.critical(f'Failed to decode an email. Timeout? Server error? - "{e}"')
|
||||
result, data = self.mail.uid('fetch', str(i), '(BODY[])') # fetch the raw email
|
||||
if data[0] is None:
|
||||
return
|
||||
raw_email_bytes = data[0][1]
|
||||
try:
|
||||
detected = chardet.detect(raw_email_bytes)
|
||||
except TypeError as e:
|
||||
self.logger.critical(f'Failed to decode an email. Timeout? Server error? - "{e}"')
|
||||
return
|
||||
encoding = detected['encoding']
|
||||
if not encoding:
|
||||
encoding = 'utf-8'
|
||||
raw_email = raw_email_bytes.decode(encoding, errors='replace')
|
||||
email_message = email.message_from_string(raw_email)
|
||||
date_header = email_message['Date']
|
||||
if not date_header:
|
||||
date_header = 'Thu, 1 Jan 1970 00:00:00 +0000'
|
||||
parsed_date = email.utils.parsedate_to_datetime(date_header)
|
||||
unix_timestamp = int(parsed_date.timestamp())
|
||||
|
||||
from_addr = email_message['From']
|
||||
to_addr = email_message['To']
|
||||
if not from_addr and not to_addr:
|
||||
return
|
||||
if not from_addr:
|
||||
from_addr = ''
|
||||
if not to_addr:
|
||||
to_addr = ''
|
||||
|
||||
from_header = ', '.join(extract_emails(from_addr))
|
||||
to_header = ', '.join(extract_emails(to_addr))
|
||||
if '@' not in from_header:
|
||||
from_header = from_addr
|
||||
if '@' not in to_header:
|
||||
to_header = to_addr
|
||||
|
||||
subject_header = email_message['Subject']
|
||||
if subject_header:
|
||||
subject = decode_header(subject_header)[0][0]
|
||||
if isinstance(subject, bytes):
|
||||
try:
|
||||
detected = chardet.detect(subject)
|
||||
encoding = detected['encoding']
|
||||
if not encoding:
|
||||
encoding = 'utf-8'
|
||||
subject = subject.decode(encoding, errors='replace')
|
||||
except UnicodeDecodeError:
|
||||
subject = subject.decode('utf-8')
|
||||
else:
|
||||
return
|
||||
|
||||
attachments = []
|
||||
if email_message.is_multipart():
|
||||
for part in email_message.walk():
|
||||
content_disposition = str(part.get("Content-Disposition"))
|
||||
if "attachment" in content_disposition:
|
||||
filename = part.get_filename()
|
||||
if filename:
|
||||
# The filename of the file is the hash of its content, which should de-duplicate files.
|
||||
filecontents = part.get_payload(decode=True)
|
||||
if not filecontents:
|
||||
continue
|
||||
filehash = murmur3_chunked(filecontents)
|
||||
part.set_payload(f'MMH3:{filehash}') # replace the attachment with its hash
|
||||
filepath = self.attachments_dir / f'F{filehash}'
|
||||
file_obj = FileAttachment(filename, filehash, part.get_content_type())
|
||||
if not filepath.is_file():
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(filecontents)
|
||||
attachments.append(file_obj)
|
||||
raw_email_clean = email_message.as_string()
|
||||
return unix_timestamp, to_header, from_header, subject, raw_email_clean, attachments
|
||||
except Exception as e:
|
||||
self.logger.critical(traceback.format_exc())
|
||||
return
|
||||
encoding = detected['encoding']
|
||||
if not encoding:
|
||||
encoding = 'utf-8'
|
||||
raw_email = raw_email_bytes.decode(encoding, errors='replace')
|
||||
email_message = email.message_from_string(raw_email)
|
||||
date_header = email_message['Date']
|
||||
if not date_header:
|
||||
date_header = 'Thu, 1 Jan 1970 00:00:00 +0000'
|
||||
parsed_date = email.utils.parsedate_to_datetime(date_header)
|
||||
unix_timestamp = int(parsed_date.timestamp())
|
||||
|
||||
from_addr = email_message['From']
|
||||
to_addr = email_message['To']
|
||||
if not from_addr and not to_addr:
|
||||
return
|
||||
if not from_addr:
|
||||
from_addr = ''
|
||||
if not to_addr:
|
||||
to_addr = ''
|
||||
|
||||
from_header = ', '.join(extract_emails(from_addr))
|
||||
to_header = ', '.join(extract_emails(to_addr))
|
||||
if '@' not in from_header:
|
||||
from_header = from_addr
|
||||
if '@' not in to_header:
|
||||
to_header = to_addr
|
||||
|
||||
subject_header = email_message['Subject']
|
||||
if subject_header:
|
||||
subject = decode_header(subject_header)[0][0]
|
||||
if isinstance(subject, bytes):
|
||||
try:
|
||||
detected = chardet.detect(subject)
|
||||
encoding = detected['encoding']
|
||||
if not encoding:
|
||||
encoding = 'utf-8'
|
||||
subject = subject.decode(encoding, errors='replace')
|
||||
except UnicodeDecodeError:
|
||||
subject = subject.decode('utf-8')
|
||||
else:
|
||||
return
|
||||
|
||||
attachments = []
|
||||
if email_message.is_multipart():
|
||||
for part in email_message.walk():
|
||||
# content_type = part.get_content_type()
|
||||
content_disposition = str(part.get("Content-Disposition"))
|
||||
if "attachment" in content_disposition:
|
||||
filename = part.get_filename()
|
||||
if filename:
|
||||
# The filename of the file is the hash of its content, which should de-duplicate files.
|
||||
filecontents = part.get_payload(decode=True)
|
||||
if not filecontents:
|
||||
continue
|
||||
filehash = murmur3_chunked(filecontents)
|
||||
part.set_payload(f'MD5:{filehash}') # replace the attachment with its hash
|
||||
filepath = self.attachments_dir / filehash
|
||||
file_obj = FileAttachment(filename, filehash)
|
||||
if not filepath.is_file():
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(filecontents)
|
||||
attachments.append(file_obj)
|
||||
raw_email_clean = email_message.as_string()
|
||||
return unix_timestamp, to_header, from_header, subject, raw_email_clean, attachments
|
||||
|
||||
def fetch_folder(self, folder: str, search_criterion: List[str] = None, max_threads: int = 1):
|
||||
"""
|
||||
|
@ -135,7 +140,6 @@ class MailConnection:
|
|||
first_email_id = int(id_list[0])
|
||||
latest_email_id = int(id_list[-1])
|
||||
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
|
||||
futures = {executor.submit(self.__fetch_email, i) for i in range(latest_email_id, first_email_id, -1)}
|
||||
for future in concurrent.futures.as_completed(futures):
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
pyyaml==6.0.1
|
||||
chardet==5.2.0
|
||||
humanize==4.9.0
|
||||
mmh3==4.1.0
|
||||
mmh3==4.1.0
|
||||
flask==3.0.2
|
63
server.py
63
server.py
|
@ -0,0 +1,63 @@
|
|||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import magic
|
||||
from flask import Flask, render_template, send_from_directory
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
def get_db_connection():
    """Open the archive database and return a connection.

    Rows come back as :class:`sqlite3.Row` objects so columns can be
    accessed by name as well as by index.
    """
    connection = sqlite3.connect('emails.db')
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def dict_from_row(row):
    """Convert a :class:`sqlite3.Row` into a plain ``dict``."""
    # sqlite3.Row exposes keys() and item access, so a comprehension
    # over the column names rebuilds the mapping directly.
    return {key: row[key] for key in row.keys()}
|
||||
|
||||
|
||||
@app.route('/')
def index():
    """Render the landing page: folder list plus recent sync history."""
    conn = get_db_connection()
    folders = conn.execute('SELECT name, table_name FROM folders_mapping').fetchall()
    sync_rows = conn.execute('SELECT * FROM syncs ORDER BY timestamp DESC').fetchall()
    conn.close()

    history = []
    for row in sync_rows:
        entry = dict_from_row(row)
        # Timestamps are stored as Unix seconds; render them human-readable.
        entry['timestamp'] = datetime.fromtimestamp(entry['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
        history.append(entry)
    return render_template('index.html', folders=folders, syncs=history)
|
||||
|
||||
|
||||
@app.route('/folder/<table_name>')
def folder(table_name):
    """List the emails of one folder, newest first.

    ``table_name`` comes straight from the URL and SQLite cannot bind
    identifiers as parameters, so it is validated against the known
    folder tables before being interpolated into the query. Without
    this check the f-string query is an SQL injection vector.
    """
    conn = get_db_connection()
    known = conn.execute('SELECT 1 FROM folders_mapping WHERE table_name = ?',
                         (table_name,)).fetchone()
    if known is None:
        conn.close()
        return 'Folder not found', 404
    rows = conn.execute(f'SELECT * FROM {table_name} ORDER BY timestamp DESC').fetchall()
    conn.close()
    emails = [dict_from_row(row) for row in rows]
    for record in emails:
        # Unix seconds -> human-readable string for the template.
        record['timestamp'] = datetime.fromtimestamp(record['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
    return render_template('folder.html', emails=emails, table_name=table_name)
|
||||
|
||||
|
||||
@app.route('/email/<table_name>/<id>')
def email(table_name, id):
    """Show a single email with its attachments.

    ``table_name`` is validated against ``folders_mapping`` before it is
    interpolated (identifiers cannot be bound as SQL parameters), which
    closes the SQL injection hole; a missing row now 404s instead of
    crashing ``dict_from_row`` with ``None``.
    """
    conn = get_db_connection()
    known = conn.execute('SELECT 1 FROM folders_mapping WHERE table_name = ?',
                         (table_name,)).fetchone()
    if known is None:
        conn.close()
        return 'Folder not found', 404
    row = conn.execute(f'SELECT * FROM {table_name} WHERE id = ?', (id,)).fetchone()
    conn.close()
    if row is None:
        return 'Email not found', 404
    record = dict_from_row(row)
    record['timestamp'] = datetime.fromtimestamp(record['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
    # Attachment metadata is stored as a JSON array in the row.
    attachments = json.loads(record['attachments'])
    return render_template('email.html', email=record, attachments=attachments)
|
||||
|
||||
|
||||
@app.route('/attachments/<path:filename>')
def download_file(filename):
    """Serve a stored attachment with a content-sniffed MIME type.

    ``send_from_directory`` refuses paths that escape the attachments
    directory, but ``magic.from_file`` previously ran BEFORE that check,
    letting ``../`` paths probe arbitrary files. The path is resolved
    and containment-checked here before anything touches it.
    """
    attachments_root = Path('attachments').resolve()
    target = (attachments_root / filename).resolve()
    # Reject traversal (e.g. "../emails.db") and missing files up front.
    if attachments_root not in target.parents or not target.is_file():
        return 'Attachment not found', 404
    # Attachment files are named by content hash with no extension, so
    # the MIME type must be sniffed from the file contents.
    mimetype = magic.from_file(str(target), mime=True)
    return send_from_directory('attachments', filename, mimetype=mimetype)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug interactive debugger;
    # combined with host='0.0.0.0' this exposes remote code execution to
    # every host that can reach this port. Disable debug (or bind to
    # 127.0.0.1) before running anywhere untrusted — confirm deployment.
    app.run(host='0.0.0.0', debug=True)
|
|
@ -0,0 +1,17 @@
|
|||
<!doctype html>
<html>
<head>
    <title>Email</title>
</head>
<body>
    <h1>Email</h1>
    <h2>Attachments</h2>
    <ul>
        {% for attachment in attachments %}
            {# <ul> only permits <li> children; the anchors were bare before. #}
            <li>
                {# Attachment files are stored under "F" + content hash. #}
                <a href="{{ url_for('download_file', filename='F' + attachment.hash) }}">{{ attachment.filename }}</a>
            </li>
        {% endfor %}
    </ul>
    <h2>Content</h2>
    <pre>{{ email.raw_content }}</pre>
</body>
</html>
|
|
@ -0,0 +1,23 @@
|
|||
<!doctype html>
<html>
<head>
    <title>Emails</title>
    {# <style> was placed after </body>, which is invalid; it belongs in <head>. #}
    <style>
        ul {
            list-style-type: none;
        }
    </style>
</head>
<body>
    <h1>Emails</h1>
    <ul>
        {% for email in emails %}
            <li>
                <a href="{{ url_for('email', table_name=table_name, id=email.id) }}">
                    {{ email.timestamp }} | <i>{{ email.from_email }}</i> - <strong>{{ email.subject }}</strong>
                </a>
            </li>
        {% endfor %}
    </ul>
</body>
</html>
|
|
@ -0,0 +1,40 @@
|
|||
<!doctype html>
<html>
<head>
    <title>Email Folders</title>
    {# <style> was placed after </body>, which is invalid; it belongs in <head>. #}
    <style>
        ul {
            list-style-type: none;
        }
    </style>
</head>
<body>
    <h1>Email Folders</h1>
    <ul>
        {% for folder in folders %}
            <li><a href="{{ url_for('folder', table_name=folder.table_name) }}">{{ folder.name }}</a></li>
        {% endfor %}
    </ul>
    <h1>Last Syncs</h1>
    <table>
        <tr>
            <th>Timestamp</th>
            <th>Type</th>
            <th>New Emails</th>
            <th>New Attachments</th>
            <th>New Folders</th>
            <th>Duration</th>
        </tr>
        {% for sync in syncs %}
            <tr>
                <td>{{ sync.timestamp }}</td>
                <td>{{ sync.type }}</td>
                <td>{{ sync.new_emails }}</td>
                <td>{{ sync.new_attachments }}</td>
                <td>{{ sync.new_folders }}</td>
                <td>{{ sync.duration }}</td>
            </tr>
        {% endfor %}
    </table>
</body>
</html>
|
Loading…
Reference in New Issue