add viewer server
This commit is contained in:
parent
be1083e9b3
commit
94aadf1246
|
@ -2,6 +2,7 @@ import concurrent.futures
|
||||||
import email
|
import email
|
||||||
import imaplib
|
import imaplib
|
||||||
import logging
|
import logging
|
||||||
|
import traceback
|
||||||
from email.header import decode_header
|
from email.header import decode_header
|
||||||
from email.utils import parsedate_to_datetime
|
from email.utils import parsedate_to_datetime
|
||||||
from json import JSONEncoder
|
from json import JSONEncoder
|
||||||
|
@ -15,12 +16,13 @@ from iarchiver.hash import murmur3_chunked
|
||||||
|
|
||||||
|
|
||||||
class FileAttachment:
    """Metadata describing one attachment extracted from an email.

    Stores the attachment's original file name, the hash string identifying
    its contents, and the content type reported for the message part.
    """

    def __init__(self, file_name: str, file_hash: str, content_type: str):
        self.filename, self.hash, self.content_type = file_name, file_hash, content_type

    def to_dict(self):
        """Return the attachment metadata as a plain ``dict``."""
        return dict(filename=self.filename, hash=self.hash, content_type=self.content_type)
|
||||||
|
|
||||||
|
|
||||||
class FileAttachmentEncoder(JSONEncoder):
|
class FileAttachmentEncoder(JSONEncoder):
|
||||||
|
@ -44,79 +46,82 @@ class MailConnection:
|
||||||
return self.folder_structure
|
return self.folder_structure
|
||||||
|
|
||||||
def __fetch_email(self, i):
    """Fetch one message by UID and turn it into an archivable record.

    The raw message is downloaded, decoded to text, and parsed. Attachments
    are written to ``self.attachments_dir`` under a name derived from the
    hash of their contents, and their payload inside the message is replaced
    by that hash so the stored message stays small.

    :param i: UID passed to the IMAP ``UID FETCH`` command.
    :return: ``(unix_timestamp, to_header, from_header, subject,
        raw_email_clean, attachments)``, or ``None`` when the message is
        skipped (nothing returned by the server, no sender and no recipient,
        no subject) or when any unexpected error occurs.
    """
    epoch_date = 'Thu, 1 Jan 1970 00:00:00 +0000'
    try:
        _status, data = self.mail.uid('fetch', str(i), '(BODY[])')  # fetch the raw email
        if data[0] is None:
            return None
        raw_email_bytes = data[0][1]
        try:
            detected = chardet.detect(raw_email_bytes)
        except TypeError as e:
            self.logger.critical(f'Failed to decode an email. Timeout? Server error? - "{e}"')
            return None
        encoding = detected['encoding'] or 'utf-8'
        raw_email = raw_email_bytes.decode(encoding, errors='replace')
        email_message = email.message_from_string(raw_email)

        # A missing Date header falls back to the epoch; treat an unparseable
        # one the same way instead of discarding the whole message.
        date_header = email_message['Date'] or epoch_date
        try:
            parsed_date = parsedate_to_datetime(date_header)
        except (TypeError, ValueError):
            parsed_date = parsedate_to_datetime(epoch_date)
        unix_timestamp = int(parsed_date.timestamp())

        from_addr = email_message['From'] or ''
        to_addr = email_message['To'] or ''
        if not from_addr and not to_addr:
            return None

        # Prefer the bare addresses; keep the raw header when none was found.
        from_header = ', '.join(extract_emails(from_addr))
        to_header = ', '.join(extract_emails(to_addr))
        if '@' not in from_header:
            from_header = from_addr
        if '@' not in to_header:
            to_header = to_addr

        subject_header = email_message['Subject']
        if not subject_header:
            return None
        # decode_header() yields one (value, charset) pair per fragment, so
        # every fragment must be decoded and joined; using only the first
        # one truncates subjects split across several encoded words.
        fragments = []
        for fragment, charset in decode_header(subject_header):
            if isinstance(fragment, bytes):
                codec = charset or chardet.detect(fragment)['encoding'] or 'utf-8'
                try:
                    fragment = fragment.decode(codec, errors='replace')
                except LookupError:
                    # Declared or detected codec is unknown to Python.
                    fragment = fragment.decode('utf-8', errors='replace')
            fragments.append(fragment)
        subject = ''.join(fragments)

        attachments = []
        if email_message.is_multipart():
            for part in email_message.walk():
                content_disposition = str(part.get("Content-Disposition"))
                if "attachment" not in content_disposition:
                    continue
                filename = part.get_filename()
                if not filename:
                    continue
                # The filename of the file is the hash of its content, which should de-duplicate files.
                filecontents = part.get_payload(decode=True)
                if not filecontents:
                    continue
                filehash = murmur3_chunked(filecontents)
                part.set_payload(f'MMH3:{filehash}')  # replace the attachment with its hash
                filepath = self.attachments_dir / f'F{filehash}'
                file_obj = FileAttachment(filename, filehash, part.get_content_type())
                if not filepath.is_file():
                    with open(filepath, 'wb') as f:
                        f.write(filecontents)
                attachments.append(file_obj)
        raw_email_clean = email_message.as_string()
        return unix_timestamp, to_header, from_header, subject, raw_email_clean, attachments
    except Exception:
        # Boundary for a single message: log the full traceback and skip it
        # rather than letting one bad email propagate to the caller.
        self.logger.critical(traceback.format_exc())
        return None
|
|
||||||
|
|
||||||
def fetch_folder(self, folder: str, search_criterion: List[str] = None, max_threads: int = 1):
|
def fetch_folder(self, folder: str, search_criterion: List[str] = None, max_threads: int = 1):
|
||||||
"""
|
"""
|
||||||
|
@ -135,7 +140,6 @@ class MailConnection:
|
||||||
first_email_id = int(id_list[0])
|
first_email_id = int(id_list[0])
|
||||||
latest_email_id = int(id_list[-1])
|
latest_email_id = int(id_list[-1])
|
||||||
|
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
|
||||||
futures = {executor.submit(self.__fetch_email, i) for i in range(latest_email_id, first_email_id, -1)}
|
futures = {executor.submit(self.__fetch_email, i) for i in range(latest_email_id, first_email_id, -1)}
|
||||||
for future in concurrent.futures.as_completed(futures):
|
for future in concurrent.futures.as_completed(futures):
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
pyyaml==6.0.1
|
pyyaml==6.0.1
|
||||||
chardet==5.2.0
|
chardet==5.2.0
|
||||||
humanize==4.9.0
|
humanize==4.9.0
|
||||||
mmh3==4.1.0
|
mmh3==4.1.0
|
||||||
|
flask==3.0.2
|
63
server.py
63
server.py
|
@ -0,0 +1,63 @@
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import magic
|
||||||
|
from flask import Flask, render_template, send_from_directory
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_db_connection():
    """Open ``emails.db`` and return a connection yielding ``sqlite3.Row`` rows.

    The connection is returned open; callers close it when they are done.
    """
    connection = sqlite3.connect('emails.db')
    # sqlite3.Row gives name-based column access and a ``keys()`` method.
    connection.row_factory = sqlite3.Row
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def dict_from_row(row):
    """Return a mutable ``dict`` pairing each of ``row.keys()`` with the row's values.

    Values are taken by iterating ``row`` in order, so the n-th key maps to
    the n-th value.
    """
    return {column: value for column, value in zip(row.keys(), row)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/')
def index():
    """Render the landing page listing the folders and the recorded syncs.

    Folders come from ``folders_mapping``; syncs come from ``syncs``, newest
    first, with their ``timestamp`` converted to a ``YYYY-MM-DD HH:MM:SS``
    string in the server's local time.
    """
    conn = get_db_connection()
    try:
        folders = conn.execute('SELECT name, table_name FROM folders_mapping').fetchall()
        sync_rows = conn.execute('SELECT * FROM syncs ORDER BY timestamp DESC').fetchall()
    finally:
        # Close the connection even when a query fails.
        conn.close()

    syncs = []
    for row in sync_rows:
        # sqlite3.Row is read-only; copy to a dict so the timestamp can be replaced.
        sync = dict_from_row(row)
        sync['timestamp'] = datetime.fromtimestamp(sync['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
        syncs.append(sync)
    return render_template('index.html', folders=folders, syncs=syncs)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/folder/<table_name>')
def folder(table_name):
    """Render the list of emails stored in one folder table, newest first.

    :param table_name: Name of the folder's table, taken from the URL.
        Responds with 404 unless it is listed in ``folders_mapping``.
    """
    from flask import abort  # local import keeps this view self-contained

    conn = get_db_connection()
    try:
        # table_name comes straight from the URL and a table name cannot be
        # bound as a SQL parameter, so only accept names that are registered.
        # NOTE(review): assumes every browsable table is listed in
        # folders_mapping.table_name - confirm against the archiver.
        known = conn.execute(
            'SELECT 1 FROM folders_mapping WHERE table_name = ?', (table_name,)
        ).fetchone()
        if known is None:
            abort(404)
        quoted_table = '"' + table_name.replace('"', '""') + '"'
        rows = conn.execute(f'SELECT * FROM {quoted_table} ORDER BY timestamp DESC').fetchall()
    finally:
        # Close the connection on success, on 404 and on query errors alike.
        conn.close()

    emails = []
    for row in rows:
        # sqlite3.Row is read-only; copy to a dict so the timestamp can be replaced.
        message = dict_from_row(row)
        message['timestamp'] = datetime.fromtimestamp(message['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
        emails.append(message)
    return render_template('folder.html', emails=emails, table_name=table_name)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/email/<table_name>/<id>')
def email(table_name, id):
    """Render a single stored email together with its attachment list.

    :param table_name: Name of the folder's table, taken from the URL.
        Responds with 404 unless it is listed in ``folders_mapping``.
    :param id: Value matched against the table's ``id`` column. The name
        shadows the builtin but is part of the route and of the
        ``url_for('email', ..., id=...)`` calls, so it is kept.
    """
    from flask import abort  # local import keeps this view self-contained

    conn = get_db_connection()
    try:
        # table_name comes straight from the URL and a table name cannot be
        # bound as a SQL parameter, so only accept names that are registered.
        # NOTE(review): assumes every browsable table is listed in
        # folders_mapping.table_name - confirm against the archiver.
        known = conn.execute(
            'SELECT 1 FROM folders_mapping WHERE table_name = ?', (table_name,)
        ).fetchone()
        if known is None:
            abort(404)
        quoted_table = '"' + table_name.replace('"', '""') + '"'
        row = conn.execute(f'SELECT * FROM {quoted_table} WHERE id = ?', (id,)).fetchone()
    finally:
        # Close the connection on success, on 404 and on query errors alike.
        conn.close()

    if row is None:
        # Unknown id: answer 404 instead of failing on a missing row.
        abort(404)

    message = dict_from_row(row)
    message['timestamp'] = datetime.fromtimestamp(message['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
    # The attachments column holds a JSON document describing the attachments.
    attachments = json.loads(message['attachments'])
    return render_template('email.html', email=message, attachments=attachments)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/attachments/<path:filename>')
def download_file(filename):
    """Serve a stored attachment with a MIME type sniffed from its contents.

    :param filename: Path below the ``attachments`` directory, taken from
        the URL. Responds with 404 when it escapes that directory or does
        not name an existing file.
    """
    from flask import abort  # local import keeps this view self-contained

    # send_from_directory() resolves a relative directory against the
    # application's root path; build the same absolute directory here so the
    # file that is sniffed is the file that is served.
    attachments_root = Path(app.root_path, 'attachments').resolve()
    target = (attachments_root / filename).resolve()
    # Validate before touching the file: filename is a <path:...> segment and
    # may contain '..' components.
    if attachments_root not in target.parents or not target.is_file():
        abort(404)
    mimetype = magic.from_file(str(target), mime=True)
    return send_from_directory(attachments_root, filename, mimetype=mimetype)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # The server listens on every interface, so the debugger must stay off:
    # debug=True enables the interactive debugger, which lets anyone who can
    # reach the port execute arbitrary Python code.
    app.run(host='0.0.0.0', debug=False)
|
|
@ -0,0 +1,17 @@
|
||||||
|
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Email</title>
</head>
<body>
{# Shows one email: links to its stored attachments, then the raw content. #}
<h1>Email</h1>
<h2>Attachments</h2>
<ul>
    {% for attachment in attachments %}
    {# Attachment files are stored under the name 'F' + content hash. #}
    <li><a href="{{ url_for('download_file', filename='F' + attachment.hash) }}">{{ attachment.filename }}</a></li>
    {% endfor %}
</ul>
<h2>Content</h2>
<pre>{{ email.raw_content }}</pre>
</body>
</html>
|
|
@ -0,0 +1,23 @@
|
||||||
|
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Emails</title>
    <style>
        ul {
            list-style-type: none;
        }
    </style>
</head>
<body>
{# Lists the emails of one folder; each entry links to the single-email view. #}
<h1>Emails</h1>
<ul>
    {% for email in emails %}
    <li>
        <a href="{{ url_for('email', table_name=table_name, id=email.id) }}">
            {{ email.timestamp }} | <i>{{ email.from_email }}</i> - <strong>{{ email.subject }}</strong>
        </a>
    </li>
    {% endfor %}
</ul>
</body>
</html>
|
|
@ -0,0 +1,40 @@
|
||||||
|
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Email Folders</title>
    <style>
        ul {
            list-style-type: none;
        }
    </style>
</head>
<body>
{# Landing page: the folder list followed by a table of the recorded syncs. #}
<h1>Email Folders</h1>
<ul>
    {% for folder in folders %}
    <li><a href="{{ url_for('folder', table_name=folder.table_name) }}">{{ folder.name }}</a></li>
    {% endfor %}
</ul>
<h1>Last Syncs</h1>
<table>
    <tr>
        <th>Timestamp</th>
        <th>Type</th>
        <th>New Emails</th>
        <th>New Attachments</th>
        <th>New Folders</th>
        <th>Duration</th>
    </tr>
    {% for sync in syncs %}
    <tr>
        <td>{{ sync.timestamp }}</td>
        <td>{{ sync.type }}</td>
        <td>{{ sync.new_emails }}</td>
        <td>{{ sync.new_attachments }}</td>
        <td>{{ sync.new_folders }}</td>
        <td>{{ sync.duration }}</td>
    </tr>
    {% endfor %}
</table>
</body>
</html>
|
Loading…
Reference in New Issue