First Commit

This commit is contained in:
Tyler
2026-03-24 17:01:09 -04:00
committed by GitHub
commit 71861ae630
15 changed files with 2047 additions and 0 deletions
View File
+206
View File
@@ -0,0 +1,206 @@
"""Encrypted config management using age (via pyrage) and TOML.
The config file is stored as an age-encrypted TOML blob at data/config.age.
The encryption key is derived from the master password with scrypt (age's passphrase mode).
Config schema (as TOML):
[proton]
email = "user@proton.me"
password = "..."
mailbox_password = "..." # optional, "" if not set
totp_secret = "..." # base32 TOTP secret
[icloud]
email = "user@icloud.com"
password = "..." # app-specific password
[preferences]
delivery_mode = "imap" # "imap" | "mbox"
poll_interval_min = 60 # integer minutes
"""
import io
from pathlib import Path
from typing import Any
import pyrage
import toml
# Location of the encrypted config blob, resolved relative to this module
# (two directories up, then data/config.age).
CONFIG_PATH = Path(__file__).parent.parent / "data" / "config.age"
# Section name -> keys that _validate() requires to be present in that section.
REQUIRED_KEYS = {
    "proton": ["email", "password", "totp_secret"],
    "icloud": ["email", "password"],
    "preferences": ["delivery_mode", "poll_interval_min"],
}
# Values accepted for preferences.delivery_mode.
DELIVERY_MODES = ("imap", "mbox")
# Smallest preferences.poll_interval_min (minutes) accepted by validation
# and by the setup wizard's custom-interval prompt.
MIN_INTERVAL_MIN = 15
class ConfigError(Exception):
    """Raised when the config file is missing, cannot be decrypted, or is invalid."""
# ---------------------------------------------------------------------------
# Low-level encrypt / decrypt helpers
# ---------------------------------------------------------------------------
def _encrypt(plaintext: str, passphrase: str) -> bytes:
    """Encrypt *plaintext* with age's passphrase mode and return the ciphertext.

    Args:
        plaintext: Text to protect; encoded as UTF-8 before encryption.
        passphrase: Master password the age key is derived from.

    Returns:
        The age-encrypted bytes.
    """
    # pyrage exposes passphrase encryption as module-level functions
    # (passphrase.encrypt / passphrase.decrypt), so call it directly instead
    # of building a recipient object.
    return pyrage.passphrase.encrypt(plaintext.encode("utf-8"), passphrase)
def _decrypt(ciphertext: bytes, passphrase: str) -> str:
    """Decrypt an age passphrase-encrypted blob and return it as text.

    Args:
        ciphertext: Bytes previously produced by age passphrase encryption.
        passphrase: Master password the age key is derived from.

    Returns:
        The decrypted payload decoded as UTF-8.

    Raises:
        Exception: whatever pyrage raises for a wrong passphrase or a
            corrupt blob; UnicodeDecodeError if the payload is not UTF-8.
    """
    # pyrage exposes passphrase decryption as a module-level function, so
    # call it directly instead of building an identity object.
    return pyrage.passphrase.decrypt(ciphertext, passphrase).decode("utf-8")
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def config_exists() -> bool:
    """Report whether the encrypted config file is present at CONFIG_PATH."""
    is_present = CONFIG_PATH.exists()
    return is_present
def save_config(data: dict[str, Any], passphrase: str) -> None:
    """Serialise *data* to TOML, encrypt with *passphrase*, write to disk.

    The ciphertext is written to a sibling temporary file first and then
    renamed over CONFIG_PATH, so an interrupted write cannot leave a
    truncated config (the only copy of the credentials) behind.

    Args:
        data: Config mapping; must pass _validate().
        passphrase: Master password used for encryption.

    Raises:
        ConfigError: if *data* fails validation.
        OSError: if the file cannot be written or renamed.
    """
    _validate(data)
    ciphertext = _encrypt(toml.dumps(data), passphrase)

    CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = CONFIG_PATH.with_name(CONFIG_PATH.name + ".tmp")
    try:
        tmp_path.write_bytes(ciphertext)
        # Rename within the same directory replaces the old file in one step.
        tmp_path.replace(CONFIG_PATH)
    except OSError:
        # Best-effort cleanup of the partial temp file; the original error
        # is the one the caller needs to see.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
def load_config(passphrase: str) -> dict[str, Any]:
    """Decrypt the config file and return it as a validated dict.

    Args:
        passphrase: Master password used to decrypt the file.

    Returns:
        The parsed config mapping.

    Raises:
        ConfigError: if the file is missing, unreadable, cannot be
            decrypted, is not valid TOML, or fails validation.
    """
    if not CONFIG_PATH.exists():
        raise ConfigError("No config file found. Run --setup first.")

    # Keep the three failure modes apart so the message names the real cause
    # instead of always blaming the master password.
    try:
        ciphertext = CONFIG_PATH.read_bytes()
    except OSError as exc:
        raise ConfigError(f"Could not read config file {CONFIG_PATH}: {exc}") from exc

    try:
        plaintext = _decrypt(ciphertext, passphrase)
    except Exception as exc:
        raise ConfigError(f"Failed to decrypt config (wrong master password?): {exc}") from exc

    try:
        data = toml.loads(plaintext)
    except ValueError as exc:
        # toml's decode error derives from ValueError.
        raise ConfigError(f"Config file is not valid TOML: {exc}") from exc

    _validate(data)
    return data
def update_config(passphrase: str, section: str, key: str, value: Any) -> None:
    """Set ``[section].key`` to *value* in the stored config and save it again.

    The section is created when it does not exist yet. Loading and saving both
    go through validation, so a ConfigError from either step propagates.
    """
    config = load_config(passphrase)
    config.setdefault(section, {})[key] = value
    save_config(config, passphrase)
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
def _validate(data: dict[str, Any]) -> None:
    """Check that *data* has every required section/key and sane preferences.

    Args:
        data: Parsed config mapping.

    Raises:
        ConfigError: if a required section or key is missing, a section is
            not a table, delivery_mode is unknown, or poll_interval_min is
            not an integer of at least MIN_INTERVAL_MIN.
    """
    for section, keys in REQUIRED_KEYS.items():
        if section not in data:
            raise ConfigError(f"Config missing section [{section}]")
        table = data[section]
        # A scalar here (e.g. `proton = 5`) would otherwise surface as a
        # TypeError, or as a substring test on a string.
        if not isinstance(table, dict):
            raise ConfigError(f"Config section [{section}] must be a table")
        for key in keys:
            if key not in table:
                raise ConfigError(f"Config missing [{section}].{key}")

    preferences = data["preferences"]

    mode = preferences["delivery_mode"]
    if mode not in DELIVERY_MODES:
        raise ConfigError(
            f"Invalid delivery_mode '{mode}'. Must be one of {DELIVERY_MODES}"
        )

    interval = preferences["poll_interval_min"]
    # bool is a subclass of int; reject it explicitly so `true` never counts
    # as an interval regardless of the configured minimum.
    is_integer = isinstance(interval, int) and not isinstance(interval, bool)
    if not is_integer or interval < MIN_INTERVAL_MIN:
        raise ConfigError(
            f"poll_interval_min must be an integer >= {MIN_INTERVAL_MIN}"
        )
# ---------------------------------------------------------------------------
# Interactive setup wizard helpers
# ---------------------------------------------------------------------------
# Setup-wizard menu choice -> polling interval in minutes.
# Any choice not listed here makes the wizard ask for a custom value.
INTERVAL_PRESETS = {
    "1": 15,
    "2": 30,
    "3": 60,
    "4": 360,
    "5": 1440,
}
def build_config_interactively() -> tuple[dict[str, Any], str]:
    """Prompt the user for all settings and a master password.

    Secrets are read with getpass so they are not echoed. The master password
    must be non-empty and entered twice identically.

    Returns:
        (config_dict, master_password), where config_dict has the
        [proton], [icloud] and [preferences] sections.
    """
    import getpass

    print("\n=== MailRelay First-Time Setup ===\n")

    proton_email = input("Proton Mail email address: ").strip()
    proton_password = getpass.getpass("Proton Mail password: ")
    proton_mailbox_pw = getpass.getpass(
        "Proton Mail mailbox password (leave blank if none): "
    )
    totp_secret = input("TOTP secret key (base32, from your 2FA setup): ").strip()

    icloud_email = input("\niCloud email address: ").strip()
    icloud_password = getpass.getpass("iCloud app-specific password: ")

    print("\nDelivery mode:")
    print(" 1) Automatic IMAP push to iCloud (default)")
    print(" 2) Manual MBOX download")
    mode_choice = input("Choose [1/2, default 1]: ").strip() or "1"
    # Anything other than an explicit "2" keeps the default IMAP mode.
    delivery_mode = "imap" if mode_choice != "2" else "mbox"

    print("\nPolling interval:")
    print(" 1) 15 minutes")
    print(" 2) 30 minutes")
    print(" 3) 1 hour (default)")
    print(" 4) 6 hours")
    print(" 5) 24 hours")
    print(" 6) Custom")
    interval_choice = input("Choose [1-6, default 3]: ").strip() or "3"
    if interval_choice in INTERVAL_PRESETS:
        poll_interval = INTERVAL_PRESETS[interval_choice]
    else:
        while True:
            raw = input(f"Enter interval in minutes (min {MIN_INTERVAL_MIN}): ").strip()
            # isdecimal() only accepts characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, which make int() raise.
            if raw.isdecimal() and int(raw) >= MIN_INTERVAL_MIN:
                poll_interval = int(raw)
                break
            print(f"Please enter a whole number >= {MIN_INTERVAL_MIN}.")

    print("\nSet a master password to encrypt your config.")
    while True:
        master_pw = getpass.getpass("Master password: ")
        if not master_pw:
            # An empty passphrase would leave the stored credentials unprotected.
            print("Master password must not be empty. Try again.")
            continue
        confirm = getpass.getpass("Confirm master password: ")
        if master_pw == confirm:
            break
        print("Passwords do not match. Try again.")

    config = {
        "proton": {
            "email": proton_email,
            "password": proton_password,
            "mailbox_password": proton_mailbox_pw,
            "totp_secret": totp_secret,
        },
        "icloud": {
            "email": icloud_email,
            "password": icloud_password,
        },
        "preferences": {
            "delivery_mode": delivery_mode,
            "poll_interval_min": poll_interval,
        },
    }
    return config, master_pw
+145
View File
@@ -0,0 +1,145 @@
"""SQLite-backed deduplication and delivery-state tracking."""
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Generator, Iterable
# SQLite database file, resolved relative to this module (../data/mailrelay.db).
DB_PATH = Path(__file__).parent.parent / "data" / "mailrelay.db"
# Delivery states stored in messages.state
STATE_PENDING = "pending" # MBOX generated, not yet downloaded
STATE_DELIVERED = "delivered" # IMAP pushed or MBOX confirmed downloaded
def _connect() -> sqlite3.Connection:
    """Open the SQLite database, creating its parent directory when missing.

    The returned connection yields sqlite3.Row rows and has been asked to use
    WAL journalling.
    """
    data_dir = DB_PATH.parent
    data_dir.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA journal_mode=WAL")
    return connection
@contextmanager
def _db() -> Generator[sqlite3.Connection, None, None]:
    """Yield a connection, committing on success and rolling back on error.

    The connection is always closed when the block exits; any exception is
    re-raised after the rollback.
    """
    connection = _connect()
    try:
        yield connection
        # Reached only when the with-body finished without raising.
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        connection.close()
def init_db() -> None:
    """Ensure the ``messages`` table and its state index exist."""
    create_table_sql = (
        "\n"
        "CREATE TABLE IF NOT EXISTS messages (\n"
        "message_id TEXT PRIMARY KEY,\n"
        "state TEXT NOT NULL DEFAULT 'delivered',\n"
        "mbox_path TEXT,\n"
        "created_at DATETIME DEFAULT (datetime('now')),\n"
        "updated_at DATETIME DEFAULT (datetime('now'))\n"
        ")\n"
    )
    create_index_sql = (
        "\n"
        "CREATE INDEX IF NOT EXISTS idx_state ON messages(state)\n"
    )
    with _db() as connection:
        for statement in (create_table_sql, create_index_sql):
            connection.execute(statement)
def is_known(message_id: str) -> bool:
    """Tell whether *message_id* already has a row in ``messages`` (any state)."""
    query = "SELECT 1 FROM messages WHERE message_id = ?"
    with _db() as connection:
        match = connection.execute(query, (message_id,)).fetchone()
    found = match is not None
    return found
def filter_new(message_ids: Iterable[str]) -> list[str]:
    """Return only the IDs not yet in the database.

    Input order (and any duplicates) are preserved in the result.

    Args:
        message_ids: Candidate message IDs.

    Returns:
        The subset of *message_ids* that has no row in ``messages``.
    """
    ids = list(message_ids)
    if not ids:
        return []

    # SQLite caps the number of bound parameters per statement (999 in older
    # builds), and an export can contain thousands of messages, so look the
    # IDs up in batches instead of one giant IN (...) list.
    batch_size = 500
    known: set[str] = set()
    with _db() as conn:
        for start in range(0, len(ids), batch_size):
            batch = ids[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            known.update(
                row[0]
                for row in conn.execute(
                    f"SELECT message_id FROM messages WHERE message_id IN ({placeholders})",
                    batch,
                )
            )
    return [mid for mid in ids if mid not in known]
def mark_pending(message_ids: Iterable[str], mbox_path: str) -> None:
    """Upsert every ID as pending and attach it to *mbox_path*.

    Existing rows have their state, mbox_path and updated_at overwritten.
    """
    upsert_sql = (
        "\n"
        "INSERT INTO messages (message_id, state, mbox_path)\n"
        "VALUES (?, ?, ?)\n"
        "ON CONFLICT(message_id) DO UPDATE SET\n"
        "state = excluded.state,\n"
        "mbox_path = excluded.mbox_path,\n"
        "updated_at = datetime('now')\n"
    )
    with _db() as connection:
        rows = [(message_id, STATE_PENDING, mbox_path) for message_id in message_ids]
        connection.executemany(upsert_sql, rows)
def mark_delivered(message_ids: Iterable[str]) -> None:
    """Upsert every ID as delivered and clear its mbox_path.

    Does nothing (and does not touch the database) for an empty input.
    """
    rows = [(message_id, STATE_DELIVERED) for message_id in message_ids]
    if len(rows) == 0:
        return

    upsert_sql = (
        "\n"
        "INSERT INTO messages (message_id, state)\n"
        "VALUES (?, ?)\n"
        "ON CONFLICT(message_id) DO UPDATE SET\n"
        "state = excluded.state,\n"
        "mbox_path = NULL,\n"
        "updated_at = datetime('now')\n"
    )
    with _db() as connection:
        connection.executemany(upsert_sql, rows)
def get_pending_mboxes() -> list[dict]:
    """Group pending message IDs by the MBOX file they were bundled into.

    Returns one ``{"mbox_path": ..., "message_ids": [...]}`` dict per distinct
    path, in the order the paths are first seen in the query result.
    """
    query = "SELECT message_id, mbox_path FROM messages WHERE state = ?"
    with _db() as connection:
        pending_rows = connection.execute(query, (STATE_PENDING,)).fetchall()

    grouped: dict[str, list[str]] = {}
    for record in pending_rows:
        mbox = record["mbox_path"]
        if mbox not in grouped:
            grouped[mbox] = []
        grouped[mbox].append(record["message_id"])

    result = []
    for mbox, members in grouped.items():
        result.append({"mbox_path": mbox, "message_ids": members})
    return result
def clear_pending_for_mbox(mbox_path: str) -> list[str]:
    """Delete the pending rows that belong to *mbox_path*.

    Returns the message IDs of the rows that were pending for that MBOX; the
    select and the delete run on the same connection inside one _db() block.
    """
    criteria = (STATE_PENDING, mbox_path)
    with _db() as connection:
        selected = connection.execute(
            "SELECT message_id FROM messages WHERE state = ? AND mbox_path = ?",
            criteria,
        ).fetchall()
        removed_ids = []
        for record in selected:
            removed_ids.append(record["message_id"])
        connection.execute(
            "DELETE FROM messages WHERE state = ? AND mbox_path = ?",
            criteria,
        )
    return removed_ids
+188
View File
@@ -0,0 +1,188 @@
"""Automate proton-mail-export-cli via pexpect.
The Proton Export CLI interactive prompt sequence (observed order):
1. Email address
2. Password
3. Two-factor authentication code (only if 2FA is enabled)
4. Mailbox password (only if separate mailbox password is set)
After authentication the CLI exports all mail to the export directory.
Each message produces two files:
{messageID}.eml
{messageID}.metadata.json
Because the exact prompt strings can vary between CLI versions and account
configurations, each pattern is defined as a regex so minor wording differences
are tolerated. If the CLI changes its prompt wording, update PROMPTS below.
"""
from pathlib import Path
from typing import Optional
import pexpect
from .logger import get_logger
from .tools import BINARY_PATH as CLI_BINARY_PATH, ensure_export_cli
log = get_logger(__name__)
# ---------------------------------------------------------------------------
# Configurable paths
# ---------------------------------------------------------------------------
# Default export target (../data/exports relative to this module); used by
# run_export() when no export_dir argument is given.
EXPORT_DIR = Path(__file__).parent.parent / "data" / "exports"
# ---------------------------------------------------------------------------
# Prompt patterns (regexes matched against CLI output; letter case is
# tolerated only where a character class spells it out)
# ---------------------------------------------------------------------------
# Logical prompt name -> regex source string handed to pexpect's expect().
# NOTE(review): the generic "password" pattern also matches the tail of a
# "Mailbox password:" prompt, and "error" matches the bare words anywhere in
# the output — confirm against real CLI transcripts.
PROMPTS = {
    "email": r"[Ee]mail\s*(address)?[\s:>]+",
    "password": r"[Pp]assword[\s:>]+",
    "totp": r"[Tt]wo.factor|[Oo]ne.time|[Tt][Oo][Tt][Pp]|[Aa]uth.*code",
    "mailbox_password": r"[Mm]ailbox\s*[Pp]assword[\s:>]+",
    "done": r"[Ee]xport\s*(complete|finished|done)|[Ss]uccessfully\s*export",
    "error": r"[Ee]rror|[Ff]ailed|[Ii]nvalid",
}
# Maximum time to wait for each prompt (seconds); also the default timeout
# given to the spawned CLI process.
PROMPT_TIMEOUT = 120
# Total export timeout in seconds — large mailboxes can take a while
EXPORT_TIMEOUT = 3600
class ExportError(Exception):
    """Raised when the Proton export CLI cannot be set up or reports a failure."""
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def run_export(
    email: str,
    password: str,
    totp_code: str,
    mailbox_password: str = "",
    export_dir: Optional[Path] = None,
) -> Path:
    """Drive proton-mail-export-cli and return the export directory path.

    Args:
        email: Proton account email address.
        password: Proton account password.
        totp_code: Current two-factor code, sent if the CLI asks for one.
        mailbox_password: Separate mailbox password, if the account has one.
        export_dir: Target directory; defaults to EXPORT_DIR.

    Returns:
        The directory the CLI was told to export into.

    Raises:
        ExportError: if the CLI binary cannot be obtained, a prompt times
            out, the CLI prints an error, or it exits with a non-zero status.
    """
    out_dir = export_dir or EXPORT_DIR
    out_dir.mkdir(parents=True, exist_ok=True)

    try:
        binary = ensure_export_cli()
    except Exception as exc:
        raise ExportError(str(exc)) from exc

    cmd = [str(binary), "--export-dir", str(out_dir)]
    log.info("Starting Proton export CLI: %s", " ".join(cmd))

    child = pexpect.spawn(
        cmd[0], cmd[1:], timeout=PROMPT_TIMEOUT, encoding="utf-8"
    )
    child.logfile_read = _PexpectLogger(log)

    output = ""
    try:
        _drive_cli(child, email, password, totp_code, mailbox_password)
        # _drive_cli returns on the "done" banner as well as on EOF. Give the
        # process a bounded window to finish on its own before closing the
        # pty, so closing does not interrupt it mid-shutdown.
        if child.isalive():
            try:
                child.expect(pexpect.EOF, timeout=PROMPT_TIMEOUT)
            except pexpect.TIMEOUT:
                log.warning(
                    "CLI still running %d s after finishing; terminating it.",
                    PROMPT_TIMEOUT,
                )
    except pexpect.TIMEOUT as exc:
        raise ExportError("Timed out waiting for CLI prompt.") from exc
    except pexpect.EOF:
        # EOF after a successful export is normal; the exit status is
        # checked below.
        log.info("CLI process finished (EOF).")
    finally:
        if isinstance(child.before, str):
            output = child.before
        # Always release the pty and reap the process, including when
        # _drive_cli raised ExportError.
        child.close(force=True)

    # _drive_cli consumes EOF as an ordinary pattern, so the exit status has
    # to be inspected here rather than in an EOF handler.
    if child.exitstatus:
        raise ExportError(
            f"CLI exited with code {child.exitstatus}. Output: {output.strip()}"
        )

    log.info("Export complete. Files in: %s", out_dir)
    return out_dir
def _drive_cli(
    child: pexpect.spawn,
    email: str,
    password: str,
    totp_code: str,
    mailbox_password: str,
) -> None:
    """Respond to each interactive prompt until the CLI finishes.

    Returns when the CLI prints a completion banner or closes its output.

    Raises:
        pexpect.TIMEOUT: if nothing recognisable arrives within the timeout.
        ExportError: if the CLI prints something matching the error pattern.
    """
    # Indices below refer to positions in this list.
    patterns = [
        pexpect.TIMEOUT,
        pexpect.EOF,
        PROMPTS["email"],
        PROMPTS["password"],
        PROMPTS["totp"],
        PROMPTS["mailbox_password"],
        PROMPTS["done"],
        PROMPTS["error"],
    ]

    password_sent = False
    mailbox_sent = False
    # Prompts should arrive quickly; once credentials are in, the next
    # recognisable output may only come when the whole export is done.
    timeout = PROMPT_TIMEOUT

    while True:
        idx = child.expect(patterns, timeout=timeout)

        if idx == 0:  # TIMEOUT
            raise pexpect.TIMEOUT("No prompt received within timeout.")
        if idx == 1:  # EOF — process exited
            return

        if idx == 2:  # email prompt
            log.debug("CLI requested email.")
            child.sendline(email)
        elif idx == 3:  # generic password prompt
            # The CLI may use the same wording for the account password and
            # the mailbox password. The first one is the account password; a
            # later one gets the mailbox password if one is configured.
            if password_sent and mailbox_password and not mailbox_sent:
                log.debug("CLI requested mailbox password.")
                child.sendline(mailbox_password)
                mailbox_sent = True
            else:
                log.debug("CLI requested account password.")
                child.sendline(password)
                password_sent = True
                timeout = EXPORT_TIMEOUT
        elif idx == 4:  # TOTP prompt
            log.debug("CLI requested TOTP code.")
            child.sendline(totp_code)
        elif idx == 5:  # explicit mailbox password prompt
            log.debug("CLI requested mailbox password (explicit prompt).")
            child.sendline(mailbox_password or "")
            mailbox_sent = True
        elif idx == 6:  # export done
            log.info("CLI reported export complete.")
            return
        elif idx == 7:  # error line
            raise ExportError(f"CLI reported an error: {_error_line(child)}")


def _error_line(child: pexpect.spawn) -> str:
    """Return the output line that contains the error keyword just matched."""
    before = child.before if isinstance(child.before, str) else ""
    matched = child.after if isinstance(child.after, str) else ""
    # Text on the same line ahead of the keyword; empty when the keyword
    # starts the line (or the output).
    prefix = before.replace("\r", "\n").rpartition("\n")[2]
    try:
        # Briefly wait for the rest of the line so the message is complete.
        child.expect(r"[\r\n]", timeout=1)
    except (pexpect.TIMEOUT, pexpect.EOF):
        # No line ending arrived; whatever was buffered is in child.before.
        pass
    rest = child.before if isinstance(child.before, str) else ""
    return (prefix + matched + rest).strip()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class _PexpectLogger:
"""Thin adapter so pexpect writes its read data to our logger at DEBUG."""
def __init__(self, logger):
self._log = logger
def write(self, s: str) -> None:
if s.strip():
self._log.debug("[cli] %s", s.rstrip())
def flush(self) -> None:
pass
+120
View File
@@ -0,0 +1,120 @@
"""Push emails to iCloud Mail via IMAP APPEND.
iCloud IMAP settings:
Host : imap.mail.me.com
Port : 993 (SSL)
Auth : email + app-specific password
The APPEND command places a message directly into a mailbox without going
through SMTP, so no "sent" copy is created and delivery is instant.
"""
import imaplib
import socket
from typing import Optional
from .database import mark_delivered
from .logger import get_logger
from .processor import RichEmail
log = get_logger(__name__)
# iCloud IMAP endpoint (implicit TLS).
ICLOUD_IMAP_HOST = "imap.mail.me.com"
ICLOUD_IMAP_PORT = 993
# Mailbox that push_emails() appends to unless told otherwise.
DEFAULT_MAILBOX = "INBOX"
CONNECT_TIMEOUT = 30 # seconds
class ForwarderError(Exception):
    """Raised when the IMAP connection, login, or an APPEND fails."""
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def push_emails(
    emails: list[RichEmail],
    icloud_email: str,
    icloud_password: str,
    mailbox: str = DEFAULT_MAILBOX,
) -> tuple[list[str], list[str]]:
    """Upload each email to *mailbox* over one IMAP session.

    Returns ``(succeeded_ids, failed_ids)``. A connection failure counts every
    message as failed. IDs that were appended successfully are recorded as
    delivered in SQLite after the session has been closed.
    """
    if not emails:
        return [], []

    try:
        session = _connect(icloud_email, icloud_password)
    except ForwarderError as exc:
        log.error("IMAP connection failed: %s", exc)
        return [], [item.message_id for item in emails]

    delivered_ids: list[str] = []
    failed_ids: list[str] = []
    try:
        for item in emails:
            # One bad message must not stop the rest of the batch.
            try:
                _append(session, item, mailbox)
                delivered_ids.append(item.message_id)
                log.info("IMAP pushed: %s", item.message_id)
            except Exception as exc:
                log.error("Failed to push %s: %s", item.message_id, exc)
                failed_ids.append(item.message_id)
    finally:
        _logout(session)

    if delivered_ids:
        mark_delivered(delivered_ids)
        log.info("Marked %d message(s) as delivered.", len(delivered_ids))

    return delivered_ids, failed_ids
# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------
def _connect(email_addr: str, password: str) -> imaplib.IMAP4_SSL:
    """Open a TLS IMAP connection to iCloud and log in.

    Args:
        email_addr: iCloud account email.
        password: App-specific password.

    Returns:
        An authenticated IMAP4_SSL connection.

    Raises:
        ForwarderError: if the server cannot be reached or the login fails.
    """
    log.debug("Connecting to %s:%d", ICLOUD_IMAP_HOST, ICLOUD_IMAP_PORT)
    try:
        # Without a timeout an unreachable or stalled server blocks forever.
        conn = imaplib.IMAP4_SSL(
            ICLOUD_IMAP_HOST,
            ICLOUD_IMAP_PORT,
            timeout=CONNECT_TIMEOUT,
        )
    except (OSError, imaplib.IMAP4.error) as exc:
        # OSError already covers socket.gaierror, timeouts and TLS errors.
        raise ForwarderError(f"Cannot reach {ICLOUD_IMAP_HOST}: {exc}") from exc

    try:
        conn.login(email_addr, password)
    except (imaplib.IMAP4.error, OSError) as exc:
        # Do not leave the socket open when the login did not succeed.
        try:
            conn.shutdown()
        except OSError:
            pass
        if isinstance(exc, imaplib.IMAP4.error):
            raise ForwarderError(f"IMAP authentication failed: {exc}") from exc
        raise ForwarderError(f"IMAP login to {ICLOUD_IMAP_HOST} failed: {exc}") from exc

    log.debug("IMAP login successful for %s", email_addr)
    return conn
def _append(
    conn: imaplib.IMAP4_SSL,
    rich: RichEmail,
    mailbox: str,
) -> None:
    """Store one message in *mailbox* via IMAP APPEND.

    Raises ForwarderError when the server answers with anything but "OK".
    """
    # Argument order of imaplib's append: mailbox, flags, date_time, message.
    # flags and date_time are left as None so the server applies its defaults.
    flags = None
    internal_date = None
    status, data = conn.append(mailbox, flags, internal_date, rich.raw_bytes)
    if status == "OK":
        return
    raise ForwarderError(
        f"APPEND returned {status} for {rich.message_id}: {data}"
    )
def _logout(conn: imaplib.IMAP4_SSL) -> None:
try:
conn.logout()
except Exception:
pass
+47
View File
@@ -0,0 +1,47 @@
"""Rotating log file setup for MailRelay."""
import logging
from logging.handlers import RotatingFileHandler
from pathlib import Path
# Log file location, resolved relative to this module (../data/mailrelay.log).
LOG_PATH = Path(__file__).parent.parent / "data" / "mailrelay.log"
# Format shared by the file and console handlers.
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
# Rotation settings for the file handler.
MAX_BYTES = 5 * 1024 * 1024 # 5 MB
BACKUP_COUNT = 3
def get_logger(name: str) -> logging.Logger:
    """Return a named logger wired to the shared rotating file + stderr.

    Every logger returned by this function shares one file handler and one
    console handler. Separate RotatingFileHandler instances on the same file
    would each keep their own stream open and rotate independently.

    Args:
        name: Logger name, typically the caller's ``__name__``.

    Returns:
        The configured logger (level DEBUG).
    """
    logger = logging.getLogger(name)
    if logger.handlers:
        return logger  # already configured

    logger.setLevel(logging.DEBUG)
    for handler in _shared_handlers():
        logger.addHandler(handler)
    return logger


def _shared_handlers() -> list:
    """Create the file and console handlers once and return them on every call."""
    handlers = getattr(_shared_handlers, "_handlers", None)
    if handlers is not None:
        return handlers

    formatter = logging.Formatter(LOG_FORMAT)

    # Rotating file handler (everything from DEBUG up)
    LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
    file_handler = RotatingFileHandler(
        LOG_PATH, maxBytes=MAX_BYTES, backupCount=BACKUP_COUNT, encoding="utf-8"
    )
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)

    # Console handler (INFO and above)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    handlers = [file_handler, console_handler]
    _shared_handlers._handlers = handlers
    return handlers
def tail_log(lines: int = 50) -> str:
    """Return the last N lines of the log file as a string.

    Args:
        lines: How many trailing lines to return; zero or a negative number
            yields an empty string.

    Returns:
        The requested lines joined with newlines, or a placeholder message
        when the log file does not exist yet.
    """
    if not LOG_PATH.exists():
        return "(no log file yet)"
    if lines <= 0:
        # A slice of [-0:] would return the whole file instead of nothing.
        return ""
    # errors="replace" keeps a single undecodable byte from breaking the view.
    text = LOG_PATH.read_text(encoding="utf-8", errors="replace")
    return "\n".join(text.splitlines()[-lines:])
+15
View File
@@ -0,0 +1,15 @@
"""TOTP code generation using pyotp."""
import pyotp
def generate_totp(secret: str) -> str:
    """Generate the current TOTP code from a base32 secret.

    Whitespace inside *secret* is removed first, because secrets are often
    displayed and pasted in space-separated groups that would otherwise fail
    base32 decoding.
    """
    normalized = "".join(secret.split())
    return pyotp.TOTP(normalized).now()
def verify_totp(secret: str, code: str) -> bool:
    """Verify a TOTP code against a secret (useful for debugging setup).

    Whitespace is removed from both *secret* and *code* before checking, so
    values pasted in space-separated groups still verify.
    """
    normalized_secret = "".join(secret.split())
    normalized_code = "".join(code.split())
    return pyotp.TOTP(normalized_secret).verify(normalized_code)
+207
View File
@@ -0,0 +1,207 @@
"""MBOX packaging and local HTTP download server (Mode 2 / fallback).
Flow:
1. bundle_emails() — write a timestamped .mbox into data/downloads/
mark message IDs as "pending" in SQLite
return the local download URL
2. start_server() — launch a FastAPI/uvicorn server in a background thread
serving data/downloads/
3. On GET /download/{filename} the server streams the file, then marks all
IDs for that MBOX as delivered and deletes the file.
4. cleanup_stale() — called at the start of each sync cycle; deletes any
MBOX files that were never collected and removes their
pending DB entries so they can be re-processed.
"""
import mailbox
import os
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Optional
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from . import database
from .logger import get_logger
from .processor import RichEmail
log = get_logger(__name__)
# Directory that holds generated .mbox bundles (../data/downloads).
DOWNLOADS_DIR = Path(__file__).parent.parent / "data" / "downloads"
# The download server binds to the loopback interface only.
SERVER_HOST = "127.0.0.1"
SERVER_PORT = 8765
# Handles to the running server, set by start_server().
_server_thread: Optional[threading.Thread] = None
_uvicorn_server: Optional[uvicorn.Server] = None
# Interactive API docs are switched off (docs_url / redoc_url are None).
app = FastAPI(title="MailRelay Download Server", docs_url=None, redoc_url=None)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def bundle_emails(emails: list[RichEmail]) -> Optional[str]:
    """Write emails to a new .mbox file and return the download URL.

    The file is named after the current UTC time and stored in
    DOWNLOADS_DIR. All message IDs are marked 'pending' in SQLite.

    Args:
        emails: Messages to bundle.

    Returns:
        The local download URL, or None if *emails* is empty.
    """
    from datetime import timezone

    if not emails:
        return None

    DOWNLOADS_DIR.mkdir(parents=True, exist_ok=True)
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted name is unchanged.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    mbox_filename = f"mailrelay_{timestamp}.mbox"
    mbox_path = DOWNLOADS_DIR / mbox_filename

    mbox = mailbox.mbox(str(mbox_path))
    mbox.lock()
    try:
        for rich in emails:
            mbox.add(mailbox.mboxMessage(rich.raw_bytes))
    finally:
        # Flush and release the lock even when adding a message failed.
        mbox.flush()
        mbox.unlock()
        mbox.close()

    message_ids = [e.message_id for e in emails]
    database.mark_pending(message_ids, str(mbox_path))

    url = f"http://{SERVER_HOST}:{SERVER_PORT}/download/{mbox_filename}"
    log.info(
        "MBOX bundle created: %s (%d message(s)). Download: %s",
        mbox_filename,
        len(emails),
        url,
    )
    return url
def cleanup_stale() -> list[str]:
    """Drop every pending MBOX left over from earlier cycles.

    For each pending bundle the DB rows are removed first and the file is then
    deleted if it still exists. Returns all message IDs that were cleared;
    since they are no longer in the DB they will be picked up again by the
    next sync.
    """
    all_cleared: list[str] = []

    for bundle in database.get_pending_mboxes():
        stale_file = Path(bundle["mbox_path"])
        pending_ids = bundle["message_ids"]
        log.warning(
            "Stale MBOX detected (never downloaded): %s — clearing %d pending ID(s) for re-processing.",
            stale_file.name,
            len(pending_ids),
        )
        all_cleared.extend(database.clear_pending_for_mbox(str(stale_file)))

        if not stale_file.exists():
            continue
        try:
            stale_file.unlink()
            log.info("Deleted stale MBOX: %s", stale_file.name)
        except OSError as exc:
            log.error("Could not delete %s: %s", stale_file.name, exc)

    return all_cleared
def start_server() -> None:
    """Launch the download server in a daemon thread unless one is alive already."""
    global _server_thread, _uvicorn_server

    running = _server_thread and _server_thread.is_alive()
    if running:
        return

    server_config = uvicorn.Config(
        app,
        host=SERVER_HOST,
        port=SERVER_PORT,
        log_level="warning",
        access_log=False,
    )
    _uvicorn_server = uvicorn.Server(server_config)
    # Daemon thread: it must not keep the process alive on exit.
    _server_thread = threading.Thread(
        name="mailrelay-download-server",
        target=_uvicorn_server.run,
        daemon=True,
    )
    _server_thread.start()
    log.info("Download server started at http://%s:%d", SERVER_HOST, SERVER_PORT)
def stop_server() -> None:
    """Ask the download server to exit and wait briefly for its thread.

    Does nothing when no server was started. If the thread ends within the
    wait, the module-level handles are cleared so start_server() can launch a
    fresh instance; otherwise a warning is logged and the handles are kept.
    """
    global _server_thread, _uvicorn_server
    if not _uvicorn_server:
        return

    _uvicorn_server.should_exit = True

    thread = _server_thread
    wait_seconds = 5
    # Never join the current thread (would raise RuntimeError).
    if thread is not None and thread is not threading.current_thread():
        thread.join(timeout=wait_seconds)

    if thread is not None and thread.is_alive():
        log.warning("Download server did not stop within %d s.", wait_seconds)
        return

    _uvicorn_server = None
    _server_thread = None
    log.info("Download server stopped.")
# ---------------------------------------------------------------------------
# FastAPI routes
# ---------------------------------------------------------------------------
@app.get("/download/{filename}")
async def download_mbox(filename: str):
    """Send the requested bundle and schedule its post-download bookkeeping.

    Responds 400 for names containing a path separator or starting with a dot,
    and 404 when the file is not in DOWNLOADS_DIR. On success the file is
    returned with a background task that runs _on_download_complete.
    """
    # Basic path safety — no traversal
    contains_separator = "/" in filename or "\\" in filename
    if contains_separator or filename.startswith("."):
        raise HTTPException(status_code=400, detail="Invalid filename.")

    target = DOWNLOADS_DIR / filename
    if not target.exists():
        raise HTTPException(status_code=404, detail="File not found or already downloaded.")

    # Delivery bookkeeping is attached as a starlette background task so it
    # runs after the response.
    from starlette.background import BackgroundTask

    after_send = BackgroundTask(_on_download_complete, str(target))
    log.info("Serving MBOX download: %s", filename)
    return FileResponse(
        path=str(target),
        media_type="application/mbox",
        filename=filename,
        background=after_send,
    )
@app.get("/status")
async def server_status():
    """List each pending bundle by file name with its message count."""
    summary = []
    for entry in database.get_pending_mboxes():
        summary.append(
            {
                "file": Path(entry["mbox_path"]).name,
                "message_count": len(entry["message_ids"]),
            }
        )
    return {"pending_mboxes": summary}
# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------
def _on_download_complete(mbox_path_str: str) -> None:
    """Called after a file download: mark its messages delivered + delete file.

    Args:
        mbox_path_str: Path of the served MBOX, as stored in the database.
    """
    mbox_path = Path(mbox_path_str)

    # Look the IDs up without deleting their rows. mark_delivered() then
    # flips them to delivered (and clears mbox_path) in one transaction, so a
    # failure in between cannot drop the dedup records.
    ids = [
        message_id
        for entry in database.get_pending_mboxes()
        if entry["mbox_path"] == mbox_path_str
        for message_id in entry["message_ids"]
    ]
    if ids:
        database.mark_delivered(ids)
        log.info(
            "Download confirmed for %s — marked %d message(s) as delivered.",
            mbox_path.name,
            len(ids),
        )

    if mbox_path.exists():
        try:
            mbox_path.unlink()
            log.info("Deleted downloaded MBOX: %s", mbox_path.name)
        except OSError as exc:
            log.error("Could not delete %s: %s", mbox_path.name, exc)
+184
View File
@@ -0,0 +1,184 @@
"""Scan an export directory, pair EML + metadata, check dedup, return new emails.
Proton exports produce two files per message:
{messageID}.eml
{messageID}.metadata.json
Metadata fields of interest (Proton-specific):
Subject, SenderAddress, SenderName, ToList, CCList, BCCList,
Time (Unix timestamp), Unread, LabelIDs, ExternalID, NumAttachments
These are mapped to standard RFC 5322 headers when they are missing from the
raw EML (Proton sometimes omits headers in the raw export).
"""
import email
import email.policy
import json
from dataclasses import dataclass, field
from email.message import EmailMessage
from pathlib import Path
from typing import Optional
from .database import filter_new
from .logger import get_logger
log = get_logger(__name__)
@dataclass
class RichEmail:
    """One exported message, parsed and enriched, ready to hand to delivery."""

    # Identifier taken from the export file name (the .eml stem).
    message_id: str
    # Parsed message, possibly with headers back-filled from the metadata.
    message: EmailMessage
    # Serialised form of `message`; this is what delivery code sends.
    raw_bytes: bytes
    # Parsed metadata JSON; stays empty when the file is absent or unreadable.
    metadata: dict = field(default_factory=dict)
class ProcessorError(Exception):
    """Error type reserved for failures while processing exported messages."""
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def scan_and_filter(export_dir: Path) -> list[RichEmail]:
    """Build RichEmail objects for the exported messages not seen before.

    Every .eml in *export_dir* is paired with its metadata file, IDs already
    in the database are dropped, and the remaining messages are parsed. A
    message that cannot be processed is logged and skipped.
    """
    pairs = _find_pairs(export_dir)
    log.info("Found %d exported message(s) in %s", len(pairs), export_dir)

    unseen_ids = filter_new(list(pairs.keys()))
    log.info("%d new message(s) after deduplication.", len(unseen_ids))

    new_emails: list[RichEmail] = []
    for message_id in unseen_ids:
        eml_file, meta_file = pairs[message_id]
        try:
            new_emails.append(_build_rich_email(message_id, eml_file, meta_file))
        except Exception as exc:
            log.warning("Skipping %s — could not process: %s", message_id, exc)
    return new_emails
# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------
def _find_pairs(export_dir: Path) -> dict[str, tuple[Path, Optional[Path]]]:
"""Return {messageID: (eml_path, meta_path_or_None)} for every .eml found."""
pairs: dict[str, tuple[Path, Optional[Path]]] = {}
for eml_path in export_dir.glob("*.eml"):
mid = eml_path.stem
meta_path = eml_path.with_suffix(".metadata.json")
if not meta_path.exists():
meta_path = None
log.debug("No metadata file for %s", mid)
pairs[mid] = (eml_path, meta_path)
return pairs
def _build_rich_email(
    message_id: str,
    eml_path: Path,
    meta_path: Optional[Path],
) -> RichEmail:
    """Parse one exported message and fold its metadata into the headers.

    An unreadable or malformed metadata file is logged and treated as empty;
    errors while reading or serialising the .eml itself propagate.
    """
    parsed: EmailMessage = email.message_from_bytes(
        eml_path.read_bytes(), policy=email.policy.default
    )  # type: ignore[assignment]

    meta: dict = {}
    if meta_path:
        try:
            meta_text = meta_path.read_text(encoding="utf-8")
            meta = json.loads(meta_text)
        except Exception as exc:
            log.warning("Could not parse metadata for %s: %s", message_id, exc)

    # Augment missing headers from metadata before serialising.
    _merge_metadata(parsed, meta)

    return RichEmail(
        message_id=message_id,
        message=parsed,
        raw_bytes=parsed.as_bytes(policy=email.policy.SMTP),
        metadata=meta,
    )
def _merge_metadata(msg: EmailMessage, meta: dict) -> None:
"""Back-fill standard headers from Proton metadata where missing."""
if not meta:
return
# Subject
if not msg.get("Subject") and meta.get("Subject"):
msg["Subject"] = meta["Subject"]
# From
if not msg.get("From"):
sender_addr = meta.get("SenderAddress", "")
sender_name = meta.get("SenderName", "")
if sender_addr:
from_value = (
f'"{sender_name}" <{sender_addr}>'
if sender_name
else sender_addr
)
msg["From"] = from_value
# To
if not msg.get("To"):
to_list = meta.get("ToList", [])
if to_list:
msg["To"] = _format_address_list(to_list)
# CC
if not msg.get("Cc"):
cc_list = meta.get("CCList", [])
if cc_list:
msg["Cc"] = _format_address_list(cc_list)
# BCC
if not msg.get("Bcc"):
bcc_list = meta.get("BCCList", [])
if bcc_list:
msg["Bcc"] = _format_address_list(bcc_list)
# Date — Proton uses Unix timestamp in "Time"
if not msg.get("Date") and meta.get("Time"):
import email.utils
msg["Date"] = email.utils.formatdate(meta["Time"], localtime=False)
# Message-ID — prefer ExternalID if the EML header is missing
if not msg.get("Message-ID") and meta.get("ExternalID"):
msg["Message-ID"] = f"<{meta['ExternalID']}>"
# X-Proton-* passthrough headers for labels and read status
if meta.get("LabelIDs"):
msg["X-Proton-LabelIDs"] = ",".join(str(l) for l in meta["LabelIDs"])
if "Unread" in meta:
msg["X-Proton-Unread"] = str(meta["Unread"])
def _format_address_list(entries: list) -> str:
"""Convert Proton address list entries to RFC 5322 address string."""
parts = []
for entry in entries:
if isinstance(entry, dict):
name = entry.get("Name", "")
addr = entry.get("Address", "")
if addr:
parts.append(f'"{name}" <{addr}>' if name else addr)
elif isinstance(entry, str):
parts.append(entry)
return ", ".join(parts)
+101
View File
@@ -0,0 +1,101 @@
"""APScheduler wrapper for MailRelay's polling interval.
The scheduler runs a single persistent background job that fires the sync
function at the user-configured interval. It also exposes helpers so main.py
can trigger an immediate run or print the next scheduled time.
"""
from datetime import datetime, timezone
from typing import Callable, Optional
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from .logger import get_logger
log = get_logger(__name__)
# Identifier of the single sync job registered with the scheduler.
JOB_ID = "mailrelay_sync"
# Module-wide scheduler instance; None until start() creates one.
_scheduler: Optional[BackgroundScheduler] = None
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def start(sync_fn: Callable, interval_minutes: int) -> None:
    """Create the background scheduler and register the recurring sync job.

    *sync_fn* is invoked without arguments every *interval_minutes* minutes,
    wrapped so that its exceptions are logged. A call while a scheduler is
    already running only logs a warning.
    """
    global _scheduler

    if _scheduler and _scheduler.running:
        log.warning("Scheduler already running — ignoring start() call.")
        return

    _scheduler = BackgroundScheduler(timezone="UTC")
    job_options = {
        "trigger": IntervalTrigger(minutes=interval_minutes),
        "id": JOB_ID,
        "name": "MailRelay sync",
        "replace_existing": True,
        "max_instances": 1,  # prevent overlapping runs
        "coalesce": True,  # skip missed fires rather than catching up
    }
    _scheduler.add_job(_guarded(sync_fn), **job_options)
    _scheduler.start()
    log.info(
        "Scheduler started. Sync will run every %d minute(s).", interval_minutes
    )
def stop() -> None:
    """Shut the scheduler down if it is running and forget the instance.

    Shutdown is requested with ``wait=False``, so this call does not block on
    a job that is currently executing.
    """
    global _scheduler

    active = _scheduler
    if active and active.running:
        active.shutdown(wait=False)
        log.info("Scheduler stopped.")
    # Drop the reference so a later start() builds a fresh scheduler.
    _scheduler = None
def run_now(sync_fn: Callable) -> None:
    """Run *sync_fn* immediately in the calling thread, outside the schedule.

    The call goes through the same exception guard as scheduled runs, so a
    failure is logged rather than raised to the caller.
    """
    log.info("Manual run triggered.")
    guarded_sync = _guarded(sync_fn)
    guarded_sync()
def next_run_time() -> Optional[datetime]:
    """Return when the sync job will next fire, or None if nothing is scheduled.

    None is returned when the scheduler is absent or stopped, when the job is
    not registered, or when the job has no upcoming run time.
    """
    scheduler = _scheduler
    if scheduler and scheduler.running:
        job = scheduler.get_job(JOB_ID)
        if job:
            return job.next_run_time or None
    return None
def update_interval(interval_minutes: int) -> None:
    """Change the polling interval without restarting the scheduler.

    The existing sync job is rescheduled with a new interval trigger.

    Args:
        interval_minutes: New number of minutes between runs; must be
            greater than zero.

    Raises:
        ValueError: If *interval_minutes* is zero or negative.
        RuntimeError: If the scheduler is not running.
    """
    # Validate the argument before touching scheduler state so a bad value
    # can never replace a working schedule.
    if interval_minutes <= 0:
        raise ValueError(
            f"interval_minutes must be a positive number, got {interval_minutes!r}"
        )
    if not _scheduler or not _scheduler.running:
        raise RuntimeError("Scheduler is not running.")
    _scheduler.reschedule_job(
        JOB_ID,
        trigger=IntervalTrigger(minutes=interval_minutes),
    )
    log.info("Polling interval updated to %d minute(s).", interval_minutes)
# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------
def _guarded(fn: Callable) -> Callable:
"""Wrap *fn* so unhandled exceptions are logged but don't kill the scheduler."""
def wrapper(*args, **kwargs):
try:
fn(*args, **kwargs)
except Exception as exc:
log.error("Unhandled exception in sync function: %s", exc, exc_info=True)
return wrapper
+147
View File
@@ -0,0 +1,147 @@
"""Manage the bundled Proton Mail Export CLI binary.
The binary lives in mailrelay/tools/proton-export/proton-mail-export-cli
and is downloaded on first use (with user consent).
Public API
----------
ensure_export_cli() -> Path
Return the path to the binary, downloading it first if needed.
Raises ToolSetupError if the user declines or the download fails.
BINARY_PATH : Path
Absolute path to where the binary is expected.
"""
import os
import shutil
import subprocess
import sys
import tarfile
from pathlib import Path
from .logger import get_logger
log = get_logger(__name__)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Directory that holds the export tool: tools/proton-export under the parent
# of the directory containing this module.
TOOLS_DIR = Path(__file__).parent.parent / "tools" / "proton-export"
# File name of the CLI executable expected inside TOOLS_DIR.
BINARY_NAME = "proton-mail-export-cli"
# Full path at which the executable is expected.
BINARY_PATH = TOOLS_DIR / BINARY_NAME
# Download location of the Linux x86_64 build of the export tool.
DOWNLOAD_URL = (
    "https://proton.me/download/export-tool/proton-mail-export-cli-linux_x86_64.tar.gz"
)
# Local file name the downloaded archive is saved under (inside TOOLS_DIR).
ARCHIVE_NAME = "proton-mail-export-cli-linux_x86_64.tar.gz"
class ToolSetupError(Exception):
    """Raised when the export CLI cannot be downloaded, extracted or located."""
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def ensure_export_cli() -> Path:
    """Return the location of proton-mail-export-cli, fetching it when absent.

    When nothing exists at the expected path the user is prompted and the
    tool is downloaded and unpacked before the path is returned.

    Raises ToolSetupError if the user declines the download or if the
    download / extraction fails.
    """
    if not BINARY_PATH.exists():
        log.info("Export CLI not found at %s", BINARY_PATH)
        _prompt_and_download()
    else:
        log.debug("Export CLI found at %s", BINARY_PATH)
    return BINARY_PATH
# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------
def _prompt_and_download() -> None:
    """Ask the user whether to download the CLI, then download and install it.

    An empty answer counts as "yes". After extraction the binary is made
    executable and the downloaded archive is removed (best effort).

    Raises:
        ToolSetupError: If no interactive input is available, the user
            declines, or the binary is missing after extraction. Errors
            raised by the download and extraction steps propagate unchanged.
    """
    print(
        "\nThe Proton Mail Export CLI is required but was not found.\n"
        f"It will be downloaded from:\n {DOWNLOAD_URL}\n"
        f"and installed to:\n {BINARY_PATH}\n"
    )
    try:
        answer = input("Download now? [Y/n]: ").strip().lower()
    except EOFError as exc:
        # stdin is closed or non-interactive (service, cron, pipe): there is
        # nobody to consent, so report it as a setup failure instead of
        # leaking a bare EOFError to the caller.
        raise ToolSetupError(
            "Cannot ask for download consent: no interactive input available. "
            "Re-run in a terminal, or place the binary at:\n"
            f" {BINARY_PATH}"
        ) from exc
    if answer and answer not in ("y", "yes"):
        raise ToolSetupError(
            "Download declined. Re-run and choose Y, or place the binary at:\n"
            f" {BINARY_PATH}"
        )

    TOOLS_DIR.mkdir(parents=True, exist_ok=True)
    archive_path = TOOLS_DIR / ARCHIVE_NAME
    _download(DOWNLOAD_URL, archive_path)
    _extract(archive_path, TOOLS_DIR)

    if not BINARY_PATH.exists():
        raise ToolSetupError(
            f"Extraction completed but '{BINARY_NAME}' not found in {TOOLS_DIR}.\n"
            "The archive layout may have changed — check the contents manually."
        )

    # Ensure the binary is executable
    BINARY_PATH.chmod(BINARY_PATH.stat().st_mode | 0o755)
    log.info("Export CLI ready at %s", BINARY_PATH)

    # Remove the archive to keep the tools directory tidy; failing to delete
    # it is harmless, so the error is deliberately ignored.
    try:
        archive_path.unlink()
    except OSError:
        pass
def _download(url: str, dest: Path) -> None:
    """Fetch *url* into *dest* by invoking wget, which prints its own progress.

    Raises ToolSetupError when wget is not on PATH or exits with a non-zero
    status; in the latter case any partially written file is removed first.
    """
    if shutil.which("wget") is None:
        raise ToolSetupError(
            "'wget' is required to download the export tool but was not found on PATH."
        )

    log.info("Downloading %s", url)
    command = ["wget", "--show-progress", "-O", str(dest), url]
    exit_code = subprocess.run(command, check=False).returncode

    if exit_code == 0:
        log.info("Download complete: %s", dest.name)
        return

    # wget failed: do not leave a truncated archive behind.
    if dest.exists():
        dest.unlink()
    raise ToolSetupError(
        f"wget exited with code {exit_code}. Check your network connection."
    )
def _extract(archive_path: Path, dest_dir: Path) -> None:
    """Extract a .tar.gz archive into *dest_dir* and hoist the binary if nested.

    Members with absolute paths or a ``..`` path component are skipped.
    Checking names alone does not stop link or device members from reaching
    outside *dest_dir*, so extraction additionally uses tarfile's ``data``
    filter when the running Python provides it. On older interpreters only
    regular files and directories are extracted.

    If the binary is not directly in the tools directory afterwards, the
    first regular file named like it anywhere under *dest_dir* is moved to
    the expected location.

    Args:
        archive_path: Path of the downloaded ``.tar.gz`` archive.
        dest_dir: Directory to extract into.

    Raises:
        ToolSetupError: If the archive cannot be read or extracted, or the
            binary cannot be moved into place.
    """
    log.info("Extracting %s", archive_path.name)
    # Extraction filters are not available on every supported interpreter.
    has_data_filter = hasattr(tarfile, "data_filter")
    try:
        with tarfile.open(archive_path, "r:gz") as tar:
            safe_members = []
            for member in tar.getmembers():
                # Safety: skip any members with absolute paths or path traversal
                if os.path.isabs(member.name) or ".." in Path(member.name).parts:
                    log.warning("Skipping unsafe archive member: %s", member.name)
                    continue
                # Without the data filter, links and special files cannot be
                # vetted reliably, so only plain files/directories are kept.
                if not has_data_filter and not (member.isfile() or member.isdir()):
                    log.warning(
                        "Skipping non-regular archive member: %s", member.name
                    )
                    continue
                safe_members.append(member)

            if has_data_filter:
                tar.extractall(path=dest_dir, members=safe_members, filter="data")
            else:
                tar.extractall(path=dest_dir, members=safe_members)
    except (tarfile.TarError, OSError, EOFError) as exc:
        # OSError/EOFError cover unreadable, corrupt or truncated archives
        # that do not surface as TarError.
        raise ToolSetupError(f"Failed to extract archive: {exc}") from exc

    # If the binary landed inside a subdirectory, hoist it up
    if not BINARY_PATH.exists():
        for candidate in dest_dir.rglob(BINARY_NAME):
            if not candidate.is_file():
                # A directory that merely shares the binary's name.
                continue
            try:
                shutil.move(str(candidate), str(BINARY_PATH))
            except OSError as exc:
                raise ToolSetupError(
                    f"Failed to move {candidate} to {BINARY_PATH}: {exc}"
                ) from exc
            log.debug("Moved binary from %s to %s", candidate, BINARY_PATH)
            break