Skip to content

Instantly share code, notes, and snippets.

@gioxx
Created April 12, 2026 14:07
Show Gist options
  • Select an option

  • Save gioxx/785b93b9a0adc747d1d0f5541f3c0ed7 to your computer and use it in GitHub Desktop.

Select an option

Save gioxx/785b93b9a0adc747d1d0f5541f3c0ed7 to your computer and use it in GitHub Desktop.
Anonimizza i log di MailArchiver sostituendo con [REDACTED] le informazioni sensibili: oggetto delle e-mail, indirizzi (From/To) e nome utente nei login
#!/usr/bin/env python3
"""
anonymize_maillog.py
Anonimizza i log di MailArchiver sostituendo con [REDACTED] le informazioni sensibili:
- Oggetto delle email
- Indirizzi email (From/To)
- Nome utente nei login
"""
import re
import sys
import argparse
from pathlib import Path
# Matches an e-mail address anywhere in a line; used as the final catch-all
# pass after the structured rules below.
EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}')

# "Archived email: SUBJECT, From: ..., To: ..., Account: ..."
# Any of the three captured fields may be empty, e.g. "Archived email: , From:".
ARCHIVED_RE = re.compile(
    r'(Archived email: )(.*?)(, From: )(.*?)(, To: )(.*?)(, Account: )',
)

# "Successfully saved email with N attachments" -> no sensitive data, left as is.

# "User 'USERNAME' signed in successfully"
# The name is captured with a lazy ".+?" (not "[^']+") so that user names
# which themselves contain an apostrophe are still matched and redacted.
USER_SIGNIN_RE = re.compile(r"(User ')(.+?)(' signed in successfully)")

# "Updated user: USERNAME (ID: N)"
# The name is captured with a lazy ".+?" (not "\S+") so that user names
# containing whitespace are still matched and redacted.
USER_UPDATE_RE = re.compile(r"(Updated user: )(.+?)( \(ID: \d+\))")

# Proper names in the "Cleaned up" or "Processed" fields -> no sensitive data.
def redact_email(match):
    """Replacement callback for ``re.sub`` that hides an e-mail address.

    The match object is ignored: every match is replaced by the same
    fixed placeholder.
    """
    placeholder = '[REDACTED_EMAIL]'
    return placeholder
def redact_archived(match):
    """Build the replacement text for one "Archived email: ..." match.

    Groups 1, 3, 5 and 7 of the match (the literal labels) are copied
    through unchanged. Groups 2, 4 and 6 (subject, sender, recipient) are
    each replaced by a placeholder, unless the field is blank after
    trimming, in which case it is emitted as an empty string.
    """
    def mask(value, tag):
        # A field that is empty once stripped stays empty rather than
        # gaining a placeholder (the recipient, for instance, may be blank).
        return tag if value.strip() else ''

    (label_subject, subject,
     label_from, sender,
     label_to, recipient,
     label_account) = match.group(1, 2, 3, 4, 5, 6, 7)

    pieces = (
        label_subject, mask(subject, '[REDACTED_SUBJECT]'),
        label_from, mask(sender, '[REDACTED_EMAIL]'),
        label_to, mask(recipient, '[REDACTED_EMAIL]'),
        label_account,
    )
    return ''.join(pieces)
def redact_user_signin(match):
    """Replacement callback for a sign-in match.

    Keeps groups 1 and 3 of the match (the text around the user name) and
    puts a fixed placeholder between them in place of group 2.
    """
    before, after = match.group(1, 3)
    return ''.join((before, '[REDACTED_USER]', after))
def redact_user_update(match):
    """Replacement callback for a user-update match.

    Group 1 (leading label) and group 3 (trailing text) of the match are
    preserved; the user name in group 2 is swapped for a fixed placeholder.
    """
    head = match.group(1)
    tail = match.group(3)
    return ''.join([head, '[REDACTED_USER]', tail])
def anonymize_line(line: str) -> str:
    """Return *line* with every redaction rule applied.

    The rules run in a fixed sequence: the "Archived email" record, the
    user sign-in message, the user-update message, and finally a sweep
    that replaces any e-mail address still present (for example inside
    error or checkpoint messages).
    """
    rules = (
        (ARCHIVED_RE, redact_archived),        # 1. "Archived email: ..." record
        (USER_SIGNIN_RE, redact_user_signin),  # 2. user sign-in
        (USER_UPDATE_RE, redact_user_update),  # 3. user update
        (EMAIL_RE, '[REDACTED_EMAIL]'),        # 4. any leftover address
    )
    for pattern, replacement in rules:
        line = pattern.sub(replacement, line)
    return line
def anonymize_log(input_path: Path, output_path: Path | None) -> None:
    """Anonymize a whole log file, line by line.

    Args:
        input_path: Log file to read; it is decoded as UTF-8.
        output_path: Destination file, written as UTF-8. When falsy
            (``None``), the anonymized text is written to stdout instead.

    The input is read fully into memory before anything is written.
    """
    text = input_path.read_text(encoding='utf-8')
    # Split on '\n' only. str.splitlines() also breaks on VT, FF, FS/GS/RS,
    # NEL, U+2028 and U+2029; any of those inside a subject would cut one
    # log record into fragments that the redaction patterns no longer match,
    # leaking the subject and addresses. Joining with '\n' restores the
    # text exactly, including a trailing newline if one was present.
    result = '\n'.join(anonymize_line(line) for line in text.split('\n'))
    if output_path:
        output_path.write_text(result, encoding='utf-8')
        print(f"✓ Output scritto in: {output_path}")
    else:
        sys.stdout.write(result)
def main() -> None:
    """Command-line entry point: parse the arguments and anonymize the log.

    Accepts a positional ``input`` path and an optional ``-o/--output``
    path; without ``--output`` the result is printed to stdout. Prints an
    error to stderr and exits with status 1 when the input path does not
    exist or is a directory.
    """
    parser = argparse.ArgumentParser(
        description='Anonimizza i log di MailArchiver.'
    )
    parser.add_argument('input', help='File di log da anonimizzare')
    parser.add_argument(
        '-o', '--output',
        help='File di output (default: stampa su stdout)',
        default=None
    )
    args = parser.parse_args()

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Errore: file non trovato: {input_path}", file=sys.stderr)
        sys.exit(1)
    # A directory passes exists() but cannot be read as text; reject it here
    # with a clean message instead of letting the read fail with a traceback.
    if input_path.is_dir():
        print(f"Errore: il percorso è una directory: {input_path}", file=sys.stderr)
        sys.exit(1)

    output_path = Path(args.output) if args.output else None
    anonymize_log(input_path, output_path)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment