Created
April 12, 2026 14:07
-
-
Save gioxx/785b93b9a0adc747d1d0f5541f3c0ed7 to your computer and use it in GitHub Desktop.
Anonimizza i log di MailArchiver sostituendo con [REDACTED] le informazioni sensibili: oggetto delle e-mail, indirizzi (From/To) e nome utente nei login
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| anonymize_maillog.py | |
| Anonimizza i log di MailArchiver sostituendo con [REDACTED] le informazioni sensibili: | |
| - Oggetto delle email | |
| - Indirizzi email (From/To) | |
| - Nome utente nei login | |
| """ | |
| import re | |
| import sys | |
| import argparse | |
| from pathlib import Path | |
# Generic e-mail address pattern; used as the final catch-all pass for any
# address that the more specific patterns did not already cover.
EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}')

# "Archived email: SUBJECT, From: ..., To: ..., Account: ..."
# The subject may be empty: "Archived email: , From: ...".
# Odd groups capture the literal labels, even groups the sensitive fields.
ARCHIVED_RE = re.compile(
    r'(Archived email: )(.*?)(, From: )(.*?)(, To: )(.*?)(, Account: )',
)

# "Successfully saved email with N attachments" carries no sensitive data
# and is deliberately left untouched.

# "User 'USERNAME' signed in successfully"
USER_SIGNIN_RE = re.compile(r"(User ')([^']+)(' signed in successfully)")

# "Updated user: USERNAME (ID: N)"
# The username is matched lazily up to the " (ID: N)" suffix instead of with
# \S+, so that usernames containing spaces are redacted as well.
USER_UPDATE_RE = re.compile(r"(Updated user: )(.+?)( \(ID: \d+\))")

# Proper names in "Cleaned up" / "Processed" lines carry no sensitive data.
def redact_email(match):
    """Return the fixed placeholder used in place of an e-mail address.

    The signature is shaped like an ``re.sub`` replacement callback; the
    ``match`` argument is accepted but never inspected.
    """
    placeholder = '[REDACTED_EMAIL]'
    return placeholder
def redact_archived(match):
    """Rebuild an "Archived email" match with its sensitive fields masked.

    Groups 1, 3, 5 and 7 (the literal labels) are copied verbatim.
    Groups 2, 4 and 6 (subject, From, To) are replaced by a placeholder,
    unless the field is blank after stripping whitespace, in which case it
    is emitted as an empty string.

    Args:
        match: a match object exposing at least seven groups.

    Returns:
        The reassembled text covering the whole match.
    """
    placeholders = {
        2: '[REDACTED_SUBJECT]',
        4: '[REDACTED_EMAIL]',
        6: '[REDACTED_EMAIL]',
    }
    pieces = []
    for index in range(1, 8):
        value = match.group(index)
        if index in placeholders:
            # A blank field stays blank (e.g. To on sent mail that has no
            # explicit recipient) rather than gaining a placeholder.
            value = placeholders[index] if value.strip() else ''
        pieces.append(value)
    return ''.join(pieces)
def redact_user_signin(match):
    """Mask the username captured in a sign-in match.

    Group 1 (text before the name) and group 3 (text after it) are kept;
    whatever sat between them is replaced by ``[REDACTED_USER]``.
    """
    prefix, suffix = match.group(1, 3)
    return f'{prefix}[REDACTED_USER]{suffix}'
def redact_user_update(match):
    """Mask the username captured in a user-update match.

    Group 1 and group 3 of the match are preserved as they are; the text
    between them is swapped for ``[REDACTED_USER]``.
    """
    return ''.join((match.group(1), '[REDACTED_USER]', match.group(3)))
def anonymize_line(line: str) -> str:
    """Apply every redaction pass to one log line and return the result.

    The passes run in a fixed order, each working on the output of the
    previous one:

    1. "Archived email: ..." lines (subject, From, To)
    2. user sign-in messages
    3. user update messages
    4. any e-mail address still left (error messages, checkpoints, ...)
    """
    passes = (
        (ARCHIVED_RE, redact_archived),
        (USER_SIGNIN_RE, redact_user_signin),
        (USER_UPDATE_RE, redact_user_update),
        (EMAIL_RE, '[REDACTED_EMAIL]'),
    )
    for pattern, replacement in passes:
        line = pattern.sub(replacement, line)
    return line
def anonymize_log(input_path: Path, output_path: Path | None) -> None:
    """Anonymize a whole log file.

    The input is read in one go as UTF-8, every line is passed through
    ``anonymize_line`` and the pieces are joined back together.

    Args:
        input_path: log file to read.
        output_path: destination file, written as UTF-8 and followed by a
            confirmation message on stdout; when falsy the anonymized text
            is written to stdout instead.
    """
    text = input_path.read_text(encoding='utf-8')
    # Split on '\n' only. str.splitlines() also breaks on characters such as
    # form feed, NEL, U+2028 and U+2029, which may legitimately occur inside
    # an e-mail subject; a record cut in half there would no longer match
    # the line-oriented patterns and its content would be left unredacted.
    result = '\n'.join(anonymize_line(line) for line in text.split('\n'))
    if output_path:
        output_path.write_text(result, encoding='utf-8')
        print(f"✓ Output scritto in: {output_path}")
    else:
        sys.stdout.write(result)
def main():
    """Command-line entry point.

    Parses the positional input path and the optional ``-o/--output``
    path, exits with status 1 (after a message on stderr) when the input
    path does not exist, and otherwise hands both paths to
    ``anonymize_log``.
    """
    cli = argparse.ArgumentParser(description='Anonimizza i log di MailArchiver.')
    cli.add_argument('input', help='File di log da anonimizzare')
    cli.add_argument(
        '-o', '--output',
        default=None,
        help='File di output (default: stampa su stdout)',
    )
    options = cli.parse_args()

    source = Path(options.input)
    if not source.exists():
        print(f"Errore: file non trovato: {source}", file=sys.stderr)
        sys.exit(1)

    # No -o given (or an empty value): anonymize_log receives None.
    destination = None if not options.output else Path(options.output)
    anonymize_log(source, destination)
# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment