fesplugas (owner)

Revisions

gist: 145673 Download_button fork
public
Description:
Remove duplicated emails on Mail.app
Public Clone URL: git://gist.github.com/145673.git
Embed All Files: show embed
Text only #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#! /usr/bin/perl -w
 
# Script to report on, and optionally delete, duplicate email messages
# (messages sharing a Message-ID) stored in mbox-like files, one message
# per file. Reads a list of file names on stdin, so can be used as:
# find . -name '*.emlx' -print | script
# Use -v for verbose output about each duplicate file.
# Use -q to suppress the summary at the end.
# Use -d (with care :-) to delete duplicates.
 
# Usage:
# $ find . -name '*.emlx' -print | ./duplicate.pl -v
 
use strict;
 
use vars qw($opt_d $opt_q $opt_v);
use Getopt::Std;
 
# State shared by the scan loop and the final summary.
my %ids = ();    # Message-ID => name of the first file seen with that ID
my $dups = 0;    # files whose Message-ID had already been recorded
my $unique = 0;  # files that introduced a new Message-ID
my $total = 0;   # $dups + $unique, computed just before the summary
 
# Parse -d (delete), -q (quiet) and -v (verbose) into $opt_d/$opt_q/$opt_v.
# getopts returns false when it meets an unknown switch; stop instead of
# carrying on, because a mistyped option next to -d could otherwise lead to
# files being deleted in a mode the user did not ask for.
getopts('dqv')
  or die "Usage: $0 [-d] [-q] [-v] < list-of-files\n";
 
# Scan every file named on input (one path per line) and classify it by the
# Message-ID found in its header section.  The first file seen with a given
# ID is remembered in %ids; later files carrying the same ID are counted as
# duplicates, reported with -v and removed with -d.
# The list is read line by line rather than slurped, so arbitrarily long
# file lists do not have to fit in memory.
FILE:
while (my $file = <>) {
  chomp $file;

  # Three-argument open on a lexical handle: the name is always treated as a
  # plain path, so blanks or characters such as '>' and '|' in a file name
  # cannot change the open mode or run a command.
  my $mbox;
  if (!open $mbox, '<', $file) {
    print "Warning: can't open file $file: $!, skipping.\n";
    next FILE;
  }

  # Collect the header section only; the first empty line (LF or CRLF)
  # ends it, so the message body is never scanned.
  my $header = '';
  while (my $line = <$mbox>) {
    last if $line =~ /^\r?$/;
    $header .= $line;
  }
  close $mbox;

  # Undo header folding so that an ID placed on a continuation line
  # ("Message-ID:" followed by a newline and an indented "<...>") is found.
  $header =~ s/\r?\n(?=[ \t])//g;

  # First Message-ID header wins.  Files without one (including files that
  # end before any blank line) are reported and left out of the counts.
  my ($id) = $header =~ /^Message-ID:[ \t]*(<[^>\n]+>)/mi;
  if (!defined $id) {
    print "Warning: no Message-ID: header in file $file, skipping.\n";
    next FILE;
  }

  # exists, not truth: a file literally named "0" must still be remembered.
  if (!exists $ids{$id}) {
    $ids{$id} = $file;
    $unique++;
    next FILE;
  }

  # The same path listed twice is not a duplicate of itself; deleting it
  # would destroy the only copy.
  next FILE if $ids{$id} eq $file;

  $dups++;
  my $outcome = '';
  if ($opt_d) {
    # Only claim the file was deleted when unlink actually removed it.
    if (unlink $file) {
      $outcome = ', deleted';
    }
    else {
      print "Warning: can't delete file $file: $!.\n";
    }
  }
  print "File $file is a duplicate of $ids{$id}$outcome.\n" if ($opt_v);
}
 
# Add up every file that carried a Message-ID and, unless -q was given,
# print the one-line summary before ending with a success status.
$total = $unique + $dups;
unless ($opt_q) {
  print "Summary: $total messages, $unique unique and $dups duplicates.\n";
}
exit 0;