Created
May 4, 2012 10:18
-
-
Save ringe/2593827 to your computer and use it in GitHub Desktop.
Find and extract every email address from an IMAP folder stored locally as a Maildir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Frames of the text spinner shown while the scan runs. The array is rotated
# in place by the progress loop, so it must stay mutable (not frozen).
chars = %w[| / - \\]
# Collects e-mail addresses from every message file stored in a directory
# named `cur` or `new` anywhere below +base_folder+, and writes them
# (lower-cased, de-duplicated, sorted, joined with ", ") to a text file.
#
# A line contributes addresses only when it contains `From:`, `Cc:` or a
# line-initial `To:` and ends that match in an angle-bracketed address; every
# address between the field name and that final `<...>` is collected. The
# whole message file is scanned, not just its header section, and folded
# (multi-line) header fields are not unfolded.
#
# @param base_folder [String] directory that is searched recursively
# @param output_path [String] file that is (over)written with the result;
#   defaults to `emails.txt` in the directory named by ENV['HOME']
# @return [Integer] number of bytes written to +output_path+
def find_all_email_addresses_in(base_folder, output_path: File.join(ENV.fetch('HOME'), 'emails.txt'))
  # http://stackoverflow.com/questions/535644/find-email-addresses-in-large-data-stream
  # The TLD may be up to 63 letters long; a {2,4} limit would drop addresses
  # under longer top-level domains such as ".technology".
  address = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,63}\b/
  address_line = /(?:From|^To|Cc):.*<#{address}>/

  # Walk the tree with Dir.glob instead of shelling out to `find`, so folder
  # names containing spaces or shell metacharacters are handled safely.
  # FNM_DOTMATCH makes `**` descend into dot-directories, as `find` does.
  mail_dirs = Dir.glob('**/{cur,new}', File::FNM_DOTMATCH, base: base_folder)
                 .map { |relative| File.join(base_folder, relative) }
                 .select { |dir| File.directory?(dir) }

  # `base:` keeps glob characters in the directory name from being treated as
  # a pattern; File.file? skips anything that is not a regular file.
  message_files = mail_dirs.flat_map do |dir|
    Dir.glob('*', base: dir).map { |name| File.join(dir, name) }
  end.select { |path| File.file?(path) }

  emails = message_files.flat_map do |path|
    # Read raw bytes: a message with bytes that are invalid in the default
    # encoding would otherwise make String#scan raise ArgumentError.
    content = File.binread(path)
    content.scan(address_line).join("\n").downcase.scan(address)
  end

  File.write(output_path, emails.uniq.sort.join(', '))
end
# Run the scan of the current directory in a background thread and animate a
# one-character spinner on stdout until it finishes.
worker = Thread.new { find_all_email_addresses_in(".") }
while worker.alive?
  print chars.first
  # Flush explicitly so the frame is visible even when stdout is buffered.
  $stdout.flush
  sleep 0.1
  # Step back over the frame so the next one overwrites it.
  print "\b"
  chars.rotate!
end
# Thread#join re-raises any exception that terminated the worker.
worker.join
puts "Saved all emails found to #{ENV['HOME']}/emails.txt"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is for extracting from a local Maildir, not from IMAP, no?
You'll also (likely) want to make sure you're extracting from the headers only and not the body (stop at the first blank line; see https://tools.ietf.org/html/rfc5322#page-7), and handle multiple comma-separated addresses on a single header line.