|
#!/usr/bin/env ruby |
|
# |
|
# Transforms subfolders that end in ".pst.*" and their contents into a CSV |
|
# named "emails.csv" that's ready to import into Overview. |
|
# |
|
# The subfolders must all have been generated by pffexport. |
|
|
|
# Normalizes an address string taken from pffexport output.
#
# An address that starts with "/" is treated as an LDAP-style path rather
# than an email address. The values of its "O=" and "CN=" components are
# collected, reversed, joined with "@" and downcased, so
# "/O=ORG/OU=Unit/CN=RECIPIENTS/CN=JDoe" becomes "jdoe@recipients@org".
# Any other address is returned unchanged.
#
# @param email [String] the raw address
# @return [String] the normalized address
def parse_email(email)
  # Ordinary addresses are passed through as-is. No regex has been matched at
  # this point, so $1 would be nil here and must not be used as the result.
  return email unless email.start_with?('/')

  email
    .split('/')
    .map { |component| component[/(?:O|CN)=(.*)/, 1] } # nil for other components
    .compact
    .reverse
    .join('@')
    .downcase
end
|
|
|
# Formats a person as a single display string.
#
# Returns +name+ on its own when +email+ is nil, empty, or identical to
# +name+; otherwise returns "name <email>".
#
# @param name [String, nil] display name
# @param email [String, nil] address
# @return [String, nil] the formatted person
def build_person(name, email)
  email_is_redundant = email.nil? || email == '' || name == email
  return name if email_is_redundant

  "#{name} <#{email}>"
end
|
|
|
# Reads a "Recipients.txt" file and groups its recipients by type.
#
# The file is scanned line by line for "Key: value" pairs. The keys
# "Display name", "Email address" and "Recipient type" fill in the current
# recipient; other keys are ignored. Any line that is not a "Key: value"
# pair ends the current recipient, as does the end of the file.
#
# A recipient whose type mentions none of to/cc/bcc (or that has no type
# line at all) is filed under :to.
#
# @param filename [String] path to the recipients file; a missing file
#   yields empty results
# @return [Hash{Symbol=>String}] the keys :to, :cc and :bcc, each mapped to
#   a ", "-joined list of people formatted by build_person
def parse_recipients_txt(filename)
  ret = { to: [], cc: [], bcc: [] }

  # All three are nil while no recipient is being collected, so that blank
  # lines and empty files never produce a phantom recipient.
  name = nil
  email = nil
  recipient_type = nil

  # Stores the recipient collected so far (if any) and resets the state.
  flush = lambda do
    if name || email || recipient_type
      ret[recipient_type || :to] << build_person(name, email)
    end
    name = nil
    email = nil
    recipient_type = nil
  end

  if File.exist?(filename)
    IO.readlines(filename).each do |raw_line|
      # Replace invalid byte sequences so the regex match below cannot raise.
      line = raw_line.scrub

      if line =~ /^(.*?):\s*(.*)/
        key = $1
        value = $2

        case key
        when 'Display name' then name = value
        when 'Email address' then email = parse_email(value)
        when 'Recipient type'
          # Key off the matched word only, so surrounding text in the value
          # cannot produce a symbol that is missing from ret.
          type_word = value[/to|cc|bcc/i]
          recipient_type = type_word ? type_word.downcase.to_sym : :to
        end
      else
        flush.call
      end
    end

    # The last recipient is not necessarily followed by a separator line.
    flush.call
  end

  ret.each_key { |type| ret[type] = ret[type].join(', ') }
  ret
end
|
|
|
# Shortens a date string to its leading part plus a trailing word.
#
# When the string contains a colon followed by two digits and, later on, a
# space followed by a word, the result is the text up to and including the
# first such ":NN", a single space, and the last space-preceded word. For
# example "Jun 20, 2001 10:15:30.000000000 UTC" becomes
# "Jun 20, 2001 10:15 UTC". A string that does not fit is returned as is.
#
# @param date_s [String] the raw date text
# @return [String] the shortened date text
def parse_date_s(date_s)
  return date_s unless date_s =~ /(.*?:\d\d).* (\w+)/

  leading_part = Regexp.last_match(1)
  trailing_word = Regexp.last_match(2)
  [leading_part, trailing_word].join(' ')
end
|
|
|
# Reads an "OutlookHeaders.txt" file and extracts the sender, delivery time
# and subject.
#
# The file is scanned line by line for "Key: value" pairs; only the keys
# "Sender name", "Sender email address", "Delivery time" and "Subject" are
# used. If a key occurs more than once, the last occurrence wins.
#
# @param filename [String] path to the headers file
# @return [Hash{Symbol=>String}] :from (formatted by build_person), :date
#   (processed by parse_date_s) and :subject; fields that are absent from
#   the file come back as empty strings
def parse_outlook_headers_txt(filename)
  from_name = ''
  from_email = ''
  date_s = ''
  subject = ''

  IO.readlines(filename, binmode: true).each do |raw_line|
    # The lines arrive as raw bytes. Tag them as UTF-8 and replace only the
    # byte sequences that are not valid UTF-8, so that well-formed non-ASCII
    # text (accented names, subjects) survives intact.
    line = raw_line.force_encoding(Encoding::UTF_8).scrub

    pair = line.match(/^(.*?):\s*(.*)/)
    next unless pair

    key, value = pair.captures

    case key
    when 'Sender name' then from_name = value
    when 'Sender email address' then from_email = parse_email(value)
    when 'Delivery time' then date_s = parse_date_s(value)
    when 'Subject' then subject = value
    end
  end

  {
    from: build_person(from_name, from_email),
    date: date_s,
    subject: subject
  }
end
|
|
|
# Renders message headers as a block of "Label: value" lines.
#
# The Date and From lines always come first and the Subject line always
# comes last. Between them, a To, cc or bcc line is emitted only for the
# recipient lists that are not empty.
#
# @param headers [Hash] uses the keys :date, :from and :subject
# @param recipients [Hash] uses the keys :to, :cc and :bcc
# @return [String] the header lines joined with newlines (no trailing
#   newline)
def headers_to_text(headers, recipients)
  lines = ["Date: #{headers[:date]}", "From: #{headers[:from]}"]

  { to: 'To', cc: 'cc', bcc: 'bcc' }.each do |field, label|
    people = recipients[field]
    lines << "#{label}: #{people}" unless people.empty?
  end

  lines << "Subject: #{headers[:subject]}"
  lines.join("\n")
end
|
|
|
# Prepares one value for inclusion in a CSV row.
#
# A value containing a control character (0x00-0x1f, which includes
# newlines and tabs), a double quote or a comma is wrapped in double quotes,
# with each embedded double quote doubled. Any other value is returned
# untouched.
#
# @param datum [String, nil] the value to write
# @return [String, nil] the value, quoted if necessary
def quote_csv_value(datum)
  needs_quoting = datum =~ /[\x00-\x1f",]/
  return datum unless needs_quoting

  escaped = datum.gsub('"', '""')
  %("#{escaped}")
end
|
|
|
# Builds the CSV row for one exported message directory.
#
# Reads Recipients.txt, OutlookHeaders.txt and Message.txt from the
# directory and returns a single line with the columns path, from, subject,
# date, to, cc, bcc and text, where text is the rendered headers, a blank
# line, and the message body.
#
# The path column is the directory name with three simplifications:
#   * a "<name>.pst.export" component is shortened to "<name>.pst"
#   * the first "/Top of Personal Folders/" component is removed
#   * a trailing "/Message<digits>" becomes "/<digits>"
#
# @param message_dirname [String] directory holding the message's files
# @return [String] one CSV line, terminated by a newline
def message_to_csv_row(message_dirname)
  recipients = parse_recipients_txt("#{message_dirname}/Recipients.txt")
  headers = parse_outlook_headers_txt("#{message_dirname}/OutlookHeaders.txt")
  text = IO.read("#{message_dirname}/Message.txt")

  # The dots are escaped so that only a literal ".pst.export" suffix is
  # shortened, not any name that merely contains "pst" and "export".
  path = message_dirname
    .sub(%r{([^/]+\.pst)\.export}, '\1')
    .sub(%r{/Top of Personal Folders/}, '/')
    .sub(%r{/Message(\d+)$}, '/\1')

  full_text = "#{headers_to_text(headers, recipients)}\n\n#{text}"

  columns = [
    path,
    headers[:from],
    headers[:subject],
    headers[:date],
    recipients[:to],
    recipients[:cc],
    recipients[:bcc],
    full_text
  ]

  columns.map { |column| quote_csv_value(column) }.join(',') + "\n"
end
|
|
|
# Script entry point: writes "emails.csv" in the current directory.
#
# The file starts with the column header line, followed by one row for every
# Message.txt found inside a five-digit "MessageNNNNN" directory anywhere
# beneath a "*.pst.*" directory in the current directory.
File.open('emails.csv', 'wb') do |csv|
  csv.write("title,from,subject,date,to,cc,bcc,text\n")

  message_files = Dir.glob('*.pst.*/**/Message[0-9][0-9][0-9][0-9][0-9]/Message.txt')
  message_files.each do |message_file|
    message_dir = File.dirname(message_file)
    csv.write(message_to_csv_row(message_dir))
  end
end