Skip to content

Instantly share code, notes, and snippets.

@agisga
Last active January 11, 2017 05:37
Show Gist options
  • Save agisga/2e1a350beeb73e450a4fb1ccfadfc6b4 to your computer and use it in GitHub Desktop.
Save agisga/2e1a350beeb73e450a4fb1ccfadfc6b4 to your computer and use it in GitHub Desktop.
# This code was used for the following blog post:
#
# http://www.alexejgossmann.com/email_data_analysis/
#
# Part of the code is based on: http://www.johnantony.com/sending-and-receiving-outlook-email-via-ruby/
# All private information (such as email addresses and passwords) was replaced with ***********
require 'mail'
require 'daru'
#--- configure delivery and retrieval methods
# The account name and password are shared by the SMTP and IMAP settings, so
# they are resolved once here. They are read from the environment
# (MAIL_USER_NAME / MAIL_PASSWORD) so real secrets do not have to be written
# into this file; the redacted placeholders remain as fallbacks, which keeps
# the previous behaviour when the variables are not set.
mail_user_name = ENV.fetch('MAIL_USER_NAME', '*******@tulane.edu')
mail_password = ENV.fetch('MAIL_PASSWORD', '**********')
Mail.defaults do
  # Outgoing mail: SMTP on port 587 with LOGIN authentication and STARTTLS enabled.
  delivery_method :smtp, {
    :address => 'smtp.office365.com',
    :port => 587,
    :domain => 'tulane.edu',
    :user_name => mail_user_name,
    :password => mail_password,
    :authentication => :login,
    :enable_starttls_auto => true
  }
  # Incoming mail: IMAP on port 993 with SSL enabled.
  retriever_method :imap, {
    :address => 'outlook.office365.com',
    :port => 993,
    :user_name => mail_user_name,
    :password => mail_password,
    :enable_ssl => true
  }
end
#--- retrieve all emails from INBOX
emails = Mail.all(read_only: true)
puts "Number of emails retrieved: #{emails.length}"
#--- extract all emails from PhD advisors
# Advisor addresses (redacted). They are kept as plain strings and escaped by
# Regexp.union, so "." only matches a literal dot and the "*" placeholders can
# no longer produce an invalid regex literal. The union is rebuilt with
# IGNORECASE because addresses are compared case-insensitively.
advisor_addresses = %w[
  *******@tulane.edu
  *******@tulane.edu
  *******@gmail.com
  *******@tulane.edu
]
advisor_pattern = Regexp.new(Regexp.union(advisor_addresses).source, Regexp::IGNORECASE)
emails_from_advisors = emails.select do |email|
  # to_s turns whatever `from` returns into a single string that can be searched.
  advisor_pattern =~ email.from.to_s
end
puts "Number of emails received from all advisors: #{emails_from_advisors.length}"
#--- put the emails into a data frame and save it
# Data frame column => reader called on each email to obtain the column's value
# (email fields defined at https://github.com/mikel/mail/tree/master/lib/mail/fields)
header_readers = {
  'subject'   => :subject,
  'from'      => :from,
  'to'        => :to,
  'cc'        => :cc,
  'bcc'       => :bcc,
  'date'      => :date,
  'messageid' => :message_id,
  'replyto'   => :reply_to,
  'inreplyto' => :in_reply_to
}
n = emails_from_advisors.length
# Every column starts out as n empty cells. 'body' is allocated as the last
# column as well, but it stays empty because filling it is switched off
# (see the commented-out line inside the loop).
blank_columns = (header_readers.keys + ['body']).map { |column| [column, Array.new(n)] }.to_h
emails_from_advisors_df = Daru::DataFrame.new(blank_columns)
emails_from_advisors.each_with_index do |email, ind|
  header_readers.each do |column, reader|
    # (commas are replaced with semicolons in order to write csv later)
    emails_from_advisors_df[column][ind] = email.public_send(reader).to_s.tr(',', ';')
  end
  #emails_from_advisors_df['body'][ind] = email.body.to_s.gsub(",", ";")
end
emails_from_advisors_df.write_csv "emails_from_advisors.csv"
#--- retrieve all emails from "Sent Items"
sent = Mail.all(mailbox: "Sent Items", read_only: true)
puts "Number of emails retrieved: #{sent.length}"
#--- extract all emails to PhD advisors
# Advisor addresses (redacted). They are kept as plain strings and escaped by
# Regexp.union, so "." only matches a literal dot and the "*" placeholders can
# no longer produce an invalid regex literal. One pattern replaces the three
# copies of the same regex; it is rebuilt with IGNORECASE because addresses
# are compared case-insensitively.
advisor_addresses = %w[
  *******@tulane.edu
  *******@tulane.edu
  *******@gmail.com
  *******@tulane.edu
]
advisor_pattern = Regexp.new(Regexp.union(advisor_addresses).source, Regexp::IGNORECASE)
emails_to_advisors = sent.select do |email|
  # An email counts if an advisor appears in any recipient field; the fields
  # are checked in the order to, cc, bcc and checking stops at the first hit.
  %i[to cc bcc].any? do |recipient_field|
    advisor_pattern =~ email.public_send(recipient_field).to_s
  end
end
puts "Number of emails sent to all advisors: #{emails_to_advisors.length}"
#--- put the emails sent to PhD advisors into a data frame and save it
# Data frame column => reader called on each email to obtain the column's value
# (email fields defined at https://github.com/mikel/mail/tree/master/lib/mail/fields)
header_readers = {
  'subject'   => :subject,
  'from'      => :from,
  'to'        => :to,
  'cc'        => :cc,
  'bcc'       => :bcc,
  'date'      => :date,
  'messageid' => :message_id,
  'replyto'   => :reply_to,
  'inreplyto' => :in_reply_to
}
n = emails_to_advisors.length
# Every column starts out as n empty cells. 'body' is allocated as the last
# column as well, but it stays empty because filling it is switched off
# (see the commented-out line inside the loop).
blank_columns = (header_readers.keys + ['body']).map { |column| [column, Array.new(n)] }.to_h
emails_to_advisors_df = Daru::DataFrame.new(blank_columns)
emails_to_advisors.each_with_index do |email, ind|
  header_readers.each do |column, reader|
    # (commas are replaced with semicolons in order to write csv below)
    emails_to_advisors_df[column][ind] = email.public_send(reader).to_s.tr(',', ';')
  end
  #emails_to_advisors_df['body'][ind] = email.body.to_s.gsub(",", ";")
end
emails_to_advisors_df.write_csv "emails_to_advisors.csv"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment