Last active
January 11, 2017 05:37
-
-
Save agisga/2e1a350beeb73e450a4fb1ccfadfc6b4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code was used for the following blog post:
#
# http://www.alexejgossmann.com/email_data_analysis/
#
# Part of the code is based on: http://www.johnantony.com/sending-and-receiving-outlook-email-via-ruby/
# All private information (such as email addresses and passwords) was replaced with ***********
require 'mail' | |
require 'daru' | |
#--- configure delivery and retrieval methods
# The account credentials are read from the environment (MAIL_USER and
# MAIL_PASSWORD) so real secrets do not have to be stored in this file. When a
# variable is not set, the redacted placeholder from the published script is
# used, which keeps the previous behaviour.
mail_user     = ENV.fetch('MAIL_USER', '*******@tulane.edu')
mail_password = ENV.fetch('MAIL_PASSWORD', '**********')

Mail.defaults do
  # Outgoing mail: SMTP on port 587 with STARTTLS enabled automatically.
  delivery_method :smtp, {
    address: 'smtp.office365.com',
    port: 587,
    domain: 'tulane.edu',
    user_name: mail_user,
    password: mail_password,
    authentication: :login,
    enable_starttls_auto: true
  }

  # Incoming mail: IMAP on port 993 with SSL enabled.
  retriever_method :imap, {
    address: 'outlook.office365.com',
    port: 993,
    user_name: mail_user,
    password: mail_password,
    enable_ssl: true
  }
end
#--- retrieve all emails from INBOX
emails = Mail.all(read_only: true)
puts "Number of emails retrieved: #{emails.length}"

#--- extract all emails from PhD advisors
# The advisor addresses are matched literally and case-insensitively.
# Regexp.union escapes regex metacharacters, so the "." in a domain matches
# only a dot and a "*" in an address cannot produce an invalid pattern.
advisor_addresses = [
  '*******@tulane.edu',
  '*******@tulane.edu',
  '*******@gmail.com',
  '*******@tulane.edu'
]
advisor_pattern = Regexp.new(Regexp.union(advisor_addresses).source, Regexp::IGNORECASE)

# `from` is stringified before matching, so a missing header simply fails to match.
emails_from_advisors = emails.select do |email|
  advisor_pattern =~ email.from.to_s
end
puts "Number of emails received from all advisors: #{emails_from_advisors.length}"
#--- put the emails into a data frame and save it
n = emails_from_advisors.length

# Commas are replaced with semicolons in every value in order to write csv later.
inbox_clean = ->(value) { value.to_s.tr(',', ';') }

# Data frame column => Mail::Message reader that supplies it.
# email fields defined at https://github.com/mikel/mail/tree/master/lib/mail/fields
inbox_readers = {
  'subject'   => :subject,
  'from'      => :from,
  'to'        => :to,
  'cc'        => :cc,
  'bcc'       => :bcc,
  'date'      => :date,
  'messageid' => :message_id,
  'replyto'   => :reply_to,
  'inreplyto' => :in_reply_to
}

# Build every column in one pass over the selected messages.
inbox_columns = inbox_readers.each_with_object({}) do |(column, reader), columns|
  columns[column] = emails_from_advisors.map { |email| inbox_clean.call(email.public_send(reader)) }
end

# The 'body' column is kept but left empty (nil for every row); filling it from
# email.body is intentionally disabled.
inbox_columns['body'] = Array.new(n)

emails_from_advisors_df = Daru::DataFrame.new(inbox_columns)
emails_from_advisors_df.write_csv "emails_from_advisors.csv"
#--- retrieve all emails from "Sent Items"
sent = Mail.all(mailbox: "Sent Items", read_only: true)
puts "Number of emails retrieved: #{sent.length}"

#--- extract all emails to PhD advisors
# The advisor addresses are matched literally and case-insensitively.
# Regexp.union escapes regex metacharacters, so the "." in a domain matches
# only a dot and a "*" in an address cannot produce an invalid pattern.
advisor_addresses = [
  '*******@tulane.edu',
  '*******@tulane.edu',
  '*******@gmail.com',
  '*******@tulane.edu'
]
advisor_pattern = Regexp.new(Regexp.union(advisor_addresses).source, Regexp::IGNORECASE)

# A sent message is kept when any of its to, cc or bcc headers mentions an
# advisor; the headers are checked in that order and checking stops at the
# first match.
emails_to_advisors = sent.select do |email|
  %i[to cc bcc].any? { |header| advisor_pattern =~ email.public_send(header).to_s }
end
puts "Number of emails sent to all advisors: #{emails_to_advisors.length}"
#--- put the emails sent to PhD advisors into a data frame and save it
n = emails_to_advisors.length

# Commas are replaced with semicolons in every value in order to write csv below.
sent_clean = ->(value) { value.to_s.tr(',', ';') }

# Data frame column => Mail::Message reader that supplies it.
# email fields defined at https://github.com/mikel/mail/tree/master/lib/mail/fields
sent_readers = {
  'subject'   => :subject,
  'from'      => :from,
  'to'        => :to,
  'cc'        => :cc,
  'bcc'       => :bcc,
  'date'      => :date,
  'messageid' => :message_id,
  'replyto'   => :reply_to,
  'inreplyto' => :in_reply_to
}

# Build every column in one pass over the selected messages.
sent_columns = sent_readers.each_with_object({}) do |(column, reader), columns|
  columns[column] = emails_to_advisors.map { |email| sent_clean.call(email.public_send(reader)) }
end

# The 'body' column is kept but left empty (nil for every row); filling it from
# email.body is intentionally disabled.
sent_columns['body'] = Array.new(n)

emails_to_advisors_df = Daru::DataFrame.new(sent_columns)
emails_to_advisors_df.write_csv "emails_to_advisors.csv"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment