Skip to content

Instantly share code, notes, and snippets.

@samliu
Created October 24, 2014 00:33
Show Gist options
  • Save samliu/31f6764f80e370511f60 to your computer and use it in GitHub Desktop.
Save samliu/31f6764f80e370511f60 to your computer and use it in GitHub Desktop.
# textanalysis.py
#
# Analyzing iphone text message csv dump from iBackup Viewer
# (http://www.imactools.com/iphonebackupviewer/)
#
# Requirements:
# - WordCloud (http://github.com/amueller/word_cloud)
#
# How to use:
# 1. Generate a CSV dump with iphonebackupviewer and point to it in __main__.
# 1a. Delete the first line of the CSV dump; it is a header row.
# 2. Change the numbers param in __main__ to the phone numbers you're
# interested in (two, or just one), and set input_file to the path of the
# CSV relative to this program.
# 3. Choose a TrueType font on your system to use with the wordcloud generator
# and point to it with the font variable in __main__.
#
# OR write your own program that sets these params for the local system.
# This is just a couple-hour hack for fun. DWTFYW license.
#
# Original Author: Sam Liu <sam@ambushnetworks.com>
from wordcloud import WordCloud, STOPWORDS
import datetime
import os
import re
class WCGenerator(object):
    """Word cloud generator from text messages between two people.

    Reads a header-less CSV dump of text messages, accumulates the message
    bodies per phone number (first CSV column) and renders one word cloud
    image per number that has any text.
    """

    def __init__(self, input_file='', words_to_remove=None,
                 font='',
                 numbers=None):
        """Store the configuration; no I/O happens here.

        Args:
            input_file: Path of the CSV dump to read.
            words_to_remove: Optional iterable of literal words that are
                stripped (case-insensitively) from every message body.
            font: Path of a TrueType font on your system, handed to WordCloud.
            numbers: Dict keyed by phone number; each value is a dict with a
                'text' entry that message bodies are appended to. The dict is
                mutated in place. ``None`` means "no numbers".
        """
        self.input_file = input_file
        self.words_to_remove = words_to_remove
        self.font = font
        # A missing dict previously crashed on the first `in` test; an empty
        # dict simply matches no number.
        self.numbers = numbers if numbers is not None else {}

    @staticmethod
    def _parse_timestamp(datestring):
        """Parse a 'Oct 24 2014 1:33 PM' style string into a datetime.

        strptime only honours %p (AM/PM) when the hour is parsed with %I, so
        the 12-hour pattern is tried first. The original %H pattern is kept as
        a fallback so every string that used to parse still does.

        Raises:
            ValueError: If the string matches neither pattern.
        """
        try:
            return datetime.datetime.strptime(datestring, '%b %d %Y %I:%M %p')
        except ValueError:
            return datetime.datetime.strptime(datestring, '%b %d %Y %H:%M %p')

    def _build_word_remover(self):
        """Return a compiled regex matching any unwanted word, or None."""
        if not self.words_to_remove:
            return None
        # Escape each word so entries such as 'c++' are matched literally
        # instead of being interpreted as regex syntax.
        remove = '|'.join(re.escape(word) for word in self.words_to_remove)
        return re.compile(r'(' + remove + ')', flags=re.IGNORECASE)

    def collect_messages(self):
        """Read the CSV dump and append message bodies to self.numbers.

        Each line is split on ','. Column 0 is the phone number, columns 1-3
        are the pieces of the quoted date and column 5 is taken as the message
        body. Lines whose number is not a key of self.numbers are reported and
        skipped; blank lines are ignored.

        Returns:
            self.numbers, with each matched entry's 'text' extended.

        Raises:
            ValueError: If a line's date cannot be parsed.
            IndexError: If a line for a known number has fewer than 6 columns.
        """
        # Compile once up front rather than once per matching line.
        remover = self._build_word_remover()
        with open(self.input_file) as f:
            print("Reading from " + self.input_file + "...")
            for line in f:
                if not line.strip():
                    continue
                values = line.split(',')
                number = values[0]
                datestring = ''.join(values[1:4]).strip('"')
                timestamp = self._parse_timestamp(datestring)
                # You can look at a specific month or day or anything using
                # the timestamp, e.g.:
                #     if timestamp.month != 7:
                #         continue

                if number not in self.numbers:
                    print("ERROR: " + number + " was not a valid number.")
                    continue

                # Stick all the text bodies into one string, removing unwanted
                # words from being put into the word cloud.
                # NOTE(review): the naive split means a body containing a
                # comma is cut at that comma — confirm against the dump format.
                msg = values[5]
                if remover is not None:
                    msg = remover.sub("", msg)
                self.numbers[number]['text'] += msg
        return self.numbers

    def generate_word_clouds(self):
        """Read the input file and write one '<number>.png' per number.

        Images are written next to this source file; numbers without any
        collected text are skipped.
        """
        self.collect_messages()

        # Make wordcloud from each string and save image with filename using
        # the number.
        d = os.path.dirname(__file__)
        wc = WordCloud(font_path=self.font)
        for number in self.numbers:
            text = self.numbers[number]['text']
            if not text:
                continue
            wc.generate(text)
            filename = number[1:]  # Remove the prepended '+' for the filename.
            filepath = os.path.join(d, filename + '.png')
            wc.to_file(filepath)
            print("Wrote file: " + filepath)
if __name__ == '__main__':
    # TODO(samcliu): Use argparse to take cmdline args.

    # Path of the CSV dump to analyze.
    input_file = 'putyourfilename.csv'

    # Assuming you like DroidSansMono and have it installed...
    font = '/Users/youruser/Library/Fonts/DroidSansMono.ttf'

    # One entry per phone number of interest; 'text' starts empty and is
    # filled with that number's message bodies by the generator.
    numbers = dict(
        (phone, {'name': name, 'text': ''})
        for phone, name in (
            ('+16505551234', 'Name1'),
            ('+16505554321', 'Name2'),
        )
    )

    wc_generator = WCGenerator(
        input_file=input_file,
        font=font,
        numbers=numbers,
    )
    wc_generator.generate_word_clouds()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment