Created
January 31, 2016 21:59
-
-
Save taylorzane/4a47ec5c63af62198101 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- NOTE: This SQLite3 DB is located at /Users/you/Library/Messages/chat.db
-- NOTE: You can get your chat_id from the chat table.
-- String literals are single-quoted: in standard SQL double quotes denote
-- identifiers, and SQLite only accepts "..." as a string as a legacy fallback.
select ROWID from chat where display_name = 'Display Name';
-- OR
select ROWID from chat where guid like '%SOME_PHONE_NUMBER%';
-- NOTE: This will get all messages from a particular chat thread.
-- Replace YOUR_CHAT_ID with the ROWID returned by one of the queries above.
-- chat_message_join links each chat_id to the message_id values of that chat.
select message.text
from message
inner join chat_message_join
    on chat_message_join.message_id = message.ROWID
where chat_message_join.chat_id = YOUR_CHAT_ID;
-- NOTE: You could join these two queries together...but I like to keep things somewhat readable ;)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/env/bin ruby | |
# NOTE: This file only finds the most frequently used words. It does not include emoji characters. | |
# cont: It normalizes words to only a-z and ' (to account for contractions.) | |
# TODO: This could be improved to count emojis. | |
the_file = "file_name.txt" | |
words = Hash.new 0 | |
File.foreach(the_file) do |line| | |
line_words = line.split ' ' | |
line_words.each do |word| | |
word = word.downcase.gsub(/[^a-z']/, '') | |
unless word == '' | |
words[word] += 1 | |
end | |
end | |
end | |
total_word_count = words.reduce(0) { |total, word| total + word[1] } | |
top_fifty_count = words.first(50).reduce(0) { |total, word| total + word [1]} | |
puts "Total Word Count: #{total_word_count}" | |
puts "Top 50 Word Count: #{top_fifty_count}" | |
puts"#: word - count - % of total - % of top 50" | |
words.sort_by(&:last).reverse.first(50).each_with_index.map {|(k, v), i| puts "#{i+1}: #{k} - #{v} - #{((v.to_f/total_word_count)*100).round(2)}% - #{((v.to_f/top_fifty_count)*100).round(2)}%"} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment