Last active
August 29, 2015 14:03
-
-
Save JohnathanWeisner/8d980a8dd9cb755b7c00 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'snoo' | |
require 'json' | |
# Script settings: the path the collected comments are written to and the
# subreddit whose comments are collected.
config = {
  export_file_name: 'comments_for_ginger',
  subreddit: 'askreddit'
}
# Collects the comments of every link in a subreddit listing through a
# Snoo::Client, pausing between API calls.
class RedditParser
  # Minimum number of seconds kept between two consecutive API calls.
  MIN_SECONDS_BETWEEN_CALLS = 2

  # Name of the subreddit whose listing is read (defaults to "gaming").
  attr_accessor :subreddit

  def initialize
    @reddit = Snoo::Client.new
    @subreddit = "gaming"
    @last_api_call_time = Time.now
    @delay_since_last_api_call = 2
  end

  # Sleeps for whatever is left of MIN_SECONDS_BETWEEN_CALLS since the last
  # recorded API call, then records the current time as the last call time.
  def delay
    @delay_since_last_api_call = Time.now - @last_api_call_time
    remaining = MIN_SECONDS_BETWEEN_CALLS - @delay_since_last_api_call
    sleep remaining if remaining > 0
    @last_api_call_time = Time.now
  end

  # Fetches the listing for +subreddit+ and returns the id of every entry
  # found under ["data"]["children"].
  def link_ids
    start = Time.now
    puts "link_ids start"
    listing = @reddit.get_listing(subreddit: subreddit)
    ids = listing["data"]["children"].map { |link| link["data"]["id"] }
    delay
    puts "link_ids finished: #{Time.now - start}"
    ids
  end

  # Fetches the comment page for +link_id+ (requesting up to 1000 comments)
  # and returns the client's response unchanged.
  def get_post(link_id)
    start = Time.now
    puts "get_post start"
    post = @reddit.get_comments(link_id: link_id, limit: 1000)
    delay
    puts "get_post finished: #{Time.now - start}"
    post
  end

  # Returns the top-level comment entries of a response from #get_post: the
  # children of the second element of +post+.
  def comments_from(post)
    post[1]["data"]["children"]
  end

  # Walks a nested comment structure and returns a flat array of
  # { body:, id: } hashes, one for every nested hash that has a "body".
  #
  # +comments+ is an array of nested hashes/arrays (or nil, which yields []).
  # The argument is not modified. Entries are visited depth-first starting
  # from the end of the array, so results come out in that order.
  def flatten_threaded_comments(comments)
    start = Time.now
    puts "flatten_threaded_comments start"
    all_comments = []
    # Work on a copy: the traversal pops from the stack, and popping from the
    # caller's own array would leave it empty.
    stack = comments ? comments.dup : []
    until stack.empty?
      node = stack.pop
      # Strings, nil and other scalars have nothing to descend into.
      next unless node.respond_to?(:each)

      # For a Hash node the block receives (key, value); for an Array node it
      # receives (element, nil) unless the element is itself an array.
      node.each do |key, value|
        if value.is_a?(Hash)
          %w[data replies children].each do |child_key|
            child = value[child_key]
            stack << child unless child.nil?
          end
          all_comments << { body: value["body"], id: value["id"] } unless value["body"].nil?
        end
        stack << key if key.is_a?(Array) || key.is_a?(Hash)
      end
    end
    puts "flatten_threaded_comments finished: #{Time.now - start}"
    all_comments
  end

  # Returns one flat array with the { body:, id: } hashes of every link
  # returned by #link_ids.
  def all_comments_flattened
    puts "all_comments_flattened start"
    link_ids.flat_map do |link_id|
      flatten_threaded_comments(comments_from(get_post(link_id)))
    end
  end
end
require 'gingerice' | |
# Runs comments through Gingerice and detects your / you're corrections.
class GrammarChecker
  # Spellings recognised as a form of "your" / "you're".
  YOUR_FORMS = /youre|your|you're|you are/

  # Literal words that mark a text as containing a "your" form.
  YOUR_WORDS = ["your", "you're", "you are"].freeze

  def initialize
    @responses = ["Excuse me, but I think you made a mistake there", "Ehem", "...", "FTFY"]
  end

  # Returns true when comment[:body] contains at least one of +words+.
  #
  # Words are matched literally (regex metacharacters are escaped) and an
  # empty word list never matches. A nil body is treated as empty text.
  # NOTE(review): matching is case-sensitive, while #different_your?
  # downcases its input — confirm whether "Your" should match here too.
  def contains?(words, comment)
    !!Regexp.union(words).match(comment[:body].to_s)
  end

  # Returns true when both texts contain a "your" form and the first form
  # found in each differs (compared case-insensitively). Returns false when
  # either text has no such form or is nil.
  def different_your?(one, two)
    first_form = one.to_s.downcase[YOUR_FORMS]
    second_form = two.to_s.downcase[YOUR_FORMS]
    return false if first_form.nil? || second_form.nil?

    first_form != second_form
  end

  # Returns true when any entry of comment[:grammar_fails] replaces one
  # "your" form with a different one. Each entry is read through its "text"
  # and "correct" keys; a missing (nil) list counts as no errors.
  def your_error?(comment)
    Array(comment[:grammar_fails]).any? do |error|
      contains?(YOUR_WORDS, { body: error["text"] }) &&
        different_your?(error["text"], error["correct"])
    end
  end

  # Parses comment[:body] with Gingerice and stores the outcome on the
  # comment: :grammar_fails gets the parser's "corrections" and :corrected
  # gets a random canned response followed by the parser's "result".
  # Returns the same (mutated) comment hash.
  def snootify(comment)
    start = Time.now
    puts "snootify start"
    results = Gingerice::Parser.new.parse(comment[:body])
    comment[:grammar_fails] = results["corrections"]
    comment[:corrected] = "#{@responses.sample} \n#{results['result']}"
    puts "snootify finished #{Time.now - start}"
    comment
  end
end
# Driver: collect every comment body from the configured subreddit, report
# how many were found and how long it took, then export them to disk.
puts "Start program!"
start = Time.now
bot = RedditParser.new
bot.subreddit = config[:subreddit]
flattened = bot.all_comments_flattened.map { |comment| comment[:body] } # comment this out if you would like to use the commented out code below
# flattened = bot.all_comments_flattened
# puts "Finished Collecting comments: #{Time.now - start}"
# grammar_checker = GrammarChecker.new
# your_comments = flattened.select do |comment|
# grammar_checker.contains?(["your","you're","you are"], comment)
# end
# # comment_ids = your_comments.map{ |comment| comment["id"] }
# your_comments = your_comments.select do |comment|
# comment = grammar_checker.snootify comment
# end
# wrong_your = your_comments.select do |comment|
# grammar_checker.your_error? comment
# end
# puts "#{wrong_your.length} errors #{your_comments.length-wrong_your.length} correct"
puts "#{flattened.length} total comments collected"
puts "FINISHED : #{Time.now - start}"
# Export as JSON: writing the array directly would store its #inspect form,
# which other tools cannot reliably parse back.
File.open(config[:export_file_name], 'w') { |file| file.write(JSON.generate(flattened)) }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment