Skip to content

Instantly share code, notes, and snippets.

@JohnathanWeisner
Last active August 29, 2015 14:03
Show Gist options
  • Save JohnathanWeisner/8d980a8dd9cb755b7c00 to your computer and use it in GitHub Desktop.
Save JohnathanWeisner/8d980a8dd9cb755b7c00 to your computer and use it in GitHub Desktop.
require 'snoo'
require 'json'
# Script settings read by the top-level code below:
#   :export_file_name - path of the file the collected comments are written to
#   :subreddit        - subreddit assigned to the RedditParser instance
config = {
  export_file_name: 'comments_for_ginger',
  subreddit: 'askreddit'
}
# Collects the comments of every link returned by Snoo::Client#get_listing for
# one subreddit, pausing between API calls.
class RedditParser
  # Minimum number of seconds #delay keeps between consecutive API calls.
  API_CALL_INTERVAL = 2

  # Name of the subreddit whose listing #link_ids reads.
  attr_accessor :subreddit

  # Creates a parser backed by a new Snoo::Client. The subreddit starts as
  # "gaming" and can be changed through #subreddit=.
  def initialize
    @reddit = Snoo::Client.new
    @subreddit = "gaming"
    @last_api_call_time = Time.now
    @delay_since_last_api_call = API_CALL_INTERVAL
  end

  # Sleeps for whatever remains of API_CALL_INTERVAL since the previously
  # recorded call time, then records the current time as the new reference.
  def delay
    @delay_since_last_api_call = Time.now - @last_api_call_time
    if @delay_since_last_api_call < API_CALL_INTERVAL
      sleep(API_CALL_INTERVAL - @delay_since_last_api_call)
    end
    @last_api_call_time = Time.now
  end

  # Fetches the listing of the current subreddit and returns the "id" of each
  # child entry, logging how long the call took.
  #
  # @return [Array] one id per entry of the listing
  def link_ids
    start = Time.now
    puts "link_ids start"
    listing = @reddit.get_listing(subreddit: subreddit)
    ids = listing["data"]["children"].map { |link| link["data"]["id"] }
    delay
    puts "link_ids finished: #{Time.now - start}"
    ids
  end

  # Fetches the comments response for one link (requesting up to 1000
  # comments), logging how long the call took.
  #
  # @param link_id [String] id as returned by #link_ids
  # @return [Object] the response of Snoo::Client#get_comments
  def get_post(link_id)
    start = Time.now
    puts "get_post start"
    post = @reddit.get_comments(link_id: link_id, limit: 1000)
    delay
    puts "get_post finished: #{Time.now - start}"
    post
  end

  # Extracts the top-level comment entries from a response of #get_post.
  # The comment listing is read from the second element of the response.
  #
  # @param post [Array] response returned by #get_post
  # @return [Array] the "children" of that listing
  def comments_from(post)
    post[1]["data"]["children"]
  end

  # Walks a threaded comment structure (nested hashes and arrays) and returns
  # every entry that carries a "body" as a flat list.
  #
  # The walk is an explicit depth-first traversal driven by a stack, so the
  # result order follows the pop order rather than the original thread order.
  # The +comments+ argument itself is never modified.
  #
  # @param comments [Array, nil] top-level comment entries; nil is treated as
  #   an empty list
  # @return [Array<Hash>] hashes of the form { body: ..., id: ... }
  def flatten_threaded_comments(comments)
    start = Time.now
    puts "flatten_threaded_comments start"
    all_comments = []
    # Work on a copy: popping from the caller's array would empty it.
    stack = Array(comments).dup
    until stack.empty?
      node = stack.pop
      # "replies" is pushed even when it is an empty String; skip such leaves.
      next if node.nil? || node.is_a?(String)

      # For a Hash node the block receives (key, value); for an Array node it
      # receives (element, nil) unless the element is itself an Array.
      node.each do |key, value|
        if value.is_a?(Hash)
          stack << value["data"] unless value["data"].nil?
          stack << value["replies"] unless value["replies"].nil?
          stack << value["children"] unless value["children"].nil?
          all_comments << { body: value["body"], id: value["id"] } unless value["body"].nil?
        end
        # Array elements arrive in the first block parameter; queue containers.
        stack << key if key.is_a?(Array) || key.is_a?(Hash)
      end
    end
    puts "flatten_threaded_comments finished: #{Time.now - start}"
    all_comments
  end

  # Downloads every link of the current subreddit listing and returns all of
  # their comments as one flat list.
  #
  # @return [Array<Hash>] hashes of the form { body: ..., id: ... }
  def all_comments_flattened
    puts "all_comments_flattened start"
    link_ids.flat_map do |link_id|
      flatten_threaded_comments(comments_from(get_post(link_id)))
    end
  end
end
require 'gingerice'
# Detects "your" / "you're" style mistakes in comments with the help of
# Gingerice and prepares a corrected reply.
class GrammarChecker
  # Words that mark a correction as being about "your" vs. "you're".
  YOUR_WORDS = ["your", "you're", "you are"].freeze

  # Matches the variant of "your" used in a piece of lower-cased text.
  YOUR_VARIANT = /youre|your|you're|you are/

  def initialize
    @responses = ["Excuse me, but I think you made a mistake there", "Ehem", "...", "FTFY"]
  end

  # Tells whether the comment body contains any of the given words.
  # Words are matched literally (regex metacharacters are escaped) and
  # case-sensitively. An empty word list never matches.
  #
  # @param words [Array<String>] literal words to look for
  # @param comment [Hash] comment with its text under :body (may be nil)
  # @return [Boolean]
  def contains?(words, comment)
    !Regexp.union(words).match(comment[:body]).nil?
  end

  # Tells whether both texts contain a "your" variant and the variants differ.
  # The comparison ignores case; nil is treated as empty text.
  #
  # @param one [String, nil]
  # @param two [String, nil]
  # @return [Boolean] false when either text has no "your" variant
  def different_your?(one, two)
    match_one = YOUR_VARIANT.match(one.to_s.downcase)
    match_two = YOUR_VARIANT.match(two.to_s.downcase)
    return false unless match_one && match_two

    match_one[0] != match_two[0]
  end

  # Tells whether any correction stored by #snootify replaces one "your"
  # variant with a different one.
  #
  # @param comment [Hash] comment whose :grammar_fails holds corrections, each
  #   with "text" and "correct" entries; a missing list counts as no errors
  # @return [Boolean]
  def your_error?(comment)
    Array(comment[:grammar_fails]).any? do |error|
      contains?(YOUR_WORDS, body: error["text"]) &&
        different_your?(error["text"], error["correct"])
    end
  end

  # Runs the comment body through Gingerice::Parser#parse and stores the
  # outcome on the comment itself: the "corrections" under :grammar_fails and
  # a randomly chosen response followed by the "result" text under :corrected.
  #
  # @param comment [Hash] comment with its text under :body; modified in place
  # @return [Hash] the same comment
  def snootify(comment)
    start = Time.now
    puts "snootify start"
    results = Gingerice::Parser.new.parse(comment[:body])
    comment[:grammar_fails] = results["corrections"]
    comment[:corrected] = "#{@responses.sample} \n#{results['result']}"
    puts "snootify finished #{Time.now - start}"
    comment
  end
end
# Driver: collect the comment bodies of the configured subreddit, report
# totals and timing, and export the bodies to the configured file.
puts "Start program!"
start = Time.now
bot = RedditParser.new
bot.subreddit = config[:subreddit]
flattened = bot.all_comments_flattened.map { |comment| comment[:body] } # comment this out if you would like to use the commented out code below
# flattened = bot.all_comments_flattened
# puts "Finished Collecting comments: #{Time.now - start}"
# grammar_checker = GrammarChecker.new
# your_comments = flattened.select do |comment|
# grammar_checker.contains?(["your","you're","you are"], comment)
# end
# # comment_ids = your_comments.map{ |comment| comment["id"] }
# your_comments = your_comments.select do |comment|
# comment = grammar_checker.snootify comment
# end
# wrong_your = your_comments.select do |comment|
# grammar_checker.your_error? comment
# end
# puts "#{wrong_your.length} errors #{your_comments.length-wrong_your.length} correct"
puts "#{flattened.length} total comments collected"
puts "FINISHED : #{Time.now - start}"
# Export as JSON (the 'json' library is required at the top of the file).
# Writing the array directly would store Ruby's Array#to_s inspect dump,
# which other tools cannot parse reliably.
File.open(config[:export_file_name], 'w') { |file| file.write(JSON.generate(flattened)) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment