public
Last active

  • Download Gist
fb_scrape.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
# gem install crack rest-client fastercsv
require 'rubygems'
require 'crack'
require 'rest_client'
require 'fastercsv'
 
# To use, get an access token from the Graph API Explorer (URL below) by
# clicking "get access token" and checking user.groups in the dialog box:
# https://developers.facebook.com/tools/explorer?method=GET&path=209024949216061%2Ffeed
#
# Run `ruby fb_scrape.rb ACCESS_TOKEN GROUP_ID`
#
# The CSV is written as "fb_posts_GROUP_ID.csv" in the current working directory.
 
# Downloads every post (and the comments embedded in each post) from a
# Facebook group's Graph API feed and exports the collected records to CSV.
class GroupScraper
  # access_token - Graph API access token, interpolated into the feed URL.
  # group_id     - ID of the group; also used in the CSV file name.
  def initialize(access_token, group_id)
    @access_token = access_token
    @group_id = group_id
    @url = "https://graph.facebook.com/#{@group_id}/feed?access_token=#{@access_token}"
    @data = []
  end

  # Scrapes the whole feed, starting at the group's first feed page.
  #
  # Returns nil.
  def start
    scrape(@url)
  end

  # Fetches the feed page at +url+ and keeps following `paging.next` links
  # until a page has no data or no next link. Every post and every embedded
  # comment is printed with `p` and appended to the collected records.
  #
  # Pages are walked in a loop rather than recursively, so the number of
  # pages is not limited by the Ruby call stack.
  #
  # url - URL of the first page to fetch.
  #
  # Returns nil.
  def scrape(url)
    while url
      resp = Crack::JSON.parse(RestClient.get(url))
      posts = resp && resp['data']
      break if posts.nil? || posts.empty?

      posts.each do |fb_post|
        add_record(fb_post, :fb_updated_time => fb_post['updated_time'])

        comments = fb_post['comments'] && fb_post['comments']['data']
        (comments || []).each do |fb_comment|
          add_record(fb_comment, :fb_likes => fb_comment['likes'])
        end
      end

      # A page may carry data without a `paging` object; treat that as the end.
      next_url = resp['paging'] && resp['paging']['next']
      # Stop rather than spin forever if the API hands back the page just fetched.
      break if next_url == url

      url = next_url
    end
    nil
  end

  # Writes the collected records to "fb_posts_<group_id>.csv" (relative path),
  # one row per post or comment, preceded by a header row.
  def to_csv
    FasterCSV.open("fb_posts_#{@group_id}.csv", "w") do |csv|
      csv << %w[name fb_id date text url]
      @data.each do |post|
        csv << [
          post[:fb_author],
          post[:fb_id],
          post[:fb_created_time],
          post[:message],
          permalink_for(post[:fb_id])
        ]
      end
    end
  end

  private

  # Builds one record from a feed item (post or comment), prints it and
  # stores it. Items lacking a `from` object get nil author fields instead
  # of raising.
  #
  # item  - Hash parsed from the API response.
  # extra - Hash of item-type-specific fields merged after the common ones.
  def add_record(item, extra)
    author = item['from'] || {}
    entry = {
      :fb_id => item['id'],
      :fb_author => author['name'],
      :fb_author_id => author['id'],
      :message => item['message'],
      :fb_created_time => item['created_time']
    }.merge(extra)
    p entry
    @data << entry
  end

  # Builds the group permalink from the first two "_"-separated parts of
  # +fb_id+. Returns nil when the record has no id.
  def permalink_for(fb_id)
    return nil if fb_id.nil?

    group_part, post_part = fb_id.to_s.split('_')
    "https://www.facebook.com/groups/#{group_part}/permalink/#{post_part}"
  end
end
 
# Command-line entry point: scrape the group's feed, then write the CSV.
if __FILE__ == $PROGRAM_NAME
  access_token, group_id = ARGV
  # Without both arguments the request URL and the CSV file name would be
  # built from empty strings, so fail early with a usage hint instead.
  abort "Usage: ruby #{File.basename(__FILE__)} ACCESS_TOKEN GROUP_ID" unless access_token && group_id

  gs = GroupScraper.new(access_token, group_id)
  gs.start
  gs.to_csv
end

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.