Skip to content

@ashaw /fb_scrape.rb
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
# gem install crack rest-client fastercsv
require 'rubygems'
require 'crack'
require 'rest_client'
require 'fastercsv'
# To use, get an access token here, by clicking "get access token"
# and checking user.groups in the dialog box
# https://developers.facebook.com/tools/explorer?method=GET&path=209024949216061%2Ffeed
#
# Run `ruby fb_scrape.rb ACCESS_TOKEN GROUP_ID`
#
# Your CSV should show up as "fb_posts_GROUP_ID.csv" in the same directory.
# Scrapes the feed of a Facebook group through the Graph API and exports the
# collected posts and comments to a CSV file.
class GroupScraper
  # access_token - Graph API access token, interpolated into the feed URL.
  # group_id     - id of the group whose feed is fetched; also used in the
  #                name of the CSV file written by #to_csv.
  def initialize(access_token, group_id)
    @access_token = access_token
    @group_id = group_id
    @url = "https://graph.facebook.com/#{@group_id}/feed?access_token=#{@access_token}"
    @data = []
  end

  # Scrapes the group's feed starting from its first page.
  def start
    scrape(@url)
  end

  # Fetches the page at +url+, records every post and comment found on it, and
  # keeps following the response's paging "next" link until a page carries no
  # data or no next link.
  #
  # Pages are walked in a loop rather than recursively so that a long feed
  # cannot exhaust the call stack. Always returns nil.
  def scrape(url)
    while url
      resp = Crack::JSON.parse(RestClient.get(url))
      entries = resp && resp['data']
      break if entries.nil? || entries.empty?

      entries.each { |fb_post| record_post(fb_post) }

      # A page may come without a "paging" section; treat that as the end of
      # the feed instead of calling [] on nil.
      paging = resp['paging']
      url = paging && paging['next']
    end
    nil
  end

  # Writes every collected record to "fb_posts_<group_id>.csv" in the current
  # directory, one row per post or comment, preceded by a header row.
  def to_csv
    FasterCSV.open("fb_posts_#{@group_id}.csv", "w") do |csv|
      csv << %w[name fb_id date text url]
      @data.each do |post|
        csv << [post[:fb_author], post[:fb_id], post[:fb_created_time], post[:message], permalink_for(post[:fb_id])]
      end
    end
  end

  private

  # Records a post followed by each comment embedded in it.
  def record_post(fb_post)
    post = base_record(fb_post)
    post[:fb_updated_time] = fb_post['updated_time']
    remember(post)

    comments = fb_post['comments'] && fb_post['comments']['data']
    (comments || []).each do |fb_comment|
      comment = base_record(fb_comment)
      comment[:fb_likes] = fb_comment['likes']
      remember(comment)
    end
  end

  # Builds the fields shared by posts and comments. An item without a "from"
  # object yields nil author fields instead of raising.
  def base_record(item)
    author = item['from'] || {}
    {
      :fb_id => item['id'],
      :fb_author => author['name'],
      :fb_author_id => author['id'],
      :message => item['message'],
      :fb_created_time => item['created_time']
    }
  end

  # Prints the record (progress output) and stores it for #to_csv.
  def remember(record)
    p record
    @data << record
  end

  # Builds the permalink from the first two underscore-separated parts of the
  # id; a nil id produces a URL with empty parts rather than an exception.
  def permalink_for(fb_id)
    group_part, post_part = fb_id.to_s.split(/_/)
    "https://www.facebook.com/groups/#{group_part}/permalink/#{post_part}"
  end
end
# Command-line entry point: runs only when this file is executed directly,
# not when it is loaded from another script.
if __FILE__ == $PROGRAM_NAME
  access_token, group_id = ARGV
  # Fail fast with a usage hint rather than sending a request built from a
  # blank access token or group id.
  if access_token.to_s.empty? || group_id.to_s.empty?
    abort "Usage: ruby #{File.basename(__FILE__)} ACCESS_TOKEN GROUP_ID"
  end

  gs = GroupScraper.new(access_token, group_id)
  gs.start
  gs.to_csv
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.