Skip to content

Instantly share code, notes, and snippets.

@rashidkpc
Last active December 14, 2015 19:09
Show Gist options
  • Save rashidkpc/5134627 to your computer and use it in GitHub Desktop.
Save rashidkpc/5134627 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'tire'
require 'tweetstream'
# Tuning knobs read by the streaming loop further down.

# A bulk import is sent to the index every time the event counter hits a
# multiple of this value.
bulk_limit = 50

# Cap on how many tweets are buffered between two bulk imports; events that
# arrive once the buffer is full are counted but not indexed.
throttle = 10

# Ceiling for the event counter that doubles as each document's :id. When the
# counter gets this high it restarts from zero, so ids are reused.
total_limit = 4_320_000
# Credentials for the Twitter streaming API. The values below are placeholders;
# substitute your own. The original author notes that the OAuth fields are
# probably the only ones you need.
TweetStream.configure do |settings|
  settings.consumer_key       = 'foo'
  settings.consumer_secret    = 'bar'
  settings.oauth_token        = 'baz'
  settings.oauth_token_secret = 'buh'
  settings.auth_method        = :oauth
end
# Recreate the 'twitter' index on every run: delete it, then create it again
# with an explicit mapping for the 'tweet' document type.

# Builds a fresh field spec for a string mapped with :index => 'not_analyzed'.
exact_string = lambda { { :type => 'string', :index => 'not_analyzed' } }

# Sub-fields of the nested :user object.
user_fields = {
  :name => exact_string.call,
  :lang => exact_string.call
}

# Sub-fields of the nested :place object (only present on some tweets).
place_fields = {
  :name => exact_string.call,
  :type => exact_string.call,
  :country_name => exact_string.call,
  :country_code => exact_string.call
}

tweet_mapping = {
  :properties => {
    :id => { :type => 'integer', :index => 'not_analyzed' },
    :user => { :properties => user_fields },
    :place => { :properties => place_fields }
  }
}

Tire.index 'twitter' do
  delete
  create :mappings => { :tweet => tweet_mapping }
end
# Event counter: incremented for every status received from the stream, used
# as the :id of each buffered tweet, and wrapped back to zero at total_limit.
i = 0
# Tweets buffered since the last bulk import.
tweets = []

# Consume the sample stream. Stream errors are only printed; every status is
# counted, a limited number per window are buffered, and the buffer is bulk
# imported into the 'twitter' index every bulk_limit events.
TweetStream::Client.new.on_error { |error| puts error }.sample do |status|
  # status exposes the tweet's fields through method calls (created_at, text,
  # user, place). Uncomment to trace incoming events:
  # puts "#{i} #{status.text}"
  i += 1

  # Buffer at most `throttle` tweets between imports; once the buffer is full
  # the remaining events of the window are counted but dropped.
  if tweets.length < throttle
    tweet = {
      :id => i,
      :type => 'tweet',
      :created_at => status.created_at.iso8601,
      :text => status.text,
      :user => {
        :name => status.user.screen_name,
        :created_at => status.user.created_at.iso8601,
        :lang => status.user.lang
      }
    }

    # Place information is optional; add it only when the status carries it.
    unless status.place.nil?
      tweet[:place] = {
        :name => status.place.full_name,
        :type => status.place.place_type,
        :country_code => status.place.country_code,
        :country_name => status.place.country
      }
    end

    tweets.push(tweet)
  end

  # Flush the buffer every bulk_limit events, then empty it for the next window.
  if (i % bulk_limit).zero?
    Tire.index 'twitter' do
      import tweets
      tweets.clear
    end
  end

  # Wrap the counter as soon as it reaches total_limit so that ids stay within
  # 1..total_limit (a strict `>` comparison let id total_limit + 1 slip through).
  i = 0 if i >= total_limit
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment