Skip to content

Instantly share code, notes, and snippets.

@teenst
Last active December 24, 2015 07:29
Show Gist options
  • Save teenst/6764004 to your computer and use it in GitHub Desktop.
Save teenst/6764004 to your computer and use it in GitHub Desktop.
tweet_crawler
# Gem dependencies, declared with Bundler's Gemfile DSL.
# Fetch gems over HTTPS rather than plain HTTP so downloads cannot be
# tampered with in transit.
source "https://rubygems.org"

gem "twitter"
gem "pit"
gem "tweetstream"
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Copyright: (c) 2013, teenst
# All rights reserved.
# License : The BSD 3-Clause License
# <http://opensource.org/licenses/BSD-3-Clause>
require "fileutils"
require "json"
require "pp"
require "zlib"

require "pit"
require "tweetstream"
# Reads the Twitter OAuth credentials from the "teenst_dev" Pit profile and
# applies them to the global TweetStream configuration.
#
# The values in the :require hash are the placeholder texts passed to Pit for
# each required key (presumably shown when Pit asks the user to fill in
# missing values -- confirm against the Pit documentation).
def configure
  credentials = Pit.get("teenst_dev", :require => {
    "CONSUMER_KEY" => "consumer_key",
    "CONSUMER_SECRET" => "consumer_secret",
    "OAUTH_TOKEN" => "OAuth_token",
    "OAUTH_TOKEN_SECRET" => "token_secret"
  })

  TweetStream.configure do |config|
    config.consumer_key = credentials["CONSUMER_KEY"]
    config.consumer_secret = credentials["CONSUMER_SECRET"]
    config.oauth_token = credentials["OAUTH_TOKEN"]
    config.oauth_token_secret = credentials["OAUTH_TOKEN_SECRET"]
    config.auth_method = :oauth
  end
end
# Builds the path of the gzip archive for the hour that contains +now+.
#
# The path has one directory level per year, month and day, and one file per
# hour, so a new path is produced every time the hour changes.
#
# @param now [Time] moment to build the path for; defaults to the current time
# @return [String] relative path of the form "data/YYYY/MM/DD/HH.gz"
def get_filename(now = Time.now)
  now.strftime("data/%Y/%m/%d/%H.gz")
end
# Opens +filename+ for gzip-compressed writing, creating any missing parent
# directories first. An existing file at that path is overwritten.
#
# The caller owns the returned writer and must call #close on it; closing the
# writer also closes the underlying file.
#
# @param filename [String] path of the gzip file to create
# @return [Zlib::GzipWriter] writer positioned at the start of the new file
def file_open(filename)
  FileUtils.mkdir_p(File.dirname(filename))
  Zlib::GzipWriter.open(filename)
end
# Entry point: stream the Twitter sample feed and append the attributes of
# every tweet written by a Japanese-language user to an hourly gzip file.
configure
client = TweetStream::Client.new
current_filename = get_filename
g = file_open(current_filename)

begin
  client.sample do |status|
    # Keep only tweets from users whose language setting is Japanese.
    next unless status.user.lang == "ja"

    # Rotate BEFORE writing so each tweet lands in the file for the hour in
    # which it was received (the file name changes once per hour).
    next_filename = get_filename
    if next_filename != current_filename
      g.close # also closes the underlying File
      current_filename = next_filename
      g = file_open(current_filename)
    end

    # NOTE(review): puts writes the Hash#to_s form of attrs, not JSON, even
    # though "json" is required by this file -- confirm the intended format.
    g.puts(status.attrs)
  end
ensure
  # A GzipWriter that is never closed leaves a gzip file without its footer,
  # so close it even when the stream stops because of an error or a signal.
  g.close unless g.closed?
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment