Created
May 11, 2014 04:43
-
-
Save nezuQ/6a7ba8cc3e7cc00075b3 to your computer and use it in GitHub Desktop.
Rubyで前処理。PixivのユーザープロフィールをCSV形式で取得する。 ref: http://qiita.com/nezuq/items/b076d7e6ea6deecfc3ce
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Destination table for the CSV produced by pxprofile.rb.
-- One column per profile field, in the same order as the CSV columns,
-- so INPUT.csv can be imported positionally (the CSV has no header row).
create table user(
  user_id      INTEGER PRIMARY KEY,
  nick         TEXT,
  hp_url       TEXT,
  gender       TEXT,
  blood_type   TEXT,
  age          INTEGER,
  birthday     TEXT,
  job          TEXT,
  introduction TEXT,
  pc           TEXT,
  monitor      TEXT,
  soft         TEXT,
  scanner      TEXT,
  tablet       TEXT,
  printer      TEXT,
  desktop_item TEXT,
  music        TEXT,
  desk         TEXT,
  chair        TEXT,
  etc          TEXT);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9999999 | 人物1 | http:// | 男性 | A型 | 24 | 12/31 | 職業 | 自己紹介 | ノーパソ | モニター | ソフト | スキャナー | タブレット | プリンター | 机上の物 | 音楽 | 机 | 椅子 | その他 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0000000 | 人物2 | 女性 | 自己紹介 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/ruby | |
# -*- coding: utf-8 -*- | |
=begin | |
pxprofile.rb | |
Copyright (c) 2014 nezuq | |
This software is released under the MIT License. | |
http://opensource.org/licenses/mit-license.php | |
=end | |
require 'uri' | |
require 'net/http' | |
require 'csv' | |
# Profile fields, in CSV column order.
#
# Each entry describes one column:
#   :id       - column identifier; also the key used in each user's row hash.
#               The ids match the column names of the `user` table DDL.
#   :name     - display label for the field.
#   :name_api - label text searched for in the profile page's <th> cell.
#   :type     - column type used by the table definition.
#
# Frozen because nothing in the script needs to modify the list.
SCHEMAS = [
  { id: 'user_id',      name: 'ID',                   name_api: 'ID',                     type: 'INTEGER' },
  { id: 'nick',         name: 'ニックネーム',         name_api: 'ニックネーム',           type: 'TEXT' },
  { id: 'hp_url',       name: 'HPアドレス',           name_api: 'ホームページアドレス',   type: 'TEXT' },
  { id: 'gender',       name: '性別',                 name_api: '性別',                   type: 'TEXT' },
  { id: 'blood_type',   name: '血液型',               name_api: '血液型',                 type: 'TEXT' },
  { id: 'age',          name: '年齢',                 name_api: '年齢',                   type: 'INTEGER' },
  { id: 'birthday',     name: '誕生日',               name_api: '誕生日',                 type: 'TEXT' },
  { id: 'job',          name: '仕事',                 name_api: '職業',                   type: 'TEXT' },
  # Was 'intro'; renamed so the id agrees with the `introduction` table column.
  { id: 'introduction', name: '自己紹介',             name_api: '自己紹介',               type: 'TEXT' },
  { id: 'pc',           name: 'コンピュータ',         name_api: 'コンピュータ',           type: 'TEXT' },
  { id: 'monitor',      name: 'モニター',             name_api: 'モニタ',                 type: 'TEXT' },
  { id: 'soft',         name: 'ソフト',               name_api: 'ソフト',                 type: 'TEXT' },
  { id: 'scanner',      name: 'スキャナー',           name_api: 'スキャナー',             type: 'TEXT' },
  { id: 'tablet',       name: 'タブレット',           name_api: 'タブレット',             type: 'TEXT' },
  { id: 'printer',      name: 'プリンター',           name_api: 'プリンター',             type: 'TEXT' },
  { id: 'desktop_item', name: '机の上にあるもの',     name_api: '机の上にあるもの',       type: 'TEXT' },
  { id: 'music',        name: '絵を描く時に聞く音楽', name_api: '絵を描くときに聴く音楽', type: 'TEXT' },
  { id: 'desk',         name: '机',                   name_api: '机',                     type: 'TEXT' },
  { id: 'chair',        name: '椅子',                 name_api: '椅子',                   type: 'TEXT' },
  { id: 'etc',          name: 'その他',               name_api: 'その他',                 type: 'TEXT' }
].freeze
# Turn a raw HTTP response body into text the field regexes can be matched
# against.
#
# The body is tagged as UTF-8 (the field labels in the patterns are UTF-8, so
# any other tag on non-ASCII data makes matching raise
# Encoding::CompatibilityError), invalid byte sequences are replaced, and
# <br> tags become spaces so a multi-line value stays in one capture.
# NOTE(review): assumes the endpoint serves UTF-8 - confirm.
#
# The original also ran gsub(/,/, ','), which replaces a comma with itself;
# it is dropped here. Embedded commas are handled by CSV quoting on output.
#
# @param raw [String] response body as returned by Net::HTTP.get
# @return [String] a new UTF-8 string; +raw+ is not modified
def normalize_profile_body(raw)
  raw.dup.force_encoding(Encoding::UTF_8).scrub.gsub(%r{<br\s*/*>}, ' ')
end

# Build one user's row hash from normalized profile HTML.
#
# For every schema entry, looks for "<tr><th>LABEL<td>VALUE" and stores the
# stripped VALUE under the entry's :id. Fields not present in the page are
# simply absent from the result. The row is seeded with 'user_id'; a schema
# entry whose :id is 'user_id' overwrites that seed when its row is found.
#
# @param html [String] text from normalize_profile_body
# @param user_id [String] id the profile was requested for
# @param schemas [Array<Hash>] entries with :id and :name_api
# @return [Hash{String => String}]
def extract_profile(html, user_id, schemas)
  schemas.each_with_object({ 'user_id' => user_id }) do |scm, row|
    value = html[/<tr><th>#{Regexp.escape(scm[:name_api])}<td>([^<]+)/, 1]
    row[scm[:id]] = value.strip if value
  end
end

# User ids come from the command line; duplicates are requested only once.
IDS = ARGV.uniq
# Row hashes, one per requested id, in request order.
USERS = []
IDS.each do |id|
  uri_s = "http://spapi.pixiv.net/iphone/profile.php?&id=#{id}"
  puts "問い合わせ中:#{uri_s}"
  # Wait 5-10 seconds before every request to keep the request rate low.
  sleep(5 + rand(6))
  html = normalize_profile_body(Net::HTTP.get(URI.parse(uri_s)))
  USERS.push(extract_profile(html, id, SCHEMAS))
end
# Write one CSV row per collected user to INPUT.csv, with the columns in
# SCHEMAS order. A field missing from a user's row is written as an empty
# string (nil.to_s). No header row is written.
CSV.open('INPUT.csv', 'wb') do |csv|
  # Uncomment to emit a header row of column ids:
  # csv << SCHEMAS.map { |scm| scm[:id] }
  USERS.each do |user|
    csv << SCHEMAS.map { |scm| user[scm[:id]].to_s }
  end
end
puts 'end!'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ruby pxprofile.rb 9999999 0000000
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment