Skip to content

Instantly share code, notes, and snippets.

@nezuQ
Created May 11, 2014 04:43
Show Gist options
  • Save nezuQ/6a7ba8cc3e7cc00075b3 to your computer and use it in GitHub Desktop.
Save nezuQ/6a7ba8cc3e7cc00075b3 to your computer and use it in GitHub Desktop.
Rubyで前処理。PixivのユーザープロフィールをCSV形式で取得する。 ref: http://qiita.com/nezuq/items/b076d7e6ea6deecfc3ce
-- One row per Pixiv user profile.
-- The column order is the same as the field order pxprofile.rb writes to INPUT.csv.
CREATE TABLE user (
    user_id      INTEGER PRIMARY KEY,  -- Pixiv user id
    nick         TEXT,                 -- nickname
    hp_url       TEXT,                 -- homepage address
    gender       TEXT,                 -- gender
    blood_type   TEXT,                 -- blood type
    age          INTEGER,              -- age
    birthday     TEXT,                 -- birthday
    job          TEXT,                 -- occupation
    introduction TEXT,                 -- self-introduction
    pc           TEXT,                 -- computer
    monitor      TEXT,                 -- monitor
    soft         TEXT,                 -- software
    scanner      TEXT,                 -- scanner
    tablet       TEXT,                 -- tablet
    printer      TEXT,                 -- printer
    desktop_item TEXT,                 -- things on the desk
    music        TEXT,                 -- music listened to while drawing
    desk         TEXT,                 -- desk
    chair        TEXT,                 -- chair
    etc          TEXT                  -- other
);
9999999 人物1 http:// 男性 A型 24 12/31 職業 自己紹介 ノーパソ モニター ソフト スキャナー タブレット プリンター 机上の物 音楽 椅子 その他
0000000 人物2 女性 自己紹介
#!/usr/local/bin/ruby
# -*- coding: utf-8 -*-
=begin
pxprofile.rb
Copyright (c) 2014 nezuq
This software is released under the MIT License.
http://opensource.org/licenses/mit-license.php
=end
require 'uri'
require 'net/http'
require 'csv'
# Ordered description of every profile field handled by this script.
#
# Each entry has:
#   :id       - key used for the field in each collected row; the ids, in this
#               order, are also the column names of the `user` table
#   :name     - human-readable label (not referenced by the visible script)
#   :name_api - label text searched for in the profile API response
#   :type     - SQL column type of the field (not referenced by the visible script)
#
# The array order defines the column order of the generated CSV.
SCHEMAS = [
  { id: 'user_id',      name: 'ID',                   name_api: 'ID',                     type: 'INTEGER' },
  { id: 'nick',         name: 'ニックネーム',         name_api: 'ニックネーム',           type: 'TEXT' },
  { id: 'hp_url',       name: 'HPアドレス',           name_api: 'ホームページアドレス',   type: 'TEXT' },
  { id: 'gender',       name: '性別',                 name_api: '性別',                   type: 'TEXT' },
  { id: 'blood_type',   name: '血液型',               name_api: '血液型',                 type: 'TEXT' },
  { id: 'age',          name: '年齢',                 name_api: '年齢',                   type: 'INTEGER' },
  { id: 'birthday',     name: '誕生日',               name_api: '誕生日',                 type: 'TEXT' },
  { id: 'job',          name: '仕事',                 name_api: '職業',                   type: 'TEXT' },
  # Was 'intro'; renamed so the id matches the `introduction` table column.
  { id: 'introduction', name: '自己紹介',             name_api: '自己紹介',               type: 'TEXT' },
  { id: 'pc',           name: 'コンピュータ',         name_api: 'コンピュータ',           type: 'TEXT' },
  # NOTE(review): :name_api is 'モニタ' (no trailing 'ー') while :name is 'モニター';
  # presumably this mirrors the API's spelling - confirm against a real response.
  { id: 'monitor',      name: 'モニター',             name_api: 'モニタ',                 type: 'TEXT' },
  { id: 'soft',         name: 'ソフト',               name_api: 'ソフト',                 type: 'TEXT' },
  { id: 'scanner',      name: 'スキャナー',           name_api: 'スキャナー',             type: 'TEXT' },
  { id: 'tablet',       name: 'タブレット',           name_api: 'タブレット',             type: 'TEXT' },
  { id: 'printer',      name: 'プリンター',           name_api: 'プリンター',             type: 'TEXT' },
  { id: 'desktop_item', name: '机の上にあるもの',     name_api: '机の上にあるもの',       type: 'TEXT' },
  { id: 'music',        name: '絵を描く時に聞く音楽', name_api: '絵を描くときに聴く音楽', type: 'TEXT' },
  { id: 'desk',         name: '机',                   name_api: '机',                     type: 'TEXT' },
  { id: 'chair',        name: '椅子',                 name_api: '椅子',                   type: 'TEXT' },
  { id: 'etc',          name: 'その他',               name_api: 'その他',                 type: 'TEXT' }
].freeze
# Base URL of the profile API; the user id is appended as the `id` parameter.
PROFILE_API_URL = 'http://spapi.pixiv.net/iphone/profile.php?&id='.freeze

# Builds the request URL for one user id.
#
# @param id [String] user id as given on the command line
# @return [String] request URL; the id is form-escaped so that unexpected
#   characters cannot break URI parsing (plain numeric ids are unchanged)
def profile_uri_string(id)
  PROFILE_API_URL + URI.encode_www_form_component(id)
end

# Prepares a raw response body for field extraction.
#
# The body is tagged as UTF-8 rather than Encoding.default_external: the
# patterns built from SCHEMAS come from this UTF-8 source file, so under a
# non-UTF-8 locale the previous tagging made the match raise
# Encoding::CompatibilityError (or never match).
# NOTE(review): assumes the API answers in UTF-8 - confirm.
#
# <br> tags are turned into spaces so multi-line values stay on one line.
# Commas need no special handling: the CSV writer quotes fields containing them.
#
# @param body [String] raw response body (not modified)
# @return [String] UTF-8 tagged copy with <br> tags replaced by spaces
def normalize_profile_text(body)
  body.dup.force_encoding(Encoding::UTF_8).gsub(/<br\s*\/*>/, ' ')
end

# Extracts every field described by SCHEMAS from a normalized response.
#
# @param text [String] output of normalize_profile_text
# @param id [String] user id, stored under 'user_id' unless the response
#   itself provides a value for that field
# @return [Hash{String => String}] field id => stripped value; fields whose
#   label is not found in the text are absent
def parse_profile(text, id)
  row = { 'user_id' => id }
  SCHEMAS.each do |scm|
    # Escape the label so it is always matched literally.
    md = text.match(/<tr><th>#{Regexp.escape(scm[:name_api])}<td>([^<]+)/)
    row[scm[:id]] = md[1].strip if md
  end
  row
end

# User ids from the command line, duplicates removed.
IDS = ARGV.uniq

# One row hash per requested user, in command-line order.
USERS = IDS.map do |id|
  uri_s = profile_uri_string(id)
  puts "問い合わせ中:#{uri_s}"
  # Wait 5-10 seconds before every request to keep the load on the server low.
  sleep(5 + rand(6))
  parse_profile(normalize_profile_text(Net::HTTP.get(URI.parse(uri_s))), id)
end
# Write every collected profile to INPUT.csv: one line per user, with the
# values in SCHEMAS order. Fields a user does not have become empty strings.
column_ids = SCHEMAS.map { |scm| scm[:id] }
CSV.open('INPUT.csv', 'wb') do |out|
  # Header row is intentionally disabled; to emit it, enable:
  # out << column_ids
  USERS.each do |profile|
    out << column_ids.map { |col| profile[col].to_s }
  end
end
puts 'end!'
ruby pxprofile.rb 9999999 0000000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment