Created
December 29, 2016 15:10
-
-
Save fjustin/bc1db950940e5c84475c874a7faaf486 to your computer and use it in GitHub Desktop.
Prints every search result (title and URL) for the keyword you type after "=" in the search URL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri'
require 'uri'

# Fetches a Google search result page for a keyword and prints each hit as
# "<title> <url>", one per line. Edit `keyword` below to search for
# something else.

# Builds the Google search URL for the given keyword.
#
# The keyword is percent-encoded with URI.encode_www_form, so non-ASCII
# text and spaces are safe to pass. (URI.escape, which used to do this job,
# no longer exists in Ruby 3.0+.)
#
# @param keyword [String] raw search keyword
# @return [String] search URL with the keyword encoded in the `q` parameter
def google_search_url(keyword)
  "https://www.google.co.jp/search?#{URI.encode_www_form(q: keyword)}"
end

# Downloads the body of +url+, sending +user_agent+ as the User-Agent header.
#
# Uses URI.open explicitly: Kernel#open no longer opens URLs in Ruby 3.0+.
# Network and HTTP errors raised by open-uri propagate to the caller.
#
# @param url [String] already-encoded URL to fetch
# @param user_agent [String] value for the User-Agent request header
# @return [String] response body
def fetch_html(url, user_agent)
  URI.open(url, 'User-Agent' => user_agent, &:read)
end

# Extracts [title, url] pairs from result-page HTML.
#
# Collects the text enclosed by <h3 class="r">...</h3>, then pulls the href
# attribute and the link text out of the first <a> tag inside each heading.
# Headings that contain no matching <a> tag are skipped.
#
# @param html [String] HTML of the result page
# @return [Array<Array(String, String)>] [title, url] pairs in page order
def extract_results(html)
  html.scan(%r{<h3 class="r">(.+?)</h3>}).each_with_object([]) do |(heading), results|
    link = heading.match(%r{<a href="(.+?)".+?>(.+?)</a>})
    results << [link[2], link[1]] if link
  end
end

keyword = 'シャドウバース リセマラ'
user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.63 Safari/537.36'

html = fetch_html(google_search_url(keyword), user_agent)
extract_results(html).each { |title, url| puts "#{title} #{url}" }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment