Skip to content

Instantly share code, notes, and snippets.

@aycabta
Created October 5, 2012 17:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aycabta/3841182 to your computer and use it in GitHub Desktop.
Save aycabta/3841182 to your computer and use it in GitHub Desktop.
usage: mkdir images; mkdir images_small; ruby negi_t.rb > output && ruby data_to_html.rb > html && sh resize.sh
# data_to_html.rb: renders the scraped item list as an HTML table on stdout.
#
# Input is the file 'output', holding three lines per item in this order:
# item name, image file name, product path. Items are laid out five per
# row, starting from the LAST record in the file. Each group of items
# produces one image row followed by two identical caption rows.

columns = 5

puts "<table border=\"0\" width=\"700\" align=\"center\">\n"
puts " <tr bgcolor=\"#000000\">\n"
columns.times { puts " <th></th>\n" }
puts " </tr>\n"

items = []
File.open('output') do |f|
  # `chomp` instead of `chomp!`: the bang form returns nil when the line has
  # no trailing newline, which turned the final field of a file without a
  # closing newline into nil.
  f.each_line.map(&:chomp).each_slice(3) do |name, image, path|
    # A trailing record with fewer than three lines is incomplete; drop it.
    break if path.nil?

    items << { 'name' => name, 'image' => image, 'path' => path }
  end
end

# Newest-last input is shown first: walk the records back to front.
items.reverse.each_slice(columns) do |row|
  padding = columns - row.size # empty cells needed to complete a short row

  # Image row. The align value is quoted like every other row's.
  puts " <tr align=\"center\">\n"
  row.each do |item|
    puts " <td><a href=\"http://ttrinity.jp#{item['path']}\" target=\"_blank\">" +
         "<img src=\"http://negineesan.com/etc/tshirts/#{item['image']}\" alt=\"#{item['name']}\" width=\"144\" height=\"144\" border=\"0\" style=\"background-color: #4c4c4c;\" />" +
         "</a></td>\n"
  end
  padding.times { puts " <td></td>\n" }
  puts " </tr>\n"

  # NOTE(review): the caption row is emitted twice, exactly as the script
  # always did - confirm whether the second copy is intentional.
  2.times do
    puts " <tr align=\"center\">\n"
    row.each do |item|
      puts " <td><a href=\"http://ttrinity.jp#{item['path']}\">#{item['name']}</a></td>\n"
    end
    padding.times { puts " <td></td>\n" }
    puts " </tr>\n"
  end
end

puts "</table>\n"
require 'net/http'
# Fetches +path+ through +http+ and stores the response body in the
# images/ directory, named after the last segment of +path+.
#
# A failed request is retried after a 30 second pause, up to five attempts
# in total; the last error is re-raised once the attempts are used up.
#
# @param http [#get] connection object; get(path) must return an object
#   that responds to #body
# @param path [String] server path of the image; must contain a "/"
#   followed by a file name
# @return [String] the file name (without directory) the image was saved as
# @raise [ArgumentError] when +path+ has no file-name segment
# @raise [StandardError] the last request error after five failed attempts
def download_image(http, path)
  filename = path.to_s[%r{/([^/]+)$}, 1]
  raise ArgumentError, "no file name in image path: #{path.inspect}" if filename.nil?

  attempts = 0
  begin
    attempts += 1
    resp = http.get(path)
  rescue StandardError
    # Bounded retry: an unreachable host must not hang the script forever.
    raise if attempts >= 5

    sleep 30
    retry
  end

  # Image data is binary. Text-mode `puts` appended a newline whenever the
  # body did not already end in one, corrupting the file; write the bytes
  # verbatim instead.
  File.binwrite(File.join('images', filename), resp.body)
  filename
end
# Scrapes one product page: extracts the item name and the link to the
# original-size image, downloads that image, and returns the collected data.
#
# A failed page request is retried after a 30 second pause, up to five
# attempts in total; the last error is re-raised once they are used up.
#
# Problems are reported on stderr. The caller's stdout carries the scraped
# records (three lines per item), so diagnostics must not be mixed into it.
#
# @param http [#get] connection object; get(path) must return an object
#   that responds to #body
# @param path [String] product page path
# @return [Hash] with string keys 'itemname' (String or nil when the name
#   is not found), 'image_filename' (String, or nil when no image link is
#   found) and 'path' (the +path+ argument)
# @raise [StandardError] the last request error after five failed attempts
def scrape_item(http, path)
  attempts = 0
  begin
    attempts += 1
    resp = http.get(path)
  rescue StandardError
    # Bounded retry: an unreachable host must not hang the script forever.
    raise if attempts >= 5

    sleep 30
    retry
  end
  page = resp.body

  # String#[] with a capture index yields nil on no match, so a missing
  # element is detected without raising and rescuing.
  itemname = page[%r{<div id="itemNameArea">\s+<h2 class="wb">([^<]+)</h2>}, 1]
  warn "itemname: #{path}" if itemname.nil?

  # Link target of the thumbnail marked rel="photo": the original image.
  orig_image = page[/<a href="([^"]+)"><img src="[^"]+"[^>]+rel="photo"[^>]+class="png[^>]*>/, 1]
  orig_image_filename =
    if orig_image.nil?
      # Without an image link there is nothing to download; keep the record
      # and leave its file name empty instead of crashing in the download.
      warn "orig_image: #{path}"
      nil
    else
      download_image(http, orig_image)
    end

  { 'itemname' => itemname, 'image_filename' => orig_image_filename, 'path' => path }
end
# Scrapes one shop listing page and, recursively, every following page.
#
# Each product link found on the page is passed to scrape_item, with a two
# second pause before each item request. When the page has a "next" link,
# the listing at that link is scraped too and its items are appended.
#
# A failed page request is retried after a 30 second pause, up to five
# attempts in total; the last error is re-raised once they are used up.
#
# @param http [#get] connection object; get(path) must return an object
#   that responds to #body
# @param path [String] path of the listing page to start from
# @return [Array] the values returned by scrape_item, in page order
# @raise [StandardError] the last request error after five failed attempts
def scrape_list(http, path)
  attempts = 0
  begin
    attempts += 1
    resp = http.get(path)
  rescue StandardError
    # Bounded retry: an unreachable host must not hang the script forever.
    raise if attempts >= 5

    sleep 30
    retry
  end
  page = resp.body

  product_paths = page.scan(%r{<p class="item"><a href="(/product/\d+)#\d+">}).flatten
  items = product_paths.map do |product_path|
    sleep 2 # pause between item requests
    scrape_item(http, product_path)
  end

  # [^"]+ stops at the closing quote. The former greedy (.+) ran on to the
  # last `">` of the line and captured any later links along with the href.
  next_href = page[/<li class="next active"><a href="([^"]+)">/, 1]
  if next_href
    # The href is taken from HTML source, so "&" arrives encoded as "&amp;".
    next_page_path = "/shop/negineesan/#{next_href.gsub('&amp;', '&')}"
    items.concat(scrape_list(http, next_page_path))
  end
  items
end
# Entry point of negi_t.rb: scrape the whole shop over a single connection,
# then print every item as three lines (name, image file name, product path).
items = Net::HTTP.start('ttrinity.jp') { |http| scrape_list(http, '/shop/negineesan/') }

items.each do |item|
  name, image, path = item.values_at('itemname', 'image_filename', 'path')
  puts "#{name}\n#{image}\n#{path}\n"
end
# resize.sh: writes a 130x130 resized copy of every file under images/
# to the same relative name under images_small/.

# Stop if images/ is missing; otherwise find would run in the wrong directory.
cd images || exit 1
# -type f keeps find from passing the directory "." itself to convert,
# which the former `-name "*"` test matched as well.
find . -type f -exec convert -resize 130x130 {} ../images_small/{} \;
cd ..
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment