aycabta/data_to_html.rb

## data_to_html.rb
# Emit the opening <table> tag followed by a black header row made of five
# empty header cells.
header_cell = "  <th></th>\n"
print "<table border=\"0\" width=\"700\" align=\"center\">\n",
      " <tr bgcolor=\"#000000\">\n",
      header_cell * 5,
      " </tr>\n"

# Load the scraped item records from the file 'output'.
#
# The file is read as consecutive groups of three lines -- item name, image
# file name and product path -- and every complete group becomes one hash in
# `items`.  A trailing group with fewer than three lines is discarded.
items = []

File.open('output') do |f|
  f.each_slice(3) do |record|
    # Stop at an incomplete record (the file ended in the middle of a group).
    break if record.size < 3

    # Non-destructive chomp: `chomp!` returns nil when a line has no trailing
    # newline (typically the last line of the file), which would silently
    # turn that field into nil.
    name, image, path = record.map { |line| line.chomp }
    items << {'name' => name, 'image' => image, 'path' => path}
  end
end

# Render the item grid, five items per group, then close the table.
#
# Every group of up to five items produces three <tr> rows: one with linked
# thumbnails followed by two identical rows of linked item names.  Items are
# taken from the end of `items` (which is left empty), and groups with fewer
# than five items are padded with empty cells.
until items.empty?
  # pop(5) returns the tail in stored order; reverse it so the last item
  # comes first, exactly as popping one element at a time would.
  items_of_line = items.pop(5).reverse
  padding = "  <td></td>\n" * (5 - items_of_line.size)

  puts " <tr align=center>\n"
  items_of_line.each do |item|
    link_open = "  <td><a href=\"http://ttrinity.jp#{item['path']}\" target=\"_blank\">"
    thumbnail = "<img src=\"http://negineesan.com/etc/tshirts/#{item['image']}\" alt=\"#{item['name']}\" width=\"144\" height=\"144\" border=\"0\" style=\"background-color: #4c4c4c;\" />"
    puts link_open + thumbnail + "</a></td>\n"
  end
  print padding
  puts " </tr>\n"

  # The name row is emitted twice.
  2.times do
    puts " <tr align=\"center\">\n"
    items_of_line.each do |item|
      puts "  <td><a href=\"http://ttrinity.jp#{item['path']}\">#{item['name']}</a></td>\n"
    end
    print padding
    puts " </tr>\n"
  end
end

puts "</table>\n"

## negi_t.rb
require 'net/http'

# Fetch `path` over the given HTTP connection and store the response body as
# images/<last segment of path>.
#
# http - object responding to get(path) whose result responds to #body
#        (the caller in this file passes a started Net::HTTP session).
# path - request path of the image; its last segment becomes the file name.
#
# Returns the file name (without the images/ directory prefix).
def download_image(http, path)
  filename = File.basename(path)
  begin
    resp = http.get(path)
  rescue
    # A failed request is retried, without limit, after a 30 second pause.
    sleep 30
    retry
  end
  # Binary mode + write: the body is image data and must be stored byte for
  # byte.  `puts` appends a newline whenever the body does not already end
  # with one, and text mode may translate line endings on some platforms.
  File.open(File.join('images', filename), 'wb') do |f|
    f.write(resp.body)
  end
  filename
end

# Scrape one product page: extract the item name and the link to the
# original image, download that image, and return the collected data.
#
# http - object responding to get(path) whose result responds to #body.
# path - path of the product page.
#
# Returns a Hash with the keys 'itemname', 'image_filename' and 'path'.
# 'itemname' / 'image_filename' are nil when the corresponding markup was not
# found on the page; a diagnostic naming the page is written to stderr.
def scrape_item(http, path)
  begin
    resp = http.get(path)
  rescue
    # A failed request is retried, without limit, after a 30 second pause.
    sleep 30
    retry
  end
  page = resp.body

  # Diagnostics go to stderr: stdout carries the three-line item records
  # printed by the driver code and must not be interleaved with messages.
  itemname_match = %r{<div id="itemNameArea">\s+<h2 class="wb">([^<]+)</h2>}m.match(page)
  itemname = itemname_match[1] if itemname_match
  warn "itemname: #{path}" unless itemname_match

  # Only the image linked from the rel="photo" thumbnail is fetched; the
  # separate front/back side images (img_f / img_b) are not downloaded.
  image_match = %r{<a href="([^"]+)"><img src="[^"]+"[^>]+rel="photo"[^>]+class="png[^>]*>}.match(page)
  orig_image = image_match[1] if image_match
  warn "orig_image: #{path}" unless image_match

  # Skip the download when no image link was found instead of passing nil on.
  orig_image_filename = download_image(http, orig_image) if orig_image

  {'itemname' => itemname, 'image_filename' => orig_image_filename, 'path' => path}
end

# Scrape a shop listing page and every following page reachable through its
# "next" link, scraping each listed product with scrape_item.
#
# http - object responding to get(path) whose result responds to #body.
# path - path of the first listing page.
#
# Returns an Array with one scrape_item result per listed product, in
# listing order across all pages.
def scrape_list(http, path)
  items = []
  current_path = path

  # Follow the pagination with a loop rather than one recursive call per page.
  while current_path
    begin
      resp = http.get(current_path)
    rescue
      # A failed request is retried, without limit, after a 30 second pause.
      sleep 30
      retry
    end
    page = resp.body

    page.scan(%r{<p class="item"><a href="(/product/\d+)#\d+">}).flatten.each do |product_path|
      sleep 2 # pause before every product page request
      items << scrape_item(http, product_path)
    end

    # [^"]+ keeps the capture inside the href attribute; a greedy .+ would run
    # on to the last `">` found on the same line.
    next_page_match = /<li class="next active"><a href="([^"]+)">/.match(page)
    current_path =
      if next_page_match
        # The href is HTML-escaped in the page source; decode &amp; before
        # using it as a request path.
        '/shop/negineesan/' + next_page_match[1].gsub(/&amp;/, '&')
      end
  end

  items
end

# Crawl the shop's item list over a single HTTP session, then print every
# item as three lines: item name, image file name and product path.
items = Net::HTTP.start('ttrinity.jp') do |http|
  scrape_list(http, '/shop/negineesan/')
end

items.each do |item|
  name, image, path = item.values_at('itemname', 'image_filename', 'path')
  puts "#{name}\n#{image}\n#{path}\n"
end

## resize.sh
# Run ImageMagick's `convert -resize 130x130` on every file under images/,
# writing the results under images_small/.
mkdir -p images_small
# Abort if images/ is missing: otherwise find would run in the current
# directory and the final `cd ..` would leave the project directory.
cd images || exit 1
# -type f: only regular files; `-name "*"` also matched the directory "."
# itself and handed it to convert.
find . -type f -exec convert -resize 130x130 {} ../images_small/{} \;
cd ..
	# Emit the opening <table> tag followed by a black header row made of five
	# empty header cells.
	header_cell = " <th></th>\n"
	print "<table border=\"0\" width=\"700\" align=\"center\">\n",
	      " <tr bgcolor=\"#000000\">\n",
	      header_cell * 5,
	      " </tr>\n"

	# Load the scraped item records from the file 'output'.
	#
	# The file is read as consecutive groups of three lines -- item name, image
	# file name and product path -- and every complete group becomes one hash in
	# `items`.  A trailing group with fewer than three lines is discarded.
	items = []

	File.open('output') do |f|
	  f.each_slice(3) do |record|
	    # Stop at an incomplete record (the file ended in the middle of a group).
	    break if record.size < 3

	    # Non-destructive chomp: `chomp!` returns nil when a line has no trailing
	    # newline (typically the last line of the file), which would silently
	    # turn that field into nil.
	    name, image, path = record.map { |line| line.chomp }
	    items << {'name' => name, 'image' => image, 'path' => path}
	  end
	end

	# Render the item grid, five items per group, then close the table.
	#
	# Every group of up to five items produces three <tr> rows: one with linked
	# thumbnails followed by two identical rows of linked item names.  Items are
	# taken from the end of `items` (which is left empty), and groups with fewer
	# than five items are padded with empty cells.
	until items.empty?
	  # pop(5) returns the tail in stored order; reverse it so the last item
	  # comes first, exactly as popping one element at a time would.
	  items_of_line = items.pop(5).reverse
	  padding = " <td></td>\n" * (5 - items_of_line.size)

	  puts " <tr align=center>\n"
	  items_of_line.each do |item|
	    link_open = " <td><a href=\"http://ttrinity.jp#{item['path']}\" target=\"_blank\">"
	    thumbnail = "<img src=\"http://negineesan.com/etc/tshirts/#{item['image']}\" alt=\"#{item['name']}\" width=\"144\" height=\"144\" border=\"0\" style=\"background-color: #4c4c4c;\" />"
	    puts link_open + thumbnail + "</a></td>\n"
	  end
	  print padding
	  puts " </tr>\n"

	  # The name row is emitted twice.
	  2.times do
	    puts " <tr align=\"center\">\n"
	    items_of_line.each do |item|
	      puts " <td><a href=\"http://ttrinity.jp#{item['path']}\">#{item['name']}</a></td>\n"
	    end
	    print padding
	    puts " </tr>\n"
	  end
	end

	puts "</table>\n"
	require 'net/http'

	# Fetch `path` over the given HTTP connection and store the response body as
	# images/<last segment of path>.
	#
	# http - object responding to get(path) whose result responds to #body
	#        (the caller in this file passes a started Net::HTTP session).
	# path - request path of the image; its last segment becomes the file name.
	#
	# Returns the file name (without the images/ directory prefix).
	def download_image(http, path)
	  filename = File.basename(path)
	  begin
	    resp = http.get(path)
	  rescue
	    # A failed request is retried, without limit, after a 30 second pause.
	    sleep 30
	    retry
	  end
	  # Binary mode + write: the body is image data and must be stored byte for
	  # byte.  `puts` appends a newline whenever the body does not already end
	  # with one, and text mode may translate line endings on some platforms.
	  File.open(File.join('images', filename), 'wb') do |f|
	    f.write(resp.body)
	  end
	  filename
	end

	# Scrape one product page: extract the item name and the link to the
	# original image, download that image, and return the collected data.
	#
	# http - object responding to get(path) whose result responds to #body.
	# path - path of the product page.
	#
	# Returns a Hash with the keys 'itemname', 'image_filename' and 'path'.
	# 'itemname' / 'image_filename' are nil when the corresponding markup was not
	# found on the page; a diagnostic naming the page is written to stderr.
	def scrape_item(http, path)
	  begin
	    resp = http.get(path)
	  rescue
	    # A failed request is retried, without limit, after a 30 second pause.
	    sleep 30
	    retry
	  end
	  page = resp.body

	  # Diagnostics go to stderr: stdout carries the three-line item records
	  # printed by the driver code and must not be interleaved with messages.
	  itemname_match = %r{<div id="itemNameArea">\s+<h2 class="wb">([^<]+)</h2>}m.match(page)
	  itemname = itemname_match[1] if itemname_match
	  warn "itemname: #{path}" unless itemname_match

	  # Only the image linked from the rel="photo" thumbnail is fetched; the
	  # separate front/back side images (img_f / img_b) are not downloaded.
	  image_match = %r{<a href="([^"]+)"><img src="[^"]+"[^>]+rel="photo"[^>]+class="png[^>]*>}.match(page)
	  orig_image = image_match[1] if image_match
	  warn "orig_image: #{path}" unless image_match

	  # Skip the download when no image link was found instead of passing nil on.
	  orig_image_filename = download_image(http, orig_image) if orig_image

	  {'itemname' => itemname, 'image_filename' => orig_image_filename, 'path' => path}
	end

	# Scrape a shop listing page and every following page reachable through its
	# "next" link, scraping each listed product with scrape_item.
	#
	# http - object responding to get(path) whose result responds to #body.
	# path - path of the first listing page.
	#
	# Returns an Array with one scrape_item result per listed product, in
	# listing order across all pages.
	def scrape_list(http, path)
	  items = []
	  current_path = path

	  # Follow the pagination with a loop rather than one recursive call per page.
	  while current_path
	    begin
	      resp = http.get(current_path)
	    rescue
	      # A failed request is retried, without limit, after a 30 second pause.
	      sleep 30
	      retry
	    end
	    page = resp.body

	    page.scan(%r{<p class="item"><a href="(/product/\d+)#\d+">}).flatten.each do |product_path|
	      sleep 2 # pause before every product page request
	      items << scrape_item(http, product_path)
	    end

	    # [^"]+ keeps the capture inside the href attribute; a greedy .+ would run
	    # on to the last `">` found on the same line.
	    next_page_match = /<li class="next active"><a href="([^"]+)">/.match(page)
	    current_path =
	      if next_page_match
	        # The href is HTML-escaped in the page source; decode &amp; before
	        # using it as a request path (replacing "&" with "&" did nothing).
	        '/shop/negineesan/' + next_page_match[1].gsub(/&amp;/, '&')
	      end
	  end

	  items
	end

	# Crawl the shop's item list over a single HTTP session, then print every
	# item as three lines: item name, image file name and product path.
	items = Net::HTTP.start('ttrinity.jp') do |http|
	  scrape_list(http, '/shop/negineesan/')
	end

	items.each do |item|
	  name, image, path = item.values_at('itemname', 'image_filename', 'path')
	  puts "#{name}\n#{image}\n#{path}\n"
	end
	# Run ImageMagick's `convert -resize 130x130` on every file under images/,
	# writing the results under images_small/.
	mkdir -p images_small
	# Abort if images/ is missing: otherwise find would run in the current
	# directory and the final `cd ..` would leave the project directory.
	cd images || exit 1
	# -type f: only regular files; `-name "*"` also matched the directory "."
	# itself and handed it to convert.
	find . -type f -exec convert -resize 130x130 {} ../images_small/{} \;
	cd ..