# Walks a sticko-themed blog to create sitemap links.
require 'nokogiri'
require 'open-uri'

# Crawls the paginated post index of a blog and writes a sitemap.xml file
# into the current working directory.
#
# Starting at "<home>/page/1", every anchor matching
# "body article header h1 a" becomes one <url> entry, and the
# "nav.pagination a.older-posts" anchor (when present) is followed to the
# next index page. The crawl stops when a page has no such anchor or when
# the next page has already been visited.
#
# Raises whatever URI.open raises on network/HTTP failures; the output file
# is still closed in that case, but the sitemap will be incomplete.

# Base URL of the blog; relative hrefs found on the pages are resolved
# against it.
home = 'http://your-blog.com'

sitemap_header = <<~XML
  <?xml version="1.0" encoding="UTF-8"?>
  <urlset
    xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
      http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
XML

# Block form guarantees the file is closed even if a fetch or parse raises.
File.open('sitemap.xml', 'wb') do |sitemap_file|
  sitemap_file.puts(sitemap_header)

  url = "#{home}/page/1"
  visited = [] # index pages already crawled; guards against pagination cycles

  while url && !visited.include?(url)
    visited << url
    puts "Parsing #{url}"

    # URI.open is used because Kernel#open no longer handles URLs on
    # Ruby 3.0+.
    # NOTE(review): the page is parsed with the XML parser, as in the
    # original; if the theme emits non-well-formed HTML, Nokogiri::HTML may
    # be the better choice -- confirm against a real page.
    doc = Nokogiri::XML(URI.open(url))

    doc.css('body article header h1 a').each do |link|
      href = link['href']
      next if href.nil? || href.empty?

      # URI.join handles both relative ("/my-post/") and absolute hrefs;
      # plain concatenation would duplicate the host for absolute ones.
      # The result is XML-escaped so "&" in a URL keeps the sitemap valid.
      loc = URI.join(home, href).to_s.encode(xml: :text)

      puts "Added link #{href}"
      sitemap_file.puts('<url>')
      sitemap_file.puts("<loc>#{loc}</loc>")
      sitemap_file.puts('<changefreq>daily</changefreq>')
      sitemap_file.puts('</url>')
    rescue URI::InvalidURIError => e
      # A single malformed link should not abort the whole crawl.
      warn "Skipping malformed link #{href.inspect}: #{e.message}"
    end

    older_posts = doc.css('nav.pagination a.older-posts').first
    older_href = older_posts && older_posts['href']
    url = older_href ? URI.join(home, older_href).to_s : nil
  end

  sitemap_file.puts('</urlset>')
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment