Skip to content

Instantly share code, notes, and snippets.

@walterdavis
Created July 21, 2011 14:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save walterdavis/1097316 to your computer and use it in GitHub Desktop.
Save walterdavis/1097316 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby -wKU
# Create an XML report of all html files in a folder or any of its children
require 'rubygems'
require 'fileutils'
gem 'nokogiri'
require 'nokogiri'
# Map the current folder: the directory containing this script is both the
# root of the scan and the place where report.xml is written.
here = File.dirname(__FILE__)
# The URL to prepend to all filenames in this folder
base = 'http://example.com'
# Location of the generated report, next to this script.
report_path = File.join( here, 'report.xml' )
# File.exist? is the supported spelling; the File.exists? alias was deprecated
# and later removed from Ruby, where calling it raises NoMethodError.
puts 'Deleting previous report' if File.exist?( report_path )
# Removal is unconditional (and a no-op when nothing is there), so whatever
# currently occupies the report path is cleared before the new report is written.
FileUtils.rm_rf( report_path )
# Build the report document: a <links> root containing one <link> element
# (with <title> and <url> children) for every .html file found in the folder
# `here` or any of its subfolders.
builder = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
  xml.links {
    # Sort the matches so the report order is reproducible; older Rubies return
    # glob results in whatever order the filesystem yields them.
    Dir.glob( File.join(here, '**/*.html') ).sort.each do | filename |
      # Strip the scanned folder prefix, leaving a path that starts with the
      # separator and can be appended directly to the base URL.
      relative_path = filename.sub( here, '' )
      doc = Nokogiri::HTML( File.read( filename ) )
      # Look the <title> element up once and reuse the node.
      title_node = doc.at_css('title')
      xml.link {
        xml.title( title_node ? title_node.content : 'No title found' )
        xml.url( base + relative_path )
      }
    end
  }
end
# This stylesheet exists only to "pretty-print" the XML report.
# Its templates work as follows:
#   * "newline" emits a line break.
#   * Comments and processing instructions are copied on a fresh, indented line.
#   * Text nodes are whitespace-normalized and written on a fresh, indented
#     line; whitespace-only text nodes are dropped.
#   * Elements with child elements are copied along with their attributes, their
#     children are processed one indent level deeper, and the closing tag goes
#     on its own indented line. Elements without child elements are copied as-is.
# A non-interpolating heredoc is used so the XPath string literals need no
# escaping; chomp drops the line break the heredoc adds after the final tag.
stylesheet_source = <<'XSL'.chomp
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8"/>
<xsl:param name="indent-increment" select="' '"/>
<xsl:template name="newline">
<xsl:text disable-output-escaping="yes">
</xsl:text>
</xsl:template>
<xsl:template match="comment() | processing-instruction()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:copy />
</xsl:template>
<xsl:template match="text()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:value-of select="normalize-space(.)"/>
</xsl:template>
<xsl:template match="text()[normalize-space(.)='']"/>
<xsl:template match="*">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:choose>
<xsl:when test="count(child::*) > 0">
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:apply-templates select="*|text()">
<xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
</xsl:apply-templates>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
</xsl:copy>
</xsl:when>
<xsl:otherwise>
<xsl:copy-of select="."/>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
XSL
xsl = Nokogiri::XSLT( stylesheet_source )
# Apply the pretty-printer first, so a failing transform does not leave an
# empty report.xml behind.
pretty_report = xsl.apply_to( builder.doc ).to_s
# Write the report next to this script. The block form of File.open closes the
# handle even if the write raises; the mode and permission bits are unchanged.
File.open( File.join( here, 'report.xml' ), 'wb', 0664 ) do |report_output|
  report_output.print pretty_report
end
puts 'Report generated!!!'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment