Skip to content

Instantly share code, notes, and snippets.

@metade
Created October 12, 2010 08:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save metade/621873 to your computer and use it in GitHub Desktop.
Save metade/621873 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'hpricot'
require 'htmlentities'
require 'mustache'
require 'pp'
# Decodes HTML entities in +string+ and tidies up its spacing.
#
# After decoding, every "\302\240" sequence (the UTF-8 bytes of a
# non-breaking space) becomes a plain space, runs of spaces collapse to a
# single space, and leading/trailing whitespace is stripped.
#
# Returns the cleaned String.
def clean_string(string)
  decoded = HTMLEntities.new.decode(string)
  decoded.gsub("\302\240", ' ').gsub(/ +/, ' ').strip
end
# Simplifies the markup of a message node and returns its inner HTML.
#
# text - a node that supports the "/" search operator and #inner_html
#        (an Hpricot element in this script).
#
# Every <a> whose onclick attribute matches the twitpic "show" pattern is
# swapped for its own inner HTML, and every img with class "correct-png"
# directly inside a div is removed. The node is modified in place.
def clean_text(text)
  (text/'//a').each do |anchor|
    next unless anchor['onclick'] =~ %r[twitpic.com/show]
    anchor.swap(anchor.inner_html)
  end

  badge_images = (text/'//div/img[@class="correct-png"]')
  badge_images.remove

  text.inner_html
end
# Extracts the data for one chat message from a transcript table row.
#
# row - an Hpricot element for one <tr> of the transcript table.
#
# Returns a Hash with :class, :text, :time, :date and :user keys, or nil when
# the row has no span whose id starts with "txt" or its date is empty.
# Skipped rows are reported on stderr so the diagnostic never ends up in the
# generated HTML or in the message list.
def transcript_message(row)
  text = (row/"//td/span").find { |span| span.attributes['id'] =~ /^txt/ }
  time = (row/"//span[@class='bottomtime']").inner_html
  date = (row/"//span[@class='bottomdate']").inner_html
  user = (row/"//span[@class='bottomdisplayname']").first

  # String#any? only existed while String was Enumerable (Ruby 1.8);
  # empty? expresses the same "has a date" check on every Ruby version.
  unless text && !date.empty?
    # Kernel#p returns its argument on Ruby >= 1.9, which used to leak the
    # row text into the message list; log to stderr and return nil instead.
    warn row.inner_text.inspect
    return nil
  end

  # Rows without a display-name span are attributed to "Host".
  username = user.nil? ? "Host" : user.inner_html.strip
  # The row's grandparent carries a class that marks host ("writercomment") posts.
  source = (row.parent.parent['class'] =~ /writercomment/) ? 'host' : 'audience'

  {
    :class => source,
    :text => clean_text(text),
    :time => clean_string(time),
    :date => clean_string(date),
    :user => username
  }
end

# Converts a saved transcript page into an HTML list of messages.
#
# filename - path of the HTML file to parse.
#
# Returns the rendered HTML as a String, with the messages in reverse
# document order. Raises the usual File.open errors (e.g. Errno::ENOENT)
# when the file cannot be read.
def parse_transcript(filename)
  # The block form closes the file handle once parsing is done, and
  # File.open (unlike Kernel#open) never treats a leading "|" as a command.
  doc = File.open(filename) { |file| Hpricot(file) }

  messages = (doc/"//div/table/tr").map { |row| transcript_message(row) }.compact

  # {{{text}}} is deliberately unescaped: clean_text returns HTML markup.
  template = %[
<ul>
{{#messages}}
<li class="{{class}}">
<span class="msg">{{{text}}}</span>
<span class="details">
<span class="user">{{user}}</span>
<span class="time">{{time}}</span>
</span>
</li>
{{/messages}}
</ul>
]
  Mustache.render(template, :messages => messages.reverse)
end
# Script entry point: render the transcript named by the first command-line
# argument and print the resulting HTML to stdout. Without an argument, exit
# with a usage message instead of failing on a nil filename.
abort "Usage: #{$PROGRAM_NAME} TRANSCRIPT_HTML_FILE" if ARGV.empty?
puts parse_transcript(ARGV[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment