Skip to content

Instantly share code, notes, and snippets.

@scottlingran
Created March 28, 2015 03:20
Show Gist options
  • Save scottlingran/2182c1b2955aeb78395b to your computer and use it in GitHub Desktop.
Save scottlingran/2182c1b2955aeb78395b to your computer and use it in GitHub Desktop.
require 'nokogiri'
require 'time'
require 'json'
# Converts one conversation-thread node of a parsed HTML document into a plain
# Hash that can be serialized (for example with #to_json).
module Parser
  extend self

  # Builds the data for a single thread element.
  #
  # The element's own text nodes are read as a ", "-separated list of names.
  # Its child elements are consumed in pairs: a header element (holding
  # ".message_header .user" and ".message_header .meta") followed by the
  # element whose text is the message content.
  #
  # @param el [Nokogiri::XML::Element] the thread node
  # @return [Hash] :contact => Array<String> of names; :messages => Array of
  #   Hashes with :user, :meta, :timestamp (epoch seconds) and :content,
  #   ordered by ascending timestamp
  # @raise [RuntimeError] if el is not a Nokogiri::XML::Element
  # @raise [ArgumentError] if a header's meta text cannot be parsed by Time.parse
  def message(el)
    raise "Requires Nokogiri::XML::Element" unless el.is_a?(Nokogiri::XML::Element)

    names = el.css("> text()").text.split(", ")
    # All names are kept. Narrowing to a single contact was left disabled:
    # data[:contact] = names[0].scan(/Scott Li/) ? names[1] : names[0]

    messages = []
    el.elements.each_slice(2) do |header, body|
      user = header.css(".message_header .user").text
      meta = header.css(".message_header .meta").text
      timestamp = Time.parse(meta).to_i
      # A trailing header with no content element after it yields no message.
      next unless body

      messages << {
        user: user,
        meta: meta,
        timestamp: timestamp,
        content: body.text
      }
    end

    # Array#sort is not stable, so the position in the source is used as a
    # tie-breaker: messages sharing a timestamp keep their original order.
    ordered = messages
              .each_with_index
              .sort_by { |msg, position| [msg[:timestamp], position] }
              .map(&:first)

    { contact: names, messages: ordered }
  end
end
# Script entry point: parse the HTML file, convert every ".thread" node with
# Parser.message, and save the collected results as JSON.
html = File.read("html/messages.htm") # File.read closes the handle itself
noko = Nokogiri::HTML.parse(html)
threads = noko.css(".thread")

# One worker thread per ".thread" node. Each worker returns its result rather
# than appending to a shared array or incrementing a shared counter, so no
# state is mutated from several threads at once.
jobs = threads.map do |thread|
  Thread.new(thread) { |node| Parser.message(node) }
end

# Thread#value joins the worker and re-raises anything it raised. Collecting in
# spawn order keeps the output in the same order as the nodes in the document.
all_messages = jobs.each_with_index.map do |job, index|
  parsed = job.value
  puts index + 1 # progress: how many workers have been collected so far
  parsed
end

puts "converting to json"
m_json = all_messages.to_json

puts "saving to file"
# File.write opens, truncates, writes and closes in one call, so the handle is
# released even if the write fails.
File.write("messages.json", m_json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment