Skip to content

Instantly share code, notes, and snippets.

@josh-lauer
Created August 31, 2011 00:37
Show Gist options
  • Save josh-lauer/1182529 to your computer and use it in GitHub Desktop.
Save josh-lauer/1182529 to your computer and use it in GitHub Desktop.
# this is within a module named "Parser"
# be gentle, i'm rusty, lol
# Parses an email, supplied as an array of lines, into a nested hash.
#
# source - Array of String lines: header lines, a blank line, then the body.
#          Neither the array nor its strings are modified.
#
# Relies on String#blank? and String#down_under, which are not defined in
# this method (presumably provided by the project or by a library such as
# ActiveSupport -- verify against the enclosing module's requires).
#
# Returns {} when source is empty or holds only blank lines. Otherwise
# returns a hash with three keys:
#   :header        - symbolized header name => String value, or an Array of
#                    Strings when the same header occurs more than once.
#                    Folded continuation lines (and ";"-separated parameters)
#                    are appended to the value after a "\n".
#   :original_keys - symbolized header name => the name as first written.
#   :body          - for a multipart message, one recursively parsed hash per
#                    part; otherwise a one-element array holding the array of
#                    body lines.
#
# Prints a message to stdout and calls Process.exit when the header has no
# Content-Type, or when a multipart Content-Type carries no "boundary="
# parameter. Because parts are parsed recursively, this applies to every
# part as well.
#
# NOTE: a repeated Content-Type header is stored as an Array, which does not
# respond to start_with?; that case is not handled here.
def Parser.parse_email(source)
  # if source is empty, do nothing
  return {} if source.empty?

  results = { :header => {}, :original_keys => {}, :body => [] }
  header = results[:header]

  # Strip blank lines off the top of the source; a noncompliant email that
  # starts with a blank line would otherwise look like it has no header.
  # drop_while returns a new array, so the caller's array is left alone.
  source = source.drop_while { |line| line.blank? }
  return {} if source.empty?

  # The first line must not carry leading whitespace, or it would be taken
  # for a folded continuation. Done non-destructively so the caller's string
  # is not mutated.
  source[0] = source[0].lstrip

  # First pass over the header: rstrip each line, remove a trailing ";", and
  # split lines containing ";" into separate lines with leading whitespace,
  # so every parameter looks like a folded continuation of its header.
  # The array is deliberately grown while iterating: the inserted lines are
  # visited (and cleaned up) by the following iterations.
  source.each_with_index do |this_line, n|
    break if this_line.blank?

    line = this_line.rstrip
    line = line.chop if line.end_with?(";")
    if line.include?(";")
      pieces = line.split(";")
      # split returns [] for a line made only of ";", hence the fallback.
      # Inserting in reverse order at n + 1 keeps the chunks in source order.
      (pieces[1..-1] || []).reverse_each do |chunk|
        source.insert(n + 1, " " + chunk)
      end
      line = pieces[0].to_s
    end
    source[n] = line
  end

  # Second pass: read the header line by line.
  body_begin = nil    # index of the first body line, once the header ends
  last_key = nil      # key that a folded continuation line belongs to
  key_seen = false    # true once at least one header line has been stored
  source.each_with_index do |this_line, n|
    if this_line.blank?
      # a blank line marks the end of the header
      body_begin = n + 1
      break
    elsif this_line =~ /^\s+/ && key_seen
      # Leading whitespace: a "folded" continuation of the previous header.
      # Append it to the string, or to the newest entry when the header has
      # been seen several times and is stored as an array.
      folded = "\n#{this_line.strip}"
      if header[last_key].is_a?(String)
        header[last_key] << folded
      else
        header[last_key][-1] << folded
      end
    else
      # The line is scanned once; the symbolized key is nil when the line
      # does not start with a header name.
      this_key = this_line.scan(/^[a-z|A-Z|-]+/)[0]
      this_key_symbolized = this_key && this_key.down_under.intern
      value = this_line.sub(/^[a-z|A-Z|-]+[:]/, "").strip

      if header.has_key?(this_key_symbolized)
        # Repeated header: turn the stored string into an array (if it is
        # not one already) and push the new value onto it.
        existing = header[this_key_symbolized]
        header[this_key_symbolized] = [existing] if existing.is_a?(String)
        header[this_key_symbolized].push(value)
      else
        # New header: store the value and remember the original spelling.
        header[this_key_symbolized] = value
        results[:original_keys][this_key_symbolized] = this_key
      end
      last_key = this_key_symbolized
      key_seen = true
    end
  end

  # Without a blank separator line there is no body at all.
  body_begin ||= source.size
  body_lines = source[body_begin..-1]

  if header.has_key?(:content_type)
    content_type = header[:content_type]
    if content_type.start_with?("multipart")
      # Parameters were folded onto their own lines by the first pass; look
      # through all of them, since boundary need not be the first parameter.
      boundary_line = content_type.split("\n").drop(1).find do |param|
        param.start_with?("boundary=")
      end

      if boundary_line.nil?
        puts "Error: broken boundary line!!"
        Process.exit
      elsif boundary_line.start_with?("boundary=\"")
        # quoted value: drop the prefix and the closing quote
        boundary = boundary_line.sub("boundary=\"", "").chop
      else
        boundary = boundary_line.sub("boundary=", "")
      end

      # each part is itself parsed as an email (headers + body)
      Parser.break_by_boundary(body_lines, boundary).each do |this_body|
        results[:body].push(parse_email(this_body))
      end
    else
      results[:body] = [body_lines]
    end
  else
    puts "NO CONTENT TYPE!!!"
    Process.exit
  end

  results
end # end parse()
# Splits the lines of a multipart body into its parts.
#
# source   - Array of String lines.
# boundary - the boundary String, without the leading "--".
#
# A line counts as a delimiter when it starts with "--<boundary>". Scanning
# stops at the first line that starts with "--<boundary>--" (the closing
# delimiter). Lines before the first delimiter and after the last delimiter
# found are discarded.
#
# Returns an Array with one Array of lines per part, i.e. the lines between
# each pair of consecutive delimiters. The result is empty when fewer than
# two delimiters are found (including when there are none at all).
def Parser.break_by_boundary(source, boundary)
  delimiter = "--#{boundary}"
  closing = "#{delimiter}--"

  # collect the index of every delimiter line, up to and including the
  # closing one
  boundaries = []
  source.each_with_index do |line, index|
    next unless line.start_with?(delimiter)
    boundaries.push(index)
    break if line.start_with?(closing)
  end

  # each_cons yields nothing for zero or one delimiter, so no nil handling
  # is needed here
  boundaries.each_cons(2).map do |opening, following|
    source[(opening + 1)..(following - 1)]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment