josh-lauer/gist:1182529

## gistfile1.txt
# this is within a module named "Parser"
# be gentle, i'm rusty, lol

# Parses an e-mail message that has already been split into lines.
#
# source - Array of String lines: header lines, a blank separator line, then
#          the body lines. The line strings themselves are never modified.
#
# Relies on String#blank? and String#down_under, which are defined outside
# this block.
#
# Returns {} when source is empty or holds only blank lines. Otherwise
# returns a Hash with three keys:
#   :header        - symbolized header name => value String, or an Array of
#                    value Strings when the header occurs more than once.
#                    Folded lines and ";"-separated parameters are appended
#                    to the value, one per "\n"-separated line.
#   :original_keys - symbolized header name => the name as first spelled.
#   :body          - for a multipart message, one parsed Hash per part
#                    (parsed recursively); otherwise a one-element Array
#                    holding the Array of body lines.
#
# Raises ArgumentError when a multipart Content-Type has no usable boundary.
def Parser.parse_email(source)
	# if source is empty, do nothing
	return {} if source.empty?

	# strip blank lines off the top of the source; a noncompliant email may
	# start with them. Nothing is left to parse if every line was blank.
	lines = source.drop_while { |line| line.blank? }
	return {} if lines.empty?

	# the first blank line ends the header. Without one, the whole message is
	# header and the body is empty.
	separator = lines.index { |line| line.blank? }
	header_lines = separator ? lines[0...separator] : lines
	body_lines = separator ? lines[(separator + 1)..-1] : []

	results = { :header => {}, :original_keys => {}, :body => [] }
	read_header_fields(split_header_parameters(header_lines), results)

	# a repeated Content-Type header is stored as an Array; use the first one.
	content_type = results[:header][:content_type]
	content_type = content_type.first if content_type.is_a?(Array)

	if content_type && content_type =~ /\Amultipart/i
		boundary = multipart_boundary(content_type)
		parts = Parser.break_by_boundary(body_lines, boundary)
		results[:body] = parts.map { |part| parse_email(part) }
	else
		# a missing Content-Type is treated like any other non-multipart type
		# (RFC 2045 defaults it to text/plain): the body is kept unparsed.
		results[:body] = [body_lines]
	end

	results
end

# Internal: returns copies of the header lines with trailing whitespace
# removed and every ";"-separated parameter moved onto its own line. The
# parameter lines get a leading space so they read as folded continuations.
# The first line also loses its leading whitespace so that it can never be
# mistaken for a continuation. Empty pieces (";;", trailing ";") are dropped.
def Parser.split_header_parameters(header_lines)
	split_lines = []
	header_lines.each_with_index do |line, index|
		text = index.zero? ? line.strip : line.rstrip
		pieces = text.split(";")
		first = pieces.shift
		split_lines << first unless first.nil? || first.strip.empty?
		pieces.each do |piece|
			split_lines << " #{piece}" unless piece.strip.empty?
		end
	end
	split_lines
end

# Internal: fills results[:header] and results[:original_keys] from header
# lines prepared by split_header_parameters. Returns nothing useful.
def Parser.read_header_fields(header_lines, results)
	header = results[:header]
	# a field name is any run of characters other than whitespace and ":"
	field = /\A([^\s:]+)[ \t]*:(.*)\z/m
	last_key = nil

	header_lines.each do |line|
		if line =~ /\A\s/ && last_key
			# leading whitespace marks a folded continuation: append it to the
			# most recent value (the last item when the header is repeated).
			target = header[last_key]
			target = target.last if target.is_a?(Array)
			target << "\n#{line.strip}"
		elsif line =~ field
			# read both captures before calling anything else that might match
			name = Regexp.last_match(1)
			value = Regexp.last_match(2).strip
			key = name.down_under.intern
			if header.has_key?(key)
				# repeated header: collect the values in an Array
				header[key] = [header[key]] unless header[key].is_a?(Array)
				header[key].push(value)
			else
				header[key] = value
				results[:original_keys][key] = name
			end
			last_key = key
		end
		# a line that is neither a continuation nor "name: value" is skipped
	end
end

# Internal: returns the boundary string from a parsed multipart Content-Type
# value (one parameter per line), with surrounding double quotes removed.
# Raises ArgumentError when there is no boundary parameter or it is empty.
def Parser.multipart_boundary(content_type)
	prefix = /\Aboundary\s*=\s*/i
	parameter = content_type.split("\n").find { |part| part =~ prefix }
	raise ArgumentError, "Error: broken boundary line!!" if parameter.nil?

	value = parameter.sub(prefix, "")
	value = value[/\A"(.*)"\z/, 1] || value
	raise ArgumentError, "Error: broken boundary line!!" if value.empty?
	value
end

Parser.private_class_method :split_header_parameters, :read_header_fields, :multipart_boundary

# Splits the body lines of a multipart message into its parts.
#
# source   - Array of String body lines.
# boundary - the boundary String, without the leading "--".
#
# A delimiter is any line starting with "--" + boundary. Scanning stops at
# the first closing delimiter (a line starting with "--" + boundary + "--").
# Lines before the first delimiter and after the last recorded one are
# discarded; that includes a final part with no delimiter after it.
#
# Returns an Array holding one Array of lines per part. The result is empty
# when source contains fewer than two delimiter lines.
def Parser.break_by_boundary(source, boundary)
	delimiter = "--#{boundary}"
	closing = "#{delimiter}--"

	boundaries = []
	source.each_with_index do |line, index|
		next unless line.start_with?(delimiter)
		boundaries.push(index)
		# the closing delimiter also starts with the plain delimiter, so it
		# is recorded above before the scan stops here.
		break if line.start_with?(closing)
	end

	# each part is the run of lines strictly between two consecutive
	# delimiters; each_cons yields nothing when fewer than two were found.
	boundaries.each_cons(2).map do |opening, following|
		source[(opening + 1)...following]
	end
end
	# this is within a module named "Parser"
	# be gentle, i'm rusty, lol

	# Parses an e-mail message that has already been split into lines.
	#
	# source - Array of String lines: header lines, a blank separator line, then
	#          the body lines. The line strings themselves are never modified.
	#
	# Relies on String#blank? and String#down_under, which are defined outside
	# this block.
	#
	# Returns {} when source is empty or holds only blank lines. Otherwise
	# returns a Hash with three keys:
	#   :header        - symbolized header name => value String, or an Array of
	#                    value Strings when the header occurs more than once.
	#                    Folded lines and ";"-separated parameters are appended
	#                    to the value, one per "\n"-separated line.
	#   :original_keys - symbolized header name => the name as first spelled.
	#   :body          - for a multipart message, one parsed Hash per part
	#                    (parsed recursively); otherwise a one-element Array
	#                    holding the Array of body lines.
	#
	# Raises ArgumentError when a multipart Content-Type has no usable boundary.
	def Parser.parse_email(source)
		# if source is empty, do nothing
		return {} if source.empty?

		# strip blank lines off the top of the source; a noncompliant email may
		# start with them. Nothing is left to parse if every line was blank.
		lines = source.drop_while { |line| line.blank? }
		return {} if lines.empty?

		# the first blank line ends the header. Without one, the whole message is
		# header and the body is empty.
		separator = lines.index { |line| line.blank? }
		header_lines = separator ? lines[0...separator] : lines
		body_lines = separator ? lines[(separator + 1)..-1] : []

		results = { :header => {}, :original_keys => {}, :body => [] }
		read_header_fields(split_header_parameters(header_lines), results)

		# a repeated Content-Type header is stored as an Array; use the first one.
		content_type = results[:header][:content_type]
		content_type = content_type.first if content_type.is_a?(Array)

		if content_type && content_type =~ /\Amultipart/i
			boundary = multipart_boundary(content_type)
			parts = Parser.break_by_boundary(body_lines, boundary)
			results[:body] = parts.map { |part| parse_email(part) }
		else
			# a missing Content-Type is treated like any other non-multipart type
			# (RFC 2045 defaults it to text/plain): the body is kept unparsed.
			results[:body] = [body_lines]
		end

		results
	end

	# Internal: returns copies of the header lines with trailing whitespace
	# removed and every ";"-separated parameter moved onto its own line. The
	# parameter lines get a leading space so they read as folded continuations.
	# The first line also loses its leading whitespace so that it can never be
	# mistaken for a continuation. Empty pieces (";;", trailing ";") are dropped.
	def Parser.split_header_parameters(header_lines)
		split_lines = []
		header_lines.each_with_index do |line, index|
			text = index.zero? ? line.strip : line.rstrip
			pieces = text.split(";")
			first = pieces.shift
			split_lines << first unless first.nil? || first.strip.empty?
			pieces.each do |piece|
				split_lines << " #{piece}" unless piece.strip.empty?
			end
		end
		split_lines
	end

	# Internal: fills results[:header] and results[:original_keys] from header
	# lines prepared by split_header_parameters. Returns nothing useful.
	def Parser.read_header_fields(header_lines, results)
		header = results[:header]
		# a field name is any run of characters other than whitespace and ":"
		field = /\A([^\s:]+)[ \t]*:(.*)\z/m
		last_key = nil

		header_lines.each do |line|
			if line =~ /\A\s/ && last_key
				# leading whitespace marks a folded continuation: append it to the
				# most recent value (the last item when the header is repeated).
				target = header[last_key]
				target = target.last if target.is_a?(Array)
				target << "\n#{line.strip}"
			elsif line =~ field
				# read both captures before calling anything else that might match
				name = Regexp.last_match(1)
				value = Regexp.last_match(2).strip
				key = name.down_under.intern
				if header.has_key?(key)
					# repeated header: collect the values in an Array
					header[key] = [header[key]] unless header[key].is_a?(Array)
					header[key].push(value)
				else
					header[key] = value
					results[:original_keys][key] = name
				end
				last_key = key
			end
			# a line that is neither a continuation nor "name: value" is skipped
		end
	end

	# Internal: returns the boundary string from a parsed multipart Content-Type
	# value (one parameter per line), with surrounding double quotes removed.
	# Raises ArgumentError when there is no boundary parameter or it is empty.
	def Parser.multipart_boundary(content_type)
		prefix = /\Aboundary\s*=\s*/i
		parameter = content_type.split("\n").find { |part| part =~ prefix }
		raise ArgumentError, "Error: broken boundary line!!" if parameter.nil?

		value = parameter.sub(prefix, "")
		value = value[/\A"(.*)"\z/, 1] || value
		raise ArgumentError, "Error: broken boundary line!!" if value.empty?
		value
	end

	Parser.private_class_method :split_header_parameters, :read_header_fields, :multipart_boundary

	# Splits the body lines of a multipart message into its parts.
	#
	# source   - Array of String body lines.
	# boundary - the boundary String, without the leading "--".
	#
	# A delimiter is any line starting with "--" + boundary. Scanning stops at
	# the first closing delimiter (a line starting with "--" + boundary + "--").
	# Lines before the first delimiter and after the last recorded one are
	# discarded; that includes a final part with no delimiter after it.
	#
	# Returns an Array holding one Array of lines per part. The result is empty
	# when source contains fewer than two delimiter lines.
	def Parser.break_by_boundary(source, boundary)
		delimiter = "--#{boundary}"
		closing = "#{delimiter}--"

		boundaries = []
		source.each_with_index do |line, index|
			next unless line.start_with?(delimiter)
			boundaries.push(index)
			# the closing delimiter also starts with the plain delimiter, so it
			# is recorded above before the scan stops here.
			break if line.start_with?(closing)
		end

		# each part is the run of lines strictly between two consecutive
		# delimiters; each_cons yields nothing when fewer than two were found.
		boundaries.each_cons(2).map do |opening, following|
			source[(opening + 1)...following]
		end
	end