Created
November 4, 2009 15:52
-
-
Save rkumar/226143 to your computer and use it in GitHub Desktop.
program to parse rfc2822 type data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby -w | |
#*******************************************************# | |
# This is a sample program which reads a data file | |
# into a hash. | |
# The format can be either: | |
# Key: value | |
# | |
# Key: <<ENDHERE | |
# ... any kind of multiline | |
# ENDHERE | |
# | |
# Description: | |
# Some data comes here. | |
# Multiline data as in RFC2822. | |
# # | |
# The motive for this is that YAML chokes if the multiline
# data contains text that looks like another definition.
# My data is often programming code, which causes
# problems for YAML.
# To test this call this with intro.page | |
# http://www.benegal.org/files/intro.page | |
# or test.page | |
# at http://gist.github.com/226145 | |
# @Author: rkumar | |
# $Id$ # | |
#*******************************************************# | |
# Parses an RFC2822-style data file into a Hash of String keys to String values.
#
# Three kinds of lines are recognised:
#
#   Key: value      single-line value, stored without its line ending
#   Key: <<DELIM    heredoc; the following lines, up to a line consisting of
#                   DELIM, are stored verbatim (line endings included)
#   anything else   continuation; appended to the most recently seen key
#
# Continuation lines that appear before any key has been seen have nothing
# to attach to and are ignored.
#
# @param filename [String] path of the file to read
# @return [Hash{String => String}] the parsed key/value pairs
def parse_heredoc_to_hash(filename)
  myhash = {}
  lastk = nil         # last keyword seen, so continuation lines can be appended to it
  first_line = false  # true while the last key's value is still a bare single-line value

  # Block form guarantees the file handle is closed, even if parsing raises.
  File.open(filename, "r") do |file|
    while (line = file.gets)
      if (m = line.match(/^([_\w]+):\s*<<(.*)$/))
        # Heredoc start. The delimiter is compared as plain text (not as a
        # regular expression) and trailing whitespace is ignored on both the
        # opening line and the terminator line.
        kword = m[1]
        delim = m[2].rstrip
        body = []
        # Keep reading until the end of the heredoc (or end of file).
        while (line = file.gets) && line.rstrip != delim
          body << line
        end
        myhash[kword] = body.join
        lastk = kword
        # Heredoc bodies keep their line endings, so a continuation that
        # follows must not get an extra newline inserted.
        first_line = false
      elsif (m = line.match(/^([_\w]+):\s*(.*)$/))
        # Simple, single-line assignment.
        myhash[m[1]] = m[2]
        lastk = m[1]
        first_line = true
      elsif lastk
        # Not a key line: treat it as a continuation of the previous item.
        # The single-line pattern drops the end-of-line, so restore it before
        # appending the first continuation line.
        separator = first_line ? "\n" : ""
        myhash[lastk] = myhash[lastk] + separator + line
        first_line = false
      end
      # else: text before the first key; nothing to attach it to, so skip it.
    end
  end

  myhash
end
# java's main method !! | |
# Script entry point: runs only when this file is executed directly.
# Parses the file named by the first command-line argument and pretty-prints
# the resulting hash between two separator lines.
if __FILE__ == $0
  require 'pp'
  filename = ARGV[0]
  # A missing argument would otherwise reach the parser as nil and fail with
  # a TypeError; print a usage hint and exit non-zero instead.
  abort "Usage: #{File.basename($0)} <datafile>" if filename.nil?
  myhash = parse_heredoc_to_hash(filename)
  print "==========\n"
  pp myhash
  print "\n==========\n"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment