emk/csv2pbo

## csv2pbo
#!/usr/bin/env ruby
#
# Usage:
#     csv2pbo book.csv > book.pbo

require 'csv'

# The CSV file to convert is taken from the first command-line argument.
path = ARGV.first

# PBO output only resembles XML; its parser is reported to be very fragile,
# so the markup is written out literally instead of going through an XML
# library.
root_open_tag = '<ParallelBook lang1="" author1="" title1="" info1="" lang2="" author2="" title2="" info2="" info="">'
print root_open_tag

# Returns a copy of +s+ that is safe to place inside a double-quoted
# attribute value: &, ", < and > are replaced by their named entities.
def escape(s)
  # Order matters: '&' goes first so the ampersands introduced by the later
  # replacements are not escaped a second time.
  replacements = [['&', '&amp;'], ['"', '&quot;'], ['<', '&lt;'], ['>', '&gt;']]
  replacements.reduce(s) { |text, (raw, entity)| text.gsub(raw, entity) }
end

# Convert every CSV row into one <p> element.  Column 0 becomes the "s"
# attribute and column 1 the "t" attribute; a missing column is treated as
# an empty string.
title_pending = true
CSV.foreach(path) do |row|
  source = row[0] || ""
  target = row[1] || ""

  level =
    if title_pending
      # The first row is assumed to be the title and is tagged l="4".
      # These codes seem a bit weird...
      title_pending = false
      4
    elsif source =~ /(\A\d+\.) / && target =~ /(\A\d+\.) /
      # Rows where both columns start with "<digits>. " are tagged l="5" as
      # chapter headings so a table of contents is produced.  The regex may
      # need tweaking for a given book.
      5
    end

  # Ordinary rows carry no "l" attribute at all (if we still had
  # paragraphs, they could be marked as l="3").
  level_attr = level ? " l=\"#{level}\"" : ""

  print "<p#{level_attr} s=\"#{escape(source)}\" t=\"#{escape(target)}\" />"
end

print '</ParallelBook>'
	#!/usr/bin/env ruby
	#
	# Usage:
	# csv2pbo book.csv > book.pbo

	require 'csv'

	# The CSV file to convert is taken from the first command-line argument.
	path = ARGV.first

	# PBO output only resembles XML; its parser is reported to be very fragile,
	# so the markup is written out literally instead of going through an XML
	# library.
	root_open_tag = '<ParallelBook lang1="" author1="" title1="" info1="" lang2="" author2="" title2="" info2="" info="">'
	print root_open_tag

	# Returns a copy of +s+ that is safe to place inside a double-quoted
	# attribute value: &, ", < and > are replaced by their named entities.
	#
	# Each replacement must produce the entity, not the character itself;
	# otherwise the text passes through unescaped and a quote or angle bracket
	# in the input breaks the generated markup.
	def escape(s)
	  # '&' is handled first so the ampersands introduced by the later
	  # replacements are not escaped a second time.
	  s.gsub('&', '&amp;').gsub('"', '&quot;').gsub('<', '&lt;').gsub('>', '&gt;')
	end

	# Convert every CSV row into one <p> element.  Column 0 becomes the "s"
	# attribute and column 1 the "t" attribute; a missing column is treated as
	# an empty string.
	first = true
	CSV.foreach(path) do |row|
	  s = row[0] || ""
	  t = row[1] || ""
	  if first
	    # Assume the first line is the title, and mark it l="4". These codes
	    # seem a bit weird...
	    first = false
	    print "<p l=\"4\" s=\"#{escape(s)}\" t=\"#{escape(t)}\" />"
	  elsif s =~ /(\A\d+\.) / && t =~ /(\A\d+\.) /
	    # Try to mark chapter headings as l="5" so we get a table of contents.
	    # Both columns must start with "<digits>. " to qualify.
	    # You may need to tweak this regex.
	    print "<p l=\"5\" s=\"#{escape(s)}\" t=\"#{escape(t)}\" />"
	  else
	    # If we still had paragraphs, we could mark them as l="3".
	    print "<p s=\"#{escape(s)}\" t=\"#{escape(t)}\" />"
	  end
	end

	print '</ParallelBook>'