Skip to content

Instantly share code, notes, and snippets.

@kardeiz
Created August 15, 2012 21:06
Show Gist options
  • Save kardeiz/3363686 to your computer and use it in GitHub Desktop.
Save kardeiz/3363686 to your computer and use it in GitHub Desktop.
UMI ETD XML to MARCXML with Ruby
#!/usr/bin/env ruby
require 'nokogiri'
# Converts UMI ETD XML files into a single MARCXML collection.
#
# Usage: ruby SCRIPT DIRECTORY > records.xml
#
# Every regular file directly inside DIRECTORY is parsed as XML and turned
# into one <record>; the finished collection is printed to STDOUT. A file
# without a primary author or a title is skipped with a warning on STDERR,
# so one bad file does not abort the whole batch.

# Returns the text of the first node matching +xpath+ below +node+, or an
# empty string when nothing matches (at_xpath returns nil in that case).
def node_text(node, xpath)
  found = node.at_xpath(xpath)
  found ? found.text : ''
end

# Emits a <datafield> and yields so the caller can add its subfields.
# Both indicators default to a blank because the MARCXML schema requires
# ind1 and ind2 on every datafield.
def marc_field(xml, tag, ind1 = ' ', ind2 = ' ')
  xml.datafield(:tag => tag, :ind1 => ind1, :ind2 => ind2) { yield }
end

# Emits a <subfield> holding +value+; nil or empty values are left out.
def marc_subfield(xml, code, value)
  return if value.nil? || value.empty?

  xml.subfield(:code => code) { xml.text value }
end

# Splits "Main title: subtitle" at the first colon.
# Returns [title, subtitle], both stripped; subtitle is "" when absent.
def split_title(full_title)
  main, rest = full_title.split(':', 2)
  [main.to_s.strip, rest.to_s.strip]
end

# Builds the 100 $a heading: "Surname, First Middle" followed by "," when a
# suffix follows in $c, otherwise by a closing "." (not doubled when the
# name already ends in one, e.g. a middle initial).
def author_heading(surname, fname, middle, suffix)
  heading = "#{surname}, #{fname}"
  heading += " #{middle}" unless middle.empty?
  return "#{heading}," unless suffix.empty?

  heading.end_with?('.') ? heading : "#{heading}."
end

# Builds the 245 $c statement: "by First Middle Surname[, Suffix]".
# Empty name parts are dropped so no doubled spaces appear.
def responsibility_statement(surname, fname, middle, suffix)
  statement = "by #{[fname, middle, surname].reject(&:empty?).join(' ')}"
  statement += ", #{suffix}" unless suffix.empty?
  statement
end

# Builds the 500 note "<institution contact>; advisor, First Surname".
# The advisor part is omitted when the document names no advisor.
def contact_note(doc)
  contact = node_text(doc, '//DISS_institution/DISS_inst_contact')
  advisor = [
    node_text(doc, '//DISS_advisor/DISS_name/DISS_fname'),
    node_text(doc, '//DISS_advisor/DISS_name/DISS_surname')
  ].reject(&:empty?).join(' ')
  advisor.empty? ? contact : "#{contact}; advisor, #{advisor}"
end

# Appends one MARC <record> for the parsed ETD document +doc+ to +xml+.
# Returns false, emitting nothing, when the document has no primary author
# or no title; returns true otherwise.
def build_record(xml, doc)
  author = doc.at_xpath("//DISS_author[@type='primary']/DISS_name")
  full_title = node_text(doc, '//DISS_description/DISS_title')
  return false if author.nil? || full_title.empty?

  surname = node_text(author, './DISS_surname')
  fname = node_text(author, './DISS_fname')
  middle = node_text(author, './DISS_middle')
  suffix = node_text(author, './DISS_suffix')
  title, subtitle = split_title(full_title)
  comp_date = node_text(doc, '//DISS_dates/DISS_comp_date')
  note = contact_note(doc)

  xml.record do
    # 100: author heading, with the name suffix in $c.
    marc_field(xml, '100', '1') do
      marc_subfield(xml, 'a', author_heading(surname, fname, middle, suffix))
      marc_subfield(xml, 'c', suffix)
    end

    # 245: title statement.
    marc_field(xml, '245', '1', '0') do
      marc_subfield(xml, 'a', title)
      marc_subfield(xml, 'b', subtitle)
      marc_subfield(xml, 'c', responsibility_statement(surname, fname, middle, suffix))
      marc_subfield(xml, 'h', '[electronic resource]')
    end

    # 246: the subtitle again as a title of its own, first letter capitalized.
    unless subtitle.empty?
      marc_field(xml, '246', '3', '0') do
        marc_subfield(xml, 'a', subtitle[0].capitalize + subtitle[1..-1])
      end
    end

    # 260: place, institution name and completion date.
    marc_field(xml, '260') do
      marc_subfield(xml, 'a', '[Fort Worth, Tex.]')
      marc_subfield(xml, 'b', node_text(doc, '//DISS_institution/DISS_inst_name'))
      marc_subfield(xml, 'c', comp_date)
    end

    # 490: "<completion date> dissertation"; skipped when there is no date.
    unless comp_date.empty?
      marc_field(xml, '490', '0') do
        marc_subfield(xml, 'a', "#{comp_date} dissertation")
      end
    end

    # 500: general notes.
    marc_field(xml, '500') do
      marc_subfield(xml, 'a', 'Title from dissertation title page')
    end
    unless node_text(doc, '//DISS_abstract/DISS_para').empty?
      marc_field(xml, '500') do
        marc_subfield(xml, 'a', 'Includes abstract')
      end
    end
    unless note.empty?
      marc_field(xml, '500') { marc_subfield(xml, 'a', note) }
    end

    # 520: one field per non-empty abstract paragraph.
    doc.xpath('//DISS_abstract/DISS_para').each do |para|
      next if para.text.empty?

      marc_field(xml, '520') { marc_subfield(xml, 'a', para.text) }
    end

    # 538: access notes.
    marc_field(xml, '538') do
      marc_subfield(xml, 'a', 'Mode of access: World Wide Web')
    end
    if node_text(doc, '//DISS_binary/@type') == 'PDF'
      marc_field(xml, '538') do
        marc_subfield(xml, 'a', 'System requirements: Adobe Acrobat reader')
      end
    end
  end
  true
end

source_dir = ARGV[0]
if source_dir.nil? || !File.directory?(source_dir)
  abort "Usage: #{$PROGRAM_NAME} DIRECTORY"
end

# Absolute paths of the regular files directly inside the source directory
# (subdirectories cannot be opened as XML), sorted for a stable record order.
etd_paths = Dir.chdir(source_dir) do
  Dir.glob('./*').map { |entry| File.expand_path(entry) }
end
etd_paths = etd_paths.select { |path| File.file?(path) }.sort

builder = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
  xml.collection(:xmlns => 'http://www.loc.gov/MARC21/slim') do
    etd_paths.each do |path|
      # Block form closes the file handle as soon as parsing is done.
      doc = File.open(path) { |io| Nokogiri::XML(io) }
      next if build_record(xml, doc)

      warn "Skipping #{path}: no primary author or title found"
    end
  end
end
puts builder.to_xml
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment