wdenton/clean-the-list.rb

## clean-the-list.rb
#!/usr/bin/env ruby

require "csv"

# Input data looks like this:
#
#   52,1552396185; 9781552396186,"Administrative law : cases, text, and materials /"
#   51,0470900598; 9780470900598,Fundamentals of molecular virology /
#   50,1323885722; 9781323885727,"Biology II : evolution, ecology, biodiversity and conservation biology."
#   50,9780199019700; 0199019703,Cognition /
#
# That's loans,isbn,title. The file's own header row gives the loans column a
# super ugly name, so that row is dropped and our own column names are used.
# The output is the same data with exactly one ISBN per row.

# Picks the single ISBN to keep from an isbn field that may hold several,
# separated by "; ".
#
# If one or more start with 978 or 979, the first of those is kept; otherwise
# the first ISBN listed is kept. Colons are removed because a few ISBNs in the
# data have trailing colons.
#
# @param field [String, nil] raw contents of the isbn column
# @return [String, nil] the chosen ISBN, or nil when the field holds none
def preferred_isbn(field)
  # to_s guards against an empty CSV field, which the parser hands over as nil.
  isbns = field.to_s.split("; ")
  chosen = isbns.find { |isbn| isbn.start_with?("978", "979") } || isbns.first
  chosen&.delete(":")
end

# Reads the loans CSV at +path+ and writes the cleaned CSV (header line first)
# to +out+.
#
# @param path [String] path of the CSV file to read
# @param out [#puts] destination for the cleaned CSV; defaults to standard output
# @return [void]
def clean_list(path, out: $stdout)
  columns = %i[loans isbn title]
  out.puts columns.to_csv

  CSV.foreach(path, headers: columns, skip_blanks: true) do |row|
    # Drop the file's own header row. Only the loans column is inspected: a
    # whole-line match on "Loans" would also discard any data row whose title
    # happens to contain that word.
    next if row[:loans].to_s.include?("Loans")

    out.puts [row[:loans], preferred_isbn(row[:isbn]), row[:title]].to_csv
  end
end

file = ARGV[0]

if file
  clean_list(file)
else
  # Nothing to do without an input file; say so rather than finishing silently.
  warn "Usage: #{File.basename($PROGRAM_NAME)} FILE.csv"
end
	#!/usr/bin/env ruby

	require "csv"

	# Input data looks like this:
	#
	#   52,1552396185; 9781552396186,"Administrative law : cases, text, and materials /"
	#   51,0470900598; 9780470900598,Fundamentals of molecular virology /
	#   50,1323885722; 9781323885727,"Biology II : evolution, ecology, biodiversity and conservation biology."
	#   50,9780199019700; 0199019703,Cognition /
	#
	# That's loans,isbn,title. The file's own header row gives the loans column a
	# super ugly name, so that row is dropped and our own column names are used.
	# The output is the same data with exactly one ISBN per row.

	# Picks the single ISBN to keep from an isbn field that may hold several,
	# separated by "; ".
	#
	# If one or more start with 978 or 979, the first of those is kept; otherwise
	# the first ISBN listed is kept. Colons are removed because a few ISBNs in the
	# data have trailing colons.
	#
	# @param field [String, nil] raw contents of the isbn column
	# @return [String, nil] the chosen ISBN, or nil when the field holds none
	def preferred_isbn(field)
	  # to_s guards against an empty CSV field, which the parser hands over as nil.
	  isbns = field.to_s.split("; ")
	  chosen = isbns.find { |isbn| isbn.start_with?("978", "979") } || isbns.first
	  chosen&.delete(":")
	end

	# Reads the loans CSV at +path+ and writes the cleaned CSV (header line first)
	# to +out+.
	#
	# @param path [String] path of the CSV file to read
	# @param out [#puts] destination for the cleaned CSV; defaults to standard output
	# @return [void]
	def clean_list(path, out: $stdout)
	  columns = %i[loans isbn title]
	  out.puts columns.to_csv

	  CSV.foreach(path, headers: columns, skip_blanks: true) do |row|
	    # Drop the file's own header row. Only the loans column is inspected: a
	    # whole-line match on "Loans" would also discard any data row whose title
	    # happens to contain that word.
	    next if row[:loans].to_s.include?("Loans")

	    out.puts [row[:loans], preferred_isbn(row[:isbn]), row[:title]].to_csv
	  end
	end

	file = ARGV[0]

	if file
	  clean_list(file)
	else
	  # Nothing to do without an input file; say so rather than finishing silently.
	  warn "Usage: #{File.basename($PROGRAM_NAME)} FILE.csv"
	end