evantravers/migrate_booknotes.rb

## migrate_booknotes.rb
require 'date'
require 'uri'
require 'net/http'
require 'json'
require 'rubygems/text'

include Gem::Text

# Directory that is scanned for notes (relative to the working directory).
SRC = './'
# Directory that receives the migrated notes.
DST = './migrated'
# Matches a colon followed by a run of letters, hyphens or underscores;
# the run after the colon is captured as group 1.
ORGTAG = %r{:([a-zA-Z\-_]+)}

# Renders a note as a plain-text metadata header followed by the note body.
#
# Title and author are read from the string keys "title" and "authors"
# first and fall back to the symbol keys :title and :author, so values
# stored under either key style end up in the header.
#
# @param data [Hash] note metadata; reads "title", "subtitle", "authors",
#   "publisher", "publishedDate", :title, :author, :identifier, :tags,
#   :id and :body
# @return [String] the header lines, one blank line, then the body
def render(data)
  title  = data["title"] || data[:title]
  author = mla_authors(data["authors"]) || data[:author]

  header = ["title: #{title}"]
  # The subtitle line is only emitted when a subtitle is present.
  header << "subtitle: #{data["subtitle"]}" if data["subtitle"]
  header.push(
    "author: #{author}",
    "publisher: #{data["publisher"]}",
    "year: #{data["publishedDate"]}",
    "identifier: #{data[:identifier]}",
    "tags: #{data[:tags]}",
    "id: #{data[:id]}"
  )

  "#{header.join("\n")}\n\n#{data[:body]}"
end

# Formats a date or time as an identifier made of year, month, day, hour,
# minute and second with no separators.
#
# @param datetime [#strftime] the moment to encode
# @return [String] e.g. "20200102030405"
def id(datetime)
  stamp_pattern = %w[%Y %m %d %H %M %S].join
  datetime.strftime(stamp_pattern)
end

# Returns the note body without leading or trailing whitespace.
#
# @param string [String] raw note text
# @return [String] a trimmed copy
def clean_body(string)
  string.lstrip.rstrip
end

# Joins author names into a single comma-separated string.
#
# @param authors [Array<String>, nil] author names
# @return [String, nil] the joined names, or nil when no list was given
def mla_authors(authors)
  return nil unless authors

  authors.join(", ")
end

# Turns a title into a lower-case, hyphen-separated slug: every character
# that is not an ASCII letter, digit or hyphen becomes a hyphen, and runs
# of hyphens collapse into one.
#
# @param string [String] text to slugify
# @return [String] the slug
def clean_title(string)
  lowered    = string.downcase
  hyphenated = lowered.gsub(/[^a-zA-Z0-9\-]/, "-")
  hyphenated.squeeze("-")
end

# Returns the first http or https URL found in the given text.
#
# NOTE(review): URI.extract and URI.regexp are marked obsolete in current
# Ruby documentation — confirm they are still available before upgrading.
#
# @param string [String] text to search
# @return [String, nil] the first matching URL, or nil when there is none
def extract_link(string)
  candidates = URI.extract(string)
  candidates.find do |candidate|
    candidate =~ /\A#{URI::regexp(['http', 'https'])}\z/
  end
end

# Converts free text into a hashtag: every character other than an ASCII
# letter or underscore becomes an underscore, runs of underscores collapse
# into one, the result is lower-cased and prefixed with "#".
#
# @param str [String] text to convert
# @return [String] the hashtag
def string_to_tag(str)
  slug = str.gsub(/[^a-zA-Z_]/, '_').squeeze('_').downcase
  "##{slug}"
end

# Walks +folder+ recursively and migrates every ".md"/".txt" note it finds.
#
# Entries named "migrated", ".DS_Store" or "migrate_booknotes.rb" are
# ignored, and hidden directories are not descended into.
#
# @param folder [String] directory to scan
# @return [Array<String>] the entry names returned by Dir.children(folder)
def process_folder(folder)
  Dir.children(folder).each do |filename|
    next if ["migrated", ".DS_Store", "migrate_booknotes.rb"].include?(filename)

    # Build the full path before testing or recursing: a bare entry name
    # would be resolved against the working directory, not +folder+.
    path = File.join(folder, filename)
    if File.directory?(path)
      # Dot-directories such as .git are assumed to hold no notes.
      process_folder(path) unless filename.start_with?(".")
      next
    end

    puts "Processing #{filename}…"
    # Anchored so that e.g. "notes.txt.bak" or "draft.mdx" is not migrated.
    migrate_note(folder, filename) if filename.match?(/\.(?:md|txt)\z/)
  end
end

# Migrates one note: collects its metadata, renders it and writes the
# result into DST under an id-prefixed filename.
def migrate_note(folder, filename)
  path    = File.join(folder, filename)
  content = File.read(path)
  data    = {}

  date = shift_to_folder_year(note_date(path, content), folder)
  data[:id]   = id(date)
  data[:date] = date.strftime("%a, %e %b %Y %T")

  tags    = extract_tags(content)
  content = content.gsub(ORGTAG, '\1')

  # The filename without its extension doubles as the search query.
  query         = File.basename(filename, ".*")
  title, author = split_title_and_author(query)
  book          = lookup_book(query, title)

  if book
    data.merge!(book.fetch("volumeInfo", {}))
    # dig returns nil instead of raising when no identifiers are listed.
    data[:identifier] = data.dig("industryIdentifiers", 0, "identifier")
    Array(data["categories"]).each { |category| tags.push(string_to_tag(category)) }
  else
    # Store the filename-derived values under the same string keys the
    # API data uses, since those are the keys read when rendering and
    # when building the output filename.
    data["title"]   = title
    data["authors"] = [author] if author
  end

  data[:tags] = tags.uniq.join(", ")
  data[:body] = clean_body(content)

  label    = [data["title"], mla_authors(data["authors"])].compact.reject(&:empty?).join(" by ")
  new_name = "#{data[:id]}-#{clean_title(label)}.md"
  Dir.mkdir(DST) unless Dir.exist?(DST)
  File.write(File.join(DST, new_name), render(data))
end

# Returns the date Date.parse recognises in the note text, or the file's
# creation time when the text does not parse as a date.
def note_date(path, content)
  # limit: nil lifts Date.parse's default cap on input length, which would
  # otherwise reject any note longer than 128 characters.
  Date.parse(content, true, Date::ITALY, limit: nil)
rescue StandardError
  created = begin
    File.birthtime(path)
  rescue NotImplementedError
    # Raised on platforms without creation times; use the mtime instead.
    File.mtime(path)
  end
  # DateTime, unlike Time, responds to prev_year/next_year, which the year
  # adjustment needs, and it keeps the time of day for the id.
  created.to_datetime
end

# Moves +date+ one year at a time until it falls in the year the folder is
# named after. Folders not named with four digits leave the date untouched.
def shift_to_folder_year(date, folder)
  year_name = File.basename(folder)
  return date unless year_name.match?(/\A\d{4}\z/)

  target = year_name.to_i
  until date.year == target
    puts "Adjusting date... #{date.year}"
    date = date.year > target ? date.prev_year : date.next_year
  end
  date
end

# Collects the ORGTAG matches in +content+ as unique lower-case hashtags
# (hyphens become underscores) and appends "#book".
def extract_tags(content)
  tags = content.scan(ORGTAG).flatten.map { |tag| "##{tag.tr("-", "_").downcase}" }.uniq
  tags.push("#book")
end

# Splits "Title by Author" at the last " by "; the author is nil when the
# separator is missing.
def split_title_and_author(query)
  title, separator, author = query.rpartition(" by ")
  return [query.strip, nil] if separator.empty?

  [title.strip, author.strip]
end

# Queries the Google Books volumes endpoint and returns the item whose
# title is closest (Levenshtein distance) to +title+, or nil without items.
def lookup_book(query, title)
  uri   = URI("https://www.googleapis.com/books/v1/volumes?q=#{URI.encode_www_form_component(query)}")
  books = JSON.parse(Net::HTTP.get_response(uri).body)

  # Array() tolerates a response without an "items" key (presumably what
  # the API sends for zero results — confirm).
  # https://stackoverflow.com/questions/16323571/measure-the-distance-between-two-strings-with-ruby
  Array(books["items"]).min_by do |candidate|
    levenshtein_distance(candidate.dig("volumeInfo", "title").to_s, title)
  end
end

# Start the migration only when this file is executed directly, so that
# loading or requiring it does not kick off the run.
process_folder(SRC) if __FILE__ == $PROGRAM_NAME
	require 'date'
	require 'uri'
	require 'net/http'
	require 'json'
	require 'rubygems/text'

	include Gem::Text

	# Directory that is scanned for notes (relative to the working directory).
	SRC = './'
	# Directory that receives the migrated notes.
	DST = './migrated'
	# Matches a colon followed by a run of letters, hyphens or underscores;
	# the run after the colon is captured as group 1.
	ORGTAG = %r{:([a-zA-Z\-_]+)}

	# Renders a note as a plain-text metadata header followed by the note body.
	#
	# Title and author are read from the string keys "title" and "authors"
	# first and fall back to the symbol keys :title and :author, so values
	# stored under either key style end up in the header. The header is
	# built line by line so no source indentation leaks into the output.
	#
	# @param data [Hash] note metadata; reads "title", "subtitle", "authors",
	#   "publisher", "publishedDate", :title, :author, :identifier, :tags,
	#   :id and :body
	# @return [String] the header lines, one blank line, then the body
	def render(data)
	  title  = data["title"] || data[:title]
	  author = mla_authors(data["authors"]) || data[:author]

	  header = ["title: #{title}"]
	  # The subtitle line is only emitted when a subtitle is present.
	  header << "subtitle: #{data["subtitle"]}" if data["subtitle"]
	  header.push(
	    "author: #{author}",
	    "publisher: #{data["publisher"]}",
	    "year: #{data["publishedDate"]}",
	    "identifier: #{data[:identifier]}",
	    "tags: #{data[:tags]}",
	    "id: #{data[:id]}"
	  )

	  "#{header.join("\n")}\n\n#{data[:body]}"
	end

	# Formats a date or time as an identifier made of year, month, day, hour,
	# minute and second with no separators.
	#
	# @param datetime [#strftime] the moment to encode
	# @return [String] e.g. "20200102030405"
	def id(datetime)
	  stamp_pattern = %w[%Y %m %d %H %M %S].join
	  datetime.strftime(stamp_pattern)
	end

	# Returns the note body without leading or trailing whitespace.
	#
	# @param string [String] raw note text
	# @return [String] a trimmed copy
	def clean_body(string)
	  string.lstrip.rstrip
	end

	# Joins author names into a single comma-separated string.
	#
	# @param authors [Array<String>, nil] author names
	# @return [String, nil] the joined names, or nil when no list was given
	def mla_authors(authors)
	  return nil unless authors

	  authors.join(", ")
	end

	# Turns a title into a lower-case, hyphen-separated slug: every character
	# that is not an ASCII letter, digit or hyphen becomes a hyphen, and runs
	# of hyphens collapse into one.
	#
	# @param string [String] text to slugify
	# @return [String] the slug
	def clean_title(string)
	  lowered    = string.downcase
	  hyphenated = lowered.gsub(/[^a-zA-Z0-9\-]/, "-")
	  hyphenated.squeeze("-")
	end

	# Returns the first http or https URL found in the given text.
	#
	# NOTE(review): URI.extract and URI.regexp are marked obsolete in current
	# Ruby documentation — confirm they are still available before upgrading.
	#
	# @param string [String] text to search
	# @return [String, nil] the first matching URL, or nil when there is none
	def extract_link(string)
	  candidates = URI.extract(string)
	  # Block parameters use plain pipes; the backslash-escaped form is not
	  # valid Ruby.
	  candidates.find do |candidate|
	    candidate =~ /\A#{URI::regexp(['http', 'https'])}\z/
	  end
	end

	# Converts free text into a hashtag: every character other than an ASCII
	# letter or underscore becomes an underscore, runs of underscores collapse
	# into one, the result is lower-cased and prefixed with "#".
	#
	# @param str [String] text to convert
	# @return [String] the hashtag
	def string_to_tag(str)
	  slug = str.gsub(/[^a-zA-Z_]/, '_').squeeze('_').downcase
	  "##{slug}"
	end

	# Walks +folder+ recursively and migrates every ".md"/".txt" note it finds.
	#
	# Entries named "migrated", ".DS_Store" or "migrate_booknotes.rb" are
	# ignored, and hidden directories are not descended into.
	#
	# @param folder [String] directory to scan
	# @return [Array<String>] the entry names returned by Dir.children(folder)
	def process_folder(folder)
	  Dir.children(folder).each do |filename|
	    next if ["migrated", ".DS_Store", "migrate_booknotes.rb"].include?(filename)

	    # Build the full path before testing or recursing: a bare entry name
	    # would be resolved against the working directory, not +folder+.
	    path = File.join(folder, filename)
	    if File.directory?(path)
	      # Dot-directories such as .git are assumed to hold no notes.
	      process_folder(path) unless filename.start_with?(".")
	      next
	    end

	    puts "Processing #{filename}…"
	    # Anchored so that e.g. "notes.txt.bak" or "draft.mdx" is not migrated.
	    migrate_note(folder, filename) if filename.match?(/\.(?:md|txt)\z/)
	  end
	end

	# Migrates one note: collects its metadata, renders it and writes the
	# result into DST under an id-prefixed filename.
	def migrate_note(folder, filename)
	  path    = File.join(folder, filename)
	  content = File.read(path)
	  data    = {}

	  date = shift_to_folder_year(note_date(path, content), folder)
	  data[:id]   = id(date)
	  data[:date] = date.strftime("%a, %e %b %Y %T")

	  tags    = extract_tags(content)
	  content = content.gsub(ORGTAG, '\1')

	  # The filename without its extension doubles as the search query.
	  query         = File.basename(filename, ".*")
	  title, author = split_title_and_author(query)
	  book          = lookup_book(query, title)

	  if book
	    data.merge!(book.fetch("volumeInfo", {}))
	    # dig returns nil instead of raising when no identifiers are listed.
	    data[:identifier] = data.dig("industryIdentifiers", 0, "identifier")
	    Array(data["categories"]).each { |category| tags.push(string_to_tag(category)) }
	  else
	    # Store the filename-derived values under the same string keys the
	    # API data uses, since those are the keys read when rendering and
	    # when building the output filename.
	    data["title"]   = title
	    data["authors"] = [author] if author
	  end

	  data[:tags] = tags.uniq.join(", ")
	  data[:body] = clean_body(content)

	  label    = [data["title"], mla_authors(data["authors"])].compact.reject(&:empty?).join(" by ")
	  new_name = "#{data[:id]}-#{clean_title(label)}.md"
	  Dir.mkdir(DST) unless Dir.exist?(DST)
	  File.write(File.join(DST, new_name), render(data))
	end

	# Returns the date Date.parse recognises in the note text, or the file's
	# creation time when the text does not parse as a date.
	def note_date(path, content)
	  # limit: nil lifts Date.parse's default cap on input length, which would
	  # otherwise reject any note longer than 128 characters.
	  Date.parse(content, true, Date::ITALY, limit: nil)
	rescue StandardError
	  created = begin
	    File.birthtime(path)
	  rescue NotImplementedError
	    # Raised on platforms without creation times; use the mtime instead.
	    File.mtime(path)
	  end
	  # DateTime, unlike Time, responds to prev_year/next_year, which the year
	  # adjustment needs, and it keeps the time of day for the id.
	  created.to_datetime
	end

	# Moves +date+ one year at a time until it falls in the year the folder is
	# named after. Folders not named with four digits leave the date untouched.
	def shift_to_folder_year(date, folder)
	  year_name = File.basename(folder)
	  return date unless year_name.match?(/\A\d{4}\z/)

	  target = year_name.to_i
	  until date.year == target
	    puts "Adjusting date... #{date.year}"
	    date = date.year > target ? date.prev_year : date.next_year
	  end
	  date
	end

	# Collects the ORGTAG matches in +content+ as unique lower-case hashtags
	# (hyphens become underscores) and appends "#book".
	def extract_tags(content)
	  tags = content.scan(ORGTAG).flatten.map { |tag| "##{tag.tr("-", "_").downcase}" }.uniq
	  tags.push("#book")
	end

	# Splits "Title by Author" at the last " by "; the author is nil when the
	# separator is missing.
	def split_title_and_author(query)
	  title, separator, author = query.rpartition(" by ")
	  return [query.strip, nil] if separator.empty?

	  [title.strip, author.strip]
	end

	# Queries the Google Books volumes endpoint and returns the item whose
	# title is closest (Levenshtein distance) to +title+, or nil without items.
	def lookup_book(query, title)
	  uri   = URI("https://www.googleapis.com/books/v1/volumes?q=#{URI.encode_www_form_component(query)}")
	  books = JSON.parse(Net::HTTP.get_response(uri).body)

	  # Array() tolerates a response without an "items" key (presumably what
	  # the API sends for zero results — confirm).
	  # https://stackoverflow.com/questions/16323571/measure-the-distance-between-two-strings-with-ruby
	  Array(books["items"]).min_by do |candidate|
	    levenshtein_distance(candidate.dig("volumeInfo", "title").to_s, title)
	  end
	end

	# Start the migration only when this file is executed directly, so that
	# loading or requiring it does not kick off the run.
	process_folder(SRC) if __FILE__ == $PROGRAM_NAME