Created
April 10, 2020 01:05
-
-
Save evantravers/e573095dd8daed889d6bc24ea2ab719c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'date'
require 'fileutils'
require 'json'
require 'net/http'
require 'rubygems/text'
require 'uri'

include Gem::Text
# Folder that is scanned for existing notes.
SRC = "./".freeze
# Folder the migrated notes are written into.
DST = "./migrated".freeze
# Matches a colon followed by a run of letters, hyphens, or underscores and
# captures that run (without the colon) as the tag name.
ORGTAG = /:([a-zA-Z\-_]+)/
# Builds the text of a migrated note: a run of "key: value" header lines
# followed by the note body, joined with newlines.
#
# The subtitle line is only emitted when data["subtitle"] is present.
#
# @param data [Hash] note metadata; string keys ("title", "subtitle",
#   "authors", "publisher", "publishedDate") and symbol keys (:identifier,
#   :tags, :id, :body, plus the :title / :author fallbacks) are read
# @return [String] the rendered note
def render(data)
  # A note with no matched book only carries :title / :author (symbol keys),
  # so fall back to those rather than emitting blank header fields.
  title = data["title"] || data[:title]
  authors = data["authors"] ? mla_authors(data["authors"]) : data[:author]

  lines = ["title: #{title}"]
  lines << "subtitle: #{data["subtitle"]}" if data["subtitle"]
  lines.push(
    "author: #{authors}",
    "publisher: #{data["publisher"]}",
    "year: #{data["publishedDate"]}",
    "identifier: #{data[:identifier]}",
    "tags: #{data[:tags]}",
    "id: #{data[:id]}",
    data[:body].to_s
  )
  lines.join("\n")
end
# Formats a date/time as a 14-digit YYYYMMDDHHMMSS string, used as the note ID.
#
# @param datetime [#strftime] any object responding to strftime
# @return [String]
def id(datetime)
  datetime.strftime("%Y%m%d%H%M%S")
end
# Removes leading and trailing whitespace from the note body.
#
# @param string [String]
# @return [String] a stripped copy; the argument is not modified
def clean_body(string)
  string.strip
end
# Joins a list of author names into one comma-separated string.
#
# @param authors [Array<String>, nil]
# @return [String, nil] nil when no author list is given
def mla_authors(authors)
  return unless authors

  authors.join(", ")
end
# Turns a title into a lowercase, hyphen-separated slug for use in a filename.
#
# Every character other than a-z, 0-9, or "-" becomes "-", and runs of
# hyphens collapse to one. Leading/trailing hyphens are kept.
#
# @param string [String]
# @return [String]
def clean_title(string)
  string
    .downcase
    .gsub(/[^a-z0-9-]/, "-") # input is already lowercase, so A-Z is not needed
    .squeeze("-")
end
# Returns the first http(s) URL found in +string+.
#
# Uses the RFC 2396 parser directly rather than the obsolete URI.extract /
# URI.regexp module functions, restricting the scan to the http and https
# schemes.
#
# @param string [String]
# @return [String, nil] the first matching URL, or nil when there is none
def extract_link(string)
  URI::RFC2396_Parser.new.extract(string, %w[http https]).first
end
# Converts free text into a hashtag: every character other than a letter or
# underscore becomes "_", runs of underscores collapse to one, the result is
# lowercased and prefixed with "#".
#
# @param str [String]
# @return [String] a new string; the argument is not modified
def string_to_tag(str)
  slug = str.gsub(/[^a-zA-Z_]/, "_").squeeze("_").downcase
  "##{slug}"
end
# Migrates every Markdown / plain-text note found in +folder+, recursing into
# sub-folders. Entries named "migrated", ".DS_Store", or
# "migrate_booknotes.rb" are skipped.
#
# @param folder [String] path of the folder to scan
# @return [void]
def process_folder(folder)
  Dir.children(folder).each do |filename|
    next if %w[migrated .DS_Store migrate_booknotes.rb].include?(filename)

    # Always work with the joined path so nested folders resolve correctly
    # regardless of the current working directory.
    path = File.join(folder, filename)
    if File.directory?(path)
      process_folder(path)
    elsif filename.match?(/\.(?:md|txt)\z/)
      puts "Processing #{filename}…"
      migrate_note(path)
    end
  end
end

# Reads one note, enriches it with Google Books metadata, and writes the
# rendered result into DST under a new "<id>-<slug>.md" name.
def migrate_note(path)
  data = {}
  content = File.read(path)

  # ADD id to metadata based on date
  date = note_date(path, content)
  data[:id] = id(date)
  data[:date] = date.strftime("%a, %e %b %Y %T")

  # EXTRACT tags matched by ORGTAG and transform them to hashtags
  tags = content.scan(ORGTAG).flatten.map { |t| "##{t.tr('-', '_').downcase}" }
  tags.push("#book")
  content = content.gsub(ORGTAG, '\1')

  # EXTRACT search query from filename ("<title> by <author>.<ext>")
  query = File.basename(path, File.extname(path))
  if query.include?(" by ")
    # Split on the LAST " by " so titles that contain the word survive.
    title, _, author = query.rpartition(" by ")
  else
    title = query
    author = ""
  end
  title = title.strip
  author = author.strip

  book = find_book(query, title)
  if book
    data.merge!(book.fetch("volumeInfo", {}))
    data[:identifier] = data.dig("industryIdentifiers", 0, "identifier")
    Array(data["categories"]).each { |category| tags.push(string_to_tag(category)) }
  else
    # Store the fallback under the string keys that the filename and the
    # renderer read, keeping the symbol keys as well.
    data[:title] = data["title"] = title
    data[:author] = author
    data["authors"] = [author] unless author.empty?
  end

  data[:tags] = tags.uniq.join(", ")
  data[:body] = clean_body(content)

  # WRITE the note under its new filename to the destination folder
  label = [data["title"], mla_authors(data["authors"])].compact.join(" by ")
  new_name = "#{data[:id]}-#{clean_title(label)}.md"
  FileUtils.mkdir_p(DST)
  File.write(File.join(DST, new_name), render(data))
end

# Works out the date of a note: a date parsed from its content, else the
# file's timestamp, moved into the year named by the containing folder when
# that folder name starts with a number.
def note_date(path, content)
  date =
    begin
      # NOTE(review): recent versions of the date library reject inputs longer
      # than 128 characters unless `limit:` is overridden, which would send
      # long notes down the fallback path - confirm for the Ruby in use.
      Date.parse(content)
    rescue StandardError
      # DateTime keeps the time of day and supports the year shift below.
      file_timestamp(path).to_datetime
    end

  target_year = File.basename(File.dirname(path)).to_i
  return date if target_year <= 0 || date.year == target_year

  puts "Adjusting date... #{date.year}"
  date >> (12 * (target_year - date.year))
end

# Returns the file's creation time, or its modification time on platforms
# where the creation time is not available.
def file_timestamp(path)
  File.birthtime(path)
rescue NotImplementedError
  File.mtime(path)
end

# Queries the Google Books API and returns the result whose title is closest
# (by Levenshtein distance) to +title+, or nil when nothing was found.
def find_book(query, title)
  uri = URI("https://www.googleapis.com/books/v1/volumes?q=#{URI.encode_www_form_component(query)}")
  books = JSON.parse(Net::HTTP.get_response(uri).body)
  # https://stackoverflow.com/questions/16323571/measure-the-distance-between-two-strings-with-ruby
  (books["items"] || []).min_by do |candidate|
    levenshtein_distance(candidate.dig("volumeInfo", "title").to_s, title)
  end
end
# Entry point: migrate everything under SRC.
process_folder(SRC)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment