Skip to content

Instantly share code, notes, and snippets.

@bensheldon
Last active January 4, 2016 10:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bensheldon/007781c3c3e6f0afa948 to your computer and use it in GitHub Desktop.
Save bensheldon/007781c3c3e6f0afa948 to your computer and use it in GitHub Desktop.
require 'pathname'
require "open-uri"
require 'uri'
require 'reverse_markdown'
# Custom gem (not published to rubygems, so reference the git repo)
# https://github.com/bensheldon/middleman_writer
require 'middleman_writer'
# Writes each article's serialized form back to the file it was loaded from.
#
# A failure on one article (serialization or file write) is reported on
# stdout together with that article's path and does not stop the remaining
# articles from being written.
#
# @param articles [Enumerable] objects responding to +article+ (which must
#   respond to +serialize+) and +path+ (which must respond to +open+)
# @return [Enumerable] the +articles+ collection that was passed in
def save(articles)
  articles.each do |entry|
    begin
      serialized = entry.article.serialize
      entry.path.open('w') do |io|
        io.write(serialized)
      end
    rescue => error
      puts "#{error}: #{entry.path}"
    end
  end
end
# 1. Load up all the articles
articles = MiddlemanWriter::ArticleFile.load('source/articles')

# 2. Regex all the file urls and add them to their article's frontmatter.
#    Articles without any matching url get their 'files' key removed, so a
#    re-run does not keep stale entries.
articles.each do |article|
  # Will need to reference the domain name of the blog
  matches = article.content.scan(/(http:[^"\n]*island94[^"\n]*(?:wp-content|\/files\/)[^"]*)/).flatten.uniq
  if matches.empty?
    article.frontmatter.delete('files')
  else
    # URI.escape was removed in Ruby 3.0. The default parser's #escape is what
    # URI.escape delegated to, and it is also available on older Rubies.
    article.frontmatter['files'] = matches.map { |m| URI::DEFAULT_PARSER.escape(m) }
  end
end

# articles.select{ |a| a.content.strip =~ /\A(?!<p|<ul|<blockquote|<div|<h).*/ }.each{ |a| a.content.gsub!(/^(.*)$/, '<p>\1</p>') }

# 3. Save the articles with their file urls in the frontmatter
save(articles)
# Removes a partially written download, if one exists.
# Cleanup is best effort: a filesystem error is reported on stderr, not raised.
#
# @param path [Pathname] file to remove
def remove_partial_download(path)
  path.delete if path.exist?
rescue SystemCallError => e
  warn "Could not remove #{path}: #{e.message}"
end

# Fetches +url+ and writes the response body to +destination+.
#
# @param url [String] remote file to fetch
# @param destination [Pathname] local file to create
# @return [Boolean] true when the file was written, false on any StandardError
def download_file(url, destination)
  # Use URI#open (provided by open-uri) rather than Kernel#open: Ruby 3.0
  # dropped URL support from Kernel#open, and Kernel#open would also spawn a
  # subprocess for a string starting with "|". The block form closes the
  # response stream once it has been read.
  body = URI.parse(url).open(&:read)
  destination.open('wb') { |file| file.write(body) }
  true
rescue StandardError
  remove_partial_download(destination)
  false
end

# Makes sure the file behind +url+ is present in +directory+, named after the
# last path segment of the url. Files that already exist are not re-downloaded.
#
# @param url [String] remote file
# @param directory [Pathname] directory holding the article's files
# @return [Boolean] false only when a download was attempted and failed
def ensure_downloaded(url, directory)
  target = directory + Pathname.new(url).basename
  target.file? || download_file(url, target)
end

# Appends +url+ to the article's 'failed_files' frontmatter list (no duplicates).
def record_failed_file(article, url)
  failed_files = article.frontmatter.fetch('failed_files', []) << url
  article.frontmatter['failed_files'] = failed_files.uniq
end

# 4. Download every referenced file into a directory named after the article
articles.select { |a| a.frontmatter.fetch('files', false) }.each do |article|
  # Directory name is the article's file name up to its first dot
  directory = Pathname.new("source/articles/#{article.path.basename.to_s[/[^.]*/]}")
  directory.mkdir unless directory.directory?

  # clear out old failed file urls
  article.frontmatter.delete('failed_files')

  article.frontmatter['files'].each do |image_url|
    # Also try the full-size image for posterity: the same url with the
    # "-<width>x<height>" suffix before the extension removed.
    bimage_url = image_url.sub(/-\d*x\d*\./, '.')

    [[image_url, 'Failed'], [bimage_url, 'Failed (big)']].each do |url, label|
      next if ensure_downloaded(url, directory)

      record_failed_file(article, url)
      puts "#{label}: #{url}"
    end
  end
end

# Save the articles
save(articles)
# Point each article's content at the local copies of its files: every url
# listed under 'files' is replaced by "<article name>/<file name>", where the
# article name is the article's file name without ".html.markdown".
articles.each do |post|
  file_urls = post.frontmatter.fetch('files', [])

  file_urls.each do |file_url|
    local_dir = post.path.basename('.html.markdown')
    local_path = local_dir + Pathname.new(file_url).basename

    post.content.gsub!(file_url.to_s, local_path.to_s)
    puts "Rewrite content: #{file_url} -> #{local_path} "
  end
end

# Persist the rewritten content
save(articles)
require 'csv'

# The export escapes quotes as \" ; turn them into the doubled quotes ("")
# that the CSV parser expects before parsing.
csv_text = File.read('redirects_p.csv').gsub(/\\"/, '""')
redirect_rows = CSV.parse(csv_text, headers: true, header_converters: :symbol)

# Attach each redirect source to the article whose 'wp:post_id' equals the
# row's id column; rows without a matching article are ignored.
redirect_rows.each do |row|
  post = articles.find { |candidate| candidate.frontmatter['wp:post_id'] == row[:id] }
  # articles.find{|f| p f.frontmatter['wp_link']; false}
  next unless post

  post.frontmatter['redirects'] = post.frontmatter.fetch('redirects', []) << row[:source]
end

# Every article also gets a "/?p=<post id>" redirect; duplicates are removed.
articles.each do |post|
  collected = post.frontmatter.fetch('redirects', [])
  collected << "/?p=#{post.frontmatter['wp:post_id']}"
  post.frontmatter['redirects'] = collected.uniq
end

save(articles)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment