-
-
Save bensheldon/007781c3c3e6f0afa948 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'pathname' | |
require "open-uri" | |
require 'uri' | |
require 'reverse_markdown' | |
# Custom gem (not published to rubygems, so reference the git repo) | |
# https://github.com/bensheldon/middleman_writer | |
require 'middleman_writer' | |
# Helper method to save an array of articles | |
# Helper method to save an array of articles.
#
# For each element, `article.article.serialize` produces the text, which is
# then written to `article.path` opened in 'w' mode (existing content is
# replaced). If serializing or writing one article raises a StandardError,
# the error and the article's path are printed to stdout and the remaining
# articles are still processed.
#
# @param articles [#each] objects responding to `#article` (whose result
#   responds to `#serialize`) and `#path` (which must respond to `#open`,
#   e.g. a Pathname)
# @return whatever `articles.each` returns (the receiver, for an Array)
def save(articles)
  articles.each do |article|
    begin
      content_string = article.article.serialize
      article.path.open('w') { |f| f.write(content_string) }
    rescue StandardError => e
      # Best effort: report the failure and carry on with the next article.
      puts "#{e}: #{article.path}"
    end
  end
end
# 1. Load up all the articles
articles = MiddlemanWriter::ArticleFile.load('source/articles')

# 2. Regex all the file urls and add them to their article's frontmatter.
#    Articles whose content has no matching url get any existing 'files'
#    entry removed instead.
articles.each do |article|
  # Will need to reference the domain name of the blog
  matches = article.content.scan(/(http:[^"\n]*island94[^"\n]*(?:wp-content|\/files\/)[^"]*)/).flatten.uniq
  if matches.empty?
    article.frontmatter.delete('files')
  else
    # URI.escape was removed in Ruby 3.0; the default parser's #escape is
    # used here as the replacement.
    article.frontmatter['files'] = matches.map { |m| URI::DEFAULT_PARSER.escape(m) }
  end
end

# articles.select{ |a| a.content.strip =~ /\A(?!<p|<ul|<blockquote|<div|<h).*/ }.each{ |a| a.content.gsub!(/^(.*)$/, '<p>\1</p>') }

# 3. Save the articles with their file urls in the frontmatter
save(articles)
# Downloads `url` into `destination` unless a file already exists there.
#
# @param url [String] address to fetch (opened through open-uri's URI.open)
# @param destination [Pathname] where the downloaded bytes are written
# @return [Boolean] true when the file was already present or was written,
#   false when fetching or writing raised a StandardError
def download_to(url, destination)
  # check if the file has already been downloaded
  return true if destination.file?

  # Block forms close both the remote stream and the local file.
  URI.open(url) do |remote|
    destination.open('wb') { |local| local.write(remote.read) }
  end
  true
rescue StandardError
  begin
    # whelp, delete the file (don't leave a partial download behind)
    destination.delete if destination.exist?
  rescue SystemCallError
    # Cleanup is best effort; the failure itself is reported by the caller.
  end
  false
end

# 4. Download every referenced file next to its article, in a directory named
#    after the article's file name up to the first dot.
articles.select { |a| a.frontmatter.fetch('files', false) }.each do |article|
  directory = Pathname.new("source/articles/#{article.path.basename.to_s[/[^.]*/]}")
  directory.mkdir unless directory.directory?

  # clear out old failed file urls
  article.frontmatter.delete('failed_files')

  article.frontmatter['files'].each do |image_url|
    attempts = [[image_url, 'Failed']]

    # Now let's try to download the full-size big image for posterity:
    # strip a "-<width>x<height>" suffix that precedes the extension.
    bimage_url = image_url.sub(/-\d+x\d+\./, '.')
    # Without such a suffix there is no separate full-size file to fetch.
    attempts << [bimage_url, 'Failed (big)'] unless bimage_url == image_url

    attempts.each do |url, failure_label|
      next if download_to(url, directory + Pathname.new(url).basename)

      failed_files = article.frontmatter.fetch('failed_files', []) << url
      article.frontmatter['failed_files'] = failed_files.uniq
      puts "#{failure_label}: #{url}"
    end
  end
end

# Save the articles
save(articles)
# Rewrite the content to reference the new relative paths
# ("<article name without .html.markdown>/<file name>").
articles.each do |article|
  article.frontmatter.fetch('files', []).each do |image_url|
    rel_path = article.path.basename('.html.markdown') + Pathname.new(image_url).basename

    # The urls in the frontmatter were URI-escaped when they were collected,
    # while the article body still holds the text as originally written, so
    # look for both the escaped and the unescaped spelling.
    needles = [image_url.to_s, URI::DEFAULT_PARSER.unescape(image_url.to_s)].uniq
    needles.each do |needle|
      # Block form: the replacement is inserted literally (no backslash
      # sequences are interpreted).
      article.content.gsub!(needle) { rel_path.to_s }
    end
    puts "Rewrite content: #{image_url} -> #{rel_path} "
  end
end

# Save the articles
save(articles)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv'

# Adds redirect paths to each article's frontmatter. Relies on `articles`
# and `save` already being defined by the script that loads the articles.

# The export writes embedded quotes as \" ; CSV expects them doubled ("").
text = File.read('redirects_p.csv').gsub(/\\"/, '""')
redirect_rows = CSV.parse(text, headers: true, header_converters: :symbol)

# Index the articles once by post id instead of scanning the whole list for
# every CSV row. Ids are compared as strings: CSV fields are Strings, while
# the frontmatter value may have been parsed as an Integer (TODO confirm).
# `||=` keeps the first article for an id, like `find` would.
articles_by_post_id = articles.each_with_object({}) do |article, index|
  index[article.frontmatter['wp:post_id'].to_s] ||= article
end

redirect_rows.each do |redirect|
  article = articles_by_post_id[redirect[:id].to_s]
  # articles.find{|f| p f.frontmatter['wp_link']; false}
  next unless article

  article.frontmatter['redirects'] = article.frontmatter.fetch('redirects', []) << redirect[:source]
end

# Every article also gets a redirect from its "/?p=<post id>" address.
articles.each do |article|
  article_redirects = article.frontmatter.fetch('redirects', []) << "/?p=#{article.frontmatter['wp:post_id']}"
  article.frontmatter['redirects'] = article_redirects.uniq
end

save(articles)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment