Created
April 17, 2013 03:08
-
-
Save compleatang/5401492 to your computer and use it in GitHub Desktop.
For cleaning automagic jekyll imports from extraneous wordpress plugins and also importing multi-site photos
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'yaml' | |
require 'English' | |
class Cleaner | |
def initialize | |
@pictures_pull_dir = "/home/coda/sites/gandi-clients/watershedplatforms.com/htdocs/wp-content/blogs.dir/4" | |
@pictures_push_dir = "/home/coda/sites/csk/udb/assets/images" | |
@exported_posts = "./_posts" | |
@all_the_files = `ls #{@exported_posts}`.split("\n") | |
for file in @all_the_files do | |
next if file[-3..-1] != ".md" | |
puts "Working on File: #{file}" | |
yamlized = find_yaml( load_file( file ) ) | |
split_the_content = yamlized[1].split("\n") | |
while split_the_content[0] == "# " || split_the_content[0] == ""; split_the_content.shift; end | |
@publish_year = file[0..3] | |
first_para = split_the_content[0] =~ /^\[\!\[/ ? split_the_content[3] : split_the_content[0] | |
clean_yaml = clean_the_yaml( yamlized[0], first_para ) | |
clean_contents = clean_the_content( split_the_content ).join("\n") | |
write_it( file, clean_yaml + clean_contents ) | |
end | |
end | |
def load_file( file ) | |
source_file = File::read(@exported_posts + "/" + file) | |
end | |
def find_yaml( file ) | |
yaml_pattern = /\A(---\s*\n.*?\n?)^(---\s*$\n?)/m | |
if file =~ yaml_pattern | |
yaml_block = YAML.load($1) | |
entry_block = $POSTMATCH | |
else | |
yaml_block = {} | |
entry_block = file | |
end | |
return [yaml_block, entry_block] | |
end | |
def clean_the_yaml( yaml_block, first_para ) | |
new_yaml = {} | |
new_yaml["layout"] = "post" | |
if yaml_block["title"] != "" && yaml_block["title"] | |
yaml_block["title"].gsub!("\"", "\'") | |
if yaml_block["title"] =~ /\A"/ | |
new_yaml["title"] = yaml_block["title"] | |
else | |
new_yaml["title"] = "\"" + yaml_block["title"] + "\"" | |
end | |
else | |
new_yaml["title"] = "" | |
end | |
new_yaml["published"] = "true" | |
new_yaml["comments"] = "true" | |
new_yaml["meta"] = "true" | |
if yaml_block["categories"] | |
new_yaml["category"] = yaml_block["categories"].first.downcase | |
else | |
new_yaml["category"] = yaml_block["category"] || "unclassified" | |
end | |
new_yaml["tags"] = yaml_block["tags"] if yaml_block["tags"] | |
if yaml_block["excerpt"] | |
new_yaml["excerpt"] = yaml_block["excerpt"].gsub("\n", "") | |
new_yaml["excerpt"] = new_yaml["excerpt"][2..-1].strip if new_yaml["excerpt"][0] == ">" | |
else | |
new_yaml["excerpt"] = first_para.gsub("\"", "'") | |
end | |
if new_yaml["excerpt"][0] == "\"" | |
new_yaml["excerpt"][1..-2].gsub!("\"", "'") | |
else | |
new_yaml["excerpt"].gsub!("\"", "'") | |
end | |
new_yaml["excerpt"] = "\"" + new_yaml["excerpt"].strip unless new_yaml["excerpt"][0] == "\"" | |
new_yaml["excerpt"] = new_yaml["excerpt"] + "\"" if new_yaml["excerpt"][-1] != "\"" || new_yaml["excerpt"].length == 1 | |
final_yaml = "---\n\n" | |
new_yaml.each{ | head, val | final_yaml << head + ": " + val.to_s + "\n" } | |
final_yaml << "\n---\n\n" | |
end | |
def clean_the_content( entry_block ) | |
pictures_pattern = /\A(\[\!\[(.*?)\])(.*?\z)/ | |
file_pattern = /\A\s*\[\]:(.*?\/(.*?))\z/ | |
delete_lines = [] | |
for para in entry_block do | |
if para[pictures_pattern] | |
pict_line = $1 | |
caption = $2 | |
rest_of_it = $3 | |
picture_to_copy = false | |
next if rest_of_it[/\A\(\{\{/] | |
p_index = entry_block.index para | |
delete_lines << entry_block[p_index + 1] if entry_block[p_index + 1][/\A#{caption}/] | |
if entry_block[(p_index + 5)] =~ file_pattern | |
full_pict_ref = $1.strip | |
picture_to_copy = $2.split("/").last | |
delete_lines << entry_block[p_index + 5] | |
picture_to_copy = copy_over_pictures( picture_to_copy ) | |
end | |
if picture_to_copy | |
entry_block[p_index] = "#{pict_line}({{ site.url }}{{ site.root }}{{ site.images_dir }}/{{ page.date | date: \"%Y\" }}/#{picture_to_copy})][#{full_pict_ref}]" | |
end | |
end | |
end | |
delete_lines.each{ | line | entry_block.delete( line ) } | |
return entry_block | |
end | |
def copy_over_pictures( picture_name ) | |
location = `find #{@pictures_pull_dir} -name '#{picture_name}*' -type f | sort -nr` | |
if location != "" | |
location = location.split("\n").first.strip | |
picture_name = location.split("/").last | |
if `ls #{@pictures_push_dir}/#{@publish_year}` == ""; `mkdir #{@pictures_push_dir}/#{@publish_year}`; end | |
`cp #{location} #{@pictures_push_dir}/#{@publish_year}/#{picture_name}` | |
return picture_name | |
else | |
`echo "Error importing to #{@publish_year} the file: #{location}/#{picture_name}" >> cleaner-errors.log` | |
return false | |
end | |
end | |
def write_it( file, final_content ) | |
file = @exported_posts + "/" + file | |
if File.writable?( file ) | |
File::open file, 'w' do |f| | |
f.write final_content | |
end | |
end | |
end | |
end | |
Cleaner.new |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment