Skip to content

Instantly share code, notes, and snippets.

@Heliosmaster
Created April 28, 2012 10:26
Show Gist options
  • Save Heliosmaster/2517777 to your computer and use it in GitHub Desktop.
Save Heliosmaster/2517777 to your computer and use it in GitHub Desktop.
Recode to UTF-8
# Recode every *.htm* file of a directory to UTF-8 and, where a FrontPage /
# Claris "GENERATOR" meta tag is present, replace that tag with one that
# declares the UTF-8 charset.
#
# Usage: ruby <this script> [directory]   (defaults to the current directory)
#
# External tools: `file -i` (expected to print "<name>: <mime>; charset=<cs>")
# and `iconv` with `-o` support. Both are invoked with argument arrays, never
# through a shell, so file names containing spaces or shell metacharacters
# are safe.
#
# The search is not recursive; for a recursive variant, feed the script the
# output of something like `find . -name "*.htm*"` instead of the glob below.

# One line of `file -i` output: "<name>:<mime type>charset=<charset>".
FILE_INFO_LINE = /(.*):(.*)charset=(.*)/

# Meta tag left behind by the HTML editors we want to get rid of.
GENERATOR_META = /<META(.*)NAME="?GENERATOR"? CONTENT="(Microsoft FrontPage|Claris Home Page)(.*)">/i

# Tag that replaces GENERATOR_META.
UTF8_META = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>".freeze

# Charsets that are already valid UTF-8 and therefore need no conversion.
UTF8_COMPATIBLE = %w[us-ascii utf-8].freeze

# Name of the log that collects files iconv could not convert.
FAILED_LOG = "iconv_failed.txt".freeze

# Parse one line of `file -i` output.
#
# @param line [String] a single output line, without trailing newline
# @return [Array(String, String), nil] [filename, charset], or nil when the
#   line carries no charset information (e.g. an error message from `file`)
def parse_file_line(line)
  match = FILE_INFO_LINE.match(line)
  return nil unless match

  charset = match[3].strip
  # `file` cannot name arbitrary 8-bit text; treat it as windows-1252.
  charset = "windows-1252" if charset == "unknown-8bit"
  [match[1], charset]
end

# Replace every GENERATOR meta tag in +html+ with the UTF-8 meta tag.
#
# @param html [String] document text
# @return [String, nil] the rewritten text, or nil when there is no such tag
# @raise [ArgumentError] if +html+ contains bytes invalid for its encoding
def force_utf8_meta(html)
  return nil unless html =~ GENERATOR_META

  html.gsub(GENERATOR_META, UTF8_META)
end

# Convert +filename+ from +charset+ to UTF-8 in place.
#
# The original file is only replaced after iconv succeeded. On failure the
# file is left untouched, any partial temp file is removed, and the name is
# appended to FAILED_LOG and printed.
#
# @param filename [String] path of the file to convert
# @param charset [String] source charset as reported by `file -i`
# @return [Boolean] true when the file is UTF-8 afterwards
def convert_to_utf8(filename, charset)
  return true if UTF8_COMPATIBLE.include?(charset)

  tmp = "#{filename}.tmp"
  if system("iconv", "-f", charset, "-t", "utf-8", "-o", tmp, filename)
    File.rename(tmp, filename)
    true
  else
    File.delete(tmp) if File.exist?(tmp)
    File.open(FAILED_LOG, "a") { |log| log.puts filename }
    puts filename
    false
  end
end

# Rewrite +filename+ so that its GENERATOR meta tag (if any) becomes the
# UTF-8 meta tag. Read or match errors are reported and the file is skipped.
#
# @param filename [String] path of the HTML file
# @return [void]
def replace_generator_meta(filename)
  begin
    updated = force_utf8_meta(File.read(filename))
  rescue StandardError
    puts "@@ error with #{filename}!"
    return
  end
  return if updated.nil?

  File.open(filename, "w") { |file| file.puts updated }
end

# Process all *.htm* files directly inside +dir+ (current directory if nil).
#
# @param dir [String, nil] directory to scan
# @return [void]
def recode_directory(dir = nil)
  pattern = dir ? File.join(dir, "*.htm*") : "*.htm*"
  files = Dir.glob(pattern)
  return if files.empty?

  listing = IO.popen(["file", "-i", *files]) { |io| io.read }
  listing.each_line do |line|
    filename, charset = parse_file_line(line.chomp)
    # Skip unparsable lines and binary files entirely.
    next if filename.nil? || charset == "binary"
    # Do not tag a file as UTF-8 when its conversion failed.
    next unless convert_to_utf8(filename, charset)

    replace_generator_meta(filename)
  end
end

recode_directory(ARGV[0]) if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment