Created
June 8, 2011 00:14
-
-
Save arika/1013527 to your computer and use it in GitHub Desktop.
expand zip-files (with pathname encoding conversion) <http://arika.org/2011/06/07/unzip-rb>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby1.9.1
# encoding: UTF-8
=begin
Copyright (c) 2011, akira yamada
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
コマンドライン引数で指定された一つ以上のzipファイルを
カレントディレクトリに展開します。
その際、zipファイル中のファイル名の文字コードを
システムに適したものに変換します。
zipファイル内のトップディレクトリ直下に
二つ以上のファイルやディレクトリを含む場合には
zipファイルのファイル名から拡張子を除いた
ディレクトリを作りその中に展開します。
zipファイル内のトップディレクトリ直下に
一つのファイルまたはディレクトリしかない場合には
ディレクトリを作らず直接展開します。
いずれのケースでもカレントディレクトリに
すでに同名のファイルやディレクトリがあれば、
上書きせずランダムな名前のディレクトリを作成し
その中に展開します。
=end
require 'zipruby'
require 'fileutils'
require 'tmpdir'
require 'optparse'
# Work around the "invalid byte sequence" error that occurs when
# remove_entry_secure is asked to delete an entry whose name is a broken
# (invalidly encoded) string.
class FileUtils::Entry_
  # Removes the directory at this entry's path. One trailing '/' is dropped
  # with String#chomp before the path is handed to Dir.rmdir.
  def remove_dir1
    platform_support do
      Dir.rmdir(path.chomp('/'))
    end
  end
end
# Converts a raw path name (for example a zip entry name) to +path_encoding+.
#
# Each encoding in +cand_encodings+ is tried in order (nil entries are
# skipped): the bytes are re-tagged with the candidate and, if they form a
# valid string in it, transcoded to +path_encoding+. The first candidate that
# transcodes without error wins.
#
# If no candidate transcodes cleanly, the first candidate whose only problem
# was an unrepresentable character is retried with undefined characters
# replaced. If that is not possible either, the original bytes are returned
# tagged as ASCII-8BIT.
#
# Backslashes in the result are replaced with '/'. The replacement is done
# after transcoding, so a 0x5C trail byte of a multi-byte character is not
# mistaken for a separator. +orig_path+ itself is never modified.
#
# orig_path::      String holding the raw path bytes.
# path_encoding::  target Encoding (or encoding name).
# cand_encodings:: Array of candidate source encodings; may contain nil.
#
# Returns a new String.
def path_conv(orig_path, path_encoding, cand_encodings)
  path = orig_path.dup
  converted = nil
  fallback_enc = nil

  cand_encodings.each do |enc|
    next unless enc
    path.force_encoding(enc)
    next unless path.valid_encoding?
    begin
      converted = path.encode(path_encoding)
      break
    rescue Encoding::ConverterNotFoundError
      # No transcoder exists for this candidate, so it cannot serve as the
      # lossy fallback either; remembering it would make the retry raise.
      next
    rescue EncodingError
      # Valid bytes, but some character has no equivalent in the target.
      fallback_enc ||= enc
    end
  end

  if converted.nil? && fallback_enc
    path.force_encoding(fallback_enc)
    begin
      converted = path.encode(path_encoding, :undef => :replace)
    rescue EncodingError
      converted = nil
    end
  end

  # Last resort: hand back the raw bytes untouched.
  converted ||= path.force_encoding(Encoding::BINARY)
  converted.tr('\\', '/')
end
# Returns a copy of +str+ suitable for printing to the terminal.
#
# Strings tagged US-ASCII or ASCII-8BIT are duplicated unchanged. Any other
# string is transcoded to option[:term_encoding]; invalid byte sequences and
# characters that do not exist in the target are replaced instead of raising,
# so that merely reporting a file name cannot abort the run.
#
# str::    String to convert.
# option:: Hash; only :term_encoding is read.
def term_conv(str, option)
  if [Encoding::US_ASCII, Encoding::BINARY].include?(str.encoding)
    str.dup
  else
    str.encode(option[:term_encoding], :invalid => :replace, :undef => :replace)
  end
end
# Lists or extracts every entry of the zip archive at +zip_path+.
#
# zip_path:: path of the archive, passed to Zip::Archive.open.
# dst_dir::  directory to extract into; not used in list mode.
# option::   Hash. Keys read here: :path_encoding and :cand_encodings (handed
#            to path_conv), :mode (:extract extracts, any other value lists)
#            and :verbose. The whole hash is also passed to term_conv.
#
# List mode prints one line per entry to stdout: just the name, or with
# :verbose "d MTIME NAME" / "f MTIME NAME SIZE".
#
# Extract mode writes each entry below +dst_dir+ and restores its mtime.
# Entries whose resolved path would not be strictly inside +dst_dir+ are
# skipped with a warning.
def unzip(zip_path, dst_dir, option)
  path_encoding = option[:path_encoding]
  cand_encodings = option[:cand_encodings]
  list_only = option[:mode] != :extract
  verbose = option[:verbose]

  # Byte prefix that every extracted path must start with. A plain prefix
  # comparison is used rather than a glob match, so metacharacters in the
  # directory name cannot influence the containment check.
  dst_prefix = nil
  unless list_only
    dst_prefix = File.join(File.expand_path(dst_dir), '').force_encoding(Encoding::BINARY)
  end

  last_encoding = nil
  Zip::Archive.open(zip_path) do |ar|
    ar.each do |zf|
      path = path_conv(zf.name, path_encoding, [last_encoding, *cand_encodings])
      # NOTE(review): path_conv returns the already converted name, so this
      # records the encoding of the converted string, not the encoding the
      # entry name was stored in -- confirm whether that is intended.
      unless [Encoding::US_ASCII, Encoding::BINARY].include?(path.encoding)
        last_encoding = path.encoding
      end
      t_path = term_conv(path, option)

      if list_only
        if verbose
          mtime = zf.mtime.strftime('%Y-%m-%d %H:%M')
          if zf.directory?
            puts "d #{mtime} #{t_path}"
          else
            puts "f #{mtime} #{t_path} #{zf.size}"
          end
        else
          puts "#{t_path}"
        end
        next
      end

      dst_path = File.expand_path(path, dst_dir)
      # Compare as raw bytes so differing encoding tags cannot raise.
      unless dst_path.dup.force_encoding(Encoding::BINARY).start_with?(dst_prefix)
        warn "invalid path name: #{t_path} (skipped)"
        next
      end

      if zf.directory?
        FileUtils.mkdir_p(dst_path)
        warn "created: #{t_path}" if verbose
      else
        FileUtils.mkdir_p(File.dirname(dst_path))
        # File.open never treats the name as a command, and IO#write is not
        # affected by the $, / $\ output separators.
        File.open(dst_path, 'wb') do |o|
          zf.read { |data| o.write(data) }
        end
        warn "extracted: #{t_path}" if verbose
      end
      File.utime(zf.mtime, zf.mtime, dst_path)
    end
  end
end
# Encodings tried, in this order, when decoding a path name.
cand_encodings =
  %w(US-ASCII UTF8-MAC UTF-8 Shift_JIS Windows-31J EUC-JP).
    map { |e| Encoding.find(e) }

# Encoding that converted path names are produced in.
path_encoding =
  Encoding.find('filesystem') || Encoding::UTF_8

# Encoding used for text printed to the terminal.
term_encoding =
  Encoding.find('external') || Encoding::UTF_8

# Default settings; the command-line options below may overwrite
# :mode, :verbose and :cand_encodings.
option = {
  :mode => :extract,
  :verbose => false,
  :cand_encodings => cand_encodings,
  :path_encoding => path_encoding,
  :term_encoding => term_encoding,
}
# Parse the command line. Recognised options are removed from ARGV, leaving
# only the zip file names; the results are stored in +option+.
ARGV.options do |o|
  o.banner << ' zipfile...'
  o.on
  o.on 'options:'
  o.on('-l', '--list') { option[:mode] = :list }
  o.on('--encodings=enc1,enc2,...', Array,
       "(default: #{option[:cand_encodings].map(&:to_s).join(', ')})") { |encs|
    option[:cand_encodings] = encs.map do |e|
      begin
        Encoding.find(e)
      rescue ArgumentError
        # Report an unknown encoding name as an option error rather than
        # letting a bare ArgumentError escape.
        raise OptionParser::InvalidArgument, e
      end
    end
  }
  o.on('--try-all-encodings') {
    option[:cand_encodings] = Encoding.list - [Encoding::BINARY]
  }
  o.on('--list-encodings') { puts Encoding.list.map(&:to_s).sort; exit }
  o.on('-v', '--verbose') { option[:verbose] = true }
  o.on('-h', '--help') { puts o; exit }
  begin
    o.parse!
  rescue OptionParser::ParseError => e
    # ARGV.options would only warn about a parse error and return nil, and
    # the script would then continue with a half-parsed ARGV. Stop instead.
    o.abort(e.message)
  end
end
# Main loop: handle every zip file that is left in ARGV.
verbose = option[:verbose]
list_only = option[:mode] != :extract
multi_zip = ARGV.size > 1
head = true # true until the first per-archive header has been printed

ARGV.each do |zip_path|
  conved_zip_path =
    path_conv(zip_path, option[:path_encoding], option[:cand_encodings])
  t_zip_path = term_conv(conved_zip_path, option)

  unless File.exist?(zip_path)
    warn "no such file or directory: #{t_zip_path} (ignored)"
    next
  end

  # Extraction goes into a fresh temporary directory inside the current
  # directory; where the result finally lands is decided afterwards.
  tmpdir = list_only ? nil : Dir.mktmpdir(nil, Dir.pwd)

  begin
    if multi_zip
      if list_only
        puts unless head
        puts "#{t_zip_path}:"
      else
        warn '' unless head
        warn "expanding #{t_zip_path}..."
      end
      head = false
    end

    unzip(zip_path, tmpdir, option)
    next if list_only

    entries = Dir.entries(tmpdir).reject { |e| e == '.' || e == '..' }
    if entries.size == 1
      # A single top-level entry is moved straight into the current
      # directory, unless something with that name already exists there.
      path = entries.first
      if File.exist?(path)
        warn "extracted file in #{File.basename(tmpdir)}"
      else
        FileUtils.mv(File.join(tmpdir, path), '.')
        FileUtils.remove_entry_secure(tmpdir)
        warn "extracted file at #{term_conv(path, option)}" if verbose || multi_zip
      end
    else
      # Otherwise the temporary directory is renamed after the archive
      # (minus ".zip"), unless that name is already taken.
      dst_name = File.basename(conved_zip_path)
      dst_name.sub!(/\.zip\z/i, '')
      if File.exist?(dst_name)
        warn "extracted files in #{File.basename(tmpdir)}"
      else
        FileUtils.mv(tmpdir, dst_name)
        warn "extracted files in #{term_conv(dst_name, option)}"
      end
    end
  rescue Exception
    # Cleanup only: the exception is re-raised below. By this point tmpdir
    # may already have been moved or removed, so check first; otherwise the
    # cleanup itself would raise and hide the original error.
    FileUtils.remove_entry_secure(tmpdir) if tmpdir && File.exist?(tmpdir)
    raise
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment