Skip to content

Instantly share code, notes, and snippets.

@arika
Created June 8, 2011 00:14
Show Gist options
  • Save arika/1013527 to your computer and use it in GitHub Desktop.
Save arika/1013527 to your computer and use it in GitHub Desktop.
expand zip-files (with pathname encoding conversion) <http://arika.org/2011/06/07/unzip-rb>
#!/usr/bin/ruby1.9.1
# encoding: UTF-8
=begin
Copyright (c) 2011, akira yamada
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
コマンドライン引数で指定された一つ以上のzipファイルを
カレントディレクトリに展開します。
その際、zipファイル中のファイル名の文字コードを
システムに適したものに変換します。
zipファイル内のトップディレクトリ直下に
二つ以上のファイルやディレクトリを含む場合には
zipファイルのファイル名から拡張子を除いた
ディレクトリを作りその中に展開します。
zipファイル内のトップディレクトリ直下に
一つのファイルまたはディレクトリしかない場合には
ディレクトリを作らず直接展開します。
いずれのケースでもカレントディレクトリに
すでに同名のファイルやディレクトリがあれば、
上書きせずランダムな名前のディレクトリを作成し
その中に展開します。
=end
require 'zipruby'
require 'fileutils'
require 'tmpdir'
require 'optparse'
# Work around the "invalid byte sequence" error that occurs when
# remove_entry_secure is asked to delete an entry whose name is a broken
# (invalidly encoded) string.
#
# This reopens FileUtils' internal Entry_ class and replaces remove_dir1 so
# that a trailing '/' is stripped from the path with String#chomp before the
# directory is removed with Dir.rmdir.
# NOTE(review): presumably the stock implementation strips the slash with a
# regexp, which is what fails on invalid bytes -- confirm against the
# fileutils.rb shipped with the target Ruby before relying on this patch.
class FileUtils::Entry_
def remove_dir1
platform_support {
Dir.rmdir path().chomp('/')
}
end
end
# Convert a raw path name (a zip entry name or a command-line argument) to
# +path_encoding+, guessing the source encoding from +cand_encodings+.
#
# orig_path      - String holding the raw name; it is not modified.
# path_encoding  - target Encoding (or encoding name) for the result.
# cand_encodings - Array of candidate source encodings, tried in order;
#                  nil entries are skipped.
#
# Strategy:
# 1. The first candidate in which the bytes are valid AND which converts
#    cleanly to +path_encoding+ wins.
# 2. Otherwise the first candidate in which the bytes were at least valid is
#    used again, this time replacing characters that have no counterpart in
#    +path_encoding+.
# 3. If that is impossible too (no candidate fits, or no converter exists),
#    the raw bytes are returned tagged as ASCII-8BIT.
#
# Returns a new String with every backslash replaced by '/'.
def path_conv(orig_path, path_encoding, cand_encodings)
  path = orig_path.dup
  converted = nil
  fallback_enc = nil

  cand_encodings.each do |enc|
    next unless enc
    path.force_encoding(enc)
    next unless path.valid_encoding?
    fallback_enc ||= enc
    begin
      converted = path.encode(path_encoding)
    rescue EncodingError
      # Undefined character or missing converter: try the next candidate.
      converted = nil
    end
    break if converted
  end

  if converted.nil? && fallback_enc
    path.force_encoding(fallback_enc)
    begin
      converted = path.encode(path_encoding, :undef => :replace)
    rescue EncodingError
      # e.g. Encoding::ConverterNotFoundError for an encoding without a
      # transcoder; fall through to the raw-bytes result instead of aborting.
      converted = nil
    end
  end

  unless converted
    path.force_encoding('ASCII-8BIT')
    converted = path
  end

  converted.gsub(/\\/, '/')
end
# Return a copy of +str+ suitable for printing on the terminal.
#
# str    - String to display.
# option - Hash; option[:term_encoding] is the encoding to convert to.
#
# US-ASCII and binary strings are returned as plain copies. Anything else is
# converted to the terminal encoding; because the result is only used in
# messages, both invalid byte sequences and characters undefined in the
# target encoding are replaced rather than raising.
def term_conv(str, option)
  if str.encoding == Encoding::US_ASCII || str.encoding == Encoding::BINARY
    return str.dup
  end

  str.encode(option[:term_encoding], :invalid => :replace, :undef => :replace)
end
# Extract, or merely list, the entries of one zip archive.
#
# zip_path - path of the archive, handed to Zip::Archive.open.
# dst_dir  - directory that receives the extracted entries. It is only used
#            when extracting (the caller passes nil when listing).
# option   - Hash of settings read here:
#              :path_encoding  - encoding entry names are converted to
#              :cand_encodings - encodings tried when decoding entry names
#              :mode           - :extract writes files, anything else lists
#              :verbose        - print per-entry details
#            term_conv additionally reads :term_encoding from it.
#
# When listing, entry names go to stdout (with type, mtime and size in
# verbose mode). When extracting, each entry is written below dst_dir and its
# mtime is restored; an entry whose resolved path falls outside dst_dir is
# skipped with a warning.
def unzip(zip_path, dst_dir, option)
  path_encoding = option[:path_encoding]
  cand_encodings = option[:cand_encodings]
  list_only = option[:mode] != :extract
  verbose = option[:verbose]
  last_encoding = nil

  # "dst_dir/" as raw bytes, for the containment check below. A plain prefix
  # comparison is used instead of File.fnmatch so that glob metacharacters
  # ('[', '?', '*', ...) occurring in dst_dir cannot distort the check.
  dst_prefix = nil
  unless list_only
    dst_prefix = File.join(File.expand_path(dst_dir), '').dup
    dst_prefix.force_encoding(Encoding::BINARY)
  end

  Zip::Archive.open(zip_path) do |ar|
    ar.each do |zf|
      path = path_conv(zf.name, path_encoding,
                       [last_encoding, *cand_encodings])
      # NOTE(review): path_conv returns the name already converted to
      # path_encoding (or tagged binary), so this remembers the *target*
      # encoding rather than the source encoding that matched -- confirm
      # whether that is the intended "try the last hit first" behaviour.
      if path.encoding != Encoding::US_ASCII &&
         path.encoding != Encoding::BINARY
        last_encoding = path.encoding
      end
      t_path = term_conv(path, option)

      if list_only
        if verbose
          mtime = zf.mtime.strftime('%Y-%m-%d %H:%M')
          if zf.directory?
            puts "d #{mtime} #{t_path}"
          else
            puts "f #{mtime} #{t_path} #{zf.size}"
          end
        else
          puts t_path
        end
        next
      end

      # Resolve '..' and friends, then refuse anything that ends up outside
      # of dst_dir (or that resolves to dst_dir itself).
      dst_path = File.expand_path(path, dst_dir)
      dst_bytes = dst_path.dup.force_encoding(Encoding::BINARY)
      unless dst_bytes.start_with?(dst_prefix)
        warn "invalid path name: #{t_path} (skipped)"
        next
      end

      if zf.directory?
        FileUtils.mkdir_p(dst_path)
        warn "created: #{t_path}" if verbose
      else
        FileUtils.mkdir_p(File.dirname(dst_path))
        # File.open never treats the name as a command, and write (unlike
        # print) is not affected by the $, and $\ globals.
        File.open(dst_path, 'wb') do |o|
          zf.read do |data|
            o.write(data)
          end
        end
        warn "extracted: #{t_path}" if verbose
      end
      File.utime(zf.mtime, zf.mtime, dst_path)
    end
  end
end
# Default settings.
#
# cand_encodings: source encodings tried, in this order, when decoding a
# path name. path_encoding / term_encoding: the 'filesystem' and 'external'
# encodings reported by Ruby, with UTF-8 as the fallback.
cand_encodings = [
  'US-ASCII', 'UTF8-MAC', 'UTF-8', 'Shift_JIS', 'Windows-31J', 'EUC-JP'
].map { |name| Encoding.find(name) }
path_encoding = Encoding.find('filesystem') || Encoding::UTF_8
term_encoding = Encoding.find('external') || Encoding::UTF_8

# Run-time options; the command-line switches overwrite these entries.
option = {}
option[:mode] = :extract
option[:verbose] = false
option[:cand_encodings] = cand_encodings
option[:path_encoding] = path_encoding
option[:term_encoding] = term_encoding
# Parse the command line. Recognised switches update the option hash in
# place; parse! removes them from ARGV, leaving the zip file names.
ARGV.options do |opts|
  opts.banner << ' zipfile...'
  # NOTE(review): the two calls below carry no switch; presumably they only
  # add a blank line and an "options:" heading to the help text -- confirm.
  opts.on
  opts.on 'options:'

  # List the archive contents instead of extracting.
  opts.on('-l', '--list') do
    option[:mode] = :list
  end

  # Replace the candidate encodings with a comma-separated list of names.
  default_names = option[:cand_encodings].map(&:to_s).join(', ')
  opts.on('--encodings=enc1,enc2,...', Array,
          "(default: #{default_names})") do |names|
    option[:cand_encodings] = names.map { |name| Encoding.find(name) }
  end

  # Try every encoding Ruby knows about, except binary.
  opts.on('--try-all-encodings') do
    option[:cand_encodings] = Encoding.list - [Encoding::BINARY]
  end

  # Print the known encoding names, sorted, and quit.
  opts.on('--list-encodings') do
    puts Encoding.list.map(&:to_s).sort
    exit
  end

  opts.on('-v', '--verbose') do
    option[:verbose] = true
  end

  opts.on('-h', '--help') do
    puts opts
    exit
  end

  opts.parse!
end
# Main driver: handle every zip file that remains on the command line.
verbose = option[:verbose]
list_only = option[:mode] != :extract
multi_zip = ARGV.size > 1
head = true # true until the first per-archive heading has been printed

ARGV.each do |zip_path|
  conved_zip_path = path_conv(zip_path,
                              option[:path_encoding],
                              option[:cand_encodings])
  t_zip_path = term_conv(conved_zip_path, option)

  unless File.exist?(zip_path)
    warn "no such file or directory: #{t_zip_path} (ignored)"
    next
  end

  # Extraction goes into a scratch directory created inside the current
  # directory; listing needs none.
  tmpdir = list_only ? nil : Dir.mktmpdir(nil, Dir.pwd)

  begin
    # With several archives, print a heading per archive, separated from the
    # previous archive's output by an empty line.
    if multi_zip
      if list_only
        puts unless head
        puts "#{t_zip_path}:"
      else
        warn '' unless head
        warn "expanding #{t_zip_path}..."
      end
      head = false
    end

    unzip(zip_path, tmpdir, option)
    next if list_only

    extracted = Dir.entries(tmpdir) - ['.', '..']
    if extracted.size == 1
      # A single top-level entry is moved straight into the current
      # directory, unless something with that name is already there; in that
      # case it stays inside the scratch directory.
      only_entry = extracted.first
      if File.exist?(only_entry)
        warn "extracted file in #{File.basename(tmpdir)}"
      else
        FileUtils.mv(File.join(tmpdir, only_entry), '.')
        FileUtils.remove_entry_secure(tmpdir)
        warn "extracted file at #{term_conv(only_entry, option)}" if verbose || multi_zip
      end
    else
      # Otherwise the scratch directory is renamed after the archive (minus
      # its .zip extension), again unless that name is already taken.
      dst_name = File.basename(conved_zip_path).sub(/\.zip\z/i, '')
      if File.exist?(dst_name)
        warn "extracted files in #{File.basename(tmpdir)}"
      else
        FileUtils.mv(tmpdir, dst_name)
        warn "extracted files in #{term_conv(dst_name, option)}"
      end
    end
  rescue Exception
    # Cleanup only: drop the scratch directory on any failure (including an
    # interrupt), then let the original exception propagate.
    FileUtils.remove_entry_secure(tmpdir) if tmpdir
    raise
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment