Last active
December 16, 2015 00:58
-
-
Save arton/5351152 to your computer and use it in GitHub Desktop.
Grep for CP932, utf-8 and utf-16 as batch file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@echo off
c:\progra~2\RUBY-2~1.0\bin\ruby -x "%~f0" %*
@goto endofruby
#!C:/PROGRA~2/RUBY-2~1.0/bin/ruby
# coding: utf-8
# At least two command-line arguments are required before any work is done.
# On failure, print a usage line to stderr; `abort` exits with status 1, the
# same status the script has always used for this case.
abort "usage: #{File.basename($0)} [-i] [-r] [-n] pattern file..." if ARGV.size < 2
# Searches the file +fn+, decoded as +enc+, for lines matching +pattern+ and
# prints the hits to stdout: a "File: <fn>" header followed by each matching
# line (trailing whitespace removed).
#
# The file is read in binary mode tagged with +enc+. Unless +enc+ is 'cp932',
# a leading UTF-8 BOM is dropped (for 'utf-8') and the text is transcoded to
# CP932, so matching and output always happen in CP932. Characters that have
# no CP932 mapping are replaced instead of raising.
#
# Matches are buffered and printed only after the whole file has been scanned,
# so a decode failure part-way through never leaves partial output behind
# (the caller retries the same file with another encoding).
#
# The global $cline holds the 1-based number of the line being examined; the
# caller reads it for debug output.
#
# @param fn [String] path of the file to search
# @param enc [String] encoding name used when reading the file
# @param pattern [Regexp] expression each line is matched against
# @return [nil]
# @raise [ArgumentError] if a line is not a valid byte sequence in CP932
# @raise [Encoding::InvalidByteSequenceError] if the content is not valid
#   +enc+ and therefore cannot be transcoded
def find_pattern(fn, enc, pattern)
  text = File.open(fn, "rb:#{enc}", &:read)
  unless enc == 'cp932'
    text = text[1..-1] if enc == 'utf-8' && text[0] == "\uFEFF"
    # undef: :replace keeps files searchable even when they contain characters
    # that CP932 cannot represent; invalid input bytes still raise.
    text = text.encode('cp932', undef: :replace)
  end

  matches = []
  $cline = 0
  text.each_line do |line|
    $cline += 1
    # Fail explicitly (and before anything is printed) on undecodable lines.
    unless line.valid_encoding?
      raise ArgumentError, "invalid byte sequence in #{line.encoding}"
    end
    line.rstrip!
    matches << line if line =~ pattern
  end
  return if matches.empty?

  puts("File: #{fn}")
  matches.each { |hit| puts(hit) }
  nil
end
# Consume leading option flags from ARGV, then compile the search pattern.
#   -i  match case-insensitively
#   -r  sets +recursive+ (not read anywhere else in the visible script)
#   -n  accepted and ignored
# Parsing stops at the first argument that is not one of these flags; that
# argument is taken as the pattern and removed from ARGV, leaving only the
# file names behind.
icase = false
recursive = false
loop do
  case ARGV[0]
  when '-i' then icase = true
  when '-r' then recursive = true
  when '-n' then nil # ignore
  else break
  end
  ARGV.shift
end

begin
  # A missing pattern (nil) becomes the empty expression, as before.
  pattern = Regexp.new(ARGV.shift.to_s, icase ? Regexp::IGNORECASE : 0)
rescue RegexpError => e
  # A malformed user-supplied expression is an input error, not a crash.
  abort "invalid pattern: #{e.message}"
end
# Search every remaining argument as a file. Each file is tried as CP932,
# then UTF-8, then UTF-16; the first encoding that decodes without an error
# wins and the rest are skipped. Decode failures are silent unless the global
# $DBG is truthy, in which case the error, the line counter $cline and the
# backtrace go to stderr.
ARGV.each do |fn|
  %w[cp932 utf-8 utf-16].each do |enc|
    begin
      find_pattern(fn, enc, pattern)
      break
    rescue ArgumentError, Encoding::InvalidByteSequenceError,
           Encoding::UndefinedConversionError => e
      # Wrong guess for this file's encoding: fall through to the next one.
      if $DBG
        $stderr.puts "#{e.message} while reading: #{fn}"
        $stderr.puts "line: #{$cline}"
        $stderr.puts e.backtrace
      end
    rescue SystemCallError => e
      # Missing or unreadable path: report it and move on to the next file
      # instead of aborting the whole run; another encoding would not help.
      $stderr.puts e.message
      break
    end
  end
end
__END__
:endofruby
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment