Skip to content

Instantly share code, notes, and snippets.

@piroyoung
Created February 2, 2014 10:53
Show Gist options
  • Save piroyoung/8766443 to your computer and use it in GitHub Desktop.
Save piroyoung/8766443 to your computer and use it in GitHub Desktop.
# encoding: UTF-8
require "fileutils"
###random sampling
# Splits a line-oriented text file whose first line is a header (e.g. a CSV)
# into two output files: the selected rows and the remaining rows. The header
# line is copied to both outputs.
#
# For an output path +samp+, the remaining rows are written next to it under
# the same file name prefixed with "remain_".
class ProcessCSV
  # @param orig [String] path of the source file to split
  def initialize(orig)
    # Ideally the constructor would also configure forbidden characters and
    # the character encoding.
    @origFileName = orig
  end

  # Randomly moves +num+ data rows into +samp+ and the rest into the
  # "remain_" file. When the source has fewer than +num+ data rows, all of
  # them are sampled.
  #
  # Both output files are truncated first. The source is read twice (once to
  # count lines, once to split) so it never has to be held in memory.
  #
  # @param num [Numeric] number of data rows (header excluded) to sample
  # @param samp [String] path of the file receiving the sampled rows
  # @return [nil]
  # @raise [SystemCallError] if an output file cannot be created or the
  #   source cannot be read during the counting pass; errors raised while
  #   splitting are printed to standard output instead of propagating
  def sampling(num, samp)
    reset_outputs(samp)
    # The header is not a data row, so it must not receive a rank; counting
    # it is what previously made the sample come up one row short at random.
    data_rows = [File.foreach(@origFileName).count - 1, 0].max
    # Every data row gets a unique rank in 1..data_rows; the rows ranked
    # 1..num form the sample.
    ranks = (1..data_rows).to_a.shuffle
    split_rows(samp) { |_line, row| ranks[row - 1] <= num }
  end

  # Moves every data row matching +word+ into +samp+ and the rest into the
  # "remain_" file. Both output files are truncated first.
  #
  # @param word [String, Regexp] pattern source handed to Regexp.new
  # @param samp [String] path of the file receiving the matching rows
  # @return [nil]
  # @raise [RegexpError] if +word+ is not a valid pattern
  # @raise [SystemCallError] if an output file cannot be created; errors
  #   raised while splitting are printed to standard output instead
  def grepSeparate(word, samp)
    pattern = Regexp.new(word)
    reset_outputs(samp)
    split_rows(samp) { |line, _row| pattern =~ line }
  end

  private

  # Path of the "remain_" companion of +samp+, placed in the same directory.
  def remain_path(samp)
    File.join(File.dirname(samp), "remain_#{File.basename(samp)}")
  end

  # Creates or truncates both output files.
  def reset_outputs(samp)
    [samp, remain_path(samp)].each { |path| File.open(path, "w") {} }
  end

  # Copies the header to both outputs, then routes each data row to +samp+
  # when the block returns a truthy value and to the "remain_" file otherwise.
  # The block receives the line and its 1-based data-row number. Block-form
  # File.open closes every handle even when an error interrupts the loop.
  def split_rows(samp)
    File.open(@origFileName, "r") do |orig|
      File.open(samp, "w") do |sampled|
        File.open(remain_path(samp), "w") do |remain|
          orig.each_line.with_index do |line, row|
            if row.zero? # header line
              sampled.write(line)
              remain.write(line)
            elsif yield(line, row)
              sampled.write(line)
            else
              remain.write(line)
            end
          end
        end
      end
    end
    nil
  rescue => ex
    # Best-effort by design: report the failure and carry on.
    print ex.message, "\n"
  end
end
# Driver: put the commands you want to run inside this block. Any
# StandardError raised by them is reported by message only.
begin
  csv = ProcessCSV.new("ChangeLog")
  csv.sampling(60, "samp.csv")
  # csv.grepSeparate("Scott", "greped.csv")
rescue StandardError => e
  puts e.message
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment