chezou/WordCount.jl

## word_count.rb
# Ruby using -O wakati
require "mecab"
require "benchmark"

# Tokenizes +text+ with a MeCab tagger created with "-O wakati", splits the
# tagger output on whitespace and tallies each token.
# Returns a Hash (default value 0) mapping token => number of occurrences.
def count_word(text)
  tagger = MeCab::Tagger.new("-O wakati")
  tokens = tagger.parse(text).split
  tokens.each_with_object(Hash.new(0)) { |token, tally| tally[token] += 1 }
end

# Entry point: reads the file named by the first command-line argument and
# benchmarks ten count_word passes over it with Benchmark.bmbm.
# Prints a message and returns without benchmarking when no argument is given.
def main
  # Only one argument (the file name) is consumed below, so a single argument
  # must be accepted; requiring two would reject a lone file name.
  if ARGV.empty?
    puts "File name required"
    return
  end
  fname = ARGV.shift
  Benchmark.bmbm do |x|
    x.report {
      10.times {
        count_word(File.read(fname))
      }
    }
  end
end

main

## word_count2.rb
# Ruby using MeCab node
require "mecab"
require "benchmark"

# Walks the node list returned by MeCab::Tagger#parseToNode for +text+ and
# tallies every node whose surface string is non-empty.
# Returns a Hash (default value 0) mapping surface => number of occurrences.
def count_word(text)
  # The tagger is kept in a local for the whole walk, as in the original.
  tagger = MeCab::Tagger.new
  tally = Hash.new(0)

  cursor = tagger.parseToNode(text)
  while cursor
    surface = cursor.surface
    # Nodes with an empty surface are skipped, not counted.
    tally[surface] += 1 unless surface.empty?
    cursor = cursor.next
  end
  tally
end

# Entry point: reads the file named by the first command-line argument and
# benchmarks ten count_word passes over it with Benchmark.bmbm.
# Prints a message and returns without benchmarking when no argument is given.
def main
  # Only one argument (the file name) is consumed below, so a single argument
  # must be accepted; requiring two would reject a lone file name.
  if ARGV.empty?
    puts "File name required"
    return
  end
  fname = ARGV.shift
  Benchmark.bmbm do |x|
    x.report {
      10.times {
        count_word(File.read(fname))
      }
    }
  end
end

main

## WordCount.jl
# Julia using -O wakati
using Benchmark
using MeCab

# Tokenize `text` with a Mecab instance created with "-O wakati", split the
# string returned by `sparse_tostr` on whitespace, and tally each token.
# Returns a Dict{UTF8String, Int} mapping token => number of occurrences.
function count_word(text::UTF8String)
  tagger = Mecab("-O wakati")
  tally = Dict{UTF8String, Int}()
  for token in split(sparse_tostr(tagger, text))
    tally[token] = 1 + get(tally, token, 0)
  end
  return tally
end

# Entry point: read the file named by ARGS[1] and print the result of running
# `benchmark` on count_word 10 times under the label "WordCount1".
# Prints a message and returns early when no file name is given.
function main()
  if length(ARGS) < 1
    println("File name required")
    return
  end
  # The function form of `open` closes the handle once `readall` returns (and
  # on error); a bare `open(ARGS[1])` would leave the handle open.
  text::UTF8String = open(readall, ARGS[1])
  f1() = count_word(text)
  println(benchmark(f1, "WordCount1", 10))
end

main()

## wordcount.r
# RMeCabFreq
library(RMeCab)
library(microbenchmark)

# Take the input file from the first trailing command-line argument and time
# ten RMeCabFreq runs over it, reporting in seconds.
args <- commandArgs(trailingOnly=TRUE)
# Without this guard a missing argument makes fname NA and the failure only
# surfaces inside RMeCabFreq; the sibling scripts all check for it up front.
if (length(args) < 1) {
  stop("File name required", call.=FALSE)
}
fname <- args[1]
microbenchmark(
  result <- RMeCabFreq(fname),
  unit="s",
  times=10L
)

## WordCount2.jl
# Julia using MeCab node
using Benchmark
using MeCab

# Split `text` on newlines, pass each line to `parse_surface2` with a
# default-constructed Mecab instance, and tally every word it yields.
# Returns a Dict{UTF8String, Int} mapping word => number of occurrences.
function count_word(text::UTF8String)
  tagger = Mecab()
  tally = Dict{UTF8String, Int}()
  for piece in split(text, '\n')
    # The typed local converts each piece to UTF8String before it is handed
    # to parse_surface2 (presumably the type that function expects).
    current::UTF8String = piece
    for surface in parse_surface2(tagger, current)
      tally[surface] = 1 + get(tally, surface, 0)
    end
  end
  return tally
end

# Entry point: read the file named by ARGS[1] and print the result of running
# `benchmark` on count_word 10 times under the label "WordCount2".
# Prints a message and returns early when no file name is given.
function main()
  if length(ARGS) < 1
    println("Require file name")
    return
  end
  # The function form of `open` closes the handle once `readall` returns (and
  # on error); a bare `open(ARGS[1])` would leave the handle open.
  text::UTF8String = open(readall, ARGS[1])
  f1() = count_word(text)
  println(benchmark(f1, "WordCount2", 10))
end

main()

## WordCount3.jl
# Julia considering gc
using MeCab

# Tokenize `text` with a Mecab instance created with "-O wakati", split the
# string returned by `sparse_tostr` on whitespace, and tally each token.
# Returns a Dict{UTF8String, Int} mapping token => number of occurrences.
function count_word(text::UTF8String)
  tagger = Mecab("-O wakati")
  tally = Dict{UTF8String, Int}()
  for token in split(sparse_tostr(tagger, text))
    tally[token] = 1 + get(tally, token, 0)
  end
  return tally
end

# Entry point: read the file named by ARGS[1], time count_word over several
# runs with a gc() call before each timed run, and print the mean of the
# tic()/toq() timings.
# Prints a message and returns early when no file name is given.
function main()
  if length(ARGS) < 1
    println("File name required")
    return
  end
  # The function form of `open` closes the handle once `readall` returns (and
  # on error); a bare `open(ARGS[1])` would leave the handle open.
  text::UTF8String = open(readall, ARGS[1])
  runs = 10  # single source for both the loop bound and the divisor
  t = 0.0
  # Iterate the range directly: `[1:10]` only yields 1..10 through the old
  # implicit concatenation and becomes a one-element array on later versions.
  for i in 1:runs
    gc()  # runs outside the tic()/toq() window, so it is not timed
    tic()
    count_word(text)
    t += toq()
  end
  println(t / runs)
end

main()
	# Ruby using -O wakati
	require "mecab"
	require "benchmark"

	# Tokenizes +text+ with a MeCab tagger created with "-O wakati", splits the
	# tagger output on whitespace and tallies each token.
	# Returns a Hash (default value 0) mapping token => number of occurrences.
	def count_word(text)
	  tagger = MeCab::Tagger.new("-O wakati")
	  counts = Hash.new(0)
	  # Block parameters are delimited by plain pipes; backslash-escaped pipes
	  # are a syntax error in Ruby.
	  tagger.parse(text).split.each do |e|
	    counts[e] += 1
	  end
	  counts
	end

	# Entry point: reads the file named by the first command-line argument and
	# benchmarks ten count_word passes over it with Benchmark.bmbm.
	# Prints a message and returns without benchmarking when no argument is given.
	def main
	  # Only one argument (the file name) is consumed below, so a single argument
	  # must be accepted; requiring two would reject a lone file name.
	  if ARGV.empty?
	    puts "File name required"
	    return
	  end
	  fname = ARGV.shift
	  # Block parameters are delimited by plain pipes; backslash-escaped pipes
	  # are a syntax error in Ruby.
	  Benchmark.bmbm do |x|
	    x.report {
	      10.times {
	        count_word(File.read(fname))
	      }
	    }
	  end
	end

	main
	# Ruby using MeCab node
	require "mecab"
	require "benchmark"

	# Walks the node list returned by MeCab::Tagger#parseToNode for +text+ and
	# tallies every node whose surface string is non-empty.
	# Returns a Hash (default value 0) mapping surface => number of occurrences.
	def count_word(text)
	  # The tagger is kept in a local for the whole walk, as in the original.
	  tagger = MeCab::Tagger.new
	  tally = Hash.new(0)

	  cursor = tagger.parseToNode(text)
	  while cursor
	    surface = cursor.surface
	    # Nodes with an empty surface are skipped, not counted.
	    tally[surface] += 1 unless surface.empty?
	    cursor = cursor.next
	  end
	  tally
	end

	# Entry point: reads the file named by the first command-line argument and
	# benchmarks ten count_word passes over it with Benchmark.bmbm.
	# Prints a message and returns without benchmarking when no argument is given.
	def main
	  # Only one argument (the file name) is consumed below, so a single argument
	  # must be accepted; requiring two would reject a lone file name.
	  if ARGV.empty?
	    puts "File name required"
	    return
	  end
	  fname = ARGV.shift
	  # Block parameters are delimited by plain pipes; backslash-escaped pipes
	  # are a syntax error in Ruby.
	  Benchmark.bmbm do |x|
	    x.report {
	      10.times {
	        count_word(File.read(fname))
	      }
	    }
	  end
	end

	main
	# Julia using -O wakati
	using Benchmark
	using MeCab

	# Tokenize `text` with a Mecab instance created with "-O wakati", split the
	# string returned by `sparse_tostr` on whitespace, and tally each token.
	# Returns a Dict{UTF8String, Int} mapping token => number of occurrences.
	function count_word(text::UTF8String)
	  tagger = Mecab("-O wakati")
	  tally = Dict{UTF8String, Int}()
	  for token in split(sparse_tostr(tagger, text))
	    tally[token] = 1 + get(tally, token, 0)
	  end
	  return tally
	end

	# Entry point: read the file named by ARGS[1] and print the result of running
	# `benchmark` on count_word 10 times under the label "WordCount1".
	# Prints a message and returns early when no file name is given.
	function main()
	  if length(ARGS) < 1
	    println("File name required")
	    return
	  end
	  # The function form of `open` closes the handle once `readall` returns (and
	  # on error); a bare `open(ARGS[1])` would leave the handle open.
	  text::UTF8String = open(readall, ARGS[1])
	  f1() = count_word(text)
	  println(benchmark(f1, "WordCount1", 10))
	end

	main()
	# RMeCabFreq
	library(RMeCab)
	library(microbenchmark)

	# Take the input file from the first trailing command-line argument and time
	# ten RMeCabFreq runs over it, reporting in seconds.
	args <- commandArgs(trailingOnly=TRUE)
	# Without this guard a missing argument makes fname NA and the failure only
	# surfaces inside RMeCabFreq; the sibling scripts all check for it up front.
	if (length(args) < 1) {
	  stop("File name required", call.=FALSE)
	}
	fname <- args[1]
	microbenchmark(
	  result <- RMeCabFreq(fname),
	  unit="s",
	  times=10L
	)
	# Julia using MeCab node
	using Benchmark
	using MeCab

	# Split `text` on newlines, pass each line to `parse_surface2` with a
	# default-constructed Mecab instance, and tally every word it yields.
	# Returns a Dict{UTF8String, Int} mapping word => number of occurrences.
	function count_word(text::UTF8String)
	  tagger = Mecab()
	  tally = Dict{UTF8String, Int}()
	  for piece in split(text, '\n')
	    # The typed local converts each piece to UTF8String before it is handed
	    # to parse_surface2 (presumably the type that function expects).
	    current::UTF8String = piece
	    for surface in parse_surface2(tagger, current)
	      tally[surface] = 1 + get(tally, surface, 0)
	    end
	  end
	  return tally
	end

	# Entry point: read the file named by ARGS[1] and print the result of running
	# `benchmark` on count_word 10 times under the label "WordCount2".
	# Prints a message and returns early when no file name is given.
	function main()
	  if length(ARGS) < 1
	    println("Require file name")
	    return
	  end
	  # The function form of `open` closes the handle once `readall` returns (and
	  # on error); a bare `open(ARGS[1])` would leave the handle open.
	  text::UTF8String = open(readall, ARGS[1])
	  f1() = count_word(text)
	  println(benchmark(f1, "WordCount2", 10))
	end

	main()
	# Julia considering gc
	using MeCab

	# Tokenize `text` with a Mecab instance created with "-O wakati", split the
	# string returned by `sparse_tostr` on whitespace, and tally each token.
	# Returns a Dict{UTF8String, Int} mapping token => number of occurrences.
	function count_word(text::UTF8String)
	  tagger = Mecab("-O wakati")
	  tally = Dict{UTF8String, Int}()
	  for token in split(sparse_tostr(tagger, text))
	    tally[token] = 1 + get(tally, token, 0)
	  end
	  return tally
	end

	# Entry point: read the file named by ARGS[1], time count_word over several
	# runs with a gc() call before each timed run, and print the mean of the
	# tic()/toq() timings.
	# Prints a message and returns early when no file name is given.
	function main()
	  if length(ARGS) < 1
	    println("File name required")
	    return
	  end
	  # The function form of `open` closes the handle once `readall` returns (and
	  # on error); a bare `open(ARGS[1])` would leave the handle open.
	  text::UTF8String = open(readall, ARGS[1])
	  runs = 10  # single source for both the loop bound and the divisor
	  t = 0.0
	  # Iterate the range directly: `[1:10]` only yields 1..10 through the old
	  # implicit concatenation and becomes a one-element array on later versions.
	  for i in 1:runs
	    gc()  # runs outside the tic()/toq() window, so it is not timed
	    tic()
	    count_word(text)
	    t += toq()
	  end
	  println(t / runs)
	end

	main()