Skip to content

Instantly share code, notes, and snippets.

@timothyandrew
Created April 20, 2012 03:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timothyandrew/fbb13cd39acad569aeb4 to your computer and use it in GitHub Desktop.
Save timothyandrew/fbb13cd39acad569aeb4 to your computer and use it in GitHub Desktop.
JRuby Tests for twitter-cldr-rb Normalizer
# Gemfile for the JRuby normalizer comparison spec.
# Use the explicit HTTPS URL: the `:rubygems` symbol shorthand is deprecated
# by Bundler because it resolves to a plain-HTTP source.
source "https://rubygems.org"

# twitter_cldr is loaded from a local checkout; adjust the path to your clone.
gem "twitter_cldr", :path => "~/dev/repos/twitter-cldr-rb"
gem "rspec"
# encoding: UTF-8
include Java
require 'rubygems'
require 'bundler/setup'
require 'lucene-test-framework-3.6.0.jar'
require 'lucene-core-3.6.0'
require 'rspec'
require 'twitter_cldr'
java_import(['java.util.Random', 'java.text.Normalizer', 'org.apache.lucene.util._TestUtil'])
# Returns the result of Lucene's `_TestUtil.randomRealisticUnicodeString`,
# called with a freshly constructed `Random`.
#
# The utility class is looked up through `JavaUtilities.get_proxy_class`
# using its fully qualified Java name.
def random_unicode_string
  test_util = JavaUtilities.get_proxy_class('org.apache.lucene.util._TestUtil')
  rng = Random.new
  test_util.randomRealisticUnicodeString(rng)
end
# Normalizes +string+ with the imported Java `Normalizer`, using its NFD form.
#
# @param string the text to normalize
# @return whatever `Normalizer.normalize` returns for that text
def java_normalize(string)
  nfd_form = Normalizer::Form::NFD
  Normalizer.normalize(string, nfd_form)
end
# Normalizes +string+ with TwitterCldr's NFD normalizer.
#
# @param string the text to normalize
# @return whatever `TwitterCldr::Normalizers::NFD.normalize` returns for that text
def ruby_normalize(string)
  normalizer = TwitterCldr::Normalizers::NFD
  normalizer.normalize(string)
end
# Mix the normalizer namespace into the top level so the spec below can refer
# to `NFD` and `Base` without qualification.
include TwitterCldr::Normalizers

# Checks TwitterCldr's NFD normalizer against java.text.Normalizer on
# randomly generated input.
describe NFD do
  it "normalizes any unicode string exactly like java.text.Normalizer" do
    # Maps every element of `text.split('')` to its code point; used only for
    # the diagnostic output printed on each iteration.
    code_points = lambda do |text|
      text.split('').map { |char| Base.char_to_code_point(char) }
    end

    50.times do
      p string = random_unicode_string

      # Normalize once per implementation so the values dumped below are
      # exactly the ones being compared (previously each was computed twice).
      ruby_result = ruby_normalize(string)
      java_result = java_normalize(string)

      p "String is #{code_points.call(string).inspect}"
      p "Ruby is #{code_points.call(ruby_result).inspect}"
      p "Java is #{code_points.call(java_result).inspect}"

      ruby_result.should == java_result
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment