Skip to content

Instantly share code, notes, and snippets.

@moro
Created September 9, 2008 00:33
Show Gist options
  • Save moro/9581 to your computer and use it in GitHub Desktop.
Save moro/9581 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# vim:set fileencoding=utf-8 filetype=ruby
# $KCODE only has an effect on Ruby 1.8; Ruby 1.9+ ignores it and emits a
# warning, so only set it on interpreters that actually honour it.
$KCODE = 'u' if RUBY_VERSION < '1.9'

# Specs for a tokenizing regexp (@re) used with String#scan on mixed
# Japanese/ASCII text. As pinned by the examples below:
#   * a run of printable, non-space ASCII characters is one token
#     ("Ruby", "moronatural@gmail.com");
#   * any other character is a token of its own, with any directly
#     following "special" characters (listed in the before block) attached
#     to it ("ふー", "ばー、", "語。").
describe "text spliting" do
  before do
    # Characters that stay attached to the character preceding them.
    # Built with pack("U") from code points so the literal does not depend
    # on "\u" escapes, which Ruby 1.8 does not support.
    special = [
      [0x3001].pack("U"), # ten: ideographic comma
      [0x3002].pack("U"), # maru: ideographic full stop
      [0x30fc].pack("U"), # bar: prolonged sound mark (multibyte hyphen)
    ].join("|")

    # Printable, non-space ASCII (0x21-0x7E), i.e. ASCII alphanumerics plus
    # ASCII punctuation. An explicit range is used instead of
    # [[:alnum:]]/[[:punct:]] because on Ruby 1.9+ those POSIX classes are
    # Unicode-aware and would also match kana, kanji and Japanese
    # punctuation, swallowing whole Japanese runs into a single token.
    ascii = "[!-~]"

    # Alternatives, in priority order:
    #   1. one or more ASCII characters,
    #   2. any single character followed by zero or more special characters,
    #   3. whitespace that "." does not match (i.e. newlines).
    @re = /(?:#{ascii})+|.(?:#{special})*|\s+/
  end

  it '"ほげほげRuby on Railsふーばー、日本語。".scan(@re).should == ["ほ", "げ", "ほ", "げ", "Ruby", " ", "on", " ", "Rails", "ふー", "ばー、", "日", "本", "語。"]' do
    text = "ほげほげRuby on Railsふーばー、日本語。"
    text.scan(@re).should == ["ほ", "げ", "ほ", "げ", "Ruby", " ", "on", " ", "Rails", "ふー", "ばー、", "日", "本", "語。"]
  end

  it '"moronatural@gmail.comが私のメールアドレスです。".scan(@re).should == ["moronatural@gmail.com", "が", "私","の","メー","ル","ア","ド","レ","ス", "で", "す。"]' do
    text = "moronatural@gmail.comが私のメールアドレスです。"
    text.scan(@re).should ==
      ["moronatural@gmail.com", "が", "私","の","メー","ル","ア","ド","レ","ス", "で", "す。"]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment