Skip to content

Instantly share code, notes, and snippets.

@rafbm
Last active December 17, 2015 04:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rafbm/5550827 to your computer and use it in GitHub Desktop.
Save rafbm/5550827 to your computer and use it in GitHub Desktop.
Fix common downcase/whitespace issues in people's names
# encoding: utf-8
require 'minitest/autorun'
# `gem install unicode_utils`
require 'unicode_utils/downcase'
require 'unicode_utils/upcase'
# Helpers for tidying up person names as typed by users: whitespace cleanup
# plus capitalization of names that were entered entirely in lower case.
#
# Case conversion is delegated to UnicodeUtils so that non-ASCII letters are
# handled.
module HumanName
  # A run of whitespace. The POSIX bracket class is used instead of \s because
  # \s only matches ASCII whitespace in Ruby, which would let no-break spaces,
  # ideographic spaces, etc. slip through untouched.
  WHITESPACE_RUN = /[[:space:]]+/.freeze

  # A word character that is not preceded by another word character, i.e. the
  # first character of every word (at the start of the string, or after a
  # space, hyphen, apostrophe, ...).
  WORD_INITIAL = /(?<!\p{Word})\p{Word}/.freeze

  private_constant :WHITESPACE_RUN, :WORD_INITIAL

  # Tells whether +name+ contains no upper-case character.
  #
  # @param name [#to_s] the value to inspect; it is converted with +to_s+
  # @return [Boolean] true when down-casing the text leaves it unchanged
  #   (this includes the empty string and text without any letters)
  def self.all_lower_case?(name)
    text = name.to_s
    text == UnicodeUtils.downcase(text)
  end

  # Normalizes one or several names.
  #
  # For each name: whitespace runs (including Unicode whitespace) are collapsed
  # to a single ASCII space and leading/trailing whitespace is removed. If the
  # result has no upper-case character, the first character of every word is
  # upper-cased; otherwise the casing is left exactly as given.
  #
  # @param names [Array<Object>] one or more names
  # @return [String] the normalized name when called with zero or one
  #   argument; '' when that argument is not a String
  # @return [Array<String>] one normalized entry per argument when called with
  #   several arguments
  def self.normalize(*names)
    return names.map { |name| normalize(name) } if names.size > 1

    name = names.first
    return '' unless name.is_a?(String)

    # Collapse first, then strip: any Unicode whitespace at either end has
    # become a plain space by the time +strip+ (ASCII-only) runs.
    name = name.gsub(WHITESPACE_RUN, ' ').strip
    return name unless all_lower_case?(name)

    name.gsub(WORD_INITIAL) { |initial| UnicodeUtils.upcase(initial) }
  end
end
# Specs for HumanName. Whitespace inside the inputs that matters to a test is
# written with explicit escapes (or called out in a comment) so that it stays
# visible and survives editors and copy/paste.
describe HumanName do
  describe :all_lower_case? do
    it 'works without Unicode characters' do
      assert HumanName.all_lower_case?("uber-rafael o'neil mcmason")
      refute HumanName.all_lower_case?("Uber-Rafael O'Neil McMason")
      refute HumanName.all_lower_case?("UBER-RAFAEL O'NEIL MCMASON")
    end

    it 'works with Unicode characters' do
      assert HumanName.all_lower_case?('über-rafaél o’neil mcmason')
      refute HumanName.all_lower_case?('Über-Rafaél O’Neil McMason')
      refute HumanName.all_lower_case?('ÜBER-RAFAÉL O’NEIL MCMASON')
    end
  end

  describe :normalize do
    it 'strips whitespace' do
      assert_equal 'Rafaél Über-Mason', HumanName.normalize('Rafaél Über-Mason ')
      assert_equal 'Rafaél Über-Mason', HumanName.normalize(' Rafaél Über-Mason ')
      assert_equal 'Rafaél Über-Mason', HumanName.normalize(' Rafaél Über-Mason')
      assert_equal 'Rafaél Über-Mason', HumanName.normalize('Rafaél   Über-Mason') # run of three spaces
      assert_equal 'Rafaél Über-Mason', HumanName.normalize("Rafaél\tÜber-Mason") # tab, why not
      assert_equal 'Rafaél Über-Mason', HumanName.normalize("\t Rafaél \t\n Über-Mason\n") # mixed runs
    end

    it 'leaves name untouched as soon as there’s an uppercase character' do
      assert_equal 'Über-Rafaél O’Neil McMason', HumanName.normalize('Über-Rafaél O’Neil McMason')
      assert_equal 'JP Doozle', HumanName.normalize('JP Doozle')
      assert_equal 'OJ moozle', HumanName.normalize('OJ moozle')
      assert_equal 'STEVEN BARZLE', HumanName.normalize('STEVEN BARZLE')
    end

    it 'capitalizes every word when there’s no uppercase character' do
      assert_equal 'Über-Rafaél O’Neil Mcmason', HumanName.normalize('über-rafaél o’neil mcmason')
      assert_equal 'Rafaél Über-Mason', HumanName.normalize('rafaél über-mason')
      assert_equal 'Elize Ballock', HumanName.normalize('elize ballock')
      assert_equal 'Roche', HumanName.normalize('roche')
      assert_equal 'Larry Mckinnon', HumanName.normalize('larry mckinnon')
    end

    it 'returns an empty string on bad input' do
      assert_equal '', HumanName.normalize(' ')
      assert_equal '', HumanName.normalize(nil)
      assert_equal '', HumanName.normalize(0)
      assert_equal '', HumanName.normalize(1)
      assert_equal '', HumanName.normalize(true)
      assert_equal '', HumanName.normalize(false)
      assert_equal '', HumanName.normalize([])
      assert_equal '', HumanName.normalize({})
    end

    it 'returns an array when passed multiple arguments' do
      assert_equal ['Jean', 'de la Fontaine'], HumanName.normalize(' Jean', 'de la Fontaine ')
      assert_equal ['Jean', 'de la Fontaine'], HumanName.normalize('Jean', 'de la Fontaine')
      assert_equal ['Jean', 'De La Fontaine'], HumanName.normalize('jean', 'de la fontaine')
      assert_equal ['Jean', 'de la Fontaine'], HumanName.normalize('jean', 'de la Fontaine')
      assert_equal ['Über-Rafaél', 'O’Neil', 'McMason'], HumanName.normalize('Über-Rafaél', 'O’Neil', 'McMason')
      assert_equal ['Über-Rafaél', 'O’Neil', 'Mcmason'], HumanName.normalize('über-rafaél', 'o’neil', 'mcmason')
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment