Skip to content

Instantly share code, notes, and snippets.

@comuttun
Last active August 3, 2016 09:14
Show Gist options
  • Save comuttun/c88763ff20bbcd14c4ef0981ab198f64 to your computer and use it in GitHub Desktop.
Save comuttun/c88763ff20bbcd14c4ef0981ab198f64 to your computer and use it in GitHub Desktop.
import static org.hamcrest.CoreMatchers.*;
import static org.hamcrest.MatcherAssert.*;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.junit.Test;
/**
 * Exploratory tests showing how {@link Normalizer} treats the same Japanese word
 * ("pokemon" in katakana) when it is spelled with a precomposed character (NFC)
 * versus a base character followed by a combining mark (NFD).
 *
 * <p>The two spellings render identically, so every literal is written with
 * Unicode escapes. That keeps the difference visible in the source and prevents
 * editors, copy/paste, or version-control tooling from silently normalizing it away.
 */
public class NfdTest {

    /** The word in NFC: starts with the precomposed katakana PO (U+30DD). */
    private static final String POKEMON_NFC = "\u30DD\u30B1\u30E2\u30F3";

    /**
     * The word in NFD: starts with katakana HO (U+30DB) followed by the combining
     * semi-voiced sound mark (U+309A).
     */
    private static final String POKEMON_NFD = "\u30DB\u309A\u30B1\u30E2\u30F3";

    /** All normalization forms, in the order {@link #detectNormalization} tries them. */
    private static final List<Normalizer.Form> FORMS = Arrays.asList(Form.values());

    /**
     * Expected per-character report for {@code POKEMON_NFC + POKEMON_NFD}.
     *
     * <p>Only the precomposed U+30DD reports NFC, because it is the only character
     * that is not already normalized under NFD, the first form tried. Every other
     * character, including the standalone combining mark, reports NFD.
     */
    private static final List<String> EXPECTED_REPORT = Arrays.asList(
            "\u30DD : NFC",
            "\u30B1 : NFD",
            "\u30E2 : NFD",
            "\u30F3 : NFD",
            "\u30DB : NFD",
            "\u309A : NFD",
            "\u30B1 : NFD",
            "\u30E2 : NFD",
            "\u30F3 : NFD");

    /**
     * Verifies that the precomposed spelling is NFC-normalized, the decomposed
     * spelling is not, and that normalizing the decomposed spelling to NFC yields
     * exactly the precomposed spelling.
     */
    @Test
    public void existsNfdChar1() {
        assertThat(Normalizer.isNormalized(POKEMON_NFC, Form.NFC), is(true));
        assertThat(Normalizer.isNormalized(POKEMON_NFD, Form.NFC), is(false));

        String normalized = Normalizer.normalize(POKEMON_NFD, Form.NFC);
        assertThat(Normalizer.isNormalized(normalized, Form.NFC), is(true));
        assertThat(normalized, is(POKEMON_NFC));
    }

    /**
     * Returns the first form in {@link #FORMS} under which {@code s} is already
     * normalized.
     *
     * @param s the text to inspect
     * @return the first matching normalization form
     * @throws java.util.NoSuchElementException if {@code s} is not normalized under
     *         any form (for example text mixing composed and decomposed sequences)
     */
    private Form detectNormalization(String s) {
        return FORMS.stream()
                .filter(form -> Normalizer.isNormalized(s, form))
                .findFirst()
                .orElseThrow(() -> new java.util.NoSuchElementException(
                        "Text is not normalized under any of " + FORMS + ": " + s));
    }

    /**
     * Builds one {@code "<char> : <FORM>"} line per value in {@code values}, where
     * each value is converted to a string with {@link Character#toChars(int)} and
     * classified on its own with {@link #detectNormalization}.
     *
     * @param values code points or UTF-16 code units to report on
     * @return the report lines, in stream order
     */
    private List<String> describe(IntStream values) {
        return values.mapToObj(value -> {
            String s = String.valueOf(Character.toChars(value));
            return s + " : " + detectNormalization(s).name();
        }).collect(Collectors.toList());
    }

    /** Classifies the mixed NFC + NFD sample one code point at a time. */
    @Test
    public void existsNfdChar2() {
        String str = POKEMON_NFC + POKEMON_NFD;

        List<String> lines = describe(str.codePoints());
        System.out.println(String.join("\n", lines));

        assertThat(lines, is(EXPECTED_REPORT));
    }

    /**
     * Classifies the mixed NFC + NFD sample one UTF-16 code unit at a time, via
     * both {@code charAt} and {@code chars()}. Every character in the sample is in
     * the Basic Multilingual Plane, so both give the same report as iterating by
     * code point.
     */
    @Test
    public void existsNfdChar3() {
        String str = POKEMON_NFC + POKEMON_NFD;

        List<String> byIndex = describe(IntStream.range(0, str.length()).map(str::charAt));
        System.out.println(String.join("\n", byIndex));
        assertThat(byIndex, is(EXPECTED_REPORT));

        // Iterating with chars() makes no difference either.
        List<String> byChars = describe(str.chars());
        System.out.println(String.join("\n", byChars));
        assertThat(byChars, is(EXPECTED_REPORT));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment