Last active
August 29, 2015 14:05
-
-
Save ybiquitous/c4a510627e93709c138e to your computer and use it in GitHub Desktop.
Unicode - 結合文字と補助文字 (Java 1.6+)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="ja"> | |
<head> | |
<meta charset="UTF-8" /> | |
<title>Unicode - 結合文字と補助文字</title> | |
</head> | |
<body> | |
<form action="test.jsp" method="POST"> | |
<input type="text" name="p" value="がが丈𠀋" /> | |
<input type="submit" /> | |
<input type="reset" /> | |
</form> | |
</body> | |
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<%@page contentType="text/html; charset=UTF-8" %> | |
<%@page import="java.text.*" %> | |
<%@page import="java.util.*" %> | |
<% request.setCharacterEncoding("UTF-8"); %> | |
<%! | |
String withUnicode(String text) { | |
StringBuilder sb = new StringBuilder(text); | |
sb.append(" ("); | |
for (int i = 0; i < text.length(); i++) { | |
if (Character.isLowSurrogate(text.charAt(i))) continue; | |
if (i > 0) sb.append(" "); | |
sb.append("U+"); | |
sb.append(Integer.toHexString(text.codePointAt(i)).toUpperCase()); | |
} | |
sb.append(")"); | |
return sb.toString(); | |
} | |
String hexDump(String text) { | |
StringBuilder sb = new StringBuilder(); | |
for (int i = 0; i < text.length(); i++) { | |
if (i > 0) sb.append(" "); | |
sb.append(Integer.toHexString(text.charAt(i)).toUpperCase()); | |
} | |
return sb.toString(); | |
} | |
int countByBreakIterator(String text) { | |
BreakIterator bi = BreakIterator.getCharacterInstance(Locale.JAPAN); | |
bi.setText(text); | |
int count = 0; | |
for (int start = bi.first(), end = bi.next(); | |
end != BreakIterator.DONE; | |
start = end, end = bi.next()) { | |
count++; | |
} | |
return count; | |
} | |
String byBreakIterator(String text) { | |
BreakIterator bi = BreakIterator.getCharacterInstance(); | |
bi.setText(text); | |
StringBuilder sb = new StringBuilder(text.length()); | |
for (int start = bi.first(), end = bi.next(); | |
end != BreakIterator.DONE; | |
start = end, end = bi.next()) { | |
sb.append(text.substring(start, end)); | |
} | |
return sb.toString(); | |
} | |
%> | |
<!DOCTYPE html> | |
<html lang="ja"> | |
<head><title>Unicode - 結合文字と補助文字</title></head> | |
<body> | |
<% String p = request.getParameter("p"); %> | |
<h2><%= withUnicode(p) %></h2> | |
<pre> | |
hex dump = <%= hexDump(p) %> | |
String.length() = <%= p.length() %> | |
String.codePointCount(all) = <%= p.codePointCount(0, p.length()) %> | |
BreakIterator(count) = <%= countByBreakIterator(p) %> | |
BreakIterator = <%= withUnicode(byBreakIterator(p)) %> | |
Normalizer.normalize(NFC) = <%= withUnicode(Normalizer.normalize(p,Normalizer.Form.NFC)) %> | |
Normalizer.normalize(NFD) = <%= withUnicode(Normalizer.normalize(p,Normalizer.Form.NFD)) %> | |
Normalizer.normalize(NFKC) = <%= withUnicode(Normalizer.normalize(p,Normalizer.Form.NFKC)) %> | |
Normalizer.normalize(NFKD) = <%= withUnicode(Normalizer.normalize(p,Normalizer.Form.NFKD)) %> | |
</pre> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment