Skip to content

Instantly share code, notes, and snippets.

@mediavrog
Last active August 26, 2020 09:03
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 7 You must be signed in to fork a gist
  • Save mediavrog/6b13669533ac20d5ba0f to your computer and use it in GitHub Desktop.
Save mediavrog/6b13669533ac20d5ba0f to your computer and use it in GitHub Desktop.
Simple string conversion from Hiragana to Katakana and vice versa. Uses the JapaneseCharacter class from Duane J. May and combines it with a simple Utility class method to perform the actual conversion.
/**
 * JapaneseCharacter contains static functions to do various tests
 * on characters to determine if it is one of the various types of
 * characters used in the Japanese writing system.
 * <p>
 * There are also functions to translate between Katakana, Hiragana,
 * and Romaji.
 *
 * @author Duane J. May <djmay@mayhoo.com>
 * @version $Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
 * @since 10:37 AM - 6/3/14
 *
 * @see <a href="http://sourceforge.net/projects/kanjixml/">http://sourceforge.net/projects/kanjixml/</a>
 */
public class JapaneseCharacter {

    /**
     * Version information
     */
    private final static String VERSION =
            "$Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $";

    /** Distance between a Hiragana code point and its full-width Katakana twin. */
    private static final int KANA_BLOCK_OFFSET = 0x60;

    /** First code point covered by the {@link #romaji} table (U+3041, small "a"). */
    private static final int ROMAJI_TABLE_BASE = 0x3041;

    /** First code point of the half-width Katakana range (U+FF66). */
    private static final int HALF_WIDTH_BASE = 0xff66;

    /**
     * Hiragana equivalents of the half-width Katakana U+FF66..U+FF9D, indexed by
     * {@code c - HALF_WIDTH_BASE}. The half-width block is NOT laid out in the
     * same order as the Hiragana block, so a table is required instead of a
     * fixed offset. The half-width prolonged sound mark (U+FF70) maps to the
     * full-width prolonged sound mark (U+30FC), which is used with both scripts.
     */
    private static final String HALF_WIDTH_TO_HIRAGANA =
            "\u3092"                            // U+FF66        wo
            + "\u3041\u3043\u3045\u3047\u3049"  // U+FF67..FF6B  small a i u e o
            + "\u3083\u3085\u3087"              // U+FF6C..FF6E  small ya yu yo
            + "\u3063"                          // U+FF6F        small tsu
            + "\u30fc"                          // U+FF70        prolonged sound mark
            + "\u3042\u3044\u3046\u3048\u304a"  // U+FF71..FF75  a i u e o
            + "\u304b\u304d\u304f\u3051\u3053"  // U+FF76..FF7A  ka ki ku ke ko
            + "\u3055\u3057\u3059\u305b\u305d"  // U+FF7B..FF7F  sa shi su se so
            + "\u305f\u3061\u3064\u3066\u3068"  // U+FF80..FF84  ta chi tsu te to
            + "\u306a\u306b\u306c\u306d\u306e"  // U+FF85..FF89  na ni nu ne no
            + "\u306f\u3072\u3075\u3078\u307b"  // U+FF8A..FF8E  ha hi fu he ho
            + "\u307e\u307f\u3080\u3081\u3082"  // U+FF8F..FF93  ma mi mu me mo
            + "\u3084\u3086\u3088"              // U+FF94..FF96  ya yu yo
            + "\u3089\u308a\u308b\u308c\u308d"  // U+FF97..FF9B  ra ri ru re ro
            + "\u308f"                          // U+FF9C        wa
            + "\u3093";                         // U+FF9D        n

    /**
     * Determines if this character is a Japanese Kana.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is Hiragana or Katakana
     */
    public static boolean isKana(char c) {
        return isHiragana(c) || isKatakana(c);
    }

    /**
     * Determines if this character is one of the Japanese Hiragana
     * (code points U+3041..U+309E).
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in the Hiragana range
     */
    public static boolean isHiragana(char c) {
        return ('\u3041' <= c) && (c <= '\u309e');
    }

    /**
     * Determines if this character is one of the Japanese Katakana,
     * either half width or full width.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is half-width or full-width Katakana
     */
    public static boolean isKatakana(char c) {
        return isHalfWidthKatakana(c) || isFullWidthKatakana(c);
    }

    /**
     * Determines if this character is a Half width Katakana
     * (code points U+FF66..U+FF9D).
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in the half-width Katakana range
     */
    public static boolean isHalfWidthKatakana(char c) {
        return ('\uff66' <= c) && (c <= '\uff9d');
    }

    /**
     * Determines if this character is a Full width Katakana
     * (code points U+30A1..U+30FE).
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in the full-width Katakana range
     */
    public static boolean isFullWidthKatakana(char c) {
        return ('\u30a1' <= c) && (c <= '\u30fe');
    }

    /**
     * Determines if this character is a Kanji character
     * (U+4E00..U+9FA5, or U+3005..U+3007).
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in one of the Kanji ranges
     */
    public static boolean isKanji(char c) {
        boolean inMainRange = ('\u4e00' <= c) && (c <= '\u9fa5');
        boolean inSymbolRange = ('\u3005' <= c) && (c <= '\u3007');
        return inMainRange || inSymbolRange;
    }

    /**
     * Determines if this character could be used as part of
     * a romaji character.
     * <p>
     * Accepts U+0021..U+003A and U+0041..U+0090. The original implementation
     * also tested U+0061..U+007A and U+0041..U+005A, but both are fully
     * contained in the second range, so those branches were unreachable.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in one of the accepted ranges
     */
    public static boolean isRomaji(char c) {
        // NOTE(review): the upper bound 0x90 reaches past the ASCII letters into
        // punctuation and control characters; it looks unintended but is kept
        // so that existing callers see the same results.
        return (('\u0021' <= c) && (c <= '\u003a'))
                || (('\u0041' <= c) && (c <= '\u0090'));
    }

    /**
     * Translates this character into the equivalent Katakana character.
     * The function only operates on Hiragana and always returns the
     * Full width version of the Katakana. Any other character is returned
     * unchanged, including the code points U+3097..U+309C (unassigned code
     * points and voicing marks), which have no Katakana letter counterpart.
     *
     * @param c the character to translate
     * @return the full-width Katakana equivalent, or {@code c} itself
     */
    public static char toKatakana(char c) {
        if (isShiftableHiragana(c)) {
            return (char) (c + KANA_BLOCK_OFFSET);
        }
        return c;
    }

    /**
     * Translates this character into the equivalent Hiragana character.
     * The function only operates on Katakana characters. Full-width Katakana
     * is shifted into the Hiragana block; half-width Katakana is translated
     * through a lookup table. Characters with no Hiragana counterpart are
     * returned unchanged: this includes the prolonged sound mark (U+30FC),
     * the middle dot (U+30FB) and U+30F7..U+30FA.
     *
     * @param c the character to translate
     * @return the Hiragana equivalent, or {@code c} itself
     */
    public static char toHiragana(char c) {
        if (isShiftableKatakana(c)) {
            return (char) (c - KANA_BLOCK_OFFSET);
        }
        if (isHalfWidthKatakana(c)) {
            return HALF_WIDTH_TO_HIRAGANA.charAt(c - HALF_WIDTH_BASE);
        }
        return c;
    }

    /**
     * Translates this character into the equivalent Romaji character.
     * The function only operates on Hiragana and Katakana characters.
     * If the character is outside those ranges, or is a kana that has no
     * entry in the romaji table (for example the prolonged sound mark or
     * the iteration marks), the original character is returned.
     * <p>
     * The resulting string is lowercase if the input was Hiragana and
     * UPPERCASE if the input was Katakana.
     *
     * @param c the character to translate
     * @return the romaji string, or {@code c} as a one-character string
     */
    public static String toRomaji(char c) {
        if (isHiragana(c)) {
            String lower = lookupRomaji(c);
            return (lower != null) ? lower : String.valueOf(c);
        }
        if (isKatakana(c)) {
            String lower = lookupRomaji(toHiragana(c));
            // Locale.ROOT keeps the result stable regardless of the default locale
            // (e.g. a Turkish locale would turn "i" into a dotted capital I).
            return (lower != null)
                    ? lower.toUpperCase(java.util.Locale.ROOT)
                    : String.valueOf(c);
        }
        return String.valueOf(c);
    }

    /**
     * The array used to map hiragana to romaji. Entry {@code i} belongs to the
     * code point {@code U+3041 + i}; the table covers U+3041..U+3096.
     */
    protected static String[] romaji = {
            "a", "a",
            "i", "i",
            "u", "u",
            "e", "e",
            "o", "o",
            "ka", "ga",
            "ki", "gi",
            "ku", "gu",
            "ke", "ge",
            "ko", "go",
            "sa", "za",
            "shi", "ji",
            "su", "zu",
            "se", "ze",
            "so", "zo",
            "ta", "da",
            "chi", "ji",
            "tsu", "tsu", "zu",
            "te", "de",
            "to", "do",
            "na",
            "ni",
            "nu",
            "ne",
            "no",
            "ha", "ba", "pa",
            "hi", "bi", "pi",
            "fu", "bu", "pu",
            "he", "be", "pe",
            "ho", "bo", "po",
            "ma",
            "mi",
            "mu",
            "me",
            "mo",
            "a", "ya",
            "u", "yu",
            "o", "yo",
            "ra",
            "ri",
            "ru",
            "re",
            "ro",
            "wa", "wa",
            "wi", "we",
            "o",
            "n",
            "v",
            "ka",
            "ke"
    };

    /**
     * Access the array to return the correct romaji string.
     *
     * @param c a character, normally Hiragana
     * @return the table entry for {@code c}, or {@code null} when {@code c}
     *         falls outside the table
     */
    private static String lookupRomaji(char c) {
        int index = c - ROMAJI_TABLE_BASE;
        if (index < 0 || index >= romaji.length) {
            return null;
        }
        return romaji[index];
    }

    /**
     * Tells whether a Hiragana code point has a Katakana twin exactly
     * 0x60 above it: the letters U+3041..U+3096 and the iteration marks
     * U+309D..U+309E.
     */
    private static boolean isShiftableHiragana(char c) {
        return (('\u3041' <= c) && (c <= '\u3096'))
                || (('\u309d' <= c) && (c <= '\u309e'));
    }

    /**
     * Tells whether a full-width Katakana code point has a Hiragana twin
     * exactly 0x60 below it: the letters U+30A1..U+30F6 and the iteration
     * marks U+30FD..U+30FE.
     */
    private static boolean isShiftableKatakana(char c) {
        return (('\u30a1' <= c) && (c <= '\u30f6'))
                || (('\u30fd' <= c) && (c <= '\u30fe'));
    }
}
public class Utils{
    /**
     * Flips a kana string between Hiragana and Katakana.
     * <p>
     * The direction is decided by the first character only: if it is
     * Hiragana every character is passed through
     * {@code JapaneseCharacter.toKatakana}; if it is Katakana every character
     * is passed through {@code JapaneseCharacter.toHiragana}.
     *
     * @param input the text to convert; may be {@code null}
     * @return the converted text; an empty string for {@code null} or empty
     *         input; {@code input} itself when its first character is neither
     *         Hiragana nor Katakana
     */
    public static String convertKana(String input) {
        if (input == null || input.isEmpty()) {
            return "";
        }

        final char first = input.charAt(0);
        final boolean hiraganaSource = JapaneseCharacter.isHiragana(first);
        if (!hiraganaSource && !JapaneseCharacter.isKatakana(first)) {
            // Not kana at all: hand the text back untouched.
            return input;
        }

        final char[] converted = input.toCharArray();
        for (int pos = 0; pos < converted.length; pos++) {
            converted[pos] = hiraganaSource
                    ? JapaneseCharacter.toKatakana(converted[pos])
                    : JapaneseCharacter.toHiragana(converted[pos]);
        }
        return new String(converted);
    }
}
@bebop-001
Copy link

Really useful tool. Thanks!

@dalv0911
Copy link

Thank you so much!

@HTLife
Copy link

HTLife commented May 28, 2016

It seems that you forgot to handle the "長音符" (long vowel mark) in katakana.
The "ー" is affected by the vowel of the character before it.
For example,
"サー"=>"さあ"
"コー"=>"こう"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment