Last active
March 2, 2019 04:56
-
-
Save Trumeet/dac51a72382a7bd2a5e458ed2e7c49d1 to your computer and use it in GitHub Desktop.
Get the Jyutping of a single Chinese character; the data is parsed from cantonese.org.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package moe.yuuta.jyutpingconverter; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.select.Elements; | |
import java.io.IOException; | |
import java.util.*; | |
import java.util.stream.Collectors; | |
/** | |
* Get the Jyutping of a single Chinese character, data are parsed from cantonese.org | |
* Needs org.jsoup:jsoup. | |
*/ | |
public class JyutpingConverter { | |
public static List<Character> queryOnline(String character) throws IOException { | |
Map<String, String> cookies = new HashMap<>(11); | |
cookies.put("cantoDisplay", "jp"); | |
cookies.put("charDisplay", "s"); | |
cookies.put("mandDisplay", "none"); | |
cookies.put("searchAlphaEN", "false"); | |
cookies.put("searchAlphaJP", "false"); | |
cookies.put("searchAlphaPY", "false"); | |
cookies.put("searchAlphaYL", "false"); | |
cookies.put("searchHZSC", "true"); | |
cookies.put("searchHZTC", "false"); | |
cookies.put("toneColorHZ", "false"); | |
cookies.put("toneColorYY", "false"); | |
Document doc = Jsoup.connect("http://cantonese.org/search.php?q=" + character) | |
.cookies(cookies) | |
.get(); | |
Element table = doc.selectFirst(".results-table"); | |
if (table == null) return null; | |
try { | |
Elements trs = table.selectFirst("tbody").select(".result").select(".resulthead"); | |
return trs.stream() | |
.map(element -> { | |
String text = element.text(); | |
String[] array = text.split(" "); | |
if (array.length < 1) { | |
return null; | |
} | |
return new Character(array[0], | |
text.substring(array[0].length())); | |
}) | |
.collect(Collectors.toList()); | |
} catch (NullPointerException e) { | |
return Collections.emptyList(); | |
} | |
} | |
public static class Character { | |
private String term; | |
private String jyutping; | |
public Character(String term, String jyutping) { | |
this.term = term; | |
this.jyutping = jyutping; | |
} | |
@Override | |
public String toString() { | |
return String.format("%1$s (%2$s)", term, jyutping); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment