Skip to content

Instantly share code, notes, and snippets.

@Trumeet
Last active March 2, 2019 04:56
Show Gist options
  • Save Trumeet/dac51a72382a7bd2a5e458ed2e7c49d1 to your computer and use it in GitHub Desktop.
Save Trumeet/dac51a72382a7bd2a5e458ed2e7c49d1 to your computer and use it in GitHub Desktop.
Get the Jyutping of a single Chinese character; the data is parsed from cantonese.org.
package moe.yuuta.jyutpingconverter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Looks up the Jyutping romanization of a Chinese character by scraping the
 * search results page of cantonese.org.
 *
 * <p>Needs {@code org.jsoup:jsoup} on the classpath.
 */
public class JyutpingConverter {
    /** Search endpoint; the search term is sent as the {@code q} query parameter. */
    private static final String SEARCH_URL = "http://cantonese.org/search.php";

    /**
     * Queries cantonese.org for the given text and returns one entry per
     * {@code .resulthead} element found inside the first {@code tbody} of the
     * {@code .results-table} element.
     *
     * @param character the text to search for; must not be {@code null}
     * @return the parsed entries in page order; an empty list (never
     *         {@code null}) when the page has no results table, no table body,
     *         or no parsable result heads
     * @throws IOException if the HTTP request fails
     * @throws NullPointerException if {@code character} is {@code null}
     */
    public static List<Character> queryOnline(String character) throws IOException {
        Objects.requireNonNull(character, "character");

        // Passing the term through data() lets jsoup URL-encode it, so input
        // containing '&', '#', '+', '%' or spaces cannot corrupt the query string.
        Document doc = Jsoup.connect(SEARCH_URL)
                .data("q", character)
                .cookies(displayCookies())
                .get();

        Element table = doc.selectFirst(".results-table");
        if (table == null) {
            return Collections.emptyList();
        }
        // Explicit check instead of catching NullPointerException, which would
        // also hide unrelated bugs.
        Element body = table.selectFirst("tbody");
        if (body == null) {
            return Collections.emptyList();
        }

        Elements heads = body.select(".result").select(".resulthead");
        return heads.stream()
                .map(Element::text)
                .map(JyutpingConverter::parseResultHead)
                .filter(Objects::nonNull) // drop heads that carried no text
                .collect(Collectors.toList());
    }

    /**
     * Builds the cookies sent with every search request.
     * NOTE(review): these presumably select the site's display and search
     * options (e.g. Jyutping output, simplified characters); confirm against
     * the site before changing any value.
     */
    private static Map<String, String> displayCookies() {
        Map<String, String> cookies = new HashMap<>(11);
        cookies.put("cantoDisplay", "jp");
        cookies.put("charDisplay", "s");
        cookies.put("mandDisplay", "none");
        cookies.put("searchAlphaEN", "false");
        cookies.put("searchAlphaJP", "false");
        cookies.put("searchAlphaPY", "false");
        cookies.put("searchAlphaYL", "false");
        cookies.put("searchHZSC", "true");
        cookies.put("searchHZTC", "false");
        cookies.put("toneColorHZ", "false");
        cookies.put("toneColorYY", "false");
        return cookies;
    }

    /**
     * Splits the text of a result head at its first space: the part before the
     * space becomes the term, the rest (trimmed) becomes the Jyutping.
     *
     * @return the parsed entry, or {@code null} when the text is blank
     */
    private static Character parseResultHead(String text) {
        String trimmed = text == null ? "" : text.trim();
        if (trimmed.isEmpty()) {
            return null;
        }
        int separator = trimmed.indexOf(' ');
        if (separator < 0) {
            // Only a term is present, no romanization follows it.
            return new Character(trimmed, "");
        }
        // Trim so the separating space does not leak into the romanization.
        return new Character(trimmed.substring(0, separator),
                trimmed.substring(separator + 1).trim());
    }

    /**
     * One search result: a term and its Jyutping romanization.
     * Note that inside {@link JyutpingConverter} this name shadows
     * {@link java.lang.Character}.
     */
    public static class Character {
        private final String term;
        private final String jyutping;

        /**
         * @param term the term as shown in the result head
         * @param jyutping the romanization that follows the term
         */
        public Character(String term, String jyutping) {
            this.term = term;
            this.jyutping = jyutping;
        }

        /** Returns the term of this result. */
        public String getTerm() {
            return term;
        }

        /** Returns the Jyutping romanization of this result. */
        public String getJyutping() {
            return jyutping;
        }

        /** Formats this result as {@code term (jyutping)}. */
        @Override
        public String toString() {
            return String.format("%1$s (%2$s)", term, jyutping);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment