Last active
April 8, 2024 14:02
-
-
Save asdf913/299c6a0dca1070b4f19b7488be6fff25 to your computer and use it in GitHub Desktop.
Download Japanese Pitch Accent from Takoboto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.awt.GraphicsEnvironment; | |
import java.io.File; | |
import java.io.IOException; | |
import java.net.URL; | |
import java.util.ArrayList; | |
import java.util.Collection; | |
import java.util.Collections; | |
import java.util.Iterator; | |
import java.util.LinkedHashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Map.Entry; | |
import java.util.Objects; | |
import java.util.Properties; | |
import java.util.Set; | |
import java.util.function.Function; | |
import java.util.function.Predicate; | |
import java.util.stream.Stream; | |
import javax.swing.JOptionPane; | |
import org.apache.commons.io.FileUtils; | |
import org.apache.commons.lang3.ObjectUtils; | |
import org.apache.commons.lang3.StringUtils; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.nodes.Node; | |
import org.jsoup.select.Elements; | |
import com.helger.css.ECSSVersion; | |
import com.helger.css.ICSSWriteable; | |
import com.helger.css.decl.CSSDeclaration; | |
import com.helger.css.decl.CSSExpression; | |
import com.helger.css.reader.CSSReaderDeclarationList; | |
import com.j256.simplemagic.ContentInfo; | |
import com.j256.simplemagic.ContentInfoUtil; | |
import com.microsoft.playwright.Browser; | |
import com.microsoft.playwright.BrowserType; | |
import com.microsoft.playwright.Locator; | |
import com.microsoft.playwright.Page; | |
import com.microsoft.playwright.Playwright; | |
import com.microsoft.playwright.options.LoadState; | |
public class DownloadJapanesePitchAccentFromTakoboto { | |
public static void main(final String[] args) throws IOException { | |
// | |
String arg = null; | |
// | |
if (args == null || args.length == 0) { | |
// | |
if (GraphicsEnvironment.isHeadless()) { | |
// | |
return; | |
// | |
} // if | |
// | |
final Properties properties = System.getProperties(); | |
// | |
if (properties != null && properties.containsKey("word")) { | |
// | |
arg = JOptionPane.showInputDialog("Word", properties.getProperty("word")); | |
// | |
} else { | |
// | |
arg = JOptionPane.showInputDialog("Word"); | |
// | |
} // if | |
// | |
perform(arg); | |
// | |
return; | |
// | |
} // if | |
// | |
for (int i = 0; args != null && i < args.length; i++) { | |
// | |
System.out.println(arg = args[i]); | |
// | |
perform(arg); | |
// | |
} // if | |
// | |
} | |
private static void perform(final String arg) throws IOException { | |
// | |
final Map<String, byte[]> map = getAccentImageMap(arg); | |
// | |
if (iterator(entrySet(map)) == null) { | |
// | |
return; | |
// | |
} // if | |
// | |
File file = null; | |
// | |
ContentInfoUtil ciu = null; | |
// | |
String[] fileExtensions = null; | |
// | |
for (final Entry<String, byte[]> en : entrySet(map)) { | |
// | |
if (en == null || (ciu = ObjectUtils.getIfNull(ciu, ContentInfoUtil::new)) == null) { | |
// | |
continue; | |
// | |
} // if | |
// | |
FileUtils | |
.writeByteArrayToFile( | |
file = new File(en.getKey() + StringUtils.defaultIfBlank( | |
(fileExtensions = getFileExtensions(ciu.findMatch(en.getValue()))) != null | |
&& fileExtensions.length == 1 ? fileExtensions[0] : null, | |
"png")), | |
en.getValue()); | |
// | |
System.out.println(file.getAbsolutePath()); | |
// | |
} // for | |
// | |
} | |
private static String[] getFileExtensions(final ContentInfo instance) { | |
return instance != null ? instance.getFileExtensions() : null; | |
} | |
private static <K, V> Set<Entry<K, V>> entrySet(final Map<K, V> instance) { | |
return instance != null ? instance.entrySet() : null; | |
} | |
private static <E> Iterator<E> iterator(final Iterable<E> instance) { | |
return instance != null ? instance.iterator() : null; | |
} | |
private static Map<String, byte[]> getAccentImageMap(final String string) throws IOException { | |
// | |
return getAccentImageMapByUrl(String.format("https://takoboto.jp/?q=%1$s", string)); | |
// | |
} | |
private static Map<String, byte[]> getAccentImageMapByUrl(final String url) throws IOException { | |
// | |
Map<String, byte[]> map = null; | |
// | |
try (final Playwright playwright = Playwright.create()) { | |
// | |
final Page page = newPage(launch(chromium(playwright))); | |
// | |
if (page != null) { | |
// | |
page.navigate(url); | |
// | |
page.waitForLoadState(LoadState.NETWORKIDLE); | |
// | |
} // if | |
// | |
if (count(locator(page, ".ResultDiv")) > 1) { | |
// | |
final String string = inputValue(locator(page, "#SearchBoxInput")); | |
// | |
final Document document = Jsoup.parse(new URL(String.format("https://takoboto.jp/?q=%s1$", string)), 0); | |
// | |
final List<Element> es = new ArrayList<>(toList(filter(stream(select(document, "span")), | |
x -> x != null && x.hasAttr("style") && StringUtils.equals(text(x), string)))); | |
// | |
es.removeIf(x -> { | |
// | |
if (Objects.equals(Collections.singletonList("34px"), | |
toList(map( | |
filter(stream(CSSReaderDeclarationList.readFromString(attr(x, "style"), | |
ECSSVersion.LATEST)), y -> Objects.equals(getProperty(y), "font-size")), | |
y -> getAsCSSString(getExpression(y)))))) { | |
// | |
return true; | |
// | |
} else if (Objects.equals(Collections.singletonList("34px"), | |
toList(map( | |
filter(stream(CSSReaderDeclarationList.readFromString(attr(parent(x), "style"), | |
ECSSVersion.LATEST)), y -> Objects.equals(getProperty(y), "font-size")), | |
y -> getAsCSSString(getExpression(y)))))) { | |
// | |
return true; | |
// | |
} else if (Objects.equals(Collections.singletonList("#FF6020"), | |
toList(map( | |
filter(stream(CSSReaderDeclarationList.readFromString(attr(x, "style"), | |
ECSSVersion.LATEST)), y -> Objects.equals(getProperty(y), "color")), | |
y -> getAsCSSString(getExpression(y)))))) { | |
// | |
return true; | |
// | |
} // if | |
// | |
return false; | |
// | |
}); | |
// | |
if (es != null && es.size() > 1) { | |
// | |
throw new IllegalStateException(); | |
// | |
} // if | |
// | |
} // if | |
// | |
final List<Element> es = select( | |
Jsoup.parse(new URL(String.format("https://takoboto.jp/?ajaxw=%1$s", | |
attr(select(Jsoup.parse(new URL(url), 0), "input[id=\"ResultWordId0\"]"), "value"))), 0), | |
".WordJapDiv"); | |
// | |
if (iterator(es) != null) { | |
// | |
for (final Element e : es) { | |
// | |
if (!Objects.equals(text(previousElementSibling(cast(Element.class, parent(e)))), "Readings")) { | |
// | |
continue; | |
// | |
} // if | |
// | |
if (map == null) { | |
// | |
map = new LinkedHashMap<>(); | |
// | |
} // if | |
// | |
map.put(e.id(), screenshot(locator(page, String.format("div#%1$s", e != null ? e.id() : null)))); | |
// | |
} // for | |
// | |
} // if | |
// | |
} // try | |
// | |
return map; | |
// | |
} | |
private static BrowserType chromium(final Playwright instance) { | |
return instance != null ? instance.chromium() : null; | |
} | |
private static Browser launch(final BrowserType instance) { | |
return instance != null ? instance.launch() : null; | |
} | |
private static Page newPage(final Browser instance) { | |
return instance != null ? instance.newPage() : null; | |
} | |
private static <E> Stream<E> stream(final Collection<E> instance) { | |
return instance != null ? instance.stream() : null; | |
} | |
private static <T, R> Stream<R> map(final Stream<T> instance, final Function<? super T, ? extends R> mapper) { | |
return instance != null ? instance.map(mapper) : null; | |
} | |
private static <T> Stream<T> filter(final Stream<T> instance, final Predicate<? super T> predicate) { | |
return instance != null ? instance.filter(predicate) : null; | |
} | |
private static <T> List<T> toList(final Stream<T> instance) { | |
return instance != null ? instance.toList() : null; | |
} | |
private static int count(final Locator instance) { | |
return instance != null ? instance.count() : 0; | |
} | |
private static CSSExpression getExpression(final CSSDeclaration instance) { | |
return instance != null ? instance.getExpression() : null; | |
} | |
private static String getProperty(final CSSDeclaration instance) { | |
return instance != null ? instance.getProperty() : null; | |
} | |
private static String getAsCSSString(final ICSSWriteable instance) { | |
return instance != null ? instance.getAsCSSString() : null; | |
} | |
private static Element previousElementSibling(final Element instance) { | |
return instance != null ? instance.previousElementSibling() : null; | |
} | |
private static <T> T cast(final Class<T> clz, final Object instance) { | |
return clz != null && clz.isInstance(instance) ? clz.cast(instance) : null; | |
} | |
private static Node parent(final Node instance) { | |
return instance != null ? instance.parent() : null; | |
} | |
private static String text(final Element instance) { | |
return instance != null ? instance.text() : null; | |
} | |
private static String inputValue(final Locator instance) { | |
return instance != null ? instance.inputValue() : null; | |
} | |
private static byte[] screenshot(final Locator instance) { | |
return instance != null ? instance.screenshot() : null; | |
} | |
private static Locator locator(final Page instance, final String selector) { | |
return instance != null ? instance.locator(selector) : null; | |
} | |
private static Elements select(final Document instance, final String cssQuery) { | |
return instance != null ? instance.select(cssQuery) : null; | |
} | |
private static String attr(final Elements instance, final String attributeKey) { | |
return instance != null ? instance.attr(attributeKey) : null; | |
} | |
private static String attr(final Node instance, final String attributeKey) { | |
return instance != null ? instance.attr(attributeKey) : null; | |
} | |
} |
URL
Purpose / 目的
Download Japanese Accent Image from Takoboto
Takoboto から日本語アクセント画像をダウンロード
Sample / 見本
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Required Dependencies