Skip to content

Instantly share code, notes, and snippets.

@asdf913
Last active April 8, 2024 14:02
Show Gist options
  • Save asdf913/299c6a0dca1070b4f19b7488be6fff25 to your computer and use it in GitHub Desktop.
Save asdf913/299c6a0dca1070b4f19b7488be6fff25 to your computer and use it in GitHub Desktop.
Download Japanese Pitch Accent from Takoboto
import java.awt.GraphicsEnvironment;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Stream;
import javax.swing.JOptionPane;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import com.helger.css.ECSSVersion;
import com.helger.css.ICSSWriteable;
import com.helger.css.decl.CSSDeclaration;
import com.helger.css.decl.CSSExpression;
import com.helger.css.reader.CSSReaderDeclarationList;
import com.j256.simplemagic.ContentInfo;
import com.j256.simplemagic.ContentInfoUtil;
import com.microsoft.playwright.Browser;
import com.microsoft.playwright.BrowserType;
import com.microsoft.playwright.Locator;
import com.microsoft.playwright.Page;
import com.microsoft.playwright.Playwright;
import com.microsoft.playwright.options.LoadState;
/**
 * Looks a word up on <a href="https://takoboto.jp">takoboto.jp</a> and stores a screenshot of every
 * element listed under the "Readings" heading of the result as an image file.
 *
 * <p>Words are taken from the command-line arguments; when none are given and a display is
 * available, a single word is requested through an input dialog (pre-filled from the {@code word}
 * system property when that property is set).
 */
public class DownloadJapanesePitchAccentFromTakoboto {

  /**
   * Program entry point.
   *
   * @param args the words to look up; when {@code null} or empty an input dialog is shown instead
   * @throws IOException if a page cannot be fetched or an image file cannot be written
   */
  public static void main(final String[] args) throws IOException {
    if (args == null || args.length == 0) {
      // The fallback is a Swing dialog, which cannot be shown without a display.
      if (GraphicsEnvironment.isHeadless()) {
        return;
      }
      perform(promptForWord());
      return;
    }
    for (final String arg : args) {
      System.out.println(arg);
      perform(arg);
    }
  }

  /**
   * Asks the user for a word through an input dialog.
   *
   * @return the entered text, or {@code null} when the dialog was cancelled
   */
  private static String promptForWord() {
    final String initial = System.getProperty("word");
    return initial != null
        ? JOptionPane.showInputDialog("Word", initial)
        : JOptionPane.showInputDialog("Word");
  }

  /**
   * Downloads the images for one word and writes each of them to a file named
   * {@code <element id>.<extension>} (a relative path), printing the absolute path of every file
   * written.
   *
   * @param arg the word to look up; {@code null} or blank input is ignored
   * @throws IOException if a page cannot be fetched or a file cannot be written
   */
  private static void perform(final String arg) throws IOException {
    // A cancelled dialog yields null; formatting that into the URL would search for "null".
    if (StringUtils.isBlank(arg)) {
      return;
    }
    final Map<String, byte[]> map = getAccentImageMap(arg);
    if (map == null || map.isEmpty()) {
      return;
    }
    final ContentInfoUtil ciu = new ContentInfoUtil();
    for (final Entry<String, byte[]> en : map.entrySet()) {
      final byte[] bytes = en != null ? en.getValue() : null;
      if (bytes == null) {
        continue;
      }
      // Use the detected extension only when it is unambiguous; otherwise assume PNG.
      final String[] fileExtensions = getFileExtensions(ciu.findMatch(bytes));
      final String extension = StringUtils.defaultIfBlank(
          fileExtensions != null && fileExtensions.length == 1 ? fileExtensions[0] : null, "png");
      // The "." separator is required: the extension strings used here carry no leading dot.
      final File file = new File(en.getKey() + "." + extension);
      FileUtils.writeByteArrayToFile(file, bytes);
      System.out.println(file.getAbsolutePath());
    }
  }

  /** Returns the file extensions reported by {@code instance}, or {@code null} when it is {@code null}. */
  private static String[] getFileExtensions(final ContentInfo instance) {
    return instance != null ? instance.getFileExtensions() : null;
  }

  /**
   * Builds the Takoboto search URL for a word.
   *
   * @param word the search text; {@code null} is treated as the empty string
   * @return the search URL with the word URL-encoded as UTF-8
   */
  private static String searchUrl(final String word) {
    return "https://takoboto.jp/?q="
        + URLEncoder.encode(StringUtils.defaultString(word), StandardCharsets.UTF_8);
  }

  /**
   * Looks up {@code string} on Takoboto and captures the images of its result.
   *
   * @param string the word to search for
   * @return element id to screenshot bytes, in page order; {@code null} when nothing was captured
   * @throws IOException if a page cannot be fetched
   */
  private static Map<String, byte[]> getAccentImageMap(final String string) throws IOException {
    return getAccentImageMapByUrl(searchUrl(string));
  }

  /**
   * Opens {@code url} in a Chromium page and takes a screenshot of every {@code .WordJapDiv}
   * element whose parent is directly preceded by an element with the text "Readings".
   *
   * <p>The list of elements is read from the {@code ?ajaxw=} page of the word whose id is stored
   * in the {@code ResultWordId0} input of the search page.
   *
   * @param url the Takoboto search URL
   * @return element id to screenshot bytes, in page order; {@code null} when nothing was captured
   * @throws IOException if a page cannot be fetched
   * @throws IllegalStateException if the search page shows several results and the word cannot be
   *         matched to a single one of them
   */
  private static Map<String, byte[]> getAccentImageMapByUrl(final String url) throws IOException {
    Map<String, byte[]> map = null;
    // Both resources are closed in reverse order: the browser first, then the Playwright driver.
    try (final Playwright playwright = Playwright.create();
        final Browser browser = launch(chromium(playwright))) {
      final Page page = newPage(browser);
      if (page != null) {
        page.navigate(url);
        page.waitForLoadState(LoadState.NETWORKIDLE);
      }
      if (count(locator(page, ".ResultDiv")) > 1) {
        checkUnambiguous(inputValue(locator(page, "#SearchBoxInput")));
      }
      final String wordId =
          attr(select(Jsoup.parse(new URL(url), 0), "input[id=\"ResultWordId0\"]"), "value");
      if (StringUtils.isBlank(wordId)) {
        // No result on the search page, so there is no detail page to request.
        return null;
      }
      final List<Element> es = select(
          Jsoup.parse(new URL(String.format("https://takoboto.jp/?ajaxw=%1$s", wordId)), 0),
          ".WordJapDiv");
      if (es != null) {
        for (final Element e : es) {
          if (!Objects.equals(text(previousElementSibling(cast(Element.class, parent(e)))),
              "Readings")) {
            continue;
          }
          final String id = e.id();
          if (StringUtils.isBlank(id)) {
            // Without an id neither a selector nor a file name can be built.
            continue;
          }
          if (map == null) {
            map = new LinkedHashMap<>();
          }
          map.put(id, screenshot(locator(page, String.format("div#%1$s", id))));
        }
      }
    }
    return map;
  }

  /**
   * Re-fetches the search page for {@code word} and verifies that at most one styled
   * {@code span} with exactly that text remains after discarding the spans that are (or whose
   * parent is) rendered with {@code font-size: 34px} or that are coloured {@code #FF6020}.
   *
   * @param word the text currently held by the search box
   * @throws IOException if the search page cannot be fetched
   * @throws IllegalStateException if more than one candidate span remains
   */
  private static void checkUnambiguous(final String word) throws IOException {
    final Document document = Jsoup.parse(new URL(searchUrl(word)), 0);
    // Copy into an ArrayList because Stream.toList() returns an unmodifiable list.
    final List<Element> es = new ArrayList<>(toList(filter(stream(select(document, "span")),
        x -> x != null && x.hasAttr("style") && StringUtils.equals(text(x), word))));
    es.removeIf(x -> hasSingleCssValue(x, "font-size", "34px")
        || hasSingleCssValue(parent(x), "font-size", "34px")
        || hasSingleCssValue(x, "color", "#FF6020"));
    if (es.size() > 1) {
      throw new IllegalStateException(
          String.format("Ambiguous search result for \"%s\": %d candidates", word, es.size()));
    }
  }

  /**
   * Tells whether the inline {@code style} attribute of {@code node} declares {@code property}
   * exactly once and with exactly the given value.
   *
   * @param node the node whose {@code style} attribute is inspected; may be {@code null}
   * @param property the CSS property name, e.g. {@code font-size}
   * @param value the expected CSS value text
   * @return {@code true} only for a single matching declaration
   */
  private static boolean hasSingleCssValue(final Node node, final String property,
      final String value) {
    if (node == null) {
      return false;
    }
    final List<String> values = toList(map(
        filter(stream(CSSReaderDeclarationList.readFromString(attr(node, "style"),
            ECSSVersion.LATEST)), d -> Objects.equals(getProperty(d), property)),
        d -> getAsCSSString(getExpression(d))));
    return Objects.equals(Collections.singletonList(value), values);
  }

  // ---------------------------------------------------------------------------------------------
  // Null-tolerant wrappers: each one forwards to the named method and yields null (or 0) when the
  // receiver is null, so that call chains above do not need intermediate null checks.
  // ---------------------------------------------------------------------------------------------

  /** Null-tolerant {@link Playwright#chromium()}. */
  private static BrowserType chromium(final Playwright instance) {
    return instance != null ? instance.chromium() : null;
  }

  /** Null-tolerant {@link BrowserType#launch()}. */
  private static Browser launch(final BrowserType instance) {
    return instance != null ? instance.launch() : null;
  }

  /** Null-tolerant {@link Browser#newPage()}. */
  private static Page newPage(final Browser instance) {
    return instance != null ? instance.newPage() : null;
  }

  /** Null-tolerant {@link Collection#stream()}. */
  private static <E> Stream<E> stream(final Collection<E> instance) {
    return instance != null ? instance.stream() : null;
  }

  /** Null-tolerant {@link Stream#map(Function)}. */
  private static <T, R> Stream<R> map(final Stream<T> instance,
      final Function<? super T, ? extends R> mapper) {
    return instance != null ? instance.map(mapper) : null;
  }

  /** Null-tolerant {@link Stream#filter(Predicate)}. */
  private static <T> Stream<T> filter(final Stream<T> instance,
      final Predicate<? super T> predicate) {
    return instance != null ? instance.filter(predicate) : null;
  }

  /** Null-tolerant {@link Stream#toList()}. */
  private static <T> List<T> toList(final Stream<T> instance) {
    return instance != null ? instance.toList() : null;
  }

  /** Null-tolerant {@link Locator#count()}; a {@code null} locator counts as 0. */
  private static int count(final Locator instance) {
    return instance != null ? instance.count() : 0;
  }

  /** Null-tolerant {@link CSSDeclaration#getExpression()}. */
  private static CSSExpression getExpression(final CSSDeclaration instance) {
    return instance != null ? instance.getExpression() : null;
  }

  /** Null-tolerant {@link CSSDeclaration#getProperty()}. */
  private static String getProperty(final CSSDeclaration instance) {
    return instance != null ? instance.getProperty() : null;
  }

  /** Null-tolerant {@link ICSSWriteable#getAsCSSString()}. */
  private static String getAsCSSString(final ICSSWriteable instance) {
    return instance != null ? instance.getAsCSSString() : null;
  }

  /** Null-tolerant {@link Element#previousElementSibling()}. */
  private static Element previousElementSibling(final Element instance) {
    return instance != null ? instance.previousElementSibling() : null;
  }

  /** Returns {@code instance} cast to {@code clz}, or {@code null} when it is not of that type. */
  private static <T> T cast(final Class<T> clz, final Object instance) {
    return clz != null && clz.isInstance(instance) ? clz.cast(instance) : null;
  }

  /** Null-tolerant {@link Node#parent()}. */
  private static Node parent(final Node instance) {
    return instance != null ? instance.parent() : null;
  }

  /** Null-tolerant {@link Element#text()}. */
  private static String text(final Element instance) {
    return instance != null ? instance.text() : null;
  }

  /** Null-tolerant {@link Locator#inputValue()}. */
  private static String inputValue(final Locator instance) {
    return instance != null ? instance.inputValue() : null;
  }

  /** Null-tolerant {@link Locator#screenshot()}. */
  private static byte[] screenshot(final Locator instance) {
    return instance != null ? instance.screenshot() : null;
  }

  /** Null-tolerant {@link Page#locator(String)}. */
  private static Locator locator(final Page instance, final String selector) {
    return instance != null ? instance.locator(selector) : null;
  }

  /** Null-tolerant {@link Document#select(String)}. */
  private static Elements select(final Document instance, final String cssQuery) {
    return instance != null ? instance.select(cssQuery) : null;
  }

  /** Null-tolerant {@link Elements#attr(String)}. */
  private static String attr(final Elements instance, final String attributeKey) {
    return instance != null ? instance.attr(attributeKey) : null;
  }

  /** Null-tolerant {@link Node#attr(String)}. */
  private static String attr(final Node instance, final String attributeKey) {
    return instance != null ? instance.attr(attributeKey) : null;
  }
}
@asdf913
Copy link
Author

asdf913 commented Apr 8, 2024

URL

https://takoboto.jp

Purpose / 目的

Download Japanese Accent Image from Takoboto
Takoboto から日本語アクセント画像をダウンロード

Sample / 見本

https://takoboto.jp/?q=%E3%81%95%E3%81%8F%E3%82%89
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment