Created
April 16, 2024 01:30
-
-
Save asdf913/98c2f519a30798e05a8c45e28285f563 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.net.URL; | |
import java.nio.charset.StandardCharsets; | |
import java.util.Collection; | |
import java.util.List; | |
import java.util.function.Predicate; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import java.util.stream.Stream; | |
import org.apache.commons.collections4.IterableUtils; | |
import org.apache.commons.io.FileUtils; | |
import org.apache.commons.lang3.StringUtils; | |
import org.apache.poi.ss.usermodel.Cell; | |
import org.apache.poi.ss.usermodel.Row; | |
import org.apache.poi.ss.usermodel.Sheet; | |
import org.apache.poi.ss.usermodel.Workbook; | |
import org.apache.poi.ss.usermodel.WorkbookFactory; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.nodes.Node; | |
import org.jsoup.select.Elements; | |
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; | |
import com.fasterxml.jackson.annotation.PropertyAccessor; | |
import com.fasterxml.jackson.databind.ObjectMapper; | |
import com.mariten.kanatools.KanaConverter; | |
public class TodofukenAndShiKuChoSonCode { | |
private static class KanjiHiraganaKatakana { | |
private String kanji, hiragana, katakana; | |
} | |
private static class Denshijiti { | |
private String sheetName, dantaiCode; | |
private Boolean todofuken; | |
private KanjiHiraganaKatakana todofukenName, shiKuChoSonName; | |
} | |
public static void main(final String[] args) throws IOException { | |
// | |
List<Element> es = toList( | |
filter(stream(select(Jsoup.parse(new URL("https://www.soumu.go.jp/denshijiti/code.html"), 0), "li")), | |
x -> StringUtils.startsWith(x.text(), "「都道府県コード及び市区町村コード」"))); | |
// | |
try (final InputStream is = new URL(absUrl(IterableUtils.size( | |
(es = toList(filter(stream(select(IterableUtils.size(es) > 0 ? IterableUtils.get(es, 0) : null, "* a")), | |
x -> StringUtils.endsWith(x.attr("href"), ".xls"))))) == 1 ? IterableUtils.get(es, 0) : null, | |
"href")).openStream(); final Workbook wb = WorkbookFactory.create(is)) { | |
// | |
if (wb != null && wb.iterator() != null) { | |
// | |
Denshijiti item = null; | |
// | |
int columnIndex; | |
// | |
Matcher matcher = null; | |
// | |
File file = null; | |
// | |
for (final Sheet sheet : wb) { | |
// | |
if (sheet == null || sheet.iterator() == null) { | |
// | |
continue; | |
// | |
} // if | |
// | |
for (final Row row : sheet) { | |
// | |
if (row == null || row.iterator() == null) { | |
// | |
continue; | |
// | |
} // if | |
// | |
for (final Cell cell : row) { | |
// | |
if (cell == null) { | |
// | |
break; | |
// | |
} // if | |
// | |
if ((columnIndex = cell.getColumnIndex()) == 0) { | |
// | |
if ((matcher = Pattern.compile("^(\\d{6})(.\\d)?$").matcher(toString(cell))) == null | |
|| !matcher.matches() || matcher.groupCount() < 1) { | |
// | |
item = null; | |
// | |
break; | |
// | |
} // if | |
// | |
(item = new Denshijiti()).sheetName = sheet.getSheetName(); | |
// | |
item.dantaiCode = matcher.group(0); | |
// | |
} else if ((columnIndex = cell.getColumnIndex()) == 1) { | |
// | |
(item.todofukenName = new KanjiHiraganaKatakana()).kanji = toString(cell); | |
// | |
} else if (columnIndex == 2) { | |
// | |
(item.shiKuChoSonName = new KanjiHiraganaKatakana()).kanji = toString(cell); | |
// | |
} else if (columnIndex == 3 && item.todofukenName != null) { | |
// | |
item.todofukenName.hiragana = KanaConverter.convertKana( | |
item.todofukenName.katakana = toString(cell), | |
KanaConverter.OP_HAN_KATA_TO_ZEN_HIRA); | |
// | |
} else if (columnIndex == 4 && item.shiKuChoSonName != null) { | |
// | |
item.shiKuChoSonName.hiragana = KanaConverter.convertKana( | |
item.shiKuChoSonName.katakana = toString(cell), | |
KanaConverter.OP_HAN_KATA_TO_ZEN_HIRA); | |
// | |
} // if | |
// | |
} // for | |
// | |
if (item == null) { | |
// | |
continue; | |
// | |
} // if | |
// | |
if (item.shiKuChoSonName != null) { | |
// | |
item.todofuken = StringUtils.isBlank(item.shiKuChoSonName.katakana); | |
// | |
} // if | |
// | |
if (file == null) { | |
// | |
file = new File("Denshijita.txt"); | |
// | |
FileUtils.writeStringToFile(file, "", StandardCharsets.UTF_8); | |
// | |
} // if | |
// | |
FileUtils.writeStringToFile(file, | |
StringUtils.join(StringUtils.joinWith(",", StringUtils.join(row, ","), | |
new ObjectMapper().setVisibility(PropertyAccessor.ALL, Visibility.ANY) | |
.writeValueAsString(item)), | |
System.lineSeparator()), | |
StandardCharsets.UTF_8, true); | |
// | |
} // for | |
// | |
} // for | |
// | |
System.out.println(file != null ? file.getAbsolutePath() : null); | |
// | |
} // if | |
// | |
} // try | |
// | |
} | |
private static String absUrl(final Node instance, final String attributeKey) { | |
return instance != null ? instance.absUrl(attributeKey) : null; | |
} | |
private static String toString(final Object instance) { | |
return instance != null ? instance.toString() : null; | |
} | |
private static <T> List<T> toList(final Stream<T> instance) { | |
return instance != null ? instance.toList() : null; | |
} | |
private static <T> Stream<T> filter(final Stream<T> instance, final Predicate<? super T> predicate) { | |
return instance != null ? instance.filter(predicate) : null; | |
} | |
private static <E> Stream<E> stream(final Collection<E> instance) { | |
return instance != null ? instance.stream() : null; | |
} | |
private static Elements select(final Element instance, final String cssQuery) { | |
return instance != null ? instance.select(cssQuery) : null; | |
} | |
} |
都道府県
010006,北海道,,ホッカイドウ,,{"sheetName":"R6.1.1現在の団体","dantaiCode":"010006","todofuken":true,"todofukenName":{"kanji":"北海道","hiragana":"ほっかいどう","katakana":"ホッカイドウ"},"shiKuChoSonName":{"kanji":"","hiragana":"","katakana":""}}
市区町村
011002,北海道,札幌市,ホッカイドウ,サッポロシ,{"sheetName":"R6.1.1現在の団体","dantaiCode":"011002","todofuken":false,"todofukenName":{"kanji":"北海道","hiragana":"ほっかいどう","katakana":"ホッカイドウ"},"shiKuChoSonName":{"kanji":"札幌市","hiragana":"さっぽろし","katakana":"サッポロシ"}}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Required dependencies