Skip to content

Instantly share code, notes, and snippets.

@asdf913
Created April 16, 2024 01:30
Show Gist options
  • Save asdf913/98c2f519a30798e05a8c45e28285f563 to your computer and use it in GitHub Desktop.
Save asdf913/98c2f519a30798e05a8c45e28285f563 to your computer and use it in GitHub Desktop.
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.collections4.IterableUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mariten.kanatools.KanaConverter;
/**
 * Command-line tool that locates the ".xls" workbook linked from the list item
 * starting with 「都道府県コード及び市区町村コード」 on
 * {@code https://www.soumu.go.jp/denshijiti/code.html}, reads every row of every
 * sheet, and appends one line per recognised row to {@code Denshijita.txt} in the
 * working directory. Each line is the row's cells joined by "," followed by ","
 * and a JSON rendering of the parsed {@link Denshijiti} item.
 */
public class TodofukenAndShiKuChoSonCode {

  /** Page that is scraped for the link to the workbook. */
  private static final String CODE_PAGE_URL = "https://www.soumu.go.jp/denshijiti/code.html";

  /** Text prefix of the {@code <li>} element that contains the workbook link. */
  private static final String LIST_ITEM_PREFIX = "「都道府県コード及び市区町村コード」";

  /** Name of the output file (relative to the working directory). */
  private static final String OUTPUT_FILE_NAME = "Denshijita.txt";

  /**
   * Text of a column-0 cell that starts a data row: six digits, optionally followed
   * by two more characters. Group 1 is the six-digit code. Compiled once instead of
   * once per row.
   */
  private static final Pattern DANTAI_CODE_PATTERN = Pattern.compile("^(\\d{6})(.\\d)?$");

  /**
   * Shared JSON writer. Visibility is widened to ANY so the private fields of the
   * nested data classes are serialised without getters.
   */
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().setVisibility(PropertyAccessor.ALL,
      Visibility.ANY);

  /** A name in three writings. Fields are read reflectively by Jackson. */
  private static class KanjiHiraganaKatakana {
    private String kanji, hiragana, katakana;
  }

  /** One parsed workbook row. Fields are read reflectively by Jackson. */
  private static class Denshijiti {
    private String sheetName, dantaiCode;
    private Boolean todofuken;
    private KanjiHiraganaKatakana todofukenName, shiKuChoSonName;
  }

  /**
   * Downloads the workbook, converts its rows and writes the output file, then
   * prints the absolute path of the file (or {@code null} if no row was written).
   *
   * @param args ignored
   * @throws IOException if the page or workbook cannot be fetched or parsed, if the
   *                     workbook link cannot be determined, or if writing fails
   */
  public static void main(final String[] args) throws IOException {
    //
    final String xlsUrl = findXlsUrl();
    //
    try (final InputStream is = new URL(xlsUrl).openStream(); final Workbook wb = WorkbookFactory.create(is)) {
      //
      File file = null;
      //
      for (final Sheet sheet : wb) {
        //
        if (sheet == null) {
          continue;
        } // if
          //
        for (final Row row : sheet) {
          //
          if (row == null) {
            continue;
          } // if
            //
          final Denshijiti item = toDenshijiti(sheet.getSheetName(), row);
          //
          if (item == null) {
            // Header rows and rows without a valid code in column 0 are skipped.
            continue;
          } // if
            //
          if (file == null) {
            // First data row: create/truncate the output file before appending.
            file = new File(OUTPUT_FILE_NAME);
            FileUtils.writeStringToFile(file, "", StandardCharsets.UTF_8);
          } // if
            //
          final String line = StringUtils.joinWith(",", StringUtils.join(row, ","),
              OBJECT_MAPPER.writeValueAsString(item));
          //
          FileUtils.writeStringToFile(file, line + System.lineSeparator(), StandardCharsets.UTF_8, true);
          //
        } // for
          //
      } // for
        //
      System.out.println(file != null ? file.getAbsolutePath() : null);
      //
    } // try
      //
  }

  /**
   * Finds the absolute URL of the workbook: the single {@code <a>} whose
   * {@code href} ends with ".xls" inside the first {@code <li>} whose text starts
   * with {@link #LIST_ITEM_PREFIX}.
   *
   * @return the absolute URL of the workbook, never blank
   * @throws IOException if the page cannot be fetched, or if the list item does not
   *                     contain exactly one resolvable ".xls" link
   */
  private static String findXlsUrl() throws IOException {
    //
    final List<Element> listItems = toList(filter(stream(select(Jsoup.parse(new URL(CODE_PAGE_URL), 0), "li")),
        x -> StringUtils.startsWith(x.text(), LIST_ITEM_PREFIX)));
    //
    final Element listItem = IterableUtils.size(listItems) > 0 ? IterableUtils.get(listItems, 0) : null;
    //
    final List<Element> links = toList(
        filter(stream(select(listItem, "* a")), x -> StringUtils.endsWith(x.attr("href"), ".xls")));
    //
    // Only an unambiguous match is accepted; zero or several links yield no URL.
    final String url = absUrl(IterableUtils.size(links) == 1 ? IterableUtils.get(links, 0) : null, "href");
    //
    if (StringUtils.isBlank(url)) {
      throw new IOException("Expected exactly one \".xls\" link in the list item starting with "
          + LIST_ITEM_PREFIX + " on " + CODE_PAGE_URL + " but found " + IterableUtils.size(links));
    } // if
      //
    return url;
    //
  }

  /**
   * Converts one row into a {@link Denshijiti}. Column 0 must hold the code;
   * columns 1 and 2 supply the kanji names and columns 3 and 4 the katakana names,
   * from which the hiragana is derived.
   *
   * @param sheetName name of the sheet the row belongs to
   * @param row       the row to convert, not {@code null}
   * @return the parsed item, or {@code null} if the row does not start with a cell
   *         in column 0 whose text matches {@link #DANTAI_CODE_PATTERN}
   */
  private static Denshijiti toDenshijiti(final String sheetName, final Row row) {
    //
    // A fresh item per row: state must never leak from the previous row.
    Denshijiti item = null;
    //
    for (final Cell cell : row) {
      //
      if (cell == null) {
        break;
      } // if
        //
      final int columnIndex = cell.getColumnIndex();
      //
      if (columnIndex == 0) {
        //
        final Matcher matcher = DANTAI_CODE_PATTERN.matcher(toString(cell));
        //
        if (!matcher.matches()) {
          return null;
        } // if
          //
        item = new Denshijiti();
        item.sheetName = sheetName;
        // Group 1 is the six-digit code without the optional trailing part.
        item.dantaiCode = matcher.group(1);
        //
      } else if (item == null) {
        // The row has no cell in column 0, so there is no code to attach the data to.
        return null;
        //
      } else if (columnIndex == 1) {
        //
        (item.todofukenName = new KanjiHiraganaKatakana()).kanji = toString(cell);
        //
      } else if (columnIndex == 2) {
        //
        (item.shiKuChoSonName = new KanjiHiraganaKatakana()).kanji = toString(cell);
        //
      } else if (columnIndex == 3 && item.todofukenName != null) {
        //
        item.todofukenName.katakana = toString(cell);
        item.todofukenName.hiragana = KanaConverter.convertKana(item.todofukenName.katakana,
            KanaConverter.OP_HAN_KATA_TO_ZEN_HIRA);
        //
      } else if (columnIndex == 4 && item.shiKuChoSonName != null) {
        //
        item.shiKuChoSonName.katakana = toString(cell);
        item.shiKuChoSonName.hiragana = KanaConverter.convertKana(item.shiKuChoSonName.katakana,
            KanaConverter.OP_HAN_KATA_TO_ZEN_HIRA);
        //
      } // if
        //
    } // for
      //
    if (item != null && item.shiKuChoSonName != null) {
      // A row whose municipality reading is blank is flagged as a prefecture row.
      item.todofuken = StringUtils.isBlank(item.shiKuChoSonName.katakana);
    } // if
      //
    return item;
    //
  }

  /** Null-safe {@link Node#absUrl(String)}; returns {@code null} for a {@code null} node. */
  private static String absUrl(final Node instance, final String attributeKey) {
    return instance != null ? instance.absUrl(attributeKey) : null;
  }

  /** Null-safe {@link Object#toString()}; returns {@code null} for {@code null}. */
  private static String toString(final Object instance) {
    return instance != null ? instance.toString() : null;
  }

  /** Null-safe {@link Stream#toList()}; returns {@code null} for a {@code null} stream. */
  private static <T> List<T> toList(final Stream<T> instance) {
    return instance != null ? instance.toList() : null;
  }

  /** Null-safe {@link Stream#filter(Predicate)}; returns {@code null} for a {@code null} stream. */
  private static <T> Stream<T> filter(final Stream<T> instance, final Predicate<? super T> predicate) {
    return instance != null ? instance.filter(predicate) : null;
  }

  /** Null-safe {@link Collection#stream()}; returns {@code null} for a {@code null} collection. */
  private static <E> Stream<E> stream(final Collection<E> instance) {
    return instance != null ? instance.stream() : null;
  }

  /** Null-safe {@link Element#select(String)}; returns {@code null} for a {@code null} element. */
  private static Elements select(final Element instance, final String cssQuery) {
    return instance != null ? instance.select(cssQuery) : null;
  }
}
@asdf913
Copy link
Author

asdf913 commented Apr 16, 2024

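Required dependencies (commons-collections4 and commons-io, which the code also imports, are pulled in transitively by poi)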

<!--https://mvnrepository.com/artifact/org.jsoup/jsoup-->
<dependency>
	<groupId>org.jsoup</groupId>
	<artifactId>jsoup</artifactId>
	<version>1.17.2</version>
</dependency>
<!--https://mvnrepository.com/artifact/org.apache.commons/commons-lang3-->
<dependency>
	<groupId>org.apache.commons</groupId>
	<artifactId>commons-lang3</artifactId>
	<version>3.14.0</version>
</dependency>
<!--https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi</artifactId>
	<version>5.2.5</version>
</dependency>
<!--https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind-->
<dependency>
	<groupId>com.fasterxml.jackson.core</groupId>
	<artifactId>jackson-databind</artifactId>
	<version>2.17.0</version>
</dependency>
<!--https://mvnrepository.com/artifact/com.mariten/kanatools-->
<dependency>
	<groupId>com.mariten</groupId>
	<artifactId>kanatools</artifactId>
	<version>1.3.0</version>
</dependency>

@asdf913
Copy link
Author

asdf913 commented Apr 16, 2024

都道府県

010006,北海道,,ホッカイドウ,,{"sheetName":"R6.1.1現在の団体","dantaiCode":"010006","todofuken":true,"todofukenName":{"kanji":"北海道","hiragana":"ほっかいどう","katakana":"ホッカイドウ"},"shiKuChoSonName":{"kanji":"","hiragana":"","katakana":""}}

市区町村

011002,北海道,札幌市,ホッカイドウ,サッポロシ,{"sheetName":"R6.1.1現在の団体","dantaiCode":"011002","todofuken":false,"todofukenName":{"kanji":"北海道","hiragana":"ほっかいどう","katakana":"ホッカイドウ"},"shiKuChoSonName":{"kanji":"札幌市","hiragana":"さっぽろし","katakana":"サッポロシ"}}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment