Skip to content

Instantly share code, notes, and snippets.

@kawasima
Created March 16, 2021 08:12
Show Gist options
  • Save kawasima/23d72b50433d6997f2ea707e800d7d13 to your computer and use it in GitHub Desktop.
Save kawasima/23d72b50433d6997f2ea707e800d7d13 to your computer and use it in GitHub Desktop.
Shift_JISの全角文字のみにマッチする正規表現ジェネレータです。
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.Objects;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
/**
 * Prints a regular expression of the form {@code /^[...]+$/} whose character
 * class lists every code point obtained by decoding the two-byte sequences
 * with lead bytes 0x81-0x9F / 0xE0-0xEF and trail bytes 0x40-0xFC using the
 * WINDOWS-31J charset.
 *
 * <p>Consecutive code points are collapsed into {@code a-b} ranges to keep
 * the generated expression short.
 */
public class ShiftJISRegexGenerator {
    /** Name of the charset used to decode the candidate byte pairs. */
    private static final String CHARSET_NAME = "WINDOWS-31J";

    /** Code point the decoder substitutes for byte pairs it cannot decode. */
    private static final int REPLACEMENT_CODE_POINT = 0xfffd;

    /**
     * Writes the generated regular expression, followed by a line separator,
     * to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(generateRegex());
    }

    /**
     * Builds the complete regular expression literal.
     *
     * @return the text {@code /^[} + character class body + {@code ]+$/}
     * @throws java.nio.charset.UnsupportedCharsetException if the runtime does
     *         not provide the WINDOWS-31J charset
     */
    static String generateRegex() {
        return "/^[" + toCharacterClass(collectCodePoints()) + "]+$/";
    }

    /**
     * Decodes every candidate byte pair and collects the resulting code points.
     *
     * @return the decoded code points in ascending order without duplicates;
     *         pairs that decode to U+FFFD are left out
     * @throws java.nio.charset.UnsupportedCharsetException if the runtime does
     *         not provide the WINDOWS-31J charset
     */
    static int[] collectCodePoints() {
        // Resolved here rather than in a static initializer so that a missing
        // charset fails at this call with a descriptive exception instead of
        // being silently swallowed or breaking class loading.
        Charset charset = Charset.forName(CHARSET_NAME);
        IntStream leadBytes = IntStream.concat(
                IntStream.rangeClosed(0x81, 0x9f),
                IntStream.rangeClosed(0xe0, 0xef));
        return leadBytes
                .flatMap(lead -> IntStream.rangeClosed(0x40, 0xfc)
                        .map(trail -> decodePair(charset, lead, trail)))
                .filter(codePoint -> codePoint != REPLACEMENT_CODE_POINT)
                .sorted()
                // Several byte pairs can decode to the same code point; one
                // occurrence is enough for a character class.
                .distinct()
                .toArray();
    }

    /**
     * Compresses a sorted list of code points into the body of a regex
     * character class: a lone code point is emitted as itself, two adjacent
     * code points are emitted side by side, and longer consecutive runs are
     * emitted as {@code first-last}.
     *
     * <p>No escaping is performed, so the input must not contain characters
     * that are special inside a character class.
     *
     * @param sortedCodePoints code points in ascending order; duplicates are
     *        tolerated
     * @return the character class body, or an empty string for empty input
     */
    static String toCharacterClass(int[] sortedCodePoints) {
        StringBuilder body = new StringBuilder();
        if (sortedCodePoints.length == 0) {
            return body.toString();
        }
        // Start with a run of exactly one element. Seeding the run with the
        // first TWO elements would merge them into one range even when they
        // are not adjacent.
        int runStart = sortedCodePoints[0];
        int runEnd = runStart;
        for (int i = 1; i < sortedCodePoints.length; i++) {
            int codePoint = sortedCodePoints[i];
            if (codePoint - runEnd > 1) {
                // Gap found: flush the finished run and open a new one.
                appendRun(body, runStart, runEnd);
                runStart = codePoint;
            }
            runEnd = codePoint;
        }
        appendRun(body, runStart, runEnd);
        return body.toString();
    }

    /** Decodes one lead/trail byte pair and returns the first resulting code point. */
    private static int decodePair(Charset charset, int lead, int trail) {
        // A fresh array per call keeps the stream lambda free of shared state.
        byte[] pair = {(byte) lead, (byte) trail};
        return new String(pair, charset).codePointAt(0);
    }

    /** Appends one run as "x", "xy" (adjacent pair) or "x-y" (longer run). */
    private static void appendRun(StringBuilder body, int first, int last) {
        body.appendCodePoint(first);
        if (last == first) {
            return;
        }
        if (last - first > 1) {
            body.append('-');
        }
        body.appendCodePoint(last);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment