Skip to content

Instantly share code, notes, and snippets.

@basil
Last active June 29, 2022 22:38
Show Gist options
  • Save basil/c4dc312c19c6ddc80d03275a5ed0f484 to your computer and use it in GitHub Desktop.
Save basil/c4dc312c19c6ddc80d03275a5ed0f484 to your computer and use it in GitHub Desktop.
Convert `.properties` files to UTF-8
package com.basilcrow.jenkins;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.text.translate.UnicodeUnescaper;
public class Main {
public static void main(String[] args) throws Exception {
List<Path> list;
try (Stream<Path> stream =
Files.walk(Paths.get(Objects.requireNonNull(System.getenv("JENKINS_CORE"))))) {
list =
stream.filter(Files::isRegularFile)
.filter(f -> f.toString().endsWith(".properties"))
.sorted()
.collect(Collectors.toUnmodifiableList());
}
for (Path file : list) {
if (!isEncoded(file, StandardCharsets.US_ASCII)) {
boolean isUtf8 = isEncoded(file, StandardCharsets.UTF_8);
boolean isIso88591 = isEncoded(file, StandardCharsets.ISO_8859_1);
if (isUtf8 && isIso88591) {
throw new IllegalStateException(file + " is valid UTF-8 and valid ISO-8859-1. To avoid problems when auto-detecting the encoding, use the lowest common denominator of ASCII encoding and express non-ASCII characters with escape sequences using a tool like `native2ascii`.");
}
}
if (!isEncoded(file, StandardCharsets.US_ASCII)
&& !isEncoded(file, StandardCharsets.ISO_8859_1)) {
throw new IllegalStateException(file + " is neither ASCII nor ISO-8859-1");
}
if (isEncoded(file, StandardCharsets.ISO_8859_1)) {
String str = Files.readString(file, StandardCharsets.ISO_8859_1);
Files.writeString(file, str, StandardCharsets.UTF_8);
}
if (!isEncoded(file, StandardCharsets.US_ASCII)
&& !isEncoded(file, StandardCharsets.UTF_8)) {
throw new IllegalStateException(file + " is neither ASCII nor UTF-8");
}
{
String inStr = Files.readString(file, StandardCharsets.UTF_8);
UnicodeUnescaper unicodeUnescaper = new UnicodeUnescaper();
String outStr = unicodeUnescaper.translate(inStr);
Files.writeString(file, outStr, StandardCharsets.UTF_8);
}
if (!isEncoded(file, StandardCharsets.US_ASCII)
&& !isEncoded(file, StandardCharsets.UTF_8)) {
throw new IllegalStateException(file + " is neither ASCII nor UTF-8");
}
}
}
private static boolean isEncoded(Path file, Charset charset) throws IOException {
byte[] bytes = Files.readAllBytes(file);
CharsetDecoder decoder = charset.newDecoder();
decoder.onMalformedInput(CodingErrorAction.REPORT);
decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
ByteBuffer buffer = ByteBuffer.wrap(bytes);
try {
decoder.decode(buffer);
return true;
} catch (CharacterCodingException e) {
return false;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment