Skip to content

Instantly share code, notes, and snippets.

@sualeh
Created April 21, 2018 23:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sualeh/4d261e736ef773198fdeede3076bd555 to your computer and use it in GitHub Desktop.
Save sualeh/4d261e736ef773198fdeede3076bd555 to your computer and use it in GitHub Desktop.
EBCDIC to ASCII CodePrinter
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
/**
 * Prints, to standard output, Java source for lookup tables that map every
 * byte value (0-255) of a few EBCDIC code pages to a printable ASCII
 * character.
 *
 * <p>One {@code char[]} declaration is printed per code page. Each entry is
 * the ASCII character obtained by decoding the byte with the code page and
 * stripping accents via NFKD normalization; bytes with no printable ASCII
 * equivalent are emitted as a space. Every entry is followed by a comment
 * holding the Unicode name of the decoded (un-normalized) character.
 */
public class EBCDICToASCII_CodePrinter
{

  /** EBCDIC code pages for which a lookup table is generated. */
  private static final String[] EBCDIC_CODE_PAGES = {
      "IBM037", "IBM01148", "IBM1047"
  };

  /**
   * Generates and prints the lookup tables for all configured code pages.
   *
   * @param args
   *        Ignored.
   * @throws Exception
   *         If a code page is not supported by the running JVM, or if the
   *         internal printable-ASCII sanity check fails.
   */
  public static void main(final String[] args)
    throws Exception
  {
    for (final String codePage: EBCDIC_CODE_PAGES)
    {
      final Charset charset = Charset.forName(codePage);
      // The canonical charset name may contain characters such as '-' that
      // are not legal in a Java identifier, so it is sanitized first.
      // NOTE(review): some JDKs are believed to report "x-IBM1047" for
      // IBM1047 - confirm on the target JDK.
      System.out.println(String
        .format("private static final char[] %s_TO_ASCII_MAP = {",
                toIdentifier(charset.name())));

      for (int i = 0; i < 256; i++)
      {
        // Convert EBCDIC character to a Java string
        final byte ebcdicChar = (byte) i;
        final String decoded = new String(new byte[] { ebcdicChar }, charset);
        final char normalized = normalizeChar(ebcdicChar, charset);

        // Get character (code point) properties
        final int codePoint = decoded.codePointAt(0);
        final int numUTF8Bytes = decoded.getBytes(StandardCharsets.UTF_8).length;
        final String unicodeCharacterName = Character.getName(codePoint);
        final boolean isISOControl = Character.isISOControl(codePoint);
        final boolean isASCIIPrintable = codePoint > 31 && codePoint < 127;

        // Sanity check: a printable ASCII character must also be a
        // single-byte UTF-8 character that is not an ISO control character
        final boolean alternateASCIIPrintable = numUTF8Bytes == 1
                                                && !isISOControl;
        if (isASCIIPrintable && !alternateASCIIPrintable)
        {
          throw new IllegalStateException(String
            .format("%3d %d %b - We have not detected printable ASCII characters correctly",
                    codePoint,
                    numUTF8Bytes,
                    isISOControl));
        }

        // Print the table entry as a valid Java char literal
        System.out.println(String.format(" %s, // %s",
                                         toCharLiteral(normalized),
                                         unicodeCharacterName));
      }
      System.out.println("};\n");
    }
  }

  /**
   * Decodes a single EBCDIC byte and reduces it to a printable ASCII
   * character.
   *
   * @param ebcdicChar
   *        EBCDIC byte to convert.
   * @param charset
   *        EBCDIC charset used to decode the byte.
   * @return The first character of the NFKD-normalized decoded text if it
   *         lies in the printable ASCII range (32-126), otherwise a space.
   */
  private static char normalizeChar(final byte ebcdicChar,
                                    final Charset charset)
  {
    // Decode the EBCDIC character (which is a byte), based on the
    // EBCDIC charset
    final String decodedString = new String(new byte[] { ebcdicChar }, charset);
    // Normalize by removing accents and diacriticals: NFKD puts the base
    // character first, followed by any combining marks
    final char normalizedChar = Normalizer
      .normalize(decodedString, Normalizer.Form.NFKD).charAt(0);
    // Check for characters within the ASCII printable range
    if (normalizedChar > 31 && normalizedChar < 127)
    {
      return normalizedChar;
    }
    else
    {
      return ' ';
    }
  }

  /**
   * Renders a character as Java char-literal source text, escaping the two
   * printable ASCII characters that cannot appear unescaped between single
   * quotes: the apostrophe and the backslash.
   */
  private static String toCharLiteral(final char c)
  {
    switch (c)
    {
      case '\'':
        return "'\\''";
      case '\\':
        return "'\\\\'";
      default:
        return "'" + c + "'";
    }
  }

  /**
   * Turns an arbitrary name into a legal Java identifier by replacing every
   * character that may not appear in an identifier with an underscore, and
   * prefixing an underscore if the first character cannot start one.
   */
  private static String toIdentifier(final String name)
  {
    final StringBuilder identifier = new StringBuilder(name.length() + 1);
    for (int i = 0; i < name.length(); i++)
    {
      final char c = name.charAt(i);
      identifier.append(Character.isJavaIdentifierPart(c)? c: '_');
    }
    if (identifier.length() == 0
        || !Character.isJavaIdentifierStart(identifier.charAt(0)))
    {
      identifier.insert(0, '_');
    }
    return identifier.toString();
  }

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment