Created
April 21, 2018 23:15
-
-
Save sualeh/4d261e736ef773198fdeede3076bd555 to your computer and use it in GitHub Desktop.
EBCDIC to ASCII CodePrinter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.nio.charset.Charset; | |
import java.nio.charset.StandardCharsets; | |
import java.text.Normalizer; | |
/**
 * Prints, to standard output, Java source for lookup tables that map every
 * EBCDIC byte value (0 to 255) to a printable ASCII character. One
 * {@code char[]} table is printed for each EBCDIC code page listed in
 * {@link #main(String[])}. Each table entry is followed by a comment with
 * the Unicode name of the character the EBCDIC byte decodes to.
 */
public class EBCDICToASCII_CodePrinter
{

  /**
   * Generates and prints the lookup tables.
   *
   * @param args
   *        Command-line arguments; not used.
   * @throws IllegalStateException
   *         If the two independent ways of detecting printable ASCII
   *         characters disagree.
   * @throws Exception
   *         Declared so that the method signature stays unchanged for
   *         existing callers.
   */
  public static void main(final String[] args)
    throws Exception
  {
    final String[] ebcdicCodePages = { "IBM037", "IBM01148", "IBM1047" };
    for (final String codePage: ebcdicCodePages)
    {
      final Charset charset = Charset.forName(codePage);
      System.out.println(String
        .format("private static final char[] %s_TO_ASCII_MAP = {",
                charset.name()));
      for (int i = 0; i < 256; i++)
      {
        // Convert EBCDIC character to a Java string
        final byte ebcdicChar = (byte) i;
        final String chstr = new String(new byte[] { ebcdicChar }, charset);
        final char normchstr = normalizeChar(ebcdicChar, charset);

        // Get character (code point) properties
        final int ch = chstr.codePointAt(0);
        final int numUTF8Bytes = chstr.getBytes(StandardCharsets.UTF_8).length;
        final String unicodeCharacterName = Character.getName(ch);
        final boolean isISOControl = Character.isISOControl(ch);
        final boolean isASCIIPrintable = ch > 31 && ch < 127;

        // Sanity check: a printable ASCII character must encode to a
        // single UTF-8 byte and must not be an ISO control character
        final boolean alternateASCIIPrintable = numUTF8Bytes == 1
                                                && !isISOControl;
        if (isASCIIPrintable && !alternateASCIIPrintable)
        {
          throw new IllegalStateException(String
            .format("%3d %d %b - We have not detected printable ASCII characters correctly",
                    ch,
                    numUTF8Bytes,
                    isISOControl));
        }

        // Print the table entry as a valid Java character literal,
        // followed by the Unicode name of the decoded character
        System.out.println(String.format(" %s, // %s",
                                         toCharLiteral(normchstr),
                                         unicodeCharacterName));
      }
      System.out.println("};\n");
    }
  }

  /**
   * Decodes a single EBCDIC byte and reduces it to a printable ASCII
   * character.
   *
   * @param ebcdicChar
   *        EBCDIC byte to convert.
   * @param charset
   *        EBCDIC charset used to decode the byte.
   * @return The first character of the NFKD-normalized decoded string if it
   *         lies in the printable ASCII range (32 to 126), otherwise a
   *         space.
   */
  private static char normalizeChar(final byte ebcdicChar,
                                    final Charset charset)
  {
    // Decode the EBCDIC character (which is a byte), based on the
    // EBCDIC charset
    final String decodedString = new String(new byte[] { ebcdicChar }, charset);
    // Compatibility decomposition puts the base character first, so
    // taking the first character drops accents and diacriticals
    final char normalizedChar = Normalizer
      .normalize(decodedString, Normalizer.Form.NFKD).charAt(0);
    // Keep only characters within the ASCII printable range
    if (normalizedChar > 31 && normalizedChar < 127)
    {
      return normalizedChar;
    }
    else
    {
      return ' ';
    }
  }

  /**
   * Renders a character as Java character-literal source text. The
   * apostrophe and the backslash are escaped, since emitting them verbatim
   * between single quotes would produce source that does not compile.
   *
   * @param c
   *        Character to render; callers pass printable ASCII characters.
   * @return The character literal, including the surrounding single quotes.
   */
  private static String toCharLiteral(final char c)
  {
    if (c == '\'' || c == '\\')
    {
      return "'\\" + c + "'";
    }
    return "'" + c + "'";
  }

}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment