Skip to content

Instantly share code, notes, and snippets.

@alex3305
Created January 5, 2015 12:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alex3305/36dab7ae3ce3c21ee716 to your computer and use it in GitHub Desktop.
Save alex3305/36dab7ae3ce3c21ee716 to your computer and use it in GitHub Desktop.
Byte Order Mark Utility
import java.io.*;
/**
 * Utility for detecting, reporting and skipping a Unicode byte order mark
 * (BOM) at the start of a byte array or file.
 *
 * <p>Source: http://bugs.java.com/bugdatabase/view_bug.do?bug_id=6206835
 */
public class BOMUtil {
    /** Result of the {@code getBOMType} methods when no known BOM is present. */
    public final static int NONE = -1;
    /** Index of the UTF-32 big-endian BOM in {@link #BOMBYTES}. */
    public final static int UTF32BE = 0;
    /** Index of the UTF-32 little-endian BOM in {@link #BOMBYTES}. */
    public final static int UTF32LE = 1;
    /** Index of the UTF-16 big-endian BOM in {@link #BOMBYTES}. */
    public final static int UTF16BE = 2;
    /** Index of the UTF-16 little-endian BOM in {@link #BOMBYTES}. */
    public final static int UTF16LE = 3;
    /** Index of the UTF-8 BOM in {@link #BOMBYTES}. */
    public final static int UTF8 = 4;

    // NOTE: these arrays are public and therefore mutable by callers; they are
    // kept public for compatibility and must be treated as read-only.
    public final static byte[] UTF32BEBOMBYTES = new byte[]{(byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF};
    public final static byte[] UTF32LEBOMBYTES = new byte[]{(byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00};
    public final static byte[] UTF16BEBOMBYTES = new byte[]{(byte) 0xFE, (byte) 0xFF};
    public final static byte[] UTF16LEBOMBYTES = new byte[]{(byte) 0xFF, (byte) 0xFE};
    public final static byte[] UTF8BOMBYTES = new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};

    /**
     * All known BOM sequences, indexed by the type constants above.
     *
     * <p>Order matters: detection returns the first entry that matches, so the
     * UTF-32LE sequence (FF FE 00 00) must precede the UTF-16LE sequence
     * (FF FE), which is a prefix of it.
     */
    public final static byte[][] BOMBYTES = new byte[][]{
        UTF32BEBOMBYTES,
        UTF32LEBOMBYTES,
        UTF16BEBOMBYTES,
        UTF16LEBOMBYTES,
        UTF8BOMBYTES,
    };

    public final static int MAXBOMBYTES = 4; // no bom sequence is longer than 4 byte

    /**
     * Detects the BOM at the start of the given bytes, considering the whole array.
     *
     * @param _bomBytes leading bytes of the data; {@code null} yields {@link #NONE}
     * @return one of the type constants, or {@link #NONE} if no BOM matches
     */
    public static int getBOMType(byte[] _bomBytes) {
        if (_bomBytes == null) {
            return NONE;
        }
        return getBOMType(_bomBytes, _bomBytes.length);
    }

    /**
     * Detects the BOM at the start of the given bytes, considering only the
     * first {@code _length} bytes.
     *
     * @param _bomBytes leading bytes of the data; {@code null} yields {@link #NONE}
     * @param _length   number of valid bytes in {@code _bomBytes}; values larger
     *                  than the array length are clamped to it, and values
     *                  {@code <= 0} yield {@link #NONE}
     * @return one of the type constants, or {@link #NONE} if no BOM matches
     */
    public static int getBOMType(byte[] _bomBytes, int _length) {
        if (_bomBytes == null) {
            return NONE;
        }
        // Clamp so an over-long _length cannot index past the end of the array.
        int usable = Math.min(_length, _bomBytes.length);
        for (int type = 0; type < BOMBYTES.length; type++) {
            if (startsWith(_bomBytes, usable, BOMBYTES[type])) {
                return type;
            }
        }
        return NONE;
    }

    /**
     * Detects the BOM at the start of a file.
     *
     * @param _f file to inspect
     * @return one of the type constants, or {@link #NONE} if no BOM matches
     *         (including when the file is empty)
     * @throws IOException if the file cannot be opened or read
     */
    public static int getBOMType(File _f) throws IOException {
        try (FileInputStream fIn = new FileInputStream(_f)) {
            byte[] buff = new byte[MAXBOMBYTES];
            int read = readPrefix(fIn, buff);
            return getBOMType(buff, read);
        }
    }

    /**
     * Returns the first bytes of a file (at most {@link #MAXBOMBYTES}) as
     * lower-case two-digit hex values, each followed by a single space.
     *
     * <p>Only bytes actually present in the file are reported; a file shorter
     * than {@link #MAXBOMBYTES} bytes produces correspondingly fewer entries.
     *
     * @param _f file to inspect
     * @return hex dump such as {@code "ef bb bf 68 "}; empty for an empty file
     * @throws IOException if the file cannot be opened or read
     */
    public static String getFirstBytes(File _f) throws IOException {
        try (FileInputStream fIn = new FileInputStream(_f)) {
            byte[] buff = new byte[MAXBOMBYTES];
            int read = readPrefix(fIn, buff);
            StringBuilder hex = new StringBuilder(read * 3);
            for (int i = 0; i < read; i++) {
                hex.append(String.format("%02x", buff[i])).append(' ');
            }
            return hex.toString();
        }
    }

    /**
     * Returns the length in bytes of the BOM for the given type.
     *
     * @param BOMType one of the type constants
     * @return number of bytes to skip, or 0 if {@code BOMType} is
     *         {@link #NONE} or otherwise not a valid index into {@link #BOMBYTES}
     */
    public static int getSkipBytes(int BOMType) {
        if (BOMType < 0 || BOMType >= BOMBYTES.length) {
            return 0;
        }
        return BOMBYTES[BOMType].length;
    }

    /**
     * Opens a reader on the file that starts after any leading BOM.
     *
     * <p>The file is opened once: up to {@link #MAXBOMBYTES} bytes are read to
     * detect the BOM, and any of those bytes that are not part of the BOM are
     * pushed back so the reader sees them. The BOM only determines how many
     * bytes are skipped; decoding always uses {@code encoding}.
     *
     * @param _f       file to read
     * @param encoding charset name used to decode the remaining bytes
     * @return a reader positioned after the BOM; the caller must close it
     * @throws IOException if the file cannot be opened or read, or if
     *                     {@code encoding} is not supported
     */
    public static Reader getReader(File _f, String encoding) throws IOException {
        FileInputStream fIn = new FileInputStream(_f);
        boolean handedOff = false;
        try {
            PushbackInputStream in = new PushbackInputStream(fIn, MAXBOMBYTES);
            byte[] buff = new byte[MAXBOMBYTES];
            int read = readPrefix(in, buff);
            int skipBytes = getSkipBytes(getBOMType(buff, read));
            if (read > skipBytes) {
                // Return the non-BOM bytes we consumed while sniffing.
                in.unread(buff, skipBytes, read - skipBytes);
            }
            Reader reader = new InputStreamReader(in, encoding);
            handedOff = true;
            return reader;
        } finally {
            // Do not leak the file handle if anything above failed
            // (e.g. unsupported encoding).
            if (!handedOff) {
                fIn.close();
            }
        }
    }

    /** Returns true if the first {@code length} bytes of {@code data} begin with {@code prefix}. */
    private static boolean startsWith(byte[] data, int length, byte[] prefix) {
        if (length < prefix.length) {
            return false;
        }
        for (int j = 0; j < prefix.length; j++) {
            if (data[j] != prefix[j]) {
                return false;
            }
        }
        return true;
    }

    /** Reads until {@code buff} is full or the stream ends; returns the byte count read (never negative). */
    private static int readPrefix(InputStream in, byte[] buff) throws IOException {
        int total = 0;
        while (total < buff.length) {
            int n = in.read(buff, total, buff.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment