Skip to content

Instantly share code, notes, and snippets.

@autch
Created August 26, 2015 01:34
Show Gist options
  • Save autch/c3c880350798130b4a1b to your computer and use it in GitHub Desktop.
Save autch/c3c880350798130b4a1b to your computer and use it in GitHub Desktop.
find -name \*.java | xargs find_bom
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
/*
 * Byte-order-mark signatures, written as NUL-terminated C strings so they
 * can be handed to strstr() as the search pattern.
 */
/* UTF-8 signature: EF BB BF. */
#define UTF8_BOM "\xef\xbb\xbf"
/* UTF-16 big-endian signature: FE FF. */
#define UTF16BE_BOM "\xfe\xff"
/* UTF-16 little-endian signature: FF FE. */
#define UTF16LE_BOM "\xff\xfe"
/*
 * ASCII "+/v", the three bytes searched for as the UTF-7 signature.
 * NOTE(review): main() passes a length of 4 for this one, presumably to
 * also skip the fourth signature byte, which is not part of this pattern
 * -- confirm against the Unicode BOM table.
 */
#define UTF7_BOM "\x2b\x2f\x76"
/**
 * Report the first occurrence of a byte-order mark inside one chunk of text.
 *
 * When `bom` is found in `buffer`, two lines are written to stdout: a
 * "file:line: NAME BOM at column N" header (N is the zero-based byte offset
 * of the hit within `buffer`) and a copy of the text in which the BOM bytes
 * are replaced by a "[BOM]" marker (highlighted when stdout is a terminal).
 *
 * @param name       Human-readable encoding name used in the report.
 * @param bom        NUL-terminated byte sequence to search for.
 * @param bom_bytes  Number of bytes to hide behind the "[BOM]" marker; may
 *                   be larger than strlen(bom).
 * @param filename   File name used in the report.
 * @param lineno     Line number used in the report.
 * @param buffer     NUL-terminated text to search.
 * @return 1 if the BOM was found (and reported), 0 otherwise.
 */
int match(const char* name, const char* bom, size_t bom_bytes, const char* filename, int lineno, const char* buffer)
{
    const char *hit = strstr(buffer, bom);
    if (hit == NULL) {
        return 0;
    }

    /*
     * Skip the BOM bytes, but never step over the string terminator:
     * bom_bytes can exceed the number of bytes left after the hit (the
     * caller asks for 4 bytes on a 3-byte pattern), and reading past the
     * NUL would print stale data or run off the end of the buffer.
     */
    const char *tail = hit;
    for (size_t skipped = 0; skipped < bom_bytes && *tail != '\0'; skipped++) {
        tail++;
    }

    int pos = (int)(hit - buffer);
    /* STDOUT_FILENO avoids fileno(), which strict ISO C mode does not declare. */
    int istty = isatty(STDOUT_FILENO);

    printf("%s:%d: %s BOM at column %d\n", filename, lineno, name, pos);
    printf("> %.*s%s%s\n", pos, buffer,
           istty ? "\033[1;33m[BOM]\033[0m" : "[BOM]",
           tail);
    return 1;
}
/**
 * Scan every file named on the command line for byte-order marks.
 *
 * Each file is opened in binary mode and read in chunks of at most
 * sizeof(buffer) - 1 bytes with fgets(); every chunk is checked for the
 * UTF-7, UTF-8, UTF-16 LE and UTF-16 BE signatures via match(), which
 * prints a report for each hit. Files that cannot be opened, and read
 * errors, are reported with perror() and the scan moves on to the next
 * file.
 *
 * @param ac  Argument count.
 * @param av  Argument vector; av[1..ac-1] are the files to scan.
 * @return Always 0.
 */
int main(int ac, char** av)
{
    char buffer[4096];

    /* Index by ac rather than walking to the NULL terminator, so an empty
       argument vector (ac == 0) is never read past its end. */
    for (int i = 1; i < ac; i++) {
        const char *filename = av[i];
        FILE *fp = fopen(filename, "rb");
        if (fp == NULL) {
            perror(filename);
            continue;
        }

        int lineno = 1;
        for (;;) {
            /*
             * Sentinel: fgets() stores its terminating NUL in the last
             * slot only when it filled the whole buffer. Unlike strlen(),
             * this still works when the data contains NUL bytes.
             */
            buffer[sizeof buffer - 1] = 1;
            if (fgets(buffer, sizeof buffer, fp) == NULL) {
                break;
            }

            /* UTF-7: the pattern is 3 bytes, but 4 bytes are hidden behind the marker. */
            match("UTF-7", UTF7_BOM, 4, filename, lineno, buffer);
            match("UTF-8", UTF8_BOM, 3, filename, lineno, buffer);
            match("UTF-16 LE", UTF16LE_BOM, 2, filename, lineno, buffer);
            match("UTF-16 BE", UTF16BE_BOM, 2, filename, lineno, buffer);

            /*
             * Advance the line counter only when this chunk ended a line.
             * A full buffer whose last byte is not '\n' means the line
             * continues in the next chunk and must keep the same number.
             */
            int filled = (buffer[sizeof buffer - 1] == '\0');
            if (!filled || buffer[sizeof buffer - 2] == '\n') {
                lineno++;
            }
        }

        /* fgets() returns NULL on both EOF and error; report the latter. */
        if (ferror(fp)) {
            perror(filename);
        }
        fclose(fp);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment