Skip to content

Instantly share code, notes, and snippets.

@flying19880517
Created July 7, 2012 16:27
Show Gist options
  • Save flying19880517/3067078 to your computer and use it in GitHub Desktop.
Save flying19880517/3067078 to your computer and use it in GitHub Desktop.
检查文件编码是否是utf-8
/**
 * Heuristically decides whether the data at the front of @p file looks like
 * UTF-8 text that contains at least one multi-byte sequence.
 *
 * Up to 1024 bytes are inspected through QIODevice::peek(), so nothing is
 * consumed from the device by this function.
 *
 * Notes on the heuristic:
 *  - A sample made up only of ASCII bytes (0x00-0x7F) yields false: such
 *    input is treated as being in the default encoding, even though it is
 *    also valid UTF-8.
 *  - A byte order mark is not treated specially.
 *  - A multi-byte sequence that is still incomplete when the sample ends is
 *    tolerated (presumably because the 1024-byte window can cut through the
 *    middle of a character).
 *  - Only the lead-byte / continuation-byte structure is checked; overlong
 *    forms and surrogate code points hidden behind a legal lead byte are not
 *    detected.
 *
 * @param file Device to sample; a null pointer yields false.
 * @return true if the sample contains at least one non-ASCII byte and every
 *         byte fits the UTF-8 structure, false otherwise (including when
 *         peek() delivers no data).
 */
bool MainWindow::isUtf8File(QIODevice *file)
{
    if (!file)
        return false;

    const int testSize = 1024;
    char str[testSize];
    // peek() reports a negative value on error; the loop below is then skipped
    // and the function falls through to "not UTF-8".
    const int size = static_cast<int>(file->peek(str, testSize));

    int pendingContinuations = 0; // continuation bytes still owed by the current sequence
    bool sawNonAscii = false;

    for (int i = 0; i < size; ++i) {
        // Classify the byte as an unsigned value: plain char may be signed,
        // and shifting or range-comparing negative values is not reliable.
        const unsigned char current = static_cast<unsigned char>(str[i]);

        if (pendingContinuations > 0) {
            // Inside a multi-byte sequence: every byte must look like 10xxxxxx.
            if ((current & 0xC0) != 0x80)
                return false;
            --pendingContinuations;
            continue;
        }

        if (current < 0x80)
            continue; // ASCII chars, from 0x00-0x7F

        sawNonAscii = true;

        // Lead byte: derive the sequence length from its value. 0x80-0xBF are
        // stray continuation bytes, 0xC0/0xC1 could only start overlong forms,
        // and 0xF5-0xFF would encode beyond U+10FFFF or sequences longer than
        // four bytes, so none of them can start a UTF-8 sequence.
        if (current >= 0xC2 && current <= 0xDF)
            pendingContinuations = 1;
        else if (current >= 0xE0 && current <= 0xEF)
            pendingContinuations = 2;
        else if (current >= 0xF0 && current <= 0xF4)
            pendingContinuations = 3;
        else
            return false;
    }

    // pendingContinuations may still be non-zero here; see the note about
    // truncated trailing sequences in the function documentation.
    return sawNonAscii;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment