Skip to content

Instantly share code, notes, and snippets.

@mharris717
Created January 5, 2010 22:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mharris717/269811 to your computer and use it in GitHub Desktop.
Save mharris717/269811 to your computer and use it in GitHub Desktop.
#include <cassert>
#include <iostream>
using namespace std;
// Counts the consecutive 1 bits at the top of an 8-bit value, i.e. starting
// at bit 7 (value 128) and moving toward bit 0.
//
// num is expected to be in 0..255. It is NOT masked before the first
// comparison, so a negative argument yields 0 and an argument above 255 is
// compared as-is on the first step.
//
// Returns a count from 0 to 8.
int leadingOnes(int num) {
    int count = 0;
    // While the top bit of the byte is set, shift it out (keeping only the
    // low 8 bits) and count it. Stops at the first 0 bit or after 8 bits.
    while (count < 8 && num >= 128) {
        num = (num << 1) & 255;
        ++count;
    }
    return count;
}
// Structural check that bytes[0..numBytes) holds exactly ONE UTF-8 encoded
// character: either a single byte with no leading 1 bits, or a lead byte with
// 2, 3 or 4 leading 1 bits followed by that many minus one continuation bytes
// (each with exactly one leading 1 bit).
//
// bytes    - array of byte values, one per int; each must be in 0..255.
// numBytes - number of elements to examine; must equal the encoded length.
//
// Returns true only if the sequence has that shape. Returns false for a null
// pointer, a non-positive length, or any element outside 0..255.
//
// NOTE: this validates the bit layout only. It does not reject overlong
// encodings, surrogate code points, or code points above U+10FFFF.
bool validUTF8(int* bytes, int numBytes) {
    // Guard before touching bytes[0]: an empty or missing buffer cannot hold
    // a character, and reading it would be out of bounds.
    if (!bytes || numBytes <= 0) return false;

    // leadingOnes() does not mask its argument, so a value outside 0..255
    // would be misclassified (e.g. 0x1A1 would look like a continuation
    // byte). Such a value is not a byte at all, so reject it up front.
    for (int i = 0; i < numBytes; i++) {
        if (bytes[i] < 0 || bytes[i] > 255) return false;
    }

    int firstOnes = leadingOnes(bytes[0]);

    // Single-byte form: 0xxxxxxx.
    if (firstOnes == 0 && numBytes == 1) return true;

    // A lead byte must announce a length of 2, 3 or 4. One leading 1 bit is
    // a stray continuation byte; five or more is not a valid lead.
    if (firstOnes < 2 || firstOnes > 4) return false;

    // The announced length must match the number of bytes supplied.
    if (firstOnes != numBytes) return false;

    // Every byte after the lead must be a continuation byte: 10xxxxxx.
    for (int i = 1; i < numBytes; i++) {
        if (leadingOnes(bytes[i]) != 1) return false;
    }
    return true;
}
// Self-checks for validUTF8. Each case passes a small array of byte values
// together with the length to examine and asserts the expected verdict.
int main() {
    // One byte with no leading 1 bits.
    int singleByte[] = {0x04};
    assert(validUTF8(singleByte, 1));

    // Lead byte with two leading 1 bits followed by one continuation byte.
    int pairOk[] = {0xC1, 0xA1};
    assert(validUTF8(pairOk, 2));

    // Second byte 0xD1 has two leading 1 bits, so it is not a continuation.
    int pairBadTail[] = {0xC1, 0xD1};
    assert(!validUTF8(pairBadTail, 2));

    // Lead byte 0xF1 has four leading 1 bits but only two bytes are given.
    int pairBadLead[] = {0xF1, 0xA1};
    assert(!validUTF8(pairBadLead, 2));

    // Lead byte with three leading 1 bits followed by two continuations.
    int tripleOk[] = {0xE1, 0xA1, 0xA3};
    assert(validUTF8(tripleOk, 3));

    // Last byte 0xD1 is not a continuation; and when only two bytes are
    // examined, the lead byte's three leading 1 bits do not match the length.
    int tripleBadTail[] = {0xE1, 0xA1, 0xD1};
    assert(!validUTF8(tripleBadTail, 3));
    assert(!validUTF8(tripleBadTail, 2));

    // Lead byte 0xF1 has four leading 1 bits but three bytes are given.
    int tripleBadLead[] = {0xF1, 0xA1, 0xA1};
    assert(!validUTF8(tripleBadLead, 3));

    // Lead byte with four leading 1 bits followed by three continuations.
    int quadOk[] = {0xF1, 0xA1, 0xA3, 0xA2};
    assert(validUTF8(quadOk, 4));

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment