Skip to content

Instantly share code, notes, and snippets.

@seiji
Created November 6, 2012 04:22
Show Gist options
  • Save seiji/4022535 to your computer and use it in GitHub Desktop.
Save seiji/4022535 to your computer and use it in GitHub Desktop.
utf8mb4
#import "NSString+Unicode.h"
@implementation NSString (Unicode)

/// Classifies the byte that starts a UTF-8 sequence.
///
/// @param lead The first byte of a sequence.
/// @return The total number of bytes in the sequence (1-6), or 0 when `lead`
///         is 0xFE or 0xFF, which never start a sequence.
///
/// The thresholds intentionally mirror the original lead-byte ladder: the
/// obsolete 5- and 6-byte forms are still recognised, and a stray continuation
/// byte (0x80-0xBF) is classified like a two-byte lead. Neither is expected in
/// well-formed UTF-8, so this only affects malformed input.
static NSUInteger UTF8MB4SequenceLength(unsigned char lead) {
    if (lead < 0x80) return 1;
    if (lead < 0xE0) return 2;
    if (lead < 0xF0) return 3;
    if (lead < 0xF8) return 4;
    if (lead < 0xFC) return 5;
    if (lead < 0xFE) return 6;
    return 0;
}

/// Counts the characters of the receiver the way a `utf8mb4` column would:
/// one per UTF-8 encoded sequence, regardless of how many bytes (or UTF-16
/// code units) that sequence occupies.
///
/// @return The number of UTF-8 sequences in the receiver, or 0 if no UTF-8
///         representation is available.
- (NSUInteger)utf8mb4length {
    const char *bytes = [self UTF8String];
    if (bytes == NULL) {
        return 0;
    }

    // Keep the full NSUInteger width; the byte length of a large string does
    // not necessarily fit in an unsigned int.
    NSUInteger byteLength = [self lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
    NSUInteger count = 0;
    NSUInteger index = 0;

    while (index < byteLength) {
        NSUInteger sequenceLength = UTF8MB4SequenceLength((unsigned char)bytes[index]);
        if (sequenceLength == 0) {
            // 0xFE / 0xFF: skip the byte without counting it.
            index++;
            continue;
        }
        count++;
        index += sequenceLength;
    }
    return count;
}

/// Converts an offset expressed in UTF-16 code units (the unit used by
/// NSString indexes and NSRange) into an offset expressed in UTF-8 sequences.
///
/// Sequences of up to three bytes consume one unit of `offset`; sequences of
/// four or more bytes consume two, matching a UTF-16 surrogate pair.
///
/// @param offset Offset into the receiver, in UTF-16 code units.
/// @return The number of whole sequences that fit within `offset`. If `offset`
///         exceeds the receiver's length the total sequence count is returned.
///         If `offset` falls between the two halves of a surrogate pair, that
///         character is not counted (the result is rounded down).
- (NSUInteger)utf8mb4Offset:(NSUInteger)offset {
    const char *bytes = [self UTF8String];
    if (bytes == NULL) {
        return 0;
    }

    NSUInteger byteLength = [self lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
    NSUInteger remaining = offset;
    NSUInteger count = 0;
    NSUInteger index = 0;

    while (index < byteLength && remaining != 0) {
        NSUInteger sequenceLength = UTF8MB4SequenceLength((unsigned char)bytes[index]);
        if (sequenceLength == 0) {
            // 0xFE / 0xFF: skip the byte without consuming any offset.
            index++;
            continue;
        }

        NSUInteger unitsConsumed = (sequenceLength >= 4) ? 2 : 1;
        if (remaining < unitsConsumed) {
            // Only half of a surrogate pair is left. Subtracting would wrap
            // the unsigned counter and make the loop run to the end of the
            // string, so stop here instead.
            break;
        }

        remaining -= unitsConsumed;
        count++;
        index += sequenceLength;
    }
    return count;
}

@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment