Skip to content

Instantly share code, notes, and snippets.

@ianp
Created April 16, 2012 20:20
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ianp/2401251 to your computer and use it in GitHub Desktop.
Save ianp/2401251 to your computer and use it in GitHub Desktop.
Word count functions and main wrapper.
#import <Foundation/Foundation.h>
/**
 * Counts words by repeatedly scanning up to the next whitespace or newline
 * character with an NSScanner.
 *
 * @param string The text to examine.
 * @return The number of successful scans, i.e. the number of runs of
 *         non-whitespace characters the scanner found.
 */
NSUInteger scannerWordCount(NSString* string)
{
    NSCharacterSet* separators = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSScanner* wordScanner = [NSScanner scannerWithString:string];
    NSUInteger count = 0;
    // The scanned text itself is not needed, only whether a scan succeeded.
    for (;;) {
        BOOL foundWord = [wordScanner scanUpToCharactersFromSet:separators intoString:NULL];
        if (!foundWord) {
            break;
        }
        count += 1;
    }
    return count;
}
/**
 * Counts words as the number of matches of the regular expression \w+
 * over the whole string.
 *
 * @param string The text to examine.
 * @return The number of \w+ matches found in the string.
 */
NSUInteger regexWordCount(NSString* string)
{
    NSRange wholeString = NSMakeRange(0, [string length]);
    // The pattern is a fixed literal, so no error object is requested.
    NSRegularExpression* wordPattern =
        [NSRegularExpression regularExpressionWithPattern:@"\\w+" options:0 error:NULL];
    NSUInteger matches = [wordPattern numberOfMatchesInString:string
                                                      options:0
                                                        range:wholeString];
    return matches;
}
/**
 * Counts words by splitting the string on single space characters.
 *
 * Splitting yields an empty component for every leading, trailing or repeated
 * space (and a single empty component for an empty string), so only non-empty
 * components are counted as words.
 *
 * @param string The text to examine; nil yields 0.
 * @return The number of non-empty, space-separated components.
 */
NSUInteger componentsByStringWordCount(NSString* string)
{
    NSUInteger words = 0;
    for (NSString* component in [string componentsSeparatedByString:@" "]) {
        // Skip the empty strings produced by adjacent or edge separators.
        if ([component length] > 0) {
            ++words;
        }
    }
    return words;
}
/**
 * Counts words by splitting the string on whitespace and newline characters.
 *
 * Splitting yields an empty component between adjacent separators and at the
 * edges of the string (and a single empty component for an empty string), so
 * only non-empty components are counted as words.
 *
 * @param string The text to examine; nil yields 0.
 * @return The number of non-empty components between whitespace/newline
 *         characters.
 */
NSUInteger componentsByCharsWordCount(NSString* string)
{
    NSCharacterSet* ws = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSUInteger words = 0;
    for (NSString* component in [string componentsSeparatedByCharactersInSet:ws]) {
        // Skip the empty strings produced by adjacent or edge separators.
        if ([component length] > 0) {
            ++words;
        }
    }
    return words;
}
/**
 * Counts words using NSLinguisticTagger with the token-type tag scheme.
 *
 * Every token whose tag equals NSLinguisticTagWord increments the count;
 * tokens with any other tag are ignored.
 *
 * @param string The text to examine.
 * @return The number of tokens tagged NSLinguisticTagWord.
 */
NSUInteger taggerWordCount(NSString* string)
{
    NSArray* schemes = [NSArray arrayWithObject:NSLinguisticTagSchemeTokenType];
    NSLinguisticTagger* tagger = [[NSLinguisticTagger alloc] initWithTagSchemes:schemes
                                                                        options:0];
    [tagger setString:string];
    __block NSUInteger words = 0;
    [tagger enumerateTagsInRange:NSMakeRange(0, [string length])
                          scheme:NSLinguisticTagSchemeTokenType
                         options:0
                      usingBlock:^(NSString* tag, NSRange token, NSRange sentence, BOOL *stop) {
        // Tags are NSStrings, so use the string comparison rather than the
        // scripting-oriented isEqualTo:.
        if ([tag isEqualToString:NSLinguisticTagWord]) {
            ++words;
        }
    }];
#if !__has_feature(objc_arc)
    // The tagger was created with alloc/init, so it is owned here and must be
    // released when building without ARC.
    [tagger release];
#endif
    return words;
}
/**
 * Runs each word-count function over the same sample sentence and prints one
 * labelled result per line so the counts can be compared.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return 0 on completion.
 */
int main(int argc, char *argv[]) {
    // @autoreleasepool drains the pool on every exit path from the block.
    @autoreleasepool {
        NSString* string = @"Peter piper picked a peck of pickled pepper . No — really — he did!";
        // NSUInteger is not 'unsigned long' on every target; cast so that the
        // %lu conversions are always given the type they expect.
        printf("scanner (original)\t: %lu\n", (unsigned long)scannerWordCount(string));
        printf("regular expression\t: %lu\n", (unsigned long)regexWordCount(string));
        printf("components (string)\t: %lu\n", (unsigned long)componentsByStringWordCount(string));
        printf("components (chars)\t: %lu\n", (unsigned long)componentsByCharsWordCount(string));
        printf("linguistic tagger\t: %lu\n", (unsigned long)taggerWordCount(string));
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment