simd adoption?
#import <Foundation/Foundation.h>
#import <mach/mach_time.h>
#import <simd/simd.h>
#include <string.h>
/**
 * Converts a duration measured in mach absolute-time ticks into seconds.
 *
 * The tick-to-seconds factor is derived from mach_timebase_info() the first
 * time it is needed and cached in a function-local static. If the timebase
 * query fails, the factor stays at 0.0, the function returns 0, and the query
 * is retried on the next call.
 *
 * @param machTimeInterval A difference of two mach_absolute_time() readings.
 * @return The same duration expressed in seconds.
 */
NSTimeInterval SecondsFromMachTimeInterval(uint64_t machTimeInterval) {
    // 0.0 doubles as the "not computed yet" marker.
    static double secondsPerTick = 0.0;

    if (secondsPerTick == 0.0) {
        mach_timebase_info_data_t timebase;
        kern_return_t status = mach_timebase_info(&timebase);
        if (status == KERN_SUCCESS) {
            // numer/denom scales ticks to nanoseconds; step down to
            // microseconds and then to seconds.
            double nanosecondsPerTick = (double)timebase.numer / (double)timebase.denom;
            double microsecondsPerTick = nanosecondsPerTick / 1000;
            secondsPerTick = microsecondsPerTick / 1000000;
        }
    }

    return secondsPerTick * machTimeInterval;
}
/**
 * Measures how long it takes to invoke a block a given number of times.
 *
 * @param blockToTime The work to run; invoked once per repetition.
 * @param repCount    How many times to invoke the block. Values <= 0 run it
 *                    zero times.
 * @return Elapsed time in seconds for all repetitions together (loop overhead
 *         included).
 */
NSTimeInterval TimeToPerformBlock(void (^blockToTime)(void), long repCount) {
    const uint64_t startTicks = mach_absolute_time();

    long remaining = repCount;
    while (remaining > 0) {
        blockToTime();
        remaining--;
    }

    const uint64_t elapsedTicks = mach_absolute_time() - startTicks;
    return SecondsFromMachTimeInterval(elapsedTicks);
}
/**
 * Logs one benchmark result line via NSLog.
 *
 * @param seconds    The measured duration, in seconds.
 * @param identifier Label for the measurement; printed left-justified in an
 *                   80-column field so the timings line up.
 */
void logMachTimeInterval_withIdentifier_(NSTimeInterval seconds, NSString *identifier) {
    const char *label = identifier.UTF8String;
    NSLog(@"%-80s %g seconds\n", label, seconds);
}
/**
 * Times a block over `repCount` invocations and logs the total under a label.
 *
 * Marked noinline so the call stays a distinct function call.
 *
 * @param identifier  Label printed next to the timing.
 * @param repCount    Number of times the block is invoked.
 * @param blockToTime The work being measured.
 */
__attribute__((noinline)) void PresentTimeToPerformBlockWithIdentifier(NSString *identifier, long repCount, void (^blockToTime)(void)) {
    NSTimeInterval elapsedSeconds = TimeToPerformBlock(blockToTime, repCount);
    logMachTimeInterval_withIdentifier_(elapsedSeconds, identifier);
}
/**
 * Scalar check that every byte in a buffer is 7-bit ASCII (high bit clear).
 *
 * Words are loaded with memcpy() rather than by dereferencing a cast pointer:
 * `bytes` carries no alignment guarantee, and reading it through a
 * `uint64_t *` / `uint32_t *` is undefined behaviour. A fixed-size memcpy
 * expresses the same word load without that alignment assumption.
 *
 * @param bytes Start of the buffer. Only dereferenced when `len` > 0.
 * @param len   Number of bytes to examine. Zero or negative lengths are
 *              treated as an empty buffer.
 * @return true if no examined byte has its high bit set, false otherwise.
 */
__attribute__((noinline)) Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
#if __LP64__
    const uint64_t highBits64 = 0x8080808080808080ULL;

    /* A bit of unrolling; go by 32s, 16s, and 8s first */
    while (len >= 32) {
        uint64_t w0, w1, w2, w3;
        memcpy(&w0, bytes,      sizeof w0);
        memcpy(&w1, bytes + 8,  sizeof w1);
        memcpy(&w2, bytes + 16, sizeof w2);
        memcpy(&w3, bytes + 24, sizeof w3);
        // Collect the high bits of all four words and branch once,
        // rather than doing a conditional at every step.
        if ((w0 | w1 | w2 | w3) & highBits64) return false;
        bytes += 32;
        len -= 32;
    }
    while (len >= 16) {
        uint64_t w0, w1;
        memcpy(&w0, bytes,     sizeof w0);
        memcpy(&w1, bytes + 8, sizeof w1);
        if ((w0 | w1) & highBits64) return false;
        bytes += 16;
        len -= 16;
    }
    while (len >= 8) {
        uint64_t w0;
        memcpy(&w0, bytes, sizeof w0);
        if (w0 & highBits64) return false;
        bytes += 8;
        len -= 8;
    }
#endif
    /* Go by 4s */
    while (len >= 4) {
        uint32_t word;
        memcpy(&word, bytes, sizeof word);
        if (word & 0x80808080U) return false;
        bytes += 4;
        len -= 4;
    }
    /* Handle the rest one byte at a time; `> 0` keeps a negative len from
       walking off the end of the buffer. */
    while (len-- > 0) {
        if (*bytes++ & 0x80) return false;
    }
    return true;
}
/**
 * simd variant of __CFBytesInASCII: checks that every byte in a buffer is
 * 7-bit ASCII (high bit clear), examining 32, 16, 8 and 4 bytes at a time.
 *
 * Chunks are copied into vector temporaries with memcpy() instead of
 * dereferencing `bytes` through a vector pointer. Casting `bytes` to a vector
 * pointer type asserts that type's alignment, which an arbitrary byte buffer
 * does not guarantee, so the compiler may emit an aligned load that faults.
 *
 * @param bytes Start of the buffer. Only dereferenced when `len` > 0.
 * @param len   Number of bytes to examine. Zero or negative lengths are
 *              treated as an empty buffer.
 * @return true if no examined byte has its high bit set, false otherwise.
 */
__attribute__((noinline)) Boolean __CFBytesInASCII_vec(const uint8_t *bytes, CFIndex len) {
#if __LP64__
    /* A bit of unrolling; go by 32s, 16s, and 8s first */
    while (len >= 32) {
        vector_uchar32 chunk;
        memcpy(&chunk, bytes, sizeof chunk);
        if (vector_any(chunk & 0x80U)) {
            return false;
        }
        bytes += 32;
        len -= 32;
    }
    while (len >= 16) {
        vector_uchar16 chunk;
        memcpy(&chunk, bytes, sizeof chunk);
        if (vector_any(chunk & 0x80U)) {
            return false;
        }
        bytes += 16;
        len -= 16;
    }
    while (len >= 8) {
        vector_uchar8 chunk;
        memcpy(&chunk, bytes, sizeof chunk);
        if (vector_any(chunk & 0x80U)) {
            return false;
        }
        bytes += 8;
        len -= 8;
    }
#endif
    /* Go by 4s */
    while (len >= 4) {
        vector_uchar4 chunk;
        memcpy(&chunk, bytes, sizeof chunk);
        if (vector_any(chunk & 0x80U)) {
            return false;
        }
        bytes += 4;
        len -= 4;
    }
    /* Handle the rest one byte at a time; `> 0` keeps a negative len from
       walking off the end of the buffer. */
    while (len-- > 0) {
        if (*bytes++ & 0x80) return false;
    }
    return true;
}
/* Running sum of the checker results. The benchmark blocks add into this
   global so each call's return value is actually consumed. */
long total = 0L;

/* Number of times each benchmark block is invoked per measurement. */
long repeatCount = 10000L;
/**
 * Benchmark driver: times the scalar and simd ASCII checks on three inputs.
 *
 *  - a short ASCII string,
 *  - the system word list (long input),
 *  - the word list prefixed with a non-ASCII character, so the check can
 *    return on the first chunk.
 *
 * The NSString objects are held in locals for the whole run, because the
 * buffer returned by -UTF8String is only guaranteed to live as long as the
 * string it came from. The word list is read once and reused.
 *
 * @return 0 on success, 1 if the word list could not be read as UTF-8.
 */
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSString *wordsPath = @"/usr/share/dict/words";
        NSError *readError = nil;
        NSString *longString = [NSString stringWithContentsOfFile:wordsPath
                                                         encoding:NSUTF8StringEncoding
                                                            error:&readError];
        if (longString == nil) {
            // Without this check a missing/unreadable file yields a NULL
            // UTF8String and strlen(NULL) crashes.
            NSLog(@"Unable to read %@: %@", wordsPath, readError);
            return 1;
        }
        NSString *shortString = @"hello!";
        NSString *longEarlyOutString = [@"ü" stringByAppendingString:longString];

        const uint8_t *shortStringBytes = (const uint8_t *)[shortString UTF8String];
        CFIndex shortLen = (CFIndex)strlen((const char *)shortStringBytes);
        const uint8_t *longStringBytes = (const uint8_t *)[longString UTF8String];
        CFIndex longLen = (CFIndex)strlen((const char *)longStringBytes);
        const uint8_t *longEarlyOutStringBytes = (const uint8_t *)[longEarlyOutString UTF8String];
        CFIndex longEarlyLen = (CFIndex)strlen((const char *)longEarlyOutStringBytes);

        NSLog(@"shortLen: %ld longLen:%ld", shortLen, longLen);

        PresentTimeToPerformBlockWithIdentifier(@"short no-vec", repeatCount, ^{
            total += __CFBytesInASCII(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"short vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longEarlyOutStringBytes, longEarlyLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longEarlyOutStringBytes, longEarlyLen);
        });
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment