/gist:228144648ee250a4ba07
Forked from kongtomorrow/gist:3e35d926ec17d9b621c1
Last active Aug 29, 2015
#import <Foundation/Foundation.h> | |
#import <mach/mach_time.h> | |
#import <simd/simd.h> | |
// Converts a duration measured in mach absolute-time ticks into seconds.
//
// The ticks-to-seconds factor is derived from mach_timebase_info() the first
// time it is needed and cached in a function-local static. If the timebase
// query fails, the cached factor stays at 0.0: this call returns 0 and the
// query is attempted again on the next call.
//
// machTimeInterval: a tick count, e.g. the difference of two
//                   mach_absolute_time() readings.
// Returns the interval in seconds.
NSTimeInterval SecondsFromMachTimeInterval(uint64_t machTimeInterval) {
    static double secondsPerTick = 0.0;

    if (secondsPerTick == 0.0) {
        mach_timebase_info_data_t timebase;
        kern_return_t status = mach_timebase_info(&timebase);
        if (status == KERN_SUCCESS) {
            // numer/denom scales ticks to nanoseconds; step that factor down
            // to microseconds and then to seconds.
            double nanosecondsPerTick = (double)timebase.numer / (double)timebase.denom;
            double microsecondsPerTick = nanosecondsPerTick / 1000;
            secondsPerTick = microsecondsPerTick / 1000000;
        }
    }

    return secondsPerTick * machTimeInterval;
}
// Runs blockToTime repCount times back to back and returns the total elapsed
// wall-clock time in seconds, measured with mach_absolute_time().
//
// blockToTime: the work to measure; invoked once per repetition.
// repCount:    number of invocations; zero or a negative value runs the block
//              no times at all.
// Returns the elapsed time for all repetitions combined (not per repetition).
NSTimeInterval TimeToPerformBlock(void (^blockToTime)(void), long repCount) {
    const uint64_t startTicks = mach_absolute_time();

    long remaining = repCount;
    while (remaining > 0) {
        blockToTime();
        remaining--;
    }

    const uint64_t elapsedTicks = mach_absolute_time() - startTicks;
    return SecondsFromMachTimeInterval(elapsedTicks);
}
// Logs one benchmark result line via NSLog: the identifier left-justified in
// an 80-character column, followed by the duration in seconds.
//
// seconds:    the measured duration.
// identifier: label for the measurement; passed to the %s conversion as a
//             UTF-8 C string.
void logMachTimeInterval_withIdentifier_(NSTimeInterval seconds, NSString *identifier) {
    const char *label = [identifier UTF8String];
    NSLog(@"%-80s %g seconds\n", label, seconds);
}
// Times repCount invocations of blockToTime and logs the total under the
// given identifier. Marked noinline so the call stays a distinct function.
//
// identifier:  label printed next to the measurement.
// repCount:    how many times the block is invoked.
// blockToTime: the work being measured.
__attribute__((noinline)) void PresentTimeToPerformBlockWithIdentifier(NSString *identifier, long repCount, void (^blockToTime)(void)) {
    NSTimeInterval elapsed = TimeToPerformBlock(blockToTime, repCount);
    logMachTimeInterval_withIdentifier_(elapsed, identifier);
}
// Scalar ASCII check: returns true when none of the first len bytes at
// `bytes` has its high bit (0x80) set, false as soon as a chunk containing
// such a byte is found.
//
// The buffer is consumed in progressively smaller chunks (32, 16 and 8 bytes
// on LP64 targets, then 4 bytes, then single bytes). Within a chunk the words
// are OR-ed together so the high-bit test happens once per chunk instead of
// once per word. Words are loaded directly through casted pointers, without
// any alignment adjustment.
//
// bytes: start of the buffer to inspect.
// len:   number of bytes to inspect.
__attribute__((noinline)) Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
#if __LP64__
    const uint64_t highBits64 = 0x8080808080808080ULL;

    // 32 bytes per pass: four 64-bit words, tested together.
    for (; len >= 32; bytes += 32, len -= 32) {
        const uint64_t *words = (const uint64_t *)bytes;
        if ((words[0] | words[1] | words[2] | words[3]) & highBits64) return false;
    }

    // Fewer than 32 bytes remain here, so the 16-byte and 8-byte stages can
    // each run at most once.
    if (len >= 16) {
        const uint64_t *words = (const uint64_t *)bytes;
        if ((words[0] | words[1]) & highBits64) return false;
        bytes += 16;
        len -= 16;
    }
    if (len >= 8) {
        if (*(const uint64_t *)bytes & highBits64) return false;
        bytes += 8;
        len -= 8;
    }
#endif

    // 4 bytes per pass.
    const uint32_t highBits32 = 0x80808080U;
    for (; len >= 4; bytes += 4, len -= 4) {
        if (*(const uint32_t *)bytes & highBits32) return false;
    }

    // Whatever is left, one byte at a time.
    for (; len != 0; len--) {
        if (*bytes++ & 0x80) return false;
    }

    return true;
}
// Returns the index (0-31) of the lowest-numbered lane of x whose high bit is
// set, or 32 when no lane has its high bit set.
//
// The per-lane high bits are gathered into a 32-bit mask (bit i == high bit of
// lane i) and the answer is the number of trailing zero bits in that mask.
// The mask is built as an unsigned value: the movemask intrinsics return a
// signed int, and shifting the upper half left by 16 as a signed int would
// overflow into the sign bit whenever lane 31 is set. The explicit zero check
// is needed because __builtin_ctz(0) is undefined.
//
// Targets without SSE2/AVX2 fall back to a plain lane-by-lane scan.
static inline CFIndex firstSetIndex(vector_char32 x) {
#if defined __AVX2__
    const uint32_t laneMask = (uint32_t)_mm256_movemask_epi8(x);
#elif defined __SSE2__
    const uint32_t laneMask = ((uint32_t)_mm_movemask_epi8(x.hi) << 16)
                            | (uint32_t)_mm_movemask_epi8(x.lo);
#else
    uint32_t laneMask = 0;
    for (int lane = 0; lane < 32; lane++) {
        if ((uint8_t)x[lane] & 0x80) {
            laneMask |= (uint32_t)1 << lane;
        }
    }
#endif
    return laneMask ? (CFIndex)__builtin_ctz(laneMask) : 32;
}
// Vectorized ASCII check: returns true when none of the first len bytes at
// `bytes` has its high bit set, false otherwise.
//
// The buffer is scanned with aligned 32-byte vector loads. The first load
// starts at the 32-byte boundary at or before `bytes`, so it may include
// bytes that precede the buffer; those are masked to zero. The last load may
// likewise include bytes beyond the end of the buffer; a high bit found there
// is ignored by comparing its position against the remaining length.
//
// bytes: start of the buffer to inspect.
// len:   number of bytes to inspect.
__attribute__((noinline)) Boolean __CFBytesInASCII_vec(const uint8_t *bytes, CFIndex len) {
    // Nothing to scan.
    if (len == 0) {
        return true;
    }

    // 32 zero bytes followed by 32 all-ones bytes. A 32-byte window that
    // starts at offset (32 - k) therefore begins with k zero bytes and is
    // all-ones after that.
    static const uint8_t leadMaskTable[64] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    // Round the start address down to a 32-byte boundary and remember how
    // many bytes in front of the buffer that pulls in.
    const uintptr_t start = (uintptr_t)bytes;
    const vector_char32 *chunk = (const vector_char32 *)(start & -32);
    const size_t lead = start - (uintptr_t)chunk;

    // Load the first vector with the `lead` preceding bytes zeroed out, and
    // count those bytes as part of the length still to be covered.
    const vector_char32 leadMask = *(const packed_char32 *)&leadMaskTable[32 - lead];
    vector_char32 lanes = *chunk & leadMask;
    CFIndex remaining = (CFIndex)(len + lead);

    // Walk the buffer one aligned vector at a time. The final load may extend
    // past the end of the buffer; because every load is 32-byte aligned it
    // cannot straddle a page boundary, which is what the original author
    // relies on for that over-read to be safe.
    for (;;) {
        if (vector_any(lanes)) {
            // A high bit was seen. It only counts if it lies inside the
            // buffer, i.e. its lane index is below the remaining length.
            return firstSetIndex(lanes) >= remaining;
        }
        if (remaining <= 32) {
            return true;
        }
        remaining -= 32;
        chunk += 1;
        lanes = *chunk;
    }
}
// Accumulates every benchmark result. Being a global with external linkage,
// the additions cannot be optimized away, which keeps the measured calls live.
long total = 0;
// Number of times each benchmark body is executed.
long repeatCount = 10000;

// Benchmarks the scalar and the vectorized ASCII checks against three inputs:
// a short ASCII string, the system word list, and the word list prefixed with
// a non-ASCII character (so both implementations can bail out immediately).
//
// Returns 0 on success, 1 if the word list cannot be read.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // Read the word list once and reuse it for both long inputs. A failed
        // read yields nil, whose UTF8String is NULL, so stop here instead of
        // handing NULL to strlen().
        NSError *readError = nil;
        NSString *words = [NSString stringWithContentsOfFile:@"/usr/share/dict/words"
                                                    encoding:NSUTF8StringEncoding
                                                       error:&readError];
        if (words == nil) {
            NSLog(@"Could not read /usr/share/dict/words: %@", readError);
            return 1;
        }
        NSString *shortString = @"hello!";
        NSString *earlyOutString = [@"ü" stringByAppendingString:words];

        const uint8_t *shortStringBytes = (const uint8_t *)[shortString UTF8String];
        CFIndex shortLen = strlen((const char *)shortStringBytes);

        const uint8_t *longStringBytes = (const uint8_t *)[words UTF8String];
        CFIndex longLen = strlen((const char *)longStringBytes);

        const uint8_t *longEarlyOutStringBytes = (const uint8_t *)[earlyOutString UTF8String];
        CFIndex longEarlyLen = strlen((const char *)longEarlyOutStringBytes);

        NSLog(@"shortLen: %ld longLen:%ld", (long)shortLen, (long)longLen);

        PresentTimeToPerformBlockWithIdentifier(@"short no-vec", repeatCount, ^{
            total += __CFBytesInASCII(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"short vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longEarlyOutStringBytes, longEarlyLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longEarlyOutStringBytes, longEarlyLen);
        });
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment