Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#import <Foundation/Foundation.h>
#import <mach/mach_time.h>
#import <simd/simd.h>
/// Converts an interval measured in mach_absolute_time() ticks into seconds.
///
/// The tick-to-seconds factor is derived from mach_timebase_info() on first
/// use and cached in a function-local static. If the timebase query fails the
/// factor stays 0.0, so the function returns 0 and retries on the next call.
///
/// @param machTimeInterval A difference between two mach_absolute_time() readings.
/// @return The interval expressed in seconds.
NSTimeInterval SecondsFromMachTimeInterval(uint64_t machTimeInterval) {
    // 0.0 doubles as the "not yet computed" sentinel.
    static double secondsPerTick = 0.0;

    if (secondsPerTick == 0.0) {
        mach_timebase_info_data_t info;
        if (mach_timebase_info(&info) == KERN_SUCCESS) {
            // numer/denom scales ticks to nanoseconds; step down to
            // microseconds and then to seconds.
            const double nanosPerTick = (double)info.numer / (double)info.denom;
            const double microsPerTick = nanosPerTick / 1000;
            secondsPerTick = microsPerTick / 1000000;
        }
    }

    return secondsPerTick * machTimeInterval;
}
/// Measures how long it takes to invoke a block a given number of times.
///
/// @param blockToTime The block to invoke; it is called synchronously.
/// @param repCount    Number of invocations; values <= 0 invoke nothing.
/// @return Total elapsed time for all invocations, in seconds (not per call).
NSTimeInterval TimeToPerformBlock(void (^blockToTime)(void), long repCount) {
    const uint64_t startTicks = mach_absolute_time();

    for (long remaining = repCount; remaining > 0; remaining--) {
        blockToTime();
    }

    const uint64_t elapsedTicks = mach_absolute_time() - startTicks;
    return SecondsFromMachTimeInterval(elapsedTicks);
}
/// Logs a timing result as a left-justified, 80-column label followed by the
/// duration in seconds.
///
/// @param seconds    The measured duration.
/// @param identifier Label describing what was measured.
void logMachTimeInterval_withIdentifier_(NSTimeInterval seconds, NSString *identifier) {
    // %-80s needs a C string, so hand NSLog the UTF-8 bytes of the label.
    const char *label = [identifier UTF8String];
    NSLog(@"%-80s %g seconds\n", label, seconds);
}
/// Times `repCount` invocations of `blockToTime` and logs the total under
/// `identifier`.
///
/// @param identifier  Label printed next to the measurement.
/// @param repCount    Number of times the block is invoked.
/// @param blockToTime The work being measured.
__attribute__((noinline)) void PresentTimeToPerformBlockWithIdentifier(NSString *identifier, long repCount, void (^blockToTime)(void)) {
    const NSTimeInterval elapsedSeconds = TimeToPerformBlock(blockToTime, repCount);
    logMachTimeInterval_withIdentifier_(elapsedSeconds, identifier);
}
#if __LP64__
// Loads 8 bytes from a possibly unaligned address. Going through memcpy keeps
// the access well-defined regardless of the pointer's alignment; optimizing
// compilers lower a fixed-size memcpy like this to a single load.
static inline uint64_t CFBytesLoadU64(const uint8_t *p) {
    uint64_t value;
    memcpy(&value, p, sizeof(value));
    return value;
}
#endif

// Loads 4 bytes from a possibly unaligned address (see CFBytesLoadU64).
static inline uint32_t CFBytesLoadU32(const uint8_t *p) {
    uint32_t value;
    memcpy(&value, p, sizeof(value));
    return value;
}

/// Scalar check that every byte in a buffer is 7-bit ASCII.
///
/// Scans the buffer in progressively smaller chunks (32/16/8 bytes on LP64,
/// then 4 bytes, then single bytes) testing the high bit of each byte.
///
/// @param bytes Start of the buffer. May have any alignment.
/// @param len   Number of bytes to examine. Zero or negative lengths examine
///              nothing and yield true.
/// @return true if no examined byte has its high bit (0x80) set, else false.
__attribute__((noinline)) Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
#if __LP64__
    static const uint64_t kHighBits64 = 0x8080808080808080ULL;

    /* A bit of unrolling; go by 32s, 16s, and 8s first */
    while (len >= 32) {
        // Collect the high bits of all four words and branch once, rather
        // than doing a conditional at every step.
        uint64_t hiBits = CFBytesLoadU64(bytes) & kHighBits64;
        hiBits |= CFBytesLoadU64(bytes + 8) & kHighBits64;
        hiBits |= CFBytesLoadU64(bytes + 16) & kHighBits64;
        hiBits |= CFBytesLoadU64(bytes + 24) & kHighBits64;
        if (hiBits) return false;
        bytes += 32;
        len -= 32;
    }
    while (len >= 16) {
        uint64_t hiBits = CFBytesLoadU64(bytes) & kHighBits64;
        hiBits |= CFBytesLoadU64(bytes + 8) & kHighBits64;
        if (hiBits) return false;
        bytes += 16;
        len -= 16;
    }
    while (len >= 8) {
        if (CFBytesLoadU64(bytes) & kHighBits64) return false;
        bytes += 8;
        len -= 8;
    }
#endif
    /* Go by 4s */
    while (len >= 4) {
        if (CFBytesLoadU32(bytes) & 0x80808080U) return false;
        bytes += 4;
        len -= 4;
    }
    /* Handle the rest one byte at a time. The `> 0` guard stops a negative
       length from walking off the buffer. */
    while (len-- > 0) {
        if (*bytes++ & 0x80) return false;
    }
    return true;
}
/// Returns the index (0-31) of the first lane of `x` whose high bit is set.
///
/// The high bit of each byte is gathered into a 32-bit mask and the position
/// of its lowest set bit is returned.
///
/// @param x A 32-byte vector. At least one lane must have its high bit set:
///          the mask is passed to __builtin_ctz, whose result is undefined
///          for zero.
static inline CFIndex firstSetIndex(vector_char32 x) {
#if defined __AVX2__
    const uint32_t laneMask = (uint32_t)_mm256_movemask_epi8(x);
#else
    // Combine the two 16-bit half masks as unsigned values: shifting the
    // signed int result left by 16 would overflow into the sign bit whenever
    // lane 31 is set.
    const uint32_t upperMask = (uint32_t)_mm_movemask_epi8(x.hi);
    const uint32_t lowerMask = (uint32_t)_mm_movemask_epi8(x.lo);
    const uint32_t laneMask = (upperMask << 16) | lowerMask;
#endif
    return __builtin_ctz(laneMask);
}
/// Vectorized check that every byte in a buffer is 7-bit ASCII.
///
/// Scans 32 bytes at a time using aligned vector loads. The first load starts
/// at the 32-byte boundary at or before `bytes`, with the leading bytes that
/// precede the buffer masked off.
///
/// NOTE(review): this deliberately reads outside [bytes, bytes + len) within
/// the enclosing aligned 32-byte vectors. That is outside what the C object
/// model permits and tools such as AddressSanitizer are expected to flag it.
///
/// @param bytes Start of the buffer. May have any alignment.
/// @param len   Number of bytes to examine. Zero or negative lengths examine
///              nothing and yield true.
/// @return true if no examined byte has its high bit set, else false.
__attribute__((noinline)) Boolean __CFBytesInASCII_vec(const uint8_t *bytes, CFIndex len) {
    // Early-out if there is nothing to examine. A negative length would
    // otherwise fall through, touch memory, and still return true.
    if (len <= 0) return true;
    // Back up to a 32-byte aligned memory location.
    const vector_char32 *aligned = (const vector_char32 *)((uintptr_t)bytes & -32);
    const size_t adjust = bytes - (const uint8_t *)aligned;
    // Generate a mask to zero-out the first `adjust` bytes of a vector: an
    // unaligned 32-byte window starting at offset 32 - adjust holds `adjust`
    // zero bytes followed by 0xFF bytes.
    static const uint8_t maskSource[64] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };
    vector_char32 mask = *(const packed_char32 *)&maskSource[32 - adjust];
    // Load the first vector and zero out any bytes preceding the buffer we
    // are actually interested in. Adjust the length to account for these
    // extra bytes as well, so `len` is now measured from `aligned`.
    vector_char32 data = *aligned & mask;
    len += adjust;
    // Now scan by aligned vectors until we either find a non-ASCII byte or
    // we reach the end of the buffer. This may read past the end of the
    // buffer, but because every load is a 32-byte aligned 32-byte vector it
    // never crosses a page boundary beyond the end of the buffer.
    while (!vector_any(data)) {
        if (len <= 32) return true;
        len -= 32;
        data = *++aligned;
    }
    // We found a byte with its high bit set. If its index in the vector is
    // less than the remaining length it lies inside the buffer, so return
    // false. Otherwise it is past the end of the buffer, so return true.
    return firstSetIndex(data) >= len;
}
// Number of times each benchmark block is invoked per measurement.
long repeatCount = 10000;
// Running sum of the Boolean results returned by the benchmarked functions
// (see main). Presumably kept so the calls have an observable side effect
// and are not optimized away.
long total = 0;
/// Benchmarks the scalar and vectorized ASCII checks on three inputs: a short
/// string, the system word list, and the word list prefixed with a non-ASCII
/// character (so both implementations can bail out immediately).
///
/// @return 0 on success, 1 if the word list could not be read.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // Read the word list once and reuse it. A nil result (missing file,
        // wrong encoding) would otherwise turn into strlen(NULL) below.
        NSError *readError = nil;
        NSString *words = [NSString stringWithContentsOfFile:@"/usr/share/dict/words"
                                                    encoding:NSUTF8StringEncoding
                                                       error:&readError];
        if (words == nil) {
            NSLog(@"Unable to read /usr/share/dict/words: %@", readError);
            return 1;
        }
        NSString *earlyOutWords = [@"ü" stringByAppendingString:words];

        const uint8_t *shortStringBytes = (const uint8_t *)[@"hello!" UTF8String];
        CFIndex shortLen = strlen((const char *)shortStringBytes);
        const uint8_t *longStringBytes = (const uint8_t *)[words UTF8String];
        CFIndex longLen = strlen((const char *)longStringBytes);
        const uint8_t *longEarlyOutStringBytes = (const uint8_t *)[earlyOutWords UTF8String];
        CFIndex longEarlyLen = strlen((const char *)longEarlyOutStringBytes);
        NSLog(@"shortLen: %ld longLen:%ld", shortLen, longLen);

        // Each result is added to the global `total` inside the timed block.
        PresentTimeToPerformBlockWithIdentifier(@"short no-vec", repeatCount, ^{
            total += __CFBytesInASCII(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"short vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longEarlyOutStringBytes, longEarlyLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longEarlyOutStringBytes, longEarlyLen);
        });
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.