Created
May 25, 2010 05:37
-
-
Save saga/412811 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "stdafx.h" | |
#include <stdlib.h> | |
/* Magic numbers for the algorithm */ | |
static const unsigned long mask01 = 0x01010101; | |
static const unsigned long mask80 = 0x80808080; | |
#define LONGPTR_MASK (sizeof(long) - 1) | |
/* | |
* Helper macro to return string length if we caught the zero | |
* byte. | |
*/ | |
#define testbyte(x) \ | |
do { \ | |
if (p[x] == '\0') \ | |
return (p - str + x); \ | |
} while (0) | |
inline size_t | |
strlen2(const char *str) | |
{ | |
const char *p; | |
const unsigned long *lp; | |
/* Skip the first few bytes until we have an aligned p */ | |
for (p = str; (uintptr_t)p & LONGPTR_MASK; p++) | |
if (*p == '\0') | |
return (p - str); | |
/* Scan the rest of the string using word sized operation */ | |
for (lp = (const unsigned long *)p; ; lp++) | |
if ((*lp - mask01) & mask80) { | |
p = (const char *)(lp); | |
testbyte(0); | |
testbyte(1); | |
testbyte(2); | |
testbyte(3); | |
} | |
/* NOTREACHED */ | |
return 0; | |
} | |
inline int strlen3(const char* string) | |
{ | |
__asm | |
{ | |
mov ecx, string ; ecx -> string | |
test ecx,3 ; test if string is aligned on 32 bits | |
je short main_loop | |
str_misaligned: | |
; simple byte loop until string is aligned | |
mov al,byte ptr [ecx] | |
add ecx,1 | |
test al,al | |
je short byte_3 | |
test ecx,3 | |
jne short str_misaligned | |
add eax,dword ptr 0 ; 5 byte nop to align label below | |
align 16 ; should be redundant | |
main_loop: | |
mov eax,dword ptr [ecx] ; read 4 bytes | |
mov edx,7efefeffh | |
add edx,eax | |
xor eax,-1 | |
xor eax,edx | |
add ecx,4 | |
test eax,81010100h | |
je short main_loop | |
; found zero byte in the loop | |
mov eax,[ecx - 4] | |
test al,al ; is it byte 0 | |
je short byte_0 | |
test ah,ah ; is it byte 1 | |
je short byte_1 | |
test eax,00ff0000h ; is it byte 2 | |
je short byte_2 | |
test eax,0ff000000h ; is it byte 3 | |
je short byte_3 | |
jmp short main_loop ; taken if bits 24-30 are clear and bit | |
; 31 is set | |
byte_3: | |
lea eax,[ecx - 1] | |
mov ecx,string | |
sub eax,ecx | |
jmp ReturnLabel | |
byte_2: | |
lea eax,[ecx - 2] | |
mov ecx,string | |
sub eax,ecx | |
jmp ReturnLabel | |
byte_1: | |
lea eax,[ecx - 3] | |
mov ecx,string | |
sub eax,ecx | |
jmp ReturnLabel | |
byte_0: | |
lea eax,[ecx - 4] | |
mov ecx,string | |
sub eax,ecx | |
jmp ReturnLabel | |
} | |
ReturnLabel: | |
return; | |
} | |
#include "logger.h" | |
int _tmain(int argc, _TCHAR* argv[]) | |
{ | |
char* str = "aaabcaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbdefffffghgfgfgfgfggaaaaaaaaaaaaffffff" \ | |
"aaabcaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbdefffffghgfgfgfgfggaaaaaaaaaaaaffffff" \ | |
"fffgggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhheeeeeeeeeeeeeeeeeeeeghhhhhhhhhhhhhh"; | |
CPerfTimer timer1(TRUE); | |
for (int i=0; i< 100000; i++) | |
{ | |
int len = strlen2(str); | |
} | |
timer1.Stop(); | |
double timeA = timer1.Elapsedus(); | |
timer1.Start(TRUE); | |
for (int i=0; i< 100000; i++) | |
{ | |
int len = strlen(str); | |
} | |
timer1.Stop(); | |
double timeB = timer1.Elapsedus(); | |
timer1.Start(TRUE); | |
for (int i=0; i< 100000; i++) | |
{ | |
int len = strlen3(str); | |
} | |
timer1.Stop(); | |
double timeC = timer1.Elapsedus(); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment