Skip to content

Instantly share code, notes, and snippets.

@benwills
Last active October 11, 2019 07:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benwills/5170f2e6adbb67e3ec4c to your computer and use it in GitHub Desktop.
Save benwills/5170f2e6adbb67e3ec4c to your computer and use it in GitHub Desktop.
Faster Letter Comparison and Lowercasing in C
#include <time.h>
#include <stdint.h>
#include <sys/types.h>
#include <stdio.h>
#include <ctype.h>
/*
Character-by-character comparisons for equality and lowercasing.
Custom functions vs tolower.
Spoiler: lookup table is the fastest.
Ref:
http://stackoverflow.com/questions/1533131/what-useful-bitwise-operator-code-tricks-should-a-developer-know-about
OSX, Early 2008 Core2duo Macbook Pro:
Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)
Target: x86_64-apple-darwin14.0.0
Default compile options.
----------------------------------------
Character equality: c=a, i=a
Int Comparison : 0.572851, ret=1
Bitwise by Char : 0.922786, ret=1
Bitwise by Int : 0.838946, ret=1
Lowercasing : c=a, i=a
Int Comparison : 1.213178, retc=a, reti=a
Bitwise by Char : 0.769129, retc=a, reti=a
Bitwise by Int : 0.925871, retc=a, reti=a
Lookup Table : 0.246519, retc=a, reti=a
C, tolower() : 1.422983, retc=a, reti=a
----------------------------------------
Character equality: c=A, i=A
Int Comparison : 0.572280, ret=1
Bitwise by Char : 0.922831, ret=1
Bitwise by Int : 0.838934, ret=1
Lowercasing : c=A, i=A
Int Comparison : 1.175130, retc=a, reti=a
Bitwise by Char : 0.768786, retc=a, reti=a
Bitwise by Int : 0.925634, retc=a, reti=a
Lookup Table : 0.246336, retc=a, reti=a
C, tolower() : 1.423670, retc=a, reti=a
----------------------------------------
Character equality: c=A, i=a
Int Comparison : 1.004436, ret=1
Bitwise by Char : 0.922884, ret=1
Bitwise by Int : 0.839327, ret=1
Lowercasing : c=A, i=a
Int Comparison : 1.541964, retc=a, reti=a
Bitwise by Char : 0.769409, retc=a, reti=a
Bitwise by Int : 0.929989, retc=a, reti=a
Lookup Table : 0.245945, retc=a, reti=a
C, tolower() : 1.423798, retc=a, reti=a
----------------------------------------
Character equality: c=a, i=A
Int Comparison : 1.066608, ret=1
Bitwise by Char : 0.923044, ret=1
Bitwise by Int : 0.839211, ret=1
Lowercasing : c=a, i=A
Int Comparison : 1.461407, retc=a, reti=a
Bitwise by Char : 0.768517, retc=a, reti=a
Bitwise by Int : 0.920370, retc=a, reti=a
Lookup Table : 0.244825, retc=a, reti=a
C, tolower() : 1.425014, retc=a, reti=a
----------------------------------------
Character equality: c=a, i=b
Int Comparison : 1.025282, ret=0
Bitwise by Char : 0.917728, ret=0
Bitwise by Int : 0.817272, ret=0
Lowercasing : c=a, i=b
Int Comparison : 1.196898, retc=a, reti=b
Bitwise by Char : 0.753090, retc=a, reti=b
Bitwise by Int : 0.881950, retc=a, reti=b
Lookup Table : 0.244865, retc=a, reti=b
C, tolower() : 1.507584, retc=a, reti=b
----------------------------------------
Character equality: c=A, i=B
Int Comparison : 1.074913, ret=0
Bitwise by Char : 0.917362, ret=0
Bitwise by Int : 0.817814, ret=0
Lowercasing : c=A, i=B
Int Comparison : 1.186432, retc=a, reti=b
Bitwise by Char : 0.753529, retc=a, reti=b
Bitwise by Int : 0.883034, retc=a, reti=b
Lookup Table : 0.246983, retc=a, reti=b
C, tolower() : 1.506687, retc=a, reti=b
----------------------------------------
Character equality: c=A, i=b
Int Comparison : 1.066994, ret=0
Bitwise by Char : 0.916946, ret=0
Bitwise by Int : 0.817921, ret=0
Lowercasing : c=A, i=b
Int Comparison : 1.519257, retc=a, reti=b
Bitwise by Char : 0.752842, retc=a, reti=b
Bitwise by Int : 0.883423, retc=a, reti=b
Lookup Table : 0.244678, retc=a, reti=b
C, tolower() : 1.506798, retc=a, reti=b
----------------------------------------
Character equality: c=a, i=B
Int Comparison : 1.046371, ret=0
Bitwise by Char : 0.917923, ret=0
Bitwise by Int : 0.817478, ret=0
Lowercasing : c=a, i=B
Int Comparison : 1.193203, retc=a, reti=b
Bitwise by Char : 0.753517, retc=a, reti=b
Bitwise by Int : 0.882687, retc=a, reti=b
Lookup Table : 0.246258, retc=a, reti=b
C, tolower() : 1.507298, retc=a, reti=b
----------------------------------------
Character equality: c=a, i=#
Int Comparison : 0.920624, ret=0
Bitwise by Char : 0.922115, ret=0
Bitwise by Int : 0.826664, ret=0
Lowercasing : c=a, i=#
Int Comparison : 1.139462, retc=a, reti=#
Bitwise by Char : 0.753159, retc=a, reti=#
Bitwise by Int : 0.882935, retc=a, reti=#
Lookup Table : 0.246948, retc=a, reti=#
C, tolower() : 1.507011, retc=a, reti=#
----------------------------------------
Character equality: c=A, i=#
Int Comparison : 0.963188, ret=0
Bitwise by Char : 0.917427, ret=0
Bitwise by Int : 0.817474, ret=0
Lowercasing : c=A, i=#
Int Comparison : 1.476776, retc=a, reti=#
Bitwise by Char : 0.754234, retc=a, reti=#
Bitwise by Int : 0.880066, retc=a, reti=#
Lookup Table : 0.247589, retc=a, reti=#
C, tolower() : 1.506737, retc=a, reti=#
Ubuntu 14.04, Intel Core i3-4130 3.4GHz:
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)
Default compile options.
----------------------------------------
Character equality: c=a, i=a
Int Comparison : 0.258454, ret=1
Bitwise by Char : 0.325387, ret=1
Bitwise by Int : 0.325156, ret=1
Lowercasing : c=a, i=a
Int Comparison : 0.431930, retc=a, reti=a
Bitwise by Char : 0.384494, retc=a, reti=a
Bitwise by Int : 0.412528, retc=a, reti=a
Lookup Table : 0.212934, retc=a, reti=a
C, tolower() : 0.413987, retc=a, reti=a
----------------------------------------
Character equality: c=A, i=A
Int Comparison : 0.248701, ret=1
Bitwise by Char : 0.325432, ret=1
Bitwise by Int : 0.325123, ret=1
Lowercasing : c=A, i=A
Int Comparison : 0.453836, retc=a, reti=a
Bitwise by Char : 0.383638, retc=a, reti=a
Bitwise by Int : 0.412110, retc=a, reti=a
Lookup Table : 0.212758, retc=a, reti=a
C, tolower() : 0.413019, retc=a, reti=a
----------------------------------------
Character equality: c=A, i=a
Int Comparison : 0.400491, ret=1
Bitwise by Char : 0.325215, ret=1
Bitwise by Int : 0.325129, ret=1
Lowercasing : c=A, i=a
Int Comparison : 0.436054, retc=a, reti=a
Bitwise by Char : 0.383542, retc=a, reti=a
Bitwise by Int : 0.412027, retc=a, reti=a
Lookup Table : 0.213572, retc=a, reti=a
C, tolower() : 0.413230, retc=a, reti=a
----------------------------------------
Character equality: c=a, i=A
Int Comparison : 0.434153, ret=1
Bitwise by Char : 0.325272, ret=1
Bitwise by Int : 0.325127, ret=1
Lowercasing : c=a, i=A
Int Comparison : 0.447174, retc=a, reti=a
Bitwise by Char : 0.383665, retc=a, reti=a
Bitwise by Int : 0.412064, retc=a, reti=a
Lookup Table : 0.211747, retc=a, reti=a
C, tolower() : 0.413054, retc=a, reti=a
----------------------------------------
Character equality: c=a, i=b
Int Comparison : 0.441284, ret=0
Bitwise by Char : 0.325369, ret=0
Bitwise by Int : 0.325198, ret=0
Lowercasing : c=a, i=b
Int Comparison : 0.431231, retc=a, reti=b
Bitwise by Char : 0.384089, retc=a, reti=b
Bitwise by Int : 0.412801, retc=a, reti=b
Lookup Table : 0.212828, retc=a, reti=b
C, tolower() : 0.413872, retc=a, reti=b
----------------------------------------
Character equality: c=A, i=B
Int Comparison : 0.492577, ret=0
Bitwise by Char : 0.325664, ret=0
Bitwise by Int : 0.325726, ret=0
Lowercasing : c=A, i=B
Int Comparison : 0.454852, retc=a, reti=b
Bitwise by Char : 0.384137, retc=a, reti=b
Bitwise by Int : 0.412802, retc=a, reti=b
Lookup Table : 0.214767, retc=a, reti=b
C, tolower() : 0.413261, retc=a, reti=b
----------------------------------------
Character equality: c=A, i=b
Int Comparison : 0.436243, ret=0
Bitwise by Char : 0.326071, ret=0
Bitwise by Int : 0.326138, ret=0
Lowercasing : c=A, i=b
Int Comparison : 0.436651, retc=a, reti=b
Bitwise by Char : 0.384182, retc=a, reti=b
Bitwise by Int : 0.412729, retc=a, reti=b
Lookup Table : 0.213827, retc=a, reti=b
C, tolower() : 0.413548, retc=a, reti=b
----------------------------------------
Character equality: c=a, i=B
Int Comparison : 0.435257, ret=0
Bitwise by Char : 0.325695, ret=0
Bitwise by Int : 0.325453, ret=0
Lowercasing : c=a, i=B
Int Comparison : 0.447456, retc=a, reti=b
Bitwise by Char : 0.383884, retc=a, reti=b
Bitwise by Int : 0.412691, retc=a, reti=b
Lookup Table : 0.210888, retc=a, reti=b
C, tolower() : 0.413269, retc=a, reti=b
----------------------------------------
Character equality: c=a, i=#
Int Comparison : 0.359236, ret=0
Bitwise by Char : 0.325235, ret=0
Bitwise by Int : 0.325312, ret=0
Lowercasing : c=a, i=#
Int Comparison : 0.421505, retc=a, reti=#
Bitwise by Char : 0.383549, retc=a, reti=#
Bitwise by Int : 0.412104, retc=a, reti=#
Lookup Table : 0.214018, retc=a, reti=#
C, tolower() : 0.413122, retc=a, reti=#
----------------------------------------
Character equality: c=A, i=#
Int Comparison : 0.384119, ret=0
Bitwise by Char : 0.325407, ret=0
Bitwise by Int : 0.325157, ret=0
Lowercasing : c=A, i=#
Int Comparison : 0.426874, retc=a, reti=#
Bitwise by Char : 0.383512, retc=a, reti=#
Bitwise by Int : 0.412132, retc=a, reti=#
Lookup Table : 0.210590, retc=a, reti=#
C, tolower() : 0.413017, retc=a, reti=#
Ubuntu 14.04, Dual XEON 5639...
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)
Default compile options.
----------------------------------------
Character equality: c=a, i=a
Int Comparison : 0.565190, ret=1
Bitwise by Char : 0.785993, ret=1
Bitwise by Int : 0.763952, ret=1
Lowercasing : c=a, i=a
Int Comparison : 0.821011, retc=a, reti=a
Bitwise by Char : 0.527960, retc=a, reti=a
Bitwise by Int : 0.689596, retc=a, reti=a
Lookup Table : 0.262672, retc=a, reti=a
C, tolower() : 0.712874, retc=a, reti=a
----------------------------------------
Character equality: c=A, i=A
Int Comparison : 0.449957, ret=1
Bitwise by Char : 0.629027, ret=1
Bitwise by Int : 0.632443, ret=1
Lowercasing : c=A, i=A
Int Comparison : 0.815186, retc=a, reti=a
Bitwise by Char : 0.527770, retc=a, reti=a
Bitwise by Int : 0.689184, retc=a, reti=a
Lookup Table : 0.262650, retc=a, reti=a
C, tolower() : 0.712935, retc=a, reti=a
----------------------------------------
Character equality: c=A, i=a
Int Comparison : 0.903786, ret=1
Bitwise by Char : 0.628831, ret=1
Bitwise by Int : 0.631501, ret=1
Lowercasing : c=A, i=a
Int Comparison : 0.820620, retc=a, reti=a
Bitwise by Char : 0.528388, retc=a, reti=a
Bitwise by Int : 0.689361, retc=a, reti=a
Lookup Table : 0.262671, retc=a, reti=a
C, tolower() : 0.712910, retc=a, reti=a
----------------------------------------
Character equality: c=a, i=A
Int Comparison : 0.993957, ret=1
Bitwise by Char : 0.628832, ret=1
Bitwise by Int : 0.632551, ret=1
Lowercasing : c=a, i=A
Int Comparison : 0.825468, retc=a, reti=a
Bitwise by Char : 0.527641, retc=a, reti=a
Bitwise by Int : 0.690275, retc=a, reti=a
Lookup Table : 0.262650, retc=a, reti=a
C, tolower() : 0.713079, retc=a, reti=a
----------------------------------------
Character equality: c=a, i=b
Int Comparison : 0.906740, ret=0
Bitwise by Char : 0.628899, ret=0
Bitwise by Int : 0.632397, ret=0
Lowercasing : c=a, i=b
Int Comparison : 0.821150, retc=a, reti=b
Bitwise by Char : 0.529154, retc=a, reti=b
Bitwise by Int : 0.690779, retc=a, reti=b
Lookup Table : 0.262642, retc=a, reti=b
C, tolower() : 0.712898, retc=a, reti=b
----------------------------------------
Character equality: c=A, i=B
Int Comparison : 1.066530, ret=0
Bitwise by Char : 0.628826, ret=0
Bitwise by Int : 0.631491, ret=0
Lowercasing : c=A, i=B
Int Comparison : 0.815569, retc=a, reti=b
Bitwise by Char : 0.528093, retc=a, reti=b
Bitwise by Int : 0.690138, retc=a, reti=b
Lookup Table : 0.262651, retc=a, reti=b
C, tolower() : 0.713039, retc=a, reti=b
----------------------------------------
Character equality: c=A, i=b
Int Comparison : 0.982759, ret=0
Bitwise by Char : 0.628859, ret=0
Bitwise by Int : 0.632969, ret=0
Lowercasing : c=A, i=b
Int Comparison : 0.820663, retc=a, reti=b
Bitwise by Char : 0.529026, retc=a, reti=b
Bitwise by Int : 0.689308, retc=a, reti=b
Lookup Table : 0.262657, retc=a, reti=b
C, tolower() : 0.712891, retc=a, reti=b
----------------------------------------
Character equality: c=a, i=B
Int Comparison : 0.975544, ret=0
Bitwise by Char : 0.628689, ret=0
Bitwise by Int : 0.632282, ret=0
Lowercasing : c=a, i=B
Int Comparison : 0.825451, retc=a, reti=b
Bitwise by Char : 0.528252, retc=a, reti=b
Bitwise by Int : 0.689892, retc=a, reti=b
Lookup Table : 0.262653, retc=a, reti=b
C, tolower() : 0.712891, retc=a, reti=b
----------------------------------------
Character equality: c=a, i=#
Int Comparison : 0.750271, ret=0
Bitwise by Char : 0.628792, ret=0
Bitwise by Int : 0.633015, ret=0
Lowercasing : c=a, i=#
Int Comparison : 0.749043, retc=a, reti=#
Bitwise by Char : 0.528778, retc=a, reti=#
Bitwise by Int : 0.689922, retc=a, reti=#
Lookup Table : 0.262653, retc=a, reti=#
C, tolower() : 0.712948, retc=a, reti=#
----------------------------------------
Character equality: c=A, i=#
Int Comparison : 0.814945, ret=0
Bitwise by Char : 0.628757, ret=0
Bitwise by Int : 0.631622, ret=0
Lowercasing : c=A, i=#
Int Comparison : 0.745289, retc=a, reti=#
Bitwise by Char : 0.528481, retc=a, reti=#
Bitwise by Int : 0.689926, retc=a, reti=#
Lookup Table : 0.262661, retc=a, reti=#
C, tolower() : 0.712898, retc=a, reti=#
uint8_t is_eq(char c, char i){
if (c == i)
return 1;
if ( ( (c >= 97) && (c <= 122) )
|| ( (c >= 65) && (c <= 90) ) )
if ( ( (i >= 97) && (i <= 122) )
|| ( (i >= 65) && (i <= 90) ) )
if ( ((c+32) == i) || ((c-32) == i) ) {
return 1;
}
return 0;
}
*/
uint8_t is_eq_int(char c, char i){
if (c == i)
return 1;
if ( ( (c >= 97) && (c <= 122) )
|| ( (c >= 65) && (c <= 90) ) )
if ( ( (i >= 97) && (i <= 122) )
|| ( (i >= 65) && (i <= 90) ) )
if ( ((c+32) == i) || ((c-32) == i) ) {
return 1;
}
return 0;
}
uint8_t is_eq_bsc(char c, char i){
c |= (c | ' ');
i |= (i | ' ');
return c == i;
}
uint8_t is_eq_bsi(char c, char i){
c |= (1 << 5);
i |= (1 << 5);
return c == i;
}
char to_lower(char c){
if ( (c >= 65) && (c <= 90) )
return c + 32;
return c; // already lower or not an alpha
}
char to_lower_bsc(char c){
return (c | ' ');
}
char to_lower_bsi(char c){
return c |= (1 << 5);
}
// Convert to lowercase
// Preserves \t, \r, \n
static const uint8_t cnv_char_tolower[128] = {
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,0x09,0x0A, 0 , 0 ,0x0D, 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
' ', '!', '"', '#', '$', '%', '&','\'', '(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
'@', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[','\\', ']', '^', '_',
'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 0,
};
void run_test(c, i, iters){
int ti;
float tStart, tDiff;
puts("----------------------------------------");
printf("Character equality: c=%c, i=%c\n", c, i);
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (is_eq_int(c, i)) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "Int Comparison : %f, ret=%d\n",
tDiff, is_eq_int(c, i));
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (is_eq_bsc(c, i)) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "Bitwise by Char : %f, ret=%d\n",
tDiff, is_eq_bsc(c, i));
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (is_eq_bsi(c, i)) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "Bitwise by Int : %f, ret=%d\n",
tDiff, is_eq_bsi(c, i));
printf("\nLowercasing : c=%c, i=%c\n", c, i);
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (to_lower(c) == to_lower(i)) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "Int Comparison : %f, retc=%c, reti=%c\n",
tDiff, to_lower(c), to_lower(i) );
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (to_lower_bsc(c) == to_lower_bsc(i)) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "Bitwise by Char : %f, retc=%c, reti=%c\n",
tDiff, to_lower_bsc(c), to_lower_bsc(i) );
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (to_lower_bsi(c) == to_lower_bsi(i)) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "Bitwise by Int : %f, retc=%c, reti=%c\n",
tDiff, to_lower_bsi(c), to_lower_bsi(i) );
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (cnv_char_tolower[c]) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "Lookup Table : %f, retc=%c, reti=%c\n",
tDiff, cnv_char_tolower[c], cnv_char_tolower[i] );
tStart = (float)clock()/CLOCKS_PER_SEC;
for (ti=0;ti<iters;++ti){
if (tolower(c) == tolower(i)) {}
}
tDiff = (float)clock()/CLOCKS_PER_SEC - tStart;
printf( "C, tolower() : %f, retc=%c, reti=%c\n",
tDiff, tolower(c), tolower(i) );
printf("\n");
}
int main(){
uint32_t iters = 100000000;
char c, i;
// Case variations of the same alpha
c = 'a'; i = 'a';
run_test(c, i, iters);
c = 'A'; i = 'A';
run_test(c, i, iters);
c = 'A'; i = 'a';
run_test(c, i, iters);
c = 'a'; i = 'A';
run_test(c, i, iters);
// Case variations of different alphas
c = 'a'; i = 'b';
run_test(c, i, iters);
c = 'A'; i = 'B';
run_test(c, i, iters);
c = 'A'; i = 'b';
run_test(c, i, iters);
c = 'a'; i = 'B';
run_test(c, i, iters);
// One alpha, one non-alpha
c = 'a'; i = '#';
run_test(c, i, iters);
c = 'A'; i = '#';
run_test(c, i, iters);
return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment