这是从lua-5.1.1中分离出来的字符串实现代码。很好地利用散列值优化。只需要一次整数的比较,即可实现字符串的比较。这个实现还有另一大好处,那就是空间优化,多份。
Last active
July 26, 2021 10:09
-
-
Save losophy/2dab2916b752088ddb669aa6fed42450 to your computer and use it in GitHub Desktop.
lua-5.1.1中的字符串实现
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void luaS_resize (lua_State *L, int newsize) { | |
GCObject **newhash; | |
stringtable *tb; | |
int i; | |
if (G(L)->gcstate == GCSsweepstring) | |
return; /* cannot resize during GC traverse */ | |
newhash = luaM_newvector(L, newsize, GCObject *); | |
tb = &G(L)->strt; | |
for (i=0; i<newsize; i++) newhash[i] = NULL; | |
/* rehash */ | |
for (i=0; i<tb->size; i++) { | |
GCObject *p = tb->hash[i]; | |
while (p) { /* for each node in the list */ | |
GCObject *next = p->gch.next; /* save next */ | |
unsigned int h = gco2ts(p)->hash; | |
int h1 = lmod(h, newsize); /* new position */ | |
lua_assert(cast_int(h%newsize) == lmod(h, newsize)); | |
p->gch.next = newhash[h1]; /* chain it */ | |
newhash[h1] = p; | |
p = next; | |
} | |
} | |
luaM_freearray(L, tb->hash, tb->size, TString *); | |
tb->size = newsize; | |
tb->hash = newhash; | |
} | |
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { | |
GCObject *o; | |
unsigned int h = cast(unsigned int, l); /* seed */ | |
size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */ | |
size_t l1; | |
for (l1=l; l1>=step; l1-=step) /* compute hash */ | |
h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); | |
for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];//如果这里已经有元素,则使用链表串接起来 | |
o != NULL; | |
o = o->gch.next) { | |
TString *ts = rawgco2ts(o); | |
if (ts->tsv.len == l && (memcmp(str, getstr(ts), l) == 0)) { | |
/* string may be dead */ | |
if (isdead(G(L), o)) changewhite(o); | |
return ts; | |
} | |
} | |
return newlstr(L, str, l, h); /* not found */ | |
} | |
static TString *newlstr (lua_State *L, const char *str, size_t l, | |
unsigned int h) { | |
TString *ts; | |
stringtable *tb; | |
if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char)) | |
luaM_toobig(L); | |
ts = cast(TString *, luaM_malloc(L, (l+1)*sizeof(char)+sizeof(TString))); | |
ts->tsv.len = l; | |
ts->tsv.hash = h; | |
ts->tsv.marked = luaC_white(G(L)); | |
ts->tsv.tt = LUA_TSTRING; | |
ts->tsv.reserved = 0; | |
memcpy(ts+1, str, l*sizeof(char)); | |
((char *)(ts+1))[l] = '\0'; /* ending 0 */ | |
tb = &G(L)->strt; | |
h = lmod(h, tb->size); | |
ts->tsv.next = tb->hash[h]; /* chain new entry */ | |
tb->hash[h] = obj2gco(ts); | |
tb->nuse++; | |
if (tb->nuse > cast(lu_int32, tb->size) && tb->size <= MAX_INT/2) | |
luaS_resize(L, tb->size*2); /* too crowded */ | |
return ts; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
@@ LUAI_USER_ALIGNMENT_T is a type that requires maximum alignment. | |
** CHANGE it if your system requires alignments larger than double. (For | |
** instance, if your system supports long doubles and they must be | |
** aligned in 16-byte boundaries, then you should add long double in the | |
** union.) Probably you do not need to change this. | |
*/ | |
#define LUAI_USER_ALIGNMENT_T union { double u; void *s; long l; } | |
/* type to ensure maximum alignment */ | |
typedef LUAI_USER_ALIGNMENT_T L_Umaxalign; | |
/* | |
** String headers for string table | |
*/ | |
typedef union TString { | |
L_Umaxalign dummy; /* ensures maximum alignment for strings */ | |
struct { | |
CommonHeader; | |
lu_byte reserved;//是否是Lua虚拟机中的保留字符串,1不会在GC阶段被回收 | |
unsigned int hash;//字符串的散列值 | |
size_t len;//字符串长度 | |
} tsv; | |
} TString; | |
union GCObject { | |
GCheader gch; | |
union TString ts; | |
}; | |
typedef struct stringtable { | |
GCObject **hash;//这是一个散列数组,专门用于存放字符串 | |
lu_int32 nuse; /* number of elements */ | |
int size; | |
} stringtable; | |
typedef struct global_State { | |
stringtable strt; /* hash table for strings */ | |
} global_State; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment