Skip to content

Instantly share code, notes, and snippets.

@amyangfei
Last active August 29, 2015 14:13
Show Gist options
  • Save amyangfei/f8f30a049016b706ecd7 to your computer and use it in GitHub Desktop.
Save amyangfei/f8f30a049016b706ecd7 to your computer and use it in GitHub Desktop.
python source learn
/* Dictionary reuse scheme to save calls to malloc, free, and memset */
// Dict对象缓冲池
#ifndef PyDict_MAXFREELIST
#define PyDict_MAXFREELIST 80
#endif
static PyDictObject *free_list[PyDict_MAXFREELIST];
static int numfree = 0;
PyObject *
PyDict_New(void)
{
register PyDictObject *mp;
// 创建 dummy 对象
if (dummy == NULL) { /* Auto-initialize dummy */
dummy = PyString_FromString("<dummy key>");
if (dummy == NULL)
return NULL;
}
if (numfree) {
// 使用缓冲池
mp = free_list[--numfree];
assert (mp != NULL);
assert (Py_TYPE(mp) == &PyDict_Type);
_Py_NewReference((PyObject *)mp);
if (mp->ma_fill) {
EMPTY_TO_MINSIZE(mp);
} else {
/* At least set ma_table and ma_mask; these are wrong
if an empty but presized dict is added to freelist */
INIT_NONZERO_DICT_SLOTS(mp);
}
assert (mp->ma_used == 0);
assert (mp->ma_table == mp->ma_smalltable);
assert (mp->ma_mask == PyDict_MINSIZE - 1);
// some counter update ...
} else {
// 创建PyDictObject对象
mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
if (mp == NULL)
return NULL;
EMPTY_TO_MINSIZE(mp);
// some counter update ...
}
mp->ma_lookup = lookdict_string;
// some counter update ...
return (PyObject *)mp;
}
/*
The basic lookup function used by all operations.
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
Open addressing is preferred over chaining since the link overhead for
chaining would be substantial (100% with typical malloc overhead).
The initial probe index is computed as hash mod the table size. Subsequent
probe indices are computed as explained earlier.
All arithmetic on hash should ignore overflow.
(The details in this version are due to Tim Peters, building on many past
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
Christian Tismer).
lookdict() is general-purpose, and may return NULL if (and only if) a
comparison raises an exception (this was new in Python 2.5).
lookdict_string() below is specialized to string keys, comparison of which can
never raise an exception; that function can never return NULL. For both, when
the key isn't found a PyDictEntry* is returned for which the me_value field is
NULL; this is the slot in the dict at which the key would have been found, and
the caller can (if it wishes) add the <key, value> pair to the returned
PyDictEntry*.
*/
// 搜索成功返回搜索到的 entry;搜索失败返回一个可用的 entry
static PyDictEntry *
lookdict(PyDictObject *mp, PyObject *key, register long hash)
{
register size_t i;
register size_t perturb;
register PyDictEntry *freeslot;
register size_t mask = (size_t)mp->ma_mask;
PyDictEntry *ep0 = mp->ma_table;
register PyDictEntry *ep;
register int cmp;
PyObject *startkey;
// [1]根据 hash 指获取 entry 的索引
i = (size_t)hash & mask;
ep = &ep0[i];
// [2]entry 处于 Unused 状态,表明冲突探测链搜索完成,搜索失败,返回 ep
// ep->me_key == key 引用相同,搜索成功
if (ep->me_key == NULL || ep->me_key == key)
return ep;
// [3]若当前 entry 处于 dummy 状态,设置 freeslot
if (ep->me_key == dummy)
freeslot = ep;
else {
// [4]检查 Active 态的 entry 中的 key 与待查找的 key 是否“值相同”,若成立,搜索成功
// 值检查时先比较 hash,然后通过 PyObject_RichCompareBool
if (ep->me_hash == hash) {
startkey = ep->me_key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
if (ep0 == mp->ma_table && ep->me_key == startkey) {
if (cmp > 0)
return ep;
}
else {
/* The compare did major nasty stuff to the
* dict: start over.
* XXX A clever adversary could prevent this
* XXX from terminating.
*/
return lookdict(mp, key, hash);
}
}
freeslot = NULL;
}
/* In the loop, me_key == dummy is by far (factor of 100s) the
least likely outcome, so test for that last. */
// 冲突探测链上第一个 entry的 key 与待查找的 key 不匹配,继续寻找
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
// [5]寻找探测链上的下一个 entry
i = (i << 2) + i + perturb + 1;
ep = &ep0[i & mask];
// [6]Unused 态的 entry,搜索失败, 返回 ep 或者 freeslot
if (ep->me_key == NULL)
return freeslot == NULL ? ep : freeslot;
// [7]检查 key 的引用是否相同
if (ep->me_key == key)
return ep;
// [8]检查值相同是否成立
if (ep->me_hash == hash && ep->me_key != dummy) {
startkey = ep->me_key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
if (ep0 == mp->ma_table && ep->me_key == startkey) {
if (cmp > 0)
return ep;
}
else {
/* The compare did major nasty stuff to the
* dict: start over.
* XXX A clever adversary could prevent this
* XXX from terminating.
*/
return lookdict(mp, key, hash);
}
}
// [9]设置 freeslot
else if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep;
}
assert(0); /* NOT REACHED */
return 0;
}
/*
* Hacked up version of lookdict which can assume keys are always strings;
* this assumption allows testing for errors during PyObject_RichCompareBool()
* to be dropped; string-string comparisons never raise exceptions. This also
* means we don't need to go through PyObject_RichCompareBool(); we can always
* use _PyString_Eq() directly.
*
* This is valuable because dicts with only string keys are very common.
*/
static PyDictEntry *
lookdict_string(PyDictObject *mp, PyObject *key, register long hash)
{
register size_t i;
register size_t perturb;
register PyDictEntry *freeslot;
register size_t mask = (size_t)mp->ma_mask;
PyDictEntry *ep0 = mp->ma_table;
register PyDictEntry *ep;
/* Make sure this function doesn't have to handle non-string keys,
including subclasses of str; e.g., one reason to subclass
strings is to override __eq__, and for speed we don't cater to
that here. */
if (!PyString_CheckExact(key)) {
#ifdef SHOW_CONVERSION_COUNTS
++converted;
#endif
mp->ma_lookup = lookdict;
return lookdict(mp, key, hash);
}
i = hash & mask;
ep = &ep0[i];
if (ep->me_key == NULL || ep->me_key == key)
return ep;
if (ep->me_key == dummy)
freeslot = ep;
else {
if (ep->me_hash == hash && _PyString_Eq(ep->me_key, key))
return ep;
freeslot = NULL;
}
/* In the loop, me_key == dummy is by far (factor of 100s) the
least likely outcome, so test for that last. */
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
i = (i << 2) + i + perturb + 1;
ep = &ep0[i & mask];
if (ep->me_key == NULL)
return freeslot == NULL ? ep : freeslot;
if (ep->me_key == key
|| (ep->me_hash == hash
&& ep->me_key != dummy
&& _PyString_Eq(ep->me_key, key)))
return ep;
if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep;
}
assert(0); /* NOT REACHED */
return 0;
}
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
* dictionary if it's merely replacing the value for an existing key.
* This means that it's safe to loop over a dictionary with PyDict_Next()
* and occasionally replace a value -- but you can't insert new keys or
* remove them.
*/
// 插入元素
int
PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
{
register long hash;
if (!PyDict_Check(op)) {
PyErr_BadInternalCall();
return -1;
}
assert(key);
assert(value);
// 计算 key 的 hash 值
if (PyString_CheckExact(key)) {
hash = ((PyStringObject *)key)->ob_shash;
if (hash == -1)
hash = PyObject_Hash(key);
}
else {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
}
return dict_set_item_by_hash_or_entry(op, key, hash, NULL, value);
}
static int
dict_set_item_by_hash_or_entry(register PyObject *op, PyObject *key,
long hash, PyDictEntry *ep, PyObject *value)
{
register PyDictObject *mp;
register Py_ssize_t n_used;
mp = (PyDictObject *)op;
assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */
n_used = mp->ma_used;
Py_INCREF(value);
Py_INCREF(key);
if (ep == NULL) {
// PyDict_SetItem 函数会走此分支,insertdict会按照 key 查找一个 entry 并调用insertdict_by_entry
if (insertdict(mp, key, hash, value) != 0)
return -1;
}
else {
// dict_setdefault 函数会走此分支
if (insertdict_by_entry(mp, key, hash, ep, value) != 0)
return -1;
}
/* If we added a key, we can safely resize. Otherwise just return!
* If fill >= 2/3 size, adjust size. Normally, this doubles or
* quaduples the size, but it's also possible for the dict to shrink
* (if ma_fill is much larger than ma_used, meaning a lot of dict
* keys have been * deleted).
*
* Quadrupling the size improves average dictionary sparseness
* (reducing collisions) at the cost of some memory and iteration
* speed (which loops over every possible entry). It also halves
* the number of expensive resize operations in a growing dictionary.
*
* Very large dictionaries (over 50K items) use doubling instead.
* This may help applications with severe memory constraints.
*/
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
return 0;
return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
}
// 返回迭代对象,使用 dict.iteritems() 遍历时使用
static PyObject *
dict_iteritems(PyDictObject *dict)
{
return dictiter_new(dict, &PyDictIterItem_Type);
}
/* Dictionary iterator types */
typedef struct {
PyObject_HEAD
PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
Py_ssize_t di_used;
Py_ssize_t di_pos; /* 记录当前遍历的位置 */
PyObject* di_result; /* reusable result tuple for iteritems */
Py_ssize_t len;
} dictiterobject;
static PyObject *
dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
{
dictiterobject *di;
di = PyObject_GC_New(dictiterobject, itertype);
if (di == NULL)
return NULL;
Py_INCREF(dict);
di->di_dict = dict;
di->di_used = dict->ma_used;
di->di_pos = 0;
di->len = dict->ma_used;
if (itertype == &PyDictIterItem_Type) {
di->di_result = PyTuple_Pack(2, Py_None, Py_None);
if (di->di_result == NULL) {
Py_DECREF(di);
return NULL;
}
}
else
di->di_result = NULL;
_PyObject_GC_TRACK(di);
return (PyObject *)di;
}
static PyObject *dictiter_iternextitem(dictiterobject *di)
{
PyObject *key, *value, *result = di->di_result;
register Py_ssize_t i, mask;
register PyDictEntry *ep;
PyDictObject *d = di->di_dict;
if (d == NULL)
return NULL;
assert (PyDict_Check(d));
if (di->di_used != d->ma_used) {
PyErr_SetString(PyExc_RuntimeError,
"dictionary changed size during iteration");
di->di_used = -1; /* Make this state sticky */
return NULL;
}
i = di->di_pos;
if (i < 0)
goto fail;
ep = d->ma_table;
mask = d->ma_mask;
while (i <= mask && ep[i].me_value == NULL)
i++;
di->di_pos = i+1;
if (i > mask)
goto fail;
if (result->ob_refcnt == 1) {
// result 还保存着刚刚遍历过的记录,修改相关的引用计数
Py_INCREF(result);
Py_DECREF(PyTuple_GET_ITEM(result, 0));
Py_DECREF(PyTuple_GET_ITEM(result, 1));
} else {
// 创建新的元组
result = PyTuple_New(2);
if (result == NULL)
return NULL;
}
di->len--;
key = ep[i].me_key;
value = ep[i].me_value;
Py_INCREF(key);
Py_INCREF(value);
PyTuple_SET_ITEM(result, 0, key);
PyTuple_SET_ITEM(result, 1, value);
return result;
fail:
Py_DECREF(d);
di->di_dict = NULL;
return NULL;
}
#define PyDict_MINSIZE 8
typedef struct {
/* Cached hash code of me_key. Note that hash codes are C longs.
* We have to use Py_ssize_t instead because dict_popitem() abuses
* me_hash to hold a search finger.
*/
Py_ssize_t me_hash;
PyObject *me_key;
PyObject *me_value;
} PyDictEntry;
/*
To ensure the lookup algorithm terminates, there must be at least one Unused
slot (NULL key) in the table.
The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
values == the number of Active items).
To avoid slowing down lookups on a near-full table, we resize the table when
it's two-thirds full.
*/
typedef struct _dictobject PyDictObject;
struct _dictobject {
PyObject_HEAD
Py_ssize_t ma_fill; /* # Active + # Dummy */
Py_ssize_t ma_used; /* # Active */
/* The table contains ma_mask + 1 slots, and that's a power of 2.
* We store the mask instead of the size because the mask is more
* frequently needed.
*/
Py_ssize_t ma_mask;
/* ma_table points to ma_smalltable for small tables, else to
* additional malloc'ed memory. ma_table is never NULL! This rule
* saves repeated runtime null-tests in the workhorse getitem and
* setitem calls.
*/
PyDictEntry *ma_table;
PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, long hash);
PyDictEntry ma_smalltable[PyDict_MINSIZE];
};
typedef struct {
int b_type; /* what kind of block this is */
int b_handler; /* where to jump to find handler */
int b_level; /* value stack level to pop to */
} PyTryBlock;
typedef struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
/* Next free slot in f_valuestack. Frame creation sets to f_valuestack.
Frame evaluation usually NULLs it, but a frame that yields sets it
to the current stack top. */
PyObject **f_stacktop;
PyObject *f_trace; /* Trace function */
/* If an exception is raised in this frame, the next three are used to
* record the exception info (if any) originally in the thread state. See
* comments before set_exc_info() -- it's not obvious.
* Invariant: if _type is NULL, then so are _value and _traceback.
* Desired invariant: all three are NULL, or all three are non-NULL. That
* one isn't currently true, but "should be".
*/
PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;
PyThreadState *f_tstate;
int f_lasti; /* Last instruction if called */
/* Call PyFrame_GetLineNumber() instead of reading this field
directly. As of 2.3 f_lineno is only valid when tracing is
active (i.e. when f_trace is set). At other times we use
PyCode_Addr2Line to calculate the line from the current
bytecode index. */
int f_lineno; /* Current line number */
int f_iblock; /* index in f_blockstack */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
struct bootstate {
PyInterpreterState *interp;
PyObject *func;
PyObject *args;
PyObject *keyw;
PyThreadState *tstate;
};
// thread 模块初始化线程会调用此接口
static PyObject *
thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs)
{
PyObject *func, *args, *keyw = NULL;
struct bootstate *boot;
long ident;
if (!PyArg_UnpackTuple(fargs, "start_new_thread", 2, 3,
&func, &args, &keyw))
return NULL;
if (!PyCallable_Check(func)) {
PyErr_SetString(PyExc_TypeError,
"first arg must be callable");
return NULL;
}
if (!PyTuple_Check(args)) {
PyErr_SetString(PyExc_TypeError,
"2nd arg must be a tuple");
return NULL;
}
if (keyw != NULL && !PyDict_Check(keyw)) {
PyErr_SetString(PyExc_TypeError,
"optional 3rd arg must be a dictionary");
return NULL;
}
// 创建 bootstate 结构
boot = PyMem_NEW(struct bootstate, 1);
if (boot == NULL)
return PyErr_NoMemory();
boot->interp = PyThreadState_GET()->interp;
boot->func = func;
boot->args = args;
boot->keyw = keyw;
boot->tstate = _PyThreadState_Prealloc(boot->interp);
if (boot->tstate == NULL) {
PyMem_DEL(boot);
return PyErr_NoMemory();
}
Py_INCREF(func);
Py_INCREF(args);
Py_XINCREF(keyw);
PyEval_InitThreads(); /* Start the interpreter's thread-awareness */
// 创建线程
ident = PyThread_start_new_thread(t_bootstrap, (void*) boot);
if (ident == -1) {
PyErr_SetString(ThreadError, "can't start new thread");
Py_DECREF(func);
Py_DECREF(args);
Py_XDECREF(keyw);
PyThreadState_Clear(boot->tstate);
PyMem_DEL(boot);
return NULL;
}
return PyInt_FromLong(ident);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment