Skip to content

Instantly share code, notes, and snippets.

@diezguerra
Last active December 15, 2015 18:29
Show Gist options
  • Save diezguerra/5303711 to your computer and use it in GitHub Desktop.
Save diezguerra/5303711 to your computer and use it in GitHub Desktop.
Python Slice Upsert
a_range = range(12)
a_range[-2:] = range(10,20)
assert a_range == range(20)
a_range[10:11] = range(10)[::-1]
a_range[20:] = []
assert a_range == range(10) + range(10)[::-1]
keywords = "apple, pear, banana, lemon and orange"
keywords = keywords.split(', ')
keywords[-1:] = keywords[-1].split(' and ')
assert keywords == ["apple", "pear", "banana", "lemon", "orange"]
@dutc
Copy link

dutc commented Apr 7, 2013

This triggers the STORE_SLICE bytecode. +1, +2 mark the presence of endpoints for the range.

        case STORE_SLICE+0:
        case STORE_SLICE+1:
        case STORE_SLICE+2:
        case STORE_SLICE+3:
            if ((opcode-STORE_SLICE) & 2)
                w = POP();
            else
                w = NULL;
            if ((opcode-STORE_SLICE) & 1)
                v = POP();
            else
                v = NULL;
            u = POP();
            t = POP();
            err = assign_slice(u, v, w, t); /* u[v:w] = t */
            Py_DECREF(t);
            Py_DECREF(u);
            Py_XDECREF(v);
            Py_XDECREF(w);
            if (err == 0) continue;
            break;

@dutc
Copy link

dutc commented Apr 7, 2013

assign_slice looks like this:

#undef ISINDEX
#define ISINDEX(x) ((x) == NULL || \
                    PyInt_Check(x) || PyLong_Check(x) || PyIndex_Check(x))

static int
assign_slice(PyObject *u, PyObject *v, PyObject *w, PyObject *x)
    /* u[v:w] = x */
{
    PyTypeObject *tp = u->ob_type;
    PySequenceMethods *sq = tp->tp_as_sequence;

    if (sq && sq->sq_ass_slice && ISINDEX(v) && ISINDEX(w)) {
        Py_ssize_t ilow = 0, ihigh = PY_SSIZE_T_MAX;
        if (!_PyEval_SliceIndex(v, &ilow))
            return -1;
        if (!_PyEval_SliceIndex(w, &ihigh))
            return -1;
        if (x == NULL)
            return PySequence_DelSlice(u, ilow, ihigh);
        else
            return PySequence_SetSlice(u, ilow, ihigh, x);
    }
    else {
        PyObject *slice = PySlice_New(v, w, NULL);
        if (slice != NULL) {
            int res;
            if (x != NULL)
                res = PyObject_SetItem(u, slice, x);
            else
                res = PyObject_DelItem(u, slice);
            Py_DECREF(slice);
            return res;
        }
        else
            return -1;
    }
}

@dutc
Copy link

dutc commented Apr 7, 2013

PySequence_SetSlice looks like:

int
PySequence_SetSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2, PyObject *o)
{
    PySequenceMethods *m;
    PyMappingMethods *mp;

    if (s == NULL) {
        null_error();
        return -1;
    }

    m = s->ob_type->tp_as_sequence;
    if (m && m->sq_ass_slice) {
        if (i1 < 0 || i2 < 0) {
            if (m->sq_length) {
                Py_ssize_t l = (*m->sq_length)(s);
                if (l < 0)
                    return -1;
                if (i1 < 0)
                    i1 += l;
                if (i2 < 0)
                    i2 += l;
            }
        }
        return m->sq_ass_slice(s, i1, i2, o);
    } else if ((mp = s->ob_type->tp_as_mapping) && mp->mp_ass_subscript) {
        int res;
        PyObject *slice = _PySlice_FromIndices(i1, i2);
        if (!slice)
            return -1;
        res = mp->mp_ass_subscript(s, slice, o);
        Py_DECREF(slice);
        return res;
    }

    type_error("'%.200s' object doesn't support slice assignment", s);
    return -1;
}

@dutc
Copy link

dutc commented Apr 7, 2013

For a list object (which has sq/sequence slots and not mp/mapping slots)...

static PySequenceMethods list_as_sequence = {
    (lenfunc)list_length,                       /* sq_length */
    (binaryfunc)list_concat,                    /* sq_concat */
    (ssizeargfunc)list_repeat,                  /* sq_repeat */
    (ssizeargfunc)list_item,                    /* sq_item */
    (ssizessizeargfunc)list_slice,              /* sq_slice */
    (ssizeobjargproc)list_ass_item,             /* sq_ass_item */
    (ssizessizeobjargproc)list_ass_slice,       /* sq_ass_slice */
    (objobjproc)list_contains,                  /* sq_contains */
    (binaryfunc)list_inplace_concat,            /* sq_inplace_concat */
    (ssizeargfunc)list_inplace_repeat,          /* sq_inplace_repeat */
};

Wherein...

/* a[ilow:ihigh] = v if v != NULL.
 * del a[ilow:ihigh] if v == NULL.
 *
 * Special speed gimmick:  when v is NULL and ihigh - ilow <= 8, it's
 * guaranteed the call cannot fail.
 */
static int
list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
{
    /* Because [X]DECREF can recursively invoke list operations on
       this list, we must postpone all [X]DECREF activity until
       after the list is back in its canonical shape.  Therefore
       we must allocate an additional array, 'recycle', into which
       we temporarily copy the items that are deleted from the
       list. :-( */
    PyObject *recycle_on_stack[8];
    PyObject **recycle = recycle_on_stack; /* will allocate more if needed */
    PyObject **item;
    PyObject **vitem = NULL;
    PyObject *v_as_SF = NULL; /* PySequence_Fast(v) */
    Py_ssize_t n; /* # of elements in replacement list */
    Py_ssize_t norig; /* # of elements in list getting replaced */
    Py_ssize_t d; /* Change in size */
    Py_ssize_t k;
    size_t s;
    int result = -1;            /* guilty until proved innocent */
#define b ((PyListObject *)v)
    if (v == NULL)
        n = 0;
    else {
        if (a == b) {
            /* Special case "a[i:j] = a" -- copy b first */
            v = list_slice(b, 0, Py_SIZE(b));
            if (v == NULL)
                return result;
            result = list_ass_slice(a, ilow, ihigh, v);
            Py_DECREF(v);
            return result;
        }
        v_as_SF = PySequence_Fast(v, "can only assign an iterable");
        if(v_as_SF == NULL)
            goto Error;
        n = PySequence_Fast_GET_SIZE(v_as_SF);
        vitem = PySequence_Fast_ITEMS(v_as_SF);
    }
    if (ilow < 0)
        ilow = 0;
    else if (ilow > Py_SIZE(a))
        ilow = Py_SIZE(a);

    if (ihigh < ilow)
        ihigh = ilow;
    else if (ihigh > Py_SIZE(a))
        ihigh = Py_SIZE(a);

    norig = ihigh - ilow;
    assert(norig >= 0);
    d = n - norig;
    if (Py_SIZE(a) + d == 0) {
        Py_XDECREF(v_as_SF);
        return list_clear(a);
    }
    item = a->ob_item;
    /* recycle the items that we are about to remove */
    s = norig * sizeof(PyObject *);
    if (s > sizeof(recycle_on_stack)) {
        recycle = (PyObject **)PyMem_MALLOC(s);
        if (recycle == NULL) {
            PyErr_NoMemory();
            goto Error;
        }
    }
    memcpy(recycle, &item[ilow], s);

    if (d < 0) { /* Delete -d items */
        memmove(&item[ihigh+d], &item[ihigh],
            (Py_SIZE(a) - ihigh)*sizeof(PyObject *));
        list_resize(a, Py_SIZE(a) + d);
        item = a->ob_item;
    }

Parts of of interest right here.

    else if (d > 0) { /* Insert d items */
        k = Py_SIZE(a);
        if (list_resize(a, k+d) < 0)
            goto Error;
        item = a->ob_item;
        memmove(&item[ihigh+d], &item[ihigh],
            (k - ihigh)*sizeof(PyObject *));
    }
    for (k = 0; k < n; k++, ilow++) {
        PyObject *w = vitem[k];
        Py_XINCREF(w);
        item[ilow] = w;
    }
    for (k = norig - 1; k >= 0; --k)
        Py_XDECREF(recycle[k]);
    result = 0;
 Error:
    if (recycle != recycle_on_stack)
        PyMem_FREE(recycle);
    Py_XDECREF(v_as_SF);
    return result;
#undef b
}

@dutc
Copy link

dutc commented Apr 7, 2013

Just for confirmation, this is (gdb) backtrace for

x = range(10)
x[-2:] = range(10,20)

on Python 2.7.3 x64.

#0  list_ass_slice (a=0x7ffff7eab320, ilow=8, ihigh=9223372036854775807, v=0x7ffff7ea9ea8) at Objects/listobject.c:623
#1  0x000000000041e2e2 in PySequence_SetSlice (s=0x7ffff7eab320, i1=8, i2=9223372036854775807, o=0x7ffff7ea9ea8) at Objects/abstract.c:2108
#2  0x00000000004b08cf in assign_slice (u=0x7ffff7eab320, v=0x803800, w=0x0, x=0x7ffff7ea9ea8) at Python/ceval.c:4426
#3  0x00000000004a9276 in PyEval_EvalFrameEx (f=0x8c2b20, throwflag=0) at Python/ceval.c:1671
#4  0x00000000004ad61a in PyEval_EvalCodeEx (co=0x7ffff7f21730, globals=0x826160, locals=0x826160, args=0x0, argcount=0, kws=0x0, kwcount=0, defs=0x0, defcount=0, closure=0x0)
    at Python/ceval.c:3253
#5  0x00000000004a77d5 in PyEval_EvalCode (co=0x7ffff7f21730, globals=0x826160, locals=0x826160) at Python/ceval.c:667
#6  0x00000000004d8cd6 in run_mod (mod=0x8c02f0, filename=0x54c582 "<stdin>", globals=0x826160, locals=0x826160, flags=0x7fffffffe060, arena=0x8265a0) at Python/pythonrun.c:1353
#7  0x00000000004d762b in PyRun_InteractiveOneFlags (fp=0x7ffff74b4360 <_IO_2_1_stdin_>, filename=0x54c582 "<stdin>", flags=0x7fffffffe060) at Python/pythonrun.c:852
#8  0x00000000004d735d in PyRun_InteractiveLoopFlags (fp=0x7ffff74b4360 <_IO_2_1_stdin_>, filename=0x54c582 "<stdin>", flags=0x7fffffffe060) at Python/pythonrun.c:772
#9  0x00000000004d720a in PyRun_AnyFileExFlags (fp=0x7ffff74b4360 <_IO_2_1_stdin_>, filename=0x54c582 "<stdin>", closeit=0, flags=0x7fffffffe060) at Python/pythonrun.c:741
#10 0x00000000004154d6 in Py_Main (argc=1, argv=0x7fffffffe278) at Modules/main.c:639
#11 0x00000000004141cc in main (argc=1, argv=0x7fffffffe278) at ./Modules/python.c:23

@dutc
Copy link

dutc commented Apr 7, 2013

For comparison, this is what [].extend looks like:

static PyObject *
listextend(PyListObject *self, PyObject *b)
{
    PyObject *it;      /* iter(v) */
    Py_ssize_t m;                  /* size of self */
    Py_ssize_t n;                  /* guess for size of b */
    Py_ssize_t mn;                 /* m + n */
    Py_ssize_t i;
    PyObject *(*iternext)(PyObject *);

    /* Special cases:
       1) lists and tuples which can use PySequence_Fast ops
       2) extending self to self requires making a copy first
    */
    if (PyList_CheckExact(b) || PyTuple_CheckExact(b) || (PyObject *)self == b) {
        PyObject **src, **dest;
        b = PySequence_Fast(b, "argument must be iterable");
        if (!b)
            return NULL;
        n = PySequence_Fast_GET_SIZE(b);
        if (n == 0) {
            /* short circuit when b is empty */
            Py_DECREF(b);
            Py_RETURN_NONE;
        }
        m = Py_SIZE(self);
        if (list_resize(self, m + n) == -1) {
            Py_DECREF(b);
            return NULL;
        }
        /* note that we may still have self == b here for the
         * situation a.extend(a), but the following code works
         * in that case too.  Just make sure to resize self
         * before calling PySequence_Fast_ITEMS.
         */
        /* populate the end of self with b's items */
        src = PySequence_Fast_ITEMS(b);
        dest = self->ob_item + m;
        for (i = 0; i < n; i++) {
            PyObject *o = src[i];
            Py_INCREF(o);
            dest[i] = o;
        }
        Py_DECREF(b);
        Py_RETURN_NONE;
    }

    it = PyObject_GetIter(b);
    if (it == NULL)
        return NULL;
    iternext = *it->ob_type->tp_iternext;

    /* Guess a result list size. */
    n = _PyObject_LengthHint(b, 8);
    if (n == -1) {
        Py_DECREF(it);
        return NULL;
    }
    m = Py_SIZE(self);
    mn = m + n;
    if (mn >= m) {
        /* Make room. */
        if (list_resize(self, mn) == -1)
            goto error;
        /* Make the list sane again. */
        Py_SIZE(self) = m;
    }
    /* Else m + n overflowed; on the chance that n lied, and there really
     * is enough room, ignore it.  If n was telling the truth, we'll
     * eventually run out of memory during the loop.
     */

    /* Run iterator to exhaustion. */
    for (;;) {
        PyObject *item = iternext(it);
        if (item == NULL) {
            if (PyErr_Occurred()) {
                if (PyErr_ExceptionMatches(PyExc_StopIteration))
                    PyErr_Clear();
                else
                    goto error;
            }
            break;
        }
        if (Py_SIZE(self) < self->allocated) {
            /* steals ref */
            PyList_SET_ITEM(self, Py_SIZE(self), item);
            ++Py_SIZE(self);
        }
        else {
            int status = app1(self, item);
            Py_DECREF(item);  /* append creates a new ref */
            if (status < 0)
                goto error;
        }
    }

    /* Cut back result list if initial guess was too large. */
    if (Py_SIZE(self) < self->allocated)
        list_resize(self, Py_SIZE(self));  /* shrinking can't fail */

    Py_DECREF(it);
    Py_RETURN_NONE;

  error:
    Py_DECREF(it);
    return NULL;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment