The principle of this NEP is to separate the APIs for masking and for missing values, according to
- The current implementation of masked arrays
- Nathaniel Smith's proposal.
To try and make more progress on the whole missing values/masked arrays/... debate, it seems useful to have a more technical discussion of the pieces which we can agree on. This is the first, which attempts to nail down the details of the new ``where=`` argument to ufuncs.
####################################### miniNEP 2: NA support via special dtypes #######################################
To try and make more progress on the whole missing values/masked arrays/... debate, it seems useful to have a more technical discussion of the pieces which we can agree on. This is the second, which attempts to nail down the details of how NAs can be implemented using special dtypes.
import ctypes | |
PyFrame_LocalsToFast = ctypes.pythonapi.PyFrame_LocalsToFast | |
PyFrame_LocalsToFast.argtypes = [ctypes.py_object] | |
# Doing this with cython instead of ctypes would be much easier (and more
# robust). I just do it this way to keep the example self-contained.
frameobject_fields = [ | |
# PyObject_VAR_HEAD | |
("ob_refcnt", ctypes.c_int64), |
# It's very common for a class to have a repr that looks like MyClass(a, b, c=d).
# But the IPython pretty-printing machinery doesn't make this very convenient.
# Here's a helper to make it convenient.
def pretty_constructor_call(p, obj, args, kwargs=[]): | |
name = obj.__class__.__name__ | |
p.begin_group(len(name) + 1, "%s(" % (name,)) | |
started = [False] | |
def new_item(): | |
if started[0]: |
# To measure cold-cache import speed:
# $ echo 3 > /proc/sys/vm/drop_caches
# >>> timeimport("mymod", 1)
#
# To measure hot-cache import speed:
# >>> timeimport("mymod", 1) # warm the cache
# >>> timeimport("mymod", 10) # make the measurement
import sys | |
import timeit |
# Example output:
# a + a: 1.181 ns/item
# a / a: 2.577 ns/item
# a ** a: 15.259 ns/item
# np.log(a): 28.241 ns/item
# np.sin(a): 22.202 ns/item
# sp.gammaln(a): 40.876 ns/item
# sp.erf(a): 21.297 ns/item
import timeit |
#!/usr/bin/env python
# https://gist.github.com/njsmith/9157645
# usage:
# python3 grep-dot-dot.py path [path path ...]
DOT_NAMES = ["dot", | |
# scikit-learn: | |
"fast_dot", | |
"safe_sparse_dot", |
import six | |
import tokenize | |
import numpy as np | |
def _filter_tokens(s): | |
last_token_was_number = False | |
for token in tokenize.generate_tokens(six.StringIO(s).read): | |
token_type = token[0] | |
token_string = token[1] | |
if (last_token_was_number |
>>> np.__version__
'1.8.1'
>>> ar = np.ma.array([1, 1, 1, np.ma.masked, 1, 1, 1])
>>> ar
masked_array(data = [1.0 1.0 1.0 -- 1.0 1.0 1.0],
             mask = [False False False  True False False False],
       fill_value = 1e+20)
>>> np.gradient(ar)
masked_array(data = [0.0 0.0 -- 0.0 -- 0.0 0.0],