The principle of this NEP is to separate the APIs for masking and for missing values, according to
- The current implementation of masked arrays
- Nathaniel Smith's proposal.
To try and make more progress on the whole missing values/masked arrays/... debate, it seems useful to have a more technical discussion of the pieces which we can agree on. This is the first, which attempts to nail down the details of the new ``where=`` argument to ufuncs.
####################################### miniNEP 2: NA support via special dtypes #######################################
To try and make more progress on the whole missing values/masked arrays/... debate, it seems useful to have a more technical discussion of the pieces which we can agree on. This is the second, which attempts to nail down the details of how NAs can be implemented using special dtypes.
import ctypes | |
PyFrame_LocalsToFast = ctypes.pythonapi.PyFrame_LocalsToFast | |
PyFrame_LocalsToFast.argtypes = [ctypes.py_object] | |
# Doing this with cython instead of ctypes would be much easier (and more
# robust). I just do it this way to keep the example self-contained.
frameobject_fields = [ | |
# PyObject_VAR_HEAD | |
("ob_refcnt", ctypes.c_int64), |
# It's very common for a class to have a repr that looks like MyClass(a, b, c=d).
# But the IPython pretty-printing machinery doesn't make this very convenient.
# Here's a helper to make it convenient.
def pretty_constructor_call(p, obj, args, kwargs=[]): | |
name = obj.__class__.__name__ | |
p.begin_group(len(name) + 1, "%s(" % (name,)) | |
started = [False] | |
def new_item(): | |
if started[0]: |
# To measure cold-cache import speed:
# $ echo 3 > /proc/sys/vm/drop_caches
# >>> timeimport("mymod", 1)
#
# To measure hot-cache import speed:
# >>> timeimport("mymod", 1) # warm the cache
# >>> timeimport("mymod", 10) # make the measurement
import sys | |
import timeit |
# Example output:
# a + a: 1.181 ns/item
# a / a: 2.577 ns/item
# a ** a: 15.259 ns/item
# np.log(a): 28.241 ns/item
# np.sin(a): 22.202 ns/item
# sp.gammaln(a): 40.876 ns/item
# sp.erf(a): 21.297 ns/item
import timeit |
#!/usr/bin/env python
# https://gist.github.com/njsmith/9157645
# usage:
# python3 grep-dot-dot.py path [path path ...]
DOT_NAMES = ["dot", | |
# scikit-learn: | |
"fast_dot", | |
"safe_sparse_dot", |
import six | |
import tokenize | |
import numpy as np | |
def _filter_tokens(s): | |
last_token_was_number = False | |
for token in tokenize.generate_tokens(six.StringIO(s).read): | |
token_type = token[0] | |
token_string = token[1] | |
if (last_token_was_number |
>>> np.__version__
'1.8.1'
>>> ar = np.ma.array([1, 1, 1, np.ma.masked, 1, 1, 1])
>>> ar
masked_array(data = [1.0 1.0 1.0 -- 1.0 1.0 1.0],
             mask = [False False False  True False False False],
       fill_value = 1e+20)
>>> np.gradient(ar)
masked_array(data = [0.0 0.0 -- 0.0 -- 0.0 0.0],