Skip to content

Instantly share code, notes, and snippets.

@yoki
Created June 21, 2014 08:42
Show Gist options
  • Save yoki/b5a165fdf8e3ea404c68 to your computer and use it in GitHub Desktop.
Save yoki/b5a165fdf8e3ea404c68 to your computer and use it in GitHub Desktop.
Chapter 4 Numpy
# -*- coding: utf-8 -*-
import numpy as np
#===========================================
#%% Index and element wise operation
#===========================================
#%% boolean (logical) indexing
# `names` labels the 7 rows of `data`; comparing it with a string yields a
# boolean mask that selects rows.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
data[names == 'Bob', 2:]  # rows labelled Bob, columns 2 onward
data[names != 'Bob', 3]   # all other rows, column 3 only
# combine several conditions element-wise into a single mask
mask = np.logical_or(names == 'Bob', names == 'Will')
data[mask]
# assignment through a boolean mask
data[data < 0] = 0        # replace every negative entry with zero
data[names != 'Joe'] = 7  # overwrite every row not labelled Joe with 7
#-------------------
#%% fancy indexing
# Build an 8x4 array whose row i is filled with the value i.
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i
# select a subset of rows in a particular order
arr[[4, 3, 0, 6]]
arr[[-1, -7, -5]]  # negative indices count from the end; -1 is the last row
arr = np.arange(32).reshape((8, 4))
# paired index lists pick individual elements: (1,0), (3,2), (7,1)
arr[[1, 3, 7], [0, 2, 1]]
# rectangular region selection: rows 1,3,7 crossed with columns 0,2,1
arr[[1, 3, 7]][:, [0, 2, 1]]
arr[np.ix_([1, 3, 7], [0, 2, 1])]
#---------------------------
#%% transpose and axis change
arr = np.arange(15).reshape((3, 5))
arr.T
np.dot(arr.T, arr)  # X^T X
arr = np.arange(16).reshape((2, 2, 4))
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print arr` statement is a syntax error on Python 3.
print(arr)
arr.transpose((1, 0, 2))  # swap the first two axes, keep the last one
#------------------------------------
#%% Fast element-wise array functions
# two independent vectors of 8 standard-normal draws
x, y = np.random.randn(8), np.random.randn(8)
# unary operators: one input array, applied element by element
np.sqrt(x)  # x contains negative draws; sqrt returns nan for those entries
np.exp(x)
# binary operators: two input arrays, combined element by element
#   add, subtract, multiply, divide, power, maximum, mod, copysign
#   >, >=, <=, <, ==, !=, &, |, ^ (xor)
np.maximum(x, y)  # element-wise larger of the two
#------------------------------------
#%% Vectorization
import matplotlib.pyplot as plt
# points from -5 up to (not including) 5 in steps of 0.01, and the 2-D
# coordinate grids built from them
points = np.arange(-5, 5, 0.01)
xs, ys = np.meshgrid(points, points)
# distance from the origin, evaluated on the whole grid without a Python loop
z = np.sqrt(xs ** 2 + ys ** 2)
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence in an ordinary string
# literal and triggers a warning on recent Python versions. The resulting
# text is unchanged.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
#===========================================
#%% Data processing
#===========================================
#------------------------------------
#%% Conditional Logic
#--
# Two ways to pick, element by element, from xarr where cond is True
# and from yarr otherwise
cond = np.array([True, False, True, True, False])
xarr = np.array([1, 2, 3, 4, 5])
yarr = np.array([11, 12, 13, 14, 15])
# pure Python: walk the three sequences in lockstep
result = [
    (from_x if take_x else from_y)
    for from_x, from_y, take_x in zip(xarr, yarr, cond)
]
# NumPy: the same selection as a single vectorized call
result = np.where(cond, xarr, yarr)
#------
# where with a scalar: positive entries become 2, the rest keep their value
arr = np.random.randn(4, 4)
np.where(arr > 0, 2, arr)
#-----
# Nested where as a replacement for an if/elif chain.
# Loop version:
#     result = []
#     for i in range(n):
#         if cond1[i] and cond2[i]: result.append(0)
#         elif cond1[i]: result.append(1)
#         elif cond2[i]: result.append(2)
#         else: result.append(3)
# Vectorized version:
#     np.where(cond1 & cond2, 0,
#              np.where(cond1, 1,
#                       np.where(cond2, 2, 3)))
#------------------------------------
#%% math and stats
arr = np.random.randn(5, 4)
# reductions over the whole array
np.mean(arr)
np.sum(arr)
# reductions along one axis
np.mean(arr, axis=1)    # one mean per row
np.sum(arr, axis=0)     # one sum per column
# running totals / products along an axis
np.cumsum(arr, axis=0)
np.cumprod(arr, axis=1)
# extremes and spread
np.min(arr)
np.argmin(arr)          # position of the minimum in the flattened array
np.std(arr)
#------------------------
#%% sorting and quantile
arr = np.random.randn(8, 4)
arr.sort(axis=-1)      # in place: modifies the original array (each row sorted)
np.sort(arr, axis=-1)  # leaves arr alone and returns a new sorted array
large_arr = np.random.randn(1000)
# empirical quantile: sort, then index at the wanted fraction of the length
large_arr.sort()
large_arr[int(0.05 * large_arr.size)]  # 5% quantile
#------------------------
#%% unique and set logic
n = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
n2 = np.array(['John', 'Joe', 'Jay'])
np.unique(n)           # sorted unique elements
sorted(set(n2))        # pure Python alternative to np.unique
# The 1-D set routines all carry the "1d" suffix; np.intersect1 and
# np.setdiff do not exist and raise AttributeError.
np.intersect1d(n, n2)  # sorted elements common to n and n2
np.union1d(n, n2)      # sorted union
# np.isin is the replacement for the deprecated np.in1d
np.isin(n, n2)         # boolean array: is each element of n present in n2?
np.setdiff1d(n, n2)    # set difference: elements of n that are not in n2
np.setxor1d(n, n2)     # set exclusive or: elements in exactly one of the two
#===========================================
#%% Binary File input and output
#===========================================
# Save and load in binary format
arr = np.arange(10)
# save a single array (np.save appends the ".npy" extension)
np.save('some_array', arr)
np.load('some_array.npy')
# save multiple arrays into one ".npz" archive, keyed by keyword name
np.savez('arrays', a=arr, b=arr)
# Loading an .npz archive returns an object that holds the file open until
# it is closed, so read from it inside a `with` block.
with np.load('arrays.npz') as arch:
    arch['a']
# for non binary file, use pandas
#===========================================
#%% Linear algebra
#===========================================
# x is 2x3 and y is 3x2, so the product x.y is 2x2
x = np.array([[1., 2., 3.],
              [4., 5., 6.]])
y = np.array([[6., 23.],
              [-1, 7],
              [8, 9]])
# matrix multiplication
np.dot(x, y)
from numpy.linalg import inv, qr
X = np.random.randn(5, 5)
mat = np.dot(X.T, X)    # X^T X, a 5x5 matrix
inv(mat)                # matrix inverse
np.dot(mat, inv(mat))   # product of the matrix with its inverse
q, r = qr(mat)          # QR decomposition
# -*- coding: utf-8 -*-
#%%
# pure python way random walk
import numpy as np
import random
# Simulate the walk one step at a time, recording the position after each step.
position = 0
walk = [position]
steps = 1000
# range() exists on both Python 2 and Python 3; xrange is Python 2 only.
for _ in range(steps):
    # coin flip: randint(0, 1) returns 0 or 1, mapped to a -1 or +1 step
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
# NumPy-style random walk: draw every coin flip at once
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)  # each draw is 0 or 1
steps = np.where(draws > 0, 1, -1)            # map 1 -> +1 and 0 -> -1
walk = np.cumsum(steps)                       # position after each step
# min and max position reached
np.min(walk)
np.max(walk)
# index of the first step where |position| exceeds 10
# (argmax of a boolean array gives the first True, or 0 if there is none)
np.argmax(np.abs(walk) > 10)
# random walk repeated 5000 times
nwalks = 5000
nsteps = 1000
# One row per walk and one column per step, so that every axis=1 operation
# below runs along the steps of a single walk. (With the shape given as
# (nsteps, nwalks) the same code would simulate 1000 walks of 5000 steps.)
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)  # position of every walk after each step
walks.min()
# boolean per walk: did it ever get at least 30 away from the origin?
hist30 = (np.abs(walks) >= 30).any(1)
hist30
hist30.sum()  # number of walks that did
# for those walks only, index of the first step with |position| >= 30
crossing_times = (np.abs(walks[hist30]) >= 30).argmax(1)
crossing_times.mean()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment