Created
June 21, 2014 08:42
-
-
Save yoki/b5a165fdf8e3ea404c68 to your computer and use it in GitHub Desktop.
Chapter 4 Numpy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import numpy as np
#===========================================
#%% Index and element wise operation
#===========================================
#%% logical indexing
# Boolean (mask) indexing demo: `names` labels the seven rows of `data`,
# so a comparison on `names` yields a row mask for `data`.
data = np.random.randn(7, 4)
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# A boolean array on the first axis can be mixed with a slice or an
# integer on the second axis.
data[names == 'Bob', 2:]  # rows labelled 'Bob', columns 2 onward
data[names != 'Bob', 3]   # rows not labelled 'Bob', column 3 only
# complex logical indexing
# Masks are combined element-wise with | and &; each comparison needs its
# own parentheses because | binds tighter than ==.
mask = (names == 'Bob') | (names == 'Will')
data[mask]
# logical substitution
data[data < 0] = 0        # replace every negative entry with 0
data[names != 'Joe'] = 7  # overwrite whole rows whose label is not 'Joe'
#-------------------
#%% fancy indexing
# Fancy indexing = indexing with integer lists/arrays.
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i  # fill row i with the value i so rows are easy to tell apart
# select subset of rows in particular order
arr[[4, 3, 0, 6]]
arr[[-1, -7, -5]]  # negative indices count from the end: -1 is the last row
arr = np.arange(32).reshape((8, 4))
# select (1,0), (3,2), (7,1)
# Two index lists are paired element by element, giving a 1-D result.
arr[[1, 3, 7], [0, 2, 1]]
# rectangular region selection
# Either pick the rows first and then reorder the columns, or let np.ix_
# build the row/column cross product in a single step.
arr[[1, 3, 7]][:, [0, 2, 1]]
arr[np.ix_([1, 3, 7], [0, 2, 1])]
#---------------------------
#%% transpose and axis change
arr = np.arange(15).reshape((3, 5))
arr.T
np.dot(arr.T, arr)  # X^T X
arr = np.arange(16).reshape((2, 2, 4))
# print() with a single argument behaves the same on Python 2 and 3.
print(arr)
# transpose takes the new axis order: here axes 0 and 1 are swapped and
# axis 2 stays in place.
arr.transpose((1, 0, 2))
#------------------------------------
#%% Fast element-wise Array functions
x = np.random.randn(8)
y = np.random.randn(8)
# unary operators
# x holds standard-normal draws, so some entries are negative; taking the
# absolute value first avoids NaN results and a RuntimeWarning from sqrt.
np.sqrt(np.abs(x))
np.exp(x)
# binary operators
# add, subtract, multiply, divide, power, maximum
# mod, copysign, >, >=, <=, <, ==, !=, &, |, ^(xor)
np.maximum(x, y)  # element-wise maximum of the two arrays
#------------------------------------
#%% Vectorization
# Evaluate sqrt(x^2 + y^2) over a whole grid with array expressions
# instead of Python loops, then show the result as a grayscale image.
import matplotlib.pyplot as plt
points = np.arange(-5, 5, 0.01)  # from -5 up to (not including) 5, step 0.01
xs, ys = np.meshgrid(points, points)  # 2-D coordinate grids built from points
z = np.sqrt(xs ** 2 + ys ** 2)
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence, and newer Python versions
# warn about it in a normal string literal. The resulting text is unchanged.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
#===========================================
#%% Data processing
#===========================================
#------------------------------------
#%% Conditional Logic
#--
# Two types of selection
xarr = np.array([1, 2, 3, 4, 5])
yarr = np.array([11, 12, 13, 14, 15])
cond = np.array([True, False, True, True, False])
# pure python selection
# Take the value from xarr where cond is True, otherwise from yarr.
# The loop variables are deliberately not called x / y: on Python 2 list
# comprehension variables leak into the enclosing scope and would
# overwrite module-level arrays of the same name.
result = [(x_val if flag else y_val)
          for x_val, y_val, flag in zip(xarr, yarr, cond)]
# numpy selection
# Same selection as above, done in one vectorized call.
result = np.where(cond, xarr, yarr)
#------
# where with scalars
arr = np.random.randn(4, 4)
np.where(arr > 0, 2, arr)  # positive entries become 2, the rest are kept
#-----
# advanced where conversion
# Illustrative only (cond1, cond2 and n are not defined in this script):
# the if/elif chain below can be written as nested np.where calls.
#result = []
#for i in range(n):
#    if cond1[i] and cond2[i]: result.append(0)
#    elif cond1[i]: result.append(1)
#    elif cond2[i]: result.append(2)
#    else: result.append(3)
#np.where(cond1 & cond2, 0,
#         np.where(cond1, 1,
#                  np.where(cond2, 2, 3)))
#------------------------------------
#%% math and stats
# Reductions run over the whole array unless an axis is given.
arr = np.random.randn(5, 4)
arr.mean()
arr.sum()
arr.mean(axis=1)  # one mean per row
arr.sum(0)  # specify axis: one sum per column
arr.cumsum(0)   # running sum down each column; result keeps arr's shape
arr.cumprod(1)  # running product along each row; result keeps arr's shape
arr.min()
arr.argmin()  # index of the minimum in the flattened array
arr.std()
#------------------------
#%% sorting and quantile
arr = np.random.randn(8, 4)
arr.sort()    # modifies the original array (sorts along the last axis)
np.sort(arr)  # returns new array
large_arr = np.random.randn(1000)
# quantile
# After sorting, the element 5% of the way into the array is an
# empirical 5% quantile.
large_arr.sort()
large_arr[int(0.05 * len(large_arr))]  # 5% quantile
#------------------------
#%% uniq and set logic
# NumPy's 1-D set routines; the set results come back sorted.
n = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
n2 = np.array(['John', 'Joe', 'Jay'])
np.unique(n)           # unique element
sorted(set(n2))        # pure python alternative of uniq
np.intersect1d(n, n2)  # sorted common element of n, n2
np.union1d(n, n2)      # sorted union
np.isin(n, n2)         # boolean array: for each element of n, is it in n2
np.setdiff1d(n, n2)    # set difference: elements of n that are not in n2
np.setxor1d(n, n2)     # set exclusive or: elements in exactly one of n, n2
#===========================================
#%% Binary File input and output
#===========================================
# Save and load in Binary format
arr = np.arange(10)
# save single array
np.save('some_array', arr)  # the '.npy' extension is appended automatically
np.load('some_array.npy')
# save multiple arrays
np.savez('arrays', a=arr, b=arr)  # keyword names become the archive keys
# Loading an .npz returns an archive object that keeps the file open and
# reads each array on access; the with-block closes it when we are done.
with np.load('arrays.npz') as arch:
    arch['a']
# for non binary file, use pandas
#===========================================
#%% Linear algebra
#===========================================
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
# multiplication
x.dot(y)  # matrix product: (2, 3) times (3, 2) gives (2, 2)
from numpy.linalg import inv, qr
X = np.random.randn(5, 5)
mat = X.T.dot(X)   # X^T X
inv(mat)
mat.dot(inv(mat))  # approximately the identity, up to rounding error
q, r = qr(mat)     # QR decomposition: mat == q.dot(r)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
#%%
# pure python way random walk
# Start at 0 and move +1 or -1 with equal probability at every step,
# recording the position after each step.
import numpy as np
import random
position = 0
walk = [position]
steps = 1000
# range() works on both Python 2 and 3 (xrange exists only on Python 2).
for i in range(steps):
    step = 1 if random.randint(0, 1) else -1  # coin flip mapped to +1 / -1
    position += step
    walk.append(position)
# numpy style way random walk
# Draw all coin flips at once, map them to +1 / -1 steps and take the
# cumulative sum to get the position after every step.
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)  # 0 or 1 (upper bound excluded)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()
# min and max
walk.min()
walk.max()
# first crossing 10
# argmax on a boolean array gives the index of the first True, i.e. the
# first step at which |walk| exceeds 10 (use >= for "reaches 10").
# It also returns 0 when no entry is True, so check .any() if that matters.
(np.abs(walk) > 10).argmax()
# random walk repeat 5000 times
# Simulate many walks at once: each ROW is one walk, each COLUMN one step.
nwalks = 5000
nsteps = 1000
# Shape must be (nwalks, nsteps) so that cumsum along axis 1 accumulates
# the steps of a single walk; with the axes swapped the code would build
# nsteps walks of nwalks steps each.
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)
walks.min()
# Which walks ever get at least 30 away from the origin?
hist30 = (np.abs(walks) >= 30).any(1)
hist30
hist30.sum()  # number of walks that reach distance 30
# For those walks only, argmax along axis 1 gives the first step index at
# which |position| >= 30 (restricting to hist30 avoids argmax's 0 for
# rows that are all False).
crossing_times = (np.abs(walks[hist30]) >= 30).argmax(1)
crossing_times.mean()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment