yoki/chap4-numpy.py

## chap4-numpy.py
# -*- coding: utf-8 -*-
import numpy as np

#===========================================
#%% Index and element wise operation
#===========================================

#%% logical indexing
# A boolean array built from `names` selects rows of `data`; this works
# because both have the same length along axis 0 (7 names <-> 7 rows).
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(names.size, 4)
data[names == 'Bob', 2:]   # Bob's rows, columns from index 2 onwards
data[names != 'Bob', 3]    # column 3 of every row that is not Bob's

# Several conditions are combined element-wise with | (or) and & (and);
# each comparison needs its own parentheses.
mask = (names == 'Bob') | (names == 'Will')
data[mask]

# Assigning through a boolean index overwrites only the selected entries.
data[data < 0] = 0           # clamp every negative value to zero
data[names != 'Joe'] = 7     # overwrite the full rows of everyone but Joe

#-------------------
#%% fancy indexing
# Build an 8x4 float array in which every entry of row i equals i.
arr = np.empty((8, 4))
for i in range(len(arr)):
    arr[i, :] = i

# A list of integers picks whole rows, in exactly the order listed.
arr[[4, 3, 0, 6]]
arr[[-1, -7, -5]]   # negative indices count from the end (-1 = last row)


arr = np.arange(32).reshape(8, 4)
# Two index lists are paired up element-wise, so this picks the three
# single elements (1,0), (3,2) and (7,1) and returns a 1-D array.
arr[[1, 3, 7], [0, 2, 1]]

# Rectangular region (rows 1,3,7 crossed with columns 0,2,1), two ways:
arr[[1, 3, 7]][:, [0, 2, 1]]        # pick the rows, then reorder columns
arr[np.ix_([1, 3, 7], [0, 2, 1])]   # np.ix_ builds the row/column mesh

#---------------------------
#%% transpose and axis change
arr = np.arange(15).reshape((3, 5))
arr.T                 # transposed array, shape (5, 3)
np.dot(arr.T, arr)    # X^T X, shape (5, 5)

arr = np.arange(16).reshape((2, 2, 4))
# print(...) with a single argument behaves the same on Python 2 and 3;
# the bare `print arr` statement is a SyntaxError on Python 3.
print(arr)
# Reorder the axes as (1, 0, 2): the first two axes are swapped and the
# last one stays in place, so result[j, i, k] == arr[i, j, k].
arr.transpose((1, 0, 2))

#------------------------------------
#%% Fast element-wise Array functions
# Two independent samples of 8 standard-normal values.
x = np.random.randn(8)
y = np.random.randn(8)

# Unary ufuncs: applied to every element of a single array.
# NOTE: x contains negative values, for which np.sqrt yields nan and
# NumPy emits a RuntimeWarning.
np.sqrt(x)
np.exp(x)

# Binary ufuncs: combine two arrays element by element, e.g.
#   add, subtract, multiply, divide, power, maximum, mod, copysign,
#   >, >=, <=, <, ==, !=, &, |, ^ (xor)
np.maximum(x, y)   # element-wise larger of the two inputs


#------------------------------------
#%% Vectorization
import matplotlib.pyplot as plt

# Sample points spaced 0.01 apart on [-5, 5).
points = np.arange(-5, 5, 0.01)
# meshgrid expands the 1-D axis into two 2-D coordinate arrays:
# xs varies along the columns, ys along the rows.
xs, ys = np.meshgrid(points, points)
# Distance of every grid point from the origin, with no Python loop.
z = np.sqrt(xs ** 2 + ys ** 2)
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence, so a plain string
# literal triggers an invalid-escape warning on recent Python versions.
# The resulting text is unchanged.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

#===========================================
#%% Data processing
#===========================================
#------------------------------------
#%% Conditional Logic
#--
# Pick from xarr where cond is True and from yarr otherwise, two ways.
xarr = np.array([1, 2, 3, 4, 5])
yarr = np.array([11, 12, 13, 14, 15])
cond = np.array([True, False, True, True, False])

# Plain-Python version: an element-by-element loop that yields a list.
result = [(xv if flag else yv)
          for xv, yv, flag in zip(xarr, yarr, cond)]

# NumPy version: one vectorized call that yields an ndarray.
result = np.where(cond, xarr, yarr)

#------
# np.where also accepts scalars for either branch:
# positive entries become 2, all others keep their value.
arr = np.random.randn(4, 4)
np.where(arr > 0, 2, arr)

#-----
# Nested np.where replaces an if/elif/else ladder. Both forms are kept
# as comments because n, cond1 and cond2 are placeholders here.
#
# loop form:
#result = []
#for i in range(n):
#    if cond1[i] and cond2[i]: result.append(0)
#    elif cond1[i]: result.append(1)
#    elif cond2[i]: result.append(2)
#    else: result.append(3)
#
# vectorized form:
#np.where(cond1 & cond2, 0,
#         np.where(cond1, 1,
#                  np.where(cond2, 2, 3)))

#------------------------------------
#%% math and stats
arr = np.random.randn(5, 4)

# Reductions over the whole array return a single scalar.
arr.mean()
arr.sum()
# Reductions along one axis collapse that axis.
arr.mean(axis=1)   # one mean per row, shape (5,)
arr.sum(axis=0)    # one sum per column, shape (4,)

# Running totals / products keep the shape of the input.
arr.cumsum(axis=0)
arr.cumprod(axis=1)

arr.min()
arr.argmin()   # index of the minimum in the flattened array
arr.std()

#------------------------
#%% sorting and quantile
arr = np.random.randn(8, 4)
arr.sort(axis=-1)       # in place, along the last axis (the default)
np.sort(arr, axis=-1)   # leaves arr untouched and returns a sorted copy

large_arr = np.random.randn(1000)

# Empirical quantile: sort, then read the element that sits 5% of the
# way into the sorted data.
large_arr.sort()
large_arr[int(0.05 * large_arr.size)]

#------------------------
#%% uniq and set logic
n = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
n2 = np.array(['John', 'Joe', 'Jay'])

np.unique(n)            # sorted unique elements
sorted(set(n2))         # pure-Python alternative to np.unique
# The 1-D set routines all carry a "1d" suffix; np.intersect1 and
# np.setdiff do not exist and raise AttributeError.
np.intersect1d(n, n2)   # sorted elements common to n and n2
np.union1d(n, n2)       # sorted union
# np.isin supersedes np.in1d, which is deprecated in NumPy 2.0.
np.isin(n, n2)          # for each element of n: is it present in n2?
np.setdiff1d(n, n2)     # sorted elements of n that are not in n2
np.setxor1d(n, n2)      # sorted elements found in exactly one of the two

#===========================================
#%% Binary File input and output
#===========================================
# Save and load in binary format
arr = np.arange(10)

# Single array: np.save appends ".npy" to the name when it is missing.
np.save('some_array', arr)
np.load('some_array.npy')

# Several arrays: np.savez writes one uncompressed ".npz" archive whose
# entries are named after the keyword arguments.
np.savez('arrays', a=arr, b=arr)
# Loading an .npz returns a lazy NpzFile that keeps the archive open.
# The with-block closes it, so `arch` can only be indexed inside it.
with np.load('arrays.npz') as arch:
    arch['a']
# for non binary file, use pandas

#===========================================
#%% Linear algebra
#===========================================
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])

# Matrix product: (2x3) . (3x2) -> (2x2)
np.dot(x, y)

from numpy.linalg import inv, qr
X = np.random.randn(5, 5)
mat = np.dot(X.T, X)     # X^T X, a symmetric 5x5 matrix
inv(mat)

np.dot(mat, inv(mat))    # identity matrix up to floating-point error
q, r = qr(mat)           # QR factorization: mat == q . r

## random_walk.py
# -*- coding: utf-8 -*-
#%%

# Random walk, pure-Python version
import random

import numpy as np

position = 0
walk = [position]   # walk[k] is the position after k steps
steps = 1000
# range() replaces the Python-2-only xrange(), which is a NameError on
# Python 3; range() is available on both.
for i in range(steps):
    # a fair coin flip decides between one step up and one step down
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

# Random walk, NumPy version: draw every coin flip at once
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)   # each draw is 0 or 1
steps = np.where(draws > 0, 1, -1)             # map 1 -> +1 and 0 -> -1
walk = np.cumsum(steps)                        # position after each step

# extreme positions reached along the walk
np.min(walk)
np.max(walk)

# Index of the first step at which the walk is more than 10 away from
# the origin: argmax on a boolean array returns the first True. It also
# returns 0 when no entry is True.
(np.absolute(walk) > 10).argmax()


# Random walk repeated 5000 times
nwalks = 5000
nsteps = 1000
# One row per walk, one column per step. The original size of
# (nsteps, nwalks) was transposed relative to the axis-1 reductions
# below, which produced 1000 walks of 5000 steps each.
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)   # accumulate along the step axis of each walk

walks.min()
# hist30[w] is True when walk w ever gets 30 or more away from the origin
hist30 = (np.abs(walks) >= 30).any(1)
hist30
hist30.sum()   # number of walks that reached distance 30

# For those walks only: index of the first step at distance >= 30
# (argmax on a boolean row returns the position of its first True).
crossing_times = (np.abs(walks[hist30]) >= 30).argmax(1)
crossing_times.mean()
	# -*- coding: utf-8 -*-
	import numpy as np

	#===========================================
	#%% Index and element wise operation
	#===========================================

	#%% logical indexing
	# A boolean array built from `names` selects rows of `data`; this works
	# because both have the same length along axis 0 (7 names <-> 7 rows).
	data = np.random.randn(7, 4)
	names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
	data[names == 'Bob', 2:]   # Bob's rows, columns from index 2 onwards
	data[names != 'Bob', 3]    # column 3 of every row that is not Bob's

	# Several conditions are combined element-wise with | (or) and & (and).
	# The operator had been garbled to "\|", which is a syntax error.
	mask = (names == 'Bob') | (names == 'Will')
	data[mask]

	# Assigning through a boolean index overwrites only the selected entries.
	data[data < 0] = 0           # clamp every negative value to zero
	data[names != 'Joe'] = 7     # overwrite the full rows of everyone but Joe

	#-------------------
	#%% fancy indexing
	# Build an 8x4 float array in which every entry of row i equals i.
	arr = np.empty((8, 4))
	for i in range(8):
		# the loop body had lost its indentation, which is a syntax error
		arr[i] = i

	# A list of integers picks whole rows, in exactly the order listed.
	arr[[4, 3, 0, 6]]
	arr[[-1, -7, -5]]   # negative indices count from the end (-1 = last row)


	arr = np.arange(32).reshape((8, 4))
	# Two index lists are paired up element-wise, so this picks the three
	# single elements (1,0), (3,2) and (7,1) and returns a 1-D array.
	arr[[1, 3, 7], [0, 2, 1]]

	# Rectangular region (rows 1,3,7 crossed with columns 0,2,1), two ways:
	arr[[1, 3, 7]][:, [0, 2, 1]]        # pick the rows, then reorder columns
	arr[np.ix_([1, 3, 7], [0, 2, 1])]   # np.ix_ builds the row/column mesh

	#---------------------------
	#%% transpose and axis change
	arr = np.arange(15).reshape((3, 5))
	arr.T                 # transposed array, shape (5, 3)
	np.dot(arr.T, arr)    # X^T X, shape (5, 5)

	arr = np.arange(16).reshape((2, 2, 4))
	# print(...) with a single argument behaves the same on Python 2 and 3;
	# the bare `print arr` statement is a SyntaxError on Python 3.
	print(arr)
	# Reorder the axes as (1, 0, 2): the first two axes are swapped and the
	# last one stays in place, so result[j, i, k] == arr[i, j, k].
	arr.transpose((1, 0, 2))

	#------------------------------------
	#%% Fast element-wise Array functions
	# Two independent samples of 8 standard-normal values.
	x = np.random.randn(8)
	y = np.random.randn(8)

	# Unary ufuncs: applied to every element of a single array.
	# NOTE: x contains negative values, for which np.sqrt yields nan and
	# NumPy emits a RuntimeWarning.
	np.sqrt(x)
	np.exp(x)

	# Binary ufuncs: combine two arrays element by element, e.g.
	#   add, subtract, multiply, divide, power, maximum, mod, copysign,
	#   >, >=, <=, <, ==, !=, &, |, ^ (xor)
	np.maximum(x, y)   # element-wise larger of the two inputs


	#------------------------------------
	#%% Vectorization
	import matplotlib.pyplot as plt

	# Sample points spaced 0.01 apart on [-5, 5).
	points = np.arange(-5, 5, 0.01)
	# meshgrid expands the 1-D axis into two 2-D coordinate arrays:
	# xs varies along the columns, ys along the rows.
	xs, ys = np.meshgrid(points, points)
	# Distance of every grid point from the origin. The two ** operators
	# had been lost ("xs 2 + ys 2"), which is a syntax error.
	z = np.sqrt(xs ** 2 + ys ** 2)
	plt.imshow(z, cmap=plt.cm.gray)
	plt.colorbar()
	# Raw string: "\s" is not a valid escape sequence, so a plain string
	# literal triggers an invalid-escape warning on recent Python versions.
	# The resulting text is unchanged.
	plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

	#===========================================
	#%% Data processing
	#===========================================
	#------------------------------------
	#%% Conditional Logic
	#--
	# Pick from xarr where cond is True and from yarr otherwise, two ways.
	xarr = np.array([1, 2, 3, 4, 5])
	yarr = np.array([11, 12, 13, 14, 15])
	cond = np.array([True, False, True, True, False])

	# Plain-Python version: an element-by-element loop that yields a list.
	result = [(xv if flag else yv)
			  for xv, yv, flag in zip(xarr, yarr, cond)]

	# NumPy version: one vectorized call that yields an ndarray.
	result = np.where(cond, xarr, yarr)

	#------
	# np.where also accepts scalars for either branch:
	# positive entries become 2, all others keep their value.
	arr = np.random.randn(4, 4)
	np.where(arr > 0, 2, arr)

	#-----
	# Nested np.where replaces an if/elif/else ladder. Both forms are kept
	# as comments because n, cond1 and cond2 are placeholders here.
	#
	# loop form:
	#result = []
	#for i in range(n):
	#    if cond1[i] and cond2[i]: result.append(0)
	#    elif cond1[i]: result.append(1)
	#    elif cond2[i]: result.append(2)
	#    else: result.append(3)
	#
	# vectorized form:
	#np.where(cond1 & cond2, 0,
	#         np.where(cond1, 1,
	#                  np.where(cond2, 2, 3)))

	#------------------------------------
	#%% math and stats
	arr = np.random.randn(5, 4)

	# Reductions over the whole array return a single scalar.
	arr.mean()
	arr.sum()
	# Reductions along one axis collapse that axis.
	arr.mean(axis=1)   # one mean per row, shape (5,)
	arr.sum(axis=0)    # one sum per column, shape (4,)

	# Running totals / products keep the shape of the input.
	arr.cumsum(axis=0)
	arr.cumprod(axis=1)

	arr.min()
	arr.argmin()   # index of the minimum in the flattened array
	arr.std()

	#------------------------
	#%% sorting and quantile
	arr = np.random.randn(8, 4)
	arr.sort(axis=-1)       # in place, along the last axis (the default)
	np.sort(arr, axis=-1)   # leaves arr untouched and returns a sorted copy

	large_arr = np.random.randn(1000)

	# Empirical quantile: sort, then read the element that sits 5% of the
	# way into the sorted data.
	large_arr.sort()
	large_arr[int(0.05 * large_arr.size)]

	#------------------------
	#%% uniq and set logic
	n = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
	n2 = np.array(['John', 'Joe', 'Jay'])

	np.unique(n)            # sorted unique elements
	sorted(set(n2))         # pure-Python alternative to np.unique
	# The 1-D set routines all carry a "1d" suffix; np.intersect1 and
	# np.setdiff do not exist and raise AttributeError.
	np.intersect1d(n, n2)   # sorted elements common to n and n2
	np.union1d(n, n2)       # sorted union
	# np.isin supersedes np.in1d, which is deprecated in NumPy 2.0.
	np.isin(n, n2)          # for each element of n: is it present in n2?
	np.setdiff1d(n, n2)     # sorted elements of n that are not in n2
	np.setxor1d(n, n2)      # sorted elements found in exactly one of the two

	#===========================================
	#%% Binary File input and output
	#===========================================
	# Save and load in binary format
	arr = np.arange(10)

	# Single array: np.save appends ".npy" to the name when it is missing.
	np.save('some_array', arr)
	np.load('some_array.npy')

	# Several arrays: np.savez writes one uncompressed ".npz" archive whose
	# entries are named after the keyword arguments.
	np.savez('arrays', a=arr, b=arr)
	# Loading an .npz returns a lazy NpzFile that keeps the archive open.
	# The with-block closes it, so `arch` can only be indexed inside it.
	with np.load('arrays.npz') as arch:
		arch['a']

	# for non binary file, use pandas

	#===========================================
	#%% Linear algebra
	#===========================================
	x = np.array([[1., 2., 3.], [4., 5., 6.]])
	y = np.array([[6., 23.], [-1, 7], [8, 9]])

	# Matrix product: (2x3) . (3x2) -> (2x2)
	np.dot(x, y)

	from numpy.linalg import inv, qr
	X = np.random.randn(5, 5)
	mat = np.dot(X.T, X)     # X^T X, a symmetric 5x5 matrix
	inv(mat)

	np.dot(mat, inv(mat))    # identity matrix up to floating-point error
	q, r = qr(mat)           # QR factorization: mat == q . r
	# -*- coding: utf-8 -*-
	#%%

	# Random walk, pure-Python version
	import random

	import numpy as np

	position = 0
	walk = [position]   # walk[k] is the position after k steps
	steps = 1000
	# range() replaces the Python-2-only xrange(), which is a NameError on
	# Python 3. The loop body had also lost its indentation.
	for i in range(steps):
		# a fair coin flip decides between one step up and one step down
		step = 1 if random.randint(0, 1) else -1
		position += step
		walk.append(position)

	# Random walk, NumPy version: draw every coin flip at once
	nsteps = 1000
	draws = np.random.randint(0, 2, size=nsteps)   # each draw is 0 or 1
	steps = np.where(draws > 0, 1, -1)             # map 1 -> +1 and 0 -> -1
	walk = np.cumsum(steps)                        # position after each step

	# extreme positions reached along the walk
	np.min(walk)
	np.max(walk)

	# Index of the first step at which the walk is more than 10 away from
	# the origin: argmax on a boolean array returns the first True. It also
	# returns 0 when no entry is True.
	(np.absolute(walk) > 10).argmax()


	# Random walk repeated 5000 times
	nwalks = 5000
	nsteps = 1000
	# One row per walk, one column per step. The original size of
	# (nsteps, nwalks) was transposed relative to the axis-1 reductions
	# below, which produced 1000 walks of 5000 steps each.
	draws = np.random.randint(0, 2, size=(nwalks, nsteps))
	steps = np.where(draws > 0, 1, -1)
	walks = steps.cumsum(1)   # accumulate along the step axis of each walk

	walks.min()
	# hist30[w] is True when walk w ever gets 30 or more away from the origin
	hist30 = (np.abs(walks) >= 30).any(1)
	hist30
	hist30.sum()   # number of walks that reached distance 30

	# For those walks only: index of the first step at distance >= 30
	# (argmax on a boolean row returns the position of its first True).
	crossing_times = (np.abs(walks[hist30]) >= 30).argmax(1)
	crossing_times.mean()