# rsotto/blog_python_numsciapp.py (secret gist)

## blog_python_numsciapp.py
# Python variables and types
# say we want to calculate bmi = 703 x weight (lbs) / height (in) ^ 2
weight = 150 # weight in pounds
height = 70 # height in inches
# use the variables rather than repeating the literals, so changing
# weight or height above actually changes the result
bmi = 703 * (weight / (height ** 2)) # ** is the operator to calculate powers
print(bmi) # prints value of bmi on the output console

print(type(weight)) # type is int
print(type(height)) # type is int
print(type(bmi)) # type is float, because / is true division

# single and double quotes create the same kind of string
string = "Hello, World!"
string = 'Hello, World!'
boolean = True
print(type(string)) # type is str
print(type(boolean)) # type is bool

# Python lists
# say we want to list the GTA 2016 population in millions
gta_pop = [2.7, 0.6, 1.4, 1.1, 0.5]
# you can have mixed types in the list
gta_pop = ["toronto", 2.7, "durham", 0.6, "peel", 1.4,
           "york", 1.1, "halton", 0.5]
# you can have lists inside the list - here one [name, population] pair per region
gta_pop = [["toronto", 2.7], ["durham", 0.6], ["peel", 1.4],
           ["york", 1.1], ["halton", 0.5]]
print(gta_pop)
print(type(gta_pop)) # type is list

# subsetting or indexing lists
# a 0 index points to the first element
print(gta_pop[0]) # prints ['toronto', 2.7]
print(gta_pop[2]) # prints ['peel', 1.4]
# a -1 index points to the last element
# you can index the list backwards starting from the last element
print(gta_pop[-1]) # prints ['halton', 0.5]
print(gta_pop[-3]) # prints ['peel', 1.4]

# slicing lists - selecting multiple elements in the list
# note that the ending index is exclusive
print(gta_pop[1:3]) # prints the durham and peel pairs
print(gta_pop[2:]) # prints the peel, york, and halton pairs
print(gta_pop[:3]) # prints the toronto, durham, and peel pairs

# adding and removing list elements
del gta_pop[4] # removes the halton pair; del is a statement, so no parentheses
print(gta_pop)

# concatenation builds a new list, so the pair is wrapped in an outer list
gta_pop = gta_pop + [["halton", 0.5]] # adds halton population
# alternatively, gta_pop.append(["halton", 0.5]) adds the pair in place
# (append takes the element itself, so there is no extra outer list)
print(gta_pop)

# copying lists - both forms create a new outer list (a shallow copy)
# the inner [name, population] pairs are still shared with gta_pop
gta_pop_copy = list(gta_pop)
gta_pop_copy = gta_pop[:] # or use slicing
print(gta_pop_copy)

# Python built-in functions
# max() also accepts the values as separate arguments
print(max(1, 2, 3, 4, 5)) # prints 5
# the second argument is the number of decimal places to keep
print(round(1.2345, ndigits=2)) # prints 1.23
help(max) # opens up documentation on max() function

# Python built-in object methods
mylist = [1, 2, 2, 3, 4, 4, 4, 5]
# first line printed: 3, as there are 3 occurrences of element 4 in the list
# second line printed: 1, the index of the first occurrence of element 2
print(mylist.count(4), mylist.index(2), sep="\n")

mystr = "rodan"
print(str.capitalize(mystr)) # prints Rodan

# Python packages
# importing NumPy package
import numpy
print(numpy.array([1, 2, 3]))

# import a specific function from the package, under an alias
from numpy import array as nparray
print(nparray([4, 5, 6]))

# NumPy array is an alternative to Python list
# it provides calculations over entire arrays
# it is easy and fast and specifically created for data science
# it can only contain a single type
# say you have 2 datasets containing the weight and height of 5 people
npa_weight = nparray([150, 165, 134, 210, 110])
npa_height = nparray([70, 68, 65, 72, 61])
npa_bmi = 703 * (npa_weight / (npa_height ** 2)) # computed element by element
print(npa_bmi) # prints [ 21.52040816  25.08542388  22.29633136  28.47800926  20.78204784]
print(npa_bmi > 22) # prints [False  True  True  True False]
# indexing with the boolean array keeps only the elements where it is True
print(npa_bmi[npa_bmi > 22]) # prints [ 25.08542388  22.29633136  28.47800926]

print(type(npa_weight)) # prints <class 'numpy.ndarray'>
npa_2d = nparray([[150, 165, 134, 210, 110],
                  [70, 68, 65, 72, 61]])
print(npa_2d)
print(npa_2d.shape) # prints (2, 5) - 2 rows and 5 columns

# NumPy array subsetting
print(npa_2d[0]) # prints [150 165 134 210 110], the first row
print(npa_2d[0][2]) # prints 134, the third element of the first row
print(npa_2d[0, 2]) # same as above

# NumPy array slicing
print(npa_2d[:, 1:3]) # prints [[165 134] [ 68  65]]
print(npa_2d[0, :]) # prints [150 165 134 210 110], the entire first row

# NumPy statistics
print(numpy.mean(npa_weight)) # prints 153.8
print(numpy.median(npa_weight)) # prints 150.0
print(numpy.std(npa_weight)) # prints 33.4926857687
print(numpy.mean(npa_height)) # prints 67.2
print(numpy.std(npa_height)) # prints 3.86781592116

# NumPy generate data
# using numpy.random.normal(), passing mean, std, and # of samples, in this case 10 samples
# no seed is set, so the values differ on every run; the outputs shown
# in the comments below are one sample run only
gd_weight = numpy.round(numpy.random.normal(153.8, 33.50, 10), 2)
print(gd_weight) # e.g. [ 130.67  147.44  153.69  104.84  137.85  164.96  150.62  153.65  194.35  229.11]
gd_height = numpy.round(numpy.random.normal(67.2, 3.87, 10), 2)
print(gd_height) # e.g. [ 72.44  60.82  71.27  70.61  66.06  67.12  68.33  67.81  61.84  74.19]
# using numpy.column_stack() will combine weight and height of each 10 samples
# the result has one row per sample: column 0 is weight, column 1 is height
gd_wxh = numpy.column_stack((gd_weight, gd_height))
print(gd_wxh) # e.g.
#[[ 130.67   72.44]
# [ 147.44   60.82]
# [ 153.69   71.27]
# [ 104.84   70.61]
# [ 137.85   66.06]
# [ 164.96   67.12]
# [ 150.62   68.33]
# [ 153.65   67.81]
# [ 194.35   61.84]
# [ 229.11   74.19]]
	# Python variables and types
# NOTE: this second copy of the tutorial is placed at top level (column 0);
# indented top-level statements are an IndentationError in Python
# say we want to calculate bmi = 703 x weight (lbs) / height (in) ^ 2
weight = 150 # weight in pounds
height = 70 # height in inches
# use the variables rather than repeating the literals, so changing
# weight or height above actually changes the result
bmi = 703 * (weight / (height ** 2)) # ** is the operator to calculate powers
print(bmi) # prints value of bmi on the output console

print(type(weight)) # type is int
print(type(height)) # type is int
print(type(bmi)) # type is float, because / is true division

# single and double quotes create the same kind of string
string = "Hello, World!"
string = 'Hello, World!'
boolean = True
print(type(string)) # type is str
print(type(boolean)) # type is bool

# Python lists
# say we want to list the GTA 2016 population in millions
gta_pop = [2.7, 0.6, 1.4, 1.1, 0.5]
# you can have mixed types in the list
gta_pop = ["toronto", 2.7, "durham", 0.6, "peel", 1.4,
           "york", 1.1, "halton", 0.5]
# you can have lists inside the list - here one [name, population] pair per region
gta_pop = [["toronto", 2.7], ["durham", 0.6], ["peel", 1.4],
           ["york", 1.1], ["halton", 0.5]]
print(gta_pop)
print(type(gta_pop)) # type is list

# subsetting or indexing lists
# a 0 index points to the first element
print(gta_pop[0]) # prints ['toronto', 2.7]
print(gta_pop[2]) # prints ['peel', 1.4]
# a -1 index points to the last element
# you can index the list backwards starting from the last element
print(gta_pop[-1]) # prints ['halton', 0.5]
print(gta_pop[-3]) # prints ['peel', 1.4]

# slicing lists - selecting multiple elements in the list
# note that the ending index is exclusive
print(gta_pop[1:3]) # prints the durham and peel pairs
print(gta_pop[2:]) # prints the peel, york, and halton pairs
print(gta_pop[:3]) # prints the toronto, durham, and peel pairs

# adding and removing list elements
del gta_pop[4] # removes the halton pair; del is a statement, so no parentheses
print(gta_pop)

# concatenation builds a new list, so the pair is wrapped in an outer list
gta_pop = gta_pop + [["halton", 0.5]] # adds halton population
# alternatively, gta_pop.append(["halton", 0.5]) adds the pair in place
# (append takes the element itself, so there is no extra outer list)
print(gta_pop)

# copying lists - both forms create a new outer list (a shallow copy)
# the inner [name, population] pairs are still shared with gta_pop
gta_pop_copy = list(gta_pop)
gta_pop_copy = gta_pop[:] # or use slicing
print(gta_pop_copy)

# Python built-in functions
print(max([1, 2, 3, 4, 5])) # prints 5
print(round(1.2345, 2)) # prints 1.23
help(max) # opens up documentation on max() function

# Python built-in object methods
mylist = [1, 2, 2, 3, 4, 4, 4, 5]
print(mylist.count(4)) # prints 3 as there are 3 occurrences of element 4 in the list
print(mylist.index(2)) # prints 1 as the index of the first occurrence of element 2 in the list

mystr = "rodan"
print(mystr.capitalize()) # prints Rodan

# Python packages
# importing NumPy package
import numpy
print(numpy.array([1, 2, 3]))

# import a specific function from the package, under an alias
from numpy import array as nparray
print(nparray([4, 5, 6]))

# NumPy array is an alternative to Python list
# it provides calculations over entire arrays
# it is easy and fast and specifically created for data science
# it can only contain a single type
# say you have 2 datasets containing the weight and height of 5 people
npa_weight = nparray([150, 165, 134, 210, 110])
npa_height = nparray([70, 68, 65, 72, 61])
npa_bmi = 703 * (npa_weight / (npa_height ** 2)) # computed element by element
print(npa_bmi) # prints [ 21.52040816 25.08542388 22.29633136 28.47800926 20.78204784]
print(npa_bmi > 22) # prints [False True True True False]
# indexing with the boolean array keeps only the elements where it is True
print(npa_bmi[npa_bmi > 22]) # prints [ 25.08542388 22.29633136 28.47800926]

print(type(npa_weight)) # prints <class 'numpy.ndarray'>
npa_2d = nparray([[150, 165, 134, 210, 110],
                  [70, 68, 65, 72, 61]])
print(npa_2d)
print(npa_2d.shape) # prints (2, 5) - 2 rows and 5 columns

# NumPy array subsetting
print(npa_2d[0]) # prints [150 165 134 210 110], the first row
print(npa_2d[0][2]) # prints 134, the third element of the first row
print(npa_2d[0, 2]) # same as above

# NumPy array slicing
print(npa_2d[:, 1:3]) # prints [[165 134] [ 68 65]]
print(npa_2d[0, :]) # prints [150 165 134 210 110], the entire first row

# NumPy statistics
print(numpy.mean(npa_weight)) # prints 153.8
print(numpy.median(npa_weight)) # prints 150.0
print(numpy.std(npa_weight)) # prints 33.4926857687
print(numpy.mean(npa_height)) # prints 67.2
print(numpy.std(npa_height)) # prints 3.86781592116

# NumPy generate data
# using numpy.random.normal(), passing mean, std, and # of samples, in this case 10 samples
# no seed is set, so the values differ on every run; the outputs shown
# in the comments below are one sample run only
gd_weight = numpy.round(numpy.random.normal(153.8, 33.50, 10), 2)
print(gd_weight) # e.g. [ 130.67 147.44 153.69 104.84 137.85 164.96 150.62 153.65 194.35 229.11]
gd_height = numpy.round(numpy.random.normal(67.2, 3.87, 10), 2)
print(gd_height) # e.g. [ 72.44 60.82 71.27 70.61 66.06 67.12 68.33 67.81 61.84 74.19]
# using numpy.column_stack() will combine weight and height of each 10 samples
# the result has one row per sample: column 0 is weight, column 1 is height
gd_wxh = numpy.column_stack((gd_weight, gd_height))
print(gd_wxh) # e.g.
#[[ 130.67 72.44]
# [ 147.44 60.82]
# [ 153.69 71.27]
# [ 104.84 70.61]
# [ 137.85 66.06]
# [ 164.96 67.12]
# [ 150.62 68.33]
# [ 153.65 67.81]
# [ 194.35 61.84]
# [ 229.11 74.19]]