Skip to content

Instantly share code, notes, and snippets.

@rsotto
Created December 29, 2017 03:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rsotto/9051a1e538c2a5ca213f50dd18e0a5f1 to your computer and use it in GitHub Desktop.
Save rsotto/9051a1e538c2a5ca213f50dd18e0a5f1 to your computer and use it in GitHub Desktop.
# Python variables and types
# Say we want to calculate BMI = 703 x weight (lbs) / height (in) ^ 2
weight = 150  # pounds
height = 70  # inches
# Compute from the variables above (not from repeated literals) so that
# changing weight or height actually changes bmi.
# ** is the operator to calculate powers.
bmi = 703 * (weight / (height ** 2))
print(bmi)  # prints value of bmi on the output console
print(type(weight))  # type is int
print(type(height))  # type is int
print(type(bmi))  # type is float (in Python 3, / between two ints yields a float)
# Single and double quotes are interchangeable; both lines create the same str
string = "Hello, World!"
string = 'Hello, World!'
boolean = True
print(type(string))  # type is str
print(type(boolean))  # type is bool
# Python lists
# Say we want to list the GTA 2016 population in millions
gta_pop = [2.7, 0.6, 1.4, 1.1, 0.5]
# A list can hold mixed types
gta_pop = ["toronto", 2.7, "durham", 0.6, "peel", 1.4,
           "york", 1.1, "halton", 0.5]
# A list can hold other lists
gta_pop = [["toronto", 2.7], ["durham", 0.6], ["peel", 1.4],
           ["york", 1.1], ["halton", 0.5]]
print(gta_pop)
print(type(gta_pop))  # type is list

# Subsetting or indexing lists
# A 0 index points to the first element
print(gta_pop[0])  # prints the toronto entry: ['toronto', 2.7]
print(gta_pop[2])  # prints the peel entry
# A -1 index points to the last element;
# negative indexes walk the list backwards starting from the last element
print(gta_pop[-1])  # prints the halton entry
print(gta_pop[-3])  # prints the peel entry

# Slicing lists - selecting multiple elements in the list
# Note that the ending index is exclusive
print(gta_pop[1:3])  # prints the durham and peel entries
print(gta_pop[2:])  # prints the peel, york, and halton entries
print(gta_pop[:3])  # prints the toronto, durham, and peel entries

# Adding and removing list elements
del gta_pop[4]  # removes the halton entry (del is a statement, not a function)
print(gta_pop)
gta_pop = gta_pop + [["halton", 0.5]]  # adds the halton entry back
# In-place alternative: gta_pop.append(["halton", 0.5])
# append() takes the new element itself, so it needs one less level of
# brackets than the list concatenation above.
print(gta_pop)

# Copying lists: both forms below create a new outer list (a shallow copy),
# so adding/removing elements in the copy does not affect gta_pop.
# The nested [name, population] lists are still shared between the two.
gta_pop_copy = list(gta_pop)
gta_pop_copy = gta_pop[:]  # or use slicing
print(gta_pop_copy)
# Python built-in functions
# max() also accepts the candidates as separate arguments
print(max(1, 2, 3, 4, 5))  # prints 5
# round() to 2 digits after the decimal point
print(round(1.2345, ndigits=2))  # prints 1.23
help(max)  # shows the documentation of the max() function

# Python built-in object methods
# Methods are functions that belong to an object and are called with a dot
mylist = [1, 2, 2, 3, 4, 4, 4, 5]
print(
    mylist.count(4),  # 3: element 4 occurs three times in the list
    mylist.index(2),  # 1: index of the first occurrence of element 2
    sep="\n",  # one value per line
)
mystr = "rodan"
print(mystr.capitalize())  # prints Rodan
# Python packages
# Bring the whole NumPy package into scope under its own name
import numpy
print(numpy.array([1, 2, 3]))
# Or import one specific function from the package, here under an alias
from numpy import array as nparray
print(nparray([4, 5, 6]))

# A NumPy array is an alternative to a Python list:
# - calculations and comparisons apply to the entire array at once
# - it can only contain a single type
# Say you have 2 datasets containing the weight and height of 5 people
npa_height = nparray([70, 68, 65, 72, 61])
npa_weight = nparray([150, 165, 134, 210, 110])
# Element-wise BMI for all 5 people in one expression
npa_bmi = (npa_weight / (npa_height ** 2)) * 703
print(npa_bmi)  # prints [ 21.52040816 25.08542388 22.29633136 28.47800926 20.78204784]
print(npa_bmi > 22)  # prints [False True True True False]
# A boolean array used as an index keeps only the elements marked True
print(npa_bmi[npa_bmi > 22])  # prints [ 25.08542388 22.29633136 28.47800926]
print(type(npa_weight))  # the type is numpy.ndarray

# A 2-dimensional array: first row holds the weights, second row the heights
npa_2d = nparray([
    [150, 165, 134, 210, 110],
    [70, 68, 65, 72, 61],
])
print(npa_2d)
print(npa_2d.shape)  # prints (2, 5) - 2 rows and 5 columns

# NumPy array subsetting
print(npa_2d[0])  # prints [150 165 134 210 110], the first row
print(npa_2d[0][2])  # prints 134, the third element of the first row
print(npa_2d[0, 2])  # same as above, using a single [row, column] index

# NumPy array slicing
print(npa_2d[:, 1:3])  # prints [[165 134] [ 68 65]]: columns 1 and 2 of every row
print(npa_2d[0, :])  # prints [150 165 134 210 110], the entire first row

# NumPy statistics
print(npa_weight.mean())  # prints 153.8
print(numpy.median(npa_weight))  # prints 150.0
print(npa_weight.std())  # prints 33.4926857687
print(npa_height.mean())  # prints 67.2
print(npa_height.std())  # prints 3.86781592116

# NumPy generate data
# numpy.random.normal() takes the mean (loc), the std (scale), and the number
# of samples (size), in this case 10 samples; results are rounded to 2 decimals.
# No seed is set in this script, so the values differ on every run and the
# outputs quoted below are only examples.
gd_weight = numpy.random.normal(loc=153.8, scale=33.50, size=10).round(2)
print(gd_weight)  # e.g. [ 130.67 147.44 153.69 104.84 137.85 164.96 150.62 153.65 194.35 229.11]
gd_height = numpy.random.normal(loc=67.2, scale=3.87, size=10).round(2)
print(gd_height)  # e.g. [ 72.44 60.82 71.27 70.61 66.06 67.12 68.33 67.81 61.84 74.19]

# numpy.column_stack() pairs up the weight and height of each of the 10
# samples: one row per sample, weight in column 0 and height in column 1
gd_wxh = numpy.column_stack((gd_weight, gd_height))
print(gd_wxh)  # e.g.
# [[ 130.67 72.44]
#  [ 147.44 60.82]
#  [ 153.69 71.27]
#  [ 104.84 70.61]
#  [ 137.85 66.06]
#  [ 164.96 67.12]
#  [ 150.62 68.33]
#  [ 153.65 67.81]
#  [ 194.35 61.84]
#  [ 229.11 74.19]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment