benjaminxscott/python-ref.md

## python-ref.md

      
    Raw
  

              python-ref.md
            
          
    dictionaries in python

 # init dict
 stuff = {
  'name': 'Zed',
  'age': 39
  }
 
stuff['city'] = "San Francisco" # inserts are constant time
 
stuff['jobs'] = ['Google', 'Dropbox'] # dict element can be a list

 print stuff['city'] # retrieval is constant time

San Francisco

`print stuff.keys() # retrieval order is NOT guaranteed``

['name', 'age' , 'city', 'jobs']

`print stuff.values()

['Zed', '39', 'San Francisco', ['Google', 'Dropbox'] ]

JSON

import json

json_stuff = json.dumps(stuff) # convert a dictionary into json
`'{'name': 'Zed', 'city': 'San Francisco', 'age': 39, 'jobs': ['Google', 'Dropbox']}'`

dict_stuff = json.loads(json_stuff) # convert json back to dictionary

complexity


O(1) = constant time (inserts and lookup into hash table)
O(n * log-base2 (n) ) = sort an array using quicksort
O(n) = iterate all array elements (for loop)
O(n^2) = bubble sort (nested for loop) AVOID

efficient sorting


qsort: divide set at a pivot,
(choosing a middle pivot is better for large datasets)

def mergesort (arr)
 
 # we have chopped it down fully
 if len(arr) <= 1:  
  return arr
  
 middle = len(arr) - 1
 left = arr[:middle] # from index 0 to middle-1
 right = arr[middle:] # from middle to len -1
 
 mergesort (left)
 mergesort (right)
 
 return list ( merge (left, right) )
 
 def merge (left, right)
  sorted = []
  left-index = 0
  right-index = 0 
  while left-index < len(left) and right-index < len ( right)
   left-val = left[left-index]
   right-val = right[right-index]
   if left[left-index] <= right[right-index]
    sorted.append(left-val)
    left-index = left-index +1 
   else:
    sorted.append(right-val)
    right-index = right-index +1 
   
   # tack the rest of the sorted string onto the right side
   if left: 
     sorted.extend(left[ left-index: ])
   if right: 
     sorted.extend(right[ right-index: ])
   
  return sorted

fizzbuzz


for every int between 1 and 100, output the word 'fizz' if a number is divisible by 3, and 'buzz' if divisible by 5, and the number itself if neither

for number in range(1,101) # range(start,end,[step])
 out = ''
 if number % 3:
  out = out + "fizz" # py-style
 if number % 5:
  out += "buzz" # c-style 
  
 if not out
  out += number
 
 print out

pointers in C


strings are arrays, end in null char \0

char mystring [] = "hi" 
printf ("%c" , mystring[0] )// outputs 'h'

char otherstring [6] = {'h', 'e', 'l', 'l', 'o', '\0'}

overflow = strcpy(mystring, otherstring )// copy dest, src in memory
safe = strncpy(mystring, otherstring, strlen(mystring) // will only copy "he\0"

strcmp(mystring,otherstring) // returns zero if bytes are the same

struct pants 
{
 bool has_fly
 int size
 char * name
}

struct pants mypants;
mypants.has_fly = True;

struct pants * pointpants;
pointpants -> size = 5

concurrency


global interpreter lock (GIL) = only one process running at a time

If memory or IO bound, use concurrency i.e. multiple threads for one process:

each thread shares their parent's process memory
can access shared data without IPC

from queue import Queue
from threading import Thread

class DownloadWorker(Thread):
   def __init__(self, queue):
       Thread.__init__(self)
       self.queue = queue

   def run(self):
       while True:
           item = self.queue.get()
           do_thing(item)
           self.queue.task_done()

def main():
   list = items_to_process
   
   queue = Queue() # setup a semaphore-style access blocking queue
   # Create 8 worker threads
   for x in range(8):
       worker = DownloadWorker(queue)
       worker.start()
       
   # Put the tasks into the queue 
   for thing in list:
       queue.put(thing)
       
   # main thread will wait for the queue to finish processing all the tasks
   queue.join()

If CPU bound, use multiple processes

each process has its own copy of the entire memory
need to use IPC to move data around

from multiprocessing.pool import Pool
with Pool(8) as p:
       p.map(download, links) # function pointer, iterable - spawns a process to handle each split piece


CPU bound with many cores = multiprocessing is more efficient

list comprehensions (map)

squares = []
>>> for x in range(10):
       squares.append(x**2)
  
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

squares = [x**2 for x in range(10)]