Skip to content

Instantly share code, notes, and snippets.

@benjaminxscott
Last active August 3, 2016 16:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benjaminxscott/06ab684dc208833507c2 to your computer and use it in GitHub Desktop.
Save benjaminxscott/06ab684dc208833507c2 to your computer and use it in GitHub Desktop.
Learn about Python and its idioms

dictionaries in python

 # init dict
 stuff = {
  'name': 'Zed',
  'age': 39
  }
 
stuff['city'] = "San Francisco" # inserts are constant time
 
stuff['jobs'] = ['Google', 'Dropbox'] # dict element can be a list

print stuff['city'] # retrieval is constant time

San Francisco

`print stuff.keys() # retrieval order is NOT guaranteed``

['name', 'age' , 'city', 'jobs']

`print stuff.values()

['Zed', '39', 'San Francisco', ['Google', 'Dropbox'] ]

JSON

import json

json_stuff = json.dumps(stuff) # convert a dictionary into json
`'{'name': 'Zed', 'city': 'San Francisco', 'age': 39, 'jobs': ['Google', 'Dropbox']}'`

dict_stuff = json.loads(json_stuff) # convert json back to dictionary

complexity

  • O(1) = constant time (inserts and lookup into hash table)
  • O(n * log-base2 (n) ) = sort an array using quicksort
  • O(n) = iterate all array elements (for loop)
  • O(n^2) = bubble sort (nested for loop) AVOID

efficient sorting

qsort: divide set at a pivot, (choosing a middle pivot is better for large datasets)

def mergesort (arr)
 
 # we have chopped it down fully
 if len(arr) <= 1:  
  return arr
  
 middle = len(arr) - 1
 left = arr[:middle] # from index 0 to middle-1
 right = arr[middle:] # from middle to len -1
 
 mergesort (left)
 mergesort (right)
 
 return list ( merge (left, right) )
 
 def merge (left, right)
  sorted = []
  left-index = 0
  right-index = 0 
  while left-index < len(left) and right-index < len ( right)
   left-val = left[left-index]
   right-val = right[right-index]
   if left[left-index] <= right[right-index]
    sorted.append(left-val)
    left-index = left-index +1 
   else:
    sorted.append(right-val)
    right-index = right-index +1 
   
   # tack the rest of the sorted string onto the right side
   if left: 
     sorted.extend(left[ left-index: ])
   if right: 
     sorted.extend(right[ right-index: ])
   
  return sorted

fizzbuzz

for every int between 1 and 100, output the word 'fizz' if a number is divisible by 3, and 'buzz' if divisible by 5, and the number itself if neither

for number in range(1,101) # range(start,end,[step])
 out = ''
 if number % 3:
  out = out + "fizz" # py-style
 if number % 5:
  out += "buzz" # c-style 
  
 if not out
  out += number
 
 print out

pointers in C

strings are arrays, end in null char \0

char mystring [] = "hi" 
printf ("%c" , mystring[0] )// outputs 'h'

char otherstring [6] = {'h', 'e', 'l', 'l', 'o', '\0'}

overflow = strcpy(mystring, otherstring )// copy dest, src in memory
safe = strncpy(mystring, otherstring, strlen(mystring) // will only copy "he\0"

strcmp(mystring,otherstring) // returns zero if bytes are the same

struct pants 
{
 bool has_fly
 int size
 char * name
}

struct pants mypants;
mypants.has_fly = True;

struct pants * pointpants;
pointpants -> size = 5

concurrency

  • global interpreter lock (GIL) = only one process running at a time

If memory or IO bound, use concurrency i.e. multiple threads for one process:

  • each thread shares their parent's process memory
  • can access shared data without IPC
from queue import Queue
from threading import Thread

class DownloadWorker(Thread):
   def __init__(self, queue):
       Thread.__init__(self)
       self.queue = queue

   def run(self):
       while True:
           item = self.queue.get()
           do_thing(item)
           self.queue.task_done()

def main():
   list = items_to_process
   
   queue = Queue() # setup a semaphore-style access blocking queue
   # Create 8 worker threads
   for x in range(8):
       worker = DownloadWorker(queue)
       worker.start()
       
   # Put the tasks into the queue 
   for thing in list:
       queue.put(thing)
       
   # main thread will wait for the queue to finish processing all the tasks
   queue.join()

If CPU bound, use multiple processes

  • each process has its own copy of the entire memory
  • need to use IPC to move data around
from multiprocessing.pool import Pool
with Pool(8) as p:
       p.map(download, links) # function pointer, iterable - spawns a process to handle each split piece
  • CPU bound with many cores = multiprocessing is more efficient

list comprehensions (map)

squares = []
>>> for x in range(10):
       squares.append(x**2)
  
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

squares = [x**2 for x in range(10)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment