Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save eclecticmiraclecat/efcfa693dda80b9a5ea4df6aa4193691 to your computer and use it in GitHub Desktop.
Save eclecticmiraclecat/efcfa693dda80b9a5ea4df6aa4193691 to your computer and use it in GitHub Desktop.

Based on Trey Hunner's Pythonic Code Refactoring Session

What you’ll learn and how you can apply it

By the end of this live online course, you’ll understand:

  • Where to embrace common Python-specific syntaxes
  • How to idiomatically loop over multiple things at once, loop in reverse, and loop with indexes
  • How the Python standard library can help you avoid reinventing the wheel

And you’ll be able to:

  • Identify places in your code where tuple unpacking should be used
  • Identify places where comprehensions should be used
  • Make your existing Python code feel a bit more like a Python expert wrote it

complete codes

ratios.py

def get_color_ratios(colors, ratios):
    """Return dictionary of color ratios from color and ratio lists."""
    assert len(colors) == len(ratios)
    color_ratios = {}
    for i in range(len(colors)):
        color_ratios[colors[i]] = ratios[i]
    return color_ratios


if __name__ == "__main__":
    test_colors = ["red", "green", "blue"]
    test_ratios = [0.1, 0.6, 0.3]
    combined_dict = {'red': 0.1, 'green': 0.6, 'blue': 0.3}
    assert get_color_ratios(test_colors, test_ratios) == combined_dict

refactored

  • replace the index generated using range with enumerate
def get_color_ratios(colors, ratios):
    """Return dictionary of color ratios from color and ratio lists."""
    assert len(colors) == len(ratios)
    color_ratios = {}
    for i, color in enumerate(color):
        color_ratios[color] = ratios[i]
    return color_ratios

refactored

  • zip since looping over two iterables
def get_color_ratios(colors, ratios):
    """Return dictionary of color ratios from color and ratio lists."""
    assert len(colors) == len(ratios)
    color_ratios = {}
    for color, ratio in zip(colors, ratios):
        color_ratios[color] = ratio
    return color_ratios

refactored

  • use dict comprehension
def get_color_ratios(colors, ratios):
    """Return dictionary of color ratios from color and ratio lists."""
    assert len(colors) == len(ratios)
    color_ratios = {
        color: ratio for color, ratio in zip(colors, ratios)
    }
    return color_ratios

refactored

  • use dict constructor
def get_color_ratios(colors, ratios):
    """Return dictionary of color ratios from color and ratio lists."""
    assert len(colors) == len(ratios)
    color_ratios = dict(zip(colors, ratios))
    return color_ratios

refactored

  • return straight, variable can be removed since function has good name
def get_color_ratios(colors, ratios):
    """Return dictionary of color ratios from color and ratio lists."""
    assert len(colors) == len(ratios)
    return dict(zip(colors, ratios))

get_earliest.py

def get_earliest(date1, date2):
    mdy1 = date1.split('/')
    mdy2 = date2.split('/')
    if mdy1[2] < mdy2[2]:
        return date1
    elif mdy1[2] > mdy2[2]:
        return date2
    elif mdy1[0] < mdy2[0]:
        return date1
    elif mdy1[0] > mdy2[0]:
        return date2
    elif mdy1[1] < mdy2[1]:
        return date1
    elif mdy1[1] > mdy2[1]:
        return date2
    else:
        return date1

if __name__ == "__main__":
    newer = "03/21/1946"
    older = "02/24/1946"
    assert get_earliest(newer, older) == older

refactored

  • convert to string and do string comparison
def get_earliest(date1, date2):
    mdy1 = date1.split('/')
    mdy2 = date2.split('/')
    ymd1 = mdy1[2] + mdy1[0] + mdy1[1]
    ymd2 = mdy2[2] + mdy2[0] + mdy2[1]
    if ymd1 < ymd2:
        return date1
    return date2

refactored

  • replace the indexes, with tuple unpacking (multiple assignment)
def get_earliest(date1, date2):
    m1, d1, y1 = date1.split('/')
    m2, d2, y2 = date2.split('/')
    ymd1 = y1 + m1 + d1
    ymd2 = y2 + m2 + d2
    if ymd1 < ymd2:
        return date1
    return date2

refactored

  • use lexicographical ordering (alphabetical like ordering)
  • works on list and tuple
  • get rid of variable assignments
def get_earliest(date1, date2):
    m1, d1, y1 = date1.split('/')
    m2, d2, y2 = date2.split('/')
    if (y1, m1, d1) < (y2, m2, d2):
        return date1
    return date2

refactored

  • use datetime module
  • ternary if
from datetime import datetime

def get_earliest(date1, date2):
    d1 = datetime.strptime(date1, "%m/%d/%Y")
    d2 = datetime.strptime(date2, "%m/%d/%Y")
    return date1 if (d1 < d2) else date2

fix_csv.py

import sys

old_filename = sys.argv[1]
new_filename = sys.argv[2]

old_file = open(old_filename)
rows = [line.split('|') for line in old_file.read().splitlines()]

new_file = open(new_filename, mode='wt', newline='\r\n')
print("\n".join(",".join(row) for row in rows), file=new_file)
old_file.close()
new_file.close()

refactored

  • use context manager to open a file
import sys

old_filename = sys.argv[1]
new_filename = sys.argv[2]

with open(old_filename) as old_file:
    rows = [line.split('|') for line in old_file.read().splitlines()]

with open(new_filename, mode='wt', newline='\r\n') as new_file:
    print("\n".join(",".join(row) for row in rows), file=new_file)

refactored

  • use argparse instead of sys arguments
  • use csv module
import csv
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('old_filename')
parser.add_argument('new_filename')
args = parser.parse_args()

with open(args.old_filename) as old_file:
    reader = csv.reader(old_file, delimiter="|")
    rows = [row for row in reader]

with open(args.new_filename, mode='wt', newline='\r\n') as new_file:
    print("\n".join(",".join(row) for row in rows), file=new_file)

refactored

  • convert list comprehension to list directly [x for x in reader] to list(reader)
  • since csv reader is an iterable, it can directly be passed to list
import csv
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('old_filename')
parser.add_argument('new_filename')
args = parser.parse_args()

with open(args.old_filename) as old_file:
    rows = list(csv.reader(old_file, delimiter="|"))

with open(args.new_filename, mode='wt', newline='') as new_file:
    writer = csv.writer(new_file, delimeter=',')
    for row in rows:
        writer.writerow(row)

refactored

  • convert filename to file directly with the argparse FileType
  • argparse will open the files but not close until the program exits
import csv
from argparse import ArgumentParser, FileType

parser = ArgumentParser()
parser.add_argument('old_file', type=FileType('rt'))
parser.add_argument('new_file', type=FileType('wt'))
args = parser.parse_args()

reader = csv.reader(args.old_file, delimiter='|')
writer = csv.writer(args.new_file, delimiter=',')
writer.writerows(reader)

add.py

def add(matrix1, matrix2):
    """Add corresponding numbers in given 2-D matrices."""
    combined = []
    for i in range(len(matrix1)):
        row = []
        for j in range(len(matrix1[i])):
            row.append(matrix1[i][j] + matrix2[i][j])
        combined.append(row)
    return combined

if __name__ == "__main__":
    m1 = [[1, 2, 3], [4, 5, 6]]
    m2 = [[-1, -2, -3], [-4, -5, -6]]
    m3 = [[0, 0, 0], [0, 0, 0]]
    assert add(m1, m2) == m3

refactored

  • zip the two lists and remove range
def add(matrix1, matrix2):
    """Add corresponding numbers in given 2-D matrices."""
    combined = []
    for row1, row2 in zip(matrix1, matrix2):
        row = []
        for x, y in zip(row1, row2):
            row.append(x + y)
        combined.append(row)
    return combined

refactored

  • use list comprehension
def add(matrix1, matrix2):
    """Add corresponding numbers in given 2-D matrices."""
    return [
        [ x + y for x, y in zip(row1, row2)]
        for row1, row2 in zip(matrix1, matrix2)
    ]

interleave.py

def interleave(*iterables):
    """Return iterable of one item at a time from each given iterable."""
    interleaved = []
    for i in range(len(iterables[0])):
        for iterable in iterables:
            interleaved.append(iterable[i])
    return interleaved

if __name__ == "__main__":
    assert interleave([1, 2], [3, 4]) == [1, 3, 2, 4]

refactored

  • replace range with zip
def interleave(*iterables):
    """Return iterable of one item at a time from each given iterable."""
    interleaved = []
    for items in zip(*iterables):
        for item in items:
            interleaved.append(item)
    return interleaved

refactored

  • use comprehension
def interleave(*iterables):
    """Return iterable of one item at a time from each given iterable."""
    return (
        item
        for items in zip(*iterables)
        for item in items
    )

sum_timestamps.py

def sum_timestamps(timestamps):
    total_time = 0
    for time in timestamps:
        total_time += parse_time(time)
    return format_time(total_time)

def parse_time(time_string):
    sections = time_string.split(':')
    if len(sections) == 2:
        seconds = int(sections[1])
        minutes = int(sections[0])
        hours = 0
    else:
        seconds = int(sections[2])
        minutes = int(sections[1])
        hours = int(sections[0])
    return hours*3600 + minutes*60 + seconds

def format_time(total_seconds):
    hours = str(int(total_seconds / 3600))
    minutes = str(int(total_seconds / 60) % 60)
    seconds = str(total_seconds % 60)
    if len(minutes) < 2 and hours != "0":
        minutes = "0" + minutes
    if len(seconds) < 2:
        seconds = "0" + seconds
    time = minutes + ":" + seconds
    if hours != "0":
        time = hours + ":" + time
    return time

if __name__ == "__main__":
    times = [
            '3:52', '3:29', '3:23', '4:05', '3:24', '2:29', '2:16', '2:44',
            '1:58', '3:21', '2:51', '2:53', '2:51', '3:32', '3:20', '2:40',
            '2:50', '3:24', '3:22', '0:42']
    assert sum_timestamps(times) == '59:26'

refactored

  • modify to use sum function
def sum_timestamps(timestamps):
    total_time = sum(parse_time(time) for time in timestamps)
    return format_time(total_time)

refactored

  • replace the hardcoded indexes
def parse_time(time_string):
    sections = time_string.split(':')
    if len(sections) == 2:
        hours = 0
        minutes, seconds = sections
    else:
        hours, minutes, seconds = sections
    return int(hours)*3600 + int(minutes)*60 + int(seconds)

refactored

  • making it symmetrical
  • * unpacks an iterable into another iterable
def parse_time(time_string):  
    sections = time_string.split(':')
    if len(sections) == 2:
        #hours, (minutes, seconds) = 0, sections 
        hours, minutes, seconds = (0, *sections) 
    else:
        hours, minutes, seconds = sections
    return int(hours)*3600 + int(minutes)*60 + int(seconds)

refactored

  • replace if, else with try, except
  • asking for forginess is easier than asking for permission
def parse_time(time_string):
    sections = time_string.split(':')
    try:
        hours, minutes, seconds = (0, *sections)
    except ValueError:
        hours, minutes, seconds = sections
    return int(hours)*3600 + int(minutes)*60 + int(seconds)

refactored

  • if divide and want to get an integer use //
  • replace concatation with f strings
  • add padding to replace the len checks
  • remove != and rely on thruthiness
def format_time(total_seconds):
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{hours}:{minutes:02}:{seconds:02}"
    else:
        return f"{minutes:02}:{seconds:02}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment