Skip to content

Instantly share code, notes, and snippets.

@phumpal phumpal/README.md
Last active Aug 11, 2018

Embed
What would you like to do?
Devin's WormDeath processor

WormDeath Processor

WormDeath Processor is a simple and not so idiomatic Python script to process strain worm deaths over a given number of days.

The number of days and the number of strains can both vary. Days on which a strain had 0 worm deaths produce no output rows; every dead worm becomes one row of a two-column CSV (Day, WormDeaths), with the strains processed in series.

Usage

  • -i or --input input CSV
  • -o or --output output CSV

Example

Let's process test data in deathsheet.csv by running

python SCRIPT.py -i deathsheet.csv -o output.csv    (replace SCRIPT.py with the filename you saved the script under)

The output is now in two-column format: all rows for the first strain come first, then all rows for the second strain, and so on.

#!/usr/bin/env python
import argparse # https://docs.python.org/2/library/argparse.html
import csv # https://docs.python.org/2.7/library/csv.html
import sys # https://docs.python.org/2/library/sys.html
from collections import defaultdict # https://docs.python.org/2/library/collections.html
class DefaultHelpParser(argparse.ArgumentParser):
    """ArgumentParser variant that shows its help text when parsing fails."""

    def error(self, message):
        """Write *message* to stderr, print the parser help, and exit with status 2."""
        complaint = 'error: %s\n' % message
        sys.stderr.write(complaint)
        self.print_help()
        sys.exit(2)
def _open_csv(path, mode):
    """Open *path* with the newline handling the csv module expects.

    Python 3's csv module wants text files opened with ``newline=''``, while
    Python 2's wants binary mode.  Either way the line terminators written by
    the csv writer are not translated a second time on Windows.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def parseCsv(inputFile, outputFile):
    """Expand a per-day death-count sheet into one output row per dead worm.

    The input CSV must have a header row whose first column is ``Day``; every
    further column is one strain holding the integer number of deaths for
    that day.  The output CSV has the header ``Day,WormDeaths`` followed, for
    each strain in header order, by one ``<day>,1`` row per dead worm.  Days
    with zero deaths therefore contribute no rows.

    Args:
        inputFile: path of the CSV to read.
        outputFile: path of the CSV to write; overwritten if it exists.

    Raises:
        ValueError: if a cell is blank or not an integer.
    """
    columns = defaultdict(list)  # column name -> list of integer cell values
    with _open_csv(inputFile, 'r') as source:
        reader = csv.DictReader(source)
        # fieldnames is None for an empty file; treat that as "no strains"
        # instead of crashing while fetching the header row.
        header = reader.fieldnames or []
        strains = header[1:]  # first column is Day, the rest are strains
        for row in reader:
            for name, value in row.items():
                columns[name].append(int(value))

    days = columns['Day']

    # The input is fully read before the output is opened, so an unreadable
    # input never leaves a truncated output file behind.
    with _open_csv(outputFile, 'w') as sink:
        writer = csv.DictWriter(sink, fieldnames=['Day', 'WormDeaths'])
        writer.writeheader()
        for strain in strains:
            for day, dead in zip(days, columns[strain]):
                record = {'Day': str(day), 'WormDeaths': 1}
                for _ in range(dead):  # one row per dead worm
                    writer.writerow(record)
def main(argv=None):
    """Command-line entry point: parse the options and convert the sheet.

    Args:
        argv: optional list of argument strings; when None, argparse reads
            the process's command-line arguments.

    Exits with status 2 on a usage error (after printing the help text) and
    with status 1 when input and output name the same file.
    """
    import os  # function-scope import: only the same-file check needs it

    # DefaultHelpParser prints the full help on a usage error, which is what
    # the class was defined for; a plain ArgumentParser would bypass it.
    parser = DefaultHelpParser()
    parser.add_argument("-i", "--input", help="Input file name", required=True)
    parser.add_argument("-o", "--output", help="Output file name", required=True)
    args = parser.parse_args(argv)

    # Compare resolved paths so that spellings such as "a.csv" and "./a.csv"
    # (or a symlink) cannot slip through and overwrite the input.
    source = os.path.normcase(os.path.realpath(args.input))
    target = os.path.normcase(os.path.realpath(args.output))
    if source == target:
        sys.stderr.write('Input and output file cannot be the same\n')
        sys.exit(1)  # not a usage error, so not status 2

    parseCsv(args.input, args.output)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.