Skip to content

Instantly share code, notes, and snippets.

@sansumbrella
Created February 4, 2011 04:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sansumbrella/810744 to your computer and use it in GitHub Desktop.
Save sansumbrella/810744 to your computer and use it in GitHub Desktop.
Patches holes in tabular data. Good for cleaning up TSV and CSV files.
#!/usr/bin/env python
# encoding: utf-8
# David Wicks
"""
patcher.py
Fills holes in tabular data files (TSV)
basic usage: python patcher.py fileToPatch.tsv
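more control: python patcher.py fileToPatch.csv outputfile.csv replacementValue ,
(the arguments after the input file are, in order: output file, replacement value, separator — here a comma)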
"""
import sys
import os
# for command-line execution
def main(argv=None):
    """Run the patcher from the command line.

    argv is the full argument vector with the program name first; it
    defaults to sys.argv.  The one to four arguments after the program
    name are handed to patch() positionally: input file, output file,
    replacement value and separator.

    Returns None.  Problems are reported on standard output.
    """
    if argv is None:
        argv = sys.argv
    params = argv[1:]

    # patch() needs a filename and takes at most three optional extras.
    # Any other argument count must be reported, otherwise the script
    # exits without doing or saying anything.
    if not 1 <= len(params) <= 4:
        print("That didn't work. Try including a filename to load.")
        return

    try:
        patch(*params)
    except (IOError, OSError, ValueError) as err:
        # Only failures the user can act on are handled here (a file that
        # cannot be read or written, or a value patch() rejects).  Ctrl-C
        # and programming errors are deliberately left to propagate.
        print("That didn't work. Try including a filename to load.")
        print("Reason: %s" % err)
# does the patching
def patch(filename, outputName="patched.tsv", filler="empty", sep="\t"):
    """Fill the empty cells of a delimited text file and save the result.

    filename   -- path of the file to read
    outputName -- path of the file to write (may be the same as filename)
    filler     -- text written into every empty cell
    sep        -- cell separator, a tab by default; use "," for CSV

    A cell is empty when nothing sits between two separators, between the
    start of a row and its first separator, or between the last separator
    and the end of the row.  Rows that contain no separator at all (blank
    lines, for instance) are copied unchanged.

    Raises ValueError if sep is empty; errors from open() propagate.
    Returns None and prints a one-line summary when done.
    """
    if not sep:
        # An empty separator cannot delimit cells (str.split rejects it).
        raise ValueError("separator must not be empty")

    # Read the whole input before the output is opened so that patching a
    # file in place (outputName == filename) does not truncate it first.
    with open(filename, 'r') as source:
        lines = source.readlines()

    patchedLines = []
    for line in lines:
        # Set the line terminator aside so a hole in the last cell is seen.
        row = line.rstrip("\r\n")
        ending = line[len(row):]
        if sep in row:
            # Splitting visits every cell exactly once, so this terminates
            # whatever the filler is (even empty, or containing sep).
            cells = [cell if cell else filler for cell in row.split(sep)]
            line = sep.join(cells) + ending
        patchedLines.append(line)

    with open(outputName, 'w') as output:
        output.write("".join(patchedLines))
    print("Patched holes %s with '%s' and saved to %s." % (filename, filler, outputName))
def patchLine(line, patch):
    """Return patch when line is exactly the empty string, else line itself."""
    return patch if line == '' else line
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment