Skip to content

Instantly share code, notes, and snippets.

@brendano
Created February 13, 2009 09:12
Show Gist options
  • Save brendano/63810 to your computer and use it in GitHub Desktop.
Save brendano/63810 to your computer and use it in GitHub Desktop.
csv2tsv
#!/usr/bin/env python2.6
"""
Input is Excel-style CSV. Either stdin or filename.
Output is honest-to-goodness tsv: no quoting or any \\n\\r\\t.
"""
from __future__ import print_function
import csv, sys
# Number of warnings requested so far, including the suppressed ones.
warning_count = 0
# Maximum number of warnings that are actually written to stderr.
warning_max = 50


def warning(s):
    """Write ``WARNING: <s>`` to stderr, at most ``warning_max`` times.

    Every call increments the module-level ``warning_count``.  The first
    call past the cap writes a single notice that further warnings are
    being suppressed; all later calls are silent, so a badly formed input
    cannot flood stderr.
    """
    global warning_count
    warning_count += 1
    if warning_count > warning_max:
        # Tell the user exactly once that output is being truncated.
        if warning_count == warning_max + 1:
            print("WARNING: further warnings suppressed", file=sys.stderr)
        return
    print("WARNING:", s, file=sys.stderr)
def cell_text_clean(text):
    """Return *text* with tabs, newlines and carriage returns made spaces.

    Each tab, newline or carriage return is replaced by a single space so
    the cell can be written into a TSV line without quoting.  One warning
    is issued per kind of character found in the cell, not per occurrence.
    """
    s = text
    if "\t" in s:
        warning("Clobbering embedded tab")
    if "\n" in s:
        warning("Clobbering embedded newline")
    if "\r" in s:
        warning("Clobbering embedded carriage return")
    return s.replace("\t", " ").replace("\n", " ").replace("\r", " ")
def clean_row(row):
    """Return a list with every cell of *row* passed through cell_text_clean."""
    return [cell_text_clean(x) for x in row]
# Command-line arguments without the program name.
args = sys.argv[1:]
# File opened by this script (if any), kept so it can be closed afterwards.
infile = None
if len(args) == 1:
    infile = open(args[0])
    reader = csv.reader(infile)
elif len(args) > 1:
    # could try to enforce conformity, or meld them together, etc.
    raise Exception("No support for multiple files yet")
elif not sys.stdin.isatty():
    # No filename given but input is piped/redirected: read CSV from stdin.
    reader = csv.reader(sys.stdin)
else:
    # No filename and stdin is a terminal: show the usage text and fail.
    print(__doc__.strip())
    sys.exit(1)

try:
    # Emit one tab-separated line per CSV row, with each cell cleaned.
    for row in reader:
        print(*clean_row(row), sep="\t")
finally:
    # Close the input file even if reading or writing fails part-way.
    if infile is not None:
        infile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment