brendano (owner)

Revisions

gist: 63810 Download_button fork
public
Description:
csv2tsv
Public Clone URL: git://gist.github.com/63810.git
Embed All Files: show embed
csv2tsv #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python2.6
"""
Input is Excel-style CSV. Either stdin or filename.
Output is honest-to-goodness tsv: no quoting or any \\n\\r\\t.
"""
 
from __future__ import print_function
import csv, sys
 
# Running total of warning() calls, and the cap on how many are printed.
warning_count = 0
warning_max = 50


def warning(s):
  """Report ``s`` on stderr as ``WARNING: <s>``, at most ``warning_max`` times.

  Every call bumps the module-level ``warning_count``, including calls whose
  message is dropped because the cap has already been reached.
  """
  global warning_count
  warning_count += 1
  if warning_count <= warning_max:
    print("WARNING:", s, file=sys.stderr)
 
 
def cell_text_clean(text):
  """Return ``text`` with each tab, newline and carriage return turned into a space.

  For every kind of character that is present in the cell, one message is
  passed to ``warning`` (in the order tab, newline, carriage return) before
  it is replaced.
  """
  clobbered = (
    ("\t", "Clobbering embedded tab"),
    ("\n", "Clobbering embedded newline"),
    ("\r", "Clobbering embedded carriage return"),
  )
  cleaned = text
  for char, message in clobbered:
    if char in cleaned:
      warning(message)
    # The replacement is a plain space, so it can never introduce one of the
    # characters that a later iteration looks for.
    cleaned = cleaned.replace(char, " ")
  return cleaned
 
def clean_row(row):
  """Return a new list holding ``cell_text_clean(cell)`` for each cell of ``row``."""
  return list(map(cell_text_clean, row))
 
# Choose the CSV source: exactly one filename argument, or piped stdin.
args = sys.argv[1:]
if len(args) == 1:
  # The csv module needs the untranslated line endings so that it can deal
  # with newlines embedded in quoted cells itself: that means binary mode on
  # Python 2 and newline="" on Python 3. Plain text mode can mangle such
  # cells on some platforms.
  # The file stays open because rows are pulled from the reader lazily by the
  # loop further down.
  if sys.version_info[0] >= 3:
    reader = csv.reader(open(args[0], newline=""))
  else:
    reader = csv.reader(open(args[0], "rb"))
elif len(args) > 1:
  # could try to enforce conformity, or meld them together, etc.
  raise Exception("No support for multiple files yet")
elif not sys.stdin.isatty():
  # Something is piped or redirected into stdin: read the CSV from there.
  reader = csv.reader(sys.stdin)
else:
  # No filename and stdin is a terminal: print the module docstring as usage.
  print(__doc__.strip())
  sys.exit(1)
 
#header = reader.next()
#print "\t".join(clean_row(header))
 
#print(sys.stdout.encoding)
#sys.exit(0)
 
# Write each CSV record as one line of tab-separated, cleaned cells.
for record in reader:
  cells = clean_row(record)
  print("\t".join(cells))