Skip to content

Instantly share code, notes, and snippets.

@jb55
Last active February 14, 2018 20:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jb55/a6dc2a8a9b77b490b11ce487fb8973fa to your computer and use it in GitHub Desktop.
Save jb55/a6dc2a8a9b77b490b11ce487fb8973fa to your computer and use it in GitHub Desktop.
name age interest age2 age3
bill 29 bitcoin 29 29
vanessa 33 wine 33 33
#!/usr/bin/env python
import sys
import re
# Matches the line terminator (LF, optionally preceded by CR) at the end of a line.
eol = re.compile("\r?\n$")
def build_header_table(cols):
    """Return a square table of zeros with one row and one column per entry in ``cols``.

    Every row is a separate list, so changing one row does not affect the others.
    """
    size = len(cols)
    return [[0] * size for _ in range(size)]
def get_row(line):
    """Split one line into its tab-separated fields.

    The trailing line terminator matched by the module-level ``eol`` pattern
    is removed first, so the last field does not carry a newline.
    """
    return eol.sub('', line).split("\t")
def populate_checklist(table):
    """Return every ``[j, k]`` index pair of ``table`` with ``j != k``.

    ``j`` runs over the rows of ``table`` and ``k`` over the positions inside
    row ``j``; pairs are produced in row-major order and both ``[j, k]`` and
    ``[k, j]`` appear in the result.
    """
    return [
        [row_idx, col_idx]
        for row_idx, cells in enumerate(table)
        for col_idx in range(len(cells))
        if row_idx != col_idx
    ]
def main(handle):
    """Print the pairs of columns whose values are equal on every data row.

    The first line read from ``handle`` is treated as the header and only
    determines how many columns there are. Every following line is split
    with ``get_row``; a column pair remains a candidate only while its two
    fields compare equal. The surviving pairs are printed one per line as
    1-based column numbers separated by a tab, lower column first.

    Args:
        handle: Iterable of tab-separated text lines, header line first.

    Raises:
        IndexError: If a data row has fewer fields than the header.
    """
    candidates = None
    for line in handle:
        row = get_row(line)
        if candidates is None:
            # Header row: seed the candidates with each column pair once.
            # populate_checklist returns both [j, k] and [k, j], so keep
            # only the j < k orientation.
            table = build_header_table(row)
            candidates = [(j, k) for j, k in populate_checklist(table) if j < k]
            continue
        # Build a new list instead of removing entries from the list being
        # iterated: in-place removal makes the loop skip the element that
        # follows each removed one, leaving mismatching pairs behind.
        candidates = [(j, k) for j, k in candidates if row[j] == row[k]]

    # Empty input never sets candidates; print nothing in that case.
    for j, k in candidates or ():
        print("{}\t{}".format(j + 1, k + 1))
if __name__ == "__main__":
    # Read the file named on the command line, or stdin when none is given.
    # The file is opened in text mode so lines are str, matching the str
    # regex that get_row applies (bytes lines raise TypeError on Python 3).
    # The with-block closes the file once main returns.
    if len(sys.argv) >= 2:
        with open(sys.argv[1]) as handle:
            main(handle)
    else:
        main(sys.stdin)
@jb55
Copy link
Author

jb55 commented Feb 14, 2018

./uniqcol.py <(csv2tsv test.csv)
2	4
2	5
4	5

@jb55
Copy link
Author

jb55 commented Feb 14, 2018

Need to merge these pairs now to find common subsets 🤔

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment