Created
January 11, 2019 00:14
Python CSV filter: replaces repeated values in columns with spaces. Originally used to clean CSV files generated by the to_csv() method of a pandas DataFrame with a MultiIndex.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clean csv with multiindices
#
# Command line arguments:
#   original csv file
#   number of columns to check (optional; defaults to the number of
#   comma-separated fields on the first line)
#
# The filtered text is written to standard output.
from sys import argv, stdout


def blank_repeats(lines, n=None):
    """Yield each line with repeated leading column values blanked out.

    The first ``n`` comma-separated fields of every line are compared,
    position by position, with the same fields of the previous line.  A
    field equal to its predecessor is replaced by a run of spaces of the
    same length; every other field, and everything after the first ``n``
    fields, is passed through unchanged.

    Fields are split on bare commas only: quoted fields that contain a
    comma are not recognized as a single field.

    Args:
        lines: iterable of text lines, with or without line endings.
        n: number of leading columns to check.  When ``None`` it is taken
            from the number of fields on the first line.

    Yields:
        The filtered lines, each with its original line ending.

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n is not None and n < 0:
        raise ValueError('number of columns must be non-negative')

    # No previous row yet: nothing on the first line counts as a repeat.
    previous = ()
    for line in lines:
        # Detach the line ending so it neither takes part in the comparison
        # of the last field nor gets overwritten by the blanking spaces.
        body = line.rstrip('\r\n')
        ending = line[len(body):]

        if n is None:
            # define n from the 1-st line
            n = len(body.split(','))

        # At most n splits: anything past the n-th column stays in one piece.
        fields = body.split(',', n)
        checked = fields[:n]

        cleaned = []
        for idx, current in enumerate(checked):
            # A shorter previous row has nothing to compare against at this
            # position, so the field is kept instead of being dropped.
            if idx < len(previous) and previous[idx] == current:
                cleaned.append(' ' * len(current))
            else:
                cleaned.append(current)
        cleaned.extend(fields[n:])

        yield ','.join(cleaned) + ending

        # Compare the next row with the original values, not the blanked ones.
        previous = checked


def main(args):
    """Filter the CSV file named in ``args[1]`` and write it to stdout.

    ``args`` has the layout of ``sys.argv``; ``args[2]``, when present, is
    the number of leading columns to check.
    """
    if len(args) < 2:
        raise SystemExit('usage: python <script> CSV_FILE [N_COLUMNS]')
    n = int(args[2]) if len(args) > 2 else None
    with open(args[1], 'r') as f:
        for cleaned_line in blank_repeats(f, n):
            stdout.write(cleaned_line)


if __name__ == '__main__':
    main(argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment