Skip to content

Instantly share code, notes, and snippets.

@alexcasalboni
Created April 26, 2015 21:51
Show Gist options
  • Save alexcasalboni/97443bb586906e372612 to your computer and use it in GitHub Desktop.
Save alexcasalboni/97443bb586906e372612 to your computer and use it in GitHub Desktop.
HAR dataset to csv
import re

try:
    from itertools import izip  # Python 2: lazy pairwise iteration
except ImportError:
    izip = zip  # Python 3: the built-in zip is already lazy
# Path of the combined CSV file this script writes.
output_file = 'dataset.csv'
# Each key is a text file whose whitespace-separated values become CSV fields;
# its value is the file read in lockstep with it, whose lines supply the first
# field of each CSV row.
input_files = {
'train/X_train.txt': 'train/y_train.txt',
'test/X_test.txt': 'test/y_test.txt'
}
def getOutputLines(filenames):
    """Yield CSV rows built from paired feature/label text files.

    Args:
        filenames: Mapping of feature-file path to label-file path. The two
            files of each pair are read line by line in lockstep.

    Yields:
        One newline-terminated string per line pair: the stripped label line
        followed by every whitespace-separated value of the feature line,
        joined with commas.

    Pairing stops at the end of the shorter file of each pair, so surplus
    lines in the longer file are ignored.
    """
    # Pick a lazy zip that works on both Python 2 and Python 3 so that large
    # input files are streamed instead of being loaded into memory.
    try:
        from itertools import izip as lazy_zip  # Python 2
    except ImportError:
        lazy_zip = zip  # Python 3: built-in zip is already lazy

    # .items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    for features_path, labels_path in filenames.items():
        with open(features_path) as features_file, \
                open(labels_path) as labels_file:
            for features_line, label_line in lazy_zip(features_file,
                                                      labels_file):
                # str.split() with no argument drops leading/trailing
                # whitespace and treats any run of whitespace as a single
                # separator; a blank feature line yields just the label.
                fields = [label_line.strip()] + features_line.split()
                yield ','.join(fields) + "\n"
# Stream every generated CSV row into the output file. The file is only
# written, so plain 'w' (truncate and write) is sufficient.
with open(output_file, 'w') as f:
    # writelines() consumes the generator row by row; passing it directly
    # avoids calling writelines() on a single string, which would iterate
    # that string character by character.
    f.writelines(getOutputLines(input_files))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment