Last active
June 23, 2018 17:58
-
-
Save j-faria/517c08862e9a32847b4b to your computer and use it in GitHub Desktop.
.rdb file reader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import cStringIO | |
# Example .rdb path; the reader functions take their own `filename` argument.
filename = 'example.rdb'
def read_rdb(filename):
    """Read a .rdb file into a numpy structured array.

    The file may contain comment lines starting with '#' and a header of
    the form

        col1    col2    col3
        ----    ----    ----

    Parameters
    ----------
    filename : str
        Path of the .rdb file to read.

    Returns
    -------
    numpy.ndarray
        Structured (record-like) array whose field names are the column
        names. Column types are inferred by ``np.genfromtxt``
        (``dtype=None``).
    """
    with open(filename) as f:
        # Drop the "----" separator below the column names and every
        # comment line, so the first remaining line holds the column names.
        lines = [line for line in f if not line.startswith(('--', '#'))]

    # genfromtxt accepts a list of lines directly, so no in-memory file
    # object (cStringIO, which is Python 2 only) is needed.
    return np.genfromtxt(lines, names=True, dtype=None)
import csv | |
from collections import OrderedDict | |
# Slower than read_rdb!
def read_rdb_asdict(filename):
    """Read a tab-separated .rdb file into an ordered dictionary.

    The file must start with a header of the form

        col1    col2    col3
        ----    ----    ----

    Comment lines are not handled by this reader.

    Parameters
    ----------
    filename : str
        Path of the .rdb file to read.

    Returns
    -------
    collections.OrderedDict
        Maps each column name (in file order) to a numpy array with that
        column's values. Fields that parse as floats are stored as floats;
        any other field is kept as the original string.
    """
    # Text mode: csv.reader needs str lines under Python 3 ('rb' would
    # hand it bytes and fail).
    with open(filename) as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        # First line has the column names.
        names = next(reader)
        # Second line only has the "----" separators; skip it.
        next(reader)

        columns = OrderedDict((name, []) for name in names)
        for row in reader:
            for name, field in zip(names, row):
                try:
                    value = float(field)
                except ValueError:
                    # Not a number: keep the raw text.
                    value = field
                columns[name].append(value)

    return OrderedDict(
        (name, np.array(values)) for name, values in columns.items()
    )
Also, if using Python 3, change lines 39 and 44 to:
next(reader)
see: https://stackoverflow.com/questions/42767250/python-csv2libsvm-py-attributeerror-csv-reader-object-has-no-attribute-nex
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this @j-faria. I'm wondering if line 55 should be:
data[k] = list(v)