Last active
August 29, 2015 14:15
-
-
Save rinfz/d06d96c7522c15088c34 to your computer and use it in GitHub Desktop.
fileio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import os, sys | |
import numpy | |
def get_data_range(filename):
    """Return the first and last values of a comma-separated file, ordered.

    Only the text before the first comma and the text after the last comma
    are parsed; everything in between is ignored. A file without any comma
    is treated as a single value, which is returned for both ends.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(low, high)`` tuple holding the smaller and the larger of the
        two parsed values (``int`` when the text is an integer, ``float``
        otherwise).

    Raises:
        ValueError: If the first or last field is not a number (this
            includes an empty file).
    """
    # Read once inside a context manager so the handle is always closed.
    with open(filename) as handle:
        fields = handle.read().split(",")

    def parse(text):
        # Plain numeric parsing replaces eval(): the fields come straight
        # from a file on disk and must not be executed as code.
        text = text.strip()
        try:
            return int(text)
        except ValueError:
            return float(text)

    first = parse(fields[0])
    last = parse(fields[-1])
    # Compare the parsed numbers, not their text: as strings "10" < "9".
    return min(first, last), max(first, last)
def data_files_from_dir(directory):
    """List the ``.dat`` files that sit directly inside *directory*.

    Sub-directories are not searched.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        A sorted list of paths, each one *directory* joined with a file
        name ending in ``.dat``, so the entries can be opened regardless of
        the current working directory. The list is empty when the
        directory yields no entries (for example, when it does not exist).
    """
    # Only the first os.walk() entry describes the directory itself; taking
    # just that one avoids traversing the whole tree below it.
    top_level = next(os.walk(directory), None)
    if top_level is None:
        return []
    file_names = top_level[2]
    return sorted(
        os.path.join(directory, name)
        for name in file_names
        if name.endswith(".dat")
    )
def ranges_in_dir(directory):
    """Pair each data file reported for *directory* with its value range.

    Returns a list of ``(file, range)`` tuples, where ``file`` is an entry
    produced by ``data_files_from_dir(directory)`` and ``range`` is the
    result of ``get_data_range`` for that entry.
    """
    pairs = []
    for data_file in data_files_from_dir(directory):
        pairs.append((data_file, get_data_range(data_file)))
    return pairs
def select_required_files(data, required, ranges):
    """Pick the files whose range contains a required bound not yet in *data*.

    A file is selected when its range contains ``min(required)`` while
    *data* does not reach down to it, or contains ``max(required)`` while
    *data* does not reach up to it. Both ends of a file's range are
    inclusive, and the comparison works for floats as well as integers.

    Args:
        data: Values already available. May be empty, in which case both
            required bounds count as missing.
        required: Iterable whose minimum and maximum delimit the wanted
            range.
        ranges: Iterable of entries whose first item is a file name and
            whose second item is that file's range (an iterable of values).

    Returns:
        A list of the selected file names, without duplicates, in the order
        they first appear in *ranges*.
    """
    need_low, need_high = min(required), max(required)

    # With no data at all, neither bound is covered yet.
    no_data = len(data) == 0
    low_missing = no_data or min(data) > need_low
    high_missing = no_data or max(data) < need_high

    # NOTE(review): a file lying strictly between the two bounds is never
    # selected, even if it holds values absent from data - confirm that
    # this is the intended selection rule.
    selected = []
    already_selected = set()
    for entry in ranges:
        name, file_range = entry[0], entry[1]
        range_low, range_high = min(file_range), max(file_range)
        # Inclusive on both ends: the file really does contain its maximum.
        holds_low = low_missing and range_low <= need_low <= range_high
        holds_high = high_missing and range_low <= need_high <= range_high
        if (holds_low or holds_high) and name not in already_selected:
            already_selected.add(name)
            selected.append(name)
    return selected
def retrieve_new_data(files, required):
    """Load comma-separated values from *files* and keep those in range.

    Args:
        files: Iterable of file paths readable by ``numpy.loadtxt`` with a
            comma delimiter.
        required: Iterable whose minimum and maximum form the inclusive
            range of values to keep.

    Returns:
        A list of the loaded values ``v`` with
        ``min(required) <= v <= max(required)``, in file order and then in
        the order the values appear inside each file.
    """
    low, high = min(required), max(required)
    values = []
    for path in files:
        loaded = numpy.loadtxt(path, delimiter=',')
        # loadtxt returns a 0-d array for a single value and a 2-d array
        # for several rows; flatten both so every element is one scalar.
        values.extend(numpy.atleast_1d(loaded).ravel())
    return [value for value in values if low <= value <= high]
def add_new_data(data, newdata):
    """Append *newdata* to *data* in place, then sort *data*.

    Each new value is converted to the type of ``data[0]`` before it is
    appended, so the list keeps a single element type. When *data* is
    empty there is no element to take a type from, and the new values are
    appended unchanged.

    Args:
        data: List to extend and sort; it is modified in place.
        newdata: Iterable of values to add.

    Returns:
        None.
    """
    if data:
        convert = type(data[0])
        data.extend(convert(value) for value in newdata)
    else:
        # Nothing to infer a type from: keep the incoming values as they are.
        data.extend(newdata)
    data.sort()
def find_missing_data(data, required):
    """Return the values of *required* that do not occur in *data*.

    Args:
        data: Collection of values that are already available.
        required: Iterable of values that are wanted.

    Returns:
        A list of the elements of *required* absent from *data*, in the
        order they appear in *required*.
    """
    try:
        # A set makes each membership test constant-time.
        available = set(data)
    except TypeError:
        # Unhashable elements: fall back to scanning data directly.
        available = data
    return [value for value in required if value not in available]
# Demo run: start from the values 1..8, pull any values within datarange out
# of the .dat files in the current directory, then report which integers of
# datarange are still absent.
# A real list is needed because add_new_data() appends to it and sorts it.
data = list(range(1, 9))
datarange = [-5, 11]
newdata = retrieve_new_data(
    select_required_files(data, datarange, ranges_in_dir(".")), datarange)
add_new_data(data, newdata)
print(data)
# The upper bound is part of the required range (retrieve_new_data() keeps
# values up to and including it), so the check has to cover it as well.
print("%s %s" % (
    "Need to generate classifiers in from the following values: ",
    list(find_missing_data(
        data, range(min(datarange), max(datarange) + 1)))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment