Created
July 10, 2017 16:02
-
-
Save chrwang/c83790ef79a9a090c27f37b32055326c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_parallel_1d(path, pattern='_eval_', typef='aggr', verbosity=False):
    """
    Read 1D data files from the folder ``path``.

    Only files whose names match the regex ``pattern`` are opened. ``path``
    can be a relative or absolute path, with or without a trailing
    separator.

    ``typef`` is one of ``'json'`` or ``'aggr'``:

    * ``'json'`` - every matching file is parsed with ``yaml.safe_load`` and
      must provide ``data['Q']``, ``data['I(Q)']`` and ``model``. Files are
      read sequentially in the calling process.
    * ``'aggr'`` - aggregated data files. The directory listing is split
      into chunks of file names which are read in worker processes through
      ``r2`` (a ``Pool.map`` adapter); the per-chunk results are then
      concatenated in listing order.

    See sasmodels/generate_sets.py for more about these formats.
    Assumes files contain 1D data.

    Any other ``typef`` prints an error message and returns empty lists.

    :param path: folder to read from
    :type path: String
    :param pattern: regex that a file name must match to be read
    :type pattern: String
    :param typef: file format, ``'json'`` or ``'aggr'``
    :type typef: String
    :param verbosity: in json mode, print progress every 100 files
    :type verbosity: Boolean
    :returns: ``(q_list, iq_list, y_list, nlines)``. ``nlines`` is 0 in
        ``'aggr'`` mode and None otherwise; it is not updated by the reads.
    """
    q_list, iq_list, y_list = [], [], []
    nlines = None

    if typef == 'json':
        # ``pattern`` arrives as a string; it must be compiled before
        # ``.search`` can be used on it.
        matcher = re.compile(pattern)
        n = 0
        for fn in os.listdir(path):
            if not matcher.search(fn):
                continue
            with open(os.path.join(path, fn), 'r') as fd:
                data_d = yaml.safe_load(fd)
            q_list.append(data_d['data']['Q'])
            iq_list.append(data_d['data']['I(Q)'])
            y_list.append(data_d['model'])
            n += 1
            if verbosity and n % 100 == 0:
                print("Read " + str(n) + " files.")
    elif typef == 'aggr':
        nlines = 0
        chunk_size = 8
        n_workers = 8
        names = os.listdir(path)
        # One job per chunk of file names. The pattern is passed as a
        # string because each worker compiles it itself.
        jobs = [(names[i:i + chunk_size], path, pattern)
                for i in range(0, len(names), chunk_size)]
        if jobs:
            pool = multiprocessing.Pool(n_workers)
            try:
                results = pool.map(r2, jobs)
            finally:
                # Always release the worker processes, even on failure.
                pool.close()
                pool.join()
            # Each result is a (q, iq, y) triple for one chunk; merge the
            # triples component-wise so the three lists stay aligned.
            for q_part, iq_part, y_part in results:
                q_list.extend(q_part)
                iq_list.extend(iq_part)
                y_list.extend(y_part)
    else:
        print("Error: the type " + typef + " was not recognised. Valid types "
              "are 'aggr' and 'json'.")
    return q_list, iq_list, y_list, nlines
def r2(args):
    """
    Adapter that lets ``Pool.map`` call ``read_h``.

    ``Pool.map`` hands each worker a single argument, so the arguments for
    ``read_h`` travel as one sequence and are spread out here.
    """
    positional = tuple(args)
    return read_h(*positional)
def read_h(fns, path, pattern):
    """
    Read the aggregated data files in ``fns`` that match ``pattern``.

    Each file is read as three lines, each parsed with
    ``ast.literal_eval``:

    1. a sequence whose first item is the label and whose second item is a
       repeat count,
    2. the Q values, which are repeated ``count`` times in the output,
    3. a sequence of I(Q) entries that is appended as-is.

    A file that cannot be opened or parsed is skipped with a logged warning
    and contributes nothing to the output, so the three lists stay aligned.

    :param fns: file names (relative to ``path``) to consider
    :param path: folder containing the files
    :param pattern: regex (string) that a file name must match to be read
    :returns: ``(q_list, iq_list, y_list)``
    """
    q_list, iq_list, y_list = [], [], []
    matcher = re.compile(pattern)
    for fn in fns:
        if not matcher.search(fn):
            continue
        try:
            with open(os.path.join(path, fn), 'r') as fd:
                logging.info("Reading %s", fn)
                header = ast.literal_eval(fd.readline().strip())
                q_values = ast.literal_eval(fd.readline().strip())
                iq_values = ast.literal_eval(fd.readline().strip())
            label, count = header[0], header[1]
            # Build the new entries before touching the output lists so a
            # malformed file cannot leave them partially extended.
            new_y = [label for _ in range(count)]
            new_q = [q_values for _ in range(count)]
            new_iq = list(iq_values)
        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the chunk.
            logging.warning("skipped %s: %s", fn, e)
            continue
        y_list.extend(new_y)
        q_list.extend(new_q)
        iq_list.extend(new_iq)
    return q_list, iq_list, y_list
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment