Skip to content

Instantly share code, notes, and snippets.

@chrwang
Created July 10, 2017 16:02
Show Gist options
  • Save chrwang/c83790ef79a9a090c27f37b32055326c to your computer and use it in GitHub Desktop.
Save chrwang/c83790ef79a9a090c27f37b32055326c to your computer and use it in GitHub Desktop.
def read_parallel_1d(path, pattern='_eval_', typef='aggr', verbosity=False):
    """
    Read the 1D data files in the folder ``path`` whose names match ``pattern``.

    typef selects the file format and is one of 'json' or 'aggr':

    * 'json' - every matching file is parsed with ``yaml.safe_load`` and
      contributes one entry (``data['Q']``, ``data['I(Q)']``, ``model``) to
      the returned lists. Files are read sequentially.
    * 'aggr' - matching aggregated data files are read in chunks of 8 file
      names by a pool of 8 worker processes (through ``r2``), and the
      per-chunk results are concatenated in chunk order.

    See sasmodels/generate_sets.py for more about these formats.
    Assumes files contain 1D data.

    Any other value of typef prints an error message and returns empty lists.

    :param path: folder to read; relative or absolute, with or without a
        trailing path separator
    :param pattern: regular expression searched for in each file name
    :param typef: 'json' or 'aggr'
    :param verbosity: in 'json' mode, print progress every 100 files read
    :type path: String
    :type pattern: String
    :type typef: String
    :type verbosity: Boolean
    :return: tuple (q_list, iq_list, y_list, nlines). nlines is 0 in 'aggr'
        mode (this function never updates it) and None otherwise.
    """
    q_list, iq_list, y_list = [], [], []
    nlines = None
    # Normalise to a trailing separator so plain concatenation with a file
    # name (as the worker side may do) yields a valid path.
    folder = os.path.join(path, '')

    if typef == 'json':
        # The pattern arrives as a string; compile it once before the loop.
        matcher = re.compile(pattern)
        n = 0
        for fn in os.listdir(path):
            if not matcher.search(fn):
                continue
            with open(os.path.join(folder, fn), 'r') as fd:
                data_d = yaml.safe_load(fd)
            n += 1
            q_list.append(data_d['data']['Q'])
            iq_list.append(data_d["data"]["I(Q)"])
            y_list.append(data_d["model"])
            if verbosity and n % 100 == 0:
                print("Read " + str(n) + " files.")
    elif typef == 'aggr':
        nlines = 0
        fns = os.listdir(path)
        # One job per chunk of 8 file names: (names, folder, pattern).
        jobs = [(fns[i: i + 8], folder, pattern)
                for i in range(0, len(fns), 8)]
        if jobs:
            pool = multiprocessing.Pool(8)
            try:
                results = pool.map(r2, jobs)
            finally:
                # Always release the worker processes, even if a job failed.
                pool.close()
                pool.join()
            # Each result is a (q, iq, y) triple of lists for one chunk;
            # concatenate them so the output is flat, as in 'json' mode.
            for q_part, iq_part, y_part in results:
                q_list.extend(q_part)
                iq_list.extend(iq_part)
                y_list.extend(y_part)
    else:
        print("Error: the type " + typef + " was not recognised. Valid types "
              "are 'aggr' and 'json'.")
    return q_list, iq_list, y_list, nlines
def r2(args):
    """
    Expand one packed argument sequence into a call to read_h.

    Lets a single-argument mapper (such as Pool.map) drive read_h, which
    takes several positional arguments.

    :param args: iterable of the positional arguments to pass to read_h
    :return: whatever read_h returns for those arguments
    """
    positional = tuple(args)
    return read_h(*positional)
def read_h(fns, path, pattern):
    """
    Read one chunk of aggregated data files and collect their contents.

    Only file names in ``fns`` that match the regex ``pattern`` are opened.
    Each file is read as three lines, each parsed with ``ast.literal_eval``:

    1. a header whose element 0 is the label and element 1 the repeat count,
    2. the Q values, repeated ``count`` times in the output,
    3. an iterable of I(Q) entries, appended to the output one by one.

    A file that cannot be opened or parsed is logged with a warning and
    skipped as a whole, so a partially read file never leaves the three
    returned lists out of step with each other.

    :param fns: iterable of file names (not full paths)
    :param path: folder containing the files, with or without a trailing
        path separator
    :param pattern: regular expression searched for in each file name
    :return: tuple (q_list, iq_list, y_list)
    """
    q_list, iq_list, y_list = [], [], []
    p = re.compile(pattern)
    for fn in fns:
        if not p.search(fn):
            continue
        try:
            with open(os.path.join(path, fn), 'r') as fd:
                logging.info("Reading %s", fn)
                header = ast.literal_eval(fd.readline().strip())
                q_values = ast.literal_eval(fd.readline().strip())
                iq_rows = list(ast.literal_eval(fd.readline().strip()))
            count = header[1]
            labels = [header[0] for _ in range(count)]
            q_rows = [q_values for _ in range(count)]
        except (EnvironmentError, ValueError, SyntaxError, TypeError,
                LookupError) as e:
            # Best effort: report the bad file and carry on with the rest.
            logging.warning("skipped %s: %s", fn, e)
            continue
        # Only commit once the whole file parsed, keeping the lists aligned.
        y_list.extend(labels)
        q_list.extend(q_rows)
        iq_list.extend(iq_rows)
    return q_list, iq_list, y_list
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment