Skip to content

Instantly share code, notes, and snippets.

@tkazusa
Created November 11, 2018 14:28
Show Gist options
  • Save tkazusa/3f2f0f6b0ce8286ccea2040c78950d96 to your computer and use it in GitHub Desktop.
Save tkazusa/3f2f0f6b0ce8286ccea2040c78950d96 to your computer and use it in GitHub Desktop.
def load_train_data():
    """Load every gzipped CSV in ``../data/train`` into a single DataFrame.

    Files matching ``../data/train/*.csv.gz`` (relative to the current
    working directory) are processed in sorted path order. Each path is
    handed to ``read_csv`` in a worker process from a default-sized
    ``Pool``, and the per-file results are concatenated row-wise with a
    fresh 0..n-1 index. The final shape is logged at INFO level.

    NOTE(review): ``read_csv`` and ``logger`` are defined outside this
    block; ``read_csv`` presumably returns one DataFrame per path -- confirm.

    Returns:
        The concatenated result of all per-file reads.

    Raises:
        ValueError: propagated from ``pd.concat`` when no file matches.
    """
    csv_files = glob.glob('../data/train/*.csv.gz')
    csv_files.sort()  # deterministic row order regardless of directory listing order

    with Pool() as pool:
        frames = pool.map(read_csv, csv_files)
        combined = pd.concat(frames, ignore_index=True, axis=0, copy=False)

    logger.info('data size {}'.format(combined.shape))
    return combined
def load_val_data():
    """Load every gzipped CSV in ``../data/val`` into a single DataFrame.

    Files matching ``../data/val/*.csv.gz`` (relative to the current
    working directory) are processed in sorted path order. Each path is
    handed to ``read_csv`` in a worker process from a default-sized
    ``Pool``, and the per-file results are concatenated row-wise with a
    fresh 0..n-1 index. The final shape is logged at INFO level.

    NOTE(review): ``read_csv`` and ``logger`` are defined outside this
    block; ``read_csv`` presumably returns one DataFrame per path -- confirm.

    Returns:
        The concatenated result of all per-file reads.

    Raises:
        ValueError: propagated from ``pd.concat`` when no file matches.
    """
    csv_files = glob.glob('../data/val/*.csv.gz')
    csv_files.sort()  # deterministic row order regardless of directory listing order

    with Pool() as pool:
        frames = pool.map(read_csv, csv_files)
        combined = pd.concat(frames, ignore_index=True, axis=0, copy=False)

    logger.info('data size {}'.format(combined.shape))
    return combined
def load_test_data():
    """Load every gzipped CSV in ``../data/test`` into a single DataFrame.

    Files matching ``../data/test/*.csv.gz`` (relative to the current
    working directory) are processed in sorted path order. Each path is
    handed to ``read_csv`` in a worker process from a default-sized
    ``Pool``, and the per-file results are concatenated row-wise with a
    fresh 0..n-1 index. The final shape is logged at INFO level.

    NOTE(review): ``read_csv`` and ``logger`` are defined outside this
    block; ``read_csv`` presumably returns one DataFrame per path -- confirm.

    Returns:
        The concatenated result of all per-file reads.

    Raises:
        ValueError: propagated from ``pd.concat`` when no file matches.
    """
    csv_files = glob.glob('../data/test/*.csv.gz')
    csv_files.sort()  # deterministic row order regardless of directory listing order

    with Pool() as pool:
        frames = pool.map(read_csv, csv_files)
        combined = pd.concat(frames, ignore_index=True, axis=0, copy=False)

    logger.info('data size {}'.format(combined.shape))
    return combined
def load_all_data():
    """Load the train, val and test CSVs into a single DataFrame.

    Collects ``*.csv.gz`` files from ``../data/train``, ``../data/val`` and
    ``../data/test`` (relative to the current working directory), in that
    split order and sorted by path within each split. Each path is handed
    to ``read_csv`` in a worker process from a default-sized ``Pool``, and
    the per-file results are concatenated row-wise with a fresh 0..n-1
    index. The final shape is logged at INFO level.

    The previous version globbed ``../data/*.csv.gz`` three times, which
    either loaded every file three times or matched nothing when the files
    live only in the split sub-directories; the three globs now point at
    the same directories the per-split loaders read.

    NOTE(review): ``read_csv`` and ``logger`` are defined outside this
    block; ``read_csv`` presumably returns one DataFrame per path -- confirm.

    Returns:
        The concatenated result of all per-file reads across the splits.

    Raises:
        ValueError: propagated from ``pd.concat`` when no file matches.
    """
    paths = []
    for split in ('train', 'val', 'test'):
        # Sort within each split so row order is deterministic while the
        # train -> val -> test ordering of the splits is preserved.
        paths.extend(sorted(glob.glob('../data/{}/*.csv.gz'.format(split))))

    with Pool() as p:
        df = pd.concat(p.map(read_csv, paths), ignore_index=True, axis=0, copy=False)

    logger.info('data size {}'.format(df.shape))
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment