Skip to content

Instantly share code, notes, and snippets.

@caseyjconger
Last active January 22, 2020 17:22
Show Gist options
  • Save caseyjconger/b8adaa1fa7155a1d06ab5cd84871bfcb to your computer and use it in GitHub Desktop.
Save caseyjconger/b8adaa1fa7155a1d06ab5cd84871bfcb to your computer and use it in GitHub Desktop.
[df_read_csvs_filepattern] Reads data stored in possibly multiple, but identically formatted csv files identified by matching a path wildcard pattern, into a pandas dataframe #pandas #IO #dataframe #csv
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
#%% %%#
#%% df_read_csvs_filepattern.py %%#
#%% %%#
#%% Author: Casey Conger <casey.conger@priceline.com> %%#
#%% %%#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
import pandas as pd
import tensorflow as tf
from tensorflow.python.lib.io import file_io
def read_csv_file(filename, read_csv_kwargs):
    """Load a single csv file into a pandas DataFrame.

    The file is opened with TensorFlow's ``file_io.FileIO`` in mode ``'r'``
    and the resulting file object is handed to ``pd.read_csv``.

    Args:
        filename (str): location of the csv file to read.
        read_csv_kwargs (dict): keyword arguments forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        (pandas.DataFrame): the parsed contents of the file.
    """
    with file_io.FileIO(filename, 'r') as csv_handle:
        # Parsing happens while the handle is still open; the context
        # manager closes it once the DataFrame has been built.
        return pd.read_csv(csv_handle, **read_csv_kwargs)
def read_csv_files(filename_pattern, **read_csv_kwargs):
    """Read every csv file matching a pattern into one DataFrame.

    Each matching file is read with ``read_csv_file`` and the per-file
    DataFrames are concatenated with ``pd.concat`` using its defaults, so
    the original per-file index values are kept as-is.

    Args:
        filename_pattern (str): wildcard pattern identifying the csv files
            to read.
        **read_csv_kwargs: arbitrary keyword arguments passed through to
            ``pd.read_csv`` for every file.

    Returns:
        (pandas.DataFrame): concatenated data from all files matching
            ``filename_pattern``.

    Raises:
        ValueError: if no file matches ``filename_pattern``.
    """
    # ``file_io.get_matching_files`` is the function behind the TF 1.x
    # ``tf.gfile.Glob`` alias; calling it directly keeps this working on
    # TensorFlow 2.x, where ``tf.gfile`` no longer exists.
    filenames = file_io.get_matching_files(filename_pattern)
    if not filenames:
        # pd.concat([]) would raise the same exception type with an opaque
        # "No objects to concatenate" message; name the pattern instead.
        raise ValueError(
            "No files found matching pattern: {!r}".format(filename_pattern)
        )
    dataframes = [
        read_csv_file(filename, read_csv_kwargs)
        for filename in filenames
    ]
    return pd.concat(dataframes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment