
@BrunoGomesCoelho
Created July 21, 2019 22:06
Read and concatenate multiple pandas dataframes in parallel. All credits to @zemekeneng on Stack Overflow.
import pandas as pd
from multiprocessing import Pool  # for reading the CSVs faster


def my_read_csv(filename):
    # Helper function for the parallel load_csvs
    return pd.read_csv(filename)


def load_csvs(prefix):
    """Reads and joins all our CSV files into one big dataframe.

    We do it in parallel to make it faster, since otherwise it takes some time.
    Idea from: https://stackoverflow.com/questions/36587211/easiest-way-to-read-csv-files-with-multiprocessing-in-pandas
    """
    # DATA_PATH is assumed to be defined elsewhere in the module
    file_list = [f"{DATA_PATH}/{prefix}{idx}.csv" for idx in range(1, 21)]
    # Set up a worker pool (one process per CPU core by default) and
    # make sure it is shut down cleanly when the reads finish
    with Pool() as pool:
        df_list = pool.map(my_read_csv, file_list)
    # Reduce the list of dataframes to a single dataframe
    return pd.concat(df_list, ignore_index=True)
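
For reference, a minimal usage sketch; the DATA_PATH value and the "train" prefix here are hypothetical stand-ins for wherever the twenty CSVs actually live. On platforms that spawn rather than fork worker processes (Windows, and macOS on recent Python versions), the call should sit under an if __name__ == "__main__": guard so the workers can re-import the module safely:

DATA_PATH = "data"  # hypothetical directory holding train1.csv ... train20.csv

if __name__ == "__main__":
    df = load_csvs("train")  # one dataframe with all 20 files concatenated
    print(df.shape)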