Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import pandas as pd
from pathlib import Path
import sys
def read_file(file):
    """Parse a single CSV file into a DataFrame.

    Defined at module level so ``ProcessPoolExecutor`` can pickle it by
    reference and hand it to worker processes.

    Args:
        file: Path of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        The ``pandas.DataFrame`` returned by ``pd.read_csv(file)``.
    """
    return pd.read_csv(file)


def find_csv_files(base_dir, exclude=None):
    """Return every ``*.csv`` file below ``base_dir`` in sorted order.

    Args:
        base_dir: Directory that is searched recursively.
        exclude: Optional path to leave out of the result. Used to keep
            the merged output file from being read back in as an input
            when it is written somewhere under ``base_dir``.

    Returns:
        A sorted list of ``pathlib.Path`` objects. Sorting makes the row
        order of the merged output independent of directory listing order.
    """
    skip = None if exclude is None else Path(exclude).resolve()
    return sorted(
        path
        for path in Path(base_dir).rglob('*.csv')
        # Directories that happen to be named "*.csv" cannot be parsed.
        if path.is_file() and path.resolve() != skip
    )


def merge_csv_files(base_dir, output_csv, max_workers=12):
    """Concatenate all CSV files under ``base_dir`` into ``output_csv``.

    The files are parsed in parallel worker processes, concatenated in
    sorted path order and written without the index column.

    Args:
        base_dir: Directory that is searched recursively for ``*.csv``.
        output_csv: Destination path of the merged CSV.
        max_workers: Upper bound on the number of worker processes.

    Returns:
        The number of input files that were merged.

    Raises:
        ValueError: If no CSV file is found under ``base_dir``.
    """
    files = find_csv_files(base_dir, exclude=output_csv)
    if not files:
        # pd.concat would fail with an opaque "No objects to concatenate".
        raise ValueError(f"no CSV files found under {str(base_dir)!r}")

    # Do not start more processes than there are files to read.
    workers = min(max_workers, len(files))
    with ProcessPoolExecutor(workers) as pool:
        frames = list(pool.map(read_file, files))

    pd.concat(frames).to_csv(output_csv, index=False)
    return len(files)


def main(argv=None):
    """Command-line entry point: ``<script> BASE_DIR OUTPUT_CSV``.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 2 on wrong usage.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        print("usage: BASE_DIR OUTPUT_CSV", file=sys.stderr)
        return 2
    base_dir, output_csv = args
    merge_csv_files(base_dir, output_csv)
    return 0


# The guard is required for ProcessPoolExecutor: with the spawn/forkserver
# start methods the workers import this module, and unguarded top-level
# code would run again in every worker.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment