Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save peon-pasado-zeitnot/182e06fc95f7a9b86388fb3276fa2adb to your computer and use it in GitHub Desktop.
Save peon-pasado-zeitnot/182e06fc95f7a9b86388fb3276fa2adb to your computer and use it in GitHub Desktop.
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import pandas as pd
from pathlib import Path
import sys
def read_file(file):
    """Load a single CSV file into a pandas DataFrame.

    This must stay a module-level function: ProcessPoolExecutor sends it to
    worker processes by pickling a reference to it, which does not work for
    lambdas or nested functions.

    Args:
        file: Path (or anything ``pd.read_csv`` accepts) of the CSV to load.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file)


def find_csv_files(base_dir, output_csv=None):
    """Return every ``*.csv`` below *base_dir* (recursively), sorted.

    Args:
        base_dir: Directory to search.
        output_csv: Optional path of the merged output file. If it lives
            inside *base_dir* (for example, left over from a previous run)
            it is skipped so its rows are not merged back into itself.

    Returns:
        A sorted list of ``Path`` objects; empty when nothing matches.
    """
    skip = Path(output_csv).resolve() if output_csv is not None else None
    # Sorting makes the row order of the merged file reproducible; the raw
    # rglob order depends on the filesystem.
    return sorted(
        path for path in Path(base_dir).rglob('*.csv')
        if path.resolve() != skip
    )


def merge_csv_files(base_dir, output_csv, max_workers=12):
    """Read all CSVs under *base_dir* in parallel and write them as one CSV.

    Args:
        base_dir: Directory searched recursively for ``*.csv`` files.
        output_csv: Destination path of the concatenated CSV (written
            without the index column).
        max_workers: Number of worker processes used for parsing.

    Raises:
        ValueError: If no CSV files are found under *base_dir*.
    """
    files = find_csv_files(base_dir, output_csv)
    if not files:
        # pd.concat would fail with an opaque "No objects to concatenate";
        # name the directory instead, and avoid starting a pool for nothing.
        raise ValueError(f"No CSV files found under {str(base_dir)!r}")
    with ProcessPoolExecutor(max_workers) as pool:
        # pool.map yields results in input order, so rows follow `files`.
        df = pd.concat(pool.map(read_file, files))
    df.to_csv(output_csv, index=False)


def main(argv=None):
    """Command-line entry point: ``script BASE_DIR OUTPUT_CSV``.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Arguments beyond the first two are ignored.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit(f"usage: {sys.argv[0]} BASE_DIR OUTPUT_CSV")
    merge_csv_files(args[0], args[1])


# The guard is required for ProcessPoolExecutor: with the spawn/forkserver
# start methods each worker re-imports this module, and unguarded top-level
# code would try to create a new pool inside every worker.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment