Skip to content

Instantly share code, notes, and snippets.

@KunstDerFuge
Created March 6, 2022 23:59
Show Gist options
  • Save KunstDerFuge/99c322ce66c7482223fd9812e5c60bfa to your computer and use it in GitHub Desktop.
Save KunstDerFuge/99c322ce66c7482223fd9812e5c60bfa to your computer and use it in GitHub Desktop.
Split CSV into files of up to N rows
import glob
import pandas as pd
import math
import os
# Upper bound on the number of data rows written to any single output file.
max_rows = 100000


def split_csv(file, max_rows):
    """Split one CSV file into numbered chunk files of at most *max_rows* rows.

    The file is loaded fully into memory. If it holds more than *max_rows*
    data rows, consecutive slices are written next to it as
    ``<name>_1.csv``, ``<name>_2.csv``, ... where ``<name>`` is *file*
    without its extension. Files with *max_rows* rows or fewer are left
    alone.

    Args:
        file: Path of the comma-separated input file.
        max_rows: Maximum number of data rows per output file.

    Returns:
        The list of chunk paths written, in order; empty when the file was
        skipped.
    """
    filename = os.path.splitext(file)[0]
    print(f'Loading {file}...')
    df = pd.read_csv(file, sep=',', low_memory=False)

    if len(df) <= max_rows:
        print('Skipping...')
        return []

    print(f'Processing into chunks of {max_rows} rows...')
    written = []
    for i, start in enumerate(range(0, len(df), max_rows), start=1):
        out_path = f'{filename}_{i}.csv'
        # index=False keeps the chunk's columns identical to the input's;
        # without it pandas prepends the row index as an extra unnamed column.
        df.iloc[start:start + max_rows].to_csv(out_path, index=False)
        print(f'Wrote {out_path}...')
        written.append(out_path)
    return written


# glob.glob returns a complete list up front, so chunk files created while
# looping are not picked up during the same run.
for file in glob.glob('*.csv'):
    split_csv(file, max_rows)
print('Done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment