Created
November 10, 2022 18:04
-
-
Save fpcorso/88299c4c1fa8b7f791880e60ac3ac5ab to your computer and use it in GitHub Desktop.
CSV Chunkifier using Pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math

import pandas as pd
import typer
def main(file: str, to_format: str, size: int = typer.Argument(40000)):
    """
    Split a CSV file into numbered chunk files of at most ``size`` rows each.

    CSV Chunkifier uses Typer. Pass an existing CSV file, the name for the new files, and
    size of each chunk to the command. The name for the new files must contain a ``{}``
    placeholder, which is replaced with the 1-based chunk number.

    Example: python csv_chunkifier.py my_data.csv "my_data_part_{}.csv" 50
    """
    if size < 1:
        # size == 0 would divide by zero below; a negative size would
        # silently produce zero chunks. Reject both up front.
        raise typer.BadParameter('size must be a positive integer')

    # Count rows one chunk at a time so the whole file is never held in
    # memory at once (the input may be far larger than a single chunk).
    counter = pd.read_csv(file, chunksize=size)
    try:
        total = sum(len(chunk) for chunk in counter)
    finally:
        counter.close()
    typer.echo('Total rows: {}'.format(total))

    chunks = math.ceil(total / size)
    typer.echo('Total chunks: {}'.format(chunks))

    if chunks > 1 and to_format.format(1) == to_format.format(2):
        # Without a placeholder every chunk would be written to the same
        # path, each one overwriting the previous.
        raise typer.BadParameter(
            'to_format must contain a "{}" placeholder for the chunk number'
        )

    typer.confirm('Continue with chunking?: ', abort=True)

    # Second pass: stream the file again and write each chunk to its own file.
    reader = pd.read_csv(file, iterator=True)
    try:
        with typer.progressbar(range(1, chunks + 1)) as progress:
            for value in progress:
                df = reader.get_chunk(size)
                # index=False keeps the output columns identical to the input.
                df.to_csv(to_format.format(value), index=False)
    finally:
        # Release the underlying file handle even if a write fails midway.
        reader.close()
# Run the CLI only when executed as a script, not when imported as a module.
# typer.run builds the command-line interface from main's signature.
if __name__ == "__main__":
    typer.run(main)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment