Skip to content

Instantly share code, notes, and snippets.

@ngoodger
Created August 27, 2017 03:31
Show Gist options
  • Save ngoodger/f3aa1e248ab3d1dfaff20695db314843 to your computer and use it in GitHub Desktop.
Save ngoodger/f3aa1e248ab3d1dfaff20695db314843 to your computer and use it in GitHub Desktop.
Split a CSV file into a user-specified number of smaller CSV files. Example: python3 split_csv.py measurement_by_imsi_cell_2017-07-22_v4.csv 5
import argparse
def main(input_csv, file_count):
    """Split a CSV file into ``file_count`` smaller CSV files.

    The first line of ``input_csv`` is treated as the header and is repeated
    at the top of every output file. The remaining lines are divided evenly
    between the outputs; when the split is not exact, the first output file
    receives the leftover lines.

    Output files are written next to the input and named
    ``<input without extension>_<i>.csv`` for ``i`` in ``0..file_count - 1``.

    Args:
        input_csv: Path of the CSV file to split.
        file_count: Number of output files to produce (must be >= 1).

    Raises:
        ValueError: If ``file_count`` is less than 1 or the input file is
            empty (there is no header line to replicate).
    """
    # Function-scope import keeps this block self-contained.
    import os

    if file_count < 1:
        raise ValueError("file_count must be at least 1, got {}".format(file_count))

    with open(input_csv, "r") as f:
        csv_text = f.readlines()
    if not csv_text:
        raise ValueError("{} is empty; no header line found".format(input_csv))

    header_line = csv_text[0]
    body_lines = csv_text[1:]

    # Integer arithmetic only: avoids float rounding on very large line counts.
    lines_per_file, extra_lines = divmod(len(body_lines), file_count)

    # Strip whatever extension the input has instead of assuming it is
    # exactly four characters long.
    base_name = os.path.splitext(input_csv)[0]

    start = 0
    for i in range(file_count):
        # The first file takes the lines that do not divide evenly.
        end = start + lines_per_file + (extra_lines if i == 0 else 0)
        with open("{}_{}.csv".format(base_name, i), "w") as f:
            f.write(header_line)
            f.writelines(body_lines[start:end])
        start = end
if __name__ == "__main__":
    # Command-line entry point.
    # Example: python3 split_csv.py measurement_by_imsi_cell_2017-07-22_v4.csv 5
    parser = argparse.ArgumentParser()
    # Two positional arguments: the CSV path and the number of output files.
    for arg_name, arg_type in (("input_csv", str), ("output_file_count", int)):
        parser.add_argument(arg_name, type=arg_type)
    args = parser.parse_args()
    # Kept as module-level names, exactly as in the original script.
    input_csv, output_file_count = args.input_csv, args.output_file_count
    main(input_csv, output_file_count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment