Skip to content

Instantly share code, notes, and snippets.

@iolson
Created February 14, 2024 14:39
Show Gist options
  • Save iolson/33571de11df0162b78703d3c0bb61ad2 to your computer and use it in GitHub Desktop.
Save iolson/33571de11df0162b78703d3c0bb61ad2 to your computer and use it in GitHub Desktop.
def write_batch_file(file_name, headers, part, lines):
    """Write one batch of rows to ``<file_name>-part-<part>.csv``.

    Args:
        file_name: Base path of the output file, without the part suffix
            or extension.
        headers: Header line, written first; it is written as given, so it
            must carry its own line terminator.
        part: Batch number embedded in the output file name.
        lines: Iterable of row strings written after the header, each
            written as given.
    """
    target_path = f"{file_name!s}-part-{part!s}.csv"
    # newline="" stops Python translating line endings, so the rows are
    # written with exactly the terminators they were handed over with.
    with open(target_path, "w", newline="") as out:
        out.writelines([headers, *lines])
def process_csv(csv_path, batch_size):
    """Split a CSV file into numbered part files of at most ``batch_size`` rows.

    The first line of the input is treated as the header and repeated at the
    top of every part file. Parts are numbered from 1 and written next to the
    input as ``<input without .csv>-part-<n>.csv``.

    The file is split line by line, so a quoted field that contains an
    embedded line break may end up divided between two parts.

    Args:
        csv_path: Path of the CSV file to split.
        batch_size: Maximum number of data rows per part file; expected to
            be a positive integer.

    Raises:
        ValueError: If ``batch_size`` is not greater than zero.
    """
    # Validate before touching the file system: zero would otherwise cause a
    # division by zero, and negatives would yield nonsensical part numbers.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    with open(csv_path, newline="") as csv_file:
        source_name = str(csv_file.name)
        # Strip only a trailing ".csv"; replacing every occurrence would
        # corrupt paths that contain ".csv" elsewhere (e.g. "a.csv.d/in.csv").
        if source_name.endswith(".csv"):
            file_name = source_name[: -len(".csv")]
        else:
            file_name = source_name

        headers = csv_file.readline()
        part = 0
        lines = []
        for line in csv_file:
            lines.append(line)
            if len(lines) == batch_size:
                part += 1
                write_batch_file(file_name, headers, part, lines)
                lines = []

        # Flush the final, partially filled batch (if any).
        if lines:
            part += 1
            write_batch_file(file_name, headers, part, lines)
#####################################
# SCRIPT STARTS RUNNING HERE
#####################################
# CSV Path Input
csv_file_path = input("CSV File Path: ")

# Batch Size Input: keep prompting until a positive integer is supplied.
while True:
    try:
        batch_size = int(input("Batch Size (Integer): "))
    except ValueError:
        print("Provided Batch Size is not an integer. Please try again.")
        continue
    if batch_size <= 0:
        # A zero or negative size cannot describe a batch, so ask again
        # instead of passing it on to the splitter.
        print("Provided Batch Size must be greater than zero. Please try again.")
        continue
    break

# Split the file only once both inputs are known to be usable.
process_csv(csv_file_path, batch_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment