Skip to content

Instantly share code, notes, and snippets.

@z-a-f
Created March 10, 2024 03:18
Show Gist options
  • Save z-a-f/5ecfd9322b052e837a048025adaea996 to your computer and use it in GitHub Desktop.
Save z-a-f/5ecfd9322b052e837a048025adaea996 to your computer and use it in GitHub Desktop.
import csv
import os
from contextlib import ExitStack

# Original CSV file
original_csv_file = 'filename.csv'
# Columns that need to be separated
# For this example, we have 3 subfiles, with these columns
# File 1: 0, 2, 4
# File 2: 1, 3, 5
# File 3: 0, 5
subfile_columns = [
    [0, 2, 4],
    [1, 3, 5],
    [0, 5],
]
# Kept as a module-level name for anything that already refers to it.
num_files = len(subfile_columns)


def split_csv_columns(csv_path, column_groups):
    """Split one CSV file into several subfiles, each holding a subset of columns.

    For every entry ``column_groups[i]`` a file named ``<stem><i>.csv`` is
    written next to ``csv_path`` (``data.csv`` -> ``data0.csv``, ``data1.csv``,
    ...).  Each input row contributes one row to every subfile, containing the
    selected columns in the order they are listed in the group.

    Args:
        csv_path: Path of the CSV file to read.
        column_groups: Sequence of column-index sequences, one per subfile.

    Returns:
        The list of subfile paths, in the same order as ``column_groups``.

    Raises:
        OSError: If the input cannot be read or a subfile cannot be written.
        IndexError: If a non-blank row has fewer columns than a group asks for.
    """
    # splitext copes with any extension length (or none), unlike slicing [:-4].
    stem, _extension = os.path.splitext(csv_path)
    subfile_paths = [
        '{}{}.csv'.format(stem, idx) for idx in range(len(column_groups))
    ]

    # ExitStack closes every file that was opened, even if a later open()
    # or a malformed row raises half-way through.
    with ExitStack() as stack:
        # newline='' lets the csv module own line-ending handling, so the
        # trailing '\n' never ends up inside the last field.
        source = stack.enter_context(open(csv_path, 'r', newline=''))
        reader = csv.reader(source)

        # 'w' (not 'a'): rows are still written one at a time, but re-running
        # the script replaces old output instead of duplicating every row.
        writers = [
            csv.writer(
                stack.enter_context(open(path, 'w', newline='')),
                lineterminator='\n',
            )
            for path in subfile_paths
        ]

        for row in reader:
            if not row:
                # Blank line in the input: there is nothing to distribute.
                continue
            for writer, columns in zip(writers, column_groups):
                try:
                    selected = [row[col] for col in columns]
                except IndexError:
                    raise IndexError(
                        'line {} of {!r} has only {} column(s); '
                        'columns {} were requested'.format(
                            reader.line_num, csv_path, len(row), list(columns)
                        )
                    )
                writer.writerow(selected)

    return subfile_paths


if __name__ == '__main__':
    split_csv_columns(original_csv_file, subfile_columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment