Skip to content

Instantly share code, notes, and snippets.

@deepankarb
Created April 30, 2024 07:37
Show Gist options
  • Save deepankarb/189258ae24634396e52c6aef6d073603 to your computer and use it in GitHub Desktop.
Save deepankarb/189258ae24634396e52c6aef6d073603 to your computer and use it in GitHub Desktop.
Transpose a single column from a CSV
# Copyright 2024, Deepankar Bhardwaj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import csv
import logging
import time
def transpose_csv(input_file, output_prefix="transposed", num_records=500, column_index=0):
    """Write one column of a CSV file as comma-joined lines split across files.

    The first row of ``input_file`` is treated as a header row. The values of
    the selected column from every following row are collected, split into
    groups of at most ``num_records`` values, and each group is written as a
    single comma-joined line (no trailing newline) to
    ``{output_prefix}_{column_name}_{i}.txt``, where ``column_name`` is the
    header of the selected column and ``i`` is the zero-based group number.
    Progress and total elapsed time are logged to ``transpose.log``.

    Args:
        input_file: Path to the input CSV file.
        output_prefix: Prefix for the output files (default: "transposed").
        num_records: Maximum number of values per output file (default: 500).
            Must be at least 1.
        column_index: Index of the column to pick from the input file
            (default: 0).

    Raises:
        ValueError: If ``num_records`` is less than 1, or if the input file
            is empty and therefore has no header row.
        IndexError: If ``column_index`` is out of range for the header row or
            for a non-blank data row.
    """
    # Reject invalid chunk sizes up front: 0 would fail inside range() with an
    # unrelated message and a negative value would silently write nothing.
    if num_records < 1:
        raise ValueError(f"num_records must be a positive integer, got {num_records}")

    logging.basicConfig(filename='transpose.log', level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()

    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(input_file, 'r', newline='') as infile:
        reader = csv.reader(infile)
        # Use a default so an empty file is reported clearly instead of
        # leaking a bare StopIteration to the caller.
        headers = next(reader, None)
        if headers is None:
            raise ValueError(f"Input file is empty, no header row found: {input_file}")
        logging.info("Input file opened: %s", input_file)

        # The header of the selected column becomes part of each output name.
        try:
            column_name = headers[column_index]
        except IndexError:
            raise IndexError(
                f"column_index {column_index} is out of range for a header "
                f"row with {len(headers)} column(s)"
            ) from None

        # csv.reader yields an empty list for blank lines; skip those rather
        # than failing with IndexError on e.g. a trailing blank line.
        data = [row[column_index] for row in reader if row]

    # Split the collected values into consecutive groups of num_records.
    chunked = [data[start:start + num_records] for start in range(0, len(data), num_records)]
    for i, chunk in enumerate(chunked):
        outname = f"{output_prefix}_{column_name}_{i}.txt"
        with open(outname, 'w') as outfile:
            logging.info("Writing to %s", outname)
            outfile.write(",".join(chunk))

    total_time = time.perf_counter() - start_time
    logging.info("Transposition completed successfully. Total time: %.2f seconds.", total_time)
# Command-line entry point: parse the options and run the transposition.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description='Transpose a CSV file')
    # The only positional argument is the CSV file to read.
    cli.add_argument('input_file', help='Path to the input CSV file')
    cli.add_argument(
        '-o', '--output_prefix',
        default="transposed",
        help='Prefix for the output files (default: "transposed")',
    )
    cli.add_argument(
        '-n', '--num_records',
        type=int,
        default=500,
        help='Number of records per output file (default: 500)',
    )
    cli.add_argument(
        '-c', '--column_index',
        type=int,
        default=0,
        help='Index of the column to pick (default: 0)',
    )
    options = cli.parse_args()

    # Forward every parsed option to transpose_csv by keyword.
    transpose_csv(
        input_file=options.input_file,
        output_prefix=options.output_prefix,
        num_records=options.num_records,
        column_index=options.column_index,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment