Skip to content

Instantly share code, notes, and snippets.

@harendra21
Created December 11, 2021 08:19
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harendra21/791cad6ac4a9d65aa98a54ac6463d50c to your computer and use it in GitHub Desktop.
Save harendra21/791cad6ac4a9d65aa98a54ac6463d50c to your computer and use it in GitHub Desktop.
import sys
import os
import shutil
import pandas as pd
class Split_Files:
    '''
    Split a delimited text file into numbered chunk files.

    The input is read with ``pandas.read_csv`` (no header row) and written
    out as ``file_split/split_file<N>.<ext>`` with at most ``split_number``
    rows per file. ``.txt`` outputs are space separated, everything else is
    written as comma separated ``.csv``.
    '''

    def __init__(self, filename, split_number):
        '''
        Store the input path and chunk size and prepare the output directory.

        Args:
            filename: path of the csv/txt file to split.
            split_number: maximum rows per output file; anything accepted by
                ``int()`` (e.g. a command line string).

        Raises:
            ValueError: if ``split_number`` is not an integer or is < 1.
        '''
        self.file_name = filename
        self.directory = "file_split"
        self.split = int(split_number)
        # Validate before touching the filesystem so that a bad argument
        # does not wipe the results of a previous run.
        if self.split < 1:
            raise ValueError(
                f"split_number must be a positive integer, got {split_number!r}"
            )
        # Start from an empty output directory on every run.
        if os.path.exists(self.directory):
            shutil.rmtree(self.directory)
        os.mkdir(self.directory)
        # Anything that is not '.txt' is written out as '.csv'.
        if self.file_name.endswith('.txt'):
            self.file_extension = '.txt'
        else:
            self.file_extension = '.csv'
        # Number used in the name of the next output file.
        self.file_number = 1

    def _output_path(self):
        '''Return the path of the output file for the current file number.'''
        return f"{self.directory}/split_file{self.file_number}{self.file_extension}"

    def split_data(self):
        '''
        Write the input file out in consecutive chunks of ``self.split`` rows.

        The last file holds the remaining rows when the row count is not a
        multiple of the chunk size. Every chunk, including that last one,
        uses the same separator (space for '.txt', comma otherwise).
        '''
        data = pd.read_csv(self.file_name, header=None)
        separator = ' ' if self.file_extension == '.txt' else ','
        # Slice whole chunks instead of appending row by row: DataFrame.append
        # no longer exists in pandas 2.x, and slicing keeps column dtypes.
        for start in range(0, len(data), self.split):
            chunk = data.iloc[start:start + self.split]
            chunk.to_csv(self._output_path(), header=False, index=False, sep=separator)
            self.file_number += 1
if __name__ == '__main__':
    # Command line usage: <script> <input file> <rows per output file>
    source_path = sys.argv[1]
    rows_per_file = sys.argv[2]
    # Build the splitter (this resets the output directory) and run it.
    Split_Files(source_path, rows_per_file).split_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment