Skip to content

Instantly share code, notes, and snippets.

@Mondonno
Last active September 21, 2021 16:38
Show Gist options
  • Save Mondonno/e0b7ffb2976fa9c427ef8efb9d13249b to your computer and use it in GitHub Desktop.
Save Mondonno/e0b7ffb2976fa9c427ef8efb9d13249b to your computer and use it in GitHub Desktop.
Analyzer and deleter for CSV files. Deletes rows where some conditions do not pass, and generates a new CSV file.
# Copyrighted by Mondonno
#
# Filters a delimiter-separated text file. Each line is split into fields,
# the trailing MAX_ELEMENTS fields are inspected, and the line is dropped when
# those fields are all duplicated elsewhere on the line (or the line is made
# up only of duplicates plus one or two blanks). Surviving lines are written
# to a new file.
#
# NOTE: the constant names are historical - a "COL" here is one line of the
# file and a "ROW" is one field within that line.
OUT_DIR = "./"  # folder the output file is written to
IN_DIR = "./in/"  # folder the input file is read from
CSV_FILE_EXTENSION = ".csv"  # extension appended to the input and output names
OUT_FILE_NAME = "out"  # base name of the generated output file
COL_DELIMETER = "\n"  # separator between lines of the file (new line)
ROW_DELIMETER = ";"  # separator between the fields of one line
ELIMINATE_PROVIDING_EMPTY = True  # NOTE: deprecated field, not read by the code below
EMPTY_LIMIT = 2  # limit of empty elements; not read by the code below
START_COL = 0  # index of the first line to process | 0 = all
MAX_ELEMENTS = 3  # how many trailing fields of each line are inspected | 0 = all
#####################


def line_is_rejected(line):
    """Return True when *line* must be left out of the output.

    The line is split on ROW_DELIMETER. When MAX_ELEMENTS is positive and the
    line has more fields than that, only the last MAX_ELEMENTS fields are
    inspected; otherwise every field is inspected.

    A line is rejected when either:
      * every inspected field also occurs at another position of the line, or
      * the number of duplicated plus empty inspected fields equals the total
        number of fields on the line and exactly one or two inspected fields
        are empty (this is what rejects a completely empty line).
    """
    fields = line.split(ROW_DELIMETER)
    total = len(fields)

    # Leading fields are skipped so that only the tail is inspected.
    skipped = total - MAX_ELEMENTS if 0 < MAX_ELEMENTS < total else 0
    inspected = fields[skipped:]

    # Duplicates are looked up across the whole line, skipped fields included.
    duplicated = sum(1 for value in inspected if fields.count(value) > 1)
    empty = inspected.count("")

    if duplicated >= len(inspected):
        return True
    # Deliberately compared against the full field count, not len(inspected).
    return duplicated + empty == total and empty in (1, 2)


def filter_content(content):
    """Drop the rejected lines from *content*.

    *content* is split on COL_DELIMETER and processing starts at line index
    START_COL (expected to be non-negative). "Error! Limit reached" is printed
    once for every dropped line.

    Returns a ``(text, deleted)`` tuple: the kept lines, each followed by
    COL_DELIMETER and then stripped of leading/trailing whitespace as a whole,
    and the number of dropped lines.
    """
    kept = []
    deleted = 0
    for line in content.split(COL_DELIMETER)[START_COL:]:
        if line_is_rejected(line):
            print("Error! Limit reached")
            deleted += 1
        else:
            kept.append(line)
    text = "".join(line + COL_DELIMETER for line in kept).strip()
    return text, deleted


def main():
    """Ask for a file name, filter that file and write the result.

    Reads IN_DIR + name + CSV_FILE_EXTENSION and writes
    OUT_DIR + OUT_FILE_NAME + CSV_FILE_EXTENSION. Both files are opened with
    the platform default encoding and are closed as soon as they have been
    read or written.
    """
    file_name = input("Provide CSV file name\n")
    with open(IN_DIR + file_name + CSV_FILE_EXTENSION) as in_file:
        content = in_file.read()

    print("\nProcessing...\n")
    text, deleted = filter_content(content)

    print("\nWriting to output file...")
    with open(OUT_DIR + OUT_FILE_NAME + CSV_FILE_EXTENSION, "w") as out_file:
        out_file.write(text)
    print("Done! Deleted cols: " + str(deleted))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment