# piXelicidio/fixcsv.py

## fixcsv.py
import os

#Function to fix the new CSV format coming from TurboSquid so that it works with the old statistics application: https://www.turbosquid.com/FullPreview/Index.cfm/ID/1002949
#by Denys Almaral (https://twitter.com/denysalmaral)
def fixCSV(fileName):
    """Convert one new-format TurboSquid sales CSV into the old report layout.

    The input is expected to hold four preamble lines followed by a header
    row of column names.  Each following row is re-ordered into the fixed
    column order listed in ``nameOrder``; input columns whose name is not in
    that list are dropped and missing ones are left empty.  The result is
    written next to the input as ``YYYY_MM.csv`` (taken from the DATE of the
    first data row), or as ``EmptyData`` when no data row was converted.

    Args:
        fileName: Path of the CSV file to convert (``pathlib.Path`` or ``str``).

    Raises:
        ValueError: If the file is too short to contain the header row.
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Local import so the function does not depend on module-level import order.
    from pathlib import Path

    fileName = Path(fileName)  # plain string paths are accepted as well

    headerSkip = 4  # index of the row holding the column names
    nameOrder = ['COMMENT', 'DATE', 'HYPERLINK', 'NAME', 'ORDER_TYPE',
                 'PRICE', 'PRODUCTID', 'RATE', 'ROYALTY', 'SOURCE']
    dateIdx = 1  # position of DATE inside nameOrder

    # Fixed preamble of the old report format, then its header row.
    newLines = [
        '\n',
        '"Sales Monthly Details"\n',
        '"Filter:  Year,Month,"\n',
        '\n',
        ','.join(nameOrder) + '\n',
    ]

    with open(fileName) as f:
        lines = f.readlines()

    if len(lines) <= headerSkip:
        raise ValueError(
            f"{fileName.name}: expected a header row on line {headerSkip + 1}, "
            f"but the file has only {len(lines)} line(s)"
        )

    names = [name.strip() for name in lines[headerSkip].split(',')]
    # indexing[N] is the output position of input column N, or -1 when that
    # column does not exist in the old layout and must be dropped.
    indexing = [nameOrder.index(name) if name in nameOrder else -1
                for name in names]

    def fixValue(typeName: str, value: str):
        """Return ``value`` cleaned up according to its column name."""
        if typeName == 'NAME':
            # Commas were already neutralised before the row was split, so
            # only the surrounding quotes are left to remove.
            return value.strip('"')
        if typeName == 'DATE':
            # Dates arrive wrapped like {ts '2020-03-15 10:00:00'}.
            return value.strip("\"{}ts '")
        return value

    def removeCommasInsideQuotes(s: str):
        """Replace every comma located inside double quotes with a space."""
        quoted = 0
        chars = []
        for c in s:
            if c == '"':
                quoted += 1
            elif c == ',' and quoted % 2 == 1:
                c = ' '
            chars.append(c)
        return ''.join(chars)

    sampleFirst = None
    for rawLine in lines[headerSkip + 1:]:
        # Drop the line ending first; otherwise it sticks to the last column
        # whenever the row has no trailing comma and breaks the output row.
        oldLine = removeCommasInsideQuotes(rawLine.rstrip('\r\n'))
        # Ignore a trailing empty item or anything else past the known columns.
        oldValues = oldLine.split(',')[:len(names)]

        # Fewer than 3 values: blank or non-data line. Processing stops here,
        # so any rows after such a line are not converted.
        if len(oldValues) < 3:
            break

        newValues = [''] * len(nameOrder)
        for target, value in zip(indexing, oldValues):
            if target != -1:
                newValues[target] = value
        newValues = [fixValue(name, value)
                     for name, value in zip(nameOrder, newValues)]

        if sampleFirst is None:
            sampleFirst = newValues

        # Old format: every value is followed by a comma, including the last.
        newLines.append(''.join(value + ',' for value in newValues) + '\n')

    # Derive the output file name from the first data row's date (YYYY-MM).
    fileNameOut = "EmptyData"
    if sampleFirst is not None:
        fileNameOut = sampleFirst[dateIdx][:7].replace("-", "_") + ".csv"

    fileNameOut = fileName.parent / fileNameOut

    with open(fileNameOut, 'w') as fout:
        fout.writelines(newLines)
        print(str(fileName.name) + " >> " + str(fileNameOut.name))

#fixing TS CSV files in current .py directory
from pathlib import Path
import sys

# Directory holding this script; every *.csv found directly in it is converted.
currdir = Path(__file__).parent
print(currdir)

# Snapshot and sort the matches before converting anything: fixCSV writes new
# .csv files into this same directory, so the listing is taken once up front,
# and the fixed order makes repeated runs process the files the same way.
for anyFile in sorted(currdir.glob("*.csv")):
    try:
        fixCSV(anyFile)
    except Exception as ex:
        # Top-level boundary: report the problem, wait for the user, then
        # carry on with the next file.
        print(ex)
        input("ERROR press enter:")
	import os

	#Function to fix the new CSV format coming from TurboSquid so that it works with the old statistics application: https://www.turbosquid.com/FullPreview/Index.cfm/ID/1002949
	#by Denys Almaral (https://twitter.com/denysalmaral)
	def fixCSV(fileName):
		"""Convert one new-format TurboSquid sales CSV into the old report layout.

		The input is expected to hold four preamble lines followed by a header
		row of column names.  Each following row is re-ordered into the fixed
		column order listed in ``nameOrder``; input columns whose name is not in
		that list are dropped and missing ones are left empty.  The result is
		written next to the input as ``YYYY_MM.csv`` (taken from the DATE of the
		first data row), or as ``EmptyData`` when no data row was converted.

		Args:
			fileName: Path of the CSV file to convert (``pathlib.Path`` or ``str``).

		Raises:
			ValueError: If the file is too short to contain the header row.
			OSError: If the input cannot be read or the output cannot be written.
		"""
		# Local import so the function does not depend on module-level import order.
		from pathlib import Path

		fileName = Path(fileName)  # plain string paths are accepted as well

		headerSkip = 4  # index of the row holding the column names
		nameOrder = ['COMMENT', 'DATE', 'HYPERLINK', 'NAME', 'ORDER_TYPE',
			'PRICE', 'PRODUCTID', 'RATE', 'ROYALTY', 'SOURCE']
		dateIdx = 1  # position of DATE inside nameOrder

		# Fixed preamble of the old report format, then its header row.
		newLines = [
			'\n',
			'"Sales Monthly Details"\n',
			'"Filter: Year,Month,"\n',
			'\n',
			','.join(nameOrder) + '\n',
		]

		with open(fileName) as f:
			lines = f.readlines()

		if len(lines) <= headerSkip:
			raise ValueError(
				f"{fileName.name}: expected a header row on line {headerSkip + 1}, "
				f"but the file has only {len(lines)} line(s)"
			)

		names = [name.strip() for name in lines[headerSkip].split(',')]
		# indexing[N] is the output position of input column N, or -1 when that
		# column does not exist in the old layout and must be dropped.
		indexing = [nameOrder.index(name) if name in nameOrder else -1
			for name in names]

		def fixValue(typeName: str, value: str):
			"""Return ``value`` cleaned up according to its column name."""
			if typeName == 'NAME':
				# Commas were already neutralised before the row was split, so
				# only the surrounding quotes are left to remove.
				return value.strip('"')
			if typeName == 'DATE':
				# Dates arrive wrapped like {ts '2020-03-15 10:00:00'}.
				return value.strip("\"{}ts '")
			return value

		def removeCommasInsideQuotes(s: str):
			"""Replace every comma located inside double quotes with a space."""
			quoted = 0
			chars = []
			for c in s:
				if c == '"':
					quoted += 1
				elif c == ',' and quoted % 2 == 1:
					c = ' '
				chars.append(c)
			return ''.join(chars)

		sampleFirst = None
		for rawLine in lines[headerSkip + 1:]:
			# Drop the line ending first; otherwise it sticks to the last column
			# whenever the row has no trailing comma and breaks the output row.
			oldLine = removeCommasInsideQuotes(rawLine.rstrip('\r\n'))
			# Ignore a trailing empty item or anything else past the known columns.
			oldValues = oldLine.split(',')[:len(names)]

			# Fewer than 3 values: blank or non-data line. Processing stops here,
			# so any rows after such a line are not converted.
			if len(oldValues) < 3:
				break

			newValues = [''] * len(nameOrder)
			for target, value in zip(indexing, oldValues):
				if target != -1:
					newValues[target] = value
			newValues = [fixValue(name, value)
				for name, value in zip(nameOrder, newValues)]

			if sampleFirst is None:
				sampleFirst = newValues

			# Old format: every value is followed by a comma, including the last.
			newLines.append(''.join(value + ',' for value in newValues) + '\n')

		# Derive the output file name from the first data row's date (YYYY-MM).
		fileNameOut = "EmptyData"
		if sampleFirst is not None:
			fileNameOut = sampleFirst[dateIdx][:7].replace("-", "_") + ".csv"

		fileNameOut = fileName.parent / fileNameOut

		with open(fileNameOut, 'w') as fout:
			fout.writelines(newLines)
			print(str(fileName.name) + " >> " + str(fileNameOut.name))

	#fixing TS CSV files in current .py directory
	from pathlib import Path
	import sys

	# Directory holding this script; every *.csv found directly in it is converted.
	currdir = Path(__file__).parent
	print(currdir)

	# Snapshot and sort the matches before converting anything: fixCSV writes new
	# .csv files into this same directory, so the listing is taken once up front,
	# and the fixed order makes repeated runs process the files the same way.
	for anyFile in sorted(currdir.glob("*.csv")):
		try:
			fixCSV(anyFile)
		except Exception as ex:
			# Top-level boundary: report the problem, wait for the user, then
			# carry on with the next file.
			print(ex)
			input("ERROR press enter:")