Created
August 21, 2022 20:29
-
-
Save piXelicidio/500e2d3b8f1bd0aa3d410ed702ea739b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
#function to fix the new CSV format coming from TurboSquid in order to work with the old statistics application: https://www.turbosquid.com/FullPreview/Index.cfm/ID/1002949
#by Denys Almaral (https://twitter.com/denysalmaral) | |
def fixCSV(fileName) -> None:
    """Rewrite a new-format TurboSquid sales CSV into the legacy column layout.

    The input is read as text. Its fifth line (index ``headerSkip``) is taken
    as the column-name row and every following line as a data row. Columns
    are reordered to match ``nameOrder``; input columns whose name is not in
    ``nameOrder`` are dropped and legacy columns missing from the input are
    left empty. The result is written next to the input file, overwriting any
    existing file of that name, as ``YYYY_MM.csv`` (derived from the first
    seven characters of the first row's DATE value) or as ``EmptyData`` when
    no data row was converted. A ``"<input> >> <output>"`` line is printed.

    Args:
        fileName: path of the CSV file to convert (``pathlib.Path`` or str).

    Raises:
        ValueError: if the file has no line at the column-name row position.
        OSError: if the input cannot be read or the output cannot be written.
    """
    # Imported locally so the function does not depend on module-level
    # import order.
    from pathlib import Path

    headerSkip = 4  # index of the column-name row in the input file
    nameOrder = ['COMMENT', 'DATE', 'HYPERLINK', 'NAME', 'ORDER_TYPE',
                 'PRICE', 'PRODUCTID', 'RATE', 'ROYALTY', 'SOURCE']
    dateIdx = nameOrder.index('DATE')

    def fixValue(typeName: str, value: str) -> str:
        """Clean one cell according to the legacy column it belongs to."""
        if typeName == 'NAME':
            # Names lose their surrounding quotes and any remaining commas.
            return value.strip('"').replace(',', '')
        if typeName == 'DATE':
            # Strip quote, brace, 't', 's', space and apostrophe characters
            # from both ends (presumably a {ts '...'} wrapper -- TODO confirm
            # against a real export).
            return value.strip("\"{}ts '")
        return value

    def removeCommasInsideQuotes(s: str) -> str:
        """Replace every comma found between double quotes with a space."""
        insideQuotes = False
        out = []
        for c in s:
            if c == '"':
                insideQuotes = not insideQuotes
            elif c == ',' and insideQuotes:
                c = ' '
            out.append(c)
        return ''.join(out)

    path = Path(fileName)
    with open(path) as f:
        lines = f.readlines()

    if len(lines) <= headerSkip:
        raise ValueError(
            f"{path}: expected a column-name row on line {headerSkip + 1}, "
            f"but the file has only {len(lines)} line(s)"
        )

    names = [name.strip() for name in lines[headerSkip].split(',')]
    # names[N] belongs at legacy position indexing[N]; -1 means "drop it".
    indexing = [nameOrder.index(name) if name in nameOrder else -1
                for name in names]

    rows = []
    for rawLine in lines[headerSkip + 1:]:
        # Strip the line ending first: otherwise the last cell of a row
        # without a trailing comma keeps its newline and corrupts the output.
        oldLine = removeCommasInsideQuotes(rawLine.rstrip('\r\n'))
        # Ignore a trailing empty item or anything else past the known columns.
        oldValues = oldLine.split(',')[:len(names)]
        # Fewer than 3 values means an empty or malformed line; conversion
        # stops here and any later lines are not processed.
        if len(oldValues) < 3:
            break
        newValues = [''] * len(nameOrder)
        for idx, value in zip(indexing, oldValues):
            if idx != -1:
                newValues[idx] = value
        rows.append([fixValue(typeName, value)
                     for typeName, value in zip(nameOrder, newValues)])

    # Legacy preamble followed by the column-name row.
    newLines = [
        '\n',
        '"Sales Monthly Details"\n',
        '"Filter: Year,Month,"\n',
        '\n',
        ','.join(nameOrder) + '\n',
    ]
    # Every data value is followed by a comma, including the last one.
    newLines.extend(''.join(value + ',' for value in row) + '\n'
                    for row in rows)

    # Auto-detect the output file name from the first converted row's date.
    outName = "EmptyData"
    if rows:
        outName = rows[0][dateIdx][:7].replace("-", "_") + ".csv"
    outPath = path.parent / outName
    with open(outPath, 'w') as fout:
        fout.writelines(newLines)
    print(str(path.name) + " >> " + str(outPath.name))
#fixing TS CSV files in current .py directory | |
from pathlib import Path | |
import sys | |
# Directory that contains this script; every *.csv file in it is converted.
currdir = Path(__file__).parent
print(currdir)
# Snapshot the matches before converting anything: fixCSV writes its output
# (usually also named *.csv) into this same directory, and whether a lazily
# evaluated directory listing includes files created mid-iteration is
# unspecified.
for anyFile in list(currdir.glob("*.csv")):
    try:
        fixCSV(anyFile)
    except Exception as ex:
        # Report the failure, wait for Enter, then go on to the next file.
        print(ex)
        input("ERROR press enter:")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment