Last active
October 8, 2015 10:49
-
-
Save GTKplusplus/f12e04aa23cee98fa8b6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Data=1 | |
Causale=2 | |
Descrizione=3 | |
Entrate=4 | |
Uscite=5 | |
Saldo=6 | |
Codice=7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"asti": { | |
"month": 9, | |
"day": 2, | |
"year": 2015 | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
# import required libraries | |
from openpyxl import load_workbook, Workbook | |
from os import getcwd, chdir, listdir | |
from structure import rows | |
import datetime | |
import json | |
import pymsgbox | |
def format(columns, row): | |
# gets column index for saldo, entrate, uscite | |
print row | |
saldo = chr(int(columns['Saldo']) + 64) | |
entrate = chr(int(columns['Entrate']) + 64) | |
uscite = chr(int(columns['Uscite']) + 64) | |
# get the codes ready: | |
if row <= 2: | |
res = '=' + entrate + str(row) + '+' + uscite + str(row) | |
else: | |
res = '=' + saldo + str(row - 1) + '+' + entrate + str(row) \ | |
+ '+' + uscite + str(row) | |
print res | |
return res | |
def letters(row, column): | |
col = chr(int(column) + 64) | |
string = col + str(row) | |
return string | |
# gets the workspace ready | |
defaultSheet = 'main' | |
# sets up relevant paths | |
path = getcwd() | |
startingdatapath = path + '\\starting data\\' | |
out = path + '\\output_files\\' | |
in_dir = path + '\\input_files\\' | |
# sets the columns index | |
chdir(startingdatapath) | |
print getcwd() | |
columns = {} | |
titles = [] | |
with open('columns.txt', 'r') as f: | |
i = 0 | |
for line in f: | |
(name, value) = line.split('=') | |
i += 1 | |
columns[name] = value | |
titles = ['' for x in range(i)] | |
for key in columns.keys(): | |
titles[int(columns[key]) - 1] = key | |
# goes to the input directory | |
chdir(in_dir) | |
# iterates between the files and read them. | |
results = {} | |
codes = [] | |
with open(path + '\\starting data\\dates.json') as f: | |
dates = json.load(f) | |
newdates = {} | |
for f in listdir(getcwd()): | |
# loads only files not being worked on and owned by excel | |
activeFile = 0 | |
if not '~$' in str(f): | |
activeFile = load_workbook(f) | |
else: | |
continue | |
# reads the input data | |
Sheet = activeFile[defaultSheet] | |
f = str(f).strip('.xlsx') | |
sheetName = f[:len(f) - 8] | |
sheetName = sheetName.lower() | |
print sheetName | |
# decides wether to add or create a new instance of rows | |
activeCells = 0 | |
if sheetName in codes: | |
activeCells = results['sheetName'] | |
else: | |
activeCells = rows() | |
cellsGenerator = Sheet[Sheet.calculate_dimension()] | |
cells = [] | |
# since the way I get the cells doesn't give me the data I need in a sliceable format | |
# I convert it using the following loop | |
for cell in cellsGenerator: | |
cells.append(cell) | |
# iterates between all rows in the file except the first one | |
for row in cells[1:]: | |
activeCells.org(row) | |
results[sheetName] = activeCells | |
# checks for data out of the time period | |
Rawdate = f.strip('.xlsx')[-8:] | |
# DDMMYY | |
Day = int(Rawdate[0:2]) | |
Month = int(Rawdate[2:4]) | |
Year = int(Rawdate[4:8]) | |
actualFileDate = datetime.datetime(Year, Month, Day) | |
date = datetime.datetime(dates[sheetName]['year'], | |
dates[sheetName]['month'], | |
dates[sheetName]['day']) | |
delta = actualFileDate - date | |
# evaluates data to check for possible duplicates | |
print results[sheetName].index | |
for i in range(results[sheetName].index): | |
value = results[sheetName].data[i] | |
value = value[int(columns['Data']) - 1].value | |
if not value: | |
results[sheetName].remove(i) | |
continue | |
localDelta = actualFileDate - value | |
if localDelta > delta: | |
textconfirm = 'La data ' + str(value) + ' nel file ' \ | |
+ str(f) \ | |
+ """e' precedente all'ultimo caricamento dei dati | |
Controllare se il valore non e' un duplicato od un errore | |
Premere INSERISCI per inserire il dato, RIMUOVI per rimuoverlo dal risultato finale""" | |
answer = \ | |
pymsgbox.confirm(title='HO TROVATO UN POSSIBILE DUPLICATO' | |
, text=textconfirm, | |
buttons=['INSERISCI', 'RIMUOVI']) | |
if answer is 'INSERISCI': | |
pass | |
elif answer is 'RIMUOVI': | |
results[sheetName].remove(i + 1) | |
# saves the new date as long as it's more recent in case there were multiple files from the same office | |
if newdates.get(sheetName, 1): | |
newdates = {str(sheetName): {}} | |
newdates[sheetName]['year'] = Year | |
newdates[sheetName]['month'] = Month | |
newdates[sheetName]['day'] = Day | |
else: | |
oldYear = newdates[sheetName]['year'] | |
oldMonth = newdates[sheetName]['month'] | |
oldDay = newdates[sheetName]['day'] | |
olddate = datetime.datetime(oldYear, oldMonth, oldDay) | |
actualdate = datetime.datetime(Year, Month, Day) | |
if olddate - actualdate < datetime.timedelta(0): | |
newdates[sheetName]['year'] = Year | |
newdates[sheetName]['month'] = Month | |
newdates[sheetName]['day'] = Day | |
with open(path + '\\starting data\\dates.json', 'w') as f: | |
json.dump(newdates, f) | |
# formattedresults is a multidimensional list | |
# prepares the output file(not sheet separated yet) | |
print '#############################################\n' | |
chdir(out) | |
wend = Workbook() | |
wsmain = wend.active | |
# dd the titles row | |
wsmain.append(titles) | |
wsmain.column_dimensions[chr(int(columns['Descrizione']) + 64)].width = \ | |
52 | |
wsmain.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52 | |
wsmain.title = 'Principale' | |
mainrownumber = 2 | |
for key in results.keys(): | |
# adding capitalize to each sheet name/ Code means I get a proper capitalized name | |
wend.create_sheet().title = key.capitalize() | |
wsend = wend[key.capitalize()] | |
# dd the titles row | |
titles.pop(int(columns['Codice']) - 1) | |
wsend.append(titles) | |
rownumber = 2 | |
for row in results[key].data: | |
values = [] | |
# create the array of the values, just to reduce clutter, even if it adds overhead | |
for value in row: | |
values.append(value.value) | |
# write the date in the correct format, since else it would write the object datetime, and excel freaks ou | |
for cell in values: | |
if isinstance(cell, datetime.datetime): | |
ind = values.index(cell) | |
cell = cell.date().strftime('%d-%m-%Y') | |
values[ind] = cell | |
# this terrible block is needed to initiliase and fill the cells. A more compact way of doing this | |
# would require rewriting part of openpyxl | |
wsend.append(values) | |
wscell = wsend[letters(rownumber, columns['Saldo'])] | |
wscell.value = format(columns, rownumber) | |
wsmain.append(values) | |
wmcellsaldo = wsmain[letters(rownumber, columns['Saldo'])] | |
wmcellsaldo.value = format(columns, rownumber) | |
wmcellcodice = wsmain[letters(rownumber, columns['Codice'])] | |
wmcellcodice.value = 'Ufficio di: ' + sheetName.capitalize() | |
rownumber += 1 | |
mainrownumber += 1 | |
wsend.column_dimensions[chr(int(columns['Descrizione']) | |
+ 64)].width = 52 | |
wsend.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52 | |
wend.save('rawResults.xlsx') | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
from pymsgbox import * | |
class rows: | |
def __init__(self): | |
# titles structure: {name:index(columnNumber-1)} | |
# columns structure: {index(columnNumber-1):name} | |
self.index = 0 | |
self.data = [] | |
def addLine(self, arr): | |
self.index += 1 | |
self.data.append(arr) | |
print self.index | |
def org(self, row): | |
cells = [] | |
# transform the tuple into an array | |
for cell in row: | |
cells.append(cell) | |
if self.clean(cells): | |
self.addLine(cells) | |
return cells | |
def clean(self, cells): | |
# check the line. If most values are none, remove it | |
void = 0 | |
tot = len(cells) | |
for val in cells: | |
if val.value is None: | |
void += 1 | |
# in case most cells are empty, ignore it | |
if void > tot / 2: | |
return 0 | |
else: | |
return 1 | |
# just removes the selected line and scales the index back (even if it's not needed) | |
def remove(self, rownumber): | |
self.index -= 1 | |
self.data.pop(rownumber) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment