Skip to content

Instantly share code, notes, and snippets.

@GTKplusplus
Last active October 8, 2015 10:49
Show Gist options
  • Save GTKplusplus/f12e04aa23cee98fa8b6 to your computer and use it in GitHub Desktop.
Save GTKplusplus/f12e04aa23cee98fa8b6 to your computer and use it in GitHub Desktop.
Data=1
Causale=2
Descrizione=3
Entrate=4
Uscite=5
Saldo=6
Codice=7
{
"asti": {
"month": 9,
"day": 2,
"year": 2015
}
}
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import required libraries
from openpyxl import load_workbook, Workbook
from os import getcwd, chdir, listdir
from structure import rows
import datetime
import json
import pymsgbox
def format(columns, row):
# gets column index for saldo, entrate, uscite
print row
saldo = chr(int(columns['Saldo']) + 64)
entrate = chr(int(columns['Entrate']) + 64)
uscite = chr(int(columns['Uscite']) + 64)
# get the codes ready:
if row <= 2:
res = '=' + entrate + str(row) + '+' + uscite + str(row)
else:
res = '=' + saldo + str(row - 1) + '+' + entrate + str(row) \
+ '+' + uscite + str(row)
print res
return res
def letters(row, column):
col = chr(int(column) + 64)
string = col + str(row)
return string
# gets the workspace ready
defaultSheet = 'main'
# sets up relevant paths
path = getcwd()
startingdatapath = path + '\\starting data\\'
out = path + '\\output_files\\'
in_dir = path + '\\input_files\\'
# sets the columns index
chdir(startingdatapath)
print getcwd()
columns = {}
titles = []
with open('columns.txt', 'r') as f:
i = 0
for line in f:
(name, value) = line.split('=')
i += 1
columns[name] = value
titles = ['' for x in range(i)]
for key in columns.keys():
titles[int(columns[key]) - 1] = key
# goes to the input directory
chdir(in_dir)
# iterates between the files and read them.
results = {}
codes = []
with open(path + '\\starting data\\dates.json') as f:
dates = json.load(f)
newdates = {}
for f in listdir(getcwd()):
# loads only files not being worked on and owned by excel
activeFile = 0
if not '~$' in str(f):
activeFile = load_workbook(f)
else:
continue
# reads the input data
Sheet = activeFile[defaultSheet]
f = str(f).strip('.xlsx')
sheetName = f[:len(f) - 8]
sheetName = sheetName.lower()
print sheetName
# decides wether to add or create a new instance of rows
activeCells = 0
if sheetName in codes:
activeCells = results['sheetName']
else:
activeCells = rows()
cellsGenerator = Sheet[Sheet.calculate_dimension()]
cells = []
# since the way I get the cells doesn't give me the data I need in a sliceable format
# I convert it using the following loop
for cell in cellsGenerator:
cells.append(cell)
# iterates between all rows in the file except the first one
for row in cells[1:]:
activeCells.org(row)
results[sheetName] = activeCells
# checks for data out of the time period
Rawdate = f.strip('.xlsx')[-8:]
# DDMMYY
Day = int(Rawdate[0:2])
Month = int(Rawdate[2:4])
Year = int(Rawdate[4:8])
actualFileDate = datetime.datetime(Year, Month, Day)
date = datetime.datetime(dates[sheetName]['year'],
dates[sheetName]['month'],
dates[sheetName]['day'])
delta = actualFileDate - date
# evaluates data to check for possible duplicates
print results[sheetName].index
for i in range(results[sheetName].index):
value = results[sheetName].data[i]
value = value[int(columns['Data']) - 1].value
if not value:
results[sheetName].remove(i)
continue
localDelta = actualFileDate - value
if localDelta > delta:
textconfirm = 'La data ' + str(value) + ' nel file ' \
+ str(f) \
+ """e' precedente all'ultimo caricamento dei dati
Controllare se il valore non e' un duplicato od un errore
Premere INSERISCI per inserire il dato, RIMUOVI per rimuoverlo dal risultato finale"""
answer = \
pymsgbox.confirm(title='HO TROVATO UN POSSIBILE DUPLICATO'
, text=textconfirm,
buttons=['INSERISCI', 'RIMUOVI'])
if answer is 'INSERISCI':
pass
elif answer is 'RIMUOVI':
results[sheetName].remove(i + 1)
# saves the new date as long as it's more recent in case there were multiple files from the same office
if newdates.get(sheetName, 1):
newdates = {str(sheetName): {}}
newdates[sheetName]['year'] = Year
newdates[sheetName]['month'] = Month
newdates[sheetName]['day'] = Day
else:
oldYear = newdates[sheetName]['year']
oldMonth = newdates[sheetName]['month']
oldDay = newdates[sheetName]['day']
olddate = datetime.datetime(oldYear, oldMonth, oldDay)
actualdate = datetime.datetime(Year, Month, Day)
if olddate - actualdate < datetime.timedelta(0):
newdates[sheetName]['year'] = Year
newdates[sheetName]['month'] = Month
newdates[sheetName]['day'] = Day
with open(path + '\\starting data\\dates.json', 'w') as f:
json.dump(newdates, f)
# formattedresults is a multidimensional list
# prepares the output file(not sheet separated yet)
print '#############################################\n'
chdir(out)
wend = Workbook()
wsmain = wend.active
# dd the titles row
wsmain.append(titles)
wsmain.column_dimensions[chr(int(columns['Descrizione']) + 64)].width = \
52
wsmain.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52
wsmain.title = 'Principale'
mainrownumber = 2
for key in results.keys():
# adding capitalize to each sheet name/ Code means I get a proper capitalized name
wend.create_sheet().title = key.capitalize()
wsend = wend[key.capitalize()]
# dd the titles row
titles.pop(int(columns['Codice']) - 1)
wsend.append(titles)
rownumber = 2
for row in results[key].data:
values = []
# create the array of the values, just to reduce clutter, even if it adds overhead
for value in row:
values.append(value.value)
# write the date in the correct format, since else it would write the object datetime, and excel freaks ou
for cell in values:
if isinstance(cell, datetime.datetime):
ind = values.index(cell)
cell = cell.date().strftime('%d-%m-%Y')
values[ind] = cell
# this terrible block is needed to initiliase and fill the cells. A more compact way of doing this
# would require rewriting part of openpyxl
wsend.append(values)
wscell = wsend[letters(rownumber, columns['Saldo'])]
wscell.value = format(columns, rownumber)
wsmain.append(values)
wmcellsaldo = wsmain[letters(rownumber, columns['Saldo'])]
wmcellsaldo.value = format(columns, rownumber)
wmcellcodice = wsmain[letters(rownumber, columns['Codice'])]
wmcellcodice.value = 'Ufficio di: ' + sheetName.capitalize()
rownumber += 1
mainrownumber += 1
wsend.column_dimensions[chr(int(columns['Descrizione'])
+ 64)].width = 52
wsend.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52
wend.save('rawResults.xlsx')
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pymsgbox import *
class rows:
def __init__(self):
# titles structure: {name:index(columnNumber-1)}
# columns structure: {index(columnNumber-1):name}
self.index = 0
self.data = []
def addLine(self, arr):
self.index += 1
self.data.append(arr)
print self.index
def org(self, row):
cells = []
# transform the tuple into an array
for cell in row:
cells.append(cell)
if self.clean(cells):
self.addLine(cells)
return cells
def clean(self, cells):
# check the line. If most values are none, remove it
void = 0
tot = len(cells)
for val in cells:
if val.value is None:
void += 1
# in case most cells are empty, ignore it
if void > tot / 2:
return 0
else:
return 1
# just removes the selected line and scales the index back (even if it's not needed)
def remove(self, rownumber):
self.index -= 1
self.data.pop(rownumber)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment