GTKplusplus/columns.txt

## columns.txt
Data=1
Causale=2
Descrizione=3
Entrate=4
Uscite=5
Saldo=6
Codice=7

## dates.json
{
  "asti": {
    "month": 9,
    "day": 2,
    "year": 2015
  }
}

## main.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import required libraries

from openpyxl import load_workbook, Workbook
from os import getcwd, chdir, listdir
from structure import rows
import datetime
import json
import pymsgbox


def format(columns, row):

    # gets column index for saldo, entrate, uscite

    print row
    saldo = chr(int(columns['Saldo']) + 64)
    entrate = chr(int(columns['Entrate']) + 64)
    uscite = chr(int(columns['Uscite']) + 64)

    # get the codes ready:

    if row <= 2:
        res = '=' + entrate + str(row) + '+' + uscite + str(row)
    else:
        res = '=' + saldo + str(row - 1) + '+' + entrate + str(row) \
            + '+' + uscite + str(row)
    print res
    return res


def letters(row, column):
    col = chr(int(column) + 64)
    string = col + str(row)
    return string


# gets the workspace ready

defaultSheet = 'main'

# sets up relevant paths

path = getcwd()
startingdatapath = path + '\\starting data\\'
out = path + '\\output_files\\'
in_dir = path + '\\input_files\\'

# sets the columns index

chdir(startingdatapath)
print getcwd()
columns = {}
titles = []
with open('columns.txt', 'r') as f:
    i = 0
    for line in f:
        (name, value) = line.split('=')
        i += 1
        columns[name] = value
    titles = ['' for x in range(i)]
    for key in columns.keys():
        titles[int(columns[key]) - 1] = key

# goes to the input directory

chdir(in_dir)

# iterates between the files and read them.

results = {}
codes = []
with open(path + '\\starting data\\dates.json') as f:
    dates = json.load(f)
newdates = {}
for f in listdir(getcwd()):

    # loads only files not being worked on and owned by excel

    activeFile = 0
    if not '~$' in str(f):
        activeFile = load_workbook(f)
    else:
        continue

    # reads the input data

    Sheet = activeFile[defaultSheet]
    f = str(f).strip('.xlsx')
    sheetName = f[:len(f) - 8]
    sheetName = sheetName.lower()
    print sheetName

    # decides wether to add or create a new instance of rows

    activeCells = 0
    if sheetName in codes:
        activeCells = results['sheetName']
    else:
        activeCells = rows()

    cellsGenerator = Sheet[Sheet.calculate_dimension()]
    cells = []

# since the way I get the cells doesn't give me the data I need in a sliceable format
# I convert it using the following loop

    for cell in cellsGenerator:
        cells.append(cell)

    # iterates between all rows in the file except the first one

    for row in cells[1:]:
        activeCells.org(row)

    results[sheetName] = activeCells

    # checks for data out of the time period

    Rawdate = f.strip('.xlsx')[-8:]

    # DDMMYY

    Day = int(Rawdate[0:2])
    Month = int(Rawdate[2:4])
    Year = int(Rawdate[4:8])

    actualFileDate = datetime.datetime(Year, Month, Day)

    date = datetime.datetime(dates[sheetName]['year'],
                             dates[sheetName]['month'],
                             dates[sheetName]['day'])
    delta = actualFileDate - date

# evaluates data to check for possible duplicates

    print results[sheetName].index
    for i in range(results[sheetName].index):
        value = results[sheetName].data[i]
        value = value[int(columns['Data']) - 1].value
        if not value:
            results[sheetName].remove(i)
            continue
        localDelta = actualFileDate - value
        if localDelta > delta:
            textconfirm = 'La data ' + str(value) + ' nel file ' \
                + str(f) \
                + """e' precedente all'ultimo caricamento dei dati
 Controllare se il valore non e' un duplicato od un errore
 Premere INSERISCI per inserire il dato, RIMUOVI per rimuoverlo dal risultato finale"""
            answer = \
                pymsgbox.confirm(title='HO TROVATO UN POSSIBILE DUPLICATO'
                                 , text=textconfirm,
                                 buttons=['INSERISCI', 'RIMUOVI'])
            if answer is 'INSERISCI':
                pass
            elif answer is 'RIMUOVI':
                results[sheetName].remove(i + 1)

# saves the new date as long as it's more recent in case there were multiple files from the same office

    if newdates.get(sheetName, 1):
        newdates = {str(sheetName): {}}
        newdates[sheetName]['year'] = Year
        newdates[sheetName]['month'] = Month
        newdates[sheetName]['day'] = Day
    else:
        oldYear = newdates[sheetName]['year']
        oldMonth = newdates[sheetName]['month']
        oldDay = newdates[sheetName]['day']
        olddate = datetime.datetime(oldYear, oldMonth, oldDay)
        actualdate = datetime.datetime(Year, Month, Day)
        if olddate - actualdate < datetime.timedelta(0):
            newdates[sheetName]['year'] = Year
            newdates[sheetName]['month'] = Month
            newdates[sheetName]['day'] = Day

with open(path + '\\starting data\\dates.json', 'w') as f:
    json.dump(newdates, f)

# formattedresults is a multidimensional list
# prepares the output file(not sheet separated yet)

print '#############################################\n'
chdir(out)
wend = Workbook()
wsmain = wend.active

# dd the titles row

wsmain.append(titles)
wsmain.column_dimensions[chr(int(columns['Descrizione']) + 64)].width = \
    52
wsmain.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52
wsmain.title = 'Principale'
mainrownumber = 2
for key in results.keys():

    # adding capitalize to each sheet name/ Code means I get a proper capitalized name

    wend.create_sheet().title = key.capitalize()
    wsend = wend[key.capitalize()]

    # dd the titles row

    titles.pop(int(columns['Codice']) - 1)
    wsend.append(titles)
    rownumber = 2
    for row in results[key].data:
        values = []

        # create the array of the values, just to reduce clutter, even if it adds overhead

        for value in row:
            values.append(value.value)

        # write the date in the correct format, since else it would write the object datetime, and excel freaks ou

        for cell in values:
            if isinstance(cell, datetime.datetime):
                ind = values.index(cell)
                cell = cell.date().strftime('%d-%m-%Y')
                values[ind] = cell

        # this terrible block is needed to initiliase and fill the cells. A more compact way of doing this
        # would require rewriting part of openpyxl

        wsend.append(values)
        wscell = wsend[letters(rownumber, columns['Saldo'])]
        wscell.value = format(columns, rownumber)
        wsmain.append(values)
        wmcellsaldo = wsmain[letters(rownumber, columns['Saldo'])]
        wmcellsaldo.value = format(columns, rownumber)
        wmcellcodice = wsmain[letters(rownumber, columns['Codice'])]
        wmcellcodice.value = 'Ufficio di: ' + sheetName.capitalize()
        rownumber += 1
        mainrownumber += 1
    wsend.column_dimensions[chr(int(columns['Descrizione'])
                            + 64)].width = 52
    wsend.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52

wend.save('rawResults.xlsx')


## structure.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pymsgbox import *


class rows:

    def __init__(self):

# titles structure: {name:index(columnNumber-1)}
# columns structure: {index(columnNumber-1):name}

        self.index = 0
        self.data = []

    def addLine(self, arr):

        self.index += 1
        self.data.append(arr)
        print self.index

    def org(self, row):
        cells = []

        # transform the tuple into an array

        for cell in row:
            cells.append(cell)
        if self.clean(cells):
            self.addLine(cells)
        return cells

    def clean(self, cells):

# check the line. If most values are none, remove it

        void = 0
        tot = len(cells)
        for val in cells:
            if val.value is None:
                void += 1

# in case most cells are empty, ignore it

        if void > tot / 2:
            return 0
        else:
            return 1

# just removes the selected line and scales the index back (even if it's not needed)

    def remove(self, rownumber):
        self.index -= 1
        self.data.pop(rownumber)
	Data=1
	Causale=2
	Descrizione=3
	Entrate=4
	Uscite=5
	Saldo=6
	Codice=7
	#!/usr/bin/python
	# -- coding: utf-8 --
	# import required libraries

	from openpyxl import load_workbook, Workbook
	from os import getcwd, chdir, listdir
	from structure import rows
	import datetime
	import json
	import pymsgbox


	def format(columns, row):

	# gets column index for saldo, entrate, uscite

	print row
	saldo = chr(int(columns['Saldo']) + 64)
	entrate = chr(int(columns['Entrate']) + 64)
	uscite = chr(int(columns['Uscite']) + 64)

	# get the codes ready:

	if row <= 2:
	res = '=' + entrate + str(row) + '+' + uscite + str(row)
	else:
	res = '=' + saldo + str(row - 1) + '+' + entrate + str(row) \
	+ '+' + uscite + str(row)
	print res
	return res


	def letters(row, column):
	col = chr(int(column) + 64)
	string = col + str(row)
	return string


	# gets the workspace ready

	defaultSheet = 'main'

	# sets up relevant paths

	path = getcwd()
	startingdatapath = path + '\\starting data\\'
	out = path + '\\output_files\\'
	in_dir = path + '\\input_files\\'

	# sets the columns index

	chdir(startingdatapath)
	print getcwd()
	columns = {}
	titles = []
	with open('columns.txt', 'r') as f:
	i = 0
	for line in f:
	(name, value) = line.split('=')
	i += 1
	columns[name] = value
	titles = ['' for x in range(i)]
	for key in columns.keys():
	titles[int(columns[key]) - 1] = key

	# goes to the input directory

	chdir(in_dir)

	# iterates between the files and read them.

	results = {}
	codes = []
	with open(path + '\\starting data\\dates.json') as f:
	dates = json.load(f)
	newdates = {}
	for f in listdir(getcwd()):

	# loads only files not being worked on and owned by excel

	activeFile = 0
	if not '~$' in str(f):
	activeFile = load_workbook(f)
	else:
	continue

	# reads the input data

	Sheet = activeFile[defaultSheet]
	f = str(f).strip('.xlsx')
	sheetName = f[:len(f) - 8]
	sheetName = sheetName.lower()
	print sheetName

	# decides wether to add or create a new instance of rows

	activeCells = 0
	if sheetName in codes:
	activeCells = results['sheetName']
	else:
	activeCells = rows()

	cellsGenerator = Sheet[Sheet.calculate_dimension()]
	cells = []

	# since the way I get the cells doesn't give me the data I need in a sliceable format
	# I convert it using the following loop

	for cell in cellsGenerator:
	cells.append(cell)

	# iterates between all rows in the file except the first one

	for row in cells[1:]:
	activeCells.org(row)

	results[sheetName] = activeCells

	# checks for data out of the time period

	Rawdate = f.strip('.xlsx')[-8:]

	# DDMMYY

	Day = int(Rawdate[0:2])
	Month = int(Rawdate[2:4])
	Year = int(Rawdate[4:8])

	actualFileDate = datetime.datetime(Year, Month, Day)

	date = datetime.datetime(dates[sheetName]['year'],
	dates[sheetName]['month'],
	dates[sheetName]['day'])
	delta = actualFileDate - date

	# evaluates data to check for possible duplicates

	print results[sheetName].index
	for i in range(results[sheetName].index):
	value = results[sheetName].data[i]
	value = value[int(columns['Data']) - 1].value
	if not value:
	results[sheetName].remove(i)
	continue
	localDelta = actualFileDate - value
	if localDelta > delta:
	textconfirm = 'La data ' + str(value) + ' nel file ' \
	+ str(f) \
	+ """e' precedente all'ultimo caricamento dei dati
	Controllare se il valore non e' un duplicato od un errore
	Premere INSERISCI per inserire il dato, RIMUOVI per rimuoverlo dal risultato finale"""
	answer = \
	pymsgbox.confirm(title='HO TROVATO UN POSSIBILE DUPLICATO'
	, text=textconfirm,
	buttons=['INSERISCI', 'RIMUOVI'])
	if answer is 'INSERISCI':
	pass
	elif answer is 'RIMUOVI':
	results[sheetName].remove(i + 1)

	# saves the new date as long as it's more recent in case there were multiple files from the same office

	if newdates.get(sheetName, 1):
	newdates = {str(sheetName): {}}
	newdates[sheetName]['year'] = Year
	newdates[sheetName]['month'] = Month
	newdates[sheetName]['day'] = Day
	else:
	oldYear = newdates[sheetName]['year']
	oldMonth = newdates[sheetName]['month']
	oldDay = newdates[sheetName]['day']
	olddate = datetime.datetime(oldYear, oldMonth, oldDay)
	actualdate = datetime.datetime(Year, Month, Day)
	if olddate - actualdate < datetime.timedelta(0):
	newdates[sheetName]['year'] = Year
	newdates[sheetName]['month'] = Month
	newdates[sheetName]['day'] = Day

	with open(path + '\\starting data\\dates.json', 'w') as f:
	json.dump(newdates, f)

	# formattedresults is a multidimensional list
	# prepares the output file(not sheet separated yet)

	print '#############################################\n'
	chdir(out)
	wend = Workbook()
	wsmain = wend.active

	# dd the titles row

	wsmain.append(titles)
	wsmain.column_dimensions[chr(int(columns['Descrizione']) + 64)].width = \
	52
	wsmain.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52
	wsmain.title = 'Principale'
	mainrownumber = 2
	for key in results.keys():

	# adding capitalize to each sheet name/ Code means I get a proper capitalized name

	wend.create_sheet().title = key.capitalize()
	wsend = wend[key.capitalize()]

	# dd the titles row

	titles.pop(int(columns['Codice']) - 1)
	wsend.append(titles)
	rownumber = 2
	for row in results[key].data:
	values = []

	# create the array of the values, just to reduce clutter, even if it adds overhead

	for value in row:
	values.append(value.value)

	# write the date in the correct format, since else it would write the object datetime, and excel freaks ou

	for cell in values:
	if isinstance(cell, datetime.datetime):
	ind = values.index(cell)
	cell = cell.date().strftime('%d-%m-%Y')
	values[ind] = cell

	# this terrible block is needed to initiliase and fill the cells. A more compact way of doing this
	# would require rewriting part of openpyxl

	wsend.append(values)
	wscell = wsend[letters(rownumber, columns['Saldo'])]
	wscell.value = format(columns, rownumber)
	wsmain.append(values)
	wmcellsaldo = wsmain[letters(rownumber, columns['Saldo'])]
	wmcellsaldo.value = format(columns, rownumber)
	wmcellcodice = wsmain[letters(rownumber, columns['Codice'])]
	wmcellcodice.value = 'Ufficio di: ' + sheetName.capitalize()
	rownumber += 1
	mainrownumber += 1
	wsend.column_dimensions[chr(int(columns['Descrizione'])
	+ 64)].width = 52
	wsend.column_dimensions[chr(int(columns['Codice']) + 64)].width = 52

	wend.save('rawResults.xlsx')
	#!/usr/bin/python
	# -- coding: utf-8 --
	from pymsgbox import *


	class rows:

	def __init__(self):

	# titles structure: {name:index(columnNumber-1)}
	# columns structure: {index(columnNumber-1):name}

	self.index = 0
	self.data = []

	def addLine(self, arr):

	self.index += 1
	self.data.append(arr)
	print self.index

	def org(self, row):
	cells = []

	# transform the tuple into an array

	for cell in row:
	cells.append(cell)
	if self.clean(cells):
	self.addLine(cells)
	return cells

	def clean(self, cells):

	# check the line. If most values are none, remove it

	void = 0
	tot = len(cells)
	for val in cells:
	if val.value is None:
	void += 1

	# in case most cells are empty, ignore it

	if void > tot / 2:
	return 0
	else:
	return 1

	# just removes the selected line and scales the index back (even if it's not needed)

	def remove(self, rownumber):
	self.index -= 1
	self.data.pop(rownumber)