reuf/AppendHeaders.py

## AppendHeaders.py
import tabula
import pandas as pd
import openpyxl
import glob, os
os.chdir("./")

table1991Census = pd.read_csv('1991CensusNoKB.csv', header=None, encoding = "ISO-8859-1",  sep='\t', low_memory=False)
table1991Census.set_axis(["id", "address_id", "nas", "name", "national_id", "birthdate"], axis=1, inplace=True)
table1991Census.to_csv('1991CensusNoKB.csv', index=False)

table1997FVR = pd.read_csv('1997FVR.csv', header=None, encoding = "ISO-8859-1",  sep='\t', low_memory=False)
table1997FVR.set_axis(["reg_id", "last_name", "first_name", "birthdate", "national_id", "name_is_in_census", "vote_for_municipality_code", "sot_name_latin", "sort_name_cyrillic", "census_id"], axis=1, inplace=True)
table1997FVR.to_csv('1997FVR.csv', index=False)

tableAddress = pd.read_csv('Address.csv', header=None, encoding = "ISO-8859-1",  sep='\t')
tableAddress.set_axis(["address_id", "opstina_id", "address"], axis=1, inplace=True)
tableAddress.to_csv('Address.csv', index=False)

tableOpstina = pd.read_csv('Opstina.csv', header=None,  sep='\t')
tableOpstina.set_axis(["opstina_id", "opstina"], axis=1, inplace=True)
tableOpstina.to_csv('Opstina.csv', index=False)

# https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/

## ImportCSVToMySQL.py
from sqlalchemy import create_engine
import pymysql
import pandas as pd

table = pd.read_csv("1991CensusNoKB.csv", low_memory=False)
table2 = pd.read_csv("1997FVR.csv", low_memory=False)
table3 = pd.read_csv("Address.csv")
table4 = pd.read_csv("Opstina.csv")

tableName = "tabela1991census"
tableName2 = "tabela1997fvr"
tableName3 = "address"
tableName4 = "opstina"

dataFrame = pd.DataFrame(data=table)
dataFrame2 = pd.DataFrame(data=table2)
dataFrame3 = pd.DataFrame(data=table3)
dataFrame4 = pd.DataFrame(data=table4)

sqlEngine = create_engine('mysql+pymysql://root:Root1234@127.0.0.1/popis1991', pool_recycle=3600)

dbConnection = sqlEngine.connect()

try:
    frame = dataFrame.to_sql(tableName, dbConnection, if_exists='fail');
    frame = dataFrame2.to_sql(tableName2, dbConnection, if_exists='fail');
    frame = dataFrame3.to_sql(tableName3, dbConnection, if_exists='fail');
    frame = dataFrame4.to_sql(tableName4, dbConnection, if_exists='fail');
except ValueError as vx:
    print(vx)
except Exception as ex:
    print(ex)
else:
    print("Table %s created successfully."%tableName);
    # print("Table %s created successfully." %tableName2);
finally:
    dbConnection.close()

## spliter.py
import os

# 1991Census
# 1997FVR

def split(filehandler, delimiter='\t', row_limit=100000,
          output_name_template='1997FVR_%s.csv', output_path='.', keep_headers=True):
    import csv
    reader = csv.reader(filehandler, delimiter=delimiter)
    current_piece = 1
    current_out_path = os.path.join(
        output_path,
        output_name_template % current_piece
    )
    current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
    current_limit = row_limit
    if keep_headers:
        headers = next(reader)
        current_out_writer.writerow(headers)
    for i, row in enumerate(reader):
        if i + 1 > current_limit:
            current_piece += 1
            current_limit = row_limit * current_piece
            current_out_path = os.path.join(
                output_path,
                output_name_template % current_piece
            )
            current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
            if keep_headers:
                current_out_writer.writerow(headers)
        current_out_writer.writerow(row)

split(open('1997FVR.csv', 'r'));
	import tabula
	import pandas as pd
	import openpyxl
	import glob, os
	os.chdir("./")

	table1991Census = pd.read_csv('1991CensusNoKB.csv', header=None, encoding = "ISO-8859-1", sep='\t', low_memory=False)
	table1991Census.set_axis(["id", "address_id", "nas", "name", "national_id", "birthdate"], axis=1, inplace=True)
	table1991Census.to_csv('1991CensusNoKB.csv', index=False)

	table1997FVR = pd.read_csv('1997FVR.csv', header=None, encoding = "ISO-8859-1", sep='\t', low_memory=False)
	table1997FVR.set_axis(["reg_id", "last_name", "first_name", "birthdate", "national_id", "name_is_in_census", "vote_for_municipality_code", "sot_name_latin", "sort_name_cyrillic", "census_id"], axis=1, inplace=True)
	table1997FVR.to_csv('1997FVR.csv', index=False)

	tableAddress = pd.read_csv('Address.csv', header=None, encoding = "ISO-8859-1", sep='\t')
	tableAddress.set_axis(["address_id", "opstina_id", "address"], axis=1, inplace=True)
	tableAddress.to_csv('Address.csv', index=False)

	tableOpstina = pd.read_csv('Opstina.csv', header=None, sep='\t')
	tableOpstina.set_axis(["opstina_id", "opstina"], axis=1, inplace=True)
	tableOpstina.to_csv('Opstina.csv', index=False)

	# https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/
	from sqlalchemy import create_engine
	import pymysql
	import pandas as pd

	table = pd.read_csv("1991CensusNoKB.csv", low_memory=False)
	table2 = pd.read_csv("1997FVR.csv", low_memory=False)
	table3 = pd.read_csv("Address.csv")
	table4 = pd.read_csv("Opstina.csv")

	tableName = "tabela1991census"
	tableName2 = "tabela1997fvr"
	tableName3 = "address"
	tableName4 = "opstina"

	dataFrame = pd.DataFrame(data=table)
	dataFrame2 = pd.DataFrame(data=table2)
	dataFrame3 = pd.DataFrame(data=table3)
	dataFrame4 = pd.DataFrame(data=table4)

	sqlEngine = create_engine('mysql+pymysql://root:Root1234@127.0.0.1/popis1991', pool_recycle=3600)

	dbConnection = sqlEngine.connect()

	try:
	frame = dataFrame.to_sql(tableName, dbConnection, if_exists='fail');
	frame = dataFrame2.to_sql(tableName2, dbConnection, if_exists='fail');
	frame = dataFrame3.to_sql(tableName3, dbConnection, if_exists='fail');
	frame = dataFrame4.to_sql(tableName4, dbConnection, if_exists='fail');
	except ValueError as vx:
	print(vx)
	except Exception as ex:
	print(ex)
	else:
	print("Table %s created successfully."%tableName);
	# print("Table %s created successfully." %tableName2);
	finally:
	dbConnection.close()
	import os

	# 1991Census
	# 1997FVR

	def split(filehandler, delimiter='\t', row_limit=100000,
	output_name_template='1997FVR_%s.csv', output_path='.', keep_headers=True):
	import csv
	reader = csv.reader(filehandler, delimiter=delimiter)
	current_piece = 1
	current_out_path = os.path.join(
	output_path,
	output_name_template % current_piece
	)
	current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
	current_limit = row_limit
	if keep_headers:
	headers = next(reader)
	current_out_writer.writerow(headers)
	for i, row in enumerate(reader):
	if i + 1 > current_limit:
	current_piece += 1
	current_limit = row_limit * current_piece
	current_out_path = os.path.join(
	output_path,
	output_name_template % current_piece
	)
	current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
	if keep_headers:
	current_out_writer.writerow(headers)
	current_out_writer.writerow(row)

	split(open('1997FVR.csv', 'r'));