Skip to content

Instantly share code, notes, and snippets.

@reuf
Created February 18, 2022 00:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reuf/f65215c7809c1b3bdaef7367f1b1d493 to your computer and use it in GitHub Desktop.
Save reuf/f65215c7809c1b3bdaef7367f1b1d493 to your computer and use it in GitHub Desktop.
import tabula
import pandas as pd
import openpyxl
import glob, os
os.chdir("./")
table1991Census = pd.read_csv('1991CensusNoKB.csv', header=None, encoding = "ISO-8859-1", sep='\t', low_memory=False)
table1991Census.set_axis(["id", "address_id", "nas", "name", "national_id", "birthdate"], axis=1, inplace=True)
table1991Census.to_csv('1991CensusNoKB.csv', index=False)
table1997FVR = pd.read_csv('1997FVR.csv', header=None, encoding = "ISO-8859-1", sep='\t', low_memory=False)
table1997FVR.set_axis(["reg_id", "last_name", "first_name", "birthdate", "national_id", "name_is_in_census", "vote_for_municipality_code", "sot_name_latin", "sort_name_cyrillic", "census_id"], axis=1, inplace=True)
table1997FVR.to_csv('1997FVR.csv', index=False)
tableAddress = pd.read_csv('Address.csv', header=None, encoding = "ISO-8859-1", sep='\t')
tableAddress.set_axis(["address_id", "opstina_id", "address"], axis=1, inplace=True)
tableAddress.to_csv('Address.csv', index=False)
tableOpstina = pd.read_csv('Opstina.csv', header=None, sep='\t')
tableOpstina.set_axis(["opstina_id", "opstina"], axis=1, inplace=True)
tableOpstina.to_csv('Opstina.csv', index=False)
# https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/
from sqlalchemy import create_engine
import pymysql
import pandas as pd
table = pd.read_csv("1991CensusNoKB.csv", low_memory=False)
table2 = pd.read_csv("1997FVR.csv", low_memory=False)
table3 = pd.read_csv("Address.csv")
table4 = pd.read_csv("Opstina.csv")
tableName = "tabela1991census"
tableName2 = "tabela1997fvr"
tableName3 = "address"
tableName4 = "opstina"
dataFrame = pd.DataFrame(data=table)
dataFrame2 = pd.DataFrame(data=table2)
dataFrame3 = pd.DataFrame(data=table3)
dataFrame4 = pd.DataFrame(data=table4)
sqlEngine = create_engine('mysql+pymysql://root:Root1234@127.0.0.1/popis1991', pool_recycle=3600)
dbConnection = sqlEngine.connect()
try:
frame = dataFrame.to_sql(tableName, dbConnection, if_exists='fail');
frame = dataFrame2.to_sql(tableName2, dbConnection, if_exists='fail');
frame = dataFrame3.to_sql(tableName3, dbConnection, if_exists='fail');
frame = dataFrame4.to_sql(tableName4, dbConnection, if_exists='fail');
except ValueError as vx:
print(vx)
except Exception as ex:
print(ex)
else:
print("Table %s created successfully."%tableName);
# print("Table %s created successfully." %tableName2);
finally:
dbConnection.close()
import os
# 1991Census
# 1997FVR
def split(filehandler, delimiter='\t', row_limit=100000,
output_name_template='1997FVR_%s.csv', output_path='.', keep_headers=True):
import csv
reader = csv.reader(filehandler, delimiter=delimiter)
current_piece = 1
current_out_path = os.path.join(
output_path,
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
current_limit = row_limit
if keep_headers:
headers = next(reader)
current_out_writer.writerow(headers)
for i, row in enumerate(reader):
if i + 1 > current_limit:
current_piece += 1
current_limit = row_limit * current_piece
current_out_path = os.path.join(
output_path,
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
if keep_headers:
current_out_writer.writerow(headers)
current_out_writer.writerow(row)
split(open('1997FVR.csv', 'r'));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment