Skip to content

Instantly share code, notes, and snippets.

@craigderington
Created January 28, 2019 17:00
Show Gist options
  • Save craigderington/aa85e5c3abd95bef8f5015c3e2c69c39 to your computer and use it in GitHub Desktop.
Save craigderington/aa85e5c3abd95bef8f5015c3e2c69c39 to your computer and use it in GitHub Desktop.
OpenAddress Data from Local Filesystem Path - Query into MySQL Database
#!.env/bin/python
# -*- coding:utf-8 -*-
import config
import logging
import os
import csv
import sys
import MySQLdb
class DatabaseConnection(object):
"""
Create a new MySQL database connection
:param host, user, password, database
:type MySQL connection object
:return MySQL database query objects
"""
dbconfig = (config.db_host, config.db_user, config.db_pass, config.db)
def __init__(self):
self.conn = MySQLdb.connect(*self.dbconfig)
self.cur = self.conn.cursor()
def __enter__(self):
return DatabaseConnection()
def query(self, sql, params):
self.cur.execute(sql, params)
self.conn.commit()
def __exit__(self, exc_type, exc_val, exc_tb):
if self.conn:
self.conn.close()
def get_path():
"""
Get working directory path and list of files
:return: list
"""
path = os.getcwd() + '/data/us_south/us/'
return path
def get_file_list(path):
"""
Get a list of CSV files from each directory
:return: list of lists
"""
csv_files = [os.path.join(d, x) for d, dirs, files in os.walk(path) for x in files if x.endswith(".csv")]
return csv_files
def split_paths(path):
"""
Split the path string neatly into usable parts
:param path:
:return: country, state
"""
return os.path.split(path)
def get_state(path):
"""
Return the city and state
:return:
"""
values = path.split('/')
country = values[6]
state = values[7]
return country, state
def main():
"""
Main entry point
:return: none
"""
# create our log
log_format = '%(asctime)s %(levelname)s:%(message)s'
logging.basicConfig(
format=log_format,
filename=__name__ + '.addresses'
)
# set a counter
counter = 0
sql = '''insert into address (lon, lat, number, street, unit, city, district, \
region, postcode, unique_id, hash) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
# determine paths
try:
path = get_path()
if path:
try:
file_list = get_file_list(path)
if file_list:
try:
for item in file_list:
paths = split_paths(item)
data = get_state(paths[0])
filename = paths[1]
country = data[0]
state = data[1]
try:
with open(path + '/' + state + '/' + filename, 'r') as f1:
reader = csv.reader(f1, delimiter=',')
for row in reader:
print(country.upper(), state.upper(), row)
params = (row[0], row[1], row[2] or 0, row[3], row[4], row[5], row[6], row[7],
row[8], row[9], row[10])
try:
with DatabaseConnection() as db:
db.query(sql, params)
logging.info('Added record: {} {}'.format(str(row[4]), str(row[5])))
except MySQLdb.Error as db_err:
logging.critical('A database exception has '
'occurred: {}'.format(str(db_err)))
counter += 1
except Exception as e:
logging.critical('Can not open file paths: {}'.format(str(e)))
except Exception as e:
logging.critical('There are zero CSV files to parse: {}. Aborting'.format(str(e)))
except IOError as io_err:
logging.critical('Unable to access the local filesystem: {}'.format(str(io_err)))
except IOError as io_err:
logging.critical('Unable to access the local filesystem: {}'.format(str(io_err)))
# return the counter total records
return counter
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment