Multi-process loading of data into MySQL from Python
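The script assumes the target parking_citations table already exists with the columns named in its INSERT statement; the gist itself does not include the table definition. The sketch below is only a rough illustration of a matching schema, with column types guessed from the field names rather than taken from the source data, so adjust them to suit the CSV contents:

CREATE TABLE parking_citations (
    TicketNumber VARCHAR(32)
    , IssueDate DATETIME
    , IssueTime VARCHAR(8)
    , MeterId VARCHAR(16)
    , MarkedTime VARCHAR(8)
    , RPStatePlate VARCHAR(8)
    , PlateExpiryDate VARCHAR(16)
    , VIN VARCHAR(32)
    , Make VARCHAR(32)
    , BodyStyle VARCHAR(8)
    , Color VARCHAR(8)
    , Location VARCHAR(128)
    , Route VARCHAR(16)
    , Agency VARCHAR(8)
    , ViolationCode VARCHAR(32)
    , ViolationDescription VARCHAR(128)
    , FineAmount DECIMAL(10, 2)
    , Latitude DOUBLE
    , Longitude DOUBLE
);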
import csv
import logging
import multiprocessing as mp
from contextlib import contextmanager
from pathlib import Path
from typing import Dict

import mysql.connector
from mysql.connector import MySQLConnection
from mysql.connector.cursor import MySQLCursor

logging.basicConfig(level=logging.DEBUG)

config = {
    'host': '<server>.mysql.database.azure.com',
    'username': 'user',
    'password': 'password',
    'database': 'db_name',
    'autocommit': False
}
@contextmanager
def mysql_connect(connection_config: Dict) -> MySQLConnection:
    """Open a MySQL connection and ensure it is closed when the block exits."""
    conn: MySQLConnection = mysql.connector.connect(**connection_config)
    logging.info('Connected to database \'%s\' at \'%s\'', connection_config['database'], connection_config['host'])
    try:
        yield conn
    finally:
        logging.info('Closing connection to server \'%s\'', connection_config['host'])
        conn.close()


@contextmanager
def mysql_cursor(conn: MySQLConnection) -> MySQLCursor:
    """Open a cursor on the given connection and close it when the block exits."""
    cur: MySQLCursor = conn.cursor()
    try:
        yield cur
    finally:
        logging.info('Closing cursor')
        cur.close()
insert_sql = """
INSERT INTO parking_citations (
TicketNumber
, IssueDate
, IssueTime
, MeterId
, MarkedTime
, RPStatePlate
, PlateExpiryDate
, VIN
, Make
, BodyStyle
, Color
, Location
, Route
, Agency
, ViolationCode
, ViolationDescription
, FineAmount
, Latitude
, Longitude
) VALUES (
%s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
)
"""
def worker(rows):
    """Insert a single batch of rows, using a dedicated connection per batch."""
    with mysql_connect(config) as conn:
        with mysql_cursor(conn) as cursor:
            logging.info('Processing batch')
            cursor.executemany(insert_sql, rows)
            conn.commit()
def get_chunks(batch_size: int, source_file: Path):
    """Read the source CSV and yield lists of rows, batch_size rows at a time."""
    with open(source_file, 'r') as f:
        csv_reader = csv.reader(f, delimiter=',')
        next(csv_reader, None)  # skip the header row
        batch_data = []
        batch_count = 0
        for row in csv_reader:
            # Convert empty strings to None so they are stored as NULLs
            batch_data.append([v if v != '' else None for v in row])
            batch_count += 1
            if batch_count % batch_size == 0:
                yield batch_data
                batch_data = []
        if batch_data:
            yield batch_data
def main():
    batch_size = 5000
    source_file = Path('data/parking-citations.csv')
    chunk_gen = get_chunks(batch_size, source_file)

    # Leave one core free for the process feeding the pool
    pool = mp.Pool(mp.cpu_count() - 1)

    # Consume the results so that any exception raised in a worker surfaces here
    for _ in pool.imap(worker, chunk_gen):
        pass

    pool.close()
    pool.join()


if __name__ == '__main__':
    main()
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
mysql-connector-python = "*"

[requires]
python_version = "3.6"
The data loaded is the Los Angeles Parking Citations dataset, available from Kaggle.
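As a rough sanity check once the load completes (not part of the gist), the table's row count can be compared against the number of data rows in the CSV:

SELECT COUNT(*) FROM parking_citations;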
