# mattlewissf/class_def.py

## class_def.py
# written and discarded in favor of sqlalchemy methods - still a good example of how to dynamically create classes in Python

import glob
import sys
from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.dialects.mysql import TIMESTAMP
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
import pandas as pd
from sqlalchemy.orm import sessionmaker
import pdb
import pprint

# stuff to setup ability to write to db in sqlalchemy
# Declarative base object produced by SQLAlchemy's declarative extension.
Base = declarative_base()
# Engine created from a SQLite URL that names the file test.db.
engine = create_engine('sqlite:///test.db')
# Attach the engine to the base's metadata (must happen after the engine exists).
Base.metadata.bind = engine
# Session factory bound to the same engine, and one module-level session
# instantiated at import time.
DBSession = sessionmaker(bind=engine)
session = DBSession()

# maps dtype to an sql-friendly type for writing
def convert_type(dtype):
    """Return the SQL-friendly type name that corresponds to ``dtype``.

    ``dtype`` may be anything whose ``str()`` is a dtype name (for example a
    pandas/numpy dtype object or a plain string such as ``'int64'``).

    Returns one of the strings ``'Integer'``, ``'String'`` or ``'Float'``.

    Raises ``ValueError`` (a subclass of ``Exception``) naming the offending
    dtype when it has no entry in the mapping.
    """
    # Compare by name so dtype objects and plain strings are handled alike.
    dtype_name = str(dtype)

    # This mapping is deliberately basic; it may need to change depending on
    # where the data is going to be written (sqlite3, etc).
    type_map_dict = {'int64': 'Integer', 'object': 'String', 'float64': 'Float'}

    if dtype_name not in type_map_dict:
        # Put the dtype in the message instead of printing it separately, so
        # the failure is self-describing and no print statement is needed.
        raise ValueError(
            "dtype '{0}' not in type_map_dict".format(dtype_name))

    return type_map_dict[dtype_name]

# creates attribute dict to give to create_data_class based on csv input
def create_data_attributes(table_source):
    """Describe a CSV file as the attribute dict used by create_data_class.

    ``table_source`` is the path of a CSV file; it is read with
    ``pd.read_csv``.

    Returns a dict with three keys:

    * ``'table_name'`` - the lower-cased file name without a trailing
      ``.csv``;
    * ``'class_name'`` - ``table_name`` capitalized;
    * ``'columns'`` - one dict per CSV column holding ``'col_name'`` (the
      header as read), ``'col_type'`` (the result of ``convert_type`` on the
      column's dtype) and ``'obj_name'`` (the lower-cased header).

    Any exception raised by ``convert_type`` for an unmapped dtype
    propagates to the caller.
    """
    import os  # local import: only this function needs path handling

    # Strip only a trailing '.csv'; a '.csv' elsewhere in the name is kept.
    table_name = os.path.basename(table_source).lower()
    if table_name.endswith('.csv'):
        table_name = table_name[:-len('.csv')]

    df = pd.read_csv(table_source)
    columns = [
        {
            'col_name': column,
            'col_type': convert_type(df[column].dtype),
            'obj_name': column.lower(),
        }
        for column in df.columns
    ]

    attributes = {
        'columns': columns,
        'table_name': table_name,
        'class_name': table_name.capitalize(),
    }

    pprint.pprint(attributes)  # for testing

    # The caller passes this dict on to create_data_class, so it must be
    # returned rather than only printed.
    return attributes

# programmically creates a new python class. Establishes a tablename and attributes for writing to sql
def create_data_class(attributes):
    """Build and return a new class from an attribute dict.

    ``attributes`` must provide ``'class_name'``, ``'table_name'`` and
    ``'columns'``; each entry of ``'columns'`` must provide ``'obj_name'``
    and ``'col_type'``.

    The returned class derives from ``object``, is named
    ``attributes['class_name']``, carries ``__tablename__`` set to
    ``attributes['table_name']``, and has one class attribute per column
    whose value is the *string* ``"Column(<col_type>)"``.

    NOTE: the column attributes are text placeholders, not SQLAlchemy
    ``Column`` objects, and the class is not a declarative model.

    Raises ``KeyError`` if a required key is missing.
    """
    class_dict = {'__tablename__': attributes['table_name']}
    class_dict.update(
        (col['obj_name'], "Column({0})".format(col['col_type']))
        for col in attributes['columns']
    )

    # Three-argument type() creates the class at runtime; return it so the
    # caller can actually use what was built.
    return type(attributes['class_name'], (object,), class_dict)


def grab_data_tables(pattern='data/*.csv'):
    """Process every CSV file matching ``pattern``.

    ``pattern`` is a glob pattern; it defaults to ``'data/*.csv'``, the
    location that used to be hard-coded.

    For each matching file, in sorted order, the attribute dict is built
    with ``create_data_attributes``, handed to ``create_data_class``, and a
    "processed <path>" line is printed. Returns ``None``.
    """
    # glob gives no ordering guarantee; sort so runs are reproducible.
    for table in sorted(glob.glob(pattern)):
        data_attributes = create_data_attributes(table)
        create_data_class(data_attributes)
        # Format the string before printing: calling .format on the result
        # of print() fails when print is a function.
        print("processed {0}".format(table))

# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    # Process the CSV files found by grab_data_tables().
    grab_data_tables()
    # Stop in the interactive debugger once processing has finished.
    pdb.set_trace()

    # these are for instances of a class to save to db
    # session.add(o)
    # session.commit()
	# written and discarded in favor of sqlalchemy methods - still a good example of how to dynamically create classes in Python

	import glob
	import sys
	from sqlalchemy import Column, ForeignKey, Integer, String
	from sqlalchemy.dialects.mysql import TIMESTAMP
	from sqlalchemy.ext.declarative import declarative_base
	from sqlalchemy.orm import relationship
	from sqlalchemy import create_engine
	import pandas as pd
	from sqlalchemy.orm import sessionmaker
	import pdb
	import pprint

	# stuff to setup ability to write to db in sqlalchemy
	# Declarative base object produced by SQLAlchemy's declarative extension.
	Base = declarative_base()
	# Engine created from a SQLite URL that names the file test.db.
	engine = create_engine('sqlite:///test.db')
	# Attach the engine to the base's metadata (must happen after the engine exists).
	Base.metadata.bind = engine
	# Session factory bound to the same engine, and one session instantiated
	# right after it.
	DBSession = sessionmaker(bind=engine)
	session = DBSession()

	# maps dtype to an sql-friendly type for writing
	def convert_type(dtype):
		"""Return the SQL-friendly type name that corresponds to ``dtype``.

		``dtype`` may be anything whose ``str()`` is a dtype name (for example
		a pandas/numpy dtype object or a plain string such as ``'int64'``).

		Returns one of the strings ``'Integer'``, ``'String'`` or ``'Float'``.

		Raises ``ValueError`` (a subclass of ``Exception``) naming the
		offending dtype when it has no entry in the mapping.
		"""
		# Compare by name so dtype objects and plain strings are handled alike.
		dtype_name = str(dtype)

		# This mapping is deliberately basic; it may need to change depending
		# on where the data is going to be written (sqlite3, etc).
		type_map_dict = {'int64': 'Integer', 'object': 'String', 'float64': 'Float'}

		if dtype_name not in type_map_dict:
			# Put the dtype in the message instead of printing it separately,
			# so the failure is self-describing and no print statement is needed.
			raise ValueError(
				"dtype '{0}' not in type_map_dict".format(dtype_name))

		return type_map_dict[dtype_name]

	# creates attribute dict to give to create_data_class based on csv input
	def create_data_attributes(table_source):
		"""Describe a CSV file as the attribute dict used by create_data_class.

		``table_source`` is the path of a CSV file; it is read with
		``pd.read_csv``.

		Returns a dict with three keys:

		* ``'table_name'`` - the lower-cased file name without a trailing
		  ``.csv``;
		* ``'class_name'`` - ``table_name`` capitalized;
		* ``'columns'`` - one dict per CSV column holding ``'col_name'`` (the
		  header as read), ``'col_type'`` (the result of ``convert_type`` on
		  the column's dtype) and ``'obj_name'`` (the lower-cased header).

		Any exception raised by ``convert_type`` for an unmapped dtype
		propagates to the caller.
		"""
		import os  # local import: only this function needs path handling

		# Strip only a trailing '.csv'; a '.csv' elsewhere in the name is kept.
		table_name = os.path.basename(table_source).lower()
		if table_name.endswith('.csv'):
			table_name = table_name[:-len('.csv')]

		df = pd.read_csv(table_source)
		columns = [
			{
				'col_name': column,
				'col_type': convert_type(df[column].dtype),
				'obj_name': column.lower(),
			}
			for column in df.columns
		]

		attributes = {
			'columns': columns,
			'table_name': table_name,
			'class_name': table_name.capitalize(),
		}

		pprint.pprint(attributes)  # for testing

		# The caller passes this dict on to create_data_class, so it must be
		# returned rather than only printed.
		return attributes

	# programmically creates a new python class. Establishes a tablename and attributes for writing to sql
	def create_data_class(attributes):
		"""Build and return a new class from an attribute dict.

		``attributes`` must provide ``'class_name'``, ``'table_name'`` and
		``'columns'``; each entry of ``'columns'`` must provide ``'obj_name'``
		and ``'col_type'``.

		The returned class derives from ``object``, is named
		``attributes['class_name']``, carries ``__tablename__`` set to
		``attributes['table_name']``, and has one class attribute per column
		whose value is the *string* ``"Column(<col_type>)"``.

		NOTE: the column attributes are text placeholders, not SQLAlchemy
		``Column`` objects, and the class is not a declarative model.

		Raises ``KeyError`` if a required key is missing.
		"""
		class_dict = {'__tablename__': attributes['table_name']}
		class_dict.update(
			(col['obj_name'], "Column({0})".format(col['col_type']))
			for col in attributes['columns']
		)

		# Three-argument type() creates the class at runtime; return it so
		# the caller can actually use what was built.
		return type(attributes['class_name'], (object,), class_dict)


	def grab_data_tables(pattern='data/*.csv'):
		"""Process every CSV file matching ``pattern``.

		``pattern`` is a glob pattern; it defaults to ``'data/*.csv'``, the
		location that used to be hard-coded.

		For each matching file, in sorted order, the attribute dict is built
		with ``create_data_attributes``, handed to ``create_data_class``, and
		a "processed <path>" line is printed. Returns ``None``.
		"""
		# glob gives no ordering guarantee; sort so runs are reproducible.
		for table in sorted(glob.glob(pattern)):
			data_attributes = create_data_attributes(table)
			create_data_class(data_attributes)
			# Format the string before printing: calling .format on the
			# result of print() fails when print is a function.
			print("processed {0}".format(table))

	# Script entry point: runs only when the file is executed directly.
	if __name__ == '__main__':
		# Process the CSV files found by grab_data_tables().
		grab_data_tables()
		# Stop in the interactive debugger once processing has finished.
		pdb.set_trace()

		# these are for instances of a class to save to db
		# session.add(o)
		# session.commit()