spitz-dan-l/gist:77968e6177ad483741ad

## gistfile1.py
# If you are using anaconda, then you need to install an extra package which is used behind the scenes.
# Run the following command on the command line:
#
# $ conda install pymysql
#
# say yes when it prompts you.

import pandas
from sqlalchemy import create_engine

# Create the database engine object.
# Depending on how the db is set up on your machine, you may have to change 'root' to a different user,
# or change the db name from 'smartscheduling_dev2' to the right name.
engine = create_engine("mysql+pymysql://root@localhost/smartscheduling_dev2")

# Load appointments and cancel_reasons tables into memory
appointments = pandas.read_sql_table('appointments', engine, index_col='id')
cancel_reasons = pandas.read_sql_table('cancel_reasons', engine, index_col='id')

# Join stuff in memory
appointments = appointments.join(cancel_reasons, 'cancel_reason_id', rsuffix='_cancel_reason')

# Sort stuff
appointments.sort('start_time', inplace=True)

# Break down into groups
for patient_id, apts in appointments.groupby('patient_id'):
  # Example aggregate feature: total number of appointments for patient
  total_patient_appointments = len(apts)

  # Add the new feature to the appointments DataFrame like this
  appointments.loc[appointments.patient_id == patient_id, 'total_patient_appointments'] = total_patient_appointments

# Write the result back to SQL...
appointments.to_sql('rich_appointments', engine)

# ...or, to a csv
appointments.to_csv('rich_appointments.csv')
	# If you are using anaconda, then you need to install an extra package which is used behind the scenes.
	# Run the following command on the command line:
	#
	# $ conda install pymysql
	#
	# say yes when it prompts you.

	import pandas
	from sqlalchemy import create_engine

	# Create the database engine object.
	# Depending on how the db is set up on your machine, you may have to change 'root' to a different user,
	# or change the db name from 'smartscheduling_dev2' to the right name.
	engine = create_engine("mysql+pymysql://root@localhost/smartscheduling_dev2")

	# Load appointments and cancel_reasons tables into memory
	appointments = pandas.read_sql_table('appointments', engine, index_col='id')
	cancel_reasons = pandas.read_sql_table('cancel_reasons', engine, index_col='id')

	# Join stuff in memory
	appointments = appointments.join(cancel_reasons, 'cancel_reason_id', rsuffix='_cancel_reason')

	# Sort stuff
	appointments.sort('start_time', inplace=True)

	# Break down into groups
	for patient_id, apts in appointments.groupby('patient_id'):
	# Example aggregate feature: total number of appointments for patient
	total_patient_appointments = len(apts)

	# Add the new feature to the appointments DataFrame like this
	appointments.loc[appointments.patient_id == patient_id, 'total_patient_appointments'] = total_patient_appointments

	# Write the result back to SQL...
	appointments.to_sql('rich_appointments', engine)

	# ...or, to a csv
	appointments.to_csv('rich_appointments.csv')