Mengyuz

## Problem set 1-1 A Simple Heuristic
import numpy
import pandas
import statsmodels.api as sm

def simple_heuristic(file_path):
    '''
    In this exercise, we will perform some rudimentary practices similar to those of
    an actual data scientist.

    Part of a data scientist's job is to use her or his intuition and insight to

## Problem set 1-2 A More Complex Heuristic
import numpy
import pandas
import statsmodels.api as sm

def complex_heuristic(file_path):
    '''
    You are given a list of Titanic passengers and their associated
    information. More information about the data can be seen at the link below:
    http://www.kaggle.com/c/titanic-gettingStarted/data

## Problem set 1-3 - Your Custom Heuristic
import numpy
import pandas
import statsmodels.api as sm

def custom_heuristic(file_path):
    '''
    You are given a list of Titanic passengers and their associated
    information. More information about the data can be seen at the link below:
    http://www.kaggle.com/c/titanic-gettingStarted/data

## Problem set 2-1 - Number of Rainy Days
import pandas
import pandasql


def num_rainy_days(filename):
    '''
    This function should run a SQL query on a dataframe of
    weather data.  The SQL query should return one column and
    one row - a count of the number of days in the dataframe where
    the rain column is equal to 1 (i.e., the number of days it

## Problem set 2-2 - Temp on Foggy and Nonfoggy Days
import pandas
import pandasql


def max_temp_aggregate_by_fog(filename):
    '''
    This function should run a SQL query on a dataframe of
    weather data.  The SQL query should return two columns and
    two rows - whether it was foggy or not (0 or 1) and the max
    maxtempi for that fog value (i.e., the maximum max temperature

## Problem set 2-3 - Mean Temp on Weekends
import pandas
import pandasql

def avg_weekend_temperature(filename):
    '''
    This function should run a SQL query on a dataframe of
    weather data.  The SQL query should return one column and
    one row - the average meantempi on days that are a Saturday
    or Sunday (i.e., the average mean temperature on weekends).
    The dataframe will be titled 'weather_data' and you can access

## Problem set 2-4 - Mean Temp on Rainy Days
import pandas
import pandasql

def avg_min_temperature(filename):
    '''
    This function should run a SQL query on a dataframe of
    weather data. More specifically you want to find the average
    minimum temperature (mintempi column of the weather dataframe) on
    rainy days where the minimum temperature is greater than 55 degrees.


## Problem set 2-5 - Fixing Turnstile Data
import csv

def fix_turnstile_data(filenames):
    '''
    Filenames is a list of MTA Subway turnstile text files. A link to an example
    MTA Subway turnstile text file can be seen at the URL below:
    http://web.mta.info/developers/data/nyct/turnstile/turnstile_110507.txt

    As you can see, there are numerous data points included in each row of
    an MTA Subway turnstile text file.

## Problem set 2-6 - Combining Turnstile Data
def create_master_turnstile_file(filenames, output_file):
    '''
    Write a function that takes the files in the list filenames, which all have the
    columns 'C/A, UNIT, SCP, DATEn, TIMEn, DESCn, ENTRIESn, EXITSn', and consolidates
    them into one file located at output_file.  There should be ONE row with the column
    headers, located at the top of the file. The input files do not have column header
    rows of their own.

    For example, if file_1 has:
    'C/A, UNIT, SCP, DATEn, TIMEn, DESCn, ENTRIESn, EXITSn'

## Problem set 2-7 - Filtering Irregular Data
import pandas
import pandasql

def filter_by_regular(filename):
    '''
    This function should read the csv file located at filename into a pandas dataframe,
    and filter the dataframe to only rows where the 'DESCn' column has the value 'REGULAR'.

    For example, if the pandas dataframe is as follows:
    ,C/A,UNIT,SCP,DATEn,TIMEn,DESCn,ENTRIESn,EXITSn
	import numpy
	import pandas
	import statsmodels.api as sm

	def simple_heuristic(file_path):
	'''
	In this exercise, we will perform some rudimentary practices similar to those of
	an actual data scientist.

	Part of a data scientist's job is to use her or his intuition and insight to
	import pandas
	import pandasql


	def num_rainy_days(filename):
	'''
	This function should run a SQL query on a dataframe of
	weather data. The SQL query should return one column and
	one row - a count of the number of days in the dataframe where
	the rain column is equal to 1 (i.e., the number of days it
	import pandas
	import pandasql


	def max_temp_aggregate_by_fog(filename):
	'''
	This function should run a SQL query on a dataframe of
	weather data. The SQL query should return two columns and
	two rows - whether it was foggy or not (0 or 1) and the max
	maxtempi for that fog value (i.e., the maximum max temperature
	import pandas
	import pandasql

	def avg_weekend_temperature(filename):
	'''
	This function should run a SQL query on a dataframe of
	weather data. The SQL query should return one column and
	one row - the average meantempi on days that are a Saturday
	or Sunday (i.e., the average mean temperature on weekends).
	The dataframe will be titled 'weather_data' and you can access
	import pandas
	import pandasql

	def avg_min_temperature(filename):
	'''
	This function should run a SQL query on a dataframe of
	weather data. More specifically you want to find the average
	minimum temperature (mintempi column of the weather dataframe) on
	rainy days where the minimum temperature is greater than 55 degrees.
	import csv

	def fix_turnstile_data(filenames):
	'''
	Filenames is a list of MTA Subway turnstile text files. A link to an example
	MTA Subway turnstile text file can be seen at the URL below:
	http://web.mta.info/developers/data/nyct/turnstile/turnstile_110507.txt

	As you can see, there are numerous data points included in each row of
	an MTA Subway turnstile text file.
	def create_master_turnstile_file(filenames, output_file):
	'''
	Write a function that takes the files in the list filenames, which all have the
	columns 'C/A, UNIT, SCP, DATEn, TIMEn, DESCn, ENTRIESn, EXITSn', and consolidates
	them into one file located at output_file. There should be ONE row with the column
	headers, located at the top of the file. The input files do not have column header
	rows of their own.

	For example, if file_1 has:
	'C/A, UNIT, SCP, DATEn, TIMEn, DESCn, ENTRIESn, EXITSn'
	import pandas
	import pandasql

	def filter_by_regular(filename):
	'''
	This function should read the csv file located at filename into a pandas dataframe,
	and filter the dataframe to only rows where the 'DESCn' column has the value 'REGULAR'.

	For example, if the pandas dataframe is as follows:
	,C/A,UNIT,SCP,DATEn,TIMEn,DESCn,ENTRIESn,EXITSn