cal687

## HYPOTHESIS TESTING WITH SCIPY
#show most basic package that Vein Pack offers, and whether it has a significant impact on subscribers.
#If subscribers of Vein Pack live longer than other people, then there exists a marketing goldmine.

#import lifespan data
# NOTE(review): `familiar` is not imported in the visible part of this file —
# presumably a course-provided module; confirm it is imported before this runs.
vein_pack_lifespans = familiar.lifespans(package='vein')

#find out if the average lifespan of a Vein Pack subscriber is
#significantly different from the average life expectancy of 71 years using a 1-Sample T-Test.

#perform 1-Sample T-Test
# TODO: no test code follows this comment in the snippet; the step is still to be written.

## Boxplots with plt
import codecademylib3_seaborn
import pandas as pd
from matplotlib import pyplot as plt

# Load the healthcare dataset.
healthcare = pd.read_csv("healthcare.csv")
#print(healthcare.head(4))

# Keep only the rows whose DRG Definition is '313 - CHEST PAIN'.
is_chest_pain = healthcare['DRG Definition'] == '313 - CHEST PAIN'
chest_pain = healthcare.loc[is_chest_pain]

# Narrow those rows to providers whose state is "AL".
in_alabama = chest_pain['Provider State'] == "AL"
alabama_chest_pain = chest_pain.loc[in_alabama]

# The column name carries a leading and a trailing space; keep it exactly as written.
costs = alabama_chest_pain[' Average Covered Charges '].values

## STATISTICS WITH PYTHON & HISTOGRAM
#We will create two histograms, each displaying the frequency of an occurrence each day of the year
#(either flights or flower blooms).
#You will use the in_bloom variable to find a count of the number of flowers that start blooming each day of the year.
#You will use the flights variable to find a count of the number of flights that occur each day of the year.

# import codecademylib3
import codecademylib3
import numpy as np
from matplotlib import pyplot as plt

## Customer Types - Histogram of data matplotlib
import codecademylib
from matplotlib import pyplot as plt
import pandas as pd

# Load every order, then total the price paid per customer_id.
orders = pd.read_csv('orders.csv')

# reset_index() turns the grouped result back into a regular two-column DataFrame.
customer_amount = orders.groupby('customer_id').price.sum().reset_index()

# Call form of print: the bare `print x` statement is a SyntaxError on Python 3,
# while the parenthesised form runs on both Python 2 and Python 3.
print(customer_amount.head())

## Twitch Part 2: Visualize Data with Matplotlib
import codecademylib3_seaborn
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# Bar Graph: Featured Games

# Game labels, listed in the same order as the counts in `viewers` below.
games = [
    "LoL",
    "Dota 2",
    "CS:GO",
    "DayZ",
    "HOS",
    "Isaac",
    "Shows",
    "Hearth",
    "WoT",
    "Agar.io",
]

# One viewer count per entry in `games`.
viewers = [
    1070,
    472,
    302,
    239,
    210,
    171,
    170,
    90,
    86,
    71,
]

## Twitch Part 1: Analyze Data with SQL
/* Getting a feel for the tables: first two rows of each. */
SELECT *
FROM stream
LIMIT 2;

SELECT *
FROM chat
LIMIT 2;

/* How many unique games and unique channels are there in the stream table? */
SELECT DISTINCT game
FROM stream;

SELECT DISTINCT channel
FROM stream;

/* What are the most popular games in the stream table?
   ORDER BY 1 sorts on the first output column, i.e. the row count. */
SELECT COUNT(*), game
FROM stream
GROUP BY game
ORDER BY 1 DESC;

## Visualizing World Cup Data With Seaborn
import codecademylib3_seaborn
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns

# Load the match records and preview the first rows.
df = pd.read_csv('WorldCupMatches.csv')
print(df.head())

# We want to visualize the total number of goals scored in each match:
# home-team goals plus away-team goals, stored as a new column.
df['Total Goals'] = df['Home Team Goals'].add(df['Away Team Goals'])

## MATPLOTLIB VISUALIZATIONS
#Bar Chart with Error
import codecademylib
from matplotlib import pyplot as plt

# Three lists of equal length, aligned by position: one entry per year.
years = [2000, 2001, 2002, 2003, 2004, 2005, 2006]
past_years_averages = [82, 84, 83, 86, 74, 84, 90]
error = [1.5, 2.1, 1.2, 3.2, 2.3, 1.7, 2.4]

# Make your chart here
# Open a 10-by-8 figure to draw the chart on.
plt.figure(figsize=(10, 8))

## Page Visits Funnel - DATA ANALYSIS WITH PANDAS
#Funnel analysis is a method used to visualize and map the flow of visitors across a set of website pages or events.
#A website funnel gets its name because, much like a physical funnel,
#it narrows toward the end—so the volume of visitors at the top is larger than the volume of visitors at the bottom.

#Funnel process, 1. A user visits CoolTShirts.com, 2. A user adds a t-shirt to their cart, 3. A user clicks “checkout”
#4. A user actually purchases a t-shirt


## AB Testing with Pandas
import codecademylib
import pandas as pd

# Load the ad-click records and preview the first ten rows.
df = pd.read_csv('ad_clicks.csv')
print(df.head(n=10))

# Which ad platform is getting you the most views?
# Count user_id entries per utm_source, then flatten the result back into columns.
views = df.groupby('utm_source')['user_id'].count().reset_index()
print(views)
	#show most basic package that Vein Pack offers, and whether it has a significant impact on subscribers.
	#If subscribers of Vein Pack live longer than other people, then there exists a marketing goldmine.

	#import lifespan data
	# NOTE(review): `familiar` is not imported in the visible part of this file —
	# presumably a course-provided module; confirm it is imported before this runs.
	vein_pack_lifespans = familiar.lifespans(package='vein')

	#find out if the average lifespan of a Vein Pack subscriber is
	#significantly different from the average life expectancy of 71 years using a 1-Sample T-Test.

	#perform 1-Sample T-Test
	# TODO: no test code follows this comment in the snippet; the step is still to be written.
	import codecademylib3_seaborn
	import pandas as pd
	from matplotlib import pyplot as plt

	# Load the healthcare dataset.
	healthcare = pd.read_csv("healthcare.csv")
	#print(healthcare.head(4))

	# Keep only the rows whose DRG Definition is '313 - CHEST PAIN'.
	is_chest_pain = healthcare['DRG Definition'] == '313 - CHEST PAIN'
	chest_pain = healthcare.loc[is_chest_pain]

	# Narrow those rows to providers whose state is "AL".
	in_alabama = chest_pain['Provider State'] == "AL"
	alabama_chest_pain = chest_pain.loc[in_alabama]

	# The column name carries a leading and a trailing space; keep it exactly as written.
	costs = alabama_chest_pain[' Average Covered Charges '].values
	#We will create two histograms, each displaying the frequency of an occurrence each day of the year
	#(either flights or flower blooms).
	#You will use the in_bloom variable to find a count of the number of flowers that start blooming each day of the year.
	#You will use the flights variable to find a count of the number of flights that occur each day of the year.

	# import codecademylib3
	import codecademylib3
	import numpy as np
	from matplotlib import pyplot as plt
	import codecademylib
	from matplotlib import pyplot as plt
	import pandas as pd

	# Load every order, then total the price paid per customer_id.
	orders = pd.read_csv('orders.csv')

	# reset_index() turns the grouped result back into a regular two-column DataFrame.
	customer_amount = orders.groupby('customer_id').price.sum().reset_index()

	# Call form of print: the bare `print x` statement is a SyntaxError on Python 3,
	# while the parenthesised form runs on both Python 2 and Python 3.
	print(customer_amount.head())
	/* Getting a feel for the tables: first two rows of each. */
	SELECT * FROM stream LIMIT 2;
	SELECT * FROM chat LIMIT 2;

	/* How many unique games and unique channels are there in the stream table? */
	SELECT DISTINCT game FROM stream;
	SELECT DISTINCT channel FROM stream;

	/* What are the most popular games in the stream table?
	   ORDER BY 1 sorts on the first output column, i.e. the row count. */
	SELECT COUNT(*), game FROM stream GROUP BY game ORDER BY 1 DESC;
	#Bar Chart with Error
	import codecademylib
	from matplotlib import pyplot as plt

	# Three lists of equal length, aligned by position: one entry per year.
	years = [2000, 2001, 2002, 2003, 2004, 2005, 2006]
	past_years_averages = [82, 84, 83, 86, 74, 84, 90]
	error = [1.5, 2.1, 1.2, 3.2, 2.3, 1.7, 2.4]

	# Make your chart here
	# Open a 10-by-8 figure to draw the chart on.
	plt.figure(figsize=(10, 8))
	#Funnel analysis is a method used to visualize and map the flow of visitors across a set of website pages or events.
	#A website funnel gets its name because, much like a physical funnel,
	#it narrows toward the end—so the volume of visitors at the top is larger than the volume of visitors at the bottom.

	#Funnel process, 1. A user visits CoolTShirts.com, 2. A user adds a t-shirt to their cart, 3. A user clicks “checkout”
	#4. A user actually purchases a t-shirt
	import codecademylib
	import pandas as pd

	# Load the ad-click records and preview the first ten rows.
	df = pd.read_csv('ad_clicks.csv')
	print(df.head(n=10))

	# Which ad platform is getting you the most views?
	# Count user_id entries per utm_source, then flatten the result back into columns.
	views = df.groupby('utm_source')['user_id'].count().reset_index()
	print(views)