Aaron Lee sciencelee

## 1.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                sciencelee
                / 1.ipynb
            
            
              Created
              May 10, 2020 02:01
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## 2.py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the sales table, then keep only the rows whose Year is earlier than 2008.
df = pd.read_csv("vgsales.csv")
df = df.loc[df['Year'].lt(2008)]

# Peek at the first three rows.
df.head(3)

# Remove the limit on how many columns pandas shows when displaying a frame.
pd.set_option('display.max_columns', None)

## video_game_readcsv.py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Read the vgsales.csv file into the DataFrame `df`.
df = pd.read_csv("vgsales.csv")

## video_game_groupby1.py
# If this were SQL, it would look something like this:
'''
SELECT Year, SUM(Global_Sales)
FROM df
GROUP BY Year
'''

# In Python...
# Grouping by year alone produces a DataFrameGroupBy object; nothing is
# aggregated until a column is selected and a reduction such as .sum() is called.
df.groupby(['Year'])

## video_game_groupby2.py
'''
SELECT Publisher, SUM(Global_Sales)
FROM df
GROUP BY Publisher
'''

# Group by Publisher and total Global_Sales for each one.  The result is a
# Series indexed by Publisher, which is why the SQL above selects both columns.
df.groupby(['Publisher'])['Global_Sales'].sum()

# Now we should be able to plot it??

## video_game_groupby3.py
# A horizontal bar graph makes the per-publisher totals a little easier to read.
(
    df.groupby(['Publisher'])['Global_Sales']
    .sum()
    .plot(kind='barh')
)

# The graph above is cluttered because every publisher gets its own bar.
# The head method (iloc would work too) limits the number of rows shown.
(
    df.groupby(['Publisher'])['Global_Sales']
    .sum()
    .head(10)
    .plot(kind='barh')
)

## video_game_groupby4.py
# reset_index() turns the Publisher index back into a regular column,
# giving a flat DataFrame with Publisher and Global_Sales columns.
df_flat = (
    df.groupby(['Publisher'])['Global_Sales']
    .sum()
    .reset_index()
)

# Largest totals first, keep the first 10 rows, and draw a horizontal bar plot.
(
    df_flat
    .sort_values(['Global_Sales'], ascending=False)
    .head(10)
    .plot(kind='barh', x='Publisher', y='Global_Sales')
)

## video_game_groupby5.py
# SQL query might look like...
'''
SELECT Year, Platform, SUM(Global_Sales)
FROM df_me
GROUP BY Year, Platform
'''

# Plot global sales by platform by year.  Sounds tricky, but we can handle it.

# NOTE(review): nothing visible here uses `platforms` or defines `df_me`;
# presumably df_me is df restricted to these platforms, e.g.
# df[df['Platform'].isin(platforms)] -- confirm before relying on it.
platforms = ['NES', 'PS', '2600', 'PS2', 'GBA']

## video_game_groupby6.py
# One chained expression: total Global_Sales per (Year, Platform) pair, move
# the Platform level into columns with unstack(), then plot the result.
(
    df_me.groupby(['Year', 'Platform'])['Global_Sales']
    .sum()
    .unstack()
    .plot()
)

## gist:96f12a53ceb30212f694b30233f1f1d5
import pandas as pd
# Read kc_house_data.csv into `df`.
# NOTE(review): `df` is rebound from vgsales.csv a few lines further down, so
# this load looks like a leftover -- confirm whether it is still needed.
df = pd.read_csv('kc_house_data.csv')
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd

	# Load the sales table, then keep only the rows whose Year is earlier than 2008.
	df = pd.read_csv("vgsales.csv")
	df = df.loc[df['Year'].lt(2008)]

	# Peek at the first three rows.
	df.head(3)

	# Remove the limit on how many columns pandas shows when displaying a frame.
	pd.set_option('display.max_columns', None)
	# If this were SQL, it would look something like this:
	'''
	SELECT Year, SUM(Global_Sales)
	FROM df
	GROUP BY Year
	'''

	# In Python...
	# Grouping by year alone produces a DataFrameGroupBy object; nothing is
	# aggregated until a column is selected and a reduction such as .sum() is called.
	df.groupby(['Year'])
	'''
	SELECT Publisher, SUM(Global_Sales)
	FROM df
	GROUP BY Publisher
	'''

	# Group by Publisher and total Global_Sales for each one.  The result is a
	# Series indexed by Publisher, which is why the SQL above selects both columns.
	df.groupby(['Publisher'])['Global_Sales'].sum()

	# Now we should be able to plot it??
	# A horizontal bar graph makes the per-publisher totals a little easier to read.
	(
	    df.groupby(['Publisher'])['Global_Sales']
	    .sum()
	    .plot(kind='barh')
	)

	# The graph above is cluttered because every publisher gets its own bar.
	# The head method (iloc would work too) limits the number of rows shown.
	(
	    df.groupby(['Publisher'])['Global_Sales']
	    .sum()
	    .head(10)
	    .plot(kind='barh')
	)
	# reset_index() turns the Publisher index back into a regular column,
	# giving a flat DataFrame with Publisher and Global_Sales columns.
	df_flat = (
	    df.groupby(['Publisher'])['Global_Sales']
	    .sum()
	    .reset_index()
	)

	# Largest totals first, keep the first 10 rows, and draw a horizontal bar plot.
	(
	    df_flat
	    .sort_values(['Global_Sales'], ascending=False)
	    .head(10)
	    .plot(kind='barh', x='Publisher', y='Global_Sales')
	)
	# SQL query might look like...
	'''
	SELECT Year, Platform, SUM(Global_Sales)
	FROM df_me
	GROUP BY Year, Platform
	'''

	# Plot global sales by platform by year.  Sounds tricky, but we can handle it.

	# NOTE(review): nothing visible here uses `platforms` or defines `df_me`;
	# presumably df_me is df restricted to these platforms, e.g.
	# df[df['Platform'].isin(platforms)] -- confirm before relying on it.
	platforms = ['NES', 'PS', '2600', 'PS2', 'GBA']
	# One chained expression: total Global_Sales per (Year, Platform) pair, move
	# the Platform level into columns with unstack(), then plot the result.
	(
	    df_me.groupby(['Year', 'Platform'])['Global_Sales']
	    .sum()
	    .unstack()
	    .plot()
	)