Loading
      
  Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
    
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import numpy as np | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| df = pd.read_csv("vgsales.csv") | |
| df = df.loc[df['Year'] < 2008] | |
| df.head(3) | |
| pd.set_option('display.max_columns', None) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import numpy as np | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| df = pd.read_csv("vgsales.csv") | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # If this was SQL, it would look something like this: | |
| ''' | |
| SELECT Year, SUM(Global_Sales) | |
| FROM df | |
| GROUP BY Year | |
| ''' | |
| # In Python... | |
| # Grouping it by year produces a DataFrameGroupBy object | |
| df.groupby(['Year']) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | ''' | |
| SELECT Publisher, SUM(Global_Sales) | |
| FROM df | |
| GROUP BY Publisher | |
| ''' | |
| # Grouped by Publisher and selected the sum of Global_Sales (a Series object) | |
| df.groupby(['Publisher'])['Global_Sales'].sum() | |
| # Now we should be able to plot it?? | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # We can make this into a horizontal bar graph to make this a little easier to read. | |
| df.groupby(['Publisher'])['Global_Sales'].sum().plot(kind='barh') | |
| # That last line produces a messy bar graph with hundreds of publishers. | |
| # We can use the iloc or head methods to limit the number shown (this keeps the first 10 publishers in group order, not the 10 largest) | |
| df.groupby(['Publisher'])['Global_Sales'].sum().head(10).plot(kind='barh') | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Flatten the grouped object as seen above | |
| df_flat = df.groupby(['Publisher'])['Global_Sales'].sum().reset_index() | |
| # sort it by Global_Sales, include only the first 10, and make a horizontal bar plot. Yay! | |
| df_flat.sort_values(['Global_Sales'], ascending=False).head(10).plot(kind='barh', y='Global_Sales', x='Publisher') | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # SQL Query might look like... | |
| ''' | |
| SELECT Year, Platform, SUM(Global_Sales) | |
| FROM df_me | |
| GROUP BY Year, Platform | |
| ''' | |
| # plot global sales by platform by year. Sounds tricky, but we can handle it. | |
| platforms = ['NES', 'PS', '2600', 'PS2', 'GBA'] | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # This line of code does it for us | |
| df_me.groupby(['Year', 'Platform'])['Global_Sales'].sum().unstack().plot() | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
| df = pd.read_csv('kc_house_data.csv') | 
OlderNewer