Skip to content

Instantly share code, notes, and snippets.

@saahil1292
Created August 1, 2017 09:30
Show Gist options
  • Save saahil1292/f2676149da4abf8169a54de73fcb1542 to your computer and use it in GitHub Desktop.
Save saahil1292/f2676149da4abf8169a54de73fcb1542 to your computer and use it in GitHub Desktop.
Exploratory Data Analysis of Game of Thrones Dataset
# Future Imports
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
# Data Cleaning
from pandas import read_csv
import numpy as np
# Data Visualization
import matplotlib.pyplot as plt
%matplotlib inline
# Time
import time
def timeit(method):
    ''' Decorator that measures the execution time of a function

    The elapsed wall-clock time of every call is either printed or, when
    the caller passes a ``log_time`` dict as a keyword argument, stored in
    that dict as whole milliseconds.

    Parameter
    ---------
    method: function
        The function to be timed.

    Return
    ---------
    function
        A wrapper that calls ``method`` and returns its result unchanged.
    '''
    # Local import so this block does not depend on the file's import section.
    import functools

    # wraps() keeps the decorated function's __name__ and __doc__ intact.
    @functools.wraps(method)
    def timed(*args, **kw):
        ts = time.time()
        # NOTE: 'log_time' / 'log_name' are forwarded to ``method`` as well,
        # so a function timed this way has to accept them (e.g. via **kw).
        result = method(*args, **kw)
        te = time.time()
        elapsed_ms = (te - ts) * 1000
        if 'log_time' in kw:
            # The entry is keyed by 'log_name' when given, otherwise by the
            # upper-cased function name.
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int(elapsed_ms)
        else:
            print('%r %2.2f ms' % (method.__name__, elapsed_ms))
        return result
    return timed
@timeit
def dataframe(name):
    ''' Loads one of the known CSV datasets into a dataframe

    Parameters
    ----------
    name: string
        Dataset name without the '.csv' extension. Only 'battles',
        'character-deaths' and 'character-predictions' are read, from
        './<name>.csv'.

    Return
    ----------
    dataframe, or None when name is not one of the known datasets

    Raise
    ----------
    IOError
        When the CSV file cannot be opened.
    '''
    known_names = ('battles', 'character-deaths', 'character-predictions')
    if name not in known_names:
        # Unknown names are never read; the caller gets None back.
        return None

    filepath = './' + name + '.csv'
    try:
        return read_csv(filepath)
    except IOError as err:
        # Raise a real exception: raising a plain string is itself a
        # TypeError and would hide the actual I/O problem.
        raise IOError('File does not exist: %s (%s)' % (filepath, err))
# Load the three datasets from the current working directory.
battles = dataframe('battles')
character_deaths = dataframe('character-deaths')
character_predictions = dataframe('character-predictions')

# Keep only the battle columns used in the analysis below.
battles = battles[[
    'name',
    'year',
    'battle_number',
    'attacker_king',
    'defender_king',
    'attacker_outcome',
    'major_death',
    'major_capture',
    'attacker_size',
    'defender_size',
]]

# Non-null value count of every column, per attacker outcome.
battles.groupby('attacker_outcome').count()
#Plots
@timeit
def plots(dataframe):
    ''' Creates bar charts for the battle columns

    Five charts are drawn, each on its own figure:
    total army sizes per attacker outcome, total army sizes per attacking
    king, wins per attacking king, wins per defending king and number of
    battles per attacking king.

    Parameter
    ---------
    dataframe: dataframe
        Battles data with the columns 'attacker_outcome', 'attacker_king',
        'defender_king', 'attacker_size', 'defender_size' and
        'battle_number'.

    Return
    ---------
    None
    '''
    size_columns = ['attacker_size', 'defender_size']

    # Labels are set on the Axes returned by each plot call, so they always
    # land on the chart that was just drawn.
    ax = dataframe.groupby('attacker_outcome')[size_columns].sum().plot(kind='bar')
    ax.set_xlabel('')

    ax = dataframe.groupby('attacker_king')[size_columns].sum().plot(kind='bar')
    ax.set_xlabel('')
    ax.set_ylabel('Army Size')

    # Count only the battles the attacker won; counting every row would
    # show battles fought, not wins.
    # NOTE(review): assumes the outcome column uses 'win' alongside the
    # 'loss' value used below - confirm against the CSV.
    attacker_wins = dataframe[dataframe.attacker_outcome == 'win']
    ax = attacker_wins.groupby('attacker_king')[['attacker_outcome']].count().plot(kind='barh')
    ax.set_ylabel('')
    ax.set_xlabel('Wins')

    # An attacker loss is counted as a win for the defending king.
    defender_wins = dataframe[dataframe.attacker_outcome == 'loss']
    ax = defender_wins.groupby('defender_king')[['defender_king']].count().plot(kind='barh')
    ax.set_xlabel('wins')
    ax.set_ylabel('')

    # count(), not sum(): adding up 'battle_number' values does not give the
    # number of battles.
    ax = dataframe.groupby('attacker_king')[['battle_number']].count().plot(kind='barh')
    ax.set_xlabel('Battle Count')
    ax.set_ylabel('')
# Draw all charts for the battles data.
plots(battles)

# Names of the battles in which Joffrey/Tommen was the attacking king.
battles.loc[battles.attacker_king == 'Joffrey/Tommen Baratheon', ['name']]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment