# linwoodc3/ddlTwitterPlotters.py

## ddlTwitterPlotters.py
# Author: Linwood Creekmore
# email: valinvescap@gmail.com
# date: 17 April 2017


import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib.patches as patches
import datetime
import pandas as pd
import numpy as np

def countplot(geodataframe,data,colorlist):
    '''Plot a bar chart of the most tweeted languages beside a table of counts.

    Languages holding more than 2% of the total in ``data`` are drawn as
    bars (largest first, the first bar outlined in red) and every value
    in ``data`` is listed in a table to the right.  The figure is shown
    with ``plt.show()``.

    Parameters
    ----------
    geodataframe : geopandas GeoDataFrame
        Original tweet data.  Kept for interface compatibility; the plot
        is built from ``data`` alone.

    data : pandas Series
        Counts per language.  The curated language names below are used
        only when the number of bars (7) or table rows (15) matches them;
        otherwise the Series index supplies the labels.

    colorlist : list of str
        Hex colour codes without the leading ``#``, used for the bars.

    Returns
    -------
    None
        The figure is displayed, not returned.
    '''
    # Curated display names, positional (assumes the ordering of the
    # original dataset -- NOTE(review): confirm against the caller).
    bar_names = ['English', "Indonesian", 'Japanese', "Spanish",
                 'Turkish', 'Portuguese', 'Tagalog (Filipino)']
    table_names = ["English", 'Indonesian', 'Japanese', 'Spanish', 'Turkish',
                   'Portuguese', 'Tagalog', 'Thai', 'Russian', 'French',
                   'Italian', 'German', 'Estonian', 'Arabic', 'Dutch']

    # Create only the axes that are used; a throw-away plt.subplots(1, 2)
    # pair can stay visible behind the gridspec axes on newer Matplotlib.
    fig = plt.figure(figsize=(22, 15))
    gs = gridspec.GridSpec(2, 2, height_ratios=[3, 1], width_ratios=[3, 1])
    ax1 = fig.add_subplot(gs[0])

    colors = ["#{0}".format(code) for code in colorlist]

    # plot and highlight highest bar; styling lists follow the bar count
    top = data[data > (data.sum() * .02)].sort_values(ascending=False)
    n_bars = len(top)
    top.plot(kind='bar', ax=ax1,
             linewidth=[2.5] + [0] * (n_bars - 1),
             edgecolor=['red'] + ['#EDEDED'] * (n_bars - 1),
             color=colors,
             stacked=True, figsize=(20, 17))
    ax1.grid(False)
    ax1.yaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)
    if n_bars == len(bar_names):
        ax1.set_xticklabels(bar_names, rotation=45)
    else:
        # a label list of the wrong length would mislabel or raise
        plt.setp(ax1.get_xticklabels(), rotation=45)
    # tick_params also covers ticks that are (re)created at draw time
    ax1.tick_params(axis='both', labelsize=20)

    ax1.set_xlabel('Language of Tweet', fontsize=24)
    ax1.set_ylabel('Count', fontsize=24)
    ax1.set_axisbelow(True)

    # text is positional: the ``s`` keyword was renamed in newer Matplotlib
    ax1.annotate(('Twitter is a US-based company so it\ncomes as no surprise that'
                  ' the English\nlanguage dominates our sample of\n~600,000 tweets.'),
                 xy=(0.3, 185000), xytext=(2.3, 119000),
                 fontsize=22, style='italic',
                 bbox={'facecolor': '#6794a7', 'alpha': 0.5, 'pad': 10},
                 arrowprops=dict(facecolor='black', shrink=0.05),
                 multialignment='left')
    fig.suptitle('Top Tweeted Languages',
                 fontsize=47, fontweight='bold')

    # second axes: table of every count in ``data``
    ax2 = fig.add_subplot(gs[1])
    ax2.xaxis.set_visible(False)
    ax2.yaxis.set_visible(False)

    cells = data.values[:, np.newaxis]
    n_rows = cells.shape[0]
    if n_rows == len(table_names):
        row_labels = table_names
    else:
        row_labels = [str(label) for label in data.index]
    the_table = ax2.table(cellText=cells,
                          rowLabels=row_labels,
                          colWidths=[0.25],
                          rowColours=["#d0daec"] * n_rows,
                          colColours=['#eceff6'],
                          cellColours=[['#eceff6']] * n_rows,
                          loc='center')
    ax2.axis('tight')
    the_table.set_fontsize(20)
    the_table.scale(2.5, 3.9)
    ax2.set_axis_off()
    for cell in the_table.get_celld().values():
        cell.set_linewidth(0.2)
    plt.show()

def countryplot(geodataframe,data,colorlist):
    '''Plot a horizontal bar chart of tweet counts by country with a table.

    Every entry of ``data`` is drawn as a horizontal bar (largest at the
    top), the 15 largest bars are labelled with their value, and the 14
    largest values are listed in a table to the right.  The figure is
    shown with ``plt.show()``.

    Parameters
    ----------
    geodataframe : geopandas GeoDataFrame
        Original tweet data with a ``NAME`` (or ``name``) column; its
        group counts supply the table's row labels.

    data : pandas Series
        Counts per country.

    colorlist : list
        Unused; kept for interface compatibility.

    Returns
    -------
    None
        The figure is displayed, not returned.
    '''
    # Create only the axes that are used; a throw-away plt.subplots(1, 2)
    # pair can stay visible behind the gridspec axes on newer Matplotlib.
    fig = plt.figure(figsize=(22, 17), frameon=True)
    gs = gridspec.GridSpec(2, 2, height_ratios=[3, 1], width_ratios=[3, 1])
    ax1 = fig.add_subplot(gs[0])

    # group by country; only a missing column triggers the fallback
    try:
        countrycount = geodataframe.groupby(['NAME'])['NAME'].count()
    except KeyError:
        countrycount = geodataframe.groupby(['name'])['name'].count()

    bars = data.sort_values(ascending=True)
    bars.plot(kind='barh', ax=ax1,
              colormap='RdBu_r', figsize=(20, 17))

    # adding grids on horizontal line only
    ax1.yaxis.label.set_visible(False)
    ax1.grid(False)
    ax1.xaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)

    # tick_params also covers ticks that are (re)created at draw time
    ax1.tick_params(axis='both', labelsize=24)

    # overarching title
    fig.suptitle('Top Tweeting Countries',
                 fontsize=38, fontweight='bold')

    # adding text annotation
    ax1.text(38000, 2.6,
             ('Surprisingly, the United States is not the\ngreatest user'
              ' of Twitter in our dataset.  This\ncould be for a number of reasons,\n'
              'especially given my unscientific retreival\nof the data.'
              ' We do however, see some\nconsistency in the data. Seven of the top ten\n'
              'Twitter-using countries are in the '
              'top 10\nof my unscientifically collected dataset.'),
             fontsize=22,
             bbox={'facecolor': '#6794a7', 'alpha': 0.5, 'pad': 18},
             multialignment='left')
    ax1.text(37000, 0.98, 'Comparison Source:\nNumber of active Twitter users in leading markets as of May 2016          \nhttps://www.statista.com/', style='italic',
             bbox={'facecolor': 'whitesmoke', 'alpha': 0.5, 'pad': 10}, fontsize=14)

    # adding labels to the 15 largest bars; they are the LAST bars of the
    # ascending plot, so start counting at their first position
    top_values = bars.iloc[-15:].values
    first_bar = len(bars) - len(top_values)
    for i, v in enumerate(top_values, start=first_bar):
        shift = 8200 if v > 10000 else 5700
        ax1.text(v - shift, i - .13, str(v), color='white',
                 fontweight='bold', fontsize=18)

    # second image; the table
    ax2 = fig.add_subplot(gs[1])
    ax2.xaxis.set_visible(False)
    ax2.yaxis.set_visible(False)

    datain = data.sort_values(ascending=False).iloc[:14].values[:, np.newaxis]
    n_rows = datain.shape[0]
    # NOTE(review): labels come from geodataframe while values come from
    # ``data``; they are paired by position only -- confirm both rank alike.
    rowLabels = countrycount.sort_values(ascending=False).iloc[:14].index.values
    the_table = ax2.table(cellText=datain,
                          rowLabels=rowLabels,
                          colWidths=[0.10],
                          rowColours=["#d0daec"] * n_rows,
                          colColours=['#eceff6'],
                          cellColours=[['#eceff6']] * n_rows,
                          loc='center')
    ax2.axis('tight')
    the_table.set_fontsize(23)
    the_table.scale(3, 3.2)
    ax2.set_axis_off()
    for cell in the_table.get_celld().values():
        cell.set_linewidth(0.2)
    plt.show()

def hourplot(geodataframe,country1='United States',country2='Indonesia'):
    '''Compare tweet counts per local hour of the day for two countries.

    Rows of ``geodataframe`` are filtered by country name, grouped by the
    hour of their ``normtime`` value and counted.  Both series are
    expanded to all 24 hours (missing hours count as 0) so that bar
    positions always equal the hour of the day, then drawn as
    overlapping bars.  The figure is shown with ``plt.show()``.

    Parameters
    ----------
    geodataframe : geopandas GeoDataFrame
        Original tweet data with a ``normtime`` column whose values
        expose ``.hour`` and a ``NAME`` (or ``name``) country column.

    country1 : str
        Country drawn in the foreground (light blue) bars.

    country2 : str
        Country drawn in the background (dark blue) bars.

    Returns
    -------
    None
        The figure is displayed, not returned.  The title, highlight
        rectangle and annotation text are fixed and describe the
        default United States / Indonesia comparison.
    '''
    name_col = 'NAME' if 'NAME' in geodataframe.columns else 'name'
    # hour of every row, computed once and shared by both groupings
    hours = geodataframe.normtime.apply(lambda stamp: stamp.hour)
    all_hours = list(range(24))

    def hourly_counts(country):
        # tweets per hour for one country, with absent hours filled as 0
        selected = geodataframe.normtime[geodataframe[name_col] == country]
        return selected.groupby(hours).size().reindex(all_hours, fill_value=0)

    us_count = hourly_counts(country1)
    indo_count = hourly_counts(country2)

    f, ax = plt.subplots(figsize=(20, 12))
    ax.set_xticks(np.arange(24), minor=True)

    us_count.plot(kind='bar', width=0.8, ax=ax, color='#01a2d9',
                  label=country1, alpha=1, zorder=10)
    ax.bar(np.arange(indo_count.shape[0]),
           indo_count.values, color='#014d64',
           width=0.8, label=country2)
    ax.set_xlabel('Hour of the Day (Country Local Time)', fontsize=22)
    # sized after plotting so the labels pandas installs are covered too
    ax.tick_params(axis='both', labelsize=20)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
    # one legend call: a later plt.legend() would drop the font size
    ax.legend(loc='best', fontsize=22)
    ax.grid(False)
    ax.yaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)

    #highlight rectangle
    ax.add_patch(
        patches.Rectangle(
            (9.8, 0),   # (x,y)
            9,          # width
            11300, # height
            fill=True, color="#ff6d6d",
            alpha=0.3, zorder=-1))

    # text is positional: the ``s`` keyword was renamed in newer Matplotlib
    ax.annotate(("Huge gap in U.S. data at local peak\ntime"
                 " for Twitter usage (1100-1300 Local)\n"
                 "just when Indonesia Twitter usage peaks. \n"
                 "This explains our anomaly."),
                xy=(11.5, 10000), xytext=(1, 9500),
                arrowprops=dict(facecolor='black', shrink=0.05),
                bbox={'facecolor': '#6794a7', 'alpha': .8, 'pad': 10},
                fontsize=20, style='italic')
    ax.text(1, 8380, ('Comparison Source:\nThe Biggest Social Media Science Study: What'
                      ' 4.8 Million Tweets                                  \nSay About the Best Time to Tweet\nhttps://blog'
                      '.bufferapp.com/best-time-to-tweet-research'), style='italic',
            bbox={'facecolor': 'whitesmoke', 'alpha': 0.5, 'pad': 10}, fontsize=10)
    f.suptitle('United States Data has Gap During Peak Usage Time (in Local Time Hours)',
               fontsize=25, fontweight='bold')
    plt.show()

def waterplot(geodataframe,data,colorlist):
    '''Plot a horizontal bar chart of tweet counts by body of water with a table.

    Every entry of ``data`` is drawn as a horizontal bar (largest at the
    top), the 10 largest bars are labelled with their value, and the 10
    largest values are listed in a table to the right.  The figure is
    shown with ``plt.show()``.

    Parameters
    ----------
    geodataframe : geopandas GeoDataFrame
        Original data with a ``NAME`` (or ``name``) column; its group
        counts supply the table's row labels.

    data : pandas Series
        Counts per body of water.

    colorlist : list
        Unused; kept for interface compatibility.

    Returns
    -------
    None
        The figure is displayed, not returned.
    '''
    # Create only the axes that are used; a throw-away plt.subplots(1, 2)
    # pair can stay visible behind the gridspec axes on newer Matplotlib.
    fig = plt.figure(figsize=(22, 17), frameon=True)
    gs = gridspec.GridSpec(2, 2, height_ratios=[6, 1], width_ratios=[5, 1])
    ax1 = fig.add_subplot(gs[0])

    # group by name; only a missing column triggers the fallback
    try:
        namecount = geodataframe.groupby(['NAME'])['NAME'].count()
    except KeyError:
        namecount = geodataframe.groupby(['name'])['name'].count()

    bars = data.sort_values(ascending=True)
    bars.plot(kind='barh', ax=ax1,
              colormap='RdBu_r', figsize=(20, 15))

    # adding grids on horizontal line only
    ax1.yaxis.label.set_visible(False)
    ax1.grid(False)
    ax1.xaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)

    # tick_params also covers ticks that are (re)created at draw time
    ax1.tick_params(axis='y', labelsize=18)
    ax1.tick_params(axis='x', labelsize=22)

    # overarching title
    fig.suptitle('Top 10 Bodies of Water by Count of Tweets',
                 fontsize=38, fontweight='bold')

    # adding text annotation
    ax1.text(440, 1.6,
             ('A good number of tweets occur from bodies\n'
              'of water. The North Atlantic and Pacific\n'
              'Oceans likley dominate because they hold\n'
              'major trade/travel routes.'),
             fontsize=22,
             bbox={'facecolor': '#6794a7', 'alpha': 0.5, 'pad': 18},
             multialignment='left')

    # adding labels to the 10 largest bars; they are the LAST bars of the
    # ascending plot, so start counting at their first position
    top_values = bars.iloc[-10:].values
    first_bar = len(bars) - len(top_values)
    for i, v in enumerate(top_values, start=first_bar):
        size = 18 if v > 10000 else 24
        ax1.text(v - 70, i - .13, str(v), color='white',
                 fontweight='bold', fontsize=size)

    # second image; the table
    ax2 = fig.add_subplot(gs[1])
    ax2.xaxis.set_visible(False)
    ax2.yaxis.set_visible(False)

    datain = data.sort_values(ascending=False).iloc[:10].values[:, np.newaxis]
    n_rows = datain.shape[0]
    # NOTE(review): labels come from geodataframe while values come from
    # ``data``; they are paired by position only -- confirm both rank alike.
    rowLabels = namecount.sort_values(ascending=False).iloc[:10].index.values
    the_table = ax2.table(cellText=datain,
                          rowLabels=rowLabels,
                          colWidths=[0.10],
                          rowColours=["#d0daec"] * n_rows,
                          colColours=['#eceff6'],
                          cellColours=[['#eceff6']] * n_rows,
                          loc='center')
    the_table.set_fontsize(20)
    the_table.scale(1.8, 4.3)
    ax2.set_axis_off()
    for cell in the_table.get_celld().values():
        cell.set_linewidth(0.2)
    plt.show()
	# Author: Linwood Creekmore
	# email: valinvescap@gmail.com
	# date: 17 April 2017


	import matplotlib.pyplot as plt
	from matplotlib import gridspec
	import matplotlib.patches as patches
	import datetime
	import pandas as pd
	import numpy as np

	def countplot(geodataframe,data,colorlist):
	    '''Plot a bar chart of the most tweeted languages beside a table of counts.

	    Languages holding more than 2% of the total in ``data`` are drawn as
	    bars (largest first, the first bar outlined in red) and every value
	    in ``data`` is listed in a table to the right.  The figure is shown
	    with ``plt.show()``.

	    Parameters
	    ----------
	    geodataframe : geopandas GeoDataFrame
	        Original tweet data.  Kept for interface compatibility; the plot
	        is built from ``data`` alone.

	    data : pandas Series
	        Counts per language.  The curated language names below are used
	        only when the number of bars (7) or table rows (15) matches them;
	        otherwise the Series index supplies the labels.

	    colorlist : list of str
	        Hex colour codes without the leading ``#``, used for the bars.

	    Returns
	    -------
	    None
	        The figure is displayed, not returned.
	    '''
	    # Curated display names, positional (assumes the ordering of the
	    # original dataset -- NOTE(review): confirm against the caller).
	    bar_names = ['English', "Indonesian", 'Japanese', "Spanish",
	                 'Turkish', 'Portuguese', 'Tagalog (Filipino)']
	    table_names = ["English", 'Indonesian', 'Japanese', 'Spanish', 'Turkish',
	                   'Portuguese', 'Tagalog', 'Thai', 'Russian', 'French',
	                   'Italian', 'German', 'Estonian', 'Arabic', 'Dutch']

	    # Create only the axes that are used; a throw-away plt.subplots(1, 2)
	    # pair can stay visible behind the gridspec axes on newer Matplotlib.
	    fig = plt.figure(figsize=(22, 15))
	    gs = gridspec.GridSpec(2, 2, height_ratios=[3, 1], width_ratios=[3, 1])
	    ax1 = fig.add_subplot(gs[0])

	    colors = ["#{0}".format(code) for code in colorlist]

	    # plot and highlight highest bar; styling lists follow the bar count
	    top = data[data > (data.sum() * .02)].sort_values(ascending=False)
	    n_bars = len(top)
	    top.plot(kind='bar', ax=ax1,
	             linewidth=[2.5] + [0] * (n_bars - 1),
	             edgecolor=['red'] + ['#EDEDED'] * (n_bars - 1),
	             color=colors,
	             stacked=True, figsize=(20, 17))
	    ax1.grid(False)
	    ax1.yaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)
	    if n_bars == len(bar_names):
	        ax1.set_xticklabels(bar_names, rotation=45)
	    else:
	        # a label list of the wrong length would mislabel or raise
	        plt.setp(ax1.get_xticklabels(), rotation=45)
	    # tick_params also covers ticks that are (re)created at draw time
	    ax1.tick_params(axis='both', labelsize=20)

	    ax1.set_xlabel('Language of Tweet', fontsize=24)
	    ax1.set_ylabel('Count', fontsize=24)
	    ax1.set_axisbelow(True)

	    # text is positional: the ``s`` keyword was renamed in newer Matplotlib
	    ax1.annotate(('Twitter is a US-based company so it\ncomes as no surprise that'
	                  ' the English\nlanguage dominates our sample of\n~600,000 tweets.'),
	                 xy=(0.3, 185000), xytext=(2.3, 119000),
	                 fontsize=22, style='italic',
	                 bbox={'facecolor': '#6794a7', 'alpha': 0.5, 'pad': 10},
	                 arrowprops=dict(facecolor='black', shrink=0.05),
	                 multialignment='left')
	    fig.suptitle('Top Tweeted Languages',
	                 fontsize=47, fontweight='bold')

	    # second axes: table of every count in ``data``
	    ax2 = fig.add_subplot(gs[1])
	    ax2.xaxis.set_visible(False)
	    ax2.yaxis.set_visible(False)

	    cells = data.values[:, np.newaxis]
	    n_rows = cells.shape[0]
	    if n_rows == len(table_names):
	        row_labels = table_names
	    else:
	        row_labels = [str(label) for label in data.index]
	    the_table = ax2.table(cellText=cells,
	                          rowLabels=row_labels,
	                          colWidths=[0.25],
	                          rowColours=["#d0daec"] * n_rows,
	                          colColours=['#eceff6'],
	                          cellColours=[['#eceff6']] * n_rows,
	                          loc='center')
	    ax2.axis('tight')
	    the_table.set_fontsize(20)
	    the_table.scale(2.5, 3.9)
	    ax2.set_axis_off()
	    for cell in the_table.get_celld().values():
	        cell.set_linewidth(0.2)
	    plt.show()

	def countryplot(geodataframe,data,colorlist):
	    '''Plot a horizontal bar chart of tweet counts by country with a table.

	    Every entry of ``data`` is drawn as a horizontal bar (largest at the
	    top), the 15 largest bars are labelled with their value, and the 14
	    largest values are listed in a table to the right.  The figure is
	    shown with ``plt.show()``.

	    Parameters
	    ----------
	    geodataframe : geopandas GeoDataFrame
	        Original tweet data with a ``NAME`` (or ``name``) column; its
	        group counts supply the table's row labels.

	    data : pandas Series
	        Counts per country.

	    colorlist : list
	        Unused; kept for interface compatibility.

	    Returns
	    -------
	    None
	        The figure is displayed, not returned.
	    '''
	    # Create only the axes that are used; a throw-away plt.subplots(1, 2)
	    # pair can stay visible behind the gridspec axes on newer Matplotlib.
	    fig = plt.figure(figsize=(22, 17), frameon=True)
	    gs = gridspec.GridSpec(2, 2, height_ratios=[3, 1], width_ratios=[3, 1])
	    ax1 = fig.add_subplot(gs[0])

	    # group by country; only a missing column triggers the fallback
	    try:
	        countrycount = geodataframe.groupby(['NAME'])['NAME'].count()
	    except KeyError:
	        countrycount = geodataframe.groupby(['name'])['name'].count()

	    bars = data.sort_values(ascending=True)
	    bars.plot(kind='barh', ax=ax1,
	              colormap='RdBu_r', figsize=(20, 17))

	    # adding grids on horizontal line only
	    ax1.yaxis.label.set_visible(False)
	    ax1.grid(False)
	    ax1.xaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)

	    # tick_params also covers ticks that are (re)created at draw time
	    ax1.tick_params(axis='both', labelsize=24)

	    # overarching title
	    fig.suptitle('Top Tweeting Countries',
	                 fontsize=38, fontweight='bold')

	    # adding text annotation
	    ax1.text(38000, 2.6,
	             ('Surprisingly, the United States is not the\ngreatest user'
	              ' of Twitter in our dataset. This\ncould be for a number of reasons,\n'
	              'especially given my unscientific retreival\nof the data.'
	              ' We do however, see some\nconsistency in the data. Seven of the top ten\n'
	              'Twitter-using countries are in the '
	              'top 10\nof my unscientifically collected dataset.'),
	             fontsize=22,
	             bbox={'facecolor': '#6794a7', 'alpha': 0.5, 'pad': 18},
	             multialignment='left')
	    ax1.text(37000, 0.98, 'Comparison Source:\nNumber of active Twitter users in leading markets as of May 2016 \nhttps://www.statista.com/', style='italic',
	             bbox={'facecolor': 'whitesmoke', 'alpha': 0.5, 'pad': 10}, fontsize=14)

	    # adding labels to the 15 largest bars; they are the LAST bars of the
	    # ascending plot, so start counting at their first position
	    top_values = bars.iloc[-15:].values
	    first_bar = len(bars) - len(top_values)
	    for i, v in enumerate(top_values, start=first_bar):
	        shift = 8200 if v > 10000 else 5700
	        ax1.text(v - shift, i - .13, str(v), color='white',
	                 fontweight='bold', fontsize=18)

	    # second image; the table
	    ax2 = fig.add_subplot(gs[1])
	    ax2.xaxis.set_visible(False)
	    ax2.yaxis.set_visible(False)

	    datain = data.sort_values(ascending=False).iloc[:14].values[:, np.newaxis]
	    n_rows = datain.shape[0]
	    # NOTE(review): labels come from geodataframe while values come from
	    # ``data``; they are paired by position only -- confirm both rank alike.
	    rowLabels = countrycount.sort_values(ascending=False).iloc[:14].index.values
	    the_table = ax2.table(cellText=datain,
	                          rowLabels=rowLabels,
	                          colWidths=[0.10],
	                          rowColours=["#d0daec"] * n_rows,
	                          colColours=['#eceff6'],
	                          cellColours=[['#eceff6']] * n_rows,
	                          loc='center')
	    ax2.axis('tight')
	    the_table.set_fontsize(23)
	    the_table.scale(3, 3.2)
	    ax2.set_axis_off()
	    for cell in the_table.get_celld().values():
	        cell.set_linewidth(0.2)
	    plt.show()

	def hourplot(geodataframe,country1='United States',country2='Indonesia'):
	    '''Compare tweet counts per local hour of the day for two countries.

	    Rows of ``geodataframe`` are filtered by country name, grouped by the
	    hour of their ``normtime`` value and counted.  Both series are
	    expanded to all 24 hours (missing hours count as 0) so that bar
	    positions always equal the hour of the day, then drawn as
	    overlapping bars.  The figure is shown with ``plt.show()``.

	    Parameters
	    ----------
	    geodataframe : geopandas GeoDataFrame
	        Original tweet data with a ``normtime`` column whose values
	        expose ``.hour`` and a ``NAME`` (or ``name``) country column.

	    country1 : str
	        Country drawn in the foreground (light blue) bars.

	    country2 : str
	        Country drawn in the background (dark blue) bars.

	    Returns
	    -------
	    None
	        The figure is displayed, not returned.  The title, highlight
	        rectangle and annotation text are fixed and describe the
	        default United States / Indonesia comparison.
	    '''
	    name_col = 'NAME' if 'NAME' in geodataframe.columns else 'name'
	    # hour of every row, computed once and shared by both groupings
	    hours = geodataframe.normtime.apply(lambda stamp: stamp.hour)
	    all_hours = list(range(24))

	    def hourly_counts(country):
	        # tweets per hour for one country, with absent hours filled as 0
	        selected = geodataframe.normtime[geodataframe[name_col] == country]
	        return selected.groupby(hours).size().reindex(all_hours, fill_value=0)

	    us_count = hourly_counts(country1)
	    indo_count = hourly_counts(country2)

	    f, ax = plt.subplots(figsize=(20, 12))
	    ax.set_xticks(np.arange(24), minor=True)

	    us_count.plot(kind='bar', width=0.8, ax=ax, color='#01a2d9',
	                  label=country1, alpha=1, zorder=10)
	    ax.bar(np.arange(indo_count.shape[0]),
	           indo_count.values, color='#014d64',
	           width=0.8, label=country2)
	    ax.set_xlabel('Hour of the Day (Country Local Time)', fontsize=22)
	    # sized after plotting so the labels pandas installs are covered too
	    ax.tick_params(axis='both', labelsize=20)
	    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
	    # one legend call: a later plt.legend() would drop the font size
	    ax.legend(loc='best', fontsize=22)
	    ax.grid(False)
	    ax.yaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)

	    #highlight rectangle
	    ax.add_patch(
	        patches.Rectangle(
	            (9.8, 0), # (x,y)
	            9, # width
	            11300, # height
	            fill=True, color="#ff6d6d",
	            alpha=0.3, zorder=-1))

	    # text is positional: the ``s`` keyword was renamed in newer Matplotlib
	    ax.annotate(("Huge gap in U.S. data at local peak\ntime"
	                 " for Twitter usage (1100-1300 Local)\n"
	                 "just when Indonesia Twitter usage peaks. \n"
	                 "This explains our anomaly."),
	                xy=(11.5, 10000), xytext=(1, 9500),
	                arrowprops=dict(facecolor='black', shrink=0.05),
	                bbox={'facecolor': '#6794a7', 'alpha': .8, 'pad': 10},
	                fontsize=20, style='italic')
	    ax.text(1, 8380, ('Comparison Source:\nThe Biggest Social Media Science Study: What'
	                      ' 4.8 Million Tweets \nSay About the Best Time to Tweet\nhttps://blog'
	                      '.bufferapp.com/best-time-to-tweet-research'), style='italic',
	            bbox={'facecolor': 'whitesmoke', 'alpha': 0.5, 'pad': 10}, fontsize=10)
	    f.suptitle('United States Data has Gap During Peak Usage Time (in Local Time Hours)',
	               fontsize=25, fontweight='bold')
	    plt.show()

	def waterplot(geodataframe,data,colorlist):
	    '''Plot a horizontal bar chart of tweet counts by body of water with a table.

	    Every entry of ``data`` is drawn as a horizontal bar (largest at the
	    top), the 10 largest bars are labelled with their value, and the 10
	    largest values are listed in a table to the right.  The figure is
	    shown with ``plt.show()``.

	    Parameters
	    ----------
	    geodataframe : geopandas GeoDataFrame
	        Original data with a ``NAME`` (or ``name``) column; its group
	        counts supply the table's row labels.

	    data : pandas Series
	        Counts per body of water.

	    colorlist : list
	        Unused; kept for interface compatibility.

	    Returns
	    -------
	    None
	        The figure is displayed, not returned.
	    '''
	    # Create only the axes that are used; a throw-away plt.subplots(1, 2)
	    # pair can stay visible behind the gridspec axes on newer Matplotlib.
	    fig = plt.figure(figsize=(22, 17), frameon=True)
	    gs = gridspec.GridSpec(2, 2, height_ratios=[6, 1], width_ratios=[5, 1])
	    ax1 = fig.add_subplot(gs[0])

	    # group by name; only a missing column triggers the fallback
	    try:
	        namecount = geodataframe.groupby(['NAME'])['NAME'].count()
	    except KeyError:
	        namecount = geodataframe.groupby(['name'])['name'].count()

	    bars = data.sort_values(ascending=True)
	    bars.plot(kind='barh', ax=ax1,
	              colormap='RdBu_r', figsize=(20, 15))

	    # adding grids on horizontal line only
	    ax1.yaxis.label.set_visible(False)
	    ax1.grid(False)
	    ax1.xaxis.grid(True, color='w', ls='-', lw=1.5, zorder=0)

	    # tick_params also covers ticks that are (re)created at draw time
	    ax1.tick_params(axis='y', labelsize=18)
	    ax1.tick_params(axis='x', labelsize=22)

	    # overarching title
	    fig.suptitle('Top 10 Bodies of Water by Count of Tweets',
	                 fontsize=38, fontweight='bold')

	    # adding text annotation
	    ax1.text(440, 1.6,
	             ('A good number of tweets occur from bodies\n'
	              'of water. The North Atlantic and Pacific\n'
	              'Oceans likley dominate because they hold\n'
	              'major trade/travel routes.'),
	             fontsize=22,
	             bbox={'facecolor': '#6794a7', 'alpha': 0.5, 'pad': 18},
	             multialignment='left')

	    # adding labels to the 10 largest bars; they are the LAST bars of the
	    # ascending plot, so start counting at their first position
	    top_values = bars.iloc[-10:].values
	    first_bar = len(bars) - len(top_values)
	    for i, v in enumerate(top_values, start=first_bar):
	        size = 18 if v > 10000 else 24
	        ax1.text(v - 70, i - .13, str(v), color='white',
	                 fontweight='bold', fontsize=size)

	    # second image; the table
	    ax2 = fig.add_subplot(gs[1])
	    ax2.xaxis.set_visible(False)
	    ax2.yaxis.set_visible(False)

	    datain = data.sort_values(ascending=False).iloc[:10].values[:, np.newaxis]
	    n_rows = datain.shape[0]
	    # NOTE(review): labels come from geodataframe while values come from
	    # ``data``; they are paired by position only -- confirm both rank alike.
	    rowLabels = namecount.sort_values(ascending=False).iloc[:10].index.values
	    the_table = ax2.table(cellText=datain,
	                          rowLabels=rowLabels,
	                          colWidths=[0.10],
	                          rowColours=["#d0daec"] * n_rows,
	                          colColours=['#eceff6'],
	                          cellColours=[['#eceff6']] * n_rows,
	                          loc='center')
	    the_table.set_fontsize(20)
	    the_table.scale(1.8, 4.3)
	    ax2.set_axis_off()
	    for cell in the_table.get_celld().values():
	        cell.set_linewidth(0.2)
	    plt.show()