ken333135

## genSankey
def genSankey(df,cat_cols=[],value_cols='',title='Sankey Diagram'):
    """Gather the distinct values of each category column of ``df``.

    For every column named in ``cat_cols`` the unique values are collected;
    the per-column counts go into ``colorNumList`` and the values themselves
    are concatenated, column by column, into ``labelList``.

    NOTE(review): as visible here the function stops after this step, so it
    returns None and ``value_cols``, ``title`` and the palette are never
    used -- presumably the rest of the body is missing; confirm against the
    full source.
    """
    # Hex colour palette with five entries.
    # NOTE(review): the earlier comment mentioned six colours; only five are listed.
    colorPalette = ['#4B8BBE','#306998','#FFE873','#FFD43B','#646464']

    # set() drops repeats; the order of the resulting labels is not guaranteed.
    uniquePerColumn = [list(set(df[column].values)) for column in cat_cols]

    colorNumList = [len(labels) for labels in uniquePerColumn]
    labelList = [label for labels in uniquePerColumn for label in labels]


## calendarGraph5
# Colour names, in order, kept in cmap.
cmap = [
    'white', 'red', 'orange', 'yellow', 'green', 'blue',
    'indigo', 'violet', 'purple', 'grey', 'pink',
    'brown', 'black',
]

# One figure/axes pair created with figsize=(6, 18).
f, ax = plt.subplots(figsize=(6, 18))

# Work on a copy of bookDf that keeps a single row per 'Date'
# (original note: the end of book A is the start of book B).
df = bookDf.copy()
df.drop_duplicates(subset=['Date'], inplace=True)

## calendarGraph4
# Keep an untouched copy of 'Date', then add calendar fields derived from it.
bookDf['DateOrig'] = bookDf['Date']

# (new column, per-date extractor) pairs, applied in this order so the
# columns are appended in the same sequence as before.
for _column, _extract in (
    ('Day', lambda d: d.day),                                    # day of month
    ('Month', lambda d: dt.datetime.strftime(d, '%b')),          # abbreviated month name
    ('DOW', lambda d: dt.datetime.strftime(d, '%a')),            # abbreviated weekday name
    ('Month_num', lambda d: d.month),                            # month number
    ('DOW_num', lambda d: d.weekday()),                          # weekday number from .weekday()
    ('Week_num', lambda d: int(dt.datetime.strftime(d, '%W'))),  # '%W' week of year as int
):
    bookDf[_column] = bookDf['Date'].apply(_extract)

#add proxy for different colours

## calendarGraph3
# dateList: a single 'Date' column holding every value date_generator yields
# between 1 January and 31 December (midnight) of `year`.
dateList = pd.DataFrame(
    list(date_generator(dt.datetime(year, 1, 1), dt.datetime(year, 12, 31))),
    columns=['Date'],
)
# Cast the column to object dtype.
dateList['Date'] = dateList['Date'].astype('O')

## calendarGraph2
def date_generator(from_date,to_date):
    """Yield values one day apart, from ``from_date`` through ``to_date``.

    Both end points are included. Nothing is yielded when ``from_date`` is
    already later than ``to_date``.
    """
    one_day = dt.timedelta(days=1)
    current = from_date
    while current <= to_date:
        yield current
        current = current + one_day

# Start two empty lists, Title and Date, intended as the two columns of a
# new DataFrame (col1: Title, col2: DateRead).
Title = []
Date = []
# Iterate over booksv2 one row at a time.
# NOTE(review): the loop body is not present in this excerpt.
for index,row in booksv2.iterrows():

## calendarGraph1
import pandas as pd
import datetime as dt

# Read book1.csv into booksv2.
booksv2 = pd.read_csv('book1.csv')

# Parse the 'Start' and 'End' columns from day/month/year text into datetimes.
for _col in ('Start', 'End'):
    booksv2[_col] = booksv2[_col].apply(
        lambda raw: dt.datetime.strptime(str(raw), '%d/%m/%Y')
    )

# Earliest and latest date found across both columns.
_all_dates = list(booksv2['Start']) + list(booksv2['End'])
min_date = min(_all_dates)
max_date = max(_all_dates)
#year = min_date.year

## Create a Sankey Diagram
import pandas as pd
import plotly
import plotly.plotly as py

# Call genSankey on df with the four 'lvl' columns as cat_cols and 'count'
# as value_cols; the result is kept in fig.
fig = genSankey(
    df,
    cat_cols=['lvl1', 'lvl2', 'lvl3', 'lvl4'],
    value_cols='count',
    title='Word Etymology',
)
# Hand fig to plotly's offline plot call with validation turned off.
plotly.offline.plot(fig, validate=False)

## DrawGraphDash 2
# Page layout: a Div wrapping the 'Graph' figure, followed by a 'row' Div
# containing an 'Overall Data' panel (node and edge counts of G) and a
# 'Selected Data' panel with the 'selected-data' placeholder.
# NOTE(review): the expression is cut off in this excerpt -- the closing
# brackets are not shown.
app.layout = html.Div([
                html.Div(dcc.Graph(id='Graph',figure=fig)),
                html.Div(className='row', children=[
                    html.Div([html.H2('Overall Data'),
                              html.P('Num of nodes: ' + str(len(G.nodes))),
                              html.P('Num of edges: ' + str(len(G.edges)))],
                              className='three columns'),
                    html.Div([
                            html.H2('Selected Data'),
                            html.Div(id='selected-data'),

## DrawGraphDash 1
# Assemble the figure from edge_trace and node_trace. The layout sets a title
# that includes num_nodes, hides the legend, uses 'closest' hover mode and
# fixed margins, and begins an annotation positioned in "paper" coordinates.
# NOTE(review): the expression is cut off in this excerpt -- the rest of the
# annotation and the closing brackets are not shown.
fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                title='<br>Network Graph of '+str(num_nodes)+' rules',
                titlefont=dict(size=16),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    showarrow=False,
                    xref="paper", yref="paper",

## TopWords 4
# For every code in vect_data.index, pair the code with the result of
# find_top_words(code, 5) (top 5 words of each class in the dataset).
top_words = [[code, find_top_words(code, 5)] for code in vect_data.index]

# Bare expression: shows the list when run interactively.
top_words
	# NOTE(review): flattened repeat of the genSankey snippet -- the lines after
	# the def are not indented under it, so this span does not parse as written.
	def genSankey(df,cat_cols=[],value_cols='',title='Sankey Diagram'):
	# maximum of 6 value cols -> 6 colors
	# NOTE(review): the palette below lists five colours, not six.
	colorPalette = ['#4B8BBE','#306998','#FFE873','#FFD43B','#646464']
	labelList = []
	colorNumList = []
	# Per category column: collect its distinct values, record how many there
	# are, and append them to the running label list.
	for catCol in cat_cols:
	labelListTemp = list(set(df[catCol].values))
	colorNumList.append(len(labelListTemp))
	labelList = labelList + labelListTemp
	# set colors
	# (ordered list of colour names)
	cmap=['white','red','orange','yellow','green','blue',
	'indigo','violet','purple','grey','pink',
	'brown','black']

	# One figure/axes pair created with figsize=(6, 18).
	f, ax = plt.subplots(figsize=(6, 18))

	# drop duplicates for bookDf **End of book A is the start of book B
	# (df is a copy of bookDf keeping a single row per 'Date')
	df = bookDf.copy()
	df.drop_duplicates(['Date'],inplace=True)
	# Adding needed columns
	# Each new column is derived from 'Date': a plain copy, day of month,
	# abbreviated month name ('%b'), abbreviated weekday name ('%a'), month
	# number, weekday number from .weekday(), and '%W' week of year as an int.
	bookDf['DateOrig'] = bookDf['Date']
	bookDf['Day'] = bookDf['Date'].apply(lambda x: x.day)
	bookDf['Month'] = bookDf['Date'].apply(lambda x: dt.datetime.strftime(x,'%b'))
	bookDf['DOW'] = bookDf['Date'].apply(lambda x: dt.datetime.strftime(x,'%a'))
	bookDf['Month_num'] = bookDf['Date'].apply(lambda x: x.month)
	bookDf['DOW_num'] = bookDf['Date'].apply(lambda x: x.weekday())
	bookDf['Week_num'] = bookDf['Date'].apply(lambda x: int(dt.datetime.strftime(x,'%W')))

	#add proxy for different colours
	# create a df with date from 1st to last day of year from min_year
	# (single 'Date' column filled from date_generator, 1 January through
	# 31 December of `year`; the column is then cast to object dtype)
	dateList = pd.DataFrame(list(date_generator(dt.datetime(year,1,1,0,0,0),dt.datetime(year,12,31,0,0,0))),columns=['Date'])
	dateList.Date = dateList.Date.astype('O')
	# Generator stepping one day at a time from from_date up to and including
	# to_date.
	# NOTE(review): flattened repeat -- the lines after the def are not indented
	# under it, so this span does not parse as written.
	def date_generator(from_date,to_date):
	while from_date<=to_date:
	yield from_date
	from_date = from_date + dt.timedelta(days=1)

	# create a new df with 2 columns
	# col1 : Title, col2: DateRead
	# (two empty lists are started here to hold those column values)
	Title = []
	Date = []
	# Iterate over booksv2 one row at a time.
	# NOTE(review): no loop body follows in this excerpt.
	for index,row in booksv2.iterrows():
	import pandas as pd
	import datetime as dt

	# Read book1.csv into booksv2.
	booksv2 = pd.read_csv('book1.csv')

	# Parse 'Start' and 'End' from day/month/year text into datetimes.
	booksv2['Start'] = booksv2['Start'].apply(lambda x: dt.datetime.strptime(str(x),'%d/%m/%Y'))
	booksv2['End'] = booksv2['End'].apply(lambda x: dt.datetime.strptime(str(x),'%d/%m/%Y'))
	# Earliest and latest date found across both columns.
	min_date = min(list(booksv2['Start'])+list(booksv2['End']))
	max_date = max(list(booksv2['Start'])+list(booksv2['End']))
	#year = min_date.year
	import pandas as pd
	import plotly
	import plotly.plotly as py

	# Call genSankey on df with the four 'lvl' columns as cat_cols and 'count'
	# as value_cols, then pass the result to plotly's offline plot call with
	# validation turned off.
	fig = genSankey(df,cat_cols=['lvl1','lvl2','lvl3','lvl4'],value_cols='count',title='Word Etymology')
	plotly.offline.plot(fig, validate=False)
	# Page layout: a Div wrapping the 'Graph' figure, then a 'row' Div with an
	# 'Overall Data' panel (node and edge counts of G) and a 'Selected Data'
	# panel.
	# NOTE(review): the expression is cut off here -- its closing brackets are
	# not shown.
	app.layout = html.Div([
	html.Div(dcc.Graph(id='Graph',figure=fig)),
	html.Div(className='row', children=[
	html.Div([html.H2('Overall Data'),
	html.P('Num of nodes: ' + str(len(G.nodes))),
	html.P('Num of edges: ' + str(len(G.edges)))],
	className='three columns'),
	html.Div([
	html.H2('Selected Data'),
	html.Div(id='selected-data'),
	# Assemble the figure from edge_trace and node_trace; the layout sets a
	# title that includes num_nodes, hides the legend, uses 'closest' hover
	# mode and fixed margins, and begins an annotation in "paper" coordinates.
	# NOTE(review): the expression is cut off here -- its closing brackets are
	# not shown.
	fig = go.Figure(data=[edge_trace, node_trace],
	layout=go.Layout(
	title='<br>Network Graph of '+str(num_nodes)+' rules',
	titlefont=dict(size=16),
	showlegend=False,
	hovermode='closest',
	margin=dict(b=20,l=5,r=5,t=40),
	annotations=[ dict(
	showarrow=False,
	xref="paper", yref="paper",
	# Collect [code, find_top_words(code, 5)] pairs for every code in
	# vect_data.index.
	# NOTE(review): flattened repeat -- the append below is not indented under
	# the for, so this span does not parse as written.
	top_words = []
	#loop to find top 5 words of each class in the dataset
	for code in vect_data.index:
	top_words.append([code,find_top_words(code,5)])

	#print the list of top words
	top_words