Skip to content

Instantly share code, notes, and snippets.

@cjue25
Last active February 5, 2018 19:01
Show Gist options
  • Save cjue25/e2a983a2ebfccf47fb29d863743b3890 to your computer and use it in GitHub Desktop.
Save cjue25/e2a983a2ebfccf47fb29d863743b3890 to your computer and use it in GitHub Desktop.
Using Python for Research (edX) — Case Study 4
運用的觀念 (concepts used):
- Pearson correlation
- spectral co-clustering
- adjacency matrix
- eigenvalues and eigenvectors
- bokeh 用法 (Bokeh usage)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def _save_heatmap(matrix, pdf_name, tight=False):
    """Draw *matrix* with ``plt.pcolor`` on a new 10x10 figure and save it.

    When ``tight`` is true, ``plt.axis("tight")`` is applied before the
    colorbar is added.
    """
    plt.figure(figsize=(10, 10))
    plt.pcolor(matrix)
    if tight:
        plt.axis("tight")
    plt.colorbar()
    plt.savefig(pdf_name)


# Load the whisky table and attach the contents of regions.txt as "Region".
whisky = pd.read_csv("whiskies.txt")
whisky["Region"] = pd.read_csv("regions.txt")

# Columns at positions 2..13 are the ones treated as flavor scores below.
flavors = whisky.iloc[:, 2:14]

# Correlation between flavor columns.
corr_flavors = flavors.corr()
_save_heatmap(corr_flavors, "corr_flavors.pdf")

# Correlation between whiskies: transpose so each whisky becomes a column.
corr_whisky = flavors.T.corr()
_save_heatmap(corr_whisky, "corr_whisky.pdf", tight=True)
# SpectralCoclustering is exported directly from sklearn.cluster; the old
# sklearn.cluster.bicluster module path no longer exists in current
# scikit-learn releases.
from sklearn.cluster import SpectralCoclustering

# Co-cluster the whisky-by-whisky correlation matrix into 6 groups.
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(corr_whisky)

# The next three expressions only display a value in an interactive session;
# they are kept as inspection aids.
np.sum(model.rows_, axis=1)  # number of whiskies in each cluster
np.sum(model.rows_, axis=0)  # all ones: each whisky belongs to exactly one cluster
model.row_labels_  # cluster label assigned to each whisky

whisky["Group"] = pd.Series(model.row_labels_, index=whisky.index)

# Reorder rows so whiskies of the same group are adjacent. np.argsort returns
# positions, so select with .iloc (DataFrame.ix has been removed from pandas).
# The index labels still carry the old order after this step...
whisky = whisky.iloc[np.argsort(model.row_labels_)]
# ...so renumber them 0, 1, 2, ...
whisky = whisky.reset_index(drop=True)

# Whisky-by-whisky correlation of the reordered table, as a plain ndarray.
correlations = pd.DataFrame.corr(whisky.iloc[:, 2:14].transpose())
correlations = np.array(correlations)
# Side-by-side view of the whisky correlation matrix before and after the
# rows were rearranged by group.
plt.figure(figsize=(14, 7))
for slot, (matrix, heading) in enumerate(
    [(corr_whisky, "Original"), (correlations, "Rearranged")], start=1
):
    plt.subplot(1, 2, slot)
    plt.pcolor(matrix)
    plt.title(heading)
    plt.axis("tight")
# Added while the right-hand subplot is the current one.
plt.colorbar()
plt.savefig("correlations.pdf")
# HoverTool lets text pop up on a plot when the cursor hovers over it, and
# ColumnDataSource is the data structure that stores what Bokeh should plot.
# numpy is used in this section as well.
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.io import output_file, show
# Import figure from the public bokeh.plotting namespace: in Bokeh 3.x
# `bokeh.plotting.figure` is no longer an importable module.
from bokeh.plotting import figure
import numpy as np
# Data for a simple 5x5 grid of squares that alternate between red and blue.
plot_values = [1, 2, 3, 4, 5]
plot_colors = ["red", "blue"]

# Every (x, y) combination of the values 1-5 is one square of the grid.
from itertools import product
grid = list(product(plot_values, repeat=2))
print('grid',grid)

# Split the pairs into their x coordinates (first item) and y coordinates
# (second item).
xs = tuple(point[0] for point in grid)
ys = tuple(point[1] for point in grid)
print('x',xs)
print('y',ys)

# One color per square, alternating red / blue in grid order.
colors = [plot_colors[position % 2] for position, _ in enumerate(grid)]
print(colors)

# Transparency (alpha) per square, evenly spaced from 0 (completely
# transparent) to 1.
alphas = np.linspace(0, 1, num=len(grid))
# Bokeh reads what it plots from a ColumnDataSource; collect the
# coordinates, colors and alpha values under the column names used below.
_basic_columns = {
    "x": xs,
    "y": ys,
    "colors": colors,
    "alphas": alphas,
}
source = ColumnDataSource(data=_basic_columns)

# Build the interactive plot and write it to Basic_Example.html.
output_file("Basic_Example.html", title="Basic Example")
fig = figure(tools=" hover, save")
fig.rect(
    "x", "y",
    width=0.9, height=0.9,
    source=source,
    color="colors",
    alpha="alphas",
)

# Show each square's coordinates when hovering over it.
hover = fig.select({"type": HoverTool})
hover.tooltips = {"Value": "@x, @y"}
show(fig)
cluster_colors = ["red", "orange", "green", "blue", "purple", "gray"]
regions = ["Speyside", "Highlands", "Lowlands", "Islands", "Campbelltown", "Islay"]

# Map each region to the color at the same position in cluster_colors.
region_colors = dict(zip(regions, cluster_colors))
print(region_colors)
distilleries = list(whisky.Distillery)

# Read the group labels once instead of doing a Series lookup for every
# (i, j) cell of the n x n grid.
_groups = list(whisky.Group)

# One color per (i, j) pair, appended in row-major order so it lines up with
# correlations.flatten() below.
correlation_colors = []
for i, group_i in enumerate(_groups):
    for j, group_j in enumerate(_groups):
        if correlations[i][j] < 0.7:
            # Low correlation: just use white.
            correlation_colors.append('white')
        elif group_i == group_j:
            # Same group: color the pair by their mutual group.
            correlation_colors.append(cluster_colors[group_i])
        else:
            # Correlated, but in different groups.
            correlation_colors.append('lightgray')

source = ColumnDataSource(
    data={
        # x repeats each distillery n times and y cycles through all of
        # them, matching the loop order above.
        "x": np.repeat(distilleries, len(distilleries)),
        "y": list(distilleries) * len(distilleries),
        "colors": correlation_colors,
        "correlations": correlations.flatten(),
    }
)
# Interactive distillery-by-distillery correlation grid, written to
# "Whisky Correlations.html".
output_file("Whisky Correlations.html", title="Whisky Correlations")
fig = figure(
    title="Whisky Correlations",
    x_axis_location="above",
    tools="hover,save",
    x_range=distilleries[::-1],  # x axis lists the distilleries in reverse
    y_range=distilleries,
)

# Remove grid lines, axis lines and tick marks; keep small, slanted labels.
fig.grid.grid_line_color = None
fig.axis.axis_line_color = None
fig.axis.major_tick_line_color = None
fig.axis.major_label_text_font_size = "5pt"
fig.xaxis.major_label_orientation = np.pi / 3

# One square per pair; color and alpha come from the columns of `source`.
fig.rect(
    'x', 'y', .9, .9,
    source=source,
    color='colors',
    alpha='correlations',
)

hover = fig.select({"type": HoverTool})
hover.tooltips = {
    "Whiskies": "@x, @y",
    "Correlation": "@correlations",
}
show(fig)
# Minimal spatial example: three colored points with a hover tooltip.
points = [(0, 0), (1, 2), (3, 1)]
xs, ys = zip(*points)
colors = ["red", "blue", "green"]

output_file("Spatial_Example.html", title="Regional Example")
location_source = ColumnDataSource(
    data={
        "x": xs,
        "y": ys,
        "colors": colors,
    }
)

# The plot size is passed to figure() as width/height because the
# plot_width / plot_height attributes were removed in Bokeh 3.0.
fig = figure(
    title="Title",
    x_axis_location="above",
    tools="hover, save",
    width=300,
    height=380,
)
fig.circle("x", "y", size=10, source=location_source,
           color='colors', line_color=None)

hover = fig.select(dict(type=HoverTool))
hover.tooltips = {
    "Location": "(@x, @y)"
}
show(fig)
def location_plot(title, colors):
    """Plot every row of the global ``whisky`` table as a colored dot.

    Writes the plot to ``<title>.html`` and opens it with ``show``.

    Args:
        title: Plot title; also used as the stem of the output file name.
        colors: Sequence of colors stored alongside the ``whisky`` columns,
            so it needs one entry per row of ``whisky``.
    """
    output_file(title+".html")
    location_source = ColumnDataSource(
        data={
            # NOTE(review): " Latitude" is drawn on the x axis and
            # " Longitude" on the y axis - confirm this matches the units
            # and orientation of the data file.
            "x": whisky[" Latitude"],
            "y": whisky[" Longitude"],
            "colors": colors,
            "regions": whisky.Region,
            "distilleries": whisky.Distillery
        }
    )

    # The plot size is passed to figure() as width/height because the
    # plot_width / plot_height attributes were removed in Bokeh 3.0.
    fig = figure(
        title=title,
        x_axis_location="above",
        tools="hover, save",
        width=400,
        height=500,
    )
    fig.circle("x", "y", size=9, source=location_source,
               color='colors', line_color=None)
    fig.xaxis.major_label_orientation = np.pi / 3

    hover = fig.select(dict(type=HoverTool))
    hover.tooltips = {
        "Distillery": "@distilleries",
        "Location": "(@x, @y)"
    }
    show(fig)
# Color each whisky once by its region and once by its cluster group, then
# draw one location plot per coloring.
region_cols = [region_colors[region] for region in whisky.Region]
classification_cols = [cluster_colors[group] for group in whisky.Group]

for _plot_title, _dot_colors in (
    ("Whisky Locations and Regions", region_cols),
    ("Whisky Locations and Groups", classification_cols),
):
    location_plot(_plot_title, _dot_colors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment