Last active
February 5, 2018 19:01
-
-
Save cjue25/e2a983a2ebfccf47fb29d863743b3890 to your computer and use it in GitHub Desktop.
Use Python for Research_edX_Case_Study_4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
運用的觀念
pearson correlation
spectral co-clustering
adjacency matrix
eigenvalues, eigenvectors
bokeh用法
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd

# Read the distillery table, then attach the contents of regions.txt as a
# new "Region" column.
whisky = pd.read_csv("whiskies.txt")
whisky["Region"] = pd.read_csv("regions.txt")

# Columns 2 through 13 are used as the flavor attributes (presumably the
# flavor scores of each whisky -- verify against whiskies.txt).
flavors = whisky.iloc[:, 2:14]

# Pairwise correlation between the flavor columns.
corr_flavors = flavors.corr()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt

# Heat map of the flavor-by-flavor correlation matrix.
plt.figure(figsize=(10, 10))
plt.pcolor(corr_flavors)
plt.colorbar()
plt.savefig("corr_flavors.pdf")

# Transposing turns each whisky (row) into a column, so corr() now compares
# whiskies with one another instead of flavors.
corr_whisky = flavors.T.corr()

# Heat map of the whisky-by-whisky correlation matrix.
plt.figure(figsize=(10, 10))
plt.pcolor(corr_whisky)
plt.axis("tight")
plt.colorbar()
plt.savefig("corr_whisky.pdf")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SpectralCoclustering is exported from sklearn.cluster; the former
# sklearn.cluster.bicluster module path is no longer available in current
# scikit-learn releases.
from sklearn.cluster import SpectralCoclustering

# Co-cluster the whisky-by-whisky correlation matrix into 6 groups.
# random_state is fixed so the cluster assignment is reproducible.
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(corr_whisky)

# Inspection expressions -- their values are only displayed when run in an
# interactive session; they have no effect in a script.
np.sum(model.rows_, axis=1)  # how many whiskies fall into each cluster
np.sum(model.rows_, axis=0)  # all ones: each whisky belongs to exactly one cluster
model.row_labels_            # cluster index assigned to each whisky

# Record each whisky's cluster label next to its data.
whisky["Group"] = pd.Series(model.row_labels_, index=whisky.index)

# Sort the rows by cluster label (the index values are kept at this point).
# DataFrame.ix was removed in pandas 1.0; argsort() yields positions, so the
# positional indexer .iloc is the correct replacement.
whisky = whisky.iloc[np.argsort(model.row_labels_)]

# Renumber the index 0, 1, 2, ... to match the new row order.
whisky = whisky.reset_index(drop=True)

# Recompute the whisky-by-whisky correlations in the rearranged order and
# keep them as a plain NumPy array.
correlations = whisky.iloc[:, 2:14].transpose().corr()
correlations = np.array(correlations)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw the original and the cluster-sorted correlation matrices side by side.
plt.figure(figsize=(14, 7))
panels = (("Original", corr_whisky), ("Rearranged", correlations))
for position, (label, matrix) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.pcolor(matrix)
    plt.title(label)
    plt.axis("tight")

# The colorbar is added while the right-hand panel is the current axes.
plt.colorbar()
plt.savefig("correlations.pdf")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HoverTool lets text pop up on a plot when the cursor hovers over it, and
# ColumnDataSource is the data structure that stores what Bokeh should plot.
# numpy is used in this section as well.
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.io import output_file, show
# Import figure from the bokeh.plotting package itself: the
# bokeh.plotting.figure submodule path does not exist in newer Bokeh
# releases, whereas this form works across versions.
from bokeh.plotting import figure
import numpy as np
# Build the data for a simple 5x5 grid of squares whose colors alternate.
from itertools import product

plot_values = [1, 2, 3, 4, 5]
plot_colors = ["red", "blue"]

# Every (x, y) combination of the plot values, i.e. one entry per grid cell.
grid = list(product(plot_values, plot_values))
print('grid', grid)

# The first value of each pair is the x coordinate, the second is the
# y coordinate; split them into two separate sequences.
xs, ys = zip(*grid)
print('x', xs)
print('y', ys)

# One color per cell, cycling through plot_colors. The modulus follows the
# length of plot_colors so that changing the palette size keeps working
# (previously the cycle length was hard-coded to 2).
colors = [plot_colors[i % len(plot_colors)] for i in range(len(grid))]
print(colors)

# Transparency (alpha) for each cell, evenly spaced from 0 (completely
# transparent) to 1.
alphas = np.linspace(0, 1, len(grid))
# Bokeh takes its plotting data from a ColumnDataSource; store the
# coordinates, colors and alpha values under the column names used below.
grid_columns = {
    "x": xs,
    "y": ys,
    "colors": colors,
    "alphas": alphas,
}
source = ColumnDataSource(data=grid_columns)

# Render the interactive plot into a standalone HTML file.
output_file("Basic_Example.html", title="Basic Example")
fig = figure(tools=" hover, save")

# One 0.9 x 0.9 rectangle per grid cell; color and alpha come from the
# matching columns of the data source.
fig.rect(x="x", y="y", width=0.9, height=0.9, source=source,
         color="colors", alpha="alphas")

# Show the cell coordinates when hovering over a rectangle.
hover = fig.select({"type": HoverTool})
hover.tooltips = {"Value": "@x, @y"}

show(fig)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cluster_colors = ["red", "orange", "green", "blue", "purple", "gray"]
regions = ["Speyside", "Highlands", "Lowlands", "Islands", "Campbelltown", "Islay"]

# Map each region name to the color at the same position in cluster_colors.
region_colors = dict(zip(regions, cluster_colors))
# NOTE(review): the mapping is printed twice, matching the original script
# (assuming the first print sat after the loop, not inside it).
print(region_colors)
print(region_colors)

distilleries = list(whisky.Distillery)
group = whisky.Group


def _pair_color(i, j):
    """Return the cell color for the pair of whiskies at rows i and j."""
    if correlations[i][j] < 0.7:
        # Low correlation: leave the cell white.
        return 'white'
    if group[i] == group[j]:
        # Same cluster: use that cluster's color.
        return cluster_colors[group[i]]
    # Highly correlated but in different clusters.
    return 'lightgray'


# One color per (i, j) pair, with i as the outer index and j as the inner one.
correlation_colors = [
    _pair_color(i, j)
    for i in range(len(distilleries))
    for j in range(len(distilleries))
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flatten the pairwise data into columns: "x" repeats each distillery name
# n times in a row, "y" cycles through the whole list n times.
n_distilleries = len(distilleries)
pair_columns = {
    "x": np.repeat(distilleries, n_distilleries),
    "y": list(distilleries) * n_distilleries,
    "colors": correlation_colors,
    "correlations": correlations.flatten(),
}
source = ColumnDataSource(data=pair_columns)

output_file("Whisky Correlations.html", title="Whisky Correlations")

# Categorical axes: distilleries on both axes, with the x axis reversed and
# drawn along the top of the plot.
fig = figure(
    title="Whisky Correlations",
    x_axis_location="above",
    tools="hover,save",
    x_range=distilleries[::-1],
    y_range=distilleries,
)

# Strip grid lines, axis lines and ticks; shrink and tilt the labels so the
# distillery names fit.
fig.grid.grid_line_color = None
fig.axis.axis_line_color = None
fig.axis.major_tick_line_color = None
fig.axis.major_label_text_font_size = "5pt"
fig.xaxis.major_label_orientation = np.pi / 3

# One rectangle per distillery pair; the correlation value is used as alpha.
fig.rect('x', 'y', .9, .9, source=source,
         color='colors', alpha='correlations')

# Hovering shows the two distilleries and their correlation.
hover = fig.select({"type": HoverTool})
hover.tooltips = {
    "Whiskies": "@x, @y",
    "Correlation": "@correlations",
}

show(fig)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Minimal example: three colored points with a hover tooltip.
output_file("Spatial_Example.html", title="Regional Example")

points = [(0, 0), (1, 2), (3, 1)]
xs = tuple(point[0] for point in points)
ys = tuple(point[1] for point in points)
colors = ["red", "blue", "green"]

location_source = ColumnDataSource(
    data={"x": xs, "y": ys, "colors": colors}
)

fig = figure(title="Title", x_axis_location="above", tools="hover, save")
# NOTE(review): plot_width / plot_height are version-dependent attribute
# names (newer Bokeh uses width / height) -- confirm the target Bokeh version.
fig.plot_width = 300
fig.plot_height = 380

# Filled circles without an outline, colored per point.
fig.circle("x", "y", size=10, source=location_source,
           color='colors', line_color=None)

# Show the coordinates when hovering over a point.
hover = fig.select({"type": HoverTool})
hover.tooltips = {"Location": "(@x, @y)"}

show(fig)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def location_plot(title, colors):
    """Plot every distillery as a colored point and open the result.

    The output is written to ``<title>.html``. Coordinates, regions and
    distillery names are read from the module-level ``whisky`` table.

    Parameters
    ----------
    title : str
        Plot title; also used as the base name of the HTML output file.
    colors : sequence of str
        One color per row of ``whisky``, used to fill the points.
    """
    output_file(title + ".html")

    # The coordinate column names carry a leading space, as in the input data.
    columns = {
        "x": whisky[" Latitude"],
        "y": whisky[" Longitude"],
        "colors": colors,
        "regions": whisky.Region,
        "distilleries": whisky.Distillery,
    }
    location_source = ColumnDataSource(data=columns)

    fig = figure(title=title, x_axis_location="above", tools="hover, save")
    fig.plot_width = 400
    fig.plot_height = 500

    # Filled circles without an outline, colored per distillery.
    fig.circle("x", "y", size=9, source=location_source,
               color='colors', line_color=None)
    fig.xaxis.major_label_orientation = np.pi / 3

    # Hovering shows the distillery name and its coordinates.
    hover = fig.select({"type": HoverTool})
    hover.tooltips = {
        "Distillery": "@distilleries",
        "Location": "(@x, @y)",
    }

    show(fig)
# Color each distillery once by its region and once by its cluster group.
region_cols = [region_colors[region] for region in whisky.Region]
classification_cols = [cluster_colors[label] for label in whisky.Group]

# Produce one location plot per coloring scheme.
for plot_title, plot_cols in (
    ("Whisky Locations and Regions", region_cols),
    ("Whisky Locations and Groups", classification_cols),
):
    location_plot(plot_title, plot_cols)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment