Skip to content

Instantly share code, notes, and snippets.

@mjstevens777
Last active July 30, 2020 04:39
Show Gist options
  • Save mjstevens777/e8543e83d61bb9d449b723c009d2c7ac to your computer and use it in GitHub Desktop.
Save mjstevens777/e8543e83d61bb9d449b723c009d2c7ac to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import SpectralClustering
# Download https://www.hindustantimes.com/static/iframes/language_probability_map/data.csv
# The first CSV column supplies the row labels; the remaining columns are
# expected to carry the same labels, i.e. a square label-by-label table.
# NOTE(review): the values look like pairwise percentages (0-100) between
# Indian states/territories -- confirm against the data source.
df = pd.read_csv('data.csv', index_col=0)
names = list(df.index)
# Select rows and columns by label in a single vectorized step, so that
# affinity[i, j] is the entry for (names[i], names[j]) whatever order the
# columns have in the file. A row label with no matching column raises
# KeyError, exactly as a per-cell lookup would.
affinity = df.loc[names, names].to_numpy(dtype=float)

# Lookup from the full (underscore-separated) state / union-territory name to
# the two-letter abbreviation used for axis tick labels and CSV headers.
name_mapping = dict([
    ("Andaman_and_Nicobar_Islands", "AN"),
    ("Andhra_Pradesh", "AP"),
    ("Arunachal_Pradesh", "AR"),
    ("Assam", "AS"),
    ("Bihar", "BR"),
    ("Chandigarh", "CH"),
    ("Chhattisgarh", "CT"),
    ("Dadra_and_Nagar_Haveli", "DN"),
    ("Daman_and_Diu", "DD"),
    ("Nct_Of_Delhi", "DL"),
    ("Goa", "GA"),
    ("Gujarat", "GJ"),
    ("Haryana", "HR"),
    ("Himachal_Pradesh", "HP"),
    ("Jammu_and_Kashmir", "JK"),
    ("Jharkhand", "JH"),
    ("Karnataka", "KA"),
    ("Kerala", "KL"),
    ("Lakshadweep", "LD"),
    ("Madhya_Pradesh", "MP"),
    ("Maharashtra", "MH"),
    ("Manipur", "MN"),
    ("Meghalaya", "ML"),
    ("Mizoram", "MZ"),
    ("Nagaland", "NL"),
    ("Odisha", "OR"),
    ("Puducherry", "PY"),
    ("Punjab", "PB"),
    ("Rajasthan", "RJ"),
    ("Sikkim", "SK"),
    ("Tamil_Nadu", "TN"),
    ("Telangana", "TG"),
    ("Tripura", "TR"),
    ("Uttar_Pradesh", "UP"),
    ("Uttarakhand", "UT"),
    ("West_Bengal", "WB"),
])
# Translate every row label to its two-letter code (KeyError if a label is
# not present in name_mapping), then draw the matrix as a heatmap.
names_short = list(map(name_mapping.__getitem__, names))
tick_positions = np.arange(len(names_short))
plt.imshow(affinity)
# Same labels on both axes; the x labels are moved to the top edge.
plt.xticks(tick_positions, names_short)
plt.gca().xaxis.tick_top()
plt.yticks(tick_positions, names_short)
plt.gcf().set_size_inches(12, 12)

# [image: rendered output of the plot above -- heatmap of the affinity matrix in original row order]

# Cluster the rows using the matrix itself as a precomputed affinity.
# NOTE(review): no random_state is passed, so the label assignment may differ
# between runs -- confirm whether reproducibility matters here.
sc = SpectralClustering(affinity='precomputed', n_clusters=6)
sc.fit(affinity)
label_counts = np.bincount(sc.labels_)
# argsort of the negated counts lists the cluster labels from largest to
# smallest cluster, i.e. it maps rank -> label. Relabelling needs the opposite
# direction (label -> rank), so invert the permutation before indexing with
# the labels; indexing the argsort result directly only yields a size-ordered
# layout when the permutation happens to be its own inverse.
labels_by_size = np.argsort(-label_counts, kind='stable')
label_remap = np.empty_like(labels_by_size)
label_remap[labels_by_size] = np.arange(len(labels_by_size))
labels_reordered = label_remap[sc.labels_]
# A stable sort keeps the original relative order of rows inside each cluster.
sort_idx = np.argsort(labels_reordered, kind='stable')
# Apply the same permutation to rows and columns.
affinity_sorted = affinity[np.ix_(sort_idx, sort_idx)]
names_sorted = [names_short[i] for i in sort_idx]
plt.imshow(affinity_sorted)
tick_positions = np.arange(len(names_sorted))
plt.xticks(tick_positions, names_sorted)
plt.gca().xaxis.tick_top()
plt.yticks(tick_positions, names_sorted)
plt.gcf().set_size_inches(12, 12)

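# [image: rendered output of the plot above -- heatmap of the affinity matrix with rows/columns grouped by cluster]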

# Emit the reordered matrix as CSV text: a header row with an empty corner
# cell followed by the column labels, then one row per label with each value
# formatted to one decimal place.
header_cells = ["", *names_sorted]
print(",".join(header_cells))
for row_label, row_values in zip(names_sorted, affinity_sorted):
    cells = [row_label]
    cells.extend(f"{value:0.1f}" for value in row_values)
    print(",".join(cells))
,AN,AR,BR,CH,CT,DN,DD,GA,GJ,HR,HP,JK,JH,MP,MH,DL,OR,PB,RJ,SK,UP,UT,AS,MN,ML,MZ,NL,PY,TN,KL,LD,TR,WB,AP,KA
AN,69.8,49.1,71.0,75.4,74.1,45.7,60.6,46.4,34.8,75.8,76.6,32.1,69.8,75.9,41.9,77.3,17.4,41.4,75.0,39.8,77.3,77.3,30.1,19.1,15.2,11.0,16.7,23.7,22.1,17.0,16.5,32.9,36.6,23.4,13.8
AR,49.1,45.4,53.4,56.9,55.7,34.5,45.6,35.9,26.3,57.2,57.8,24.6,51.7,57.2,31.5,58.2,13.3,31.3,56.5,34.5,58.2,58.2,26.7,15.6,11.6,7.9,18.5,7.9,5.3,8.6,8.9,13.3,15.7,9.3,9.1
BR,71.0,53.4,81.6,84.1,83.3,51.2,67.7,48.2,39.0,85.3,86.2,36.6,76.8,85.5,46.9,86.7,17.1,45.7,84.6,42.8,87.4,87.1,22.8,16.9,12.7,6.7,13.2,4.2,2.5,8.5,10.6,9.8,13.4,12.3,12.1
CH,75.4,56.9,84.1,92.7,87.6,54.2,71.6,57.4,41.4,91.7,92.1,39.9,80.4,90.0,49.5,91.9,20.7,71.7,89.7,48.2,91.7,91.8,24.9,25.2,16.9,12.0,22.4,14.3,9.8,14.0,14.2,11.9,15.2,15.2,14.8
CT,74.1,55.7,83.3,87.6,87.2,53.5,70.6,50.2,40.7,88.9,89.9,35.7,79.7,89.2,48.8,90.1,20.7,47.8,88.2,44.6,90.8,90.6,23.8,17.5,13.2,6.9,13.7,4.2,2.5,8.9,11.0,10.2,13.8,12.6,11.7
DN,45.7,34.5,51.2,54.2,53.5,61.0,62.2,38.7,60.7,54.7,55.3,22.8,49.0,56.7,41.4,55.6,12.5,29.6,56.0,28.3,55.8,55.7,15.1,12.5,9.1,5.4,10.5,5.0,3.4,7.1,7.9,7.0,9.2,8.4,8.9
DD,60.6,45.6,67.7,71.6,70.6,62.2,76.4,44.4,71.1,72.3,73.1,30.2,64.9,72.6,42.9,73.4,16.7,39.1,71.7,37.5,73.8,73.6,20.4,16.8,12.1,7.4,14.2,6.8,4.6,9.3,10.3,10.2,13.1,11.0,10.9
GA,46.4,35.9,48.2,57.4,50.2,38.7,44.4,81.0,26.3,53.5,53.2,25.7,46.5,52.4,54.0,57.0,14.9,32.7,51.2,31.7,53.1,53.3,15.5,19.6,12.5,9.5,17.5,13.2,9.3,12.4,11.6,8.1,9.9,12.1,18.4
GJ,34.8,26.3,39.0,41.4,40.7,60.7,71.1,26.3,90.0,41.6,42.1,18.0,37.3,42.0,25.5,42.5,9.3,22.6,41.4,21.7,42.5,42.4,11.3,10.4,7.2,4.6,8.9,4.9,3.5,5.7,6.1,5.1,6.7,6.7,6.7
HR,75.8,57.2,85.3,91.7,88.9,54.7,72.3,53.5,41.6,91.6,92.5,38.1,81.5,91.2,49.8,92.6,19.0,56.3,90.4,46.5,92.9,92.8,24.4,20.4,14.7,8.7,16.9,7.5,4.8,10.7,12.2,10.5,14.1,13.3,13.0
HP,76.6,57.8,86.2,92.1,89.9,55.3,73.1,53.2,42.1,92.5,93.4,38.1,82.4,92.2,50.2,93.4,18.8,55.3,91.4,47.4,93.9,93.8,24.5,19.7,14.3,8.1,15.9,6.3,3.9,10.1,12.0,10.2,13.8,12.9,12.6
JK,32.1,24.6,36.6,39.9,35.7,22.8,30.2,25.7,18.0,38.1,38.1,51.2,34.5,37.2,22.8,40.8,9.5,23.4,36.7,21.1,39.3,39.0,10.5,11.0,7.2,4.9,9.3,6.0,4.3,6.2,6.5,4.9,6.8,9.0,9.3
JH,69.8,51.7,76.8,80.4,79.7,49.0,64.9,46.5,37.3,81.5,82.4,34.5,75.0,81.7,44.7,82.8,18.6,43.8,80.8,41.2,83.4,83.2,25.9,16.8,13.3,7.8,13.5,4.6,2.9,8.5,10.3,19.3,23.2,11.8,11.6
MP,75.9,57.2,85.5,90.0,89.2,56.7,72.6,52.4,42.0,91.2,92.2,37.2,81.7,91.7,50.9,92.5,18.3,49.0,90.7,46.0,93.2,93.0,24.2,18.6,13.8,7.4,14.7,5.0,3.0,9.5,11.5,9.9,13.6,12.6,12.4
MH,41.9,31.5,46.9,49.5,48.8,41.4,42.9,54.0,25.5,49.8,50.2,22.8,44.7,50.9,78.3,50.9,11.0,27.2,49.3,26.1,51.0,50.8,13.6,12.4,9.1,5.5,10.6,6.1,4.4,7.0,7.4,6.2,8.2,10.2,13.1
DL,77.3,58.2,86.7,91.9,90.1,55.6,73.4,57.0,42.5,92.6,93.4,40.8,82.8,92.5,50.9,93.9,20.5,53.9,91.5,48.4,94.3,94.1,25.5,23.7,16.4,10.9,20.6,12.0,8.2,13.3,13.9,12.3,15.9,15.1,14.9
OR,17.4,13.3,17.1,20.7,20.7,12.5,16.7,14.9,9.3,19.0,18.8,9.5,18.6,18.3,11.0,20.5,87.6,11.8,18.0,11.6,18.8,18.9,6.9,7.5,4.8,3.9,7.0,5.4,3.7,4.4,4.2,5.0,5.3,6.7,4.4
PB,41.4,31.3,45.7,71.7,47.8,29.6,39.1,32.7,22.6,56.3,55.3,23.4,43.8,49.0,27.2,53.9,11.8,92.7,50.7,26.9,50.0,51.5,13.7,15.2,10.0,7.5,13.9,9.4,6.4,8.6,8.3,6.6,8.4,8.8,8.6
RJ,75.0,56.5,84.6,89.7,88.2,56.0,71.7,51.2,41.4,90.4,91.4,36.7,80.8,90.7,49.3,91.5,18.0,50.7,89.8,45.4,92.2,92.0,23.8,18.2,13.5,7.2,14.3,4.8,2.8,9.2,11.3,9.7,13.4,12.4,12.1
SK,39.8,34.5,42.8,48.2,44.6,28.3,37.5,31.7,21.7,46.5,47.4,21.1,41.2,46.0,26.1,48.4,11.6,26.9,45.4,80.4,46.9,47.5,15.1,16.2,10.8,7.7,14.6,8.9,6.0,8.4,8.3,7.6,10.1,8.6,8.3
UP,77.3,58.2,87.4,91.7,90.8,55.8,73.8,53.1,42.5,92.9,93.9,39.3,83.4,93.2,51.0,94.3,18.8,50.0,92.2,46.9,95.1,94.8,24.6,19.1,14.1,7.6,15.1,5.4,3.3,9.7,11.8,10.1,13.9,13.3,13.1
UT,77.3,58.2,87.1,91.8,90.6,55.7,73.6,53.3,42.4,92.8,93.8,39.0,83.2,93.0,50.8,94.1,18.9,51.5,92.0,47.5,94.8,94.6,25.1,19.4,14.3,8.0,15.6,5.9,3.6,9.9,11.9,11.3,15.2,13.3,13.1
AS,30.1,26.7,22.8,24.9,23.8,15.1,20.4,15.5,11.3,24.4,24.5,10.5,25.9,24.2,13.6,25.5,6.9,13.7,23.8,15.1,24.6,25.1,64.4,7.5,10.4,6.8,30.3,3.1,2.0,3.5,3.8,36.2,38.1,4.0,3.9
MN,19.1,15.6,16.9,25.2,17.5,12.5,16.8,19.6,10.4,20.4,19.7,11.0,16.8,18.6,12.4,23.7,7.5,15.2,18.2,16.2,19.1,19.4,7.5,54.6,6.8,6.7,11.1,9.0,6.1,7.1,6.4,5.5,5.5,5.7,5.4
ML,15.2,11.6,12.7,16.9,13.2,9.1,12.1,12.5,7.2,14.7,14.3,7.2,13.3,13.8,9.1,16.4,4.8,10.0,13.5,10.8,14.1,14.3,10.4,6.8,37.1,4.3,7.4,4.6,3.1,3.9,3.7,11.0,11.5,3.4,3.3
MZ,11.0,7.9,6.7,12.0,6.9,5.4,7.4,9.5,4.6,8.7,8.1,4.9,7.8,7.4,5.5,10.9,3.9,7.5,7.2,7.7,7.6,8.0,6.8,6.7,4.3,69.6,5.5,4.5,3.0,3.6,3.3,12.0,11.3,2.8,2.6
NL,16.7,18.5,13.2,22.4,13.7,10.5,14.2,17.5,8.9,16.9,15.9,9.3,13.5,14.7,10.6,20.6,7.0,13.9,14.3,14.6,15.1,15.6,30.3,11.1,7.4,5.5,24.3,8.0,5.4,6.5,6.0,6.6,7.0,5.1,4.7
PY,23.7,7.9,4.2,14.3,4.2,5.0,6.8,13.2,4.9,7.5,6.3,6.0,4.6,5.0,6.1,12.0,5.4,9.4,4.8,8.9,5.4,5.9,3.1,9.0,4.6,4.5,8.0,83.0,87.7,11.7,8.7,2.5,2.4,11.5,8.9
TN,22.1,5.3,2.5,9.8,2.5,3.4,4.6,9.3,3.5,4.8,3.9,4.3,2.9,3.0,4.4,8.2,3.7,6.4,2.8,6.0,3.3,3.6,2.0,6.1,3.1,3.0,5.4,87.7,93.0,7.8,5.2,1.7,1.6,11.3,10.1
KL,17.0,8.6,8.5,14.0,8.9,7.1,9.3,12.4,5.7,10.7,10.1,6.2,8.5,9.5,7.0,13.3,4.4,8.6,9.2,8.4,9.7,9.9,3.5,7.1,3.9,3.6,6.5,11.7,7.8,96.4,82.8,2.3,2.5,3.6,5.5
LD,16.5,8.9,10.6,14.2,11.0,7.9,10.3,11.6,6.1,12.2,12.0,6.5,10.3,11.5,7.4,13.9,4.2,8.3,11.3,8.3,11.8,11.9,3.8,6.4,3.7,3.3,6.0,8.7,5.2,82.8,71.1,2.4,2.7,3.3,4.3
TR,32.9,13.3,9.8,11.9,10.2,7.0,10.2,8.1,5.1,10.5,10.2,4.9,19.3,9.9,6.2,12.3,5.0,6.6,9.7,7.6,10.1,11.3,36.2,5.5,11.0,12.0,6.6,2.5,1.7,2.3,2.4,82.9,82.1,2.2,2.1
WB,36.6,15.7,13.4,15.2,13.8,9.2,13.1,9.9,6.7,14.1,13.8,6.8,23.2,13.6,8.2,15.9,5.3,8.4,13.4,10.1,13.9,15.2,38.1,5.5,11.5,11.3,7.0,2.4,1.6,2.5,2.7,82.1,85.0,2.8,2.7
AP,23.4,9.3,12.3,15.2,12.6,8.4,11.0,12.1,6.7,13.3,12.9,9.0,11.8,12.6,10.2,15.1,6.7,8.8,12.4,8.6,13.3,13.3,4.0,5.7,3.4,2.8,5.1,11.5,11.3,3.6,3.3,2.2,2.8,86.7,15.7
KA,13.8,9.1,12.1,14.8,11.7,8.9,10.9,18.4,6.7,13.0,12.6,9.3,11.6,12.4,13.1,14.9,4.4,8.6,12.1,8.3,13.1,13.1,3.9,5.4,3.3,2.6,4.7,8.9,10.1,5.5,4.3,2.1,2.7,15.7,78.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment