Last active
April 9, 2017 02:09
-
-
Save chemikadze/f3a1deca6fe8c0822ab81960911361c1 to your computer and use it in GitHub Desktop.
Strava ride clustering
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from IPython.lib.pretty import pprint
from numpy import isnan
from pyspark.mllib.clustering import KMeans, KMeansModel
from pyspark.mllib.linalg import Vectors
# hypothesis: mtb offroad normal, mtb road normal, mtb flat intervals, mtb hill intervals, road normal
def do_clustering(sc, sqlCtx,
                  activities_path="gs://chemikadze/strava-analysis/activities_full.json",
                  max_clusters=14, num_iterations=50):
    """Sweep k-means over ride activities and print every fitted model.

    Reads the activities JSON, keeps rows of type "Ride" with
    ``average_heartrate > 60`` and ``distance > 5``, and turns each into a
    3-component feature vector:

    * ``average_speed * 3.6`` (presumably m/s -> km/h -- confirm units),
    * ``average_heartrate``,
    * ``total_elevation_gain / distance * 1000`` (presumably metres climbed
      per kilometre -- confirm units).

    For every cluster count from 1 to ``max_clusters`` inclusive a model is
    trained and its cost (WSSE) and cluster centers are printed.

    Args:
        sc: Spark context. Not used by the body; kept so existing callers
            keep working.
        sqlCtx: SQL context whose ``sparkSession`` is used to read the JSON.
        activities_path: Location of the activities JSON file.
        max_clusters: Largest number of clusters to try (inclusive).
        num_iterations: Maximum k-means iterations per model.

    Returns:
        None. Results are printed.
    """
    # Nested functions (not module-level ones) so they are shipped to the
    # executors the same way the original inline lambdas were.
    def is_usable_ride(r):
        # Fields absent from a JSON record come back as None. Comparing None
        # with ">" raises TypeError on Python 3 and "None * 3.6" raises on
        # any version, so rows with a missing feature input are skipped.
        needed = (r.average_speed, r.average_heartrate,
                  r.total_elevation_gain, r.distance)
        if any(value is None for value in needed):
            return False
        # distance > 5 also guarantees a non-zero divisor in ride_features.
        return r.average_heartrate > 60 and r.distance > 5 and r.type == "Ride"

    def ride_features(r):
        return Vectors.dense(
            r.average_speed * 3.6,
            r.average_heartrate,
            r.total_elevation_gain / r.distance * 1000,
        )

    ss = sqlCtx.sparkSession
    activities = ss.read.json(activities_path)

    # The same RDD is scanned by every train/computeCost call below, so it
    # is cached once up front.
    train_data = activities.rdd.filter(is_usable_ride).map(ride_features).persist()
    try:
        # range() instead of the Python 2-only xrange(); the sweep is tiny.
        for num_clusters in range(1, max_clusters + 1):
            clusters = KMeans.train(train_data, num_clusters,
                                    maxIterations=num_iterations)
            wsse = clusters.computeCost(train_data)
            print("=====================")
            print("clusters: %s wsse: %s" % (num_clusters, wsse))
            pprint(clusters.clusterCenters)
            print("")
    finally:
        # Release the cached partitions even if training fails part-way.
        train_data.unpersist()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
===================== | |
clusters: 1 wsse: 93584.7642615 | |
[array([ 18.49169096, 141.5290411 , 9.57007626])] | |
===================== | |
clusters: 2 wsse: 51653.9041397 | |
[array([ 18.41273115, 148.92459016, 11.07617648]), | |
array([ 18.6509157 , 126.61570248, 6.53298159])] | |
===================== | |
clusters: 3 wsse: 41942.6155675 | |
[array([ 10.8177 , 128.0875 , 27.56189311]), | |
array([ 18.56358238, 149.54273128, 10.44243336]), | |
array([ 19.96411579, 128.40175439, 4.0452634 ])] | |
===================== | |
clusters: 4 wsse: 27931.9319376 | |
[array([ 15.1584 , 116.85416667, 9.46983675]), | |
array([ 18.28769268, 150.4152439 , 10.02375824]), | |
array([ 12.80055484, 151.33548387, 28.78192362]), | |
array([ 21.52348525, 136.8 , 4.11794825])] | |
===================== | |
clusters: 5 wsse: 24669.9852226 | |
[array([ 13.24812857, 153.46785714, 28.84986639]), | |
array([ 21.85741463, 140.95284553, 4.24549706]), | |
array([ 12.22992 , 117.18 , 17.74435366]), | |
array([ 17.52289254, 151.24701493, 11.11531333]), | |
array([ 18.84076364, 124.13090909, 4.18226563])] | |
===================== | |
clusters: 6 wsse: 20112.4224287 | |
[array([ 11.56225263, 124.28947368, 22.40440807]), | |
array([ 21.85616629, 134.52696629, 3.41238669]), | |
array([ 11.26877143, 151.73571429, 34.54716717]), | |
array([ 16.11535135, 154.51486486, 14.96312391]), | |
array([ 19.8342 , 146.87205882, 7.40644953]), | |
array([ 16.26774545, 114.86969697, 5.01463697])] | |
===================== | |
clusters: 7 wsse: 17435.3423657 | |
[array([ 21.32876471, 144.41666667, 5.3934587 ]), | |
array([ 9.238 , 139.31111111, 37.13987924]), | |
array([ 16.0111125 , 114.6375 , 5.17134437]), | |
array([ 12.062475 , 122.975 , 20.5382479]), | |
array([ 17.11121416, 151.65132743, 11.83406696]), | |
array([ 14.50422857, 159.02857143, 25.07988355]), | |
array([ 21.49 , 132.8 , 3.48144481])] | |
===================== | |
clusters: 8 wsse: 16078.0656302 | |
[array([ 22.34222222, 144.7382716 , 4.35659797]), | |
array([ 14.05816364, 157.26818182, 27.6377888 ]), | |
array([ 10.57896 , 129.23333333, 28.20258724]), | |
array([ 21.20078919, 132.99054054, 3.81229138]), | |
array([ 17.8422 , 155.56481481, 10.10725364]), | |
array([ 12.6492 , 107.56666667, 14.6033539 ]), | |
array([ 16.46404138, 117.10344828, 4.61640735]), | |
array([ 16.64377778, 147.28395062, 12.5421726 ])] | |
===================== | |
clusters: 9 wsse: 14688.4978365 | |
[array([ 21.75560597, 132.06567164, 2.95348401]), | |
array([ 16.64314286, 159.86190476, 11.4000495 ]), | |
array([ 15.52347 , 151.08 , 16.22124704]), | |
array([ 10.25112 , 128.23333333, 28.66353089]), | |
array([ 15.13058824, 114.17941176, 7.09277694]), | |
array([ 17.21630204, 141.22653061, 10.66913325]), | |
array([ 19.2331125 , 150.6421875 , 8.57764353]), | |
array([ 13.75155 , 158.36875 , 29.87031924]), | |
array([ 23.02791864, 144.21525424, 3.15515734])] | |
===================== | |
clusters: 10 wsse: 12686.6834652 | |
[array([ 22.83398491, 136.11698113, 2.25672241]), | |
array([ 18.45165405, 129.16756757, 7.29148748]), | |
array([ 9.238 , 139.31111111, 37.13987924]), | |
array([ 14.34305455, 163.17272727, 28.23340588]), | |
array([ 17.403225 , 143.31875 , 10.68176622]), | |
array([ 10.94 , 114.21111111, 23.83249491]), | |
array([ 16.14888 , 114.83333333, 4.75670706]), | |
array([ 15.393 , 150.49722222, 17.95108856]), | |
array([ 22.58628197, 145.91639344, 3.92830588]), | |
array([ 17.82319437, 154.15492958, 9.90247663])] | |
===================== | |
clusters: 11 wsse: 12264.6880846 | |
[array([ 15.49392632, 150.26578947, 17.76128292]), | |
array([ 15.81955714, 114.33571429, 5.08836354]), | |
array([ 22.8807 , 138.785 , 2.74174112]), | |
array([ 16.48877143, 163.32857143, 15.09638114]), | |
array([ 12.71232 , 157.37 , 33.12208065]), | |
array([ 10.90035 , 113.05 , 23.34386922]), | |
array([ 9.92847273, 131.94545455, 29.4181611 ]), | |
array([ 20.51113171, 128.96585366, 3.90980567]), | |
array([ 17.7985125 , 152.496875 , 9.63050466]), | |
array([ 17.48228936, 141.50212766, 10.58679503]), | |
array([ 22.47267273, 147.85 , 4.18217891])] | |
===================== | |
clusters: 12 wsse: 11202.7464003 | |
[array([ 22.63210435, 147.57826087, 3.95245456]), | |
array([ 10.23325714, 124.77142857, 29.60598989]), | |
array([ 17.546475 , 143.30416667, 11.29879874]), | |
array([ 16.33896 , 163.14666667, 13.60581841]), | |
array([ 8.18424 , 142.88 , 41.44058217]), | |
array([ 15.23178 , 131.32 , 11.40703088]), | |
array([ 17.18361429, 151.975 , 11.36787419]), | |
array([ 22.73854426, 138.26229508, 2.91989145]), | |
array([ 11.2056 , 109.13333333, 19.39290411]), | |
array([ 14.5412 , 155.43888889, 26.59558906]), | |
array([ 22.2064 , 128.11111111, 2.10771663]), | |
array([ 15.88795714, 114.925 , 4.90173801])] | |
===================== | |
clusters: 13 wsse: 10312.657661 | |
[array([ 18.560775 , 154.2625 , 8.62968799]), | |
array([ 8.0862 , 134.56666667, 36.95139746]), | |
array([ 14.6286 , 130.36875 , 12.24990262]), | |
array([ 22.5279 , 147.06666667, 3.91323292]), | |
array([ 23.66271429, 137.2047619 , 1.61120476]), | |
array([ 22.03416 , 127.76 , 2.22510478]), | |
array([ 14.6718 , 175.9 , 35.53503772]), | |
array([ 15.39189474, 153.91842105, 15.83886042]), | |
array([ 19.22241951, 139.92926829, 7.37384222]), | |
array([ 15.81331034, 114.63103448, 5.14771645]), | |
array([ 17.1364 , 146.15111111, 12.33139923]), | |
array([ 10.94 , 114.21111111, 23.83249491]), | |
array([ 13.713075 , 154.85 , 28.50306786])] | |
===================== | |
clusters: 14 wsse: 9079.07954898 | |
[array([ 17.38536585, 142.74146341, 10.88346639]), | |
array([ 15.81189231, 116.03076923, 5.07582023]), | |
array([ 8.45055 , 138.5375 , 38.03433723]), | |
array([ 14.9319 , 102. , 8.3211653]), | |
array([ 22.71531429, 136.34285714, 2.4798319 ]), | |
array([ 20.07687857, 150.35178571, 7.47258486]), | |
array([ 13.1922 , 170.5 , 32.14681167]), | |
array([ 16.6968 , 159.44347826, 11.19629668]), | |
array([ 22.03416 , 127.76 , 2.22510478]), | |
array([ 23.24347826, 144.18478261, 2.98302143]), | |
array([ 15.23178 , 131.32 , 11.40703088]), | |
array([ 15.96889565, 150.80869565, 14.14488373]), | |
array([ 10.77615 , 115.925 , 24.81503051]), | |
array([ 14.8509 , 153.7375 , 24.90207169])] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment