Skip to content

Instantly share code, notes, and snippets.

@Fil
Last active January 30, 2017 14:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Fil/6d928b3f5fa49a357e513d0402988f20 to your computer and use it in GitHub Desktop.
Save Fil/6d928b3f5fa49a357e513d0402988f20 to your computer and use it in GitHub Desktop.
t-SNE Alexa upstream links + taille wolfram [UNLISTED]
license: mit
site up1 up1pc up2 up2pc up3 up3pc up4 up4pc up5 up5pc
360.cn baidu.com 19.9% sohu.com 6.6% hao123.com 5.7% taobao.com 4.9% weibo.com 4.4%
360.com baidu.com 33.2% 360.cn 14.0% sohu.com 8.9% qq.com 7.6% hao123.com 5.8%
adexchangeprediction.com popads.net 21.7% onclickads.net 3.5% userscloud.com 2.9% onclkds.com 2.4% pttsite.com 1.9%
adf.ly youtube.com 9.7% google.com 2.6% sh.st 1.6% mediafire.com 1.4% adplxmd.com 1.4%
adobe.com google.com 9.8% youtube.com 3.7% google.ru 2.9% vk.com 2.6% adobelogin.com 2.5%
alibaba.com google.com 5.3% aliexpress.com 4.6% clicksgear.com 3.7% 4dsply.com 3.0% spotscenered.info 2.8%
aliexpress.com vk.com 8.1% google.ru 7.7% youtube.com 5.5% yandex.ru 4.6% google.com 3.3%
alipay.com aliexpress.com 54.5% taobao.com 7.7% privatbank.ua 3.2% sbrf.ru 2.9% yandex.ru 2.2%
amazon.co.jp google.co.jp 19.9% yahoo.co.jp 5.6% google.com 4.7% youtube.com 2.3% amazon.com 2.2%
amazon.co.uk google.co.uk 16.9% google.com 6.9% amazon.com 4.3% youtube.com 3.3% ebay.co.uk 2.7%
amazon.com google.com 24.1% youtube.com 3.4% facebook.com 2.6% ebay.com 2.0% yahoo.com 1.5%
amazon.de google.de 19.7% youtube.com 3.9% google.com 3.5% ebay.de 3.2% amazon.com 2.0%
amazon.in google.co.in 32.1% flipkart.com 6.4% google.com 6.0% youtube.com 3.7% snapdeal.com 3.1%
amazonaws.com google.com 8.8% instructure.com 3.7% google.co.in 2.1% amazon.com 1.5% onclkds.com 1.4%
apple.com google.com 17.5% google.ru 3.9% youtube.com 3.3% google.co.in 2.2% icloud.com 1.8%
ask.com youtube.com 8.3% google.com 7.2% yahoo.com 3.1% myway.com 1.8% facebook.com 1.4%
baidu.com qq.com 3.6% google.com 3.5% 360.cn 2.3% sina.com.cn 2.0% taobao.com 1.9%
bbc.co.uk google.co.uk 15.9% google.com 9.3% bbc.com 5.3% facebook.com 3.7% youtube.com 2.9%
bbc.com google.com 10.8% bbc.co.uk 10.2% facebook.com 2.9% youtube.com 2.7% google.co.in 1.7%
bet365.com onclkds.com 7.6% popads.net 7.3% onclickads.net 5.0% mediafire.com 4.0% clevernt.com 2.2%
bing.com google.com 4.7% msn.com 4.1% youtube.com 3.0% yahoo.com 1.3% live.com 1.2%
blogger.com google.com 16.2% youtube.com 6.1% google.co.id 5.0% google.co.in 3.4% facebook.com 3.4%
blogspot.com google.com 7.9% facebook.com 6.6% yahoo.com 6.3% bing.com 5.9% baidu.com 0.5%
blogspot.in google.co.in 10.3% google.com 10.3% baidu.com 4.4% soso.com 3.1% blogger.com 2.6%
booking.com google.com 8.3% google.ru 3.3% yandex.ru 2.9% google.com.ua 1.8% facebook.com 1.6%
buzzfeed.com google.com 18.7% facebook.com 12.0% youtube.com 3.6% google.co.in 1.7% pinterest.com 1.7%
chase.com google.com 29.5% amazon.com 3.1% jpmorganchase.com 2.4% facebook.com 2.3% bankofamerica.com 2.1%
china.com.cn baidu.com 22.5% sohu.com 7.6% qq.com 5.8% 360.cn 5.8% taobao.com 4.3%
cloudfront.net google.com 6.8% youtube.com 2.8% google.co.in 2.4% popads.net 1.3% coursera.org 1.1%
cnet.com google.com 18.3% google.co.in 6.6% yahoo.com 3.7% youtube.com 1.9% google.co.id 1.9%
cnn.com google.com 26.2% facebook.com 4.4% yahoo.com 3.1% youtube.com 2.1% reddit.com 1.7%
cnzz.com baidu.com 48.1% sohu.com 14.2% hao123.com 10.4% soso.com 10.1% taobao.com 9.3%
craigslist.org google.com 33.3% facebook.com 3.9% yahoo.com 2.9% youtube.com 2.8% ebay.com 2.6%
dailymail.co.uk google.com 15.8% google.co.uk 5.4% facebook.com 3.8% drudgereport.com 3.2% youtube.com 2.9%
dailymotion.com google.com 11.2% youtube.com 7.6% google.co.in 5.2% google.com.tr 3.5% tr-sanat.info 3.2%
daum.net google.co.kr 14.4% naver.com 13.9% google.com 6.2% youtube.com 3.8% tistory.com 2.6%
doubleclick.net google.com 3.0% youtube.com 2.6% googlesyndication.com 1.0% facebook.com 1.0% googleapis.com 0.8%
dropbox.com google.com 19.4% youtube.com 4.2% facebook.com 2.9% vk.com 1.7% yahoo.com 1.6%
ebay.com google.com 15.5% amazon.com 4.3% youtube.com 3.5% google.ru 2.6% aliexpress.com 2.6%
ebay.de google.de 17.7% amazon.de 4.9% ebay-kleinanzeigen.de 3.2% youtube.com 2.9% google.com 2.7%
espn.com google.com 25.1% facebook.com 5.5% yahoo.com 3.3% youtube.com 2.8% reddit.com 2.5%
facebook.com google.com 11.1% youtube.com 5.9% twitter.com 1.6% google.co.in 1.2% yahoo.com 1.0%
fbcdn.net facebook.com 33.1% youtube.com 5.5% google.com 5.3% instagram.com 2.5% savefrom.net 1.9%
fc2.com google.co.jp 18.9% google.com 4.0% yahoo.co.jp 3.7% youtube.com 2.7% livedoor.jp 1.5%
flipkart.com google.co.in 29.6% amazon.in 8.6% google.com 5.5% youtube.com 3.5% snapdeal.com 3.4%
force.com salesforce.com 36.5% google.com 8.4% okta.com 3.1% ustraveldocs.com 1.5% linkedin.com 1.3%
github.com google.com 10.8% stackoverflow.com 4.0% github.io 2.6% google.co.in 2.5% youtube.com 2.0%
github.io google.com 13.0% github.com 8.1% google.co.in 3.7% stackoverflow.com 2.5% google.ru 2.5%
globo.com google.com.br 28.2% google.com 4.9% youtube.com 4.8% facebook.com 3.0% uol.com.br 1.6%
godaddy.com google.com 14.5% secureserver.net 5.5% google.co.in 5.5% facebook.com 3.2% youtube.com 1.9%
google.ca youtube.com 5.4% google.com 4.9% facebook.com 2.3% wikipedia.org 1.9% reddit.com 1.2%
google.co.id youtube.com 6.1% google.com 5.8% yahoo.com 2.7% myway.com 1.3% wikipedia.org 1.0%
google.co.in youtube.com 7.1% google.com 6.6% wikipedia.org 1.6% yahoo.com 1.4% amazon.in 1.2%
google.co.jp yahoo.co.jp 3.2% google.com 3.0% youtube.com 2.8% amazon.co.jp 1.8% wikipedia.org 1.6%
google.co.kr naver.com 8.8% youtube.com 5.7% google.com 3.9% daum.net 3.0% tistory.com 2.5%
google.co.th youtube.com 7.5% google.com 3.5% facebook.com 2.5% pantip.com 2.5% wikipedia.org 1.1%
google.co.uk youtube.com 4.7% google.com 4.0% facebook.com 2.3% wikipedia.org 1.8% amazon.co.uk 1.6%
google.com.ar youtube.com 8.5% google.com 3.7% facebook.com 2.3% mercadolibre.com.ar 2.3% wikipedia.org 1.6%
google.com.au youtube.com 5.5% google.com 4.0% facebook.com 2.0% wikipedia.org 1.6% ebay.com.au 1.3%
google.com.br youtube.com 6.8% google.com 3.9% globo.com 2.0% facebook.com 1.7% mercadolivre.com.br 1.5%
google.com.eg youtube.com 8.1% google.com 3.6% facebook.com 2.3% wikipedia.org 1.4% hao123.com 1.2%
google.com.hk google.com 5.8% youtube.com 4.4% facebook.com 2.3% baidu.com 2.2% wikipedia.org 1.9%
google.com youtube.com 7.0% facebook.com 2.1% google.co.in 1.9% yahoo.com 1.2% wikipedia.org 1.1%
google.com.mx youtube.com 8.2% google.com 3.9% facebook.com 2.3% yahoo.com 2.0% wikipedia.org 2.0%
google.com.pk youtube.com 9.5% google.com 6.3% facebook.com 2.1% wikipedia.org 1.4% yahoo.com 1.1%
google.com.sa youtube.com 7.7% google.com 5.1% wikipedia.org 1.5% facebook.com 1.0% yahoo.com 0.9%
google.com.tr youtube.com 6.4% google.com 3.0% facebook.com 1.6% wikipedia.org 1.0% sahibinden.com 0.9%
google.com.tw youtube.com 5.4% facebook.com 5.1% google.com 5.0% yahoo.com 3.7% pixnet.net 2.5%
google.de youtube.com 4.4% google.com 2.6% wikipedia.org 2.0% amazon.de 1.9% ebay.de 1.3%
google.es google.com 4.7% youtube.com 4.7% facebook.com 1.7% wikipedia.org 1.5% amazon.es 1.1%
google.fr youtube.com 4.4% google.com 4.0% facebook.com 1.8% wikipedia.org 1.7% amazon.fr 0.9%
google.it youtube.com 3.9% google.com 3.5% facebook.com 2.4% wikipedia.org 1.9% amazon.it 1.6%
google.nl youtube.com 4.8% google.com 4.6% facebook.com 1.8% wikipedia.org 1.8% live.com 0.6%
google.pl youtube.com 5.0% facebook.com 3.0% google.com 3.0% wikipedia.org 1.9% allegro.pl 1.8%
google.ru vk.com 9.1% youtube.com 6.2% google.com 2.2% yandex.ru 2.1% wikipedia.org 1.8%
googleusercontent.com google.com 43.1% google.co.in 3.5% google.ru 3.3% youtube.com 1.9% google.com.ua 1.5%
hao123.com youtube.com 6.1% baidu.com 5.6% google.com.eg 3.6% qq.com 3.0% google.com 3.0%
haosou.com baidu.com 39.5% sohu.com 9.6% 360.cn 8.6% qq.com 8.2% hao123.com 7.1%
huffingtonpost.com google.com 27.8% facebook.com 7.3% yahoo.com 2.9% youtube.com 1.7% reddit.com 1.5%
imdb.com google.com 17.0% google.co.in 5.4% youtube.com 4.0% wikipedia.org 3.2% google.co.uk 1.7%
imgur.com reddit.com 29.3% google.com 6.1% youtube.com 5.2% google.ru 2.5% facebook.com 2.3%
indiatimes.com google.co.in 29.2% google.com 8.1% facebook.com 3.2% youtube.com 3.1% yahoo.com 1.4%
instagram.com vk.com 15.5% youtube.com 8.0% google.com 7.2% google.ru 5.1% facebook.com 2.8%
isanalyze.com janjuaplayer.com 4.5% facebook.com 4.1% mipsplayer.com 4.1% doubleclick.net 3.8% youtube.com 3.6%
iwanttodeliver.com xvideos.com 12.1% google.com 9.0% google.co.in 7.2% xnxx.com 6.1% yahoo.com 2.8%
linkedin.com google.com 18.7% facebook.com 4.8% google.co.in 3.6% youtube.com 2.1% yahoo.com 2.1%
live.com google.com 10.7% msn.com 4.2% youtube.com 3.3% facebook.com 3.1% microsoft.com 2.8%
livejasmin.com xhamster.com 12.5% tnaflix.com 10.2% gotporn.com 5.2% pornhub.com 4.6% justporno.tv 3.0%
mail.ru vk.com 7.2% google.ru 5.8% ok.ru 5.2% youtube.com 3.7% yandex.ru 3.7%
mediafire.com youtube.com 11.7% adf.ly 8.6% google.com 3.0% 4dsply.com 1.7% bet365.com 1.3%
microsoft.com google.com 10.3% live.com 7.6% google.ru 4.8% google.co.in 3.8% youtube.com 2.9%
microsoftonline.com office.com 28.0% google.com 8.0% live.com 3.3% office365.com 2.0% google.co.in 1.3%
mozilla.org google.com 11.0% youtube.com 3.5% google.co.in 3.1% google.ru 2.9% google.com.ua 2.3%
msn.com live.com 29.1% google.com 6.0% youtube.com 3.7% yahoo.com 2.4% bing.com 1.6%
naver.com google.co.kr 11.5% youtube.com 4.8% google.com 4.5% daum.net 3.9% tistory.com 2.9%
netflix.com google.com 25.3% youtube.com 7.6% facebook.com 4.2% google.com.br 4.0% google.ca 2.5%
nytimes.com google.com 27.3% facebook.com 5.5% t.co 2.2% reddit.com 1.5% youtube.com 1.5%
office.com microsoftonline.com 33.1% google.com 9.0% live.com 3.2% microsoft.com 1.5% google.co.in 1.4%
ok.ru vk.com 13.0% google.ru 9.0% mail.ru 6.3% yandex.ru 5.7% google.com.ua 4.4%
onclickads.net popads.net 2.3% openload.co 1.6% youtube.com 1.5% indexmovie.xyz 1.4% 123movies.is 1.2%
outbrain.com cnn.com 4.9% foxnews.com 3.7% msn.com 3.5% google.com 2.3% bbc.com 2.1%
paypal.com google.com 13.4% facebook.com 3.3% ebay.com 3.2% youtube.com 1.6% yahoo.com 1.0%
people.com.cn baidu.com 33.0% sohu.com 7.0% 360.cn 6.0% qq.com 5.9% svpressa.ru 4.8%
pinterest.com google.com 23.9% facebook.com 5.4% youtube.com 2.1% twitter.com 1.8% google.co.in 1.5%
popads.net onclkds.com 1.7% drakorindo.com 1.7% popcash.net 1.6% openload.co 1.5% poptm.com 1.2%
popcash.net popads.net 6.5% poptm.com 4.1% popmyads.com 2.9% onclkds.com 2.8% clickadu.com 1.6%
pornhub.com google.com 7.4% doublepimp.com 3.7% google.co.in 2.8% youtube.com 2.4% trafficserving.com 2.4%
qq.com baidu.com 14.3% google.com 3.6% taobao.com 2.9% weibo.com 2.3% hao123.com 2.3%
quora.com google.com 27.5% google.co.in 12.5% facebook.com 3.3% google.co.uk 2.1% youtube.com 2.1%
rakuten.co.jp google.co.jp 20.0% yahoo.co.jp 6.9% google.com 4.6% amazon.co.jp 4.5% rakuten.ne.jp 3.7%
reddit.com google.com 11.7% imgur.com 7.5% youtube.com 7.1% reddituploads.com 3.2% facebook.com 3.0%
salesforce.com force.com 14.6% google.com 10.1% linkedin.com 3.3% facebook.com 1.9% youtube.com 1.2%
savefrom.net youtube.com 39.6% google.com 5.0% googlevideo.com 4.3% google.co.in 2.3% google.co.id 1.8%
sina.com.cn baidu.com 16.3% weibo.com 5.4% google.com 5.2% qq.com 4.4% hao123.com 3.2%
slideshare.net google.com 16.3% google.co.in 7.4% google.co.id 5.1% linkedin.com 3.5% myway.com 2.2%
snapdeal.com google.co.in 29.7% amazon.in 9.1% flipkart.com 7.5% google.com 6.2% irctc.co.in 5.5%
so.com baidu.com 41.9% sohu.com 12.1% hao123.com 8.8% soso.com 8.7% taobao.com 7.9%
sohu.com baidu.com 9.2% 360.cn 4.7% soso.com 3.4% cnzz.com 3.0% so.com 3.0%
soso.com baidu.com 20.7% sohu.com 6.2% hao123.com 5.3% 360.cn 5.2% taobao.com 4.3%
soundcloud.com google.com 13.3% youtube.com 10.7% vk.com 4.5% facebook.com 4.2% google.ru 3.9%
spotify.com google.com 26.3% youtube.com 6.1% facebook.com 4.5% google.ca 1.4% google.com.br 1.3%
stackexchange.com google.com 23.5% google.co.in 9.1% google.ru 3.4% stackoverflow.com 3.1% google.co.uk 2.2%
stackoverflow.com google.com 15.6% google.co.in 10.5% google.ru 3.2% google.com.ua 2.4% github.com 1.8%
t.co twitter.com 18.8% google.com 4.2% youtube.com 3.9% facebook.com 2.8% instagram.com 2.4%
taobao.com baidu.com 6.2% detail.tmall.com 4.7% google.com 3.7% alipay.com 3.3% world.tmall.com 2.6%
terraclicks.com thevideo.me 8.4% clicksgear.com 7.8% torrentproject.se 4.7% anyporn.com 3.4% xl415.com 2.2%
theguardian.com google.com 14.9% google.co.uk 6.6% facebook.com 3.9% reddit.com 3.2% google.co.in 2.8%
thepiratebay.org google.com 9.0% protect-your-privacy.net 5.2% youtube.com 4.9% imdb.com 2.6% ablogica.com 2.1%
thewhizmarketing.com onclkds.com 2.9% adf.ly 2.5% thewhizproducts.com 2.4% clicksgear.com 2.3% adplxmd.com 2.0%
tmall.com baidu.com 25.7% sohu.com 7.4% taobao.com 7.0% service.tmall.com 6.7% soso.com 5.7%
trello.com google.com 18.4% facebook.com 4.0% youtube.com 2.6% google.ru 1.6% vk.com 1.4%
tudou.com baidu.com 18.2% google.com 7.1% soku.com 6.4% youku.com 3.4% youtube.com 3.3%
tumblr.com google.com 8.9% youtube.com 5.7% google.ru 3.0% vk.com 2.9% facebook.com 2.4%
twitch.tv youtube.com 17.5% vk.com 11.9% google.ru 8.3% google.com 5.5% reddit.com 3.3%
twitter.com google.com 12.0% facebook.com 9.1% youtube.com 4.4% t.co 2.9% reddit.com 2.0%
uol.com.br google.com.br 14.6% globo.com 5.6% esporte.uol.com.br 4.4% google.com 3.9% noticias.uol.com.br 3.9%
vimeo.com google.com 12.9% youtube.com 7.9% facebook.com 3.4% vk.com 3.2% google.ru 3.0%
vk.com google.ru 10.2% youtube.com 8.5% google.com.ua 4.6% yandex.ru 4.2% ok.ru 3.4%
w3schools.com google.com 15.4% google.co.in 13.2% stackoverflow.com 4.0% youtube.com 1.9% google.com.tr 1.8%
washingtonpost.com google.com 25.2% yahoo.com 5.2% facebook.com 5.1% reddit.com 3.4% t.co 2.7%
weather.com google.com 37.6% facebook.com 4.2% yahoo.com 2.7% youtube.com 1.9% amazon.com 1.1%
weibo.com sina.com.cn 13.5% baidu.com 10.8% google.com 3.9% qq.com 3.5% 360.cn 3.1%
wetransfer.com google.com 26.2% youtube.com 3.3% live.com 3.2% google.com.tr 3.0% facebook.com 2.8%
whatsapp.com google.com 12.0% facebook.com 9.6% youtube.com 4.7% google.co.in 3.8% google.com.br 3.0%
wikia.com google.com 16.1% google.ru 11.7% youtube.com 5.5% google.com.ua 3.3% vk.com 3.1%
wikihow.com google.com 19.1% google.co.in 6.9% google.ru 5.1% youtube.com 2.6% google.com.ua 2.5%
wikipedia.org google.com 13.7% google.ru 9.9% google.com.ua 4.9% google.co.in 4.7% yandex.ru 3.8%
wordpress.com google.com 9.5% facebook.com 2.9% google.co.in 2.5% youtube.com 2.0% support.wordpress.com 1.8%
wordpress.org google.com 11.4% google.co.in 5.9% google.com.tr 1.9% w.org 1.4% youtube.com 1.4%
xhamster.com livejasmin.com 4.6% google.com 3.0% watchmygirlfriend.tv 2.7% pornhub.com 2.2% alohatube.com 1.7%
xinhuanet.com baidu.com 20.0% sohu.com 7.4% qq.com 5.5% 360.cn 5.4% taobao.com 3.7%
xvideos.com google.com 9.4% google.co.in 5.9% iwanttodeliver.com 5.6% facebook.com 3.4% youtube.com 3.4%
yahoo.co.jp google.co.jp 17.0% google.com 3.9% amazon.co.jp 2.5% youtube.com 2.2% facebook.com 1.4%
yahoo.com google.com 10.7% youtube.com 4.3% google.co.in 2.0% myway.com 1.9% facebook.com 1.6%
yandex.ru vk.com 5.7% google.ru 3.5% youtube.com 3.2% ok.ru 1.8% mail.ru 1.3%
youku.com baidu.com 11.6% soku.com 11.5% google.com 6.2% youtube.com 3.3% qq.com 2.5%
youtube.com google.com 12.0% vk.com 5.2% google.ru 4.5% google.co.in 2.8% google.com.ua 2.4%
zhihu.com baidu.com 31.2% google.com 9.0% qq.com 2.6% weibo.com 2.5% google.com.hk 2.5%
<!DOCTYPE html>
<head>
<meta charset="utf-8">
<script src="https://d3js.org/d3.v4.min.js"></script>
<style>
body {
margin: 0;
position: fixed;
top: 0;
right: 0;
bottom: 0;
left: 0;
font-family: monospace;
}
</style>
</head>
<body>
<script>
// Size of the drawing area, in pixels.
const width = 960;
const height = 500;
// Band kept free around the layout; the center scales below map into the
// rectangle that remains inside it.
const margin = { top: 40, bottom: 40, left: 140, right: 140 };
// Square-root scale from [0, 100] to [2.2, 34].
const scalepop = d3.scaleSqrt().domain([0, 100]).range([2.2, 34]);
// Ordinal colour scale over the ten category colours; keyed by a site's parent.
const scalecountry = d3.scaleOrdinal(d3.schemeCategory10);
// Linear scales turning embedding coordinates into pixel positions; their
// domains are set on every simulation tick from the current embedding extent.
const centerx = d3.scaleLinear().range([margin.left, width - margin.right]);
const centery = d3.scaleLinear().range([margin.top, height - margin.bottom]);
console.clear()
d3.queue()
.defer(d3.csv,'alexa-upstream.csv')
.defer(d3.csv,'sites-wolfram-alpha.csv')
.await(function (err, sites, wolfram) {
// Surface a failed CSV download as-is instead of crashing just below on an
// undefined table.
if (err) throw err;
// Lookups built from the Wolfram table, keyed by site name:
//   tailles[site] -> visits (as a number)
//   parents[site] -> parent group, only for rows that have one
// Declared with const so they no longer leak as implicit globals.
const tailles = {};
const parents = {};
wolfram.forEach(d => {
  tailles[d.site] = +d.visits;
  if (d.parent) parents[d.site] = d.parent;
});
console.log(parents);
// Maps a visit count to a circle radius between 11 and 50 px.
const scaleTailles = d3.scalePow()
  .exponent(1 / 2.5)
  .domain(d3.extent(d3.values(tailles)))
  .range([11, 50]);
// One node per row of the upstream table (at most 800).
const data = sites
  .map(d => {
    const site = d.site;
    return {
      // random seed position, replaced once the worker reports an embedding
      lon: Math.random(),
      lat: Math.random(),
      name: site,
      // sites missing from the Wolfram table are sized as if they had 10 visits
      r: scaleTailles(tailles[site] || 10),
      color: scalecountry(parents[site]),
      up1: d.up1,
      up2: d.up2,
      up3: d.up3,
      up4: d.up4,
      up5: d.up5,
      // a random number stands in for "no parent" so that two parentless
      // sites never compare as sharing one
      parent: parents[site] || Math.random(),
    };
  })
  .slice(0, 800);
/*
const canvas = d3.select("body").append("canvas")
.attr("width", width)
.attr("height", height);
*/
// Root <svg> element the nodes are drawn into.
const svg = d3.select("body")
  .append("svg")
  .attr("width", width)
  .attr("height", height);

// `pos` holds one [x, y] target per node. It is seeded from each node's
// lon/lat values and later replaced by the positions the t-SNE worker posts
// back (see worker.onmessage).
let pos = data.map(node => [node.lon, -node.lat]);

// Every node starts at the centre of the drawing area.
const centredNodes = data.map(node => {
  node.x = width / 2;
  node.y = height / 2;
  return node;
});

const forcetsne = d3.forceSimulation(centredNodes)
  .alphaDecay(0.005)
  .alpha(0.1)
  // Custom force: rescale the current embedding so its extent fills the area
  // inside the margins, then ease each node towards its target by `alpha`.
  .force('tsne', function (alpha) {
    const xs = pos.map(p => p[0]);
    const ys = pos.map(p => p[1]);
    centerx.domain(d3.extent(xs));
    centery.domain(d3.extent(ys));
    for (let i = 0; i < data.length; i++) {
      const node = data[i];
      const target = pos[i];
      node.x += alpha * (centerx(target[0]) - node.x);
      node.y += alpha * (centery(target[1]) - node.y);
    }
  })
  // Keep circles from overlapping, with 1.5px of padding around each radius.
  .force('collide', d3.forceCollide().radius(node => 1.5 + node.r))
  .on('tick', function () {
    // drawcanvas(canvas, data);
    drawsvg(svg, data);
  });
// Canvas renderer (alternative to drawsvg): paints every node as a filled disc.
//   canvas - selection whose .node() is the <canvas> element
//   nodes  - array of objects carrying x, y, r and color
function drawcanvas(canvas, nodes) {
  const context = canvas.node().getContext("2d");
  // Clear exactly the drawing area; the height used to be passed as `width`.
  context.clearRect(0, 0, width, height);
  for (const node of nodes) {
    context.beginPath();
    context.moveTo(node.x, node.y);
    context.arc(node.x, node.y, node.r, 0, 2 * Math.PI);
    context.lineWidth = 0.5;
    context.fillStyle = node.color;
    context.fill();
  }
}
// SVG renderer, called on every simulation tick.
// Creates one <g class="city"> per node the first time it is seen (a circle
// with a tooltip, plus a short label when the circle is large enough) and
// moves every group to its node's current x/y.
//   svg   - d3 selection of the root <svg>
//   nodes - array of objects carrying x, y, r, color and name
function drawsvg(svg, nodes) {
  const update = svg.selectAll('g.city')
    .data(nodes);
  const enter = update.enter().append('g').classed('city', true);
  enter.append('circle')
    .attr('r', d => d.r)
    .attr('fill', d => d.color)
    .append('title')
    .text(d => d.name);
  enter
    .filter(d => d.r > 7)
    .append('text')
    .attr('fill', 'white')
    .style('font-size', d => d.r > 9 ? '12px' : '9px')
    .attr('text-anchor', 'middle')
    .attr('dominant-baseline', 'middle')
    .attr('pointer-events', 'none')
    .text(d => d.name.substring(0, 4));
  // Position entering AND existing groups; applying the transform to the
  // update selection alone left new groups at the origin until the next tick.
  enter.merge(update).attr('transform', d => `translate(${d.x},${d.y})`);
}
// Download the worker's source files, glue them into a single Blob and run
// that Blob as a Web Worker computing the t-SNE embedding off the main thread.
// NOTE(review): the worker code visible in this file never references d3-geo;
// the download looks like a leftover - confirm before removing it.
d3.queue()
  .defer(d3.text, 'tsne.js')
  .defer(d3.text, 'https://unpkg.com/d3-geo')
  .defer(d3.text, 'worker.js')
  .awaitAll(function (err, scripts) {
    // On failure `scripts` is undefined; report the real cause instead of
    // failing inside the Blob constructor.
    if (err) throw err;
    // A newline after each source keeps a trailing `//` comment in one file
    // from swallowing the first line of the next.
    const sources = scripts.map(source => source + '\n');
    const worker = new Worker(
      window.URL.createObjectURL(
        new Blob(sources, {
          type: "text/javascript"
        })
      )
    );
    // Attach the handler before sending work so no reply can be missed.
    worker.onmessage = function (e) {
      if (e.data.log) console.log(...e.data.log);
      // live positions, picked up by the 'tsne' force on its next tick
      if (e.data.pos) pos = e.data.pos;
      // After each batch, ask for 10 more steps until the worker has done
      // 10000 steps or its reported cost has dropped to 1e-2 or below.
      if (e.data.done && e.data.done < 10000 && e.data.cost > 1e-2) {
        worker.postMessage({
          maxIter: e.data.done + 10,
        });
      }
    };
    worker.postMessage({
      maxIter: 10,
      dim: 2,
      perplexity: 30.0,
      data: data
    });
  });
});
</script>
</body>
We can make this file beautiful and searchable if this error is corrected: It looks like row 5 should actually have 4 columns, instead of 3. in line 4.
rank,site,visits,parent
1,google.com,1800,alphabet
2,youtube.com,1500,alphabet
3,facebook.com,910,fb
4,baidu.com,460
5,wikipedia.org,420
6,yahoo.com,360,yahoo
7,twitter.com,260,twitter
8,google.co.in,210,alphabet
9,live.com,210,microsoft
10,qq.com,210
11,amazon.com,190,amazon
12,taobao.com,180
13,linkedin.com,170
14,instagram.com,170,fb
15,vk.com,160
16,google.co.jp,160,alphabet
17,hao123.com,160
18,sohu.com,150
19,360.cn,140
20,sina.com.cn,130
21,weibo.com,130
22,tmall.com,120
23,t.co,110,twitter
24,onclickads.net,110
25,wordpress.com,110
26,soso.com,100
27,blogspot.com,98,alphabet
28,google.de,98,alphabet
29,reddit.com,96
30,whatsapp.com,96
31,yandex.ru,93
32,google.ru,90,alphabet
33,microsoft.com,90,microsoft
34,google.co.uk,89,alphabet
35,google.com.br,85,alphabet
36,yahoo.co.jp,85,yahoo
37,stackoverflow.com,85
38,msn.com,84,microsoft
39,bing.com,84,microsoft
40,google.fr,84,alphabet
41,netflix.com,79
42,apple.com,78
43,tumblr.com,74
44,pinterest.com,74
45,ebay.com,74,ebay
46,paypal.com,70,ebay
47,google.it,65,alphabet
48,popads.net,64
49,office.com,64,microsoft
50,google.es,64,alphabet
51,blogger.com,60,alphabet
52,ok.ru,59
53,mail.ru,58
54,googleusercontent.com,58,alphabet
55,github.com,58
56,imdb.com,57
57,cnzz.com,57
58,aliexpress.com,57
59,imgur.com,57
60,google.com.mx,55,alphabet
61,amazon.in,55,amazon
62,microsoftonline.com,54,microsoft
63,isanalyze.com,53
64,dropbox.com,52
65,google.ca,52,alphabet
66,google.com.hk,50,alphabet
67,google.co.kr,50,alphabet
68,youku.com,50
69,amazon.co.jp,48,amazon
70,flipkart.com,47
71,pornhub.com,45
72,naver.com,45
73,quora.com,44
74,fc2.com,44
75,twitch.tv,42
76,adobe.com,41
77,cnn.com,41
78,google.co.id,39,alphabet
79,so.com,38
80,bbc.com,38
81,terraclicks.com,38
82,amazonaws.com,38,amazon
83,xinhuanet.com,37
84,google.com.tr,36,alphabet
85,google.com.au,36,alphabet
86,adf.ly,35
87,stackexchange.com,34
88,xvideos.com,34
89,savefrom.net,33
90,nytimes.com,33
91,wikia.com,33
92,soundcloud.com,33
93,google.com.tw,33,alphabet
94,alibaba.com,33
95,360.com,32
96,ask.com,32
97,google.pl,32,alphabet
98,haosou.com,32
99,godaddy.com,31
100,xhamster.com,31
101,bet365.com,30
102,vimeo.com,30
103,amazon.de,30,amazon
104,indiatimes.com,30
105,bbc.co.uk,29
106,espn.com,29
107,cloudfront.net,28
108,globo.com,28
109,uol.com.br,28
110,dailymail.co.uk,27
111,china.com.cn,27
112,craigslist.org,27
113,outbrain.com,27
114,google.com.eg,27,alphabet
115,dailymotion.com,27
116,google.com.ar,26,alphabet
117,amazon.co.uk,26,amazon
118,w3schools.com,26
119,rakuten.co.jp,26
120,google.com.pk,25,alphabet
121,google.co.th,25,alphabet
122,wordpress.org,25
123,thewhizmarketing.com,25
124,blogspot.com,98,alphabet
125,cnet.com,25
126,popcash.net,25
127,people.com.cn,25
128,tudou.com,24
129,google.com.sa,24,alphabet
130,theguardian.com,24
131,huffingtonpost.com,24
132,alipay.com,24
133,booking.com,24
134,buzzfeed.com,24
135,wikihow.com,24
136,livejasmin.com,23
137,mozilla.org,23
138,google.nl,23,alphabet
139,force.com,23
140,github.io,23,github
141,zhihu.com,23
142,adexchangeprediction.com,23
143,slideshare.net,22
144,thepiratebay.org,22
145,doubleclick.net,22
146,mediafire.com,22
147,wetransfer.com,21
148,salesforce.com,21
149,washingtonpost.com,21,amazon
150,snapdeal.com,21
151,daum.net,21
152,ebay.de,20,ebay
153,chase.com,20
154,fbcdn.net,20
155,trello.com,20
156,blogspot.in,20
157,iwanttodeliver.com,20
158,wordpress.com,110
159,spotify.com,20
160,weather.com,20
// create main global object
var tsnejs = tsnejs || { REVISION: 'ALPHA' };
(function(global) {
"use strict";
// utility function: throws when `condition` is falsy.
// Throws a real Error (not a bare string) so the failure carries a stack trace;
// `message` defaults to "Assertion failed".
var assert = function(condition, message) {
  if (!condition) { throw new Error(message || "Assertion failed"); }
};
// option lookup: opt[field] when `field` is an own property of `opt`
// (even if its value is undefined), otherwise `defaultval`
var getopt = function(opt, field, defaultval) {
  return opt.hasOwnProperty(field) ? opt[field] : defaultval;
};
// Standard normal sampler (mean 0, standard deviation 1) using the polar
// method: draw (u, v) uniformly in [-1, 1)^2 until the point falls strictly
// inside the unit disc (origin excluded), then scale. Each accepted draw
// yields two samples; the second is cached and handed out by the next call.
var return_v = false; // true when v_val holds an unused sample
var v_val = 0.0;
var gaussRandom = function() {
  if (return_v) {
    return_v = false;
    return v_val;
  }
  var u, v, r;
  do {
    u = 2 * Math.random() - 1;
    v = 2 * Math.random() - 1;
    r = u * u + v * v;
  } while (r == 0 || r > 1);
  var c = Math.sqrt(-2 * Math.log(r) / r);
  v_val = v * c; // keep the second sample for the next call
  return_v = true;
  return u * c;
};
// normal sample with mean `mu` and standard deviation `std`
var randn = function(mu, std) {
  var unit = gaussRandom();
  return mu + unit * std;
};
// utility that creates a contiguous vector of n zeros.
// Returns an empty plain array when n is undefined or not a number.
var zeros = function(n) {
  if (typeof(n) === 'undefined' || isNaN(n)) {
    return [];
  }
  if (typeof ArrayBuffer !== 'undefined') {
    return new Float64Array(n); // typed arrays are faster
  }
  // no typed-array support: fall back to a plain array filled by hand
  var arr = new Array(n);
  for (var k = 0; k < n; k++) {
    arr[k] = 0;
  }
  return arr;
};
// utility that returns an n-by-d array of arrays.
// Every cell is `s` when s is provided; otherwise each cell is an independent
// randn(0.0, 1e-4) draw.
var randn2d = function(n, d, s) {
  var constantFill = typeof s !== 'undefined';
  var rows = [];
  for (var r = 0; r < n; r++) {
    var row = [];
    for (var c = 0; c < d; c++) {
      row.push(constantFill ? s : randn(0.0, 1e-4));
    }
    rows.push(row);
  }
  return rows;
};
// squared L2 (Euclidean) distance between two vectors: the sum of squared
// coordinate differences, with no square root taken.
// Iterates over the length of x1.
var L2 = function(x1, x2) {
  var total = 0;
  for (var k = 0, len = x1.length; k < len; k++) {
    var diff = x1[k] - x2[k];
    total += diff * diff;
  }
  return total;
};
// pairwise distances between all vectors in X, as computed by L2().
// Returns a flat row-major array of N*N entries (entry i*N+j is the distance
// between X[i] and X[j]); the diagonal keeps the zeros it was allocated with.
var xtod = function(X) {
  var count = X.length;
  var dist = zeros(count * count); // allocate contiguous array
  for (var row = 0; row < count; row++) {
    var rowOffset = row * count;
    for (var col = row + 1; col < count; col++) {
      var value = L2(X[row], X[col]);
      // fill both triangles from one computation
      dist[rowOffset + col] = value;
      dist[col * count + row] = value;
    }
  }
  return dist;
};
// compute (p_{i|j} + p_{j|i})/(2n)
//
// Turns a flat, row-major N*N array of pairwise distances into the symmetrised
// affinity matrix stored by the tSNE object as P.
//   D          - array of length N*N; entry i*N+j is the distance between i and j
//   perplexity - target perplexity; each row's entropy is driven towards
//                log(perplexity)
//   tol        - absolute entropy error at which a row's search stops early
// Returns a flat N*N array in which every entry is at least 1e-100.
// The assert fails when D.length is not a perfect square.
var d2p = function(D, perplexity, tol) {
var Nf = Math.sqrt(D.length); // this better be an integer
var N = Math.floor(Nf);
assert(N === Nf, "D should have square number of elements.");
var Htarget = Math.log(perplexity); // target entropy of distribution
var P = zeros(N * N); // temporary probability matrix
var prow = zeros(N); // a temporary storage compartment
for(var i=0;i<N;i++) {
// search bounds for beta; infinite until the search has overshot on that side
var betamin = -Infinity;
var betamax = Infinity;
var beta = 1; // initial value of precision
var done = false;
var maxtries = 50;
// perform binary search to find a suitable precision beta
// so that the entropy of the distribution is appropriate
var num = 0;
while(!done) {
//debugger;
// compute entropy and kernel row with beta precision
var psum = 0.0;
for(var j=0;j<N;j++) {
var pj = Math.exp(- D[i*N+j] * beta);
if(i===j) { pj = 0; } // we dont care about diagonals
prow[j] = pj;
psum += pj;
}
// normalize p and compute entropy
var Hhere = 0.0;
for(var j=0;j<N;j++) {
// an all-zero kernel row is left as zeros rather than dividing by zero
if(psum == 0) {
var pj = 0;
} else {
var pj = prow[j] / psum;
}
prow[j] = pj;
// probabilities of 1e-7 or less are left out of the entropy sum
if(pj > 1e-7) Hhere -= pj * Math.log(pj);
}
// adjust beta based on result
if(Hhere > Htarget) {
// entropy was too high (distribution too diffuse)
// so we need to increase the precision for more peaky distribution
betamin = beta; // move up the bounds
// no upper bound found yet: keep doubling, otherwise bisect
if(betamax === Infinity) { beta = beta * 2; }
else { beta = (beta + betamax) / 2; }
} else {
// converse case. make distribution less peaky
betamax = beta;
// no lower bound found yet: keep halving, otherwise bisect
if(betamin === -Infinity) { beta = beta / 2; }
else { beta = (beta + betamin) / 2; }
}
// stopping conditions: too many tries or got a good precision
// (prow still holds the row computed with the beta used in this pass)
num++;
if(Math.abs(Hhere - Htarget) < tol) { done = true; }
if(num >= maxtries) { done = true; }
}
// console.log('data point ' + i + ' gets precision ' + beta + ' after ' + num + ' binary search steps.');
// copy over the final prow to P at row i
for(var j=0;j<N;j++) { P[i*N+j] = prow[j]; }
} // end loop over examples i
// symmetrize P and normalize it to sum to 1 over all ij
var Pout = zeros(N * N);
var N2 = N*2;
for(var i=0;i<N;i++) {
for(var j=0;j<N;j++) {
// floor at 1e-100 so no entry is exactly zero
Pout[i*N+j] = Math.max((P[i*N+j] + P[j*N+i])/N2, 1e-100);
}
}
return Pout;
}
// helper function: 1 for positive x, -1 for negative x, 0 otherwise
// (so zero and NaN both give 0)
function sign(x) {
  if (x > 0) return 1;
  if (x < 0) return -1;
  return 0;
}
// t-SNE model.
// Options (all optional):
//   perplexity - effective number of nearest neighbors (default 30)
//   dim        - dimensionality of the embedding (default 2)
//   epsilon    - learning rate (default 10)
// Typical use: construct, call initDataRaw() or initDataDist() once, then
// call step() repeatedly and read the embedding with getSolution().
var tSNE = function(opt) {
  opt = opt || {};
  this.perplexity = getopt(opt, "perplexity", 30); // effective number of nearest neighbors
  this.dim = getopt(opt, "dim", 2); // by default 2-D tSNE
  this.epsilon = getopt(opt, "epsilon", 10); // learning rate
  this.iter = 0;
};

tSNE.prototype = {

  // this function takes a set of high-dimensional points
  // and creates matrix P from them using gaussian kernel
  //   X - array of equal-length numeric vectors
  // Fails an assertion when X or its first vector is empty.
  initDataRaw: function(X) {
    var N = X.length;
    // check N first: reading X[0].length on an empty X would raise a
    // TypeError before the explanatory assertion could fire
    assert(N > 0, " X is empty? You must have some data!");
    var D = X[0].length;
    assert(D > 0, " X[0] is empty? Where is the data?");
    var dists = xtod(X); // pairwise distances between the rows of X
    this.P = d2p(dists, this.perplexity, 1e-4); // attach to object
    this.N = N; // back up the size of the dataset
    this.initSolution(); // refresh this
  },

  // this function takes a given distance matrix and creates
  // matrix P from them.
  // D is assumed to be provided as a list of lists, and should be symmetric:
  // only the entries above the diagonal (D[i][j] with j > i) are read.
  initDataDist: function(D) {
    var N = D.length;
    assert(N > 0, " X is empty? You must have some data!");
    // convert D to a (fast) typed array version
    var dists = zeros(N * N); // allocate contiguous array
    for (var i = 0; i < N; i++) {
      for (var j = i + 1; j < N; j++) {
        var d = D[i][j];
        dists[i * N + j] = d;
        dists[j * N + i] = d;
      }
    }
    this.P = d2p(dists, this.perplexity, 1e-4);
    this.N = N;
    this.initSolution(); // refresh this
  },

  // (re)initializes the solution to random
  initSolution: function() {
    // generate random solution to t-SNE
    this.Y = randn2d(this.N, this.dim); // the solution
    this.gains = randn2d(this.N, this.dim, 1.0); // step gains to accelerate progress in unchanging directions
    this.ystep = randn2d(this.N, this.dim, 0.0); // momentum accumulator
    this.iter = 0;
  },

  // return pointer to current solution (the live array, not a copy)
  getSolution: function() {
    return this.Y;
  },

  // perform a single step of optimization to improve the embedding;
  // returns the cost evaluated before the step was applied
  step: function() {
    this.iter += 1;
    var N = this.N;

    var cg = this.costGrad(this.Y); // evaluate gradient
    var cost = cg.cost;
    var grad = cg.grad;

    // perform gradient step
    var ymean = zeros(this.dim);
    for (var i = 0; i < N; i++) {
      for (var d = 0; d < this.dim; d++) {
        var gid = grad[i][d];
        var sid = this.ystep[i][d];
        var gainid = this.gains[i][d];

        // compute gain update: shrink the gain when the gradient and the
        // previous step share a sign, grow it otherwise
        var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
        if (newgain < 0.01) newgain = 0.01; // clamp
        this.gains[i][d] = newgain; // store for next turn

        // compute momentum step direction
        var momval = this.iter < 250 ? 0.5 : 0.8;
        var newsid = momval * sid - this.epsilon * newgain * gid;
        this.ystep[i][d] = newsid; // remember the step we took

        // step!
        this.Y[i][d] += newsid;

        ymean[d] += this.Y[i][d]; // accumulate mean so that we can center later
      }
    }

    // reproject Y to be zero mean
    for (var i = 0; i < N; i++) {
      for (var d = 0; d < this.dim; d++) {
        this.Y[i][d] -= ymean[d] / N;
      }
    }

    //if(this.iter%100===0) console.log('iter ' + this.iter + ', cost: ' + cost);
    return cost; // return current cost
  },

  // for debugging: gradient check. Logs, for every coordinate, the analytic
  // gradient next to a central finite-difference estimate.
  debugGrad: function() {
    var N = this.N;

    var cg = this.costGrad(this.Y); // evaluate gradient
    var grad = cg.grad;

    var e = 1e-5;
    for (var i = 0; i < N; i++) {
      for (var d = 0; d < this.dim; d++) {
        var yold = this.Y[i][d];

        this.Y[i][d] = yold + e;
        var cg0 = this.costGrad(this.Y);

        this.Y[i][d] = yold - e;
        var cg1 = this.costGrad(this.Y);

        var analytic = grad[i][d];
        var numerical = (cg0.cost - cg1.cost) / (2 * e);
        console.log(i + ',' + d + ': gradcheck analytic: ' + analytic + ' vs. numerical: ' + numerical);

        this.Y[i][d] = yold; // restore the coordinate
      }
    }
  },

  // return cost and gradient, given an arrangement
  //   Y - array of N points, each an array of `dim` coordinates
  // Returns {cost: number, grad: array of N arrays of `dim` numbers}.
  costGrad: function(Y) {
    var N = this.N;
    var dim = this.dim; // dim of output space
    var P = this.P;

    var pmul = this.iter < 100 ? 4 : 1; // trick that helps with local optima

    // compute current Q distribution, unnormalized first
    var Qu = zeros(N * N);
    var qsum = 0.0;
    for (var i = 0; i < N; i++) {
      for (var j = i + 1; j < N; j++) {
        var dsum = 0.0;
        for (var d = 0; d < dim; d++) {
          var dhere = Y[i][d] - Y[j][d];
          dsum += dhere * dhere;
        }
        var qu = 1.0 / (1.0 + dsum); // Student t-distribution
        Qu[i * N + j] = qu;
        Qu[j * N + i] = qu;
        qsum += 2 * qu;
      }
    }
    // normalize Q distribution to sum to 1
    var NN = N * N;
    var Q = zeros(NN);
    for (var q = 0; q < NN; q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); }

    var cost = 0.0;
    var grad = [];
    for (var i = 0; i < N; i++) {
      var gsum = new Array(dim); // init grad for point i
      for (var d = 0; d < dim; d++) { gsum[d] = 0.0; }
      for (var j = 0; j < N; j++) {
        cost += - P[i * N + j] * Math.log(Q[i * N + j]); // accumulate cost (the non-constant portion at least...)
        var premult = 4 * (pmul * P[i * N + j] - Q[i * N + j]) * Qu[i * N + j];
        for (var d = 0; d < dim; d++) {
          gsum[d] += premult * (Y[i][d] - Y[j][d]);
        }
      }
      grad.push(gsum);
    }

    return {cost: cost, grad: grad};
  }
};
global.tSNE = tSNE; // export tSNE class
})(tsnejs);
// export the library: assign it to module.exports when a CommonJS `module`
// with an `exports` property exists (nodejs), otherwise attach it to `window`
// when there is one (ordinary browser page)
(function(lib) {
  "use strict";
  var hasCommonJS = typeof module !== "undefined" && typeof module.exports !== "undefined";
  if (hasCommonJS) {
    module.exports = lib; // in nodejs
    return;
  }
  if (typeof window == "object") {
    window.tsnejs = lib; // in ordinary browser attach library to window
  }
})(tsnejs);
// Route console.log through postMessage: every call posts {log: [args...]}.
// For the output to show up, the page's worker.onmessage needs:
//   `if (e.data.log) console.log.apply(this, e.data.log);`
console.log = function(...args) {
  self.postMessage({ log: args });
};
// Optimisation steps already run on the current model. Kept across messages
// so the page can request "more steps" simply by raising maxIter.
let iterations = 0;
let model;

// Builds the pairwise distance matrix (array of rows) between sites.
//   data - array of nodes carrying name, parent and up1..up5
// Two sites are closer when they share a parent (numerator 1 instead of 2) and
// when one is listed among the other's upstreams; each link term is damped by
// the square root of how often the linked-to site appears as an upstream.
// The two link terms use the weights of b and of a respectively, so
// dist(a, b) equals dist(b, a) as initDataDist() expects. (Both terms used to
// take the weight of b, which made the matrix asymmetric.)
function upstreamDistances(data) {
  // m[site]: its own upstreams plus every site that lists it as an upstream
  const m = {};
  data.forEach(d => {
    m[d.name] = [];
  });
  // poids[site]: number of times the site appears as an upstream
  const poids = {};
  data.forEach(d => {
    ['up1', 'up2', 'up3', 'up4', 'up5'].forEach(key => {
      const upstream = d[key];
      m[d.name].push(upstream);
      if (typeof m[upstream] != 'undefined') m[upstream].push(d.name);
      poids[upstream] = poids[upstream] ? poids[upstream] + 1 : 1;
    });
  });
  console.log("m['facebook.com']", m['facebook.com']);
  console.log("m['google.com']", m['google.com']);
  console.log("poids['google.com']", poids['google.com']);
  console.log("gru", JSON.stringify(m['dailymotion.com']));

  const now = performance.now();
  const link = (from, to) => (m[from.name].indexOf(to.name) > -1 ? 1 : 0);
  const weight = site => Math.sqrt(poids[site.name] || 1);
  const dists = data.map(
    a => data.map(b => {
      const base = a.parent === b.parent ? 1 : 2;
      return base / (1 + link(a, b) / weight(b) + link(b, a) / weight(a));
    })
  );
  console.log('computed', data.length * data.length, 'distances in', Math.round(performance.now() - now), 'ms');
  console.log('dists[0,2]', dists[0][2]);
  console.log('dists[2,0]', dists[2][0]);
  console.log(dists.map(k => k.reduce((a, b) => a + b, 0)));
  return dists;
}

// Message protocol (fields of e.data, all optional):
//   data       - nodes to embed; distances are derived with upstreamDistances()
//   dist       - precomputed distance matrix, used instead of `data`
//   dim        - embedding dimensionality, forwarded to tSNE
//   perplexity - defaults to 30
//   maxIter    - total number of steps to reach, defaults to 500
// Posts {iterations, pos} after every step, then {done, cost, log}.
self.onmessage = function(e) {
  const msg = e.data;
  const maxIter = msg.maxIter || 500;
  const perplexity = msg.perplexity || 30;

  let dists = msg.dist;
  if (msg.data && !dists) {
    dists = upstreamDistances(msg.data);
  }

  if (dists) {
    model = new tsnejs.tSNE({
      dim: msg.dim,
      perplexity: perplexity,
    });
    model.initDataDist(dists);
    iterations = 0; // a fresh model starts counting from zero again
  }

  if (!model) {
    // nothing to optimise yet: report it instead of crashing on `model`
    self.postMessage({ log: ['no model: send data or dist first'] });
    return;
  }

  const startpos = model.getSolution().map(d => d.slice());
  // start from the current solution so `pos` is defined even when no step runs
  let pos = model.getSolution();

  // Increment only for steps actually taken; `iterations++` inside the loop
  // condition also counted the failed check, costing every later batch a step.
  while (iterations < maxIter) {
    // every time you call this, solution gets better
    model.step();
    pos = model.getSolution();
    // Y is an array of 2-D points that you can plot
    self.postMessage({
      iterations: iterations,
      pos: pos,
    });
    iterations++;
  }

  // Progress measure for this batch: mean per-point displacement (as measured
  // by sqdist) relative to the largest |x| + |y| in the embedding.
  const cost = startpos
    .map((d, i) => sqdist(d, pos[i]))
    .reduce((a, b) => a + b, 0) / pos.length / Math.max(...pos.map(d => Math.abs(d[0]) + Math.abs(d[1])));

  self.postMessage({
    done: iterations,
    cost: cost,
    log: ['done', iterations, cost]
  });
};
// Distance between two 2-D points a and b.
// Despite the name this is not a squared distance: it returns the larger of
// the absolute differences along the two coordinates.
function sqdist (a, b) {
  const dx = Math.abs(a[0] - b[0]);
  const dy = Math.abs(a[1] - b[1]);
  return Math.max(dx, dy);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment