Skip to content

Instantly share code, notes, and snippets.

@NeKzor
Last active November 4, 2019 20:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NeKzor/0fc5fba418e40e5711c083c849287cb1 to your computer and use it in GitHub Desktop.
Save NeKzor/0fc5fba418e40e5711c083c849287cb1 to your computer and use it in GitHub Desktop.
SDA forum scraping + analytics.
{
/**
 * Serializes `obj` as pretty-printed JSON and triggers a browser download of it.
 *
 * @param {string} file - File name suggested to the browser for the download.
 * @param {*} obj - Any JSON-serializable value.
 */
console.save = function(file, obj) {
    const blob = new Blob([JSON.stringify(obj, null, 4)], { type: 'application/json' });
    const a = document.createElement('a');
    a.download = file;
    a.href = window.URL.createObjectURL(blob);
    a.dataset.downloadurl = ['application/json', a.download, a.href].join(':');
    // The MouseEvent constructor replaces the deprecated createEvent/initMouseEvent pair;
    // the flags mirror the original call (bubbles, not cancelable, view = window).
    a.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: false, view: window }));
    // Release the blob once the download has had time to start; revoking
    // synchronously can cancel the download in some browsers.
    setTimeout(() => window.URL.revokeObjectURL(a.href), 10000);
};
const parser = new DOMParser();
const base = 'https://forum.speeddemosarchive.com';
const posts = [];
// Path of the next thread page to fetch; null once the last page has been processed.
let next = '/post/portal_223.html';

/**
 * Walks the forum thread page by page, starting at `next`, collecting every
 * post (user, ISO date, HTML body) into `posts`. When a page has no
 * `#nextpagelink` element the collected posts are downloaded as `result.json`.
 *
 * @returns {Promise<void>} Resolves after the download has been triggered.
 * @throws {Error} When a page request does not return a successful HTTP status.
 */
const scrap = async () => {
    // A loop instead of self-recursion: every page is awaited, so any failure
    // propagates to the single catch at the call site instead of being lost
    // in a floating promise.
    while (next) {
        // new URL() resolves the href against the forum origin.
        const page = await fetch(new URL(next, base));
        if (!page.ok) {
            throw new Error(`Failed to fetch ${page.url}: HTTP ${page.status}`);
        }
        console.log('[FETCHED] ' + page.url);

        // Named `doc` so the page's own global `document` is not shadowed.
        const doc = parser.parseFromString(await page.text(), 'text/html');
        doc.querySelectorAll('.filled').forEach(post => {
            posts.push({
                user: post.querySelector('.usernameindent').firstChild.innerText,
                date: new Date(post.querySelector('.datelink').innerText).toISOString(),
                post: post.querySelector('.posttext .edit_hide').innerHTML,
            });
        });

        const nextPageLink = doc.querySelector('#nextpagelink');
        next = nextPageLink ? nextPageLink.firstChild.getAttribute('href') : null;
    }

    // Saved as an index-keyed object ({ "0": {...}, "1": {...} }); the
    // analytics script reads it back with Object.values().
    console.save('result.json', { ...posts });
};

scrap().catch(error => console.error('[FAILED] scraping aborted', error));
}
const fs = require('fs');
// Analytics over the scraped forum posts stored in ./result.json.
// Prints: total posts, posts per year, every link found in the posts,
// the 200 most frequent whitespace-separated terms and the 30 most active users.

// Range of years (inclusive) listed in the per-year breakdown.
const FIRST_YEAR = 2011;
const LAST_YEAR = 2019;

/**
 * Counts how often each value occurs in `values`.
 *
 * A Map is used instead of a plain object so that tokens such as
 * "constructor", "toString" or "__proto__" are counted like any other value
 * instead of colliding with members inherited from Object.prototype.
 *
 * @param {Iterable<*>} values - Values to tally.
 * @returns {Map<*, number>} Occurrence count per distinct value.
 */
const countOccurrences = (values) => {
    const counts = new Map();
    for (const value of values) {
        counts.set(value, (counts.get(value) || 0) + 1);
    }
    return counts;
};

/**
 * Prints the `limit` most frequent entries of `counts`, most frequent first,
 * one ` key: count` line per entry.
 *
 * @param {Map<*, number>} counts - Occurrence counts as built by countOccurrences.
 * @param {number} limit - Maximum number of entries to print.
 */
const printTop = (counts, limit) => {
    [...counts.entries()]
        .sort(([, a], [, b]) => b - a)
        .slice(0, limit)
        .forEach(([key, count]) => console.log(' ' + key + ': ' + count));
};

// result.json is an index-keyed object (or an array); Object.values handles both.
// Dates are stored as ISO strings and revived into Date objects here.
const posts = Object.values(JSON.parse(fs.readFileSync('./result.json', 'utf-8')))
    .map(post => ({ ...post, date: new Date(post.date) }));

console.log('Posts: ', posts.length);
const postsPerYear = countOccurrences(posts.map(({ date }) => date.getFullYear()));
for (let year = FIRST_YEAR; year <= LAST_YEAR; ++year) {
    console.log(` ${year}: ${postsPerYear.get(year) || 0}`);
}

console.log('Links:');
posts
    // matchAll collects every href in a post, not only the first one.
    .flatMap(({ post }) => [...post.matchAll(/href="([^"]+)"/g)].map(([, url]) => url))
    .sort()
    .forEach(url => console.log(' ' + url));

console.log('Terms:');
// Split on any whitespace run so tokens are not glued together across
// newlines or tabs. The post bodies are raw HTML, so markup tokens are
// counted as terms as well.
const words = posts
    .flatMap(({ post }) => post.split(/\s+/))
    .filter(word => word !== '');
printTop(countOccurrences(words), 200);

console.log('Users:');
printTop(countOccurrences(posts.map(({ user }) => user)), 30);
Posts: 1052
2011: 841
2012: 145
2013: 55
2014: 10
2015: 1
2016: 0
2017: 0
2018: 0
2019: 0
Links:
http://adzicents.net/portal2/portal2%202011-04-24%2017-45-11-05.png
http://adzicents.net/portal2/portal2%202011-04-24%2021-49-48-28.png
http://blip.tv/file/5046980
http://cs.rin.ru/forum/viewtopic.php?f=10&amp;t=52701
http://cs.rin.ru/forum/viewtopic.php?f=30&amp;t=54888
http://cs.rin.ru/forum/viewtopic.php?f=30&amp;t=54888
http://dl.dropbox.com/u/45123651/sr-port2cf.rar
http://dl.dropbox.com/u/4533383/CubePreservation_HQ.mp4
http://dl.dropbox.com/u/4533383/minorskip_sp_a2_bts1_HQ.mp4
http://dl.dropbox.com/u/4533383/minorskip_sp_a2_bts1_HQ.mp4
http://dl.dropbox.com/u/4533383/minorskip_sp_a2_bts1_HQ.mp4
http://dl.dropbox.com/u/4533383/promising_HQ.mp4
http://dl.dropbox.com/u/4533383/promising_HQ.mp4
http://dl.dropbox.com/u/4533383/wtflevitatingcube_HQ.mp4
http://dl.dropbox.com/u/5013851/portal%202/route4.dem
http://forum.speeddemosarchive.com/post/demonstrates_portal_done_pro.html
http://forum.speeddemosarchive.com/post/demonstrates_portal_done_pro.html
http://forum.speeddemosarchive.com/post/portal_223_102.html
http://forum.speeddemosarchive.com/post/portal_223_102.html
http://forum.speeddemosarchive.com/post/portal_223_102.html
http://forum.speeddemosarchive.com/post/portal_223_254.html
http://forum.speeddemosarchive.com/post/portal_223_480.html
http://forum.speeddemosarchive.com/post/portal_2__november_7th_2012.html
http://forum.speeddemosarchive.com/post/single_segment_with_resets_now_accepted_186.html
http://forums.steampowered.com/forums/showthread.php?t=1849054
http://i.imgur.com/hE78s.png
http://i43.tinypic.com/20fb3n6.png
http://imageshack.us/photo/my-images/220/goonsquad1.jpg/
http://imageshack.us/photo/my-images/542/myusernames.png/
http://img153.imageshack.us/img153/8682/spa3010001.jpg
http://img543.imageshack.us/img543/6613/spa2bts20000.jpg
http://img807.imageshack.us/img807/7726/spa3010002.jpg
http://img815.imageshack.us/img815/639/52192416.jpg
http://img853.imageshack.us/img853/6192/spa3010000.jpg
http://imgur.com/oA9ED
http://sourceruns.org/index.php
http://sourceruns.org/index.php
http://sourceruns.org/index.php
http://sourceruns.org/index.php?board=15.0
http://sourceruns.org/index.php?board=15.0
http://sourceruns.org/index.php?board=15.0
http://sourceruns.org/index.php?board=17.0
http://sourceruns.org/index.php?board=17.0
http://sourceruns.org/index.php?board=17.0
http://sourceruns.org/index.php?board=17.0
http://sourceruns.org/index.php?board=17.0
http://sourceruns.org/index.php?topic=473.msg6746#msg6746
http://sourceruns.org/index.php?topic=473.msg6746#msg6746
http://sourceruns.org/index.php?topic=473.msg6746#msg6746
http://sourceruns.org/index.php?topic=475.msg6966#msg6966
http://sourceruns.org/index.php?topic=475.msg6966#msg6966
http://sourceruns.org/index.php?topic=475.msg6966#msg6966
http://sourceruns.org/index.php?topic=566.0
http://speeddemosarchive.com/kb/SDA_Strategy_Wiki
http://steamcommunity.com/id/adzicents
http://steamcommunity.com/id/rogotin
http://steamcommunity.com/profiles/76561197972228601
http://steamcommunity.com/profiles/76561197972228601
http://techforum4u.com/forumdisplay.php/58-SMF-Simple-Machines-Forum
http://up.k10x.net/xpddezgpkjdkg/trick1.dem
http://up.k10x.net/xpddezgpkjdkg/trick1.dem
http://up.k10x.net/xpddezgpkjdkg/trick1.dem
http://up.k10x.net/xqnzoaskxvgiz/sp_a4_finale2_01.dem
http://up.k10x.net/xqnzoaskxvgiz/sp_a4_finale2_01.dem
http://webchat.quakenet.org
http://www.adzicents.net/portal2/wiki/
http://www.adzicents.net/portal2/wiki/doku.php
http://www.adzicents.net/portal2/wiki/doku.php?id=Home
http://www.amazon.com/Portal-2-Pc/dp/B002I0JIQW/ref=br_lf_m_1000208101_1_3_ttl?t=slicinc-20&amp;tag=slicinc-20&amp;ie=UTF8&amp;m=ATVPDKIKX0DER&amp;s=videogames&amp;pf_rd_p=1295543582&amp;pf_rd_s=center-2&amp;pf_rd_t=1401&amp;pf_rd_i=1000208101&amp;pf_rd_m=ATVPDKIKX0DER&amp;pf_rd_r=1ZDCDNA0CHJJ8NGPJZ6H
http://www.livestream.com/bkbroadcasting
http://www.livestream.com/transgenic
http://www.logitech.com/en-us/mice-pointers/mice/devices/5750
http://www.mediafire.com/?687r464goy6l00q
http://www.mediafire.com/?a686f9r419e8706
http://www.mediafire.com/?cu00ghdhgzrsfmq
http://www.megaupload.com/?d=SN5D7QN0
http://www.megaupload.com/?d=SN5D7QN0
http://www.megaupload.com/?d=SN5D7QN0
http://www.megaupload.com/?d=SN5D7QN0
http://www.newegg.com/Product/Product.aspx?Item=N82E16823109191
http://www.twitch.tv/znernicus/b/323603752
http://www.youtube.com/playlist?p=PLD3CDFF0B75369CA8
http://www.youtube.com/playlist?p=PLD3CDFF0B75369CA8
http://www.youtube.com/user/ChaoThing?feature=mhum
http://www.youtube.com/user/SoftlyAdverse#grid/user/11E8F87FB6D48568
http://www.youtube.com/watch?v=-m_vwjlwkZI
http://www.youtube.com/watch?v=-yduLqf4jF4
http://www.youtube.com/watch?v=6XzR5wopFQQ
http://www.youtube.com/watch?v=7Yk-vIwbgPA
http://www.youtube.com/watch?v=8Pb4u38TPCc
http://www.youtube.com/watch?v=98CG1OqFQlc
http://www.youtube.com/watch?v=9Uy4b1JFT4E
http://www.youtube.com/watch?v=E1AE2GB6Aho
http://www.youtube.com/watch?v=EeYf-K0rGdw
http://www.youtube.com/watch?v=FQfIelYDtW8
http://www.youtube.com/watch?v=MDevhzgWozY
http://www.youtube.com/watch?v=MKFreE4cXzw
http://www.youtube.com/watch?v=MmuGpuv8RMg
http://www.youtube.com/watch?v=Pt8L9AQ30vI
http://www.youtube.com/watch?v=QRwJ3NTNa8Y
http://www.youtube.com/watch?v=Qty0Rv_cQNI
http://www.youtube.com/watch?v=R9p850eVNDo
http://www.youtube.com/watch?v=XY50yk7oodk
http://www.youtube.com/watch?v=Yk0T4aSSxIo
http://www.youtube.com/watch?v=ZziFnNroWEA
http://www.youtube.com/watch?v=ZziFnNroWEA
http://www.youtube.com/watch?v=ZziFnNroWEA
http://www.youtube.com/watch?v=fMVsn-8ozNw
http://www.youtube.com/watch?v=fRQphlshpds
http://www.youtube.com/watch?v=g8pa1-Pt5ho
http://www.youtube.com/watch?v=iczE214fG-8
http://www.youtube.com/watch?v=ijMgrpwhmBY
http://www.youtube.com/watch?v=kHGtqyWYk64&amp;NR=1
http://www.youtube.com/watch?v=kznj0Zs7b8k
http://www.youtube.com/watch?v=lGtaB-m7zmg
http://www.youtube.com/watch?v=pqIhtlz6DC4
http://www.youtube.com/watch?v=qQuWxzpsQrs
http://www.youtube.com/watch?v=rm_qd...ature=youtu.be
http://www.youtube.com/watch?v=sDhRiVK5EKE&amp;feature=g-upl&amp;context=G2dc8befAUAAAAAAAAAA
http://www.youtube.com/watch?v=wDzm4v5r6uY
http://www.youtube.com/watch?v=wbsKQiyc5go
http://www.youtube.com/watch?v=y99h6JwJ1pY
http://youtu.be/oNH0s_Dv38Q#t=1m1s
http://youtu.be/oNH0s_Dv38Q#t=1m1s
http://youtu.be/oNH0s_Dv38Q#t=1m1s
http://zalil.ru/30922927
https://bitbucket.org/VoiDeD/steamre/src/6af5ab95757c/Projects/DepotDownloader/DepotDownloader
https://bitbucket.org/VoiDeD/steamre/src/6af5ab95757c/Projects/DepotDownloader/DepotDownloader
https://bitbucket.org/VoiDeD/steamre/src/6af5ab95757c/Projects/DepotDownloader/DepotDownloader
https://support.steampowered.com/view.php?ticketref=6931-WOKB-4412
https://support.steampowered.com/view.php?ticketref=6931-WOKB-4412
https://support.steampowered.com/view.php?ticketref=6931-WOKB-4412
https://youtu.be/2aIgFpJstro
https://youtu.be/2aIgFpJstro
https://youtu.be/2aIgFpJstro
steam://friends/add/76561197969042354
Terms:
the: 3384
to: 2011
a: 1386
I: 1342
you: 1190
it: 1032
of: 959
and: 891
is: 831
that: 774
in: 744
on: 638
from: 614
for: 543
this: 518
be: 483
but: 445
class="youtube-player": 429
type="text/html": 429
height="295": 429
allowfullscreen="": 429
frameborder="0": 429
width="480": 429
class="quotearea"><div: 413
class="quotelabel">Quote: 394
with: 393
if: 369
just: 353
can: 328
have: 327
not: 311
<div: 306
as: 300
get: 294
so: 294
at: 289
target="_blank": 280
do: 276
portal: 251
are: 244
was: 234
my: 229
I'm: 225
your: 220
it's: 215
glitch: 203
would: 199
or: 197
one: 192
don't: 189
up: 185
will: 178
like: 176
some: 175
way: 174
cube: 172
think: 165
out: 164
an: 162
by: 161
through: 161
how: 158
game: 154
time: 152
because: 148
-: 147
what: 145
than: 145
when: 143
about: 141
me: 140
all: 134
then: 132
Chapter: 132
route: 128
first: 127
2: 124
video: 124
into: 124
sure: 123
use: 123
Portal: 122
could: 122
make: 121
found: 121
there: 120
run: 119
really: 118
If: 115
It: 115
i: 114
jump: 113
The: 112
other: 109
<img: 108
using: 108
save: 107
faster: 107
know: 106
wall: 105
you're: 105
no: 103
only: 102
should: 101
see: 101
any: 100
Chamber: 97
actually: 97
without: 96
<a: 95
been: 94
which: 93
where: 92
has: 91
trick: 90
button: 90
much: 90
already: 89
it.: 88
more: 87
done: 87
did: 85
possible: 85
still: 85
go: 85
map: 85
You: 83
work: 82
version: 81
can't: 81
we: 80
speed: 80
they: 80
pretty: 76
new: 75
need: 74
I've: 73
level: 73
doesn't: 73
last: 73
even: 72
before: 72
after: 71
same: 70
off: 70
them: 66
This: 66
want: 65
doing: 64
able: 63
going: 63
had: 63
over: 63
here: 63
find: 62
might: 62
back: 60
trying: 59
does: 59
portals: 59
something: 58
made: 58
being: 57
crouch: 57
end: 56
skip: 56
another: 56
=: 56
down: 55
since: 55
load: 55
people: 55
post: 55
chamber: 55
someone: 55
while: 54
two: 54
good: 54
too: 54
class="quotetext"><div: 54
right: 54
start: 53
posted: 53
it,: 53
lot: 53
also: 52
co-op: 52
hard: 52
next: 52
probably: 51
may: 50
there's: 50
src="/file/Pz%2Fcj6ngNFMHn%2BnxTNlCutk1Ef8": 50
isn't: 50
why: 50
different: 49
stuff: 49
I'll: 49
haven't: 48
flying: 48
Users:
DemonStrate: 103
adzicents: 76
coolkid: 65
Paraxade: 65
Kingpin: 47
00Svo: 43
ExplodingCabbage: 43
Scepheo: 33
NabsterHax: 29
z1mb0bw4y: 27
ncla: 24
bandit5k: 23
znernicus: 16
toten: 15
forever: 14
TTom: 14
RøvRapunzel: 13
DJell: 13
S.: 13
Pmk138: 12
Negatratoron: 10
Laufas: 10
NoPegs: 10
Rogo_Pro_Venia: 9
Fed981: 9
logitechSDAZ: 9
Spyrunite: 9
Blazier: 9
romscout: 8
Inexistence: 8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment