Last active
April 13, 2022 21:36
-
-
Save Polsaker/f102c98b42deacb6a1b646de86d32ec4 to your computer and use it in GitHub Desktop.
Fetches the Alexa rank for Reddit alternatives
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import re | |
# Site list in the format ('DOMAIN', 'DISPLAY NAME')
# Display name is only used for the first column of the table.
sites = [('steemit.com', 'steemit'),
         ('band.us', 'band'),
         ('voat.co', 'voat'),
         ('swiflie.com', 'swiflie'),
         ('papaly.com', 'papaly'),
         ('sapien.network', 'sapien'),
         ('the-artifice.com', 'the artifice'),
         ('raddle.me', 'raddle'),
         ('snapzu.com', 'snapzu'),
         ('campussociety.com', 'campus society'),
         ('hubski.com', 'hubsky'),
         ('voten.co', 'voten'),
         ('scuttlebutt.nz', 'scuttlebutt'),
         ('dangeru.us', 'danger/u/'),
         ('gameiki.com', 'gameiki'),
         ('stacksity.com', 'stacksity'),
         ('weco.io', 'weco'),
         ('panjury.com', 'panjury'),
         ('saidit.net', 'saidit'),
         ('phuks.co', 'phuks'),
         ('wishfie.com', 'wishfie'),
         # was ('aether', 'getaether.net') -- domain and display name were
         # swapped relative to the ('DOMAIN', 'DISPLAY NAME') format.
         ('getaether.net', 'aether'),
         ('dojo.press', 'dojo press'),
         ('perusen.com', 'perusen'),
         ('flow-chat.com', 'flowchat'),
         ('parley.io', 'parley'),
         ('reallyread.it', 'reallyread.it'),
         ('trendulus.com', 'trendulus'),
         ('darto.com', 'darto'),
         ('ratesome.com', 'ratesome'),
         ('vlnc.co', 'vlnc'),
         ('monalo.net', 'monalo'),
         ('postwith.me', 'post with.me'),
         ('tildes.net', 'tildes'),
         ('atob.xyz', 'atob'),
         ('hereshot.com', 'hereshot'),
         ('9dot.win', '9dot'),
         ('uvoh.com', 'uvoh'),
         ('headcycle.com', 'headcycle'),
         ('rectacular.me', 'rectacular.me'),
         ('quickanswer.net', 'quickanswer'),
         ('unfollo.com', 'unfollo'),
         ('alpha.campfire.site', 'campfire'),
         ('poal.co', 'poal'),
         ('notabug.io', 'notabug')]
# Long-established communities comparable to Reddit; these are ranked
# for reference but never filtered by the activity cutoff.
similar = [
    ('discordapp.com', 'discord'),
    ('4chan.org', '4chan'),
    ('news.ycombinator.com', 'hacker news'),
    ('8ch.net', '8chan'),
    ('slashdot.org', 'slashdot'),
]
# Pulls the global rank out of Alexa's XML response, e.g.
# <POPULARITY URL="example.com/" TEXT="1234" SOURCE="panel"/>
regex = re.compile(r".*POPULARITY URL=.+ TEXT=\"(\d+)\" SOURCE.*")
ranks = []    # active sites: (domain, name, rank) with rank < 500000
ranks_n = []  # low-activity sites: rank >= 500000 or no Alexa data (None)
ranks_s = []  # established similar sites (never filtered)
def get_rank(site, timeout=10):
    """Return the Alexa global rank for *site*, or None if no rank is found.

    site: bare domain name, e.g. "example.com".
    timeout: seconds before the HTTP request is abandoned. The original
        call had no timeout, which can hang the whole script indefinitely
        if Alexa's endpoint stops responding.
    """
    resp = requests.get("http://data.alexa.com/data?cli=100&url=" + site,
                        timeout=timeout)
    ma = regex.search(resp.text)
    if ma:
        return int(ma.group(1))
    return None
# Go through all the sites, partitioning the main list: anything ranked
# better than 500,000 counts as active; worse-ranked or unranked sites
# (rank is None) land in the low-activity bucket.
for domain, name in sites:
    rank = get_rank(domain)
    if rank is not None and rank < 500000:
        ranks.append((domain, name, rank))
    else:
        ranks_n.append((domain, name, rank))

# Established similar sites are ranked but never filtered out.
for domain, name in similar:
    ranks_s.append((domain, name, get_rank(domain)))
# Sort everything by rank; the (is-None, rank) key pushes "no data"
# entries to the end without ever comparing None against an int.
def _rank_key(entry):
    rank = entry[2]
    return (rank is None, rank)

ranks.sort(key=_rank_key)
ranks_n.sort(key=_rank_key)
ranks_s.sort(key=_rank_key)
# Print results
def _print_table(rows):
    """Print one markdown table of (domain, name, rank) rows.

    A missing rank (None) renders as "No data". The original inline code
    was copy-pasted three times and tested truthiness (`if st[2]`), which
    would also mislabel a rank of 0 as "No data"; `is not None` is exact.
    """
    print("name | url | alexa")
    print("-----|-----|------")
    for domain, name, rank in rows:
        cell = "{:,}".format(rank) if rank is not None else "No data"
        print("{0} | [{1}](https://{1}) | {2}".format(name, domain, cell))

print("**Active Reddit Alternatives:**\n")
_print_table(ranks)
print("""
___
""")
print("*these may have low activity, based on Alexa rank*\n")
_print_table(ranks_n)
print("""---
*established sites similar to Reddit*
""")
_print_table(ranks_s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment