Last active
April 13, 2022 21:36
-
-
Save Polsaker/f102c98b42deacb6a1b646de86d32ec4 to your computer and use it in GitHub Desktop.
Fetches the Alexa rank for Reddit alternatives
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import re | |
# Site list in the format ('DOMAIN', 'DISPLAY NAME')
# Display name is only used for the first column of the table.
sites = [('steemit.com', 'steemit'),
         ('band.us', 'band'),
         ('voat.co', 'voat'),
         ('swiflie.com', 'swiflie'),
         ('papaly.com', 'papaly'),
         ('sapien.network', 'sapien'),
         ('the-artifice.com', 'the artifice'),
         ('raddle.me', 'raddle'),
         ('snapzu.com', 'snapzu'),
         ('campussociety.com', 'campus society'),
         ('hubski.com', 'hubsky'),
         ('voten.co', 'voten'),
         ('scuttlebutt.nz', 'scuttlebutt'),
         ('dangeru.us', 'danger/u/'),
         ('gameiki.com', 'gameiki'),
         ('stacksity.com', 'stacksity'),
         ('weco.io', 'weco'),
         ('panjury.com', 'panjury'),
         ('saidit.net', 'saidit'),
         ('phuks.co', 'phuks'),
         ('wishfie.com', 'wishfie'),
         # was ('aether', 'getaether.net') -- domain and display name were
         # swapped relative to the ('DOMAIN', 'DISPLAY NAME') format.
         ('getaether.net', 'aether'),
         ('dojo.press', 'dojo press'),
         ('perusen.com', 'perusen'),
         ('flow-chat.com', 'flowchat'),
         ('parley.io', 'parley'),
         ('reallyread.it', 'reallyread.it'),
         ('trendulus.com', 'trendulus'),
         ('darto.com', 'darto'),
         ('ratesome.com', 'ratesome'),
         ('vlnc.co', 'vlnc'),
         ('monalo.net', 'monalo'),
         ('postwith.me', 'post with.me'),
         ('tildes.net', 'tildes'),
         ('atob.xyz', 'atob'),
         ('hereshot.com', 'hereshot'),
         ('9dot.win', '9dot'),
         ('uvoh.com', 'uvoh'),
         ('headcycle.com', 'headcycle'),
         ('rectacular.me', 'rectacular.me'),
         ('quickanswer.net', 'quickanswer'),
         ('unfollo.com', 'unfollo'),
         ('alpha.campfire.site', 'campfire'),
         ('poal.co', 'poal'),
         ('notabug.io', 'notabug')]
# Long-established communities comparable to Reddit; these are ranked
# for reference but never filtered by the activity cutoff.
similar = [
    ('discordapp.com', 'discord'),
    ('4chan.org', '4chan'),
    ('news.ycombinator.com', 'hacker news'),
    ('8ch.net', '8chan'),
    ('slashdot.org', 'slashdot'),
]
# Pulls the global rank out of Alexa's XML response, e.g.
# <POPULARITY URL="example.com/" TEXT="1234" SOURCE="panel"/>
regex = re.compile(r".*POPULARITY URL=.+ TEXT=\"(\d+)\" SOURCE.*")
ranks = []    # active sites: (domain, name, rank) with rank < 500000
ranks_n = []  # low-activity sites: rank >= 500000 or no Alexa data (None)
ranks_s = []  # established similar sites (never filtered)
def get_rank(site, timeout=10):
    """Return the Alexa global rank for *site*, or None if no rank is found.

    site: bare domain name, e.g. "example.com".
    timeout: seconds before the HTTP request is abandoned. The original
        call had no timeout, which can hang the whole script indefinitely
        if Alexa's endpoint stops responding.
    """
    resp = requests.get("http://data.alexa.com/data?cli=100&url=" + site,
                        timeout=timeout)
    ma = regex.search(resp.text)
    if ma:
        return int(ma.group(1))
    return None
# Go through all the sites, partitioning the main list: anything ranked
# better than 500,000 counts as active; worse-ranked or unranked sites
# (rank is None) land in the low-activity bucket.
for domain, name in sites:
    rank = get_rank(domain)
    if rank is not None and rank < 500000:
        ranks.append((domain, name, rank))
    else:
        ranks_n.append((domain, name, rank))

# Established similar sites are ranked but never filtered out.
for domain, name in similar:
    ranks_s.append((domain, name, get_rank(domain)))
# Sort everything by rank; the (is-None, rank) key pushes "no data"
# entries to the end without ever comparing None against an int.
def _rank_key(entry):
    rank = entry[2]
    return (rank is None, rank)

ranks.sort(key=_rank_key)
ranks_n.sort(key=_rank_key)
ranks_s.sort(key=_rank_key)
# Print results
def _print_table(rows):
    """Print one markdown table of (domain, name, rank) rows.

    A missing rank (None) renders as "No data". The original inline code
    was copy-pasted three times and tested truthiness (`if st[2]`), which
    would also mislabel a rank of 0 as "No data"; `is not None` is exact.
    """
    print("name | url | alexa")
    print("-----|-----|------")
    for domain, name, rank in rows:
        cell = "{:,}".format(rank) if rank is not None else "No data"
        print("{0} | [{1}](https://{1}) | {2}".format(name, domain, cell))

print("**Active Reddit Alternatives:**\n")
_print_table(ranks)
print("""
___
""")
print("*these may have low activity, based on Alexa rank*\n")
_print_table(ranks_n)
print("""---
*established sites similar to Reddit*
""")
_print_table(ranks_s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment