Skip to content

Instantly share code, notes, and snippets.

@miratcan
Created October 9, 2016 13:40
Show Gist options
  • Save miratcan/c2b486945798c70b35c1adc7f70b70db to your computer and use it in GitHub Desktop.
Save miratcan/c2b486945798c70b35c1adc7f70b70db to your computer and use it in GitHub Desktop.
Cleanup script for Disqus URL Mapper output.
import sys
from json import loads
from time import sleep
from urllib import urlencode, unquote_plus
from urllib2 import HTTPError, URLError, urlopen
# Placeholder configuration: replace both values before running the script.
# API_SECRET is sent as the ``api_secret`` query parameter of the Disqus API
# request built in get_comment_count().
API_SECRET = '__YOUR_DUSQUS_APP_API_SECRET__'
# FORUM_ID is not referenced anywhere else in the visible code.
FORUM_ID = '___YOUR_DISQUS_FORUM_ID___'
def get_comment_count(url):
    """Return the number of Disqus posts recorded for the thread at ``url``.

    Calls the Disqus ``threads/set`` endpoint, looking the thread up by its
    link, and reads the ``posts`` field of the first thread in the response.

    Args:
        url: Page URL whose Disqus thread should be looked up.

    Returns:
        The ``posts`` value of the first thread returned, or 0 when the
        response contains no thread for this link.

    Raises:
        Whatever ``urlopen`` raises when the API request fails (for example
        ``HTTPError``); such errors are not handled here.
    """
    # Percent-encode the parameters: the page URL may itself contain
    # '&', '?' or '#', which would otherwise corrupt the query string.
    # A list of pairs (rather than a dict) keeps the parameter order stable.
    # NOTE(review): FORUM_ID is defined in this file but is not sent with the
    # request; the Disqus docs suggest ``link:`` lookups may need a ``forum``
    # parameter -- TODO confirm.
    query = urlencode([
        ('thread', 'link:%s' % url),
        ('api_secret', API_SECRET),
    ])
    api_url = 'https://disqus.com/api/3.0/threads/set.json?%s' % query

    response = urlopen(api_url)
    try:
        payload = loads(response.read())
    finally:
        # Always release the connection, even if reading or parsing fails.
        response.close()

    threads = payload['response']
    if not threads:
        # No thread is registered for this link, so there are no comments.
        return 0
    return threads[0]['posts']
# URLs that were reachable and have at least one Disqus comment.
valid_urls = []

if __name__ == '__main__':
    # Script entry point: read the URL list named by the first command-line
    # argument, probe every URL, and print the ones that are reachable and
    # still have Disqus comments.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <url-list-file>' % sys.argv[0])
    filename = sys.argv[1]

    with open(filename) as f:
        for line in f.read().split('\n'):
            if not line:
                continue
            # The last character of every row is dropped before URL-decoding.
            # NOTE(review): presumably a trailing separator or '\r' in the
            # URL Mapper export -- confirm against a real export file.
            line = unquote_plus(line[:-1])
            print('TESTING: %s' % line)

            try:
                remote = urlopen(line)
            except (URLError, ValueError):
                # URLError covers HTTPError (its subclass) as well as DNS and
                # connection failures; ValueError is raised for rows that are
                # not well-formed URLs. Neither should abort the whole run.
                reachable = False
            else:
                # Only reachability matters; release the connection at once.
                remote.close()
                reachable = True
            print('REACHABLE: %s' % reachable)

            if reachable:
                comment_count = get_comment_count(line)
                print('COMMENT Count: %s' % comment_count)
                if comment_count > 0:
                    valid_urls.append(line)

            print('------')
            # Pause between URLs to avoid hammering the site and the API.
            sleep(1)

    print('---- VALID URLS ----')
    for url in valid_urls:
        print(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment