Skip to content

Instantly share code, notes, and snippets.

@AnsonYe
Created December 9, 2015 04:10
Show Gist options
  • Save AnsonYe/cc0db88c816e0cc96d61 to your computer and use it in GitHub Desktop.
Save AnsonYe/cc0db88c816e0cc96d61 to your computer and use it in GitHub Desktop.
Generate a domain list from gfwlist.txt
#!/usr/bin/env python
#coding=utf-8
import urllib2
import re
import os
import base64
# Download the base64-encoded gfwlist, pull the first domain-like token out of
# every rule line, and write each distinct domain (in first-seen order) to
# 'domain-gfwlist.txt', one per line.

# the url of gfwlist
baseurl = 'https://autoproxy-gfwlist.googlecode.com/svn/trunk/gfwlist.txt'
# Group 1 captures "<label>.<rest>" (word chars, '-', '_' and '.'); any trailing
# '/' or '*' characters are consumed but not captured.  Raw string so the
# backslashes reach the regex engine untouched.
domain_pattern = r'([\w\-\_]+\.[\w\.\-\_]+)[\/\*]*'
domain_regex = re.compile(domain_pattern)

# Fetch first and only then open the output file, so a failed download does
# not truncate an existing 'domain-gfwlist.txt'.
response = urllib2.urlopen(baseurl, timeout=15)
try:
    encoded = response.read()
finally:
    response.close()
# The list is served base64-encoded; decode it to the plain rule text.
content = base64.b64decode(encoded)

# remember all blocked domains, in case of duplicate records
domainlist = []          # distinct domains, in the order they were first seen
seen = set()             # same domains, for O(1) duplicate checks

with open('domain-gfwlist.txt', 'w') as fs:
    for line in content.splitlines():
        match = domain_regex.search(line)
        if match is None:
            # No domain-like token on this line.
            continue
        # Only the first match on a line is used.
        domain = match.group(1)
        if domain in seen:
            continue
        seen.add(domain)
        domainlist.append(domain)
        fs.write(domain + '\n')

# Parenthesised single argument prints identically under the Python 2 print
# statement and also parses as a Python 3 function call.
print('done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment