This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from trackingprotection_tools import DisconnectParser | |
# Raw GitHub URL of disconnect-blacklist.json in the
# mozilla-services/shavar-prod-lists repository (master branch).
BLOCKLIST_URL = 'https://raw.githubusercontent.com/mozilla-services/shavar-prod-lists/master/disconnect-blacklist.json' # noqa
# Raw GitHub URL of disconnect_mapping.json in the
# mozilla-services/shavar-list-creation repository (master branch).
REMAPPING_URL = 'https://raw.githubusercontent.com/mozilla-services/shavar-list-creation/master/disconnect_mapping.json' # noqa
dc = DisconnectParser( | |
blocklist_url=BLOCKLIST_URL, | |
disconnect_mapping_url=REMAPPING_URL, | |
verbose=True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64 | |
import hashlib | |
import json | |
import re | |
import urllib2 | |
from trackingprotection_tools import DisconnectParser | |
TRACKER_CATEGORIES = [ | |
'Advertising', 'Analytics', 'Social', 'Content', 'Disconnect' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Steven Englehardt | |
github.com/englehardt | |
Some dependencies (probably not exhaustive): | |
sudo apt-get install python-Xlib scrot xserver-xephyr | |
sudo pip install pyautogui pyvirtualdisplay | |
This needs access to a Firefox binary, and hardcodes a relative location. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import multiprocessing | |
import urlparse | |
import requests | |
import json | |
import os | |
# Available: https://github.com/citp/OpenWPM/blob/master/automation/utilities/domain_utils.py # noqa | |
import domain_utils as du | |
# Available: https://gist.github.com/englehardt/802d1872d6bda2084723489a82540cb3 # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"persianstat.com": ["persianstat.com"], | |
"marketgid": ["marketgid.com", "dt07.net", "dt00.net"], | |
"madvertise": ["madvertise.com"], | |
"voice2page": ["voice2page.com"], | |
"mixpanel": ["mixpanel.com"], | |
"automattic": ["wordpress.com", "polldaddy.com", "automattic.com", "wp.com", "gravatar.com", "intensedebate.com"], | |
"game advertising online": ["game-advertising-online.com"], | |
"adconion": ["amgdgt.com", "adconion.com", "smartclip.com", "euroclick.com"], | |
"sogou": ["sogou.com", "sogoucdn.com"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import json | |
import dill | |
import os | |
# Directory used for data files; defaults to the current working directory.
# NOTE(review): presumably the file names below are resolved relative to
# this directory -- confirm against the rest of the script.
DATA_DIR = './'
# File name of the webXray organization list (JSON).
WEBXRAY_LIST = 'webxray_orgs.json'
# File name of the Disconnect list (JSON).
DISCONNECT_LIST = 'disconnect_list.json'
# File name of the merged output.
# NOTE(review): the .dill suffix suggests it is serialized with `dill` -- confirm.
OUT_LIST = 'merged_organizations.dill'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Bus times last updated 2015-11-04
# Steven Englehardt (github.com/englehardt)

# Bus times as 24-hour H:MM strings, listed in ascending order.
bus_times=('7:16' '7:46' '8:16' '8:26' '8:36' '8:46' '8:56' '9:06' '9:16' '9:26' '9:36' '9:46' '9:56' '10:06' '10:16' '10:26' '10:36' '10:46' '10:56' '11:06' '11:16' '11:46' '12:16' '12:46' '13:16' '13:46' '14:16' '14:46' '15:16' '15:46' '16:16' '16:46' '17:16' '17:46' '18:16' '18:31' '18:46' '19:01' '19:16' '19:31' '19:46' '20:19' '21:04' '21:49' '22:34' '23:19')
# NOTE(review): presumably the start and end of an on-demand service
# window -- confirm against the rest of the script.
on_demand=('22:00' '3:00')

# Default limit is 4; it is overridden only when exactly one positional
# argument is supplied.
# NOTE(review): presumably the number of upcoming times to show -- confirm.
limit=4
if [ $# -eq 1 ]; then
    limit=$1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A wrapper to serialize data read from/written to leveldb in json | |
# Steven Englehardt | |
import leveldb | |
import json | |
class JsonLevelDB(object):
    """Wrapper around a LevelDB database handle.

    Per the file header, the wrapper is meant to serialize data read
    from / written to LevelDB as JSON.
    """

    def __init__(self, filename, **kwargs):
        """Open the LevelDB database identified by ``filename``.

        Args:
            filename: Passed as the first argument to ``leveldb.LevelDB``
                and kept on the instance as ``_filename``.
            **kwargs: Forwarded unchanged to ``leveldb.LevelDB``.
        """
        self._filename = filename
        self._db = leveldb.LevelDB(self._filename, **kwargs)
NewerOlder