This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A wrapper to serialize data read from/written to leveldb in json | |
# Steven Englehardt | |
import leveldb | |
import json | |
class JsonLevelDB(object): | |
def __init__(self, filename, **kwargs): | |
self._filename = filename | |
self._db = leveldb.LevelDB(self._filename, **kwargs) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Bus times last updated 2015-11-04 | |
# Steven Englehardt (github.com/englehardt) | |
bus_times=('7:16' '7:46' '8:16' '8:26' '8:36' '8:46' '8:56' '9:06' '9:16' '9:26' '9:36' '9:46' '9:56' '10:06' '10:16' '10:26' '10:36' '10:46' '10:56' '11:06' '11:16' '11:46' '12:16' '12:46' '13:16' '13:46' '14:16' '14:46' '15:16' '15:46' '16:16' '16:46' '17:16' '17:46' '18:16' '18:31' '18:46' '19:01' '19:16' '19:31' '19:46' '20:19' '21:04' '21:49' '22:34' '23:19') | |
on_demand=('22:00' '3:00') | |
limit=4 | |
if [ $# -eq 1 ]; then | |
limit=$1 | |
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import json | |
import dill | |
import os | |
DATA_DIR = './' | |
WEBXRAY_LIST = 'webxray_orgs.json' | |
DISCONNECT_LIST = 'disconnect_list.json' | |
OUT_LIST = 'merged_organizations.dill' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"persianstat.com": ["persianstat.com"], | |
"marketgid": ["marketgid.com", "dt07.net", "dt00.net"], | |
"madvertise": ["madvertise.com"], | |
"voice2page": ["voice2page.com"], | |
"mixpanel": ["mixpanel.com"], | |
"automattic": ["wordpress.com", "polldaddy.com", "automattic.com", "wp.com", "gravatar.com", "intensedebate.com"], | |
"game advertising online": ["game-advertising-online.com"], | |
"adconion": ["amgdgt.com", "adconion.com", "smartclip.com", "euroclick.com"], | |
"sogou": ["sogou.com", "sogoucdn.com"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import multiprocessing | |
import urlparse | |
import requests | |
import json | |
import os | |
# Available: https://github.com/citp/OpenWPM/blob/master/automation/utilities/domain_utils.py # noqa | |
import domain_utils as du | |
# Available: https://gist.github.com/englehardt/802d1872d6bda2084723489a82540cb3 # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Steven Englehardt | |
github.com/englehardt | |
Some dependencies (probably not exhaustive): | |
sudo apt-get install python-Xlib scrot xserver-xephyr | |
sudo pip install pyautogui pyvirtualdisplay | |
This needs access to a Firefox binary, and hardcodes a relative location. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64 | |
import hashlib | |
import json | |
import re | |
import urllib2 | |
from trackingprotection_tools import DisconnectParser | |
TRACKER_CATEGORIES = [ | |
'Advertising', 'Analytics', 'Social', 'Content', 'Disconnect' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from trackingprotection_tools import DisconnectParser | |
BLOCKLIST_URL = 'https://raw.githubusercontent.com/mozilla-services/shavar-prod-lists/master/disconnect-blacklist.json' # noqa | |
REMAPPING_URL = 'https://raw.githubusercontent.com/mozilla-services/shavar-list-creation/master/disconnect_mapping.json' # noqa | |
dc = DisconnectParser( | |
blocklist_url=BLOCKLIST_URL, | |
disconnect_mapping_url=REMAPPING_URL, | |
verbose=True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" This script reads a sqlite database and writes the content to a parquet | |
database on S3 formatted as OpenWPM would format. It's best to just run this | |
on AWS as it bottlenecks on the S3 upload. This is a lightly modified version | |
of OpenWPM's S3Aggregator class. | |
""" | |
import os | |
import sqlite3 | |
import sys | |
from collections import defaultdict |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript== | |
// @name Remove Likes on Twitter | |
// @namespace twitter | |
// @include https://twitter.com/ | |
// @version 2 | |
// @grant GM_addStyle | |
// ==/UserScript== | |
GM_addStyle('div.promoted-tweet, div[data-component-context=suggest_activity_tweet] {display: none !important}'); |
OlderNewer