Created
May 1, 2014 21:47
-
-
Save ibebrett/11462628 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyramid.paster import bootstrap | |
env = bootstrap('moatbot_experiment.ini') | |
from datetime import datetime, timedelta | |
from scrapepipeline import US_DESKTOP_REGION, UK_DESKTOP_REGION | |
from scrapepipeline.potato2.client import GlobalEntity, TagGroup, Tag | |
from scrapepipeline.cache import ghostery | |
from itertools import chain, product | |
import json | |
import csv | |
import argparse | |
import os | |
# Load the bug definitions through a context manager so the file handle is
# closed as soon as parsing finishes instead of being left to the garbage
# collector.
with open('moat_ghostery_bugs/moat_ghostery_bugs.json') as bugs_file:
    bugs = json.load(bugs_file)

# Category data, indexed by category name in get_tags_by_categories().
categories = ghostery.get_ghostery_categories('dev_ghostery_categories')
def creatives_count(region):
    """Return the creative count for *region*.

    Builds a ``GlobalEntity`` for the region and returns the value of
    ``count()`` on whatever its ``get_creatives()`` call yields.
    """
    return GlobalEntity(region=region).get_creatives().count()
def get_all_categories():
    """Return the keys of the module-level ``categories`` object."""
    category_names = categories.keys()
    return category_names
def get_tags_by_categories(region, category_names):
    """Return the impression count for the tags in the given categories.

    Despite its name, this function returns a number, not tags: the
    ``num_impressions`` attribute of the first impression-stats entry, or
    ``0`` when no stats come back.

    :param region: region passed to each ``Tag`` and to ``GlobalEntity``.
    :param category_names: iterable of keys into the module-level
        ``categories`` object; an unknown key raises ``KeyError``.
    :returns: ``stats[0].num_impressions`` or ``0`` if ``stats`` is empty.
    """
    # Each categories[cname] entry is iterated as (name, <ignored>) pairs.
    # A set gives constant-time membership for the scan over all bugs below
    # (assumes names are hashable, e.g. strings -- TODO confirm).
    names = {
        name
        for cname in category_names
        for name, _ in categories[cname]
    }
    aids = [bug['aid'] for bug in bugs['bugs'] if bug['name'] in names]
    tags = TagGroup([Tag(aid).region(region) for aid in aids])

    stats = GlobalEntity(region=region).get_impression_stats(filter_with=tags)
    try:
        return stats[0].num_impressions
    except IndexError:
        # No stats entries were returned for this tag group.
        return 0
def write_report(results_dir, report_name, headers, rows=None, table=None):
    """Write ``<report_name>.csv`` into *results_dir*.

    The file starts with the header row, followed by *rows*, followed by one
    row per *table* entry (the key, then that key's values).

    :param results_dir: directory to write into; an empty string means the
        current working directory.
    :param report_name: file name without the ``.csv`` extension.
    :param headers: list of column titles. When *table* is given, an empty
        leading title is added for the key column.
    :param rows: optional list of row sequences; it is not modified.
    :param table: optional mapping of row label -> sequence of values.
    """
    # Work on copies so neither the caller's rows nor headers are mutated.
    out_rows = list(rows) if rows else []
    header_row = list(headers)
    if table:
        header_row = [''] + header_row
        out_rows.extend([key] + list(table[key]) for key in table)

    # os.path.join keeps an empty results_dir relative; joining on the path
    # separator would turn it into a path at the filesystem root.
    path = os.path.join(results_dir, '%s.csv' % report_name)
    with open(path, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(header_row)
        writer.writerows(out_rows)
def main(results_dir, regions):
    """Print per-region creative counts and write the categories report.

    For every region the creative count is printed as ``<region> <count>``.
    Then a single ``categories.csv`` is written into *results_dir* with one
    row per (region, category) pair holding the value returned by
    ``get_tags_by_categories`` for that pair.

    :param results_dir: directory that receives ``categories.csv``.
    :param regions: list of region names to report on.
    """
    for region_name in regions:
        # Formatted through %-interpolation so the output is the same
        # whether print is a statement (Python 2) or a function (Python 3).
        print('%s %s' % (region_name, creatives_count(region_name)))

    category_names = get_all_categories()
    rows = [
        [
            region_name,
            category_name,
            get_tags_by_categories(region_name, [category_name]),
        ]
        for region_name, category_name in product(regions, category_names)
    ]
    write_report(
        results_dir,
        'categories',
        ['Region', 'Category', 'Count'],
        rows,
    )
if __name__ == '__main__':
    # Script entry point: read the output directory and the region list
    # from the command line, then hand both to main().
    cli = argparse.ArgumentParser()
    cli.add_argument('--results-dir', default='')
    cli.add_argument('--regions', nargs='+', default=['US', 'UK'])
    options = cli.parse_args()
    main(results_dir=options.results_dir, regions=options.regions)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment