Skip to content

Instantly share code, notes, and snippets.

@ibebrett
Created May 1, 2014 21:47
Show Gist options
  • Save ibebrett/11462628 to your computer and use it in GitHub Desktop.
Save ibebrett/11462628 to your computer and use it in GitHub Desktop.
from pyramid.paster import bootstrap
# Bootstrap the Pyramid environment from 'moatbot_experiment.ini' at import
# time and keep the result in `env`.
# NOTE(review): this call sits before the scrapepipeline imports below,
# presumably because those modules need the configured environment -- confirm
# before reordering the imports.
env = bootstrap('moatbot_experiment.ini')
from datetime import datetime, timedelta
from scrapepipeline import US_DESKTOP_REGION, UK_DESKTOP_REGION
from scrapepipeline.potato2.client import GlobalEntity, TagGroup, Tag
from scrapepipeline.cache import ghostery
from itertools import chain, product
import json
import csv
import argparse
import os
# Ghostery bug definitions, loaded once at import time. The functions below
# read bugs['bugs'] as a list of dicts carrying at least 'aid' and 'name'.
# The file is opened in a `with` block so the handle is closed right after
# parsing instead of being left to the garbage collector.
with open('moat_ghostery_bugs/moat_ghostery_bugs.json') as bugs_file:
    bugs = json.load(bugs_file)

# Mapping of category name -> iterable of (name, <ignored>) pairs, as consumed
# by get_tags_by_categories().
categories = ghostery.get_ghostery_categories('dev_ghostery_categories')
def creatives_count(region):
    """Return the number of creatives reported for ``region``.

    Builds a ``GlobalEntity`` for the region and returns the ``count()`` of
    its ``get_creatives()`` result.
    """
    return GlobalEntity(region=region).get_creatives().count()
def get_all_categories():
    """Return the names (keys) of the module-level ``categories`` mapping."""
    category_names = categories.keys()
    return category_names
def get_tags_by_categories(region, category_names):
    """Return the impression count for tags in the given categories.

    Despite the name, this returns a number rather than tags: the
    ``num_impressions`` of the first impression-stats entry for ``region``
    filtered by every tag whose bug name appears in one of
    ``category_names``.

    Args:
        region: Region passed to ``GlobalEntity`` and to each ``Tag``.
        category_names: Iterable of keys into the module-level ``categories``
            mapping.

    Returns:
        ``stats[0].num_impressions``, or 0 when the stats result is empty.

    Raises:
        KeyError: If a category name is not present in ``categories``.
    """
    # Collect the wanted names into a set so the membership test below is
    # O(1) per bug instead of a linear scan of a list.
    # NOTE(review): assumes the names are hashable (they are compared against
    # JSON-decoded bug names) -- confirm.
    wanted_names = set(
        name
        for name, _ in chain.from_iterable(
            categories[cname] for cname in category_names
        )
    )
    aids = [bug['aid'] for bug in bugs['bugs'] if bug['name'] in wanted_names]
    tags = TagGroup([Tag(aid).region(region) for aid in aids])
    stats = GlobalEntity(region=region).get_impression_stats(filter_with=tags)
    # Only the indexing is expected to fail (empty result), so keep the try
    # body down to that single operation.
    try:
        first_stat = stats[0]
    except IndexError:
        return 0
    return first_stat.num_impressions
def write_report(results_dir, report_name, headers, rows=None, table=None):
    """Write a CSV report to ``<results_dir>/<report_name>.csv``.

    The header row is written first, followed by ``rows`` and then one row
    per ``table`` entry (the key followed by that key's values).

    Args:
        results_dir: Directory to write into. An empty string means the
            current working directory.
        report_name: File name without the ``.csv`` extension.
        headers: Sequence of column titles. When ``table`` is given, an empty
            leading title is prepended for the key column.
        rows: Optional sequence of row sequences. The caller's object is not
            modified.
        table: Optional mapping of row label -> sequence of cell values.
    """
    # Copy so that appending the table rows never mutates the caller's list.
    out_rows = list(rows) if rows else []
    header_row = list(headers)
    if table:
        header_row = [''] + header_row
        out_rows.extend([key] + list(table[key]) for key in table)
    # os.path.join (unlike os.path.sep.join) does not turn an empty
    # results_dir into a path rooted at the filesystem root.
    report_path = os.path.join(results_dir, '%s.csv' % report_name)
    with open(report_path, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(header_row)
        writer.writerows(out_rows)
def main(results_dir, regions):
    """Print per-region creative counts and write the 'categories' report.

    For every region the creative count is printed as ``<region> <count>``.
    One report row ``[region, category, count]`` is collected for every
    (region, category) pair, and the rows are handed to ``write_report`` once
    at the end, so a later region cannot overwrite an earlier region's data.

    Args:
        results_dir: Directory passed through to ``write_report``.
        regions: Iterable of region names.
    """
    category_names = get_all_categories()
    report_rows = []
    for region_name in regions:
        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (region_name, creatives_count(region_name)))
        for category_name in category_names:
            report_rows.append([
                region_name,
                category_name,
                get_tags_by_categories(region_name, [category_name]),
            ])
    write_report(
        results_dir,
        'categories',
        ['Region', 'Category', 'Count'],
        report_rows,
    )
if __name__ == '__main__':
    # Script entry point: read the output directory and the list of regions
    # from the command line, then run the report.
    cli = argparse.ArgumentParser()
    cli.add_argument('--results-dir', default='')
    cli.add_argument('--regions', nargs='+', default=['US', 'UK'])
    options = cli.parse_args()
    main(results_dir=options.results_dir, regions=options.regions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment