Skip to content

Instantly share code, notes, and snippets.

@x
Created April 23, 2014 17:30
Show Gist options
  • Save x/11225232 to your computer and use it in GitHub Desktop.
Save x/11225232 to your computer and use it in GitHub Desktop.
getting csv of paths without xys
def main():
    """
    [debug-only]

    Print one CSV line per newsbeat domain summarising recent click-through
    documents:

        domain,path,<distinct target paths>,<target paths without locations>

    A target path counts as "without locations" when none of its documents
    in the time window has a truthy ``LOCATIONS`` value. Domains that have no
    documents in the window are skipped.
    """
    # NOTE(review): the "domain", "path" and "minutes" options are defined
    # but never read below -- the domain list comes from the cellar DB and
    # the window is hard-coded. Confirm whether they should be wired in.
    define("domain", default="theverge.com")
    define("path", default=None)
    define("console", default=True, type=bool)
    define("debug", default=True, type=bool)
    define("click_through_host", default="localhost")
    define("minutes", default=5)

    from datetime import timedelta
    from action.link_positions.lib.utils import get_path_and_ctr_range

    read_options(None)
    basicConfig(options=options)

    dbs = init_dbs(click_through_host=options.click_through_host,
                   cellar_host='cellarreplicaset09')
    # To debug a single site, replace this with a literal list of domains.
    domains = dbs['cellar_db'].get_newsbeat_domains()
    ct_db = dbs['click_through_db']

    end = datetime.now()
    # NOTE(review): 15 minutes is hard-coded; options.minutes (default 5)
    # is ignored. Left as-is to keep the output unchanged.
    start = end - timedelta(minutes=15)

    for domain in domains:
        path, _ = get_path_and_ctr_range(domain, '/')
        docs = ct_db._get_docs(domain, path, start, end)

        # Collect both sets in a single pass: if ``docs`` is a one-shot
        # iterable (e.g. a DB cursor or generator), iterating it a second
        # time would yield nothing and every path would be misreported as
        # having no locations.
        paths = set()
        paths_with_xy = set()
        for doc in docs:
            target_path = doc[TARGET_PATH]
            paths.add(target_path)
            if doc[LOCATIONS]:
                paths_with_xy.add(target_path)

        if not paths:
            continue

        row = [domain, path, len(paths), len(paths - paths_with_xy)]
        print(','.join(map(str, row)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment