Skip to content

Instantly share code, notes, and snippets.

@peterbe
Created March 22, 2019 20:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save peterbe/8e2cdfe75f13502aa61d2747e6f573ae to your computer and use it in GitHub Desktop.
import datetime
import os
from urllib.parse import urljoin, urlparse
from collections import namedtuple
import pyquery
import click
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from django.core.wsgi import get_wsgi_application
from django.utils import timezone
from django.contrib.auth import get_user_model
from django.forms import ValidationError
# Django must be configured (settings module + WSGI app) BEFORE importing any
# experimenter models/forms, which is why these imports come after the setup.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "experimenter.settings")
application = get_wsgi_application()
from experimenter.experiments.models import Experiment
from experimenter.experiments.forms import (
ExperimentOverviewForm,
ExperimentVariantsAddonForm,
ExperimentVariantsPrefForm,
)
# Base URL of the remote Experimenter instance to scrape; overridable via env.
ROOT_URL = os.environ.get(
"ROOT_URL", "https://experimenter.services.mozilla.com"
)
# Optionally cache GET responses on disk so repeated runs don't re-download.
try:
    import requests_cache

    requests_cache.install_cache(
        "requests_cache1", expire_after=60 * 30, allowable_methods=["GET"]
    )
    print(
        "Warning! Running in debug mode means all HTTP requests are cached "
        "indefinitely. To reset HTTP caches, delete the file "
        "'requests_cache1.sqlite'"
    )
except ImportError:
    print("Consider 'pip install requests_cache' to persist requests")
class Unauthorized(Exception):
    """Raised when the remote instance rejects the supplied session cookie."""
def run(cookies, page=1, verbose=False, dry_run=False):
    """Scrape experiments listed on the remote Experimenter at ROOT_URL and
    recreate them locally by feeding the scraped form data through the
    project's own Django forms.

    :param cookies: dict of session cookies valid on the remote instance.
    :param page: 1-based listing page to scrape.
    :param verbose: when True, pretty-print the scraped variants form data.
    :param dry_run: when True, validate everything but never save anything.
    :raises Unauthorized: if the remote redirects to auth0 (bad cookie).
    :raises ValidationError: if scraped data fails local form validation.
    """
    session = requests_retry_session()

    def download(url):
        # Fetch a page with the user's cookies. Redirects are NOT followed so
        # the auth0 login redirect can be detected and reported clearly.
        response = session.get(
            url,
            cookies=cookies,
            allow_redirects=False,
            headers={
                # Not sure what this needs to be.
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:67.0) Gecko/20100101 Firefox/67.0"
            },
        )
        response.raise_for_status()
        if (
            response.status_code == 302
            and "auth.mozilla.auth0.com" in response.headers["location"]
        ):
            raise Unauthorized("The cookie you provided doesn't work")
        return response

    uri = "/"
    if page and page > 1:
        uri = f"/?page={page}"
    url = urljoin(ROOT_URL, uri)
    response = download(url)
    doc = pyquery.PyQuery(response.text)
    doc.make_links_absolute(base_url=ROOT_URL)

    # Collect the detail-page URL of every experiment listed on this page.
    experiment_urls = set()
    for a in doc("div.row .col-9 a.hovershadow").items():
        href = a.attr("href")
        path = urlparse(href).path
        if path.startswith("/experiments/"):
            if path.endswith("/fff/"):
                # NOTE(review): looks like a leftover debug exclusion — confirm.
                continue
            experiment_urls.add(href)

    # The experimenter forms expect a request-like object exposing `.user`.
    Request = namedtuple("Request", ["user"])
    request = Request(user=get_user_model().objects.get(is_superuser=True))

    def data_from_form(form):
        # Convert a scraped <form> into a flat dict suitable for a Django form.
        data = {}
        for inp in form("input, textarea").items():
            name = inp.attr("name")
            if name == "csrfmiddlewaretoken":
                continue
            data[name] = inp.val()
        for select in form("select").items():
            name = select.attr("name")
            for opt in select("option[selected]").items():
                if name == "owner":
                    # Owners are matched (or created) locally by email address.
                    email = opt.text().strip()
                    user, _ = get_user_model().objects.get_or_create(
                        username=email, email=email
                    )
                    data["owner"] = user.id
                elif name in data:
                    # Multi-select: accumulate selected values into a list.
                    if not isinstance(data[name], list):
                        data[name] = [data[name]]
                    data[name].append(opt.val())
                else:
                    data[name] = opt.val()
        return data

    for url in experiment_urls:
        print(url)
        response = download(url + "edit/")
        doc = pyquery.PyQuery(response.text)
        doc.make_links_absolute(base_url=ROOT_URL)
        form, = doc('form[method="POST"]').items()
        data = data_from_form(form)
        try:
            experiment = Experiment.objects.get(name=data["name"])
        except Experiment.DoesNotExist:
            # Remote start dates may be in the past; pick a future date so the
            # overview form validates.
            data["proposed_start_date"] = (
                timezone.now() + datetime.timedelta(days=10)
            ).strftime("%Y-%m-%d")
            overview_form = ExperimentOverviewForm(request, data=data)
            if not overview_form.is_valid():
                print(overview_form.errors)
                raise ValidationError(overview_form.errors)
            if dry_run:
                # Can't fill in variants without a saved experiment; move on.
                print("Would create experiment", data["name"])
                continue
            experiment = overview_form.save()
        print(repr(experiment))

        # Population (variants)
        response = download(url + "edit-variants/")
        doc = pyquery.PyQuery(response.text)
        doc.make_links_absolute(base_url=ROOT_URL)
        form, = doc('form[method="POST"]').items()
        data = data_from_form(form)
        # Drop formset DELETE flags; scraped variants are never deleted here.
        for key in list(data.keys()):
            if "-DELETE" in key:
                data.pop(key, None)
        if verbose:
            from pprint import pprint

            pprint(data)
        data["experiment"] = experiment.id
        if experiment.type == "pref":
            form_class = ExperimentVariantsPrefForm
        elif experiment.type == "addon":
            form_class = ExperimentVariantsAddonForm
        else:
            raise Exception(f"Unexpected experiment type {experiment.type!r}")
        variants_form = form_class(request, data=data, instance=experiment)
        if variants_form.is_valid():
            if dry_run:
                print("Would save variants for", data["name"])
            else:
                variants_form.save()
        elif variants_form.errors:
            print("ERRORS:")
            print(variants_form.errors)
            raise ValidationError(variants_form.errors)
def requests_retry_session(
    retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 504)
):
    """Build a requests Session whose HTTPAdapter retries connection errors.

    A naive ``HTTPAdapter(max_retries=3)`` raises immediately on connection
    errors; mounting an explicit :class:`Retry` policy also retries those,
    which behaves better on unpredictable networks. Requests itself "does not
    retry failed connections" by default — see
    http://docs.python-requests.org/en/master/api/?highlight=retries#requests.adapters.HTTPAdapter

    ``backoff_factor`` is documented at
    https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.retry.Retry
    — with retries=3 and backoff_factor=0.3 the sleeps look like
    ``[0.3, 0.6, 1.2]``.
    """  # noqa
    retry_policy = Retry(
        connect=retries,
        read=retries,
        total=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry_policy)
    session = requests.Session()
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter)
    return session
def error_out(msg, raise_abort=True):
    """Echo *msg* in red and, unless ``raise_abort`` is False, abort the CLI."""
    styled = click.style(msg, fg="red")
    click.echo(styled)
    if raise_abort:
        raise click.Abort
@click.command()
@click.option("-v", "--verbose", is_flag=True)
@click.option("-d", "--dry-run", is_flag=True)
@click.option("-p", "--page", default=1)
@click.argument("cookie")
def cli(cookie, page, dry_run, verbose):
    """Import experiments from the remote Experimenter instance.

    COOKIE is a raw browser ``Cookie`` header value
    (``name=value; name2=value2``).
    """
    cookies = {}
    for part in cookie.split(";"):
        part = part.strip()
        # Skip empty fragments (e.g. a trailing ';') and malformed parts with
        # no '=' instead of crashing with ValueError on tuple unpacking.
        if not part or "=" not in part:
            continue
        key, value = part.split("=", 1)
        cookies[key] = value
    if not cookies:
        error_out("No valid cookie")
    run(cookies, page=page, verbose=verbose, dry_run=dry_run)
if __name__ == "__main__":
    cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment