Last active
February 6, 2019 17:19
-
-
Save thcipriani/e2bab1b4cfabdc5cfd3f81fe5bf9ab72 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
train.py | |
~~~~~~~~ | |
Find deployments of a given type in a given time period. | |
""" | |
from __future__ import division | |
import argparse | |
import re | |
from dateutil.parser import parse | |
import requests | |
import time | |
# Deployment type that selects the MediaWiki (scap) keyword patterns.
MW = 'mediawiki'

# Sent with every API request so the script identifies itself.
HEADERS = {
    'User-Agent': ('train.py/0.1 '
                   '(https://gist.github.com/thcipriani/'
                   'e2bab1b4cfabdc5cfd3f81fe5bf9ab72)')
}

API_URL = 'https://wikitech.wikimedia.org/w/api.php'

# Key used to look the page up in the ``pages`` mapping of an API response;
# presumably the page ID of 'Server Admin Log' -- verify if the page moves.
PAGE_ID = '7919'

# Base request parameters; the rvstart/rvend window is added per run.
OPTIONS = {
    'action': 'query',
    'prop': 'revisions',
    # Parameters of prop=revisions carry the "rv" prefix.  A bare 'user'
    # key is not a recognised parameter, so the bot filter was never applied.
    'rvuser': 'Stashbot',
    'titles': 'Server Admin Log',
    'rvprop': 'timestamp|comment',
    'rvdir': 'newer',
    'format': 'json',
}

# Seconds to pause between continuation requests.
DEFAULT_DELAY = 8
def keywords_for(deploy_type):
    """
    Return the regex patterns that identify a log message of interest

    :param deploy_type: ``MW`` selects the MediaWiki (scap) patterns; any
        other value selects the single "Finished deploy" pattern.
    :returns: list of regex pattern strings, each anchored with ``^``.
    """
    # Raw strings: ``\(``, ``\d`` and ``\[`` are regex escapes, not string
    # escapes.  In a plain literal they are invalid escape sequences, which
    # newer Python versions warn about.  The resulting patterns are unchanged.
    duration = r'\(duration: \d\dm \d\ds\)'

    if deploy_type == MW:
        return [
            r'^Finished scap: .* %s$' % duration,
            r'^Synchronized [^:]+: .* %s$' % duration,
            r'^scap sync-l10n completed .* %s$' % duration,
            r'^rebuilt wikiversions.php and synchronized wikiversions files:',
        ]

    return [
        r'^Finished deploy \[[^@]+@[^@]+\]: .* %s$' % duration
    ]
def run_query(query, delay=DEFAULT_DELAY, timeout=60):
    """
    Query the SAL, following API continuation until it is exhausted

    :param query: dict of request parameters; it is copied for every
        request and never mutated.
    :param delay: seconds to sleep before each follow-up (continue) request.
    :param timeout: seconds to wait for the server on each request, so a
        stalled connection raises instead of hanging the script forever.
    :yields: the ``query`` member of each JSON response that contains one.
    :raises requests.HTTPError: when the server answers with an error status.
    :raises requests.Timeout: when a request exceeds ``timeout``.
    """
    last_continue = {}

    while True:
        # Merge the continuation tokens of the previous response (if any)
        # into a fresh copy of the base query.
        req = query.copy()
        req.update(last_continue)

        r = requests.get(
            API_URL, headers=HEADERS, params=req, timeout=timeout)
        r.raise_for_status()
        result = r.json()

        if 'query' in result:
            yield result['query']

        if 'continue' not in result:
            break

        last_continue = result['continue']

        # Pause before asking for the next batch.
        time.sleep(delay)
def run(query, deploy_type=MW, delay=DEFAULT_DELAY, user_re=None,
        message_re=None):
    """
    Check the sal for messages in a given time window matching criteria

    Prints one ``user<TAB>message`` line per matching revision comment.

    :param query: dict of API request parameters handed to ``run_query``.
    :param deploy_type: passed to ``keywords_for`` to pick the patterns.
    :param delay: seconds to pause between continuation requests.
    :param user_re: optional regex; when given, the text before the first
        ``:`` of the comment must match it.
    :param message_re: optional regex; when given, the text after the first
        ``:`` of the comment must match it.
    """
    # One alternation of every keyword pattern for this deployment type.
    keywords = re.compile('({})'.format('|'.join(keywords_for(deploy_type))))
    key_user = re.compile(user_re) if user_re else None
    key_message = re.compile(message_re) if message_re else None

    for result in run_query(query, delay=delay):
        # A page with nothing in the requested window has no 'revisions'
        # member; treat that as "no entries" instead of raising KeyError.
        revisions = result['pages'][PAGE_ID].get('revisions', [])

        for rev in revisions:
            # Tolerate a revision without a 'comment' member.
            comment = rev.get('comment', '')
            if '@' not in comment:
                continue

            # Comments of interest look like "<user>: <message>".  Skip any
            # comment without the ':' separator rather than raising
            # IndexError.
            user, sep, message = comment.partition(':')
            if not sep:
                continue
            message = message.strip()

            if not keywords.match(message):
                continue

            if key_user is not None and not key_user.match(user):
                continue

            if key_message is not None and not key_message.match(message):
                continue

            print('{}\t{}'.format(user, message))
def parse_date(ts):
    """
    Convert a date/time string into its ``isoformat()`` text

    :param ts: date/time text understood by ``dateutil.parser.parse``.
    :returns: the parsed value rendered with ``isoformat()``.
    """
    moment = parse(ts)
    return moment.isoformat()
def parse_args():
    """
    Declare the command-line options and parse the process arguments

    :returns: the ``argparse`` namespace with ``start_date``, ``end_date``,
        ``type``, ``continue_delay``, ``match_user`` and ``match_message``.
    """
    # (flags, keyword settings) for every option, in display order.
    option_table = (
        (('-s', '--start-date'),
         dict(required=True, help='Date to start scanning the SAL')),
        (('-e', '--end-date'),
         dict(required=True, help='Date to stop scanning the SAL')),
        (('-t', '--type'),
         dict(default=MW, help='Type of deployment: services or mediawiki')),
        (('-d', '--continue-delay'),
         dict(default=DEFAULT_DELAY, type=int,
              help='Seconds to pause between running a continue')),
        (('-u', '--match-user'),
         dict(help='Regex to match against username')),
        (('-m', '--match-message'),
         dict(help='Regex to match against message')),
    )

    parser = argparse.ArgumentParser()
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
def main():
    """
    Script entry point

    Reads the command line, adds the requested time window to a copy of the
    base API options and prints the matching log entries.
    """
    cli = parse_args()

    # Copy the shared defaults and add the window bounds in one step; the
    # module-level OPTIONS dict itself is left untouched.
    query = dict(
        OPTIONS,
        rvstart=parse_date(cli.start_date),
        rvend=parse_date(cli.end_date),
    )

    run(query,
        deploy_type=cli.type,
        delay=cli.continue_delay,
        user_re=cli.match_user,
        message_re=cli.match_message)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment