Skip to content

Instantly share code, notes, and snippets.

@thcipriani
Last active February 6, 2019 17:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thcipriani/e2bab1b4cfabdc5cfd3f81fe5bf9ab72 to your computer and use it in GitHub Desktop.
Save thcipriani/e2bab1b4cfabdc5cfd3f81fe5bf9ab72 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
train.py
~~~~~~~~
Find deployments of a given type in a given time period.
"""
from __future__ import division
import argparse
import re
from dateutil.parser import parse
import requests
import time
# Default deployment type; keywords_for() treats any other value as a
# non-MediaWiki deployment.
MW = 'mediawiki'

# Sent with every API request so the server can identify this script.
HEADERS = {
    'User-Agent': ('train.py/0.1 '
                   '(https://gist.github.com/thcipriani/'
                   'e2bab1b4cfabdc5cfd3f81fe5bf9ab72)')
}

API_URL = 'https://wikitech.wikimedia.org/w/api.php'

# Key under which run() looks up the page in the response's "pages" map
# (presumably the page id of "Server Admin Log" -- verify if the title
# below is ever changed).
PAGE_ID = '7919'

# Base parameters for the revisions query; main() adds rvstart/rvend.
OPTIONS = {
    'action': 'query',
    'prop': 'revisions',
    # Parameters of prop=revisions carry the "rv" prefix. The revision
    # author filter is therefore "rvuser"; a bare "user" key is not a
    # recognised parameter and would leave the results unfiltered.
    'rvuser': 'Stashbot',
    'titles': 'Server Admin Log',
    'rvprop': 'timestamp|comment',
    'rvdir': 'newer',
    'format': 'json',
}

# Seconds to sleep between continuation requests.
DEFAULT_DELAY = 8
def keywords_for(deploy_type):
    """
    Return the regex sources that identify a finished deployment message.

    :param deploy_type: deployment type. ``MW`` selects the MediaWiki
        patterns; any other value selects the single "Finished deploy"
        pattern.
    :returns: list of regular expression strings (not compiled); the
        caller is expected to combine and compile them.
    """
    # Raw strings keep the backslashes for the regex engine. In a plain
    # literal, "\(", "\d" and "\[" are invalid string escapes, which
    # newer Python versions warn about.
    duration = r'\(duration: \d\dm \d\ds\)'
    if deploy_type == MW:
        return [
            r'^Finished scap: .* %s$' % duration,
            r'^Synchronized [^:]+: .* %s$' % duration,
            r'^scap sync-l10n completed .* %s$' % duration,
            r'^rebuilt wikiversions.php and synchronized wikiversions files:',
        ]
    return [
        r'^Finished deploy \[[^@]+@[^@]+\]: .* %s$' % duration,
    ]
def run_query(query, delay=DEFAULT_DELAY, timeout=60):
    """
    Query the SAL, following API continuation until the results end.

    :param query: dict of API parameters. It is copied for every request
        and never modified.
    :param delay: seconds to sleep before each continuation request
    :param timeout: seconds to wait for the server on each request, so a
        stalled connection raises instead of hanging the script forever
    :yields: the ``query`` member of each response that contains one
    :raises requests.HTTPError: if a response has an error status code
    :raises requests.Timeout: if the server does not answer in time
    """
    last_continue = {}
    while True:
        # Merge the continuation tokens of the previous response into a
        # fresh copy of the caller's parameters.
        req = query.copy()
        req.update(last_continue)
        r = requests.get(
            API_URL, headers=HEADERS, params=req, timeout=timeout)
        r.raise_for_status()
        result = r.json()
        if 'query' in result:
            yield result['query']
        if 'continue' not in result:
            break
        last_continue = result['continue']
        # Pause only when another request is actually going to be made.
        time.sleep(delay)
def run(query, deploy_type=MW, delay=DEFAULT_DELAY, user_re=None,
        message_re=None):
    """
    Check the SAL for messages in a given time window matching criteria.

    Each matching log entry is printed as ``user<TAB>message``.

    :param query: dict of API parameters, passed on to ``run_query``
    :param deploy_type: deployment type, passed on to ``keywords_for``
    :param delay: seconds between continuation requests
    :param user_re: optional regex the part of the comment before the
        first ":" must match
    :param message_re: optional regex the part of the comment after the
        first ":" must match
    """
    keywords = re.compile(
        '({})'.format('|'.join(keywords_for(deploy_type))))
    # Always bind both filters so the checks below never touch an
    # undefined name; None means "no filter".
    key_user = re.compile(user_re) if user_re else None
    key_message = re.compile(message_re) if message_re else None

    for result in run_query(query, delay=delay):
        page = result['pages'][PAGE_ID]
        # Tolerate a page entry without a "revisions" list (nothing in
        # the requested window) instead of raising KeyError.
        for rev in page.get('revisions', ()):
            # A revision without a "comment" field is treated as empty
            # and skipped by the "@" check.
            comment = rev.get('comment', '')
            if '@' not in comment:
                continue
            user, sep, message = comment.partition(':')
            if not sep:
                # "@" present but no "user: message" separator; there is
                # no message part to inspect.
                continue
            message = message.strip()
            if not keywords.match(message):
                continue
            if key_user is not None and not key_user.match(user):
                continue
            if key_message is not None and not key_message.match(message):
                continue
            print('{}\t{}'.format(user, message))
def parse_date(ts):
    """
    Parse the date string ``ts`` and return it in ISO 8601 form.
    """
    moment = parse(ts)
    return moment.isoformat()
def parse_args():
    """
    Declare the command-line options and parse the process arguments.

    Returns the ``argparse`` namespace with ``start_date``, ``end_date``,
    ``type``, ``continue_delay``, ``match_user`` and ``match_message``.
    """
    parser = argparse.ArgumentParser()
    # (flags, settings) pairs, registered in this order.
    option_table = (
        (('-s', '--start-date'),
         dict(required=True, help='Date to start scanning the SAL')),
        (('-e', '--end-date'),
         dict(required=True, help='Date to stop scanning the SAL')),
        (('-t', '--type'),
         dict(default=MW, help='Type of deployment: services or mediawiki')),
        (('-d', '--continue-delay'),
         dict(default=DEFAULT_DELAY, type=int,
              help='Seconds to pause between running a continue')),
        (('-u', '--match-user'),
         dict(help='Regex to match against username')),
        (('-m', '--match-message'),
         dict(help='Regex to match against message')),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def main():
    """
    Script entry point.

    Builds the API query from the base ``OPTIONS`` plus the requested
    time window, then prints the matching log entries via ``run``.
    """
    args = parse_args()
    # OPTIONS itself is left untouched; dict() builds a new mapping.
    query = dict(
        OPTIONS,
        rvstart=parse_date(args.start_date),
        rvend=parse_date(args.end_date)
    )
    filters = {
        'deploy_type': args.type,
        'delay': args.continue_delay,
        'user_re': args.match_user,
        'message_re': args.match_message,
    }
    run(query, **filters)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment