Skip to content

Instantly share code, notes, and snippets.

@orimanabu
Last active April 22, 2019 09:15
Show Gist options
  • Save orimanabu/f17fd43ce0c81f6cfb829d440fcd5261 to your computer and use it in GitHub Desktop.
Save orimanabu/f17fd43ce0c81f6cfb829d440fcd5261 to your computer and use it in GitHub Desktop.
Obtain Errata Advisory and Bugzilla URLs for RHOSP
#!/usr/bin/env python
import csv
import sys
import requests
import subprocess
import urllib.parse
from pprint import pprint
from lxml import html
from argparse import ArgumentParser
# Chrome executable launched by fetch_page_browser() in headless mode.
CHROME_PATH = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'

# Product name; URL-quoted and substituted into the portal_product query field.
PRODUCT = 'Red Hat OpenStack Platform'

# Errata search URL template.  The positional fields are, in order:
# page number, rows per page, URL-quoted product, product version, architecture.
URL = (
    'https://access.redhat.com/errata/#/'
    '?q='
    '&p={}'
    '&sort=portal_publication_date%20desc'
    '&rows={}'
    '&portal_product={}'
    '&portal_product_version={}'
    '&portal_architecture={}'
)
def fetch_page_browser(url):
    """Render *url* in headless Chrome and return the dumped DOM.

    Chrome is started from CHROME_PATH with ``--dump-dom`` and its standard
    output is captured.

    Args:
        url: Address of the page to load.

    Returns:
        The bytes Chrome wrote to standard output.

    Exits the interpreter with status 1 when Chrome returns a non-zero
    exit code.
    """
    command = [CHROME_PATH, '--headless', '--disable-gpu', '--dump-dom', url]
    print(" *** {}".format(command))

    # Only stdout is captured; Chrome's stderr passes through to the terminal.
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    if completed.returncode == 0:
        return completed.stdout

    print('headless chrome returns {}, exit...'.format(completed.returncode))
    sys.exit(1)
def fetch_page_python(url, timeout=30):
    """Download *url* with ``requests`` and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole run.

    Returns:
        The response body as bytes.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    res = requests.get(url, timeout=timeout)
    return res.content
def parse_errata_detail(url):
    """Collect the Bugzilla entries listed under an advisory's "Fixes" heading.

    The advisory page is fetched with fetch_page_python() and parsed with
    lxml.  Each link in the first ``<ul>`` following the "Fixes" ``<h2>`` is
    treated as one Bugzilla entry.

    Args:
        url: Address of the advisory detail page.

    Returns:
        A dict keyed by the link text of each entry.  Every value is a dict
        with ``'url'`` (the link's href) and ``'title'`` (the link text, a
        space, and the text that follows the link inside its list item).
        An empty dict is returned when the page marks the section "(none)"
        or contains no such links.
    """
    print(' ** {}'.format(url))
    content = fetch_page_python(url)
    doc = html.fromstring(content)

    none_bz = doc.xpath("//h2[contains(text(), 'Fixes')]/following-sibling::span/text()")
    if none_bz and none_bz[0] == '(none)':
        return {}

    # Walk the <a> elements themselves instead of zipping three independent
    # XPath result lists by index: the lists fall out of step (or raise
    # IndexError) as soon as one list item lacks trailing text or carries an
    # extra text node.
    links = doc.xpath("//h2[contains(text(), 'Fixes')]//following-sibling::ul[1]/li/a")

    result = {}
    for link in links:
        bz = link.text
        if not bz:
            # A link without text has no identifier to key on.
            continue
        # link.tail is the text directly after </a> inside the same <li>.
        _title = bz + ' ' + (link.tail or '')
        print(' ** {}'.format(_title))
        result[bz] = {'url': link.get('href', ''), 'title': _title}
    return result
def get_errata_list(args):
    """Walk every page of the errata search and gather the advisories.

    Args:
        args: Parsed command-line options; ``rows``, ``version`` and ``arch``
            are substituted into the search URL.

    Returns:
        The dict populated by get_errata_list_pagenated() across all pages.
    """
    collected = {}
    quoted_product = urllib.parse.quote(PRODUCT)

    page = 0
    finished = False
    while not finished:
        page += 1
        print(' * page={}'.format(page))
        page_url = URL.format(page, args.rows, quoted_product, args.version, args.arch)
        # The helper adds its findings to `collected` and reports whether
        # this was the final page.
        finished = bool(get_errata_list_pagenated(page_url, collected))
    return collected
def get_errata_list_pagenated(url, errata):
    """Scrape one page of errata search results into *errata*.

    The page is rendered through fetch_page_browser() and parsed with lxml.

    Args:
        url: Address of the search-result page.
        errata: Dict updated in place.  Each advisory name found on the page
            becomes a key mapping to a dict with ``'url'``, ``'synopsis'``,
            ``'products'`` and ``'date'``.

    Returns:
        True when this is treated as the last page: either fewer than two
        pagination direction links were found, or the second link's class
        attribute contains ``'disabled'``.  False otherwise.
    """
    print(' ** {}'.format(url))
    content = fetch_page_browser(url)
    doc = html.fromstring(content)

    # Every column is located the same way: a bold label inside a table row
    # whose sibling <span> holds the value.
    field_xpath = ("//tbody/tr//b[contains(@class, 'ng-binding') "
                   "and contains(text(), '{}')]/../span/{}")
    adv = doc.xpath(field_xpath.format('Advisory', 'a/text()'))
    # Named adv_url so the `url` parameter is not shadowed.
    adv_url = doc.xpath(field_xpath.format('Advisory', 'a/@ng-href'))
    synop = doc.xpath(field_xpath.format('Synopsis', 'text()'))
    prod = doc.xpath(field_xpath.format('Products', 'text()'))
    date = doc.xpath(field_xpath.format('Publish Date', 'time/text()'))

    for i, erratum in enumerate(adv):
        errata[erratum] = {
            'url': adv_url[i],
            'synopsis': synop[i],
            'products': prod[i],
            'date': date[i],
        }

    # Evaluate the pagination query once and reuse the result.
    direction_links = doc.xpath(
        "//div[contains(@class, 'flex-item')]"
        "//li[contains(@ng-if, 'directionLinks')]/@class")
    if not direction_links:
        return True
    print(' *** {}'.format(direction_links))
    if len(direction_links) < 2:
        # No second link to inspect; stop instead of raising IndexError.
        return True
    # NOTE(review): the second direction link is presumably the "next"
    # control -- confirm against the rendered page.
    return 'disabled' in direction_links[1]
def build_row_erratum_only(errata, erratum):
    """Build the output row for an advisory without Bugzilla columns.

    Args:
        errata: Mapping of advisory name to its detail dict.
        erratum: Name of the advisory to build the row for.

    Returns:
        A list ``[erratum, url, synopsis, date]``.
    """
    entry = errata[erratum]
    # 'products' is stored in the entry but deliberately not emitted.
    columns = ('url', 'synopsis', 'date')
    return [erratum] + [entry[column] for column in columns]
def parse_errata_list(errata, args):
    """Turn the collected advisories into output rows.

    Advisories are processed in sorted name order.  With ``args.errata_only``
    set, each advisory yields one row and its detail page is not fetched.
    Otherwise the detail page is parsed via parse_errata_detail() and one row
    per Bugzilla entry (sorted by key) is produced; an advisory with no
    entries still yields its single advisory-only row.

    Args:
        errata: Mapping of advisory name to its detail dict.
        args: Parsed command-line options; only ``errata_only`` is read.

    Returns:
        A list of rows, each a list of column values.
    """
    rows = []
    for advisory in sorted(errata):
        if args.errata_only:
            print(' ** {}'.format(errata[advisory]['url']))
            rows.append(build_row_erratum_only(errata, advisory))
            continue

        bugs = parse_errata_detail(errata[advisory]['url'])
        if not bugs:
            rows.append(build_row_erratum_only(errata, advisory))
            continue

        for bug_id in sorted(bugs):
            bug = bugs[bug_id]
            # Advisory columns first, then the Bugzilla title and URL.
            rows.append(
                build_row_erratum_only(errata, advisory)
                + [bug['title'], bug['url']]
            )
    return rows
def main():
    """Parse the command line, scrape the errata and optionally write a CSV.

    The options in effect are echoed to standard output, the errata list is
    collected with get_errata_list() and expanded into rows with
    parse_errata_list().  The rows are written to a file only when ``--csv``
    is given; when ``--csvfile`` is omitted the name is derived from the
    version and from whether ``--errata-only`` is set.
    """
    usage = 'python {} --version VERSION --arch ARCH --rows ROWS'.format(__file__)
    argparser = ArgumentParser(usage=usage)
    argparser.add_argument('--debug', help='debug', action='store_true')
    argparser.add_argument('--version', help='version', type=int, dest='version', default=13)
    argparser.add_argument('--arch', help='arch', choices=['x86_64', 'ppc64le'], default='x86_64')
    argparser.add_argument('--rows', help='rows', type=int, dest='rows', default=100)
    argparser.add_argument('--errata-only', help='errata only, not bugzilla', dest='errata_only', action='store_true')
    argparser.add_argument('--csv', help='csv', action='store_true')
    argparser.add_argument('--csvfile', help='csv file name', type=str, dest='csvfile')
    args = argparser.parse_args()

    if not args.csvfile:
        if args.errata_only:
            args.csvfile = 'RHOSP{}_errata_only.csv'.format(args.version)
        else:
            args.csvfile = 'RHOSP{}_errata_bugzilla.csv'.format(args.version)

    print('** args.debug: {}'.format(args.debug))
    print('** args.version: {}'.format(args.version))
    print('** args.arch: {}'.format(args.arch))
    print('** args.rows: {}'.format(args.rows))
    print('** args.errata_only: {}'.format(args.errata_only))
    print('** args.csv: {}'.format(args.csv))
    print('** args.csvfile: {}'.format(args.csvfile))

    errata = get_errata_list(args)
    results = parse_errata_list(errata, args)
    print('** # of erratum: {}'.format(len(errata)))
    print('** # of results: {}'.format(len(results)))

    if args.csv:
        # newline='' lets the csv writer control line endings itself (as the
        # csv module documentation requires); an explicit encoding keeps
        # non-ASCII titles from depending on the locale's default codec.
        with open(args.csvfile, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerows(results)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment