Skip to content

Instantly share code, notes, and snippets.

@kenych
Last active June 21, 2016 10:17
Show Gist options
  • Save kenych/b6305890174b35c311d7b8ece92292de to your computer and use it in GitHub Desktop.
Save kenych/b6305890174b35c311d7b8ece92292de to your computer and use it in GitHub Desktop.
As GO doesn't yet have an API for the pipeline compare feature, I have written a simple HTML parser for the GO pipeline compare page, to use in JIRA release ticket creation. The input file is the HTML page and the output is a list of commits.
from HTMLParser import HTMLParser
import sys
class Record:
    """One row scraped from the compare table.

    ``a`` holds the text captured from the row's ``<a>`` element and ``p``
    the text captured from its ``<p>`` element.
    """

    def __init__(self, a, p):
        self.a, self.p = a, p

    def __str__(self):
        # Same layout as before: "a: <a> p:<p>".
        pieces = ("a: ", self.a, " p:", self.p)
        return "".join(pieces)
class GoHTMLParser(HTMLParser):
    """Collect commit records from a GO pipeline-compare HTML page.

    Only content inside ``<table class="list_table material_modifications">``
    is inspected. Every ``<tr class="change">`` row starts a new ``Record``;
    text found under an ``<a>`` element is stored in ``record.a`` and text
    found under a ``<p>`` element in ``record.p``. A row whose ``<p>`` text
    starts with ``"Merge"`` is discarded. Finished rows are appended to
    ``recordList``, which is what callers read after ``feed()``.
    """

    # (name, value) attribute pairs, in the form HTMLParser hands them over.
    _TABLE_ATTR = ('class', 'list_table material_modifications')
    _ROW_ATTR = ('class', 'change')
    _DATA_TAGS = ("p", "a")

    def __init__(self, *args, **kwargs):
        # HTMLParser is an old-style class on Python 2, so call the base
        # initialiser explicitly instead of going through super().
        HTMLParser.__init__(self, *args, **kwargs)
        # All state is per instance; a class-level list would be shared by
        # every parser and keep growing across parses.
        self.tableStarted = False
        self.dataStarted = False
        self.recordStarted = False
        self.recordList = []
        self.record = None
        self.tag = ''
        self.prevTag = ''

    def handle_starttag(self, tag, attrs):
        """Track entry into the target table, a change row, or a data tag."""
        if tag == "table" and self._TABLE_ATTR in attrs:
            self.tableStarted = True

        if tag == "tr" and self.tableStarted and self._ROW_ATTR in attrs:
            self.record = Record('', '')
            self.recordStarted = True

        if tag in self._DATA_TAGS and self.tableStarted:
            self.dataStarted = True
            self.prevTag = self.tag
            self.tag = tag
        else:
            # Any other opening tag stops text capture until the next <p>/<a>.
            self.dataStarted = False

    def handle_endtag(self, tag):
        """Close the current data tag, row, or table."""
        if not self.tableStarted:
            return

        if tag in self._DATA_TAGS:
            self.tag = self.prevTag
        elif tag == "table":
            self.tableStarted = False
        elif tag == "tr" and self.recordStarted:
            if self.record is not None:
                self.recordList.append(self.record)
            # Reset the row state so a later non-"change" row can neither
            # re-append nor overwrite the record that was just stored.
            self.record = None
            self.recordStarted = False

    def handle_data(self, data):
        """Store non-blank text from ``<a>``/``<p>`` into the current record."""
        text = data.strip()
        if not (self.tableStarted and self.dataStarted and text):
            return
        if self.record is None:
            return

        if self.tag == 'a':
            self.record.a = text
        elif self.tag == 'p':
            if text.startswith("Merge"):
                # Merge commits are skipped: drop the whole row.
                self.record = None
            else:
                self.record.p = text
def main(argv=None):
    """Parse a saved compare page and write one ``a;p`` line per record.

    Expects three arguments: the working directory, the input HTML file name
    and the output file name (both names relative to the directory). Extra
    arguments are ignored. ``argv`` defaults to ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit("usage: %s <directory> <input_html> <output_file>" % sys.argv[0])

    path = str(args[0])
    input_file = str(args[1])
    output_file = str(args[2])

    # Read the whole page up front and close the handle right away.
    with open(path + '/' + input_file, 'r') as source:
        html = source.read()

    parser = GoHTMLParser()
    parser.feed(html)

    # Open the output once (truncating it) instead of re-opening it in
    # append mode for every record; the file is created even with no records.
    with open(path + '/' + output_file, 'w') as out:
        out.writelines(r.a + ";" + r.p + "\n" for r in parser.recordList)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment