Skip to content

Instantly share code, notes, and snippets.

@bitrot-sh
Last active December 31, 2017 13:49
Show Gist options
  • Save bitrot-sh/708dfc0285927c5fe59fa1e1089cf210 to your computer and use it in GitHub Desktop.
Save bitrot-sh/708dfc0285927c5fe59fa1e1089cf210 to your computer and use it in GitHub Desktop.
Scrape Mozilla commits for security-related patches
#!/usr/bin/env python3
try:
import BeautifulSoup
except:
import bs4 as BeautifulSoup
from urllib3 import ProxyManager, make_headers
# Shared HTTP client used for every request in this script: all traffic goes
# through the proxy below and carries a browser-like User-Agent header.
# NOTE(review): 127.0.0.1:8118 is presumably a local privacy proxy -- confirm.
USER_AGENT = 'Mozilla/5.0'
PROXY_URL = 'http://127.0.0.1:8118'

headers = make_headers(user_agent=USER_AGENT)
http = ProxyManager(proxy_url=PROXY_URL, headers=headers)
def get_url(url):
    """Fetch *url* with the module-level ``http`` client and parse the body.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        A BeautifulSoup tree built from the raw response bytes with the
        "html5lib" parser. The HTTP status is not inspected here, so error
        pages are parsed like any other response.
    """
    response = http.request('GET', url)
    markup = response.data
    return BeautifulSoup.BeautifulSoup(markup, "html5lib")
def read_perms(soup):
    """Tell whether the parsed page is free of an error-message cell.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        ``True`` when no ``<td id="error_msg">`` element exists in the page,
        ``False`` when one is present.
    """
    # ``find`` yields None when nothing matches; test for that explicitly
    # instead of relying on the truthiness of the returned element.
    return soup.find('td', {'id': 'error_msg'}) is None
def parse_diff(soup, base_url='https://hg.mozilla.org/'):
    """Return the absolute URL of the diff link that precedes *soup*.

    Looks backwards from *soup* for the closest ``<td class="link">`` cell and
    takes the ``href`` of the first anchor inside it.

    Args:
        soup: Element (typically the bug anchor) to search backwards from.
        base_url: Host that relative hrefs are resolved against. This is a
            parameter with a default because no module-level base URL exists
            for this function to read.

    Returns:
        The diff URL as a string, with relative hrefs resolved against
        *base_url* and absolute hrefs returned unchanged. ``None`` when no
        link cell, anchor, or href can be found.
    """
    # Function-scope import so this block does not depend on file-level imports.
    from urllib.parse import urljoin

    cell = soup.findPrevious('td', {'class': 'link'})
    anchor = cell.find('a') if cell is not None else None
    href = anchor.get('href') if anchor is not None else None
    if not href:
        return None
    # urljoin avoids the doubled slash that plain string formatting produces
    # for root-relative hrefs, and passes absolute URLs through untouched.
    return urljoin(base_url, href)
def parse_age(soup):
    """Return the text of the first ``<td class="date age">`` cell in *soup*.

    Raises:
        AttributeError: If the page has no such cell (``find`` returned None).
    """
    age_cell = soup.find('td', {'class': 'date age'})
    return age_cell.text
def parse_soup(soup):
    """Report bug links on a shortlog page whose bug page shows an error cell.

    Every anchor in *soup* is visited. Root-relative hrefs are rewritten to
    absolute hg.mozilla.org URLs. Anchors whose text contains "bug " are
    fetched; when the fetched page fails ``read_perms`` (an error message is
    present), the bug is treated as possibly security related and a short
    report is printed: link text and commit age, diff URL, the text following
    the link, and the bug URL.

    Args:
        soup: Parsed shortlog page exposing BeautifulSoup-style ``findAll``.

    Returns:
        None. Results are written to stdout.
    """
    for link in soup.findAll('a'):
        href = link.get('href')
        if not href:
            # Anchors without an href (e.g. named anchors) cannot be followed.
            continue
        if href.startswith('/'):
            # href already begins with "/", so do not add another separator.
            href = 'http://hg.mozilla.org%s' % (href)
            # Written back into the tree on purpose: parse_diff() later reads
            # the href of an earlier anchor and benefits from it being absolute.
            link['href'] = href
        if "bug " not in link.text.lower():
            continue
        bug_page = get_url(href)
        # If there's an access error, mark it as being possibly security related
        if read_perms(bug_page):
            continue
        diff = parse_diff(link)
        try:
            age = parse_age(get_url(diff))
        except Exception:
            # Best effort: the commit age is optional, so any failure while
            # fetching or parsing the commit page falls back to a placeholder.
            age = 'Unknown'
        print("\t%s: %s" % (link.text, age))
        print("\t\tDiff: (%s)" % (diff))
        print("\t\tComment: %s" % (link.nextSibling))
        print("\t\tBugzilla: %s" % (href))
def main():
    """Fetch each hard-coded shortlog page and run ``parse_soup`` on it."""
    shortlog_urls = (
        "https://hg.mozilla.org/releases/mozilla-esr52/shortlog",
        "https://hg.mozilla.org/releases/mozilla-release/shortlog",
        "https://hg.mozilla.org/releases/mozilla-b2g44_v2_5/shortlog",
        "https://hg.mozilla.org/releases/mozilla-beta/shortlog",
    )
    for shortlog in shortlog_urls:
        print("[+] Parsing %s" % (shortlog,))
        page = get_url(shortlog)
        parse_soup(page)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment