Skip to content

Instantly share code, notes, and snippets.

@itsecworks
Created July 8, 2022 02:37
Show Gist options
  • Save itsecworks/c6c0f4fc48c2058a1ea8ccdbafcda654 to your computer and use it in GitHub Desktop.
Save itsecworks/c6c0f4fc48c2058a1ea8ccdbafcda654 to your computer and use it in GitHub Desktop.
# Import libraries
import os
import time
from datetime import date

import pandas as pd
import requests
from bs4 import BeautifulSoup
def datacollector(releases, release, release_new, iss_type, release_placeholder,
                  timeout=30, verify=False):
    """Download one release-notes page and return its issue table.

    The URL template is read from ``releases[release][iss_type]['url']`` and
    every occurrence of ``release_placeholder`` in it is replaced by
    ``release_new``. The first ``<table class="table colsep rowsep">`` on the
    page is converted into a DataFrame whose first column, ``release``, holds
    ``release_new`` for every row.

    Args:
        releases: Nested mapping ``{release: {iss_type: {'url': ...}}}``.
        release: Key of the release family inside ``releases``.
        release_new: Concrete release string substituted into the URL and
            stored in the ``release`` column.
        iss_type: Key of the issue type inside ``releases[release]``.
        release_placeholder: Token in the URL template to be replaced.
        timeout: Seconds to wait for the server before giving up.
        verify: Passed to ``requests.get``; ``False`` disables TLS
            certificate verification (kept as the default for backward
            compatibility).

    Returns:
        A ``pandas.DataFrame`` with the table content, or ``None`` when the
        page has no matching table or the table has no data rows.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = releases[release][iss_type]['url'].replace(release_placeholder, release_new)
    print(url)

    # NOTE(security): verify=False skips TLS certificate checks. It stays the
    # default because existing callers rely on it; pass verify=True whenever
    # the environment allows it.
    # A timeout is set so a stalled connection cannot block the run forever.
    page = requests.get(url, verify=verify, timeout=timeout)

    # parser-lxml = Change html to Python friendly format
    soup = BeautifulSoup(page.text, 'lxml')

    # find() returns None when the page has no such table (e.g. an error page).
    tbl = soup.find('table', attrs={"class": "table colsep rowsep"})
    if tbl is None:
        print('The table not found! no data!')
        return None

    # Column titles come from the <th> cells; fall back to fixed names.
    header_cells = tbl.find_all('th')
    if header_cells:
        headers = ['release'] + [cell.text for cell in header_cells]
    else:
        print('The table header cells not found! default column name will be set!')
        headers = ['release','Issue ID','Description']

    table_rows = tbl.find_all('tr')
    if not table_rows:
        print('The table rows not found! no data!')
        return None

    width = len(headers)
    rows = []
    # The first <tr> is treated as the header row and skipped.
    for table_row in table_rows[1:]:
        cells = table_row.find_all('td')
        if not cells:
            # Rows without <td> cells carry no data.
            continue
        row = [release_new] + [cell.text.replace('\n', ' ') for cell in cells]
        # Make the row fit the header width so one irregular row cannot
        # abort the whole page: pad short rows, fold surplus cells into the
        # last column.
        if len(row) < width:
            row.extend([''] * (width - len(row)))
        elif len(row) > width:
            row = row[:width - 1] + [' '.join(row[width - 1:])]
        rows.append(row)

    if not rows:
        return None

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=headers)
# Token inside the URL templates below that is replaced by a concrete
# release string (for example '9-1-3' or '9-1-3-h1').
release_placeholder = 'XXX'

# Per release family, one entry per issue type:
#   'url'             - URL template of the release-notes page
#   'current_release' - highest maintenance release number, kept as a string
#   'hotfixes'        - hotfix release strings (addressed issues only)
releases = {
    '8-1': {
        # This URL contains no placeholder: one page covers all 8.1 releases.
        'known-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/8-1/pan-os-release-notes/pan-os-8-1-release-information/known-issues/known-issues-related-to-pan-os-8-1-releases',
            'current_release': '1',
        },
        'addressed-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/8-1/pan-os-release-notes/pan-os-8-1-addressed-issues/pan-os-XXX-addressed-issues',
            'current_release': '23',
            'hotfixes': [
                '8-1-21-h1',
                '8-1-20-h1',
                '8-1-15-h3',
                '8-1-14-h2',
                '8-1-9-h4',
                '8-1-8-h5',
                '8-1-6-h2',
                '8-1-4-h2',
            ],
        },
    },
    '9-1': {
        'known-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/9-1/pan-os-release-notes/pan-os-9-1-release-information/known-issues/known-issues-related-to-pan-os-9-1-releases/pan-os-XXX-known-issues',
            'current_release': '14',
        },
        'addressed-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/9-1/pan-os-release-notes/pan-os-9-1-addressed-issues/pan-os-XXX-addressed-issues',
            'current_release': '14',
            # NOTE(review): '912-h1' does not follow the dashed pattern of the
            # other entries - confirm whether '9-1-2-h1' was intended.
            'hotfixes': [
                '912-h1',
                '9-1-3-h1',
                '9-1-11-h2',
                '9-1-11-h3',
                '9-1-12-h3',
                '9-1-12-h4',
                '9-1-13-h1',
                '9-1-13-h3',
            ],
        },
    },
    '10-0': {
        'known-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/10-0/pan-os-release-notes/pan-os-10-0-release-information/known-issues/known-issues-related-to-pan-os-10-0-releases/pan-os-XXX-known-issues',
            'current_release': '10',
        },
        'addressed-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/10-0/pan-os-release-notes/pan-os-10-0-addressed-issues/pan-os-XXX-addressed-issues',
            'current_release': '10',
            'hotfixes': [
                '10-0-10-h1',
                '10-0-8-h8',
                '10-0-8-h4',
            ],
        },
    },
    '10-1': {
        # From 10.1 on the placeholder appears twice in each URL.
        'known-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/10-1/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-known-issues',
            'current_release': '6',
        },
        'addressed-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/10-1/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-addressed-issues',
            'current_release': '6',
            'hotfixes': [
                '10-1-6-h3',
            ],
        },
    },
    '10-2': {
        'known-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/10-2/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-known-issues',
            'current_release': '1',
        },
        'addressed-issues': {
            'url': 'https://docs.paloaltonetworks.com/pan-os/10-2/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-addressed-issues',
            'current_release': '1',
            'hotfixes': [],
        },
    },
}
# Date stamp used in the output file names (day-month-year).
today = date.today().strftime("%d-%m-%Y")


def _append_release_issues(release, release_new, issue_type, csv_file):
    """Fetch the issue table of one release and append it to ``csv_file``.

    Pages that cannot be fetched or that yield no table data are skipped so
    that one bad page does not abort the whole run. The column header is
    written only when the file is new or empty, so the appended CSV holds a
    single header line instead of one per page.
    """
    try:
        tbldata = datacollector(releases, release, release_new, issue_type, release_placeholder)
    except requests.RequestException as err:
        print('Request for ' + release_new + ' ' + issue_type + ' failed: ' + str(err))
        tbldata = None

    # datacollector returns None when a page has no usable table.
    if tbldata is not None and not tbldata.empty:
        write_header = not os.path.exists(csv_file) or os.path.getsize(csv_file) == 0
        tbldata.to_csv(csv_file, mode='a', index=False, header=write_header)

    # Pause between requests to avoid hammering the documentation server.
    time.sleep(6)


for issue_type in ['known-issues','addressed-issues']:
    # One CSV file per issue type and run date.
    file = 'C:/Users/akdaniel/Downloads/palo_alto_' + issue_type + '-' + str(today) +'.csv'
    for release in releases:
        # hotfixes: only 'addressed-issues' entries carry a 'hotfixes' list.
        # Their tables are now written to the CSV as well; previously the
        # pages were fetched and the result was thrown away.
        if issue_type == 'addressed-issues':
            for hotfix in releases[release][issue_type].get('hotfixes', []):
                _append_release_issues(release, hotfix, issue_type, file)
        # releases: maintenance releases 1..current_release of this family
        last_release = int(releases[release][issue_type]['current_release'])
        for i in range(1, last_release + 1):
            release_new = release + '-' + str(i)
            # Append each release issues to csv file
            _append_release_issues(release, release_new, issue_type, file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment