Created
July 8, 2022 02:37
-
-
Save itsecworks/c6c0f4fc48c2058a1ea8ccdbafcda654 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries
import time
from datetime import date
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
def datacollector(releases, release, release_new, iss_type, release_placeholder):
    """Download one release-notes page and return its issue table.

    The URL template stored at ``releases[release][iss_type]['url']`` is
    completed by replacing every occurrence of ``release_placeholder`` with
    ``release_new``. The first ``<table class="table colsep rowsep">`` found on
    the page is converted into a DataFrame whose first column, ``release``,
    holds ``release_new`` for every row.

    Args:
        releases: Nested mapping ``{release: {issue type: {'url': ...}}}``.
        release: Key of the release family inside ``releases``.
        release_new: Version string substituted into the URL template and
            stored in the ``release`` column.
        iss_type: Key of the issue type below ``releases[release]``.
        release_placeholder: Token in the URL template that gets replaced.

    Returns:
        A ``pandas.DataFrame`` with one row per data row of the table, or
        ``None`` when the request is answered with an error status, the page
        has no matching table, or the table has no data rows.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection failure, timeout, ...).
    """
    # Create the URL for the requested release
    url = releases[release][iss_type]['url'].replace(release_placeholder, release_new)
    print(url)

    # NOTE(review): verify=False turns off TLS certificate validation. It is
    # kept because the script may depend on it (e.g. behind an intercepting
    # proxy) -- confirm whether it is still required.
    # The timeout keeps a stalled connection from hanging the whole run.
    page = requests.get(url, verify=False, timeout=30)
    if not page.ok:
        print('Request failed with HTTP status ' + str(page.status_code) + '! no data!')
        return None

    # parser-lxml = Change html to Python friendly format
    soup = BeautifulSoup(page.text, 'lxml')

    # find() returns None when the page has no such table (e.g. an error
    # page), so this must be checked before the table is used.
    tbl = soup.find('table', attrs={"class": "table colsep rowsep"})
    if tbl is None:
        print('The table not found! no data!')
        return None

    # Column titles come from the <th> cells; 'release' is always first.
    header_cells = tbl.find_all('th')
    if header_cells:
        headers = ['release'] + [cell.text for cell in header_cells]
    else:
        print('The table header cells not found! default column name will be set!')
        headers = ['release', 'Issue ID', 'Description']

    table_rows = tbl.find_all('tr')
    if not table_rows:
        print('The table rows not found! no data!')
        return None

    rows = []
    # The first <tr> is always skipped, as in the original implementation
    # (presumably it is the header row -- verify for tables without <th>).
    for table_row in table_rows[1:]:
        cells = table_row.find_all('td')
        if not cells:
            # A row without data cells carries nothing to record.
            continue
        row = [release_new] + [cell.text.replace('\n', ' ') for cell in cells]
        if len(row) != len(headers):
            # Merged or missing cells: pad / trim so the row fits the header
            # instead of aborting the whole page with a ValueError.
            print('Row with ' + str(len(cells)) + ' cells does not match the '
                  + str(len(headers) - 1) + ' table columns! row will be adjusted!')
            row = (row + [''] * len(headers))[:len(headers)]
        rows.append(row)

    if not rows:
        return None

    # Build the frame in one step instead of growing it row by row.
    return pd.DataFrame(rows, columns=headers)
# Token inside the URL templates below that is replaced by a concrete version
# string before a page is requested.
release_placeholder = "XXX"

# Scrape configuration, keyed by release family and then by issue type.
#   url             - URL template of the documentation page.
#   current_release - highest release number to fetch, stored as a string;
#                     the script iterates from 1 up to this number.
#   hotfixes        - (addressed issues only) version strings that are
#                     substituted into the URL template as they are.
releases = {
    "8-1": {
        "known-issues": {
            # This URL has no placeholder: it is a single page.
            "url": "https://docs.paloaltonetworks.com/pan-os/8-1/pan-os-release-notes/pan-os-8-1-release-information/known-issues/known-issues-related-to-pan-os-8-1-releases",
            "current_release": "1",
        },
        "addressed-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/8-1/pan-os-release-notes/pan-os-8-1-addressed-issues/pan-os-XXX-addressed-issues",
            "current_release": "23",
            "hotfixes": [
                "8-1-21-h1",
                "8-1-20-h1",
                "8-1-15-h3",
                "8-1-14-h2",
                "8-1-9-h4",
                "8-1-8-h5",
                "8-1-6-h2",
                "8-1-4-h2",
            ],
        },
    },
    "9-1": {
        "known-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/9-1/pan-os-release-notes/pan-os-9-1-release-information/known-issues/known-issues-related-to-pan-os-9-1-releases/pan-os-XXX-known-issues",
            "current_release": "14",
        },
        "addressed-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/9-1/pan-os-release-notes/pan-os-9-1-addressed-issues/pan-os-XXX-addressed-issues",
            "current_release": "14",
            # NOTE(review): "912-h1" does not follow the dash-separated
            # pattern of the other entries -- confirm against the live URL.
            "hotfixes": [
                "912-h1",
                "9-1-3-h1",
                "9-1-11-h2",
                "9-1-11-h3",
                "9-1-12-h3",
                "9-1-12-h4",
                "9-1-13-h1",
                "9-1-13-h3",
            ],
        },
    },
    "10-0": {
        "known-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/10-0/pan-os-release-notes/pan-os-10-0-release-information/known-issues/known-issues-related-to-pan-os-10-0-releases/pan-os-XXX-known-issues",
            "current_release": "10",
        },
        "addressed-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/10-0/pan-os-release-notes/pan-os-10-0-addressed-issues/pan-os-XXX-addressed-issues",
            "current_release": "10",
            "hotfixes": [
                "10-0-10-h1",
                "10-0-8-h8",
                "10-0-8-h4",
            ],
        },
    },
    "10-1": {
        "known-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/10-1/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-known-issues",
            "current_release": "6",
        },
        "addressed-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/10-1/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-addressed-issues",
            "current_release": "6",
            "hotfixes": [
                "10-1-6-h3",
            ],
        },
    },
    "10-2": {
        "known-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/10-2/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-known-issues",
            "current_release": "1",
        },
        "addressed-issues": {
            "url": "https://docs.paloaltonetworks.com/pan-os/10-2/pan-os-release-notes/pan-os-XXX-known-and-addressed-issues/pan-os-XXX-addressed-issues",
            "current_release": "1",
            "hotfixes": [],
        },
    },
}
# Date stamp (day-month-year) that becomes part of the output file names.
today = date.today().strftime("%d-%m-%Y")

for issue_type in ['known-issues', 'addressed-issues']:
    # One CSV file per issue type and day.
    # NOTE(review): the output directory is specific to one machine.
    csv_path = Path('C:/Users/akdaniel/Downloads/palo_alto_' + issue_type + '-' + str(today) + '.csv')

    for release in releases:
        settings = releases[release][issue_type]

        # Hotfix versions first (only configured for addressed issues), then
        # the regular releases 1 .. current_release.
        versions = list(settings.get('hotfixes', []))
        last_release = int(settings['current_release'])
        versions += [release + '-' + str(number) for number in range(1, last_release + 1)]

        for release_new in versions:
            tbldata = datacollector(releases, release, release_new, issue_type, release_placeholder)

            # datacollector returns None when a page yields no data rows.
            if tbldata is None:
                print('No data for ' + release_new + ' (' + issue_type + ')!')
            else:
                # Append each release's issues to the CSV file; the column
                # header is written only when the file is created, so it is
                # not repeated between the data rows.
                tbldata.to_csv(csv_path, mode='a', index=False, header=not csv_path.exists())

            # Pause between requests, including hotfix pages.
            time.sleep(6)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment