Skip to content

Instantly share code, notes, and snippets.

@K-Wu
Created April 14, 2019 13:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save K-Wu/35423b4d806e318d811420bdead2c13f to your computer and use it in GitHub Desktop.
Save K-Wu/35423b4d806e318d811420bdead2c13f to your computer and use it in GitHub Desktop.
Extract Intel compiler options and export them to an XLSX spreadsheet
from itertools import zip_longest
from urllib.parse import urljoin

import bs4
import requests
import xlsxwriter
from bs4 import BeautifulSoup
def get_options_url(output_path='options.xlsx', timeout=30):
    """Scrape Intel's alphabetical compiler-option list and export it to XLSX.

    Downloads the index page, follows the link found in the first cell of
    every table row to that option's detail page, and collects the Linux and
    Windows spellings returned by
    ``get_windows_linux_available_single_option``. The pairs are written to
    a two-column worksheet headed "linux" / "windows". When one platform
    lists fewer spellings than the other, the shorter column is padded with
    the string ``'None'``.

    Args:
        output_path: Path of the workbook to create. Defaults to
            ``'options.xlsx'`` in the current working directory.
        timeout: Seconds to wait for the index page before giving up.

    Raises:
        requests.RequestException: If the index page cannot be fetched or
            the server answers with an error status.
    """
    BASEURL = "https://software.intel.com"
    URL = "https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-alphabetical-list-of-compiler-options"

    response = requests.get(URL, timeout=timeout)
    # Without the index page there is nothing to crawl, so fail loudly
    # instead of parsing an error page into an empty spreadsheet.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    option_urls = []
    for row in soup.find_all("tr"):
        # Only element children are cells; bare strings between tags are
        # whitespace and must not be mistaken for the first cell.
        cells = [
            child for child in row.children
            if isinstance(child, bs4.element.Tag)
        ]
        if not cells:
            continue
        link = cells[0].find("a", href=True)
        if link is None:
            # Rows without a link in the first cell (e.g. header rows)
            # do not point at an option page.
            continue
        # urljoin copes with both site-relative and absolute hrefs.
        option_urls.append(urljoin(BASEURL, link["href"]))

    csv_lines = [["linux", "windows"]]
    for option_url in option_urls:
        linux_options, windows_options, _, _ = (
            get_windows_linux_available_single_option(option_url)
        )
        # Pair the spellings row by row, padding the shorter list.
        csv_lines.extend(
            [linux, windows]
            for linux, windows in zip_longest(
                linux_options, windows_options, fillvalue='None'
            )
        )

    # The context manager closes (and thereby saves) the workbook even if
    # writing a row raises.
    with xlsxwriter.Workbook(output_path) as workbook:
        worksheet = workbook.add_worksheet()
        for row_idx, line in enumerate(csv_lines):
            worksheet.write_row(row_idx, 0, line)
def get_windows_linux_available_single_option(url, timeout=30):
    """Extract the Linux and Windows spellings from one option's page.

    The page is fetched and every ``<td class="noborder">`` cell is reduced
    to the list of texts of its child elements. A cell whose first text is
    ``"Linux and macOS:"`` or ``"Linux:"`` marks the Linux label, one whose
    first text is ``"Windows:"`` marks the Windows label; the cell that
    immediately follows each label holds that platform's spellings. If a
    label occurs more than once, the last occurrence wins.

    Args:
        url: Address of the option's detail page.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        A 4-tuple of lists of strings: the Linux spellings, the Windows
        spellings, the texts of the last matching cell, and the texts of
        the second-to-last matching cell (presumably the default setting
        and its description -- confirm against the page layout). If either
        platform label, or the cell after it, is missing, the tuple
        ``(["-1"], ["-1"], ["-1"], ["-1"])`` is returned instead.

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    entries = soup.find_all("td", class_="noborder")
    # Keep only element children; bare strings, comments and the like are
    # not option entries.
    options = [
        [child.text for child in entry.children
         if isinstance(child, bs4.element.Tag)]
        for entry in entries
    ]

    # Diagnostic only: an unexpected cell count is reported, but parsing is
    # still attempted.
    if len(entries) != 6:
        print("FUCK! Length != 6")
        print(url)

    linux_idx = -1
    windows_idx = -1
    for idx, entry in enumerate(options):
        if not entry:
            continue
        if entry[0] in ("Linux and macOS:", "Linux:"):
            linux_idx = idx
        if entry[0] == "Windows:":
            windows_idx = idx

    labels_found = linux_idx != -1 and windows_idx != -1
    # Each label must be followed by a value cell; a label in the very last
    # cell would otherwise index past the end of ``options``.
    values_present = max(linux_idx, windows_idx) + 1 < len(options)
    if not (labels_found and values_present):
        print("FUCKFUCK! cannot extract linux windows table")
        return ["-1"], ["-1"], ["-1"], ["-1"]

    return (
        options[linux_idx + 1],
        options[windows_idx + 1],
        options[-1],
        options[-2],
    )
if __name__ == "__main__":
    # Script entry point: crawl every option page and write the workbook.
    # To debug the parser on a single page, call instead e.g.
    # get_windows_linux_available_single_option("https://software.intel.com/node/a897f631-3e3e-4c6b-9a32-763d62b82cf9")
    get_options_url()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment