Skip to content

Instantly share code, notes, and snippets.

@amosboldor
Created August 9, 2023 00:39
Show Gist options
  • Save amosboldor/a59df05da33859c8c824a99eed8441c7 to your computer and use it in GitHub Desktop.
Save amosboldor/a59df05da33859c8c824a99eed8441c7 to your computer and use it in GitHub Desktop.
# pip install requests beautifulsoup4 lxml progress tabulate
import requests
from bs4 import BeautifulSoup
from progress.bar import Bar
from tabulate import tabulate
# DistroWatch search URL listing active Linux distributions in the "Server"
# category; every other filter is left at its catch-all value (All / None).
dws = (
    "https://distrowatch.com/search.php?"
    "ostype=Linux&"  # OS Type: Linux
    "category=Server&"  # Distribution Category: Server
    "status=Active&"  # Status: Active
    "origin=All&"
    "basedon=All&"
    "notbasedon=None&"
    "desktop=All&"
    "architecture=All&"
    "package=All&"
    "rolling=All&"
    "isosize=All&"
    "netinstall=All&"
    "language=All&"
    "defaultinit=All"
)
# Send a GET request to the URL.  The timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() reports an HTTP
# error here instead of letting an error page be parsed as search results.
dws_r = requests.get(dws, timeout=30)
dws_r.raise_for_status()
# Parse the response text with BeautifulSoup
dws_r_soup = BeautifulSoup(dws_r.text, "lxml")
# Select the distribution links from the parsed HTML.  The first match is
# skipped -- presumably it is not a distribution entry; verify against the
# page markup if the results look off by one.
distro_atags = dws_r_soup.css.select("td > b > a")[1:]
# Define a function to create a distribution's URL on DistroWatch
def base_url(d: str) -> str:
    """Return the DistroWatch table page URL for distribution identifier *d*.

    *d* is inserted verbatim as the value of the ``distribution`` query
    parameter; no escaping is applied.
    """
    return f"https://distrowatch.com/table.php?distribution={d}"
# Initialize a dictionary mapping architecture name -> number of times it
# was listed across all processed distribution pages
ARCHS = {}
# Process each Linux distribution, showing a progress bar while doing so
for distro_atag in Bar('Processing').iter(distro_atags):
    # Create the distribution's URL from the link's href attribute
    url = base_url(distro_atag["href"])
    # Fetch the distribution page.  The timeout prevents one stalled request
    # from blocking the whole run; raise_for_status() makes an HTTP error
    # explicit instead of failing later while indexing the parsed page.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # Parse the response text with BeautifulSoup
    soup = BeautifulSoup(r.text, "lxml")
    # Select the architecture list from the parsed HTML.  This relies on it
    # being the fourth <li> of the summary list (position-dependent; raises
    # IndexError if the page has fewer items).
    arch_li = soup.css.select("td.TablesTitle > ul > li")[3]
    arch_atags = arch_li.css.select('a')
    # Process each architecture link
    for arch_atag in arch_atags:
        # Get the architecture name (the link text)
        arch_name = arch_atag.text
        # Increment the count of this architecture in ARCHS
        ARCHS[arch_name] = ARCHS.get(arch_name, 0) + 1
# Order the (architecture, count) pairs from highest to lowest count ...
arch_rows = sorted(ARCHS.items(), key=lambda pair: pair[1], reverse=True)
# ... render them as a two-column grid table ...
arch_table = tabulate(
    arch_rows,
    headers=['Architecture', 'Count'],
    tablefmt="heavy_grid",
)
# ... and write the table to stdout.
print(arch_table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment