# rmhrisk/webpki-ca-countries.py

## webpki-ca-countries.py
import pandas as pd
import requests
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from io import StringIO
from cryptography.hazmat.primitives import hashes
import matplotlib.pyplot as plt

def download_csv(url, timeout=120):
    """Download a CSV document and return it as an in-memory text buffer.

    Args:
        url: Address of the CSV resource, fetched with an HTTP GET.
        timeout: Seconds to wait for the server to connect or to send more
            data before giving up. It is not a cap on the total download
            time. Without it ``requests`` waits indefinitely, so a stalled
            server would hang the script.

    Returns:
        A ``StringIO`` wrapping the decoded response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server stays silent for ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Turn 4xx/5xx answers into an exception instead of parsing an error page.
    response.raise_for_status()
    return StringIO(response.text)

def compute_fingerprint(pem_data):
    """Return the SHA-256 fingerprint of a PEM certificate as uppercase hex.

    Any failure while decoding, parsing or hashing is reported on stdout and
    converted into a ``None`` result instead of propagating to the caller.
    """
    try:
        certificate = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
        digest = certificate.fingerprint(hashes.SHA256())
        fingerprint_hex = digest.hex().upper()
    except Exception as error:
        print(f"Error computing fingerprint: {error}")
        fingerprint_hex = None
    return fingerprint_hex

def extract_country_from_certificate(pem_data):
    """Return the issuer country value(s) of a PEM certificate.

    Args:
        pem_data: The certificate as a PEM-encoded string.

    Returns:
        The distinct countryName values found in the certificate's issuer,
        joined with commas in sorted order. An empty string is returned when
        the issuer has no countryName or when the certificate cannot be
        processed (the error is printed to stdout).
    """
    try:
        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
        country_attributes = cert.issuer.get_attributes_for_oid(x509.NameOID.COUNTRY_NAME)
        # Sort the de-duplicated values: iteration order of a set of strings
        # can change between interpreter runs, which would make the same
        # certificate yield e.g. "GB,US" on one run and "US,GB" on the next.
        return ",".join(sorted({attribute.value for attribute in country_attributes}))
    except Exception as e:
        # Best effort: a bad or missing PEM must not abort the whole report.
        print(f"Error extracting country: {e}")
        return ""

def generate_pie_chart_with_legend(ca_countries):
    """Display a donut chart of how often each country label occurs.

    Args:
        ca_countries: Sequence of country labels, one entry per CA. Equal
            labels are tallied into a single wedge.

    The chart is shown with ``plt.show()``; nothing is returned.
    """
    # Tally the labels into a two-column frame: 'Country' and 'Counts'
    country_counts = pd.Series(ca_countries).value_counts().rename_axis('Country').reset_index(name='Counts')

    # Share of the total for each country. The legend is labelled with "%",
    # so it must show these shares rather than the raw counts.
    percentages = country_counts['Counts'] / country_counts['Counts'].sum() * 100

    # Wide figure so the legend fits next to the pie
    _, ax = plt.subplots(figsize=(15, 7))

    # Pie wedges annotated with their percentage in white text
    wedges, _, _ = ax.pie(
        country_counts['Counts'],
        startangle=140,
        autopct='%1.1f%%',
        textprops=dict(color="w")
    )

    # A white disc over the centre turns the pie into a donut
    ax.add_artist(plt.Circle((0, 0), 0.70, color='white'))

    # Legend with country names and percentages, placed on the right side
    legend_labels = [f"{country}: {perc:.2f}%" for country, perc in zip(country_counts['Country'], percentages)]
    ax.legend(wedges, legend_labels, title="Country", loc="center left", bbox_to_anchor=(1.1, 0.5))

    # Leave room on the right so the legend is not cut off
    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.75)

    ax.set_title('Country Distribution of Certificate Authorities')
    plt.show()

def generate_trusted_ca_markdown_table_from_url(ca_url, roots_url):
    """Print a markdown table of trusted CA owners and chart their countries.

    Args:
        ca_url: URL of a CSV of certificate records. The columns
            'Certificate Record Type', 'CA Owner', 'SHA-256 Fingerprint' and
            'Status of Root Cert' are read.
        roots_url: URL of a CSV of root certificates with a 'PEM' column,
            used to map a certificate fingerprint to its issuer country.

    A CA owner is listed when at least one of its root certificates has a
    status containing "<program>: Included" for Apple, Google Chrome,
    Microsoft or Mozilla. The table and the total are printed to stdout, then
    the per-owner countries are passed to the pie chart.
    """
    programs = ("Apple", "Google Chrome", "Microsoft", "Mozilla")

    ca_data = pd.read_csv(download_csv(ca_url))
    ca_data = ca_data[ca_data['Certificate Record Type'] == 'Root Certificate']

    roots_data = pd.read_csv(download_csv(roots_url))
    roots_data['Computed SHA-256 Fingerprint'] = roots_data['PEM'].apply(compute_fingerprint)
    fingerprint_to_country = dict(zip(roots_data['Computed SHA-256 Fingerprint'], roots_data['PEM'].apply(extract_country_from_certificate)))

    trusted_roots = {}
    ca_countries = {}

    for _, row in ca_data.iterrows():
        status = row['Status of Root Cert']
        # An empty CSV cell is read as NaN (a float); a substring test on it
        # would raise TypeError, and such a root is not included anywhere.
        if not isinstance(status, str):
            continue

        # Only include CAs that are trusted by at least one program
        included = {program for program in programs if f"{program}: Included" in status}
        if not included:
            continue

        ca_owner = row['CA Owner']
        fingerprint = row.get('SHA-256 Fingerprint', '')
        # Missing fingerprint or empty country both count as "Unknown"
        country = fingerprint_to_country.get(fingerprint) or "Unknown"

        trusted_roots.setdefault(ca_owner, set()).update(included)

        # An owner can have several roots. A later root whose country cannot
        # be resolved must not erase a country already found for that owner.
        if country != "Unknown" or ca_owner not in ca_countries:
            ca_countries[ca_owner] = country

    # Generating markdown table
    lines = [
        " | ".join(["CA Owner", "Countries", *programs]),
        " | ".join(["---"] * (2 + len(programs))),
    ]
    for ca_owner, stores in trusted_roots.items():
        marks = ["✓" if program in stores else "" for program in programs]
        lines.append(" | ".join([ca_owner, ca_countries.get(ca_owner, "Unknown"), *marks]))
    markdown_table = "\n".join(lines) + "\n" + f"\nTotal CAs: {len(trusted_roots)}\n"
    print(markdown_table)

    # One country label per CA owner feeds the chart
    generate_pie_chart_with_legend(list(ca_countries.values()))

# CSV of certificate records; the report keeps only its 'Root Certificate' rows
ca_url = 'https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv2'
# CSV of root certificates with their PEM, requested with TrustBitsInclude=Websites
roots_url = 'https://ccadb.my.salesforce-sites.com/mozilla/IncludedRootsDistrustTLSSSLPEMCSV?TrustBitsInclude=Websites'

# Download both datasets, print the markdown table and show the chart
generate_trusted_ca_markdown_table_from_url(ca_url=ca_url, roots_url=roots_url)
	import pandas as pd
	import requests
	from cryptography import x509
	from cryptography.hazmat.backends import default_backend
	from io import StringIO
	from cryptography.hazmat.primitives import hashes
	import matplotlib.pyplot as plt

	def download_csv(url, timeout=120):
	    """Download a CSV document and return it as an in-memory text buffer.

	    Args:
	        url: Address of the CSV resource, fetched with an HTTP GET.
	        timeout: Seconds to wait for the server to connect or to send more
	            data before giving up. It is not a cap on the total download
	            time. Without it ``requests`` waits indefinitely, so a stalled
	            server would hang the script.

	    Returns:
	        A ``StringIO`` wrapping the decoded response body.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	        requests.Timeout: If the server stays silent for ``timeout`` seconds.
	    """
	    response = requests.get(url, timeout=timeout)
	    # Turn 4xx/5xx answers into an exception instead of parsing an error page.
	    response.raise_for_status()
	    return StringIO(response.text)

	def compute_fingerprint(pem_data):
	    """Return the SHA-256 fingerprint of a PEM certificate as uppercase hex.

	    Args:
	        pem_data: The certificate as a PEM-encoded string.

	    Returns:
	        The fingerprint as an uppercase hexadecimal string, or ``None`` when
	        the certificate cannot be processed (the error is printed to stdout).
	    """
	    try:
	        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
	        return cert.fingerprint(hashes.SHA256()).hex().upper()
	    except Exception as e:
	        # Best effort: a bad or missing PEM must not abort the whole report.
	        print(f"Error computing fingerprint: {e}")
	        return None

	def extract_country_from_certificate(pem_data):
	    """Return the issuer country value(s) of a PEM certificate.

	    Args:
	        pem_data: The certificate as a PEM-encoded string.

	    Returns:
	        The distinct countryName values found in the certificate's issuer,
	        joined with commas in sorted order. An empty string is returned when
	        the issuer has no countryName or when the certificate cannot be
	        processed (the error is printed to stdout).
	    """
	    try:
	        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
	        country_attributes = cert.issuer.get_attributes_for_oid(x509.NameOID.COUNTRY_NAME)
	        # Sort the de-duplicated values: iteration order of a set of strings
	        # can change between interpreter runs, which would make the same
	        # certificate yield e.g. "GB,US" on one run and "US,GB" on the next.
	        return ",".join(sorted({attribute.value for attribute in country_attributes}))
	    except Exception as e:
	        # Best effort: a bad or missing PEM must not abort the whole report.
	        print(f"Error extracting country: {e}")
	        return ""

	def generate_pie_chart_with_legend(ca_countries):
	    """Display a donut chart of how often each country label occurs.

	    Args:
	        ca_countries: Sequence of country labels, one entry per CA. Equal
	            labels are tallied into a single wedge.

	    The chart is shown with ``plt.show()``; nothing is returned.
	    """
	    # Tally the labels into a two-column frame: 'Country' and 'Counts'
	    country_counts = pd.Series(ca_countries).value_counts().rename_axis('Country').reset_index(name='Counts')

	    # Share of the total for each country. The legend is labelled with "%",
	    # so it must show these shares rather than the raw counts.
	    percentages = country_counts['Counts'] / country_counts['Counts'].sum() * 100

	    # Wide figure so the legend fits next to the pie
	    _, ax = plt.subplots(figsize=(15, 7))

	    # Pie wedges annotated with their percentage in white text
	    wedges, _, _ = ax.pie(
	        country_counts['Counts'],
	        startangle=140,
	        autopct='%1.1f%%',
	        textprops=dict(color="w")
	    )

	    # A white disc over the centre turns the pie into a donut
	    ax.add_artist(plt.Circle((0, 0), 0.70, color='white'))

	    # Legend with country names and percentages, placed on the right side
	    legend_labels = [f"{country}: {perc:.2f}%" for country, perc in zip(country_counts['Country'], percentages)]
	    ax.legend(wedges, legend_labels, title="Country", loc="center left", bbox_to_anchor=(1.1, 0.5))

	    # Leave room on the right so the legend is not cut off
	    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.75)

	    ax.set_title('Country Distribution of Certificate Authorities')
	    plt.show()

	def generate_trusted_ca_markdown_table_from_url(ca_url, roots_url):
	    """Print a markdown table of trusted CA owners and chart their countries.

	    Args:
	        ca_url: URL of a CSV of certificate records. The columns
	            'Certificate Record Type', 'CA Owner', 'SHA-256 Fingerprint' and
	            'Status of Root Cert' are read.
	        roots_url: URL of a CSV of root certificates with a 'PEM' column,
	            used to map a certificate fingerprint to its issuer country.

	    A CA owner is listed when at least one of its root certificates has a
	    status containing "<program>: Included" for Apple, Google Chrome,
	    Microsoft or Mozilla. The table and the total are printed to stdout, then
	    the per-owner countries are passed to the pie chart.
	    """
	    programs = ("Apple", "Google Chrome", "Microsoft", "Mozilla")

	    ca_data = pd.read_csv(download_csv(ca_url))
	    ca_data = ca_data[ca_data['Certificate Record Type'] == 'Root Certificate']

	    roots_data = pd.read_csv(download_csv(roots_url))
	    roots_data['Computed SHA-256 Fingerprint'] = roots_data['PEM'].apply(compute_fingerprint)
	    fingerprint_to_country = dict(zip(roots_data['Computed SHA-256 Fingerprint'], roots_data['PEM'].apply(extract_country_from_certificate)))

	    trusted_roots = {}
	    ca_countries = {}

	    for _, row in ca_data.iterrows():
	        status = row['Status of Root Cert']
	        # An empty CSV cell is read as NaN (a float); a substring test on it
	        # would raise TypeError, and such a root is not included anywhere.
	        if not isinstance(status, str):
	            continue

	        # Only include CAs that are trusted by at least one program
	        included = {program for program in programs if f"{program}: Included" in status}
	        if not included:
	            continue

	        ca_owner = row['CA Owner']
	        fingerprint = row.get('SHA-256 Fingerprint', '')
	        # Missing fingerprint or empty country both count as "Unknown"
	        country = fingerprint_to_country.get(fingerprint) or "Unknown"

	        trusted_roots.setdefault(ca_owner, set()).update(included)

	        # An owner can have several roots. A later root whose country cannot
	        # be resolved must not erase a country already found for that owner.
	        if country != "Unknown" or ca_owner not in ca_countries:
	            ca_countries[ca_owner] = country

	    # Generating markdown table. Cells are separated by a plain " | ";
	    # a backslash before the pipe would be printed literally and break the table.
	    lines = [
	        " | ".join(["CA Owner", "Countries", *programs]),
	        " | ".join(["---"] * (2 + len(programs))),
	    ]
	    for ca_owner, stores in trusted_roots.items():
	        marks = ["✓" if program in stores else "" for program in programs]
	        lines.append(" | ".join([ca_owner, ca_countries.get(ca_owner, "Unknown"), *marks]))
	    markdown_table = "\n".join(lines) + "\n" + f"\nTotal CAs: {len(trusted_roots)}\n"
	    print(markdown_table)

	    # One country label per CA owner feeds the chart
	    generate_pie_chart_with_legend(list(ca_countries.values()))

	# CSV of certificate records; the report keeps only its 'Root Certificate' rows
	ca_url = 'https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv2'
	# CSV of root certificates with their PEM, requested with TrustBitsInclude=Websites
	roots_url = 'https://ccadb.my.salesforce-sites.com/mozilla/IncludedRootsDistrustTLSSSLPEMCSV?TrustBitsInclude=Websites'

	# Download both datasets, print the markdown table and show the chart
	generate_trusted_ca_markdown_table_from_url(ca_url=ca_url, roots_url=roots_url)