Skip to content

Instantly share code, notes, and snippets.

@rmhrisk
Created March 18, 2024 22:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save rmhrisk/d0ecc39eab846c5c2d14fa11b1c20811 to your computer and use it in GitHub Desktop.
Save rmhrisk/d0ecc39eab846c5c2d14fa11b1c20811 to your computer and use it in GitHub Desktop.
import pandas as pd
import requests
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from io import StringIO
from cryptography.hazmat.primitives import hashes
import matplotlib.pyplot as plt
def download_csv(url, timeout=30):
    """Download a CSV document over HTTP and return it as a text buffer.

    Args:
        url: Address of the CSV resource to fetch.
        timeout: Seconds to wait for the server to connect / send data before
            giving up. ``requests`` applies no timeout by default, so without
            this an unresponsive host would block the script forever.

    Returns:
        A ``StringIO`` holding the decoded response text, suitable for
        passing straight to ``pandas.read_csv``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server stays silent longer than ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses instead of parsing an HTML error page as CSV.
    response.raise_for_status()
    return StringIO(response.text)
def compute_fingerprint(pem_data):
    """Return the SHA-256 fingerprint of a PEM certificate as uppercase hex.

    Args:
        pem_data: The certificate as PEM text.

    Returns:
        The fingerprint as an uppercase hexadecimal string, or ``None`` when
        the input cannot be encoded, parsed, or hashed. This is best-effort:
        any failure is reported on stdout rather than raised.
    """
    try:
        certificate = x509.load_pem_x509_certificate(
            pem_data.encode(), default_backend()
        )
        digest = certificate.fingerprint(hashes.SHA256())
    except Exception as err:
        print(f"Error computing fingerprint: {err}")
        return None
    else:
        return digest.hex().upper()
def extract_country_from_certificate(pem_data):
    """Return the country code(s) found in a PEM certificate's issuer name.

    Args:
        pem_data: The certificate as PEM text.

    Returns:
        The distinct issuer ``countryName`` values joined with commas, in
        sorted order. An empty string is returned when the issuer carries no
        country attribute or when the certificate cannot be processed. This
        is best-effort: any failure is reported on stdout rather than raised.
    """
    try:
        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
        country_attrs = cert.issuer.get_attributes_for_oid(x509.NameOID.COUNTRY_NAME)
        # Sort the de-duplicated values: plain set iteration order depends on
        # the per-process string hash seed, so the same certificate could
        # otherwise yield "US,GB" in one run and "GB,US" in the next.
        return ",".join(sorted({attr.value for attr in country_attrs}))
    except Exception as e:
        print(f"Error extracting country: {e}")
        return ""
def generate_pie_chart_with_legend(ca_countries):
    """Show a donut chart of how often each country label occurs.

    Args:
        ca_countries: Iterable of country labels, one entry per CA. Repeated
            labels are counted.

    Returns:
        None. The chart is displayed with ``plt.show()``.
    """
    # Tally the labels; value_counts orders them from most to least frequent.
    country_counts = (
        pd.Series(ca_countries)
        .value_counts()
        .rename_axis('Country')
        .reset_index(name='Counts')
    )
    total = country_counts['Counts'].sum()

    # A wide figure leaves room for the legend to the right of the chart.
    fig, ax = plt.subplots(figsize=(15, 7))

    # With autopct set, pie() returns (wedges, label texts, percentage texts);
    # only the wedges are needed, as legend handles.
    wedges, _, _ = ax.pie(
        country_counts['Counts'],
        startangle=140,
        autopct='%1.1f%%',
        textprops=dict(color="w"),
    )

    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0, 0), 0.70, color='white'))

    # Legend entries show each country's share of the total. The raw count
    # must be converted first: formatting it directly with a "%" suffix would
    # present e.g. 25 CAs as "25.00%".
    legend_labels = [
        f"{country}: {100 * count / total:.2f}%"
        for country, count in zip(country_counts['Country'], country_counts['Counts'])
    ]
    ax.legend(wedges, legend_labels, title="Country", loc="center left", bbox_to_anchor=(1.1, 0.5))

    # Shrink the plot area so the legend is not clipped by the figure edge.
    fig.subplots_adjust(left=0.1, bottom=0.1, right=0.75)

    ax.set_title('Country Distribution of Certificate Authorities')
    plt.show()
# Root programs reported on, in table-column order. Each name is also the
# prefix looked for in the status text, as "<name>: Included".
_ROOT_PROGRAMS = ("Apple", "Google Chrome", "Microsoft", "Mozilla")


def _collect_trusted_roots(ca_data, fingerprint_to_country):
    """Group root-program inclusion and a country label by CA owner.

    Returns a ``(trusted_roots, ca_countries)`` pair: the first maps each CA
    owner to the set of programs that include at least one of its roots, the
    second maps each CA owner to a country string ("Unknown" if none found).
    """
    trusted_roots = {}
    ca_countries = {}
    for _, row in ca_data.iterrows():
        status = row['Status of Root Cert']
        # pandas reads an empty CSV cell as a float NaN; such a row is not
        # included anywhere, and a substring test on it would raise TypeError.
        if not isinstance(status, str):
            continue

        included = {
            program for program in _ROOT_PROGRAMS
            if f"{program}: Included" in status
        }
        # Only keep CAs that are trusted by at least one program.
        if not included:
            continue

        ca_owner = row['CA Owner']
        trusted_roots.setdefault(ca_owner, set()).update(included)

        # Keep the first *known* country seen for this owner. Assigning only
        # on the owner's first root would leave it "Unknown" whenever that
        # particular root is missing from the fingerprint map, even if a
        # later root of the same owner resolves to a country.
        if ca_countries.get(ca_owner, "Unknown") == "Unknown":
            fingerprint = row.get('SHA-256 Fingerprint', '')
            country = fingerprint_to_country.get(fingerprint, "Unknown")
            ca_countries[ca_owner] = country if country else "Unknown"
    return trusted_roots, ca_countries


def _render_markdown_table(trusted_roots, ca_countries):
    """Format the per-owner inclusion data as a markdown table with a total."""
    lines = [
        " | ".join(["CA Owner", "Countries", *_ROOT_PROGRAMS]),
        " | ".join(["---"] * (2 + len(_ROOT_PROGRAMS))),
    ]
    for ca_owner, stores in trusted_roots.items():
        countries = ca_countries.get(ca_owner, "Unknown")
        marks = ["✓" if program in stores else "" for program in _ROOT_PROGRAMS]
        lines.append(" | ".join([ca_owner, countries] + marks))
    # A blank line separates the table from the summary.
    lines.append("")
    lines.append(f"Total CAs: {len(trusted_roots)}")
    return "\n".join(lines) + "\n"


def generate_trusted_ca_markdown_table_from_url(ca_url, roots_url):
    """Print a markdown table of trusted CAs and plot their country split.

    Downloads two CSV reports, lists every CA owner that has at least one
    root certificate included by Apple, Google Chrome, Microsoft or Mozilla,
    prints the result as a markdown table, and then shows a donut chart of
    the owners' countries.

    Args:
        ca_url: URL of a CSV with the columns 'Certificate Record Type',
            'CA Owner', 'Status of Root Cert' and (optionally)
            'SHA-256 Fingerprint'.
        roots_url: URL of a CSV with a 'PEM' column of root certificates,
            used to look up a country for each fingerprint.

    Returns:
        None. Output goes to stdout and to a matplotlib window.
    """
    ca_data = pd.read_csv(download_csv(ca_url))
    ca_data = ca_data[ca_data['Certificate Record Type'] == 'Root Certificate']

    roots_data = pd.read_csv(download_csv(roots_url))
    roots_data['Computed SHA-256 Fingerprint'] = roots_data['PEM'].apply(compute_fingerprint)
    # Fingerprints are computed locally from the PEM so they can be matched
    # against the fingerprint column of the certificate-records report.
    fingerprint_to_country = dict(zip(
        roots_data['Computed SHA-256 Fingerprint'],
        roots_data['PEM'].apply(extract_country_from_certificate),
    ))

    trusted_roots, ca_countries = _collect_trusted_roots(ca_data, fingerprint_to_country)
    print(_render_markdown_table(trusted_roots, ca_countries))

    # One country entry per CA owner feeds the chart's frequency count.
    generate_pie_chart_with_legend(list(ca_countries.values()))
# CCADB report endpoints used as input: the PEM list of included roots
# (filtered by the Websites trust bit in the query string) and the full
# certificate-records export.
roots_url = 'https://ccadb.my.salesforce-sites.com/mozilla/IncludedRootsDistrustTLSSSLPEMCSV?TrustBitsInclude=Websites'
ca_url = 'https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv2'
# Download both reports, print the markdown table, and display the chart
generate_trusted_ca_markdown_table_from_url(ca_url=ca_url, roots_url=roots_url)
@rmhrisk
Copy link
Author

rmhrisk commented Mar 19, 2024

With that said, I would argue that the lack of eventual inclusion in all root programs is merely a signal, not an absolute indicator, that a CA isn't providing enough value to the web to justify the exposure it represents. A much better indicator would be the ultimate issuance volume over a fixed period of time. For example, if you meet all the requirements and successfully pass audits for 5 years, yet fail to achieve any material issuance volume, should you still be trusted?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment