Skip to content

Instantly share code, notes, and snippets.

@rmhrisk
Created March 18, 2024 15:15
Show Gist options
  • Save rmhrisk/39f266d7b0fc3a7ec62047828b98964f to your computer and use it in GitHub Desktop.
Save rmhrisk/39f266d7b0fc3a7ec62047828b98964f to your computer and use it in GitHub Desktop.
Rank Certificate Authority Unexpired Precertificates Percentage Cumulative %
1 Internet Security Research Group 338,978,164 47.928% 47.928%
2 DigiCert 81,481,742 11.521% 59.449%
3 Sectigo 71,963,799 10.175% 69.624%
4 GoDaddy 69,771,281 9.865% 79.489%
5 Amazon Trust Services 56,893,730 8.044% 87.533%
6 Google Trust Services LLC 56,021,094 7.921% 95.454%
7 Microsoft Corporation 23,484,401 3.320% 98.774%
8 IdenTrust Services, LLC 4,385,571 0.620% 99.394%
9 GlobalSign nv-sa 1,460,274 0.206% 99.601%
10 Actalis 802,313 0.113% 99.714%
11 Entrust 536,517 0.076% 99.790%
12 Asseco Data Systems S.A. (previously Unizeto Certum) 469,826 0.066% 99.856%
13 Buypass 228,402 0.032% 99.889%
14 SECOM Trust Systems CO., LTD. 209,764 0.030% 99.918%
15 SSL.com 177,890 0.025% 99.944%
16 Certainly LLC 118,103 0.017% 99.960%
17 Taiwan-CA Inc. (TWCA) 50,049 0.007% 99.967%
18 QuoVadis 48,498 0.007% 99.974%
19 SwissSign AG 27,290 0.004% 99.978%
20 ? 22,291 0.003% 99.981%
21 Deutsche Telekom Security GmbH 18,025 0.003% 99.984%
22 Chunghwa Telecom 16,632 0.002% 99.986%
23 Telia Company 13,353 0.002% 99.988%
24 D-TRUST 11,745 0.002% 99.990%
25 Viking Cloud, Inc. 9,355 0.001% 99.991%
26 Certigna 7,715 0.001% 99.992%
27 eMudhra Technologies Limited 5,722 0.001% 99.993%
28 Shanghai Electronic Certification Authority Co., Ltd. 5,532 0.001% 99.994%
29 China Financial Certification Authority (CFCA) 4,223 0.001% 99.994%
30 HARICA 3,901 0.001% 99.995%
- Total for all considered CAs - 99.995% 99.995%
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
def fetch_data(url):
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
data = []
table = soup.find("table")
rows = table.find_all("tr")[2:] # Skip header rows
for row in rows:
cols = row.find_all("td")
if cols:
owner = cols[0].text.strip()
unexpired_precerts = int(cols[4].text.strip().replace(',', ''))
data.append([owner, unexpired_precerts])
return pd.DataFrame(data, columns=['Owner', 'Unexpired Precertificates'])
root_owner_url = 'https://crt.sh/cert-populations?group=RootOwner'
ca_owner_url = 'https://crt.sh/cert-populations?group=CAOwner'
root_owner_data = fetch_data(root_owner_url)
ca_owner_data = fetch_data(ca_owner_url)
# Adjust Certainly LLC and GoDaddy data
certainly_data = ca_owner_data[ca_owner_data['Owner'].str.contains('Certainly LLC', case=False)]['Unexpired Precertificates'].sum()
godaddy_indices = root_owner_data[root_owner_data['Owner'].str.contains('GoDaddy', case=False)].index
if not godaddy_indices.empty:
for idx in godaddy_indices:
root_owner_data.at[idx, 'Unexpired Precertificates'] -= certainly_data
# Now add Certainly LLC as a separate row
new_row = pd.DataFrame({'Owner': 'Certainly LLC', 'Unexpired Precertificates': certainly_data}, index=[0])
root_owner_data = pd.concat([root_owner_data, new_row], ignore_index=True)
# Combine Quovadis data with DigiCert
quovadis_total = root_owner_data[root_owner_data['Owner'].str.contains('Quovadis')]['Unexpired Precertificates'].sum()
root_owner_data.loc[root_owner_data['Owner'].str.contains('DigiCert'), 'Unexpired Precertificates'] += quovadis_total
root_owner_data = root_owner_data[~root_owner_data['Owner'].str.contains('Quovadis')]
# Combine Microsoft Corporation Core Services Engineering & Operations data with Microsoft Corporation
microsoft_cseo_total = root_owner_data[root_owner_data['Owner'].str.contains('Microsoft Corporation Core Services Engineering & Operations')]['Unexpired Precertificates'].sum()
root_owner_data.loc[root_owner_data['Owner'] == 'Microsoft Corporation', 'Unexpired Precertificates'] += microsoft_cseo_total
root_owner_data = root_owner_data[~root_owner_data['Owner'].str.contains('Microsoft Corporation Core Services Engineering & Operations')]
# Sort and calculate percentages
root_owner_data.sort_values('Unexpired Precertificates', ascending=False, inplace=True)
root_owner_data['Percentage'] = (root_owner_data['Unexpired Precertificates'] / root_owner_data['Unexpired Precertificates'].sum()) * 100
root_owner_data['Cumulative Percentage'] = root_owner_data['Percentage'].cumsum()
# Determine how many top CAs are needed to reach a cumulative percentage close to 100%
threshold = 99.995 # Define the threshold to reach close to 100%
top_cas_needed = root_owner_data[root_owner_data['Cumulative Percentage'] <= threshold]
# Plotting the pie chart with the needed number of CAs
fig, ax = plt.subplots(figsize=(12, 7))
wedges, texts, autotexts = ax.pie(
top_cas_needed['Unexpired Precertificates'],
autopct=lambda pct: f'{pct:.3f}%' if pct > 1 else '',
startangle=90
)
ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title('CAs by Unexpired Precertificates Issuance')
legend_labels = [f'{owner} - {pct:.3f}%' for owner, pct in zip(top_cas_needed['Owner'], top_cas_needed['Percentage'])]
ax.legend(wedges, legend_labels, title="Certificate Authorities", loc="center left", bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
# Generate the markdown table with the needed number of CAs
print("| Rank | Certificate Authority | Unexpired Precertificates | Percentage | Cumulative % |")
print("|------|----------------------|---------------------------|------------|--------------|")
rank = 1 # Start ranking at 1
for _, row in top_cas_needed.iterrows():
print(f"| {rank} | {row['Owner']} | {row['Unexpired Precertificates']:,} | {row['Percentage']:.3f}% | {row['Cumulative Percentage']:.3f}% |")
rank += 1 # Increment rank by 1 for each row
# Optionally, if you want to include the total for all CAs considered (not just the top 20)
total_percentage = top_cas_needed['Percentage'].sum()
print("| - | **Total for all considered CAs** | - | **{:.3f}%** | **{:.3f}%** |".format(total_percentage, total_percentage))
@rmhrisk
Copy link
Author

rmhrisk commented Mar 18, 2024

The updated output looks like this:

image
Rank Certificate Authority Unexpired Precertificates Percentage Cumulative %
1 Internet Security Research Group 339,096,117 47.911% 47.911%
2 DigiCert 81,567,944 11.525% 59.435%
3 Sectigo 72,068,468 10.183% 69.618%
4 GoDaddy 69,828,902 9.866% 79.484%
5 Amazon Trust Services 57,114,851 8.070% 87.554%
6 Google Trust Services LLC 55,991,420 7.911% 95.465%
7 Microsoft Corporation 23,496,018 3.320% 98.784%
8 IdenTrust Services, LLC 4,386,466 0.620% 99.404%
9 GlobalSign nv-sa 1,463,850 0.207% 99.611%
10 Actalis 802,443 0.113% 99.724%
11 Entrust 537,208 0.076% 99.800%
12 Asseco Data Systems S.A. (previously Unizeto Certum) 470,097 0.066% 99.867%
13 Buypass 228,125 0.032% 99.899%
14 SECOM Trust Systems CO., LTD. 209,771 0.030% 99.929%
15 SSL.com 177,784 0.025% 99.954%
16 Certainly LLC 117,480 0.017% 99.970%
17 Taiwan-CA Inc. (TWCA) 49,943 0.007% 99.977%
18 SwissSign AG 27,309 0.004% 99.981%
19 Deutsche Telekom Security GmbH 18,031 0.003% 99.984%
20 Chunghwa Telecom 16,631 0.002% 99.986%
21 Telia Company 13,356 0.002% 99.988%
22 D-TRUST 11,746 0.002% 99.990%
23 Viking Cloud, Inc. 9,428 0.001% 99.991%
24 Certigna 7,684 0.001% 99.992%
25 eMudhra Technologies Limited 5,724 0.001% 99.993%
26 Shanghai Electronic Certification Authority Co., Ltd. 5,530 0.001% 99.994%
27 China Financial Certification Authority (CFCA) 4,223 0.001% 99.994%
28 HARICA 3,901 0.001% 99.995%
- Total for all considered CAs - 99.995% 99.995%

@rmhrisk
Copy link
Author

rmhrisk commented Apr 29, 2024

image

Rank Certificate Authority Unexpired Precertificates Percentage Cumulative %
1 Internet Security Research Group 363,820,787 50.506% 50.506%
2 GoDaddy 78,213,543 10.858% 61.363%
3 DigiCert 72,780,038 10.103% 71.467%
4 Sectigo 60,414,175 8.387% 79.853%
5 Amazon Trust Services 58,081,483 8.063% 87.916%
6 Google Trust Services LLC 53,055,699 7.365% 95.281%
7 Microsoft Corporation 25,462,525 3.535% 98.816%
8 IdenTrust Services, LLC 4,332,393 0.601% 99.418%
9 GlobalSign nv-sa 1,485,722 0.206% 99.624%
10 Actalis 811,133 0.113% 99.736%
11 Entrust 562,764 0.078% 99.815%
12 Asseco Data Systems S.A. (previously Unizeto Certum) 469,804 0.065% 99.880%
13 Buypass 222,349 0.031% 99.911%
14 SECOM Trust Systems CO., LTD. 207,286 0.029% 99.939%
15 SSL.com 128,007 0.018% 99.957%
16 Certainly LLC 76,243 0.011% 99.968%
17 Taiwan-CA Inc. (TWCA) 49,473 0.007% 99.975%
18 SwissSign AG 27,760 0.004% 99.978%
19 Chunghwa Telecom 23,353 0.003% 99.982%
20 Deutsche Telekom Security GmbH 18,157 0.003% 99.984%
21 Telia Company 13,753 0.002% 99.986%
22 D-TRUST 12,299 0.002% 99.988%
23 Certigna 11,452 0.002% 99.989%
24 Viking Cloud, Inc. 10,589 0.001% 99.991%
25 China Financial Certification Authority (CFCA) 6,578 0.001% 99.992%
26 Shanghai Electronic Certification Authority Co., Ltd. 6,103 0.001% 99.993%
27 eMudhra Technologies Limited 5,837 0.001% 99.993%
28 certSIGN 4,465 0.001% 99.994%
29 HARICA 3,997 0.001% 99.995%
- Total for all considered CAs - 99.995% 99.995%

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment