Skip to content

Instantly share code, notes, and snippets.

@mheidari98
Created February 27, 2023 09:26
Show Gist options
  • Save mheidari98/95edf18cde660ccd868982e09f819c59 to your computer and use it in GitHub Desktop.
Save mheidari98/95edf18cde660ccd868982e09f819c59 to your computer and use it in GitHub Desktop.
scrape Cloudflare IPv4s from asnlookup.com
import logging
import re
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
# Old version (plain `requests`, no browser); it no longer works.
def getCfCIDR():
    """Scrape Cloudflare CIDR entries from asnlookup.com using plain HTTP requests.

    The Cloudflare organization page is fetched first to collect AS numbers;
    each AS page is then fetched to collect its CIDR list items.  If the
    organization page cannot be fetched or parsed, or it yields no AS
    numbers, ``AS13335`` is used as the only AS.

    Returns:
        list: The text of every matching ``<li>`` element, across all ASes.

    Raises:
        Any error raised while fetching or parsing an individual AS page is
        not caught here and propagates to the caller.
    """
    # Bind the name before the try block so the fallback below still works
    # when the request or the parser raises.
    ASs = []
    try:
        r = requests.get('https://asnlookup.com/organization/cloudflare', timeout=30)
        # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
        soup = BeautifulSoup(r.text, "html5lib")  # alternatives: 'html.parser', 'lxml'
        # Keep the text of every <b> element that contains a link.
        ASs = [b.text for b in soup.find_all('b') if b.a]
    except Exception as e:
        # Best-effort lookup: log the failure and continue with the fallback AS.
        logging.error(f"Error to get Cloudflare ASN: {e}")
    ASs = ASs if ASs else ['AS13335']
    logging.debug(f"AS = {', '.join(ASs)}")

    # Compiled once; matched against the rendered markup of each <li>.
    cidr_item = re.compile(r'<li><a href="/cidr/.*0/')
    CIDRs = []
    for AS in ASs:
        r = requests.get(f'https://asnlookup.com/asn/{AS}', timeout=30)
        soup = BeautifulSoup(r.text, "html5lib")
        res = [li.text for li in soup.find_all('li') if cidr_item.search(li.decode())]
        logging.debug(f"AS {AS} \t=> CIDR = {len(res)}")
        CIDRs += res
    return CIDRs
def createDriver():
    """Build and return a headless Chrome WebDriver.

    The driver is started from ``./chromedriver.exe`` (relative to the current
    working directory) and is given an implicit wait of 3.

    NOTE(review): newer Selenium 4 releases deprecate ``Options.headless`` and
    the ``executable_path`` argument -- confirm against the installed version.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument("--enable-javascript")
    # Exclude Chrome's 'enable-logging' switch (presumably to quiet console output).
    chrome_opts.add_experimental_option('excludeSwitches', ['enable-logging'])
    chrome_opts.headless = True

    browser = webdriver.Chrome(
        executable_path="./chromedriver.exe",
        options=chrome_opts,
    )
    browser.implicitly_wait(3)
    return browser
# Script body: scrape Cloudflare's CIDR entries from asnlookup.com through a
# headless browser and write them to text files in the working directory.

# Start the browser outside the lookup's try block: every later step needs it,
# so a start-up failure should surface as-is instead of being printed and then
# masked by a NameError on `driver`.
driver = createDriver()
try:
    try:
        driver.get("https://asnlookup.com/organization/cloudflare")
        # this is just to ensure that the page is loaded
        time.sleep(5)
        html_doc = driver.page_source
        # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
        soup = BeautifulSoup(html_doc, 'html5lib')  # alternatives: 'html.parser', 'lxml'
        # Keep the text of every <b> element that contains a link.
        ASs = [b.text for b in soup.find_all('b') if b.a]
    except Exception as e:
        # Best-effort lookup: report the failure and use the fallback AS below.
        ASs = []
        print(f"Error to get Cloudflare ASN: {e}")
    # Also fall back when the page loaded but yielded no AS numbers, so the
    # run does not silently produce empty output.
    if not ASs:
        ASs = ['AS13335']
    print(f"AS = {', '.join(ASs)}")

    # Compiled once; matched against the rendered markup of each <li>.
    cidr_item = re.compile(r'<li><a href="/cidr/.*0/')
    CIDRs = []
    for AS in ASs:
        driver.get(f"https://asnlookup.com/asn/{AS}")
        time.sleep(3)  # give the page time to load before reading its source
        soup = BeautifulSoup(driver.page_source, "html5lib")
        res = [li.text for li in soup.find_all('li') if cidr_item.search(li.decode())]
        print(f"AS {AS} \t=> CIDR = {len(res)}")
        # One file per AS ...
        with open(f"Cloudflare_{AS}.txt", 'w') as f:
            f.write('\n'.join(res))
        CIDRs += res
    # ... and one combined file for all ASes.
    with open("Cloudflare_Organization.txt", 'w') as f:
        f.write('\n'.join(CIDRs))
finally:
    # quit() ends the whole driver session; close() would only close the
    # current window. Running it in `finally` releases the browser on errors too.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment