Created
February 27, 2023 09:26
-
-
Save mheidari98/95edf18cde660ccd868982e09f819c59 to your computer and use it in GitHub Desktop.
Scrape Cloudflare IPv4 CIDR ranges from asnlookup.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import time | |
import requests | |
from bs4 import BeautifulSoup | |
from selenium import webdriver | |
# Old requests-based version; it no longer works.
def getCfCIDR(timeout=30):
    """Collect Cloudflare CIDR entries from asnlookup.com with plain HTTP requests.

    The organization page is scraped for AS numbers (the text of every ``<b>``
    element that contains a link). If that step raises or yields nothing,
    ``AS13335`` is used instead. Each AS page is then scraped for ``<li>``
    elements whose markup starts with a link to ``/cidr/...``.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        list: The text of every matching list item, in page order, for all
        AS numbers combined.

    Raises:
        requests.RequestException: If one of the per-AS pages cannot be
            fetched (only the organization lookup is guarded).
    """
    # Function-scope import: `logging` is not among the imports visible at
    # the top of the file, so the log calls below would raise NameError.
    import logging

    # Bind the name up front so the fallback below also works when the
    # request or the parsing raises before any assignment happens.
    ASs = []
    try:
        r = requests.get('https://asnlookup.com/organization/cloudflare', timeout=timeout)
        # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
        soup = BeautifulSoup(r.text, "html5lib")  # 'html5lib' , 'html.parser' 'lxml'
        ASs = [b.text for b in soup.find_all('b') if b.a]
    except Exception as e:
        logging.error("Error to get Cloudflare ASN: %s", e)
    if not ASs:
        ASs = ['AS13335']
    logging.debug("AS = %s", ', '.join(ASs))

    # Compiled once: matches the serialized markup of an <li> whose first
    # child is a link to a /cidr/ page.
    cidr_item = re.compile(r'<li><a href="/cidr/.*0/')

    CIDRs = []
    for AS in ASs:
        r = requests.get(f'https://asnlookup.com/asn/{AS}', timeout=timeout)
        soup = BeautifulSoup(r.text, "html5lib")
        res = [li.text for li in soup.find_all('li') if cidr_item.search(li.decode())]
        logging.debug("AS %s \t=> CIDR = %d", AS, len(res))
        CIDRs += res
    return CIDRs
def createDriver(driver_path="./chromedriver.exe", headless=True, implicit_wait=3):
    """Start a Chrome WebDriver session configured for scraping.

    Args:
        driver_path: Path to the chromedriver executable.
        headless: When true, pass ``--headless`` so Chrome runs without a window.
        implicit_wait: Seconds handed to ``driver.implicitly_wait``.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller is responsible
        for shutting it down.
    """
    # Selenium 4 takes the driver path through a Service object; the
    # `executable_path` keyword of webdriver.Chrome was removed in 4.10.
    from selenium.webdriver.chrome.service import Service

    options = webdriver.ChromeOptions()
    options.add_argument("--enable-javascript")
    # Exclude Chrome's "enable-logging" switch (presumably to keep the
    # console output quiet).
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    if headless:
        # The `options.headless` setter was removed in Selenium 4.13, after
        # which assigning it is silently ignored; the flag works everywhere.
        options.add_argument("--headless")

    driver = webdriver.Chrome(service=Service(executable_path=driver_path), options=options)
    driver.implicitly_wait(implicit_wait)
    return driver
# Matches the serialized markup of an <li> whose first child links to a
# /cidr/ page; compiled once for reuse across every AS page.
_CIDR_ITEM = re.compile(r'<li><a href="/cidr/.*0/')

# Start the browser outside the ASN try-block: without a driver there is
# nothing to fall back to, so a startup failure should surface directly
# instead of turning into a NameError on the first `driver.get` below.
driver = createDriver()
try:
    try:
        driver.get("https://asnlookup.com/organization/cloudflare")
        # this is just to ensure that the page is loaded
        time.sleep(5)
        html_doc = driver.page_source
        # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
        soup = BeautifulSoup(html_doc, 'html5lib')  # 'html5lib' , 'html.parser' 'lxml'
        ASs = [b.text for b in soup.find_all('b') if b.a]
    except Exception as e:
        ASs = []
        print(f"Error to get Cloudflare ASN: {e}")
    if not ASs:
        # Same fallback as getCfCIDR: also covers a page that loaded but
        # contained no AS entries.
        ASs = ['AS13335']
    print(f"AS = {', '.join(ASs)}")

    CIDRs = []
    for AS in ASs:
        driver.get(f"https://asnlookup.com/asn/{AS}")
        # Give the page time to render before reading its source.
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, "html5lib")
        res = [li.text for li in soup.find_all('li') if _CIDR_ITEM.search(li.decode())]
        print(f"AS {AS} \t=> CIDR = {len(res)}")
        # One file per AS ...
        with open(f"Cloudflare_{AS}.txt", 'w') as f:
            f.write('\n'.join(res))
        CIDRs += res

    # ... plus one file with the entries of every AS combined.
    with open("Cloudflare_Organization.txt", 'w') as f:
        f.write('\n'.join(CIDRs))
finally:
    # quit() ends the whole session (browser and chromedriver process),
    # and the finally makes sure that happens even when scraping fails.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment