Created
June 4, 2024 18:24
-
-
Save KutsuyaYuki/2c3838479ea768df50b74fce526b3b4f to your computer and use it in GitHub Desktop.
Get the first 100 results from Censys and save the IP addresses to a file called results.txt. Be sure to be logged in to Censys first.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys
import urllib.parse

import requests
from bs4 import BeautifulSoup
# Search term sent to Censys; it is URL-encoded before being placed in the URL.
QUERY = "wordpress"

# Query-string prefix shared by the `_search` request URL and the `search`
# Referer URL.
SEARCH_PARAMS = "resource=hosts&sort=RELEVANCE&per_page=100&virtual_hosts=EXCLUDE"

# Upper bound on the number of result nodes read from the page.
MAX_RESULTS = 100

# Matches the second-child <strong> inside the first-child <a> of every
# div.SearchResult. NOTE(review): presumably this node holds the host IP --
# verify against the live page markup.
RESULT_SELECTOR = "div.SearchResult > a:nth-child(1) > strong:nth-child(2)"

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Optional environment variable holding the raw Cookie header of a logged-in
# browser session.
COOKIE_ENV_VAR = "CENSYS_COOKIE"

OUTPUT_PATH = "results.txt"


def build_search_url(query, endpoint="_search"):
    """Return the Censys search URL for *query*.

    Args:
        query: Raw (unencoded) search string.
        endpoint: Path segment after the host; ``"_search"`` is the URL the
            script requests, ``"search"`` is the one used for the Referer.

    Returns:
        The full URL with *query* percent-encoded in the ``q`` parameter.
    """
    encoded_query = urllib.parse.quote(query)
    return f"https://search.censys.io/{endpoint}?{SEARCH_PARAMS}&q={encoded_query}"


def build_headers(query, cookie=None):
    """Return the request headers for a search on *query*.

    ``Accept-Encoding`` is deliberately not set: requests then advertises
    only the encodings it can actually decode, whereas forcing ``br``/``zstd``
    yields an undecodable body when the optional decoders are not installed.

    Args:
        query: Raw search string, used to build the Referer URL.
        cookie: Optional raw ``Cookie`` header value for an authenticated
            session; omitted from the headers when falsy.

    Returns:
        A new dict of header names to values.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-Requested-With': 'XMLHttpRequest',
        'Connection': 'keep-alive',
        'Referer': build_search_url(query, endpoint="search"),
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'TE': 'trailers',
    }
    if cookie:
        headers['Cookie'] = cookie
    return headers


def extract_ips(html, limit=MAX_RESULTS):
    """Return the stripped text of up to *limit* result nodes found in *html*.

    The document is scanned once with a single selector instead of once per
    result position.

    Args:
        html: Markup of the search results page.
        limit: Maximum number of nodes to read.

    Returns:
        A list of strings in document order; empty when nothing matches.
    """
    soup = BeautifulSoup(html, 'html.parser')
    return [node.text.strip() for node in soup.select(RESULT_SELECTOR, limit=limit)]


def save_ips(ips, path=OUTPUT_PATH):
    """Write *ips* to *path*, one entry per line, replacing any existing file."""
    with open(path, 'w', encoding='utf-8') as file:
        file.writelines(f"{ip}\n" for ip in ips)


def main():
    """Fetch the search results page, extract the IPs and save them.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeout.
    """
    cookie = os.environ.get(COOKIE_ENV_VAR)
    response = requests.get(
        build_search_url(QUERY),
        headers=build_headers(QUERY, cookie=cookie),
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()  # Ensure we notice bad responses

    ips = extract_ips(response.text)
    if not ips:
        # An empty page usually means the request was not authenticated or
        # the markup changed; make that visible instead of failing silently.
        print(
            f"Warning: no results found; set {COOKIE_ENV_VAR} to a logged-in "
            "session cookie and check the page markup.",
            file=sys.stderr,
        )

    save_ips(ips, OUTPUT_PATH)
    print(f"Scraping completed. Results have been saved to {OUTPUT_PATH}.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment