Last active
June 16, 2023 21:25
-
-
Save KTibow/82af3683a604476841647b98acecf780 to your computer and use it in GitHub Desktop.
Phone number lookup (with web scraping)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# User input
import string
import sys
import urllib.request
from urllib.parse import urlparse
# Read the number and drop every punctuation and whitespace character, so
# common spellings such as "(555) 123-4567" or "555.123.4567" reduce to the
# bare digits that the lookup URLs are built from.
phone_num = input("Phone number: ")
phone_num = phone_num.translate(
    str.maketrans("", "", string.punctuation + string.whitespace)
)

# Only ASCII digits are accepted: each character is substituted verbatim into
# a URL, so anything else (letters, non-ASCII digits) cannot form a valid one.
if not phone_num or any(ch not in string.digits for ch in phone_num):
    print("Please enter a phone number.")
    sys.exit(1)
if len(phone_num) != 10:
    print("Please enter a 10-digit phone number.")
    sys.exit(1)

# User-Agent header sent with every lookup request.
user_agent = "Mozilla/5.0 (Windows NT; Windows NT 10.0; en-US) WindowsPowerShell/5.1.18362.752"
class PhoneNumberLookup:
    """A reverse-lookup site described by a URL template.

    The template contains one ``{}`` placeholder for each phone-number digit
    that the site expects in its URL.
    """

    def __init__(self, template: str) -> None:
        """Store ``template`` and expose the site's host name as ``domain``."""
        self._template = template
        self.domain = urlparse(template).netloc

    def template(self, phone: str) -> str:
        """Return the lookup URL for ``phone``.

        The characters of ``phone`` are substituted positionally, one per
        ``{}`` placeholder. Characters beyond the last placeholder are ignored
        by ``str.format``, so a template may use only a prefix of the number.
        """
        return self._template.format(*phone)
# Sites to scan. Each template takes one "{}" per digit: the first four use
# the leading six digits of the number, the last one uses the leading seven.
sources = [
    PhoneNumberLookup("https://www.numberhidden.com/{}{}{}-{}{}{}.html"),
    PhoneNumberLookup("https://www.babunatraj.com/{}{}{}-{}{}{}/"),
    PhoneNumberLookup("https://www.whoiscallingmefromthisnumber.com/{}{}{}-{}{}{}"),
    PhoneNumberLookup("https://whosenumberisthiscallingme.com/{}{}{}-{}{}{}.html"),
    PhoneNumberLookup("https://www.cellphonebank.org/{}{}{}-{}{}{}-{}xxx"),
]
# Seconds to wait on a single site before giving up on it.
_REQUEST_TIMEOUT = 15

# Deletes every punctuation character except "*" from a table cell.
_CELL_PUNCTUATION = str.maketrans("", "", string.punctuation.replace("*", ""))


def _fetch_rows(url, agent):
    """Download ``url`` with the given User-Agent and split the page on "</tr>"."""
    request = urllib.request.Request(url, headers={'User-Agent': agent})
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(request, timeout=_REQUEST_TIMEOUT) as response:
        # Undecodable bytes are replaced rather than aborting the scan.
        page = response.read().decode(errors="replace")
    return page.split("</tr>")


def _comparable_text(cell):
    """Reduce one table cell to the text that is compared with the phone number."""
    cell = cell.replace("<tr>", "").replace("<td>", "").replace("-", "").strip()
    cell = cell.translate(_CELL_PUNCTUATION)
    return cell.replace("a href", "")


def _name_and_place(cells):
    """Extract ``(name, place)`` from the cells of a row that matched.

    Everything from the first cell judged to contain letters onwards is joined
    into one string; its first two words become the name and the remaining
    words the place.
    """
    index = 0
    for i, cell in enumerate(cells):
        cell = cell.replace("<td>", "").replace("<tr>", "").replace("- ", "").strip()
        # NOTE(review): 0 doubles as the "not found yet" sentinel, so letters
        # in cell 0 do not stop the search and a later cell with letters wins.
        # Left unchanged because the scraping was presumably tuned against the
        # live pages -- confirm before altering.
        if index == 0 and any(char in string.ascii_letters for char in cell):
            index = i
    text = " ".join(cells[index:])
    words = text.replace("<td>", "").replace("- ", "").strip().split(" ")
    trim = string.whitespace + string.punctuation
    name = " ".join(words[:2]).strip(trim)
    place = " ".join(words[2:]).strip(trim)
    return name, place


# Collected results, one [domain, name, place] entry per matching table row.
data = []
for source in sources:
    print("Scanning source "+source.domain+"...")
    try:
        rows = _fetch_rows(source.template(phone_num), user_agent)
    except OSError as err:
        # URLError, HTTPError and socket timeouts all derive from OSError; an
        # unreachable site should not stop the remaining sites being scanned.
        print("Couldn't reach "+source.domain+": "+str(err))
        print("="*30)
        continue
    found = False
    for row in rows:
        cells = row.split("</td>")
        if source.domain == 'whosenumberisthiscallingme.com':
            # For this site only the first cell is used, split into words.
            cells = cells[0].split(" ")
        if not any(_comparable_text(cell) == phone_num for cell in cells):
            continue
        found = True
        print("Found a match in "+source.domain+" for "+phone_num+"!")
        name, place = _name_and_place(cells)
        data.append([source.domain, name, place])
    if not found:
        print("Couldn't find a match in "+source.domain+".")
    print("="*30)

# Summary table of every match found across all sites.
print("="*45)
print("="*30)
print("Matches:")
for row in data:
    print("{:<45} | {:^35} | {:^50}".format(*row))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
wtf