# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Small Python script to scrape websites for
# email addresses and phone numbers (the regular expressions are fairly rough)
# Author: Dhruv Baldawa (@dhruvbaldawa on Twitter)
# GitHub: http://www.github.com/dhruvbaldawa
import re
import urllib.request

# Phone numbers: "+", a two-digit country code, optional whitespace, an
# optional leading "0", then exactly ten digits (e.g. "+91 09876543210").
PHONE_RE = re.compile(r"\+\d{2}\s?0?\d{10}")

# E-mail addresses. The final label accepts two or more letters so that
# long TLDs such as ".online" are matched whole instead of being truncated.
EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")


def fetch(url, timeout=10):
    """Download *url* and return its body decoded to text.

    The charset declared in the response headers is used when present,
    otherwise UTF-8 is assumed; undecodable bytes are replaced rather than
    raising, because the text is only scanned with regular expressions.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def find_phone_numbers(text):
    """Return every substring of *text* matching PHONE_RE, in order."""
    return PHONE_RE.findall(text)


def find_emails(text):
    """Return every substring of *text* matching EMAIL_RE, in order."""
    return EMAIL_RE.findall(text)


def main(url="http://www.example.com"):
    """Fetch *url* and print the phone numbers and e-mail addresses found."""
    page = fetch(url)
    print(find_phone_numbers(page))
    print(find_emails(page))


if __name__ == "__main__":
    main()