Skip to content

Instantly share code, notes, and snippets.

@jumbophp
Created June 22, 2018 16:48
Show Gist options
  • Save jumbophp/5a5851035b026dc9da23db4f52b761bb to your computer and use it in GitHub Desktop.
Save jumbophp/5a5851035b026dc9da23db4f52b761bb to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import re
import requests
#Extract the info from the article
def article(url):
    """Scrape phone entries from one article page and append them to phones.txt.

    Only URLs that contain '.html' are fetched; any other value, including
    None or an empty string, is ignored.  In the fetched page every
    <div class="col-sm-6"> whose length is exactly 11 is treated as a phone
    entry: its third child is used as the phone number and the div's text,
    with any leading digits removed, as the description.  Each entry is
    appended to phones.txt as ',<phone>,<text>' (no newline is written).

    Returns None.
    """
    # Anchors without an href hand us None; `'.html' in None` raises TypeError.
    if not url or '.html' not in url:
        return

    # NOTE(review): verify=False turns off TLS certificate verification.
    r = requests.get(url, verify=False)
    soup = BeautifulSoup(r.text, "html5lib")

    for phone in soup.find_all('div', {'class': 'col-sm-6'}):
        # Only divs with exactly 11 children have the expected layout.
        if len(phone) != 11:
            continue
        # Assumes the third child is a text node holding the number -- TODO confirm.
        phone_number = phone.contents[2].rstrip()
        text = phone.get_text().rstrip()
        try:
            # Drop the digits that prefix the description text.
            text = re.sub('^[0-9]+', '', text)
            string = ',' + phone_number.rstrip() + ',' + text.rstrip()
            # The context manager closes the file even if the write fails.
            with open("phones.txt", "a+") as f:
                f.write(string)
        except ValueError:
            # UnicodeEncodeError is a ValueError subclass: an entry that
            # cannot be encoded is skipped instead of aborting the crawl.
            print('')
# Fetch the index page and run article() on every link found on it.
# NOTE(review): verify=False turns off TLS certificate verification.
index_url = "https://www.anuntul.co.uk/chirii/"
r = requests.get(index_url, verify=False)
data = r.text
soup = BeautifulSoup(data, "html5lib")
for link in soup.find_all('a'):
    href = link.get('href')
    # <a> tags without an href return None; there is nothing to follow.
    if not href:
        continue
    # Resolve relative links against the index URL so requests.get() always
    # receives an absolute URL; absolute hrefs pass through unchanged.
    article(requests.compat.urljoin(index_url, href))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment