jumbophp/extractor

## extractor
from bs4 import BeautifulSoup

import re

import requests

#Extract the info from the article
def article(url):
    """Scrape one page and append the phone entries it contains to ``phones.txt``.

    The page is only fetched when ``url`` contains ``'.html'``. Every
    ``<div>`` carrying the ``col-sm-6`` class that has exactly 11 direct
    child nodes is treated as an entry: its third child node is stored as
    the phone number and the div's text, with any leading digits removed,
    as the description. Each entry is appended to ``phones.txt`` as
    ``,<phone>,<text>`` with no line terminator.

    Args:
        url: Address of the page to scrape. ``None``, an empty string, or
            any value that does not contain ``'.html'`` is ignored.

    Returns:
        None.
    """
    # Anchors without an href attribute yield None; guarding here avoids the
    # TypeError that `'.html' in None` would raise.
    if not url or '.html' not in url:
        return

    # NOTE(review): verify=False disables TLS certificate verification -
    # confirm this is intentional for the target site.
    r = requests.get(url, verify=False)
    soup = BeautifulSoup(r.text, "html5lib")

    for phone in soup.find_all('div', {'class': 'col-sm-6'}):
        # len() of a tag is the number of its direct child nodes.
        if len(phone) != 11:
            continue

        # NOTE(review): assumes the third child node is a text node holding
        # the number - verify against the page markup.
        phone_number = phone.contents[2].rstrip()
        text = phone.get_text().rstrip()

        try:
            # Drop the digits that lead the div's text.
            text = re.sub('^[0-9]+', '', text)
            record = ',' + phone_number.rstrip() + ',' + text.rstrip()
            # The context manager closes the handle after every record
            # instead of leaking one open file per entry.
            with open("phones.txt", "a+") as f:
                f.write(record)
        except ValueError:
            # UnicodeEncodeError is a ValueError subclass, so this presumably
            # skips entries that cannot be encoded for the file; a blank
            # line is printed as before.
            print('')

# Driver: fetch the listing page at the hard-coded URL and run article() on
# the target of every link found on it.
# NOTE(review): verify=False disables TLS certificate verification - confirm
# this is intentional.
r = requests.get("https://www.anuntul.co.uk/chirii/", verify=False)

data = r.text

soup = BeautifulSoup(data, "html5lib")

# href=True restricts the match to anchors that actually have an href
# attribute; without it .get('href') returns None for bare anchors and the
# `'.html' in url` test inside article() raises TypeError.
for anchor in soup.find_all('a', href=True):
    link = anchor.get('href')
    article(link)
	from bs4 import BeautifulSoup

	import re

	import requests

	#Extract the info from the article
	def article(url):
		"""Scrape one page and append the phone entries it contains to ``phones.txt``.

		The page is only fetched when ``url`` contains ``'.html'``. Every
		``<div>`` carrying the ``col-sm-6`` class that has exactly 11 direct
		child nodes is treated as an entry: its third child node is stored as
		the phone number and the div's text, with any leading digits removed,
		as the description. Each entry is appended to ``phones.txt`` as
		``,<phone>,<text>`` with no line terminator.

		Args:
			url: Address of the page to scrape. ``None``, an empty string, or
				any value that does not contain ``'.html'`` is ignored.

		Returns:
			None.
		"""
		# NOTE(review): this repeats the `article` definition that appears
		# earlier in the file; the original text here had lost its nesting,
		# which is restored below.

		# Anchors without an href attribute yield None; guarding here avoids
		# the TypeError that `'.html' in None` would raise.
		if not url or '.html' not in url:
			return

		# NOTE(review): verify=False disables TLS certificate verification -
		# confirm this is intentional for the target site.
		r = requests.get(url, verify=False)
		soup = BeautifulSoup(r.text, "html5lib")

		for phone in soup.find_all('div', {'class': 'col-sm-6'}):
			# len() of a tag is the number of its direct child nodes.
			if len(phone) != 11:
				continue

			# NOTE(review): assumes the third child node is a text node
			# holding the number - verify against the page markup.
			phone_number = phone.contents[2].rstrip()
			text = phone.get_text().rstrip()

			try:
				# Drop the digits that lead the div's text.
				text = re.sub('^[0-9]+', '', text)
				record = ',' + phone_number.rstrip() + ',' + text.rstrip()
				# The context manager closes the handle after every record
				# instead of leaking one open file per entry.
				with open("phones.txt", "a+") as f:
					f.write(record)
			except ValueError:
				# UnicodeEncodeError is a ValueError subclass, so this
				# presumably skips entries that cannot be encoded for the
				# file; a blank line is printed as before.
				print('')

	# Driver: fetch the listing page at the hard-coded URL and run article()
	# on the target of every link found on it.
	# NOTE(review): this repeats the driver statements that appear earlier in
	# the file; the loop body had lost its indentation, which is restored here.
	# NOTE(review): verify=False disables TLS certificate verification -
	# confirm this is intentional.
	r = requests.get("https://www.anuntul.co.uk/chirii/", verify=False)

	data = r.text

	soup = BeautifulSoup(data, "html5lib")

	# href=True restricts the match to anchors that actually have an href
	# attribute; without it .get('href') returns None for bare anchors and
	# the `'.html' in url` test inside article() raises TypeError.
	for anchor in soup.find_all('a', href=True):
		link = anchor.get('href')
		article(link)