# shivankgtm/turingo.py

## turingo.py
import requests
from bs4 import BeautifulSoup
# Simple-English-Wikipedia category pages that are scanned for links.
pages = [
    'https://simple.wikipedia.org/wiki/Category:Natural_resources',
    'https://simple.wikipedia.org/wiki/Category:Hydrogen_compounds',
    'https://simple.wikipedia.org/wiki/Category:Oxygen_compounds',
    'https://simple.wikipedia.org/wiki/Category:Oxides',
]

# Seconds to wait for each HTTP response; without a timeout a stalled
# server would block the script forever.
REQUEST_TIMEOUT = 10

# Receives every collected link when the file is run as a script.
finalList = []


def is_https_link(href):
    """Return True when *href* is a string that starts with ``https``.

    Anchors without an ``href`` attribute yield ``None`` from
    ``link.get('href')``; those (and any other non-string value) are
    rejected instead of being converted to text first.
    """
    return isinstance(href, str) and href.startswith('https')


def collect_https_links(urls, timeout=REQUEST_TIMEOUT):
    """Download each page in *urls* and gather its ``https`` links.

    Args:
        urls: Iterable of page URLs to download.
        timeout: Seconds to wait for each response.

    Returns:
        A list with the ``href`` value of every ``<a>`` tag whose href
        starts with ``https``, in page order and then document order.
        Duplicates are kept.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    links = []
    for url in urls:
        response = requests.get(url, timeout=timeout)
        # Fail loudly rather than harvesting links from an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        links.extend(href for href in hrefs if is_https_link(href))
        # Progress marker: one line per processed page.
        print('********')
    return links


if __name__ == '__main__':
    finalList.extend(collect_https_links(pages))
    # finalList now holds all the collected links.
    print(len(finalList))