@cinghaman
Last active June 6, 2020 08:52
Scraping a webpage with Python 3 and BeautifulSoup
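The script below fetches a page with requests, parses it with BeautifulSoup (using the lxml parser), and writes the scraped card images, titles, descriptions and links to a CSV file. It requires the requests, beautifulsoup4 and lxml packages (installable with pip).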
# scrape a website using Python 3 and BeautifulSoup
import requests
from bs4 import BeautifulSoup

result = requests.get("https://wiki.com")  # replace with the URL you want to scrape
src = result.content
soup = BeautifulSoup(src, 'lxml')  # parse the page with the lxml parser
# name of the output file written to local disk
out_filename = "bookmarks_design.csv"

# header row of the CSV file
headers = "image,title,description,link\n"

# open the file and write the header row
f = open(out_filename, "w")
f.write(headers)
# collect the link from each card title
urls = []
for div_tag in soup.find_all("div", {"class": "card-title"}):
    a_tag = div_tag.find('a')
    urls.append(a_tag.attrs['href'])
# collect the title from each responsive image's alt text
titles = []
for img_tag in soup.find_all("img", {"class": "img-responsive"}):
    if 'alt' in img_tag.attrs:
        titles.append(img_tag.attrs['alt'])
# collect the image URL from each responsive image's data-src attribute
srcs = []
for src_tag in soup.find_all("img", {"class": "img-responsive"}):
    if 'data-src' in src_tag.attrs:
        srcs.append(src_tag.attrs['data-src'])
# collect the description text from each card body
# (strip surrounding whitespace so newlines don't break the CSV rows)
desc = []
for desc_tag in soup.find_all("div", {"class": "card-body"}):
    desc.append(desc_tag.text.strip())
print(urls)
print(titles)
print(srcs)
print(desc)
# output one CSV row per scraped image
for i in range(len(srcs)):
    f.write(srcs[i] + "," + titles[i] + "," + desc[i] + "," + urls[i] + "\n")

f.close()  # close the file
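Note: the manual f.write() above produces a malformed row whenever a title or description itself contains a comma. A minimal alternative sketch, assuming the same srcs, titles, desc and urls lists built above, uses Python's standard csv module, which quotes such fields automatically (zip also stops at the shortest list, avoiding an IndexError if the lists differ in length):

import csv

# write the same data with the csv module so commas/newlines in fields are quoted
with open("bookmarks_design.csv", "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["image", "title", "description", "link"])
    for row in zip(srcs, titles, desc, urls):
        writer.writerow(row)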