# vatsalsaglani/websiteopen.py

## websiteopen.py
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS
import pandas as pd
from urllib.parse import urlparse, urlsplit
from PIL import Image

# Scrape a blog-style listing page: collect every post title and link, fetch
# each linked post, and save title / link / post text to a tab-separated file.
website_page = 'http://nameofyourwebsite.com'


def _fetch_soup(url):
    """Download *url* and return its parsed BeautifulSoup tree.

    The HTTP response is closed as soon as parsing finishes, and the parser
    is named explicitly so the result does not depend on which optional
    parsers happen to be installed.
    """
    with urlopen(url) as response:
        return BS(response, 'html.parser')


# Each <h3 class="entry-title"> heading on the listing page is treated as one
# post.
soup = _fetch_soup(website_page)
title_links = soup.find_all('h3', class_='entry-title')

# Maps a running index to [title, link, post text] for every scraped post.
links = {}
for heading in title_links:
    # A heading can hold whitespace text nodes next to its anchor, and text
    # nodes cannot be indexed with ['href'], so look the anchor up by tag.
    anchor = heading.find('a', href=True)
    if anchor is None:
        # No link in this heading, so there is no post page to fetch.
        continue

    only_title = heading.text.strip()
    only_link = anchor['href']
    print(only_link)

    post_soup = _fetch_soup(only_link)
    # Paragraph and <h4> fragments are concatenated directly, with no
    # separator between them.
    post_text = ''.join(
        node.text.strip() for node in post_soup.find_all(['p', 'h4'])
    )
    links[len(links)] = [only_title, only_link, post_text]

df = pd.DataFrame(list(links.values()), columns=['Title', 'Link', 'Post'])

# The output is tab-separated even though the file name ends in .csv.
df.to_csv('file_name.csv', sep='\t', encoding='utf-8')
	from urllib.request import urlopen
	from bs4 import BeautifulSoup as BS
	import pandas as pd
	from urllib.parse import urlparse, urlsplit
	from PIL import Image

	# NOTE(review): this tab-indented section repeats the script earlier in the
	# file and looks like a paste artefact. The leading tab is kept so the
	# section stays where it was, but the section (together with its imports)
	# should probably be removed -- confirm before relying on it.
	website_page = 'http://nameofyourwebsite.com'
	# Close the response once parsed, and name the parser so the result does
	# not depend on which optional parsers are installed.
	with urlopen(website_page) as page:
		soup = BS(page, 'html.parser')

	title_links = soup.find_all('h3', class_='entry-title')

	# Maps a running index to [title, link, post text] for every scraped post.
	links = {}
	for heading in title_links:
		# Text nodes inside the heading cannot be indexed with ['href'], so
		# look the anchor up by tag and skip headings that have no link.
		anchor = heading.find('a', href=True)
		if anchor is None:
			continue
		only_title = heading.text.strip()
		only_link = anchor['href']
		print(only_link)
		with urlopen(only_link) as open_webpage_2:
			soup2 = BS(open_webpage_2, 'html.parser')
		# Fragments are concatenated directly, with no separator between them.
		post_Text = ''.join(j.text.strip() for j in soup2.find_all(['p', 'h4']))
		links[len(links)] = [only_title, only_link, post_Text]

	df = pd.DataFrame(list(links.values()), columns=['Title', 'Link', 'Post'])

	# The output is tab-separated even though the file name ends in .csv.
	df.to_csv('file_name.csv', sep='\t', encoding='utf-8')