# linzino7/zino_PTT.py

## zino_PTT.py
# -*- coding: utf-8 -*-
"""
Created on Tue May 12 04:38:47 2020
https://medium.com/p/a8216873a9d3
@author: Zino
"""

# Import modules: `requests` performs the HTTP request and `bs4`
# (BeautifulSoup) parses the returned HTML.
import requests
import bs4

# NOTE: this script used to be followed by a tab-indented copy of itself.
# That copy made the file unparseable (inconsistent tabs/spaces, and a `for`
# loop without an indented body), so the two copies were merged into this one.

# Store the URL of the PTT Gossiping board index page in the URL variable.
URL = "https://www.ptt.cc/bbs/Gossiping/index.html"
# Request headers: send the `over18=1` cookie along with the request
# (presumably what lets the request past the board's age check -- confirm).
my_headers = {'cookie': 'over18=1;'}

# 1. Send a GET request to the PTT Gossiping board.
# Without a timeout `requests` may wait forever on an unresponsive server.
response = requests.get(URL, headers=my_headers, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page.
response.raise_for_status()
# Print the source of the returned page.
print(response.text)

# 2-1 Hand the page source (HTML) to bs4 for parsing.
soup = bs4.BeautifulSoup(response.text, "html.parser")

# Shape of the elements being searched for:
#
#   <div class="title">
#       <a href="/bbs/Gossiping/M.1589705973.A.912.html">
#           [tag] article title
#       </a>
#   </div>
#
# 2-2 Find every HTML element whose tag name is 'div' and whose class is
# 'title'.
titles = soup.find_all('div', 'title')

# 2-3 Extract the text.
# `titles` holds several tags, so loop over it and print the text of each
# entry in turn.
for t in titles:
    print(t.text)