- github [52.74.223.119]
- csdn.net [47.95.47.253]
- jianshu.com [106.75.17.181]
Last active
April 29, 2019 13:32
-
-
Save abearxiong/7bf62abfe2a1b4f3ec786f8436c4d22d to your computer and use it in GitHub Desktop.
程序员资料网站
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Mon Apr 29 20:39:22 2019 | |
@author: xiong | |
""" | |
import requests | |
import lxml | |
from bs4 import BeautifulSoup | |
#import hashlib | |
def getContent(url, timeout=10):
    """Fetch a web page and print a summary of its <body> and <a> tags.

    The page is downloaded with ``requests`` and parsed by BeautifulSoup
    using the ``lxml`` parser. The function then prints, in order:

    1. the attribute dict of every ``<body>`` tag, each followed by a
       blank line;
    2. one line per ``<a>`` tag: its ``href`` value when that value is
       present and non-empty, otherwise the whole tag;
    3. the total number of ``<a>`` tags found.

    Args:
        url: Address of the page to download.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument. Defaults to 10.

    Returns:
        None. All results are written to standard output.
    """
    # Without an explicit timeout, requests will wait indefinitely on a
    # server that accepts the connection but never answers.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    bodys = soup.find_all(name='body')
    links = soup.find_all('a')

    for body in bodys:
        print(body.attrs, '\n')

    for link in links:
        # Look the attribute up once; fall back to printing the whole tag
        # when the anchor has no (or an empty) href.
        href = link.get('href')
        print(href if href else link)

    print(len(links))
if __name__ == '__main__':
    # Alternative target kept for reference: the CSDN home page,
    # "https://csdn.net".
    # Default target: a single CSDN blog article.
    url = "https://blog.csdn.net/csdn15698845876/article/details/78305467"
    getContent(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment