Skip to content

Instantly share code, notes, and snippets.

View ZhangZhongwei73671's full-sized avatar

ZhangZhongwei73671

View GitHub Profile
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time
# Fetch Selenium's Firefox capability dict and set its "marionette" flag.
# NOTE(review): `caps` is never passed to webdriver.Firefox() below -- confirm
# whether it was meant to be supplied to the driver explicitly.
caps = webdriver.DesiredCapabilities().FIREFOX
caps["marionette"] = True

# Start Firefox from an explicit executable path (Windows install location).
binary = FirefoxBinary(r'f:\firefox\firefox.exe')
driver = webdriver.Firefox(firefox_binary=binary)

# Load the Airbnb Shenzhen listing URL once for each section_offset 0..17.
# The loop body must be indented; the original left it at column 0, which is
# an IndentationError and prevented the script from running at all.
for i in range(18):
    driver.get("https://zh.airbnb.com/s/shenzhen--china/homes?refinement_paths%5B%5D=%2Fhomes&page1=&cdn_cn=1&s_tag=_bVwCdeI&allow_override%5B%5D=&section_offset=" + str(i))
# BeautifulSoup scraping exercise: housing price data
import requests
import pandas as pd
import csv
from bs4 import BeautifulSoup
import MySQLdb
# Connect to the "ershoufang" database on the MySQL server at localhost and
# create a cursor on that connection.
# NOTE(review): the credentials are hard-coded in this script.
conn = MySQLdb.connect(
    host='localhost',
    user='root',
    passwd='yourpasswd',
    db='ershoufang',
    charset="utf8",
)
cur = conn.cursor()
import requests
from bs4 import BeautifulSoup
# Download the blog front page and print the title of its first post.
link = "http://www.santostang.com/"
# Send a desktop-browser User-Agent header with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'}

# requests has no default timeout, so without one a stalled server would make
# the script hang forever. Also fail fast on HTTP error statuses instead of
# parsing an error page.
r = requests.get(link, headers=headers, timeout=10)
r.raise_for_status()

soup = BeautifulSoup(r.text, "html.parser")

# find() returns None when nothing matches; check explicitly so a changed page
# layout produces a clear error rather than an opaque AttributeError.
title_heading = soup.find("h1", class_="post-title")
if title_heading is None or title_heading.a is None:
    raise RuntimeError("no <h1 class='post-title'> link found at " + link)

first_title = title_heading.a.text.strip()
# The printed label means "The title of the first article is:".
print("第一篇文章标题是:", first_title)