Skip to content

Instantly share code, notes, and snippets.

@JichunMa
Created February 7, 2018 13:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JichunMa/bd389ae1c261eb9bdbadad9c8fa0ae1a to your computer and use it in GitHub Desktop.
Save JichunMa/bd389ae1c261eb9bdbadad9c8fa0ae1a to your computer and use it in GitHub Desktop.
简书动态爬取
import requests
from lxml import etree
# First page of the user's timeline; this is the URL passed to get_info()
# when the file is run as a script.
start_url_var = 'https://www.jianshu.com/users/9104ebf5e177/timeline'
# URL template for the same timeline with two placeholders to fill via
# str.format(): max_id, then page. Not referenced by the code visible here.
base_url = 'https://www.jianshu.com/users/9104ebf5e177/timeline?max_id={}&page={}'
# Filter the first level:
# ul class="note-list"
# Initialised to -1; not referenced by the code visible here.
last_max_id = -1
# Module-level user id, initially empty.
user_id = ''
def get_info(start_url):
    """Fetch a timeline page and print the id suffix of its last list item.

    Downloads ``start_url`` with ``requests``, parses the HTML with lxml,
    takes the first ``<ul class="note-list">`` element, collects the ``id``
    attribute of every ``<li>`` inside it and prints what
    ``get_last_id_less_one`` returns for those ids.

    As a side effect, the fifth ``/``-separated segment of ``start_url`` is
    stored in the module-level ``user_id``.

    Args:
        start_url: URL of the timeline page to fetch, e.g.
            ``https://www.jianshu.com/users/<user id>/timeline``.

    Raises:
        IndexError: If ``start_url`` has fewer than five ``/``-separated
            segments, if the page contains no ``<ul class="note-list">``,
            or if that list contains no ``<li>`` with an ``id``.

    Exceptions raised by ``requests.get`` are not caught and propagate to
    the caller.
    """
    # Without this declaration the assignment below only creates an unused
    # local and the module-level ``user_id`` is never updated.
    global user_id

    source = requests.get(start_url).text
    user_id = start_url.split('/')[4]

    data_html = etree.HTML(source)
    data_list = data_html.xpath('//ul[@class="note-list"]')
    if not data_list:
        # Same exception type as indexing an empty list, but with a message
        # that says what was actually missing.
        raise IndexError(
            'no <ul class="note-list"> element found at ' + start_url
        )

    # The leading "." keeps the query relative to the <ul>; a bare "//li"
    # is evaluated from the document root and matches every <li> on the page.
    li_list = data_list[0].xpath('.//li/@id')
    print(get_last_id_less_one(li_list))
def get_last_id_less_one(id_list):
    """Return the second ``-``-separated piece of the last id in ``id_list``.

    For an id such as ``'feed-123'`` this is the string ``'123'``. Note that,
    despite the function's name, the value is returned as-is: it is not
    converted to a number and nothing is subtracted from it.

    Args:
        id_list: Non-empty sequence of id strings, each containing at least
            one ``-``.

    Returns:
        The text between the first and second ``-`` of the last id (or
        everything after the first ``-`` when there is only one).

    Raises:
        IndexError: If ``id_list`` is empty or its last id contains no ``-``.
    """
    if not id_list:
        raise IndexError('id_list is empty; there is no last id to read')
    return id_list[-1].split('-')[1]
# Script entry point: fetch the default timeline page and print the id suffix
# of its last entry. Skipped when the module is imported.
if __name__ == '__main__':
    get_info(start_url_var)
# source = requests.get(test_url).text
# data_html = etree.HTML(source)
# data_list = data_html.xpath('//ul[@class="note-list"]')
# li_list = data_list[0].xpath('//li/@id')
# type_list = data_list[0].xpath('//span/@data-type')
# datetime_list = data_list[0].xpath('//span/@data-datetime')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment