Skip to content

Instantly share code, notes, and snippets.

@greyli
Last active December 17, 2019 11:35
Show Gist options
  • Save greyli/6abedbe97e99afc2e2726030e06256c1 to your computer and use it in GitHub Desktop.
Save greyli/6abedbe97e99afc2e2726030e06256c1 to your computer and use it in GitHub Desktop.
知乎文章抽奖脚本(某次抽奖某位读者提供的)
# -*- coding: utf-8 -*-
import requests
import json
import re
import time
import random
import webbrowser
# Fill in the `_xsrf` value from your own cookie; this string is sent as the
# `cookie` request header when fetching comments.
COOKIE = '_xsrf=XXX'
# URL of the article whose commenters take part in the draw.
ARTICLE_URL = 'https://zhuanlan.zhihu.com/p/97139851'
# Total number of winners to draw.
CHOICE_TOTAL = 3
def get_comments(zhuanlan_url, cookie, timeout=10):
    """Download every comment of a Zhihu column article.

    The article's comment API is read page by page (``limit`` comments per
    request) until the response reports ``paging.is_end``.

    Args:
        zhuanlan_url: Article URL. The first run of four or more digits in it
            is used as the article id.
        cookie: Value sent as the ``cookie`` request header.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        list: The comment objects of all pages, in the order the API
        returned them.

    Raises:
        ValueError: If no article id can be found in ``zhuanlan_url``.
        requests.RequestException: If a request fails, times out or returns
            an HTTP error status.
    """
    print('正在获取所有评论……')
    id_match = re.search(r'\d{4,}', zhuanlan_url)
    if id_match is None:
        raise ValueError('no article id found in URL: %r' % (zhuanlan_url,))
    api_url = 'https://www.zhihu.com/api/v4/articles/{}/comments'.format(
        id_match.group(0))

    headers = {
        'accept': '*/*',
        # Header names are case-insensitive, so exactly one accept-encoding
        # entry is sent.
        'accept-encoding': 'gzip',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/69.0.3497.92 Safari/537.36',
        'origin': 'https://zhuanlan.zhihu.com',
        'referer': zhuanlan_url,
        'cookie': cookie,
    }
    params = {
        'include': 'data[*].author,collapsed,reply_to_author,disliked,'
                   'content,voting,vote_count,is_parent_author,is_author,'
                   'algorithm_right',
        'order': 'normal',
        'limit': 20,
        'offset': 0,
        'status': 'open',
    }

    comments = []
    while True:
        resp = requests.get(api_url, params=params, headers=headers,
                            timeout=timeout)
        # Fail loudly on 4xx/5xx instead of tripping over an error payload.
        resp.raise_for_status()
        resp_data = resp.json()

        comment_page = resp_data['data']
        # Store the page BEFORE testing the end flag: the page flagged as the
        # last one may still carry comments.
        comments.extend(comment_page)
        # An empty page also ends the loop, so a response that never reports
        # the end cannot make this spin forever.
        if resp_data['paging']['is_end'] or not comment_page:
            break

        params['offset'] += params['limit']
        # Pause between requests to go easy on the API.
        time.sleep(1)

    print('一共%s条评论' % len(comments))
    return comments
def parse_authors(comments, page_size=20):
    """Build the list of distinct commenters from raw API comment objects.

    Featured comments are ignored, and only the first non-featured comment of
    each user is kept. Comments whose author has no ``url_token`` are skipped
    because no usable profile URL can be built for them.

    Args:
        comments: Sequence of comment dicts. Each must provide
            ``['author']['member']``; ``'featured'`` and ``'url_token'`` may
            be absent.
        page_size: Number of comments per page, used to compute where a
            comment is located.

    Returns:
        list: One dict per distinct user with the keys ``'user_url'`` (profile
        URL), ``'page'`` (1-based page of the kept comment) and ``'order'``
        (1-based position inside that page; featured comments also occupy a
        position).
    """
    print('正在去除重复评论,获取所有参与评论的用户……')
    authors = []
    seen_tokens = set()
    for idx, cm in enumerate(comments):
        # Skip featured comments; a missing flag is treated as "not featured".
        if cm.get('featured') is True:
            continue
        url_token = cm['author']['member'].get('url_token')
        # Skip authors without a token (no profile URL) and repeat commenters.
        if not url_token or url_token in seen_tokens:
            continue
        seen_tokens.add(url_token)
        authors.append({
            # Profile page of the user.
            'user_url': 'https://www.zhihu.com/people/' + url_token,
            # Page on which the comment appears.
            'page': idx // page_size + 1,
            # Position of the comment within that page.
            'order': idx % page_size + 1,
        })
    print('一共%s名用户' % len(authors))
    return authors
def choice(chosen, num):
    """Draw up to ``num`` distinct winners at random from ``chosen``.

    Sampling is done without replacement, so no candidate can be picked more
    than once. When fewer than ``num`` candidates exist, all of them are
    returned (in random order); a non-positive ``num`` yields an empty list.

    Args:
        chosen: List of candidates.
        num: Number of winners requested.

    Returns:
        list: The selected candidates.
    """
    # Clamp the request so random.sample never raises on a short or empty
    # candidate list.
    draw_count = max(0, min(num, len(chosen)))
    print('正在随机抽取%d名幸运用户……' % draw_count)
    return random.sample(chosen, draw_count)
def run():
    """Run the complete draw for ``ARTICLE_URL`` and print the winners.

    Fetches all comments of the article, reduces them to distinct commenters,
    draws ``CHOICE_TOTAL`` of them and prints each winner's profile URL with
    the page and position of their comment. Returns nothing.
    """
    comments = get_comments(ARTICLE_URL, COOKIE)
    authors = parse_authors(comments)
    if not authors:
        # Nothing to draw from; stop before sampling an empty list.
        print('没有符合条件的评论用户,无法抽奖')
        return
    winners = choice(authors, CHOICE_TOTAL)
    print('获赠名单如下:\n')
    for user in winners:
        print('* 用户URL:{} | 页数:{} | 序号:{}\n'.format(
            user['user_url'], user['page'], user['order']))
    # NOTE: to inspect the winners in a browser, call
    # webbrowser.open(user['user_url']) for each entry of `winners`.
if __name__ == '__main__':
    # Script entry point: print the current local time, then run the draw.
    # time.strftime uses the current local time when no time tuple is given.
    started_at = time.strftime('%Y-%m-%d %A %X')
    print('当前时间为:' + started_at)
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment