greyli/luckyzhihu.py

## luckyzhihu.py
# -*- coding: utf-8 -*-
import requests
import json
import re
import time
import random
import webbrowser

# Fill in your own cookie here: replace XXX with your _xsrf value.
COOKIE = '_xsrf=XXX'

# The article whose commenters take part in the draw.
ARTICLE_URL = 'https://zhuanlan.zhihu.com/p/97139851'

# Total number of winners to draw.
CHOICE_TOTAL = 3


def get_comments(zhuanlan_url, cookie):
    """Fetch every open comment of a Zhihu column article.

    Pages through the article's comment API, ``limit`` comments per request,
    sleeping one second between requests, until the API reports the last
    page.

    Args:
        zhuanlan_url: Article URL. Its first run of four or more digits is
            used as the article id; the URL is also sent as the referer.
        cookie: Value sent verbatim in the ``cookie`` request header.

    Returns:
        A list with the items of the ``data`` field of every page, in the
        order the API returned them (the final page included).

    Raises:
        ValueError: If no article id can be found in ``zhuanlan_url``.
        requests.HTTPError: If the API answers with an error status code.
    """
    print('正在获取所有评论……')

    id_match = re.search(r'\d{4,}', zhuanlan_url)
    if id_match is None:
        raise ValueError('no article id found in URL: %r' % (zhuanlan_url,))
    api_url = 'https://www.zhihu.com/api/v4/articles/{}/comments'.format(id_match[0])

    headers = {
        'accept': '*/*',
        # Only gzip is advertised so the body can always be decoded without
        # optional codecs such as brotli.
        'accept-encoding': 'gzip',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
        'origin': 'https://zhuanlan.zhihu.com',
        'referer': zhuanlan_url,
        'cookie': cookie,
    }

    params = {
        'include': 'data[*].author,collapsed,reply_to_author,disliked,'
                   'content,voting,vote_count,is_parent_author,is_author,algorithm_right',
        'order': 'normal',
        'limit': 20,
        'offset': 0,
        'status': 'open'
    }
    page_size = params['limit']

    comments = []
    while True:
        # A timeout keeps a stalled connection from hanging the draw forever.
        resp = requests.get(api_url, params=params, headers=headers, timeout=10)
        resp.raise_for_status()
        resp_data = resp.json()

        comment_page = resp_data['data']
        # Store the page BEFORE looking at is_end; otherwise the comments
        # delivered together with the end marker would be thrown away.
        comments.extend(comment_page)
        # An empty page also ends the loop so a missing end marker cannot
        # make it spin forever.
        if resp_data['paging']['is_end'] is True or not comment_page:
            break

        params['offset'] += page_size
        time.sleep(1)  # be polite to the API between pages

    print('一共%s条评论' % len(comments))
    return comments


def parse_authors(comments):
    """Build the list of distinct commenters from raw API comments.

    Featured comments are ignored, and only the first comment of each user
    (identified by ``url_token``) is kept.

    Args:
        comments: Comment dicts as returned by the comment API, in order.

    Returns:
        A list of dicts with the keys ``user_url`` (profile URL), ``page``
        (1-based page of the kept comment, 20 comments per page) and
        ``order`` (1-based position inside that page; featured comments
        still occupy a position in this numbering).
    """
    print('正在去除重复评论，获取所有参与评论的用户……')
    seen_tokens = set()
    participants = []
    for position, comment in enumerate(comments):
        if comment['featured'] is not True:
            token = comment['author']['member']['url_token']
            if token not in seen_tokens:
                page_index, slot = divmod(position, 20)
                participants.append({
                    'user_url': 'https://www.zhihu.com/people/' + token,
                    'page': page_index + 1,
                    'order': slot + 1,
                })
                seen_tokens.add(token)

    print('一共%s名用户' % len(participants))
    return participants


def choice(chosen, num):
    """Draw up to ``num`` distinct winners at random from ``chosen``.

    Args:
        chosen: List of candidate entries, one per participant.
        num: Number of winners wanted.

    Returns:
        A new list holding ``min(num, len(chosen))`` entries of ``chosen``
        (none when that number is not positive). Every list position is
        drawn at most once, so a participant cannot win twice.
    """
    # Report the number that was actually requested by the caller.
    print('正在随机抽取%d名幸运用户……' % num)
    random.seed()
    # sample() draws without replacement; the clamp keeps it from raising
    # when there are fewer candidates than prizes (or no candidates at all).
    winners_wanted = max(0, min(num, len(chosen)))
    return random.sample(chosen, winners_wanted)


def run():
    """Run the whole draw and print the winners.

    Fetches the comments of ``ARTICLE_URL``, reduces them to distinct
    commenters, draws ``CHOICE_TOTAL`` winners and prints one line per
    winner with the profile URL and the page/position of the comment.
    """
    all_comments = get_comments(ARTICLE_URL, COOKIE)
    candidates = parse_authors(all_comments)
    winners = choice(candidates, CHOICE_TOTAL)

    print('获赠名单如下：\n')
    for winner in winners:
        pieces = [
            '* 用户URL：', winner['user_url'],
            ' | 页数：', str(winner['page']),
            ' | 序号：', str(winner['order']),
            '\n',
        ]
        print(''.join(pieces))

    # Profile URLs of the winners, kept for the optional step below.
    profile_urls = [winner['user_url'] for winner in winners]

    # Optional: uncomment to open every winner's profile page in the
    # default browser after a short pause.
    # time.sleep(5)
    # for url in profile_urls:
    #     webbrowser.open(url)


if __name__ == '__main__':
    # Print the local start time (date, weekday name, locale time) so the
    # draw can be dated, then run the draw.
    started_at = time.strftime('%Y-%m-%d %A %X')
    print('当前时间为：' + started_at)
    run()
	# -*- coding: utf-8 -*-
	import requests
	import json
	import re
	import time
	import random
	import webbrowser

	# Fill in your own cookie here: replace XXX with your _xsrf value.
	COOKIE = '_xsrf=XXX'

	# The article whose commenters take part in the draw.
	ARTICLE_URL = 'https://zhuanlan.zhihu.com/p/97139851'

	# Total number of winners to draw.
	CHOICE_TOTAL = 3


	def get_comments(zhuanlan_url, cookie):
		"""Fetch every open comment of a Zhihu column article.

		Pages through the article's comment API, ``limit`` comments per
		request, sleeping one second between requests, until the API reports
		the last page.

		Args:
			zhuanlan_url: Article URL. Its first run of four or more digits
				is used as the article id; the URL is also sent as referer.
			cookie: Value sent verbatim in the ``cookie`` request header.

		Returns:
			A list with the items of the ``data`` field of every page, in
			the order the API returned them (the final page included).

		Raises:
			ValueError: If no article id can be found in ``zhuanlan_url``.
			requests.HTTPError: If the API answers with an error status code.
		"""
		print('正在获取所有评论……')

		id_match = re.search(r'\d{4,}', zhuanlan_url)
		if id_match is None:
			raise ValueError('no article id found in URL: %r' % (zhuanlan_url,))
		api_url = 'https://www.zhihu.com/api/v4/articles/{}/comments'.format(id_match[0])

		headers = {
			'accept': '*/*',
			# Only gzip is advertised so the body can always be decoded
			# without optional codecs such as brotli.
			'accept-encoding': 'gzip',
			'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
			'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
			'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
			'origin': 'https://zhuanlan.zhihu.com',
			'referer': zhuanlan_url,
			'cookie': cookie,
		}

		params = {
			'include': 'data[*].author,collapsed,reply_to_author,disliked,'
			'content,voting,vote_count,is_parent_author,is_author,algorithm_right',
			'order': 'normal',
			'limit': 20,
			'offset': 0,
			'status': 'open'
		}
		page_size = params['limit']

		comments = []
		while True:
			# A timeout keeps a stalled connection from hanging forever.
			resp = requests.get(api_url, params=params, headers=headers, timeout=10)
			resp.raise_for_status()
			resp_data = resp.json()

			comment_page = resp_data['data']
			# Store the page BEFORE looking at is_end; otherwise the comments
			# delivered together with the end marker would be thrown away.
			comments.extend(comment_page)
			# An empty page also ends the loop so a missing end marker
			# cannot make it spin forever.
			if resp_data['paging']['is_end'] is True or not comment_page:
				break

			params['offset'] += page_size
			time.sleep(1)  # be polite to the API between pages

		print('一共%s条评论' % len(comments))
		return comments


	def parse_authors(comments):
		"""Build the list of distinct commenters from raw API comments.

		Featured comments are ignored, and only the first comment of each
		user (identified by ``url_token``) is kept.

		Args:
			comments: Comment dicts as returned by the comment API, in order.

		Returns:
			A list of dicts with the keys ``user_url`` (profile URL),
			``page`` (1-based page of the kept comment, 20 comments per page)
			and ``order`` (1-based position inside that page; featured
			comments still occupy a position in this numbering).
		"""
		print('正在去除重复评论，获取所有参与评论的用户……')
		authors = []
		url_tokens = set()
		for idx, cm in enumerate(comments):
			# Skip featured comments.
			if cm['featured'] is True:
				continue
			url_token = cm['author']['member']['url_token']
			# Skip users who have already been recorded.
			if url_token in url_tokens:
				continue
			authors.append({
				# Profile page of the user.
				'user_url': 'https://www.zhihu.com/people/' + url_token,
				# Page on which the comment appears.
				'page': idx // 20 + 1,
				# Position of the comment within that page.
				'order': idx % 20 + 1,
			})
			url_tokens.add(url_token)

		print('一共%s名用户' % len(authors))
		return authors


	def choice(chosen, num):
		"""Draw up to ``num`` distinct winners at random from ``chosen``.

		Args:
			chosen: List of candidate entries, one per participant.
			num: Number of winners wanted.

		Returns:
			A new list holding ``min(num, len(chosen))`` entries of
			``chosen`` (none when that number is not positive). Every list
			position is drawn at most once, so nobody can win twice.
		"""
		# Report the number that was actually requested by the caller.
		print('正在随机抽取%d名幸运用户……' % num)
		random.seed()
		# sample() draws without replacement; the clamp keeps it from raising
		# when there are fewer candidates than prizes (or none at all).
		winners_wanted = max(0, min(num, len(chosen)))
		return random.sample(chosen, winners_wanted)


	def run():
		"""Run the whole draw and print the winners.

		Fetches the comments of ``ARTICLE_URL``, reduces them to distinct
		commenters, draws ``CHOICE_TOTAL`` winners and prints one line per
		winner with the profile URL and the page/position of the comment.
		"""
		comments = get_comments(ARTICLE_URL, COOKIE)
		authors = parse_authors(comments)
		res = choice(authors, CHOICE_TOTAL)

		luckylist = []
		print('获赠名单如下：\n')
		for user in res:
			url = user['user_url']
			page = user['page']
			order = user['order']
			print('* 用户URL：' + url + ' | 页数：' + str(page) + ' | 序号：' + str(order) + '\n')
			luckylist.append(url)

		# Optional: uncomment to open every winner's profile page in the
		# default browser after a short pause.
		# time.sleep(5)
		# for url in luckylist:
		#     webbrowser.open(url)


	if __name__ == '__main__':
		# Print the local start time (date, weekday name, locale time) so
		# the draw can be dated, then run the draw.
		print('当前时间为：' + time.strftime('%Y-%m-%d %A %X', time.localtime(time.time())))
		run()