Skip to content

Instantly share code, notes, and snippets.

@dongweiming
Created June 10, 2017 04:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dongweiming/db7f1df7a4b272e5a101b9e292891474 to your computer and use it in GitHub Desktop.
Save dongweiming/db7f1df7a4b272e5a101b9e292891474 to your computer and use it in GitHub Desktop.
# coding=utf-8
import re
import ast
import time
from datetime import datetime
from collections import defaultdict
from http.cookies import SimpleCookie
import requests
from bs4 import BeautifulSoup
# Comment-list endpoint on mp.weixin.qq.com.  ``{type}`` and ``{start}`` are
# filled in per request with str.format; every other query parameter is fixed.
# NOTE(review): comment_id and token are hard-coded -- presumably specific to
# one article / one logged-in session; confirm before reusing the script.
url = 'https://mp.weixin.qq.com/misc/appmsgcomment?action=list_comment&type={type}&begin={start}&count=10&comment_id=3196803350&token=708943601&lang=zh_CN&mp_version=7'
# User-Agent header sent with every request.
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
# Captures whatever follows "list : " up to the ",<non-word chars>total_count"
# that comes after it in the page's inline script.  Written as a raw string
# because ``\W`` is a regex escape, not a Python string escape; a plain
# literal produces an "invalid escape sequence" warning.
js_regex = r'list : (.*),\W+total_count'
# Read the cookie string stored in cookies.txt, parse it with SimpleCookie and
# flatten the result into a plain {name: value} dict, which is what gets
# passed to requests as ``cookies=``.
with open('cookies.txt') as cookie_file:
    raw_cookie_text = cookie_file.read()
c = SimpleCookie()
c.load(raw_cookie_text)
cookies = {name: morsel.value for name, morsel in c.items()}
# Number of comments requested per page; must match ``count=10`` in ``url``.
_PAGE_SIZE = 10
# Compiled once instead of on every page.
_CGI_DATA_REGEX = re.compile(js_regex)


def _fetch_comment_page(kind, begin):
    """Download one page of the comment listing and return its comment dicts.

    The page embeds its data in an inline ``<script>`` that mentions
    ``wx.cgiData``; the value captured by ``js_regex`` is evaluated as a
    Python literal after mapping JavaScript booleans to Python ones.

    :param kind: value substituted for ``{type}`` in ``url``.
    :param begin: value substituted for ``{start}`` in ``url`` (item offset).
    :returns: the ``'comment'`` entry of the parsed data.
    :raises RuntimeError: if the expected script tag or data literal is
        missing from the response.
    """
    r = requests.get(url.format(type=kind, start=begin),
                     cookies=cookies, headers={'user-agent': ua})
    soup = BeautifulSoup(r.text, 'lxml')
    script = next((str(tag) for tag in soup.find_all('script')
                   if 'wx.cgiData' in str(tag)), None)
    if script is None:
        raise RuntimeError(
            'no <script> containing wx.cgiData in the response '
            '(are the cookies / token still valid?)')
    match = _CGI_DATA_REGEX.search(script)
    if match is None:
        raise RuntimeError('could not locate the comment list in wx.cgiData')
    # NOTE(review): the blanket replace also rewrites the words "true" and
    # "false" when they occur inside comment text; kept as in the original
    # because the exact payload format cannot be verified from here.
    data = ast.literal_eval(
        match.group(1).replace('false', 'False').replace('true', 'True'))
    return data['comment']


# Collect every comment for both listing types.
# NOTE(review): what type 0 and type 1 mean is not visible here -- presumably
# two comment categories; confirm against the endpoint.
comments = []
for comment_type in (0, 1):
    page = 0
    while True:
        batch = _fetch_comment_page(comment_type, page * _PAGE_SIZE)
        comments.extend(batch)
        # A short page means there is nothing left for this type.
        if len(batch) < _PAGE_SIZE:
            break
        page += 1
        time.sleep(0.3)  # small pause between requests

# Sort in place by posting time.  The original called sorted() and threw the
# result away, leaving the list unordered even though the later cut-off loop
# stops at the first comment past the deadline.
comments.sort(key=lambda item: item['post_time'])
# --- Draw settings ---------------------------------------------------------
# Lucky number typed in by the operator.
NUMBER = int(input('输入幸运数字❯ '))
# How many winners to pick.
COUNT = 5
# Matches the first run of two consecutive digits in a comment.
NUMERIC_REGEX = re.compile(r'(\d){2}')
# Comments posted at or after this moment are not considered.
END_TIMESTAMP = datetime(2017, 6, 9)

# --- Candidate collection --------------------------------------------------
# Nicknames that already have an accepted entry (one entry per nickname).
SEEN_USERS = set()
# guessed number -> list of (nickname, comment id, post time, comment text)
NUMBER_MAP = defaultdict(list)

for entry in comments:
    posted_at = datetime.fromtimestamp(entry['post_time'])
    if not posted_at < END_TIMESTAMP:
        # Stops at the first comment past the deadline; this relies on
        # ``comments`` being in chronological order.
        break

    # Drop HTML tags from the comment body before looking for digits.
    text = re.sub('<[^<]+?>', '', entry['content'])

    found = NUMERIC_REGEX.search(text)
    if found is None:
        continue
    digits = found.group()

    # Skip comments that still contain a two-digit run once the first one
    # has been removed.
    if NUMERIC_REGEX.search(text.replace(digits, '', 1)) is not None:
        continue

    author = entry['nick_name']
    if author in SEEN_USERS:
        continue
    SEEN_USERS.add(author)

    bucket = NUMBER_MAP[int(digits)]
    bucket.append((author, entry['id'], str(posted_at), text))
def _pick_winners(number_map, lucky, count):
    """Return at most *count* entries, closest guesses to *lucky* first.

    Entries that guessed *lucky* exactly come first, in stored order.  If
    there are fewer than *count*, the search widens one step at a time
    (``lucky + 1`` / ``lucky - 1``, then ``+2`` / ``-2``, ...).  The entries
    found at each distance are ordered by their second field (the comment
    id) before being appended.

    :param number_map: mapping of guessed number -> list of entry tuples.
    :param lucky: the drawn number.
    :param count: maximum number of entries to return.
    :returns: a new list; *number_map* and its lists are left unmodified.
    """
    # Copy, so the list stored inside number_map is never extended in place.
    winners = list(number_map.get(lucky, []))
    # No guess lies farther away than this, so widening past it cannot find
    # anything new.  This bounds the loop, which otherwise would never end
    # when there are not enough entries overall.
    max_offset = max((abs(guess - lucky) for guess in number_map), default=0)
    offset = 1
    while len(winners) < count and offset <= max_offset:
        # .get() rather than indexing: number_map may be a defaultdict and
        # probing must not create empty buckets.
        nearby = (number_map.get(lucky + offset, [])
                  + number_map.get(lucky - offset, []))
        winners.extend(sorted(nearby, key=lambda entry: entry[1]))
        offset += 1
    # Always truncate, including when the exact number alone has more than
    # `count` entries.
    return winners[:count]


values = _pick_winners(NUMBER_MAP, NUMBER, COUNT)
# Announce the result: a header line followed by one two-line record per
# winner (nickname, comment id, post time, then the comment text).
header = '\n如下{}位同学获得《流畅的Python》:\n'.format(COUNT)
print(header)

row_template = ('用户: {:<6}\t楼层: {:<4} 参与时间: {:<10} \n'
                '评论内容:{}')
for winner in values:
    print(row_template.format(*winner))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment