Skip to content

Instantly share code, notes, and snippets.

@xdqi
Last active March 18, 2016 20:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save xdqi/8637806 to your computer and use it in GitHub Desktop.
Save xdqi/8637806 to your computer and use it in GitHub Desktop.
An analysis of the Chrome ba election (Jan 25, 2014), written in Python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import requests
from bs4 import BeautifulSoup
import re
# Declare variables: the thread URL, its parsed landing page, and the tallies.
PAGE = 'http://wapp.baidu.com/p/2833065220'
page_content = requests.get(PAGE).text
page_soup = BeautifulSoup(page_content)
# Running vote counts, one per choice; all start at zero.
support = oppose = abstain = 0
# Names of the voters behind each count, appended in the order they are found.
supporters, opposers, abstainers = [], [], []
# 检查发帖人级别
def level_check(user, level=10):
check_url = 'http://tieba.baidu.com/i/data/panel?ie=utf-8&un=' + user
check_text = requests.get(check_url).text
current_level = int(re.findall(
ur'"forum_name":"chrome","level_id":(\d+)', check_text)[0])
return current_level >= 10
# 获取页数及吧管理投票情况
pager_text = page_soup.select('div .h')[0].get_text()
page_num = int(re.findall(ur'第1/(\d+)页', pager_text)[0])
admin_voting = list(page_soup.select('.i form div')[0].stripped_strings)
admin_support = int(re.findall(ur'支持.(\d+)票', admin_voting[0])[0])
admin_oppose = int(re.findall(ur'不支持.(\d+)票', admin_voting[1])[0])
admin_abstain = int(re.findall(ur'弃权.(\d+)票', admin_voting[2])[0])
admin_num = int(re.findall(ur'已有投票人数:(\d+)', admin_voting[4])[0])
# 逐页获取
for i in range(0, page_num):
page = PAGE + '?pn=' + str(i * 30)
content = requests.get(page).text
soup = BeautifulSoup(content)
post_num = len(soup.select('.i'))
for post in range(0, post_num):
messages = list(soup.select('.i')[post].stripped_strings)
try:
voting = re.findall(ur'\d+楼\. (.+)', messages[0])[0]
if voting == u'支持' and level_check(messages[1]):
support += 1
supporters.append(messages[1])
if voting == u'不支持' and level_check(messages[1]):
oppose += 1
opposers.append(messages[1])
if voting == u'弃权' and level_check(messages[1]):
abstain += 1
abstainers.append(messages[1])
except:
pass
print page_soup.title.string, '\n'
print u'吧管理支持:', admin_support, u'不支持:', admin_oppose, u'弃权:', admin_abstain
print u'吧管理支持率:', admin_support / (
admin_support + admin_oppose + admin_abstain) * 100, '%\n'
print u'吧友支持:', support, ', '.join(supporters)
print u'吧友不支持:', oppose, ', '.join(opposers)
print u'吧友弃权:', abstain, ', '.join(abstainers)
print u'吧友支持率:', support / (support + oppose + abstain) * 100, '%\n'
print u'总支持率:', admin_support / (
admin_support + admin_oppose + admin_abstain) * 30 + support / (
support + oppose + abstain) * 70, '%\n'
print u'Powered by Kirito Python Analytics 2014'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment