Skip to content

Instantly share code, notes, and snippets.

@SymeonChen
Last active October 21, 2016 03:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SymeonChen/8fd61b895bdf0e1e471802fe054e123a to your computer and use it in GitHub Desktop.
Save SymeonChen/8fd61b895bdf0e1e471802fe054e123a to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
import re
from lxml import etree
import pprint
from collections import Counter

# One-off download step that produced the local page1.html .. page11.html
# files read below. It is kept for reference only; the requests session is
# needed by nothing else, so it stays commented out together with the loop.
# s = requests.session()
# for num in range(1,12):
#     url = 'https://www.v2ex.com/t/313225?p='+str(num)
#     r = s.get(url)
#     with open('page'+str(num)+'.html', 'w') as f:
#         f.write(r.text)
#     print('Download page',num,'successful!')

# Saved pages are named page<N>.html for N in FIRST_PAGE..LAST_PAGE inclusive.
FIRST_PAGE = 1
LAST_PAGE = 11

# Largest answer that is still counted; anything above it (e.g. 999,
# 66666666, 233333) is treated as a joke reply and ignored.
MAX_ANSWER = 100

# Replies with exactly this answer get their author and floor printed.
# NOTE(review): presumably the winning number of the guessing game - confirm.
TARGET_ANSWER = 84

# Compiled once instead of once per reply.
NUMBER_RE = re.compile('[0-9]+')

# XPath expressions selecting, per reply, the author link, the comment body
# and the floor marker. They must yield parallel lists for zip() to line up.
NAME_XPATH = u'//*[@id="Main"]/div[4]//table/tr/td[3]/strong/a'
COMMENT_XPATH = u'//*[@id="Main"]/div[4]//table/tr/td[3]/div[4]'
FLOOR_XPATH = u'//*[@id="Main"]/div[4]//table/tr/td[3]/div[1]/span'


def extract_answer(text):
    """Return the first run of digits in *text* as an int, or None.

    *text* may be None (an element without leading text); that is treated
    like a comment containing no number instead of raising TypeError.
    """
    match = NUMBER_RE.search(text or '')
    if match is None:
        return None
    return int(match.group())


def iter_replies(first_page=FIRST_PAGE, last_page=LAST_PAGE):
    """Yield (name, comment_text, floor) for every reply in the saved pages.

    Pages are read in ascending order from 'page<N>.html' in the current
    directory, so replies come out in the order they appear in the files.
    """
    for num in range(first_page, last_page + 1):
        with open('page' + str(num) + '.html', 'rb') as f:
            page = etree.HTML(f.read())
        names = page.xpath(NAME_XPATH)
        comments = page.xpath(COMMENT_XPATH)
        floors = page.xpath(FLOOR_XPATH)
        for name, comment, floor in zip(names, comments, floors):
            yield name.text, comment.text, floor.text


def collect_answers(replies, max_answer=MAX_ANSWER):
    """Return the first valid answer of every user as (name, answer, floor).

    *replies* is an iterable of (name, comment_text, floor) tuples. A reply
    is skipped when its comment contains no number, when the number is
    greater than *max_answer*, or when the same user already has an accepted
    answer. A user whose earlier replies were all skipped can still be
    counted by a later valid reply.
    """
    seen = set()
    accepted = []
    for name, comment_text, floor in replies:
        answer = extract_answer(comment_text)
        if answer is None or answer > max_answer:
            continue
        # The original duplicate check was a no-op (`pass`), so repeat
        # answers were counted; only the first valid one is kept now.
        if name in seen:
            continue
        seen.add(name)
        accepted.append((name, answer, floor))
    return accepted


def tally_by_value(answers, max_answer=MAX_ANSWER):
    """Return {value: count} for every value in 0..max_answer inclusive.

    The table has max_answer + 1 slots so that an answer equal to
    *max_answer* (which the filter accepts) has a slot of its own instead of
    raising IndexError.
    """
    counts = [0] * (max_answer + 1)
    for answer in answers:
        counts[answer] += 1
    return dict(zip(range(max_answer + 1), counts))


def main():
    """Analyse the saved pages and print the answer statistics."""
    accepted = collect_answers(iter_replies())

    for name, answer, floor in accepted:
        if answer == TARGET_ANSWER:
            print(name)
            print(floor)

    num_list = [answer for _, answer, _ in accepted]

    # How often each answer was given, as a Counter.
    pprint.pprint(Counter(num_list))

    # The same counts listed for every possible answer value in order,
    # including values nobody chose.
    pprint.pprint(tally_by_value(num_list))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment