Skip to content

Instantly share code, notes, and snippets.

@ihciah
Created June 1, 2015 08:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ihciah/5dcfae3e34cf9b295f5b to your computer and use it in GitHub Desktop.
Save ihciah/5dcfae3e34cf9b295f5b to your computer and use it in GitHub Desktop.
两个无聊的小脚本
# -*- coding: utf-8 -*-
# Filter for users who posted within the last month and are level 12
import urllib,urllib2,re
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
# Thread-list URL of the forum to crawl.  `kw` is the percent-encoded UTF-8
# forum name; the `pn` paging offset is appended by the crawl loop.
URLBASE = 'http://tieba.baidu.com/f?kw=%E5%8D%8E%E4%B8%9C%E7%90%86%E5%B7%A5%E5%A4%A7%E5%AD%A6&ie=utf-8&pn='

# Distinct names extracted from the thread-list pages.
result = set()

# Names whose level badge turned out to be 'lv12'; filled in by doit().
realres = []

# Ten worker threads used to run the per-user lookups.
pool = ThreadPool(10)
def conp(s):
    """Return the link texts of the user links found in page source *s*.

    A match is whatever sits between ``&ie=utf-8&fr=frs" target="_blank">``
    and the nearest following ``</a>``.  The results are returned in page
    order and may contain duplicates; an empty list means nothing matched.
    """
    user_link = re.compile(r'&ie=utf-8&fr=frs" target="_blank">(.+?)</a>')
    return user_link.findall(s)
def getlevel(s, forum=u'华东理工大学'):
    """Fetch a user's Tieba home page and return their level badge for a forum.

    s     -- user name, sent as the ``un`` query parameter.
    forum -- forum name (unicode text) whose level badge is looked up;
             defaults to the forum this script was written for.

    Returns a list of the byte strings that follow ``forum_level `` in the
    badge markup placed right after the forum name (the caller compares
    them with values such as ``'lv12'``).  The list is empty when the page
    carries no such badge.  Errors raised by ``urllib2.urlopen`` propagate
    to the caller.
    """
    pram = urllib.urlencode({'un': s, 'ie': 'utf-8', 'fr': 'frs'})
    url = 'http://tieba.baidu.com/home/main/?' + pram
    resp = urllib2.urlopen(url)
    try:
        pres = resp.read()
    finally:
        # Release the connection promptly instead of leaving it to the
        # garbage collector; this function runs once per user on the pool
        # threads.
        resp.close()
    # The page is matched as raw bytes against a GBK-encoded pattern.
    # Escape the encoded name so that none of its bytes can act as a regex
    # metacharacter when a different forum name is supplied.
    name = re.escape(forum.encode('GBK'))
    pattern = '<span>' + name + '</span><span class="forum_level (.+?)">'
    return re.findall(pattern, pres)
def doit(i):
    """Report the forum level of user *i* and record the user if it is lv12.

    Calls getlevel(i); when it yields at least one level, prints
    "<level> <user>" and, if that first level equals 'lv12', appends the
    user to the module-level ``realres`` list.  Returns None.
    """
    levels = getlevel(i)
    if not levels:
        # No level badge found for this user: nothing to print or record.
        return
    level = levels[0]
    # One pre-formatted string gives the same "<level> <user>" output line.
    print('%s %s' % (level, i))
    if level == 'lv12':
        realres.append(i)
# Crawl the thread-list pages (pn = 0, 50, ..., 2000) and collect every
# distinct name that conp() extracts from them.
for i in range(0, 2050, 50):
    page = urllib2.urlopen(URLBASE + str(i))
    try:
        res = page.read()
    finally:
        # Close each response explicitly rather than relying on the GC.
        page.close()
    result.update(conp(res))
    # Progress line: current offset and running count of distinct names.
    print('%d %d' % (i, len(result)))

# Run the per-user level check on the worker threads; doit() appends every
# lv12 user to realres.
l = list(result)
pool.map(doit, l)
pool.close()
pool.join()
print(realres)
# -*- coding: utf-8 -*-
# Print the follow list of each user listed in the given file
import urllib,urllib2,re
# For every user name in the input file, fetch that user's Tieba home page
# and print the text of each `u-f-item unsign` entry found on it.

# Read all names up front so the file is closed before any network work.
with open('D:/in.txt', 'r') as f:
    l = f.readlines()

for i in l:
    # Strip line endings and spaces to get the bare user name.
    m = i.replace('\n', '').replace('\r', '').replace(' ', '')
    if not m:
        # Blank line: there is no user to look up, so skip the request.
        continue
    print(i)
    pram = urllib.urlencode({'un': m, 'ie': 'utf-8', 'fr': 'frs'})
    url = 'http://tieba.baidu.com/home/main/?' + pram
    resp = urllib2.urlopen(url)
    try:
        pres = resp.read()
    finally:
        # Close each response explicitly rather than relying on the GC.
        resp.close()
    print(i)
    for t in re.findall(r'class="u-f-item unsign"><span>(.+?)</span>', pres):
        # Matched bytes are decoded as GBK before printing.
        print(t.decode('GBK'))
    print('------------')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment