Skip to content

Instantly share code, notes, and snippets.

@ma6174
Last active August 29, 2015 13:56
Show Gist options
  • Save ma6174/9209925 to your computer and use it in GitHub Desktop.
Save ma6174/9209925 to your computer and use it in GitHub Desktop.
Someone may need this~
#!/usr/bin/env python
# coding=utf-8
import urllib
import BeautifulSoup
import sys
import Queue
import threading
import logging
# Hands results from the worker threads to the main (writer) thread.
q = Queue.Queue()
# Python 2 only: make UTF-8 the implicit str/unicode codec. Presumably needed
# so the scraped cell text can be joined with byte strings and written to the
# file without explicit encoding -- TODO confirm.
reload(sys)
sys.setdefaultencoding('utf-8')
# Range of numeric page ids to fetch; `end` is exclusive (see range() below).
start = 131910
end = 169506
# Pending ids. Workers take ids with task.pop(), i.e. from the end of the
# list, so pages are requested from the highest id downwards.
task = range(start, end)
# Output file, opened in append mode so existing content is kept.
f = open('data.txt', 'a+')
# DEBUG level so the per-id logging.info() calls in the workers are shown.
logging.basicConfig(level=logging.DEBUG)
def procucer():
    """Worker loop: fetch pages by id and enqueue one output line per page.

    Repeatedly pops an id from the shared ``task`` list, downloads the page
    for that id, picks a fixed set of ``<td>`` cells out of it and puts a
    tab-separated, newline-terminated line (id first, then the cell texts)
    on the shared queue ``q``.

    Returns when ``task`` is empty.  A page that cannot be downloaded or
    does not contain the expected cells is logged and skipped; nothing is
    enqueued for it.
    """
    # Positions of the wanted cells within the page's list of <td> elements.
    use = (3, 6, 8, 10, 12, 14, 16, 18, 20, 22)
    while True:
        try:
            num = task.pop()
        except IndexError:
            # The task list is exhausted: this worker is done.
            break
        logging.info(num)
        url = "http://student.tiaoji.kaoyan.com/student-tiaoji-info-" + \
            str(num)
        try:
            response = urllib.urlopen(url)
            try:
                data = response.read()
            finally:
                # Release the connection even if the read fails.
                response.close()
            td = BeautifulSoup.BeautifulSOAP(data).findAll('td')
            fields = [str(num)] + [td[i].text for i in use]
        except Exception as exc:
            # Best effort: one bad page (network error, missing cells, parse
            # failure) must not stop the worker, but it should leave a trace.
            logging.warning("skipping %s: %s", url, exc)
            continue
        q.put('\t'.join(fields) + '\n')
# Number of concurrent download threads.
NUM_WORKERS = 20
# Unique marker a worker puts on the queue when it has finished.
_DONE = object()


def _run_worker():
    """Run procucer() and always announce completion on the queue."""
    try:
        procucer()
    finally:
        # Posted even if procucer() raises, so the writer never waits for a
        # worker that is already gone.
        q.put(_DONE)


for i in range(NUM_WORKERS):
    t = threading.Thread(target=_run_worker)
    # Daemon threads do not keep the process alive once the writer exits.
    t.daemon = True
    t.start()

total = 0      # number of lines written so far
finished = 0   # number of workers that have reported completion
try:
    # Stop when every worker has finished rather than when a fixed number of
    # lines has arrived: pages that fail to download produce no line, so a
    # line-count condition could block forever on q.get().
    while finished < NUM_WORKERS:
        line = q.get()
        if line is _DONE:
            finished += 1
            continue
        total += 1
        f.write(line)
        # Flush per line so partial results survive an interrupted run.
        f.flush()
finally:
    f.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment