Skip to content

Instantly share code, notes, and snippets.

@simpleapples
Last active December 1, 2016 14:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simpleapples/5bec1113f07998222e5aa3e8d49ad353 to your computer and use it in GitHub Desktop.
Save simpleapples/5bec1113f07998222e5aa3e8d49ad353 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
# Value sent as the `sessionid` cookie with every request made by
# get_stu_info. Empty by default; presumably a logged-in session id has to be
# filled in before running -- confirm against the target site.
SESSION_ID = ''
# Path of the output file that write_to_file opens for writing.
# Empty by default; must be set before running.
FILE_PATH = ''
def get_stu_info(page, timeout=30):
    """Scrape one listing page and return its table rows as CSV-like lines.

    Fetches ``http://www.nextsecond.cn/fellow?page=<page>`` with the
    module-level ``SESSION_ID`` sent as the ``sessionid`` cookie, parses the
    response with BeautifulSoup's ``html.parser`` and flattens every ``<tr>``
    element into a single comma-joined string.

    Args:
        page: Page number; converted with ``str()`` and appended to the URL.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error. Without a timeout a stalled connection would block
            the scrape forever.

    Returns:
        A list of strings, one per ``<tr>`` that yielded at least one cell.
        Rows that contain no ``<td>`` with text are left out.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    base_url = 'http://www.nextsecond.cn/fellow'
    url = base_url + '?page=' + str(page)
    cookies = {'sessionid': SESSION_ID}
    response = requests.get(url, cookies=cookies, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    result = []
    for row in soup.find_all('tr'):
        line = []
        for col in row.find_all('td'):
            text = col.get_text()
            # Cells with no text at all are skipped; the emptiness check is
            # done before stripping, so a whitespace-only cell is kept as ''.
            if text:
                line.append(text.strip())
        if line:
            # NOTE: cells are joined verbatim, so a comma inside a cell is
            # not escaped in the resulting line.
            result.append(','.join(line))
    return result
def write_to_file(result, path=None):
    """Write each item of *result* as one line of a UTF-8 (with BOM) file.

    The target file is created or truncated, then every item is written
    followed by a newline.

    Args:
        result: Iterable of strings, one per output line.
        path: Destination file. When ``None`` (the default) the module-level
            ``FILE_PATH`` is used, which keeps existing callers working.
    """
    target = FILE_PATH if path is None else path
    # 'utf-8-sig' prepends a byte-order mark to the written data.
    # Plain 'w' is enough: the file is only written, never read back.
    with open(target, 'w', encoding='utf-8-sig') as out_file:
        out_file.writelines(item + '\n' for item in result)
def main():
    """Scrape pages 1 through 999 and write all collected rows to the file.

    Each page is fetched with ``get_stu_info``, its rows are appended to a
    running list and a progress line is printed. After the last page the
    complete list is handed to ``write_to_file`` in a single call.
    """
    collected = []
    for page in range(1, 1000):
        rows = get_stu_info(page)
        collected.extend(rows)
        print('processing...page...' + str(page))
    write_to_file(collected)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment