Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
#!/usr/bin/env python
# coding=utf8
# author=evi1m0#n0tr00t
# Fri Apr 10 14:14:35 2015
import os
import re
import sys
import wget
import requests
import urlparse
import threadpool as tp
def _archives(author):
    """Collect the article URLs listed on a blog's archive pages.

    Fetches ``<author>/archive``, pulls the month links out of the
    ``fiList`` section of that page, then fetches every month page whose
    query string contains ``month=`` and gathers the article links on it.

    NOTE(review): the bodies of the month filter and of the inner URL loop
    were missing from the source as received; appending to ``months_url``
    and ``archives_list`` is the evident reconstruction (nothing else ever
    filled those lists) -- confirm against the upstream script.
    NOTE(review): ``author`` is formatted into the URL with no scheme or
    host. ``requests`` needs an absolute URL, so either callers pass a full
    base URL or a host prefix was lost from the format string -- confirm.

    :param author: value placed in front of ``/archive`` to build the URL.
    :returns: list of article URLs; empty when the archive section is not
        present in the fetched page.
    """
    archives_url = '{}/archive'.format(author)
    # Single-argument print(...) behaves the same as the Python 2 statement.
    print('[*] Target URL: {}'.format(archives_url))
    year_content = requests.get(archives_url).content

    sections = re.findall('<div class=fi-list id=fiList>(.*?)</section>', year_content)
    if not sections:
        # Previously an IndexError on [0]; report it and return nothing.
        print('[-] Error: archive list not found at {}'.format(archives_url))
        return []

    months = re.findall('<a href="(.*?)" class="fi-border-bt2', sections[0])
    # The count is reported before filtering, as in the original.
    print('[*] Months count: {}'.format(len(months)))

    # Keep only the links that actually select a month.
    months_url = [
        month for month in months
        if 'month=' in urlparse.urlparse(month).query
    ]

    archives_list = []
    for url in months_url:
        month_content = requests.get(url).content
        archives_list.extend(
            re.findall('</div><a href="(.*?)" class=info-detail target=_blank>', month_content)
        )
    return archives_list
def main(url):
    """Download a single article page into the author's directory.

    Fetches ``url``, reads the article title from the
    ``<h2 class="title content-title">`` element, and saves the page to
    ``<sys.argv[1]>/<title>`` using ``wget.download``. Download errors are
    printed rather than raised.

    NOTE(review): the statement inside ``try`` was garbled in the source as
    received (only ``, out=_filename, bar='')`` survived); it is
    reconstructed here as ``wget.download(url, ...)`` -- confirm.

    :param url: address of the article page to save.
    """
    _page = requests.get(url).content
    titles = re.findall('<h2 class="title content-title">(.*?)</h2>', _page)
    if not titles:
        # Previously an IndexError on [0]; skip pages without a title.
        print('[-] Error: title not found in ' + str(url))
        return
    _title = titles[0]
    # The target directory is the blog name given on the command line.
    _filename = '{author}/{title}'.format(author=sys.argv[1], title=_title)
    print('[+] Download: {}'.format(_title))
    try:
        wget.download(url, out=_filename, bar='')
    except Exception as e:
        # Best effort: one failed article must not stop the other downloads.
        print('[-] Error: ' + str(e))
if __name__ == '__main__':
    # Script entry point: back up every archived article of one blog.
    # NOTE(review): the exit after the usage text, the directory creation
    # and the final pool.wait() were absent from the source as received and
    # are reconstructed here -- confirm against the upstream script.
    if len(sys.argv) == 1:
        print('[-] Usage: {} Blog_name'.format(sys.argv[0]))
        print('[-] Example: {} evi1m0'.format(sys.argv[0]))
        # Without this exit, sys.argv[1] below raises IndexError.
        sys.exit(1)

    author = sys.argv[1]
    # main() writes files to '<author>/<title>', so the directory must exist.
    if not os.path.exists(author):
        os.mkdir(author)

    archives = _archives(author)
    print('[*] Archives statistics: {}'.format(len(archives)))

    # threadpool
    pool = tp.ThreadPool(30)
    reqs = tp.makeRequests(main, archives)
    for req in reqs:
        pool.putRequest(req)
    # Block until every queued download has been processed.
    pool.wait()

This comment has been minimized.

Copy link

commented Apr 27, 2015

Hi, I tried your code, but the comments and the template could not be accessed without Wi-Fi, which means we can't get that information after Baidu shuts down.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.