Skip to content

Instantly share code, notes, and snippets.

@ysc3839
Last active March 8, 2019 08:38
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ysc3839/f324a161444e9e9489f4d7886558af34 to your computer and use it in GitHub Desktop.
Save ysc3839/f324a161444e9e9489f4d7886558af34 to your computer and use it in GitHub Desktop.
msdn-itellyou-getter
#!/usr/bin/env python
import re
import json
import requests
# Headers applied to every request made through the session: a desktop
# Chrome user-agent string.
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.84 Safari/537.36'
    ),
}

# Extra headers sent only with the POST requests to the Category endpoints.
POST_HEADERS = {
    'origin': 'https://msdn.itellyou.cn',
    'referer': 'https://msdn.itellyou.cn/',
}
# Seconds to wait on a single HTTP request before giving up; without a
# timeout, requests blocks indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 30


def _post_json(session, endpoint, params):
    """POST to ``/Category/<endpoint>`` and return the decoded JSON body.

    Args:
        session: The ``requests.Session`` to send the request with.
        endpoint: Last path segment of the Category URL (e.g. ``'GetLang'``).
        params: Mapping passed as ``params=`` (i.e. in the URL query string).

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = session.post(
        'https://msdn.itellyou.cn/Category/' + endpoint,
        params=params,
        headers=POST_HEADERS,
        timeout=_REQUEST_TIMEOUT,
    )
    # Fail here with a clear HTTP error instead of a JSON-decode error or
    # KeyError further down when the server returns an error page.
    response.raise_for_status()
    return response.json()


def main():
    """Scrape msdn.itellyou.cn and dump the result to ``msdn-itellyou.json``.

    Reads the category menu from the front page, then walks
    category -> index -> language -> product, attaching each level's
    children to its parent dict (``indexes``, ``langs``, ``products``,
    ``fileinfo``). The list of categories is printed, as is each category's
    list of indexes, and the full tree is written as JSON at the end.

    Raises:
        requests.HTTPError: If any request returns a 4xx/5xx status.
    """
    # The context manager closes the session's connections when done.
    with requests.Session() as s:
        s.headers.update(HEADERS)

        r = s.get('https://msdn.itellyou.cn/', timeout=_REQUEST_TIMEOUT)
        r.raise_for_status()
        # Each menu entry carries its id in a data-menuid attribute, followed
        # by the visible name as the element's text.
        categories = [
            {'id': match.group(1), 'name': match.group(2)}
            for match in re.finditer(r'data-menuid="(.+?)".*?>(.+?)<', r.text)
        ]
        print(categories)

        for category in categories:
            indexes = _post_json(s, 'Index', {'id': category['id']})
            print(indexes)
            for index in indexes:
                langs = _post_json(s, 'GetLang', {'id': index['id']})['result']
                for lang in langs:
                    products = _post_json(
                        s,
                        'GetList',
                        {'id': index['id'], 'lang': lang['id'], 'filter': False},
                    )['result']
                    for product in products:
                        product['fileinfo'] = _post_json(
                            s, 'GetProduct', {'id': product['id']}
                        )['result']
                    lang['products'] = products
                index['langs'] = langs
            category['indexes'] = indexes

    with open('msdn-itellyou.json', 'w') as f:
        json.dump(categories, f)
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment