Skip to content

Instantly share code, notes, and snippets.

@non117
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save non117/5ede9fdfec77b595e92b to your computer and use it in GitHub Desktop.
Save non117/5ede9fdfec77b595e92b to your computer and use it in GitHub Desktop.
スクレイピングテンプレート.py
# -*- coding: utf-8 -*-
import lxml.html
import requests
from pathlib import Path
def getElement(url, xpath, timeout=None):
    """Fetch ``url`` and return the result of applying ``xpath`` to the page.

    Args:
        url: Address passed to ``requests.get``.
        xpath: XPath expression evaluated against the parsed document root.
        timeout: Optional timeout forwarded to ``requests.get``. The default
            ``None`` keeps the earlier behaviour of waiting without a limit.

    Returns:
        Whatever ``xpath()`` produces for the expression (a list of elements
        for a node-selecting expression).

    Raises:
        Exception: Carrying ``(final_url, status_code)`` when the response
            status is anything other than 200.
    """
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        raise Exception(res.url, res.status_code)
    try:
        root = lxml.html.fromstring(res.text)
    except ValueError:
        # lxml refuses str input that carries an XML encoding declaration
        # (typical for XHTML pages). Handing over the raw bytes lets the
        # parser apply the declared encoding itself.
        root = lxml.html.fromstring(res.content)
    return root.xpath(xpath)
def getAttributes(elems, atrb):
    """Lazily look up the ``atrb`` attribute of every element in ``elems``.

    Args:
        elems: Iterable of objects exposing a mapping named ``attrib``.
        atrb: Key to read from each element's ``attrib`` mapping.

    Returns:
        A ``map`` iterator yielding one attribute value per element. The
        lookup happens on iteration, so a missing key raises ``KeyError``
        only when that element is reached.
    """
    def read_attribute(node):
        return node.attrib[atrb]

    return map(read_attribute, elems)
def doDownload(url, path, timeout=None):
    """Download ``url`` and write the response body to ``path``.

    Args:
        url: Address passed to ``requests.get``.
        path: Destination object providing ``open`` (a ``pathlib.Path``).
            It is opened in ``'wb'`` mode, so existing content is replaced.
        timeout: Optional timeout forwarded to ``requests.get``. The default
            ``None`` keeps the earlier behaviour of waiting without a limit.
    """
    res = requests.get(url, timeout=timeout)
    # NOTE(review): the status code is not inspected here, so the body of an
    # error response is saved like any other - confirm that this is intended.
    # The context manager closes the file handle deterministically instead of
    # leaving it to garbage collection.
    with path.open('wb') as out:
        out.write(res.content)
def main():
    """Entry point of the scraping template; currently a no-op placeholder."""
    # The docstring doubles as the function body: a ``def`` without any body
    # is a syntax error, which would make the whole module unimportable.


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment