Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
スクレイピングテンプレート.py
# -*- coding: utf-8 -*-
import lxml.html
import requests
from pathlib import Path
def getElement(url, xpath, timeout=30):
    """Fetch ``url`` and evaluate ``xpath`` against the parsed HTML document.

    Args:
        url: Address handed to ``requests.get``.
        xpath: XPath expression evaluated on the root of the parsed document.
        timeout: Value forwarded to ``requests.get`` as ``timeout`` (seconds).
            Pass ``None`` to wait indefinitely.

    Returns:
        Whatever ``xpath()`` on the document root returns for the given
        expression.

    Raises:
        Exception: With ``(res.url, res.status_code)`` as its arguments when
            the response status is anything other than 200.
    """
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        raise Exception(res.url, res.status_code)
    # Hand lxml the raw bytes rather than ``res.text``: the parser can then
    # pick up the encoding declared inside the document, and lxml refuses
    # ``str`` input that carries an XML encoding declaration.
    elem = lxml.html.fromstring(res.content)
    return elem.xpath(xpath)
def getAttributes(elems, atrb):
    """Return a lazy iterator over the ``atrb`` attribute of each element.

    Each item of ``elems`` is expected to expose a mapping named ``attrib``.
    The lookup happens only when the iterator is consumed, so a missing
    attribute surfaces as ``KeyError`` at iteration time.
    """
    def _pick(node):
        return node.attrib[atrb]

    return map(_pick, elems)
def doDownload(url, path, timeout=30):
    """Download ``url`` and save the response body to ``path``.

    Args:
        url: Address handed to ``requests.get``.
        path: ``pathlib.Path``-like object; it is opened in binary write mode,
            so an existing file is overwritten.
        timeout: Value forwarded to ``requests.get`` as ``timeout`` (seconds).
            Pass ``None`` to wait indefinitely.

    Raises:
        Exception: With ``(res.url, res.status_code)`` as its arguments when
            the response status is anything other than 200, so that an error
            page is never written to disk as if it were the content.
    """
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        raise Exception(res.url, res.status_code)
    # The context manager guarantees the file handle is flushed and closed.
    with path.open('wb') as out:
        out.write(res.content)
def main():
    """Entry point of the template; a placeholder with no scraping steps yet."""


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.