Skip to content

Instantly share code, notes, and snippets.

@p3t3r67x0
Created February 27, 2019 03:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save p3t3r67x0/b5cb21e5b79e07f3db2ceec30e08d9f8 to your computer and use it in GitHub Desktop.
Save p3t3r67x0/b5cb21e5b79e07f3db2ceec30e08d9f8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
from lxml import html
def load_doc(
    url='http://www.spiegel.de/kultur/literatur/'
        'spiegel-bestseller-taschenbuecher-a-1025518.html',
    timeout=10,
):
    """Download a page and return its raw body.

    Args:
        url: Address of the page to fetch. Defaults to the SPIEGEL
            paperback bestseller list this script was written for.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` would wait indefinitely on a stalled
            connection.

    Returns:
        The response body as bytes when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...); these are not swallowed here.
    """
    doc = requests.get(url, timeout=timeout)
    if doc.status_code == 200:
        return doc.content
    # Any other status is reported to the caller as "no document".
    return None
def extract_dom(doc):
    """Print an HTML snippet for every bestseller entry found in *doc*.

    The document is parsed with lxml and every
    ``<div class="content clearfix">`` element is treated as one entry.
    For each entry the cover image path, the title and (when present) the
    blurb text are written to stdout as a two-column row of markup.

    Args:
        doc: Raw HTML of the bestseller page, as returned by ``load_doc``.
            ``None`` or an empty document produces no output.

    Returns:
        None. All output goes to stdout.
    """
    # Imported by name on purpose: the module-level name ``html`` refers to
    # ``lxml.html``, so a plain ``import html`` here would shadow it.
    from html import escape

    # load_doc() yields None when the download fails; nothing to parse then.
    if not doc:
        return

    dom = html.fromstring(doc)
    for block in dom.xpath('//div[@class="content clearfix"]'):
        img = block.xpath('./div/div/a/img/@src')
        title = block.xpath(
            './div/a/span[@class="title bestseller-popup-link"]/text()'
        )
        div = block.xpath(
            './div[@class="bestseller-popup-content clearfix"]'
            '/div[@class="bestseller-popup-klappentext"]/text()'
        )

        # A matching container without cover or title is not a complete
        # entry; skip it instead of failing with IndexError.
        if not img or not title:
            continue

        # Scraped values are escaped so characters such as "&", "<" or '"'
        # cannot break the generated markup.
        print('<div class="row mb-50"><div class="col-xs-12 col-sm-3">')
        print('<img src="http://www.spiegel.de{}" width="200px" height="auto">'.format(escape(img[0])))
        print('</div><div class="col-xs-12 col-sm-9"><h2>{}</h2>'.format(escape(title[0])))
        # The blurb is optional; emit the paragraph only when one was found.
        if div:
            print('<p>{}</p>'.format(escape(div[0])))
        print('</div></div>')
def main():
    """Download the bestseller page and print the rendered entries.

    Raises:
        SystemExit: With an error message (exit status 1) when the page
            could not be downloaded.
    """
    doc = load_doc()
    # load_doc() returns None for a non-200 response; stop with a clear
    # message instead of handing None to the HTML parser.
    if doc is None:
        raise SystemExit('error: could not download the bestseller page')
    extract_dom(doc)
# Run the scraper only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment