Skip to content

Instantly share code, notes, and snippets.

@reclosedev
Last active December 13, 2015 20:09
Show Gist options
  • Save reclosedev/bf58c122d3c8eb47787d to your computer and use it in GitHub Desktop.
Save reclosedev/bf58c122d3c8eb47787d to your computer and use it in GitHub Desktop.
Собирает информацию о книгах из LiveLib для раздачи http://rutor.org/torrent/248905/
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import urllib
import jinja2
import lxml.html
import requests
# Optional dependency: when requests_cache can be imported it is configured
# with the cache name "livelib"; when it is missing the script carries on
# without it.
# NOTE(review): presumably this caches LiveLib responses between runs, and
# other requests_cache releases may expose a differently named setup
# function than configure() -- confirm against the installed version.
try:
    import requests_cache
    requests_cache.configure("livelib")
except ImportError:
    pass
# Root folder that main() walks looking for .fb2 files. main() also removes
# this exact string from every directory path to obtain the group name.
# Being a raw literal, the value ends with two backslash characters.
BOOK_FOLDER = ur"d:\tmp\books\Все книги в формате FB2\\"
# Search URL prefix: find_book_info() appends the URL-quoted book name to it
# and also uses it as the base when making links absolute.
FIND_URL = "http://www.livelib.ru/find/"
def find_book_info(book_name):
    """Search LiveLib for a book and return HTML fragments describing it.

    Requests ``FIND_URL`` followed by the URL-quoted, UTF-8 encoded
    ``book_name`` and uses the first ``div.bookinfo`` element of the
    response.

    :param book_name: title to search for; it is encoded to UTF-8 before
        quoting.
    :returns: dict with two keys.
        ``"info"`` is the serialized HTML of the first ``div.bookinfo``
        element, or ``book_name`` itself when the page has no such element.
        ``"description"`` is the serialized HTML of the element that follows
        the parent of that ``div`` (re-tagged as ``<div>``), or the string
        ``"NOT FOUND"`` when there is no such element.
    """
    quoted_name = urllib.quote_plus(book_name.encode("utf-8"))
    response = requests.get(FIND_URL + quoted_name)
    doc = lxml.html.fromstring(response.content)
    # Resolve links against FIND_URL so the returned fragments do not carry
    # site-relative URLs.
    doc.make_links_absolute(FIND_URL)

    matches = doc.cssselect("div.bookinfo")
    if not matches:
        return {"info": book_name,
                "description": "NOT FOUND"}
    div_info = matches[0]

    # getparent()/getnext() return None when there is no parent / no
    # following sibling; fall back to the sentinel instead of crashing.
    parent = div_info.getparent()
    description = parent.getnext() if parent is not None else None
    if description is None:
        description_html = "NOT FOUND"
    else:
        description.tag = "div"
        description_html = lxml.html.tostring(
            description, encoding="utf8").decode("utf8")

    info_html = lxml.html.tostring(div_info, encoding="utf8").decode("utf8")
    return {"info": info_html,
            "description": description_html}
def generate_report(filename, groups):
    """Render ``output_template.html`` with ``groups`` and write it to disk.

    The template is looked up in the current working directory and the
    rendered output is written UTF-8 encoded.

    :param filename: path of the file to create or overwrite.
    :param groups: object handed to the template under the name ``groups``.
    """
    env = jinja2.Environment(loader=jinja2.FileSystemLoader("."))
    template = env.get_template("output_template.html")
    stream = template.stream(groups=groups)
    # dump() only closes files it opened itself, so own the handle here to
    # make sure the report is flushed and closed even if rendering fails.
    with open(filename, "wb") as output:
        stream.dump(output, "utf-8")
def main():
    """Collect info for every ``.fb2`` file under BOOK_FOLDER into books.html.

    Each directory visited by ``os.walk`` becomes a group named after the
    directory path with ``BOOK_FOLDER`` removed. File names are split at the
    first space into a year and a book name; the name is looked up with
    ``find_book_info``. Directories without ``.fb2`` files produce no group.
    The collected groups are passed to ``generate_report``.
    """
    groups = []
    for dirpath, dirnames, filenames in os.walk(BOOK_FOLDER):
        group = dirpath.replace(BOOK_FOLDER, "")
        # Progress output: one line per visited directory.
        print(group)
        books = []
        for filename in filenames:
            if not filename.endswith(".fb2"):
                continue
            stem = os.path.splitext(filename)[0]
            if " " in stem:
                year, name = stem.split(" ", 1)
            else:
                # No "<year> <title>" separator: keep the whole stem as the
                # title instead of aborting the run with a ValueError.
                year, name = "", stem
            book = find_book_info(name)
            book["year"] = year
            book["name"] = name
            book["filename"] = filename
            books.append(book)
        if books:
            groups.append({"name": group, "books": books})
    generate_report("books.html", groups)
# Run the report generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8">
<title>Книги — победители премий</title>
<link rel="stylesheet" type="text/css" href="http://s.livelib.ru/css/style.css" />
<style>
body {margin: 3% 10%}
</style>
</head>
<body>
<p>
<a href="http://rutor.org/torrent/248905/">Раздача с книгами</a> <br>
<a href="https://gist.github.com/reclosedev/bf58c122d3c8eb47787d">Скрипт на Python 2.7 (зависимости: requests, lxml, jinja2)</a>
</p>
{# Expects `groups`: a list of {name, books}; every book carries year, info
   and description. group.name and book.year are plain text and are escaped
   explicitly. book.info and book.description are emitted as-is because the
   script passes serialized HTML fragments in them. #}
{% for group in groups %}
<h2>{{ group.name|e }}</h2>
<table style="width: 100%">
{% for book in group.books %}
<tr><td colspan="2">{{ book.year|e }}</td></tr>
<tr>
<td>{{ book.info }}</td>
<td>{{ book.description }}</td>
</tr>
<tr><td colspan="2" style="border: none"><hr></td></tr>
{% endfor %}
</table>
<br>
{% endfor %}
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment