Skip to content

Instantly share code, notes, and snippets.

@ephemerr
Created April 19, 2023 20:35
Show Gist options
  • Save ephemerr/c3b6aad2a531307c32febbe5b8cbb548 to your computer and use it in GitHub Desktop.
Save ephemerr/c3b6aad2a531307c32febbe5b8cbb548 to your computer and use it in GitHub Desktop.
Example: merge multiple FB2 files into one
from bs4 import BeautifulSoup
import os
# Merge the first <section> of every FB2 file in the current working directory
# into a single FictionBook 2.0 document.
#
# For each "*.fb2" file (in sorted filename order) the first <section> element
# is extracted; files whose section or section title cannot be found are
# reported and skipped. The collected sections are wrapped in one <body> and
# written, together with the fixed metadata below, to `output_name`.

# Name of the merged book. It is written into the same directory that is being
# scanned, so it must be excluded from the inputs on subsequent runs.
output_name = "res.fb2"

# XML prolog, root element and <description> metadata of the merged book.
xml = (
    '<?xml version="1.0" encoding="UTF-8"?>'
    '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">'
    '<description>'
    ' <title-info>'
    ' <genre>foreign_prose</genre>'
    ' <genre>literature_20</genre>'
    ' <author>'
    ' <first-name>Рэй</first-name>'
    ' <last-name>Брэдбери</last-name>'
    ' </author>'
    ' <book-title>Рассказы о Гринтауне</book-title>'
    ' <keywords>лирика,магический реализм,дневники писателей,философская фантастика,психологическая фантастика</keywords>'
    ' <lang>ru</lang>'
    ' <src-lang>en</src-lang>'
    ' <sequence name="Гринтаунский цикл"/>'
    ' </title-info>'
    '</description>'
)

# Book-level title. The FictionBook schema only allows <title> inside <body>
# (before the first <section>), not as a direct child of <FictionBook>, so it
# is emitted at the start of the body rather than after <description>.
book_title = (
    '<title><p>Рэй Брэдбери</p>'
    '<p>Рассказы о Гринтауне</p>'
    '</title>'
)

sections = []
# Sorted so the order of the merged stories is deterministic; os.listdir()
# returns entries in arbitrary order.
for filename in sorted(os.listdir(os.getcwd())):
    print(filename)
    # Accept only real FB2 files and never re-ingest our own previous output.
    if not filename.lower().endswith(".fb2") or filename == output_name:
        continue
    # Read raw bytes so the parser honours the encoding declared in each
    # file's XML prolog (FB2 files are not always UTF-8).
    with open(os.path.join(os.getcwd(), filename), 'rb') as f:
        soup = BeautifulSoup(f.read(), features="xml")
    section = soup.find("section")
    # A file without a <section>, or whose section has no <title>, is skipped.
    if section is None or section.title is None:
        print("TITLE: not found")
        continue
    print(f"TITLE: {section.title.text}")
    sections.append(str(section))

body = "<body>" + book_title + "".join(sections) + "</body>"
xml = xml + body + "</FictionBook>"
# The prolog declares UTF-8, so the file must actually be written as UTF-8
# regardless of the platform's default encoding.
with open(output_name, 'w', encoding="utf-8") as f:
    f.write(xml)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment