Last active
April 9, 2024 05:16
-
-
Save dkam/35a5197e79bc69047f0eb3a2f083588d to your computer and use it in GitHub Desktop.
Read and write ePub metadata
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'bundler/inline' | |
#gemfile do | |
# source 'https://rubygems.org' | |
# gem 'zip' | |
# gem 'nokogiri' | |
# gem 'tty-prompt' | |
# gem 'debug' | |
#end | |
require 'zip' | |
require 'nokogiri' | |
require 'tty-prompt' | |
require 'debug' | |
# Reads and edits the Calibre series metadata stored in an ePub file.
#
# An ePub is a zip archive. META-INF/container.xml names the OPF package
# document, and the series information lives in that document as
# <meta name="calibre:series" content="..."/> and
# <meta name="calibre:series_index" content="..."/> elements.
class Epub
  # Raised when the archive lacks a piece needed to locate or edit the OPF document.
  class Error < StandardError; end

  OPF_NAMESPACE_URL = 'http://www.idpf.org/2007/opf'
  CONTAINER_PATH = 'META-INF/container.xml'
  SERIES_META = 'calibre:series'
  SERIES_INDEX_META = 'calibre:series_index'

  # @param file [String] path to the ePub archive
  def initialize(file)
    @file = file
  end

  # Opens the archive, parses the OPF document and yields
  # (zip_file, opf_doc, opf_name) to the block, if one is given.
  #
  # The readers below rely on Zip::File.open handing back the value of its
  # block, so this method evaluates to whatever the caller's block returned.
  #
  # @raise [Epub::Error] if the OPF entry named by container.xml is missing
  def read_epub
    Zip::File.open(@file) do |zip_file|
      opf_name = find_opf_file(zip_file)
      opf_entry = zip_file.find_entry(opf_name)
      raise Error, "#{@file}: OPF file #{opf_name.inspect} not found in archive" unless opf_entry

      opf_doc = Nokogiri::XML(opf_entry.get_input_stream.read)
      yield zip_file, opf_doc, opf_name if block_given?
    end
  end

  # Like #read_epub, but after the block has run the (possibly modified)
  # OPF document is serialized back into the archive under the same name.
  def write_epub
    read_epub do |zip_file, opf_doc, opf_name|
      yield zip_file, opf_doc, opf_name if block_given?
      zip_file.get_output_stream(opf_name) do |output_stream|
        output_stream.write(opf_doc.to_xml)
      end
    end
  end

  # @return the parsed OPF document
  def opf
    read_epub { |_zip_file, opf_doc, _opf_name| opf_doc }
  end

  # @return [String, nil] the series name, or nil when none is recorded
  def series
    meta_content(SERIES_META)
  end

  # Sets the series name. Passing nil removes the element; when there is no
  # element to remove, nothing is added.
  def series=(new_series_name)
    update_meta(SERIES_META, new_series_name)
  end

  # @return [String, nil] the series index, or nil when none is recorded
  def series_index
    meta_content(SERIES_INDEX_META)
  end

  # Sets the series index. Non-string values (e.g. 2 or 1.5) are stored via
  # #to_s. Passing nil removes the element; when there is no element to
  # remove, nothing is added.
  def series_index=(new_si)
    update_meta(SERIES_INDEX_META, new_si)
  end

  # Reads META-INF/container.xml and returns the 'full-path' attribute of its
  # rootfile element, i.e. the archive path of the OPF document.
  #
  # @raise [Epub::Error] if container.xml or its rootfile path is missing
  def find_opf_file(zip_file)
    container_entry = zip_file.find_entry(CONTAINER_PATH)
    raise Error, "#{@file}: #{CONTAINER_PATH} not found in archive" unless container_entry

    container_doc = Nokogiri::XML(container_entry.get_input_stream.read)
    rootfile = container_doc.at_xpath('xmlns:container/xmlns:rootfiles/xmlns:rootfile')
    full_path = rootfile && rootfile['full-path']
    raise Error, "#{@file}: #{CONTAINER_PATH} does not name a rootfile" unless full_path

    full_path
  end

  private

  # Finds the <meta> element whose name attribute equals +name+.
  def meta_element(opf_doc, name)
    opf_doc.at_css(%(meta[name="#{name}"]))
  end

  # Returns the content attribute of the named <meta> element, or nil.
  def meta_content(name)
    read_epub do |_zip_file, opf_doc, _opf_name|
      element = meta_element(opf_doc, name)
      element['content'] if element
    end
  end

  # Updates, removes (value nil) or creates the named <meta> element and
  # writes the OPF document back to the archive.
  def update_meta(name, value)
    write_epub do |_zip_file, opf_doc, _opf_name|
      element = meta_element(opf_doc, name)
      if element && value.nil?
        element.remove
      elsif element
        element['content'] = value.to_s
      elsif !value.nil?
        # Only create an element when there is a value to store; clearing an
        # absent value must not leave an empty <meta> behind.
        append_meta(opf_doc, name, value.to_s)
      end
    end
  end

  # Appends <meta name="..." content="..."/> to the OPF <metadata> element.
  def append_meta(opf_doc, name, content)
    metadata_element = opf_doc.at_css('metadata, opf|metadata', 'opf' => OPF_NAMESPACE_URL)
    raise Error, "#{@file}: OPF document has no <metadata> element" unless metadata_element

    meta = Nokogiri::XML::Node.new('meta', opf_doc)
    meta['name'] = name
    meta['content'] = content
    metadata_element.add_child(meta)
  end
end
# Command-line entry point.
#
# Usage: FILE... (get|set) (series|series_index) [NEW_VALUE]
#
# Arguments before the action that name existing files are the ePubs to
# process (in sorted order); other arguments before the action are ignored.
# `get` prints each file's current value. `set` assigns NEW_VALUE and prints
# the old and new values; when NEW_VALUE is omitted it is nil, which the
# Epub setters treat as "remove the element".
#
# @param argv [Array<String>] command-line arguments
# @raise [ArgumentError] if the action or attribute is missing or unknown
def run(argv)
  action_index = argv.index { |arg| %w[get set].include?(arg) }
  raise ArgumentError, 'Action must be :get or :set' unless action_index

  action = argv[action_index].to_sym
  attribute = argv[action_index + 1]
  new_value = argv[action_index + 2]
  unless %w[series series_index].include?(attribute)
    raise ArgumentError, "Attribute must be 'series' or 'series_index'"
  end

  files = argv.first(action_index).select { |arg| File.exist?(arg) }.sort
  # The two attributes have historically used different separators in the
  # `set` report; keep the output format stable.
  arrow = attribute == 'series' ? '=>' : '->'

  files.each do |file|
    epub = Epub.new(file)
    old_value = attribute == 'series' ? epub.series : epub.series_index

    if action == :get
      puts "#{file} : #{old_value}"
      next
    end

    if attribute == 'series'
      epub.series = new_value
    else
      epub.series_index = new_value
    end
    puts "#{file} : #{old_value} #{arrow} #{new_value}"
  end
end
# Invoke the CLI only when this file is executed directly, not when it is loaded by another script.
run(ARGV) if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment