ericboehs/kiwix-add.sh

## kiwix-add.sh
#!/bin/bash
# Rebuild the Kiwix library XML whenever the set of .zim files changes.
#
# The sorted list of .zim paths found under $library is compared with the
# list saved by the previous run in $xml/library.log. When the lists differ,
# a fresh library is built with kiwix-manage, moved over library.xml, and the
# pkgctl-kiwix service is restarted.
xml="/volume1/kiwix-share/"
library="/volume1/kiwix-share/"

# Read one path per line so that names containing spaces or glob characters
# stay intact (an unquoted $(find ...) is word-split and glob-expanded).
log=()
while IFS= read -r zim; do
	log+=("$zim")
done < <(find "$library" -name '*.zim' | sort)

# Load the list written by the previous run; it stays empty when there is
# no log file yet.
oldlog=()
if [ -f "$xml/library.log" ]; then
	while IFS= read -r zim; do
		oldlog+=("$zim")
	done < "$xml/library.log"
fi

# Compare newline-joined lists so that ("a b") and ("a" "b") are not
# mistaken for each other.
current=$(printf '%s\n' "${log[@]}")
previous=$(printf '%s\n' "${oldlog[@]}")

if [[ "$current" == "$previous" ]]; then
	echo No change
else
	echo Update library
	rm -f "$xml/library.log" "$xml/library_new.xml"
	for zim in "${log[@]}"; do
		printf '%s\n' "$zim"
		printf '%s\n' "$zim" >> "$xml/library.log"
		kiwix-manage "$xml/library_new.xml" add "$zim"
	done

	# Only swap the library and restart the service when a new library file
	# was actually produced (there is none when no .zim files were found).
	if [ -f "$xml/library_new.xml" ]; then
		mv "$xml/library_new.xml" "$xml/library.xml"
		systemctl restart pkgctl-kiwix
	else
		echo "No new library was generated; keeping the existing library.xml" >&2
	fi
fi

## kiwix-content-parser.rb
require 'nokogiri'
require 'json'
require 'open3'
require 'tempfile'

# Names of the .zim files already present in the current directory.
existing_zims = Dir.glob('*.zim')

# Refresh the cached Kiwix listing when it is missing, empty, or more than
# 24 hours old.
listing = 'kiwix-content.html'
if !File.exist?(listing) || File.zero?(listing) || File.mtime(listing) < Time.now - 24 * 60 * 60
  puts "Downloading Kiwix listing. This may take a minute."

  # Download to a separate name first so that a failed or interrupted
  # transfer can never replace the cache with a partial file.
  partial = "#{listing}.part"
  if system('wget', '-O', partial, 'https://wiki.kiwix.org/wiki/Content')
    File.rename(partial, listing)
  else
    File.delete(partial) if File.exist?(partial)
    abort 'Failed to download the Kiwix listing.' unless File.exist?(listing)
    warn 'Failed to download the Kiwix listing; using the cached copy.'
  end
end

# Path to the cached copy of the Kiwix content listing
file_path = 'kiwix-content.html'

# Read and parse the HTML
html_content = File.read(file_path)
doc = Nokogiri::HTML(html_content)

# Parsed rows, one hash per English-language entry of the listing
rows = []

# Extract data only from the table with the ID 'zimtable'
doc.css('table#zimtable tr').each do |row|
  cells = row.css('td').map { |cell| cell.text.strip }

  # Skip rows with too few cells (header rows, if any) and non-English rows
  next if cells.size < 5
  next if cells[1] != "en"

  # An anchor without an href attribute yields nil, hence the safe navigation
  bittorrent_link = row.css('td a').find { |link| link['href']&.include?('.torrent') }
  bittorrent_url = bittorrent_link['href'] if bittorrent_link
  filename = bittorrent_url.split('/').last if bittorrent_url

  row_data = {
    name: cells[0],
    language: cells[1],
    size: cells[2],
    date_created: cells[3],
    flavor: cells[4],
    bittorrent_url: bittorrent_url,
    filename: filename
  }

  # Rows without a torrent link get no status at all
  if filename
    # Torrent name with the '.torrent' and '.zim' parts removed
    base = filename.gsub('.torrent', '').gsub('.zim', '')
    on_disk = existing_zims.find { |zim| zim.start_with?(base) }

    row_data[:filename_on_disk] = on_disk
    row_data[:full_filename] = base + '_' + row_data[:date_created] + '.zim'

    row_data[:status] =
      if existing_zims.include?(row_data[:full_filename])
        'Downloaded'
      elsif on_disk && !on_disk.include?(row_data[:date_created])
        # A local file shares the base name but carries a different date
        'Needs-Update'
      else
        'Not-Downloaded'
      end
  end

  rows << row_data
end

# Optional filters, kept for manual use:
# rows = rows.select { |row| row[:status] == 'Needs-Update' }
# rows = rows.select { |row| row[:status] == 'Downloaded' }

# Offer the parsed rows in fzf and, for every selected entry that is not
# already downloaded, wget its torrent file into
# /Volumes/docker/transmission/watch.
row_format = "%-4s\t%-50s %-10s %-15s %-20s %-15s\n"

# Print the header
puts format(row_format, "#", "Name", "Size", "Date Created", "Flavor", "Status")

# Build one menu line per row. The leading number is the 1-based index into
# rows; playlist flavors are left out of the menu.
menu = []
rows.each_with_index do |row, i|
  next if row[:flavor].include?('playlist')

  flavor = row[:flavor][0..19]
  menu << format(row_format, i + 1, row[:name], row[:size], row[:date_created], flavor, row[:status])
end

# Feed the menu to fzf on stdin; no shell or temporary file is involved.
begin
  selected, _fzf_status = Open3.capture2('fzf', '--multi', "--delimiter=\t", stdin_data: menu.join)
rescue Errno::ENOENT
  abort 'fzf is required but was not found in PATH.'
end

# Process selection
if selected.empty?
  puts "No selection made."
else
  selected.each_line do |line|
    # Recover the row from the leading number. A line without one is ignored
    # rather than being resolved to rows[-1].
    number = line[/\A\d+/].to_i
    row = rows[number - 1] if number > 0
    next unless row

    if ["Not-Downloaded", "Needs-Update"].include?(row[:status])
      if system("wget", "-P", "/Volumes/docker/transmission/watch", row[:bittorrent_url])
        puts "Downloading: #{row[:filename]}"
      else
        warn "Failed to fetch torrent: #{row[:filename]}"
      end
    else
      puts "Skipping downloaded or unavailable: #{row[:filename]}"
    end
  end
end
	#!/bin/bash
	# Rebuild the Kiwix library XML whenever the set of .zim files changes.
	#
	# The sorted list of .zim paths found under $library is compared with the
	# list saved by the previous run in $xml/library.log. When the lists differ,
	# a fresh library is built with kiwix-manage, moved over library.xml, and the
	# pkgctl-kiwix service is restarted.
	xml="/volume1/kiwix-share/"
	library="/volume1/kiwix-share/"

	# Read one path per line so that names containing spaces or glob characters
	# stay intact (an unquoted $(find ...) is word-split and glob-expanded).
	# The pipe must be a real, unescaped '|' for find's output to reach sort.
	log=()
	while IFS= read -r zim; do
		log+=("$zim")
	done < <(find "$library" -name '*.zim' | sort)

	# Load the list written by the previous run; it stays empty when there is
	# no log file yet.
	oldlog=()
	if [ -f "$xml/library.log" ]; then
		while IFS= read -r zim; do
			oldlog+=("$zim")
		done < "$xml/library.log"
	fi

	# Compare newline-joined lists so that ("a b") and ("a" "b") are not
	# mistaken for each other.
	current=$(printf '%s\n' "${log[@]}")
	previous=$(printf '%s\n' "${oldlog[@]}")

	if [[ "$current" == "$previous" ]]; then
		echo No change
	else
		echo Update library
		rm -f "$xml/library.log" "$xml/library_new.xml"
		for zim in "${log[@]}"; do
			printf '%s\n' "$zim"
			printf '%s\n' "$zim" >> "$xml/library.log"
			kiwix-manage "$xml/library_new.xml" add "$zim"
		done

		# Only swap the library and restart the service when a new library file
		# was actually produced (there is none when no .zim files were found).
		if [ -f "$xml/library_new.xml" ]; then
			mv "$xml/library_new.xml" "$xml/library.xml"
			systemctl restart pkgctl-kiwix
		else
			echo "No new library was generated; keeping the existing library.xml" >&2
		fi
	fi
	require 'nokogiri'
	require 'json'
	require 'open3'
	require 'tempfile'

	# Names of the .zim files already present in the current directory.
	existing_zims = Dir.glob('*.zim')

	# Refresh the cached Kiwix listing when it is missing, empty, or more than
	# 24 hours old.
	listing = 'kiwix-content.html'
	if !File.exist?(listing) || File.zero?(listing) || File.mtime(listing) < Time.now - 24 * 60 * 60
	  puts "Downloading Kiwix listing. This may take a minute."

	  # Download to a separate name first so that a failed or interrupted
	  # transfer can never replace the cache with a partial file.
	  partial = "#{listing}.part"
	  if system('wget', '-O', partial, 'https://wiki.kiwix.org/wiki/Content')
	    File.rename(partial, listing)
	  else
	    File.delete(partial) if File.exist?(partial)
	    abort 'Failed to download the Kiwix listing.' unless File.exist?(listing)
	    warn 'Failed to download the Kiwix listing; using the cached copy.'
	  end
	end

	# Path to the cached copy of the Kiwix content listing
	file_path = 'kiwix-content.html'

	# Read and parse the HTML
	html_content = File.read(file_path)
	doc = Nokogiri::HTML(html_content)

	# Parsed rows, one hash per English-language entry of the listing
	rows = []

	# Extract data only from the table with the ID 'zimtable'
	doc.css('table#zimtable tr').each do |row|
	  cells = row.css('td').map { |cell| cell.text.strip }

	  # Skip rows with too few cells (header rows, if any) and non-English rows
	  next if cells.size < 5
	  next if cells[1] != "en"

	  # An anchor without an href attribute yields nil, hence the safe navigation
	  bittorrent_link = row.css('td a').find { |link| link['href']&.include?('.torrent') }
	  bittorrent_url = bittorrent_link['href'] if bittorrent_link
	  filename = bittorrent_url.split('/').last if bittorrent_url

	  row_data = {
	    name: cells[0],
	    language: cells[1],
	    size: cells[2],
	    date_created: cells[3],
	    flavor: cells[4],
	    bittorrent_url: bittorrent_url,
	    filename: filename
	  }

	  # Rows without a torrent link get no status at all
	  if filename
	    # Torrent name with the '.torrent' and '.zim' parts removed
	    base = filename.gsub('.torrent', '').gsub('.zim', '')
	    on_disk = existing_zims.find { |zim| zim.start_with?(base) }

	    row_data[:filename_on_disk] = on_disk
	    row_data[:full_filename] = base + '_' + row_data[:date_created] + '.zim'

	    row_data[:status] =
	      if existing_zims.include?(row_data[:full_filename])
	        'Downloaded'
	      elsif on_disk && !on_disk.include?(row_data[:date_created])
	        # A local file shares the base name but carries a different date
	        'Needs-Update'
	      else
	        'Not-Downloaded'
	      end
	  end

	  rows << row_data
	end

	# Optional filters, kept for manual use:
	# rows = rows.select { |row| row[:status] == 'Needs-Update' }
	# rows = rows.select { |row| row[:status] == 'Downloaded' }

	# Offer the parsed rows in fzf and, for every selected entry that is not
	# already downloaded, wget its torrent file into
	# /Volumes/docker/transmission/watch.
	row_format = "%-4s\t%-50s %-10s %-15s %-20s %-15s\n"

	# Print the header
	puts format(row_format, "#", "Name", "Size", "Date Created", "Flavor", "Status")

	# Build one menu line per row. The leading number is the 1-based index into
	# rows; playlist flavors are left out of the menu.
	menu = []
	rows.each_with_index do |row, i|
	  next if row[:flavor].include?('playlist')

	  flavor = row[:flavor][0..19]
	  menu << format(row_format, i + 1, row[:name], row[:size], row[:date_created], flavor, row[:status])
	end

	# Feed the menu to fzf on stdin; no shell or temporary file is involved.
	begin
	  selected, _fzf_status = Open3.capture2('fzf', '--multi', "--delimiter=\t", stdin_data: menu.join)
	rescue Errno::ENOENT
	  abort 'fzf is required but was not found in PATH.'
	end

	# Process selection
	if selected.empty?
	  puts "No selection made."
	else
	  selected.each_line do |line|
	    # Recover the row from the leading number. A line without one is ignored
	    # rather than being resolved to rows[-1].
	    number = line[/\A\d+/].to_i
	    row = rows[number - 1] if number > 0
	    next unless row

	    if ["Not-Downloaded", "Needs-Update"].include?(row[:status])
	      if system("wget", "-P", "/Volumes/docker/transmission/watch", row[:bittorrent_url])
	        puts "Downloading: #{row[:filename]}"
	      else
	        warn "Failed to fetch torrent: #{row[:filename]}"
	      end
	    else
	      puts "Skipping downloaded or unavailable: #{row[:filename]}"
	    end
	  end
	end