Dan-Q/bbc-news-with-sports.rb

## bbc-news-with-sports.rb
#!/usr/bin/env ruby
require 'bundler/inline'

# Dependencies:
# * open-uri - load remote URL content easily
# * nokogiri - parse/filter XML
gemfile do
  source 'https://rubygems.org'
  gem 'nokogiri'
end
require 'open-uri'

# GUID patterns for entries that must be dropped from the generated feed.
# iPlayer, Sounds, Ideas, news video and programme links are rejected; sport links are kept.
REJECT_GUIDS_MATCHING = %r{^https://www\.bbc\.(co\.uk|com)/(iplayer|sounds|ideas|news/videos|programmes)/}

# Titles of entries that must be dropped whatever their GUID looks like.
REJECT_TITLES_MATCHING = /^(BBC News app)$/

# Load the original RSS and comment out every <item> we do not want.
# URI.open is called explicitly: since Ruby 3.0 open-uri no longer extends Kernel#open, so a bare
# open('https://...') is treated as a local file path and fails.
# The block form closes the download handle as soon as the document has been parsed.
rss = URI.open('https://feeds.bbci.co.uk/news/rss.xml?edition=uk') { |feed| Nokogiri::XML(feed) }

# An item is rejected when its <guid> matches REJECT_GUIDS_MATCHING or its <title> matches
# REJECT_TITLES_MATCHING.
rejected_items = rss.css('item').select do |item|
  item.css('guid').text.match?(REJECT_GUIDS_MATCHING) ||
    item.css('title').text.match?(REJECT_TITLES_MATCHING)
end

# Replace each rejected item with an XML comment holding its original markup; "--" is rewritten
# so the markup can sit inside a comment.
rejected_items.each do |item|
  item.swap("<!-- [REJECTED] #{item.to_s.gsub(/--/, '[hyphen][hyphen]')} -->")
end

# BBC News "republishes" stories by suffixing their guids with #0, #1, #2 etc, which feed readers
# then show as duplicates. Cut everything from the "#" onwards so each story keeps a single guid.
rss.css('guid').each do |guid_node|
  guid_node.content = guid_node.content.gsub(/#.*$/, '')
end

# Now there might be duplicate <guid>s, which is usually harmless but isn't pretty (and violates the spec).
# For each distinct guid value, keep the first <item> carrying it and comment out the rest.
rss.css('guid').map(&:text).uniq.each do |guid|
  matching_items = rss.css('item').select { |item| item.css('guid').text == guid }

  # drop(1) gives [] when nothing matched; [1..-1] would give nil there and crash the loop below.
  matching_items.drop(1).each do |item|
    item.swap("<!-- [DUPLICATE] #{item.to_s.gsub(/--/, '[hyphen][hyphen]')} -->")
  end
end

# Tag us as the generator, keeping the upstream generator text after "Was:".
# <generator> is an optional channel element, so skip the tagging instead of crashing on nil
# when the feed does not carry one.
generator = rss.css('generator').first
if generator
  generator.content = "Dan Q's 'BBC News without the crap but with the sport' <https://danq.me/2024/03/09/bbc-news-without-the-crap/> generator. Was: #{generator.content}"
end

# Point the href of every <atom:link> element at the place this copy will be served from.
atom_links = rss.xpath('//atom:link')
atom_links.attr('href', '[URL TO WHERE YOU PLAN TO HOST THE FILE]')

# Serialise the filtered document to disk.
File.open('bbc-news-with-sport.xml', 'w') do |out|
  out.puts(rss.to_s)
end

## bbc-news-without-sports.rb
#!/usr/bin/env ruby
require 'bundler/inline'

# Dependencies:
# * open-uri - load remote URL content easily
# * nokogiri - parse/filter XML
gemfile do
  source 'https://rubygems.org'
  gem 'nokogiri'
end
require 'open-uri'

# GUID patterns for entries that must be dropped from the generated feed.
# Everything under the "sport" section is rejected, as are iPlayer, Sounds, Ideas, news video
# and programme links.
REJECT_GUIDS_MATCHING = %r{^https://www\.bbc\.(co\.uk|com)/(sport|iplayer|sounds|ideas|news/videos|programmes)/}

# Titles of entries that must be dropped whatever their GUID looks like.
REJECT_TITLES_MATCHING = /^(BBC News app)$/

# Load the original RSS and comment out every <item> we do not want.
# URI.open is called explicitly: since Ruby 3.0 open-uri no longer extends Kernel#open, so a bare
# open('https://...') is treated as a local file path and fails.
# The block form closes the download handle as soon as the document has been parsed.
rss = URI.open('https://feeds.bbci.co.uk/news/rss.xml?edition=uk') { |feed| Nokogiri::XML(feed) }

# An item is rejected when its <guid> matches REJECT_GUIDS_MATCHING or its <title> matches
# REJECT_TITLES_MATCHING.
rejected_items = rss.css('item').select do |item|
  item.css('guid').text.match?(REJECT_GUIDS_MATCHING) ||
    item.css('title').text.match?(REJECT_TITLES_MATCHING)
end

# Replace each rejected item with an XML comment holding its original markup; "--" is rewritten
# so the markup can sit inside a comment.
rejected_items.each do |item|
  item.swap("<!-- [REJECTED] #{item.to_s.gsub(/--/, '[hyphen][hyphen]')} -->")
end

# BBC News "republishes" stories by suffixing their guids with #0, #1, #2 etc, which feed readers
# then show as duplicates. Cut everything from the "#" onwards so each story keeps a single guid.
rss.css('guid').each do |guid_node|
  guid_node.content = guid_node.content.gsub(/#.*$/, '')
end

# Now there might be duplicate <guid>s, which is usually harmless but isn't pretty (and violates the spec).
# For each distinct guid value, keep the first <item> carrying it and comment out the rest.
rss.css('guid').map(&:text).uniq.each do |guid|
  matching_items = rss.css('item').select { |item| item.css('guid').text == guid }

  # drop(1) gives [] when nothing matched; [1..-1] would give nil there and crash the loop below.
  matching_items.drop(1).each do |item|
    item.swap("<!-- [DUPLICATE] #{item.to_s.gsub(/--/, '[hyphen][hyphen]')} -->")
  end
end

# Tag us as the generator, keeping the upstream generator text after "Was:".
# <generator> is an optional channel element, so skip the tagging instead of crashing on nil
# when the feed does not carry one.
generator = rss.css('generator').first
if generator
  generator.content = "Dan Q's 'BBC News without the crap' <https://danq.me/2025/02/03/bbc-news-rss-improved/> generator. Was: #{generator.content}"
end

# Point the href of every <atom:link> element at the place this copy will be served from.
atom_links = rss.xpath('//atom:link')
atom_links.attr('href', '[URL TO WHERE YOU PLAN TO HOST THE FILE]')

# Serialise the filtered document to disk.
File.open('bbc-news-no-sport.xml', 'w') do |out|
  out.puts(rss.to_s)
end
	#!/usr/bin/env ruby
	require 'bundler/inline'

	# Dependencies:
	# * open-uri - load remote URL content easily
	# * nokogiri - parse/filter XML
	gemfile do
	  source 'https://rubygems.org'
	  gem 'nokogiri'
	end
	require 'open-uri'

	# Regular expression describing the GUIDs to reject from the resulting RSS feed.
	# iPlayer, Sounds, Ideas, news video and programme links are dropped; sport links are kept.
	REJECT_GUIDS_MATCHING = /^https:\/\/www\.bbc\.(co\.uk|com)\/(iplayer|sounds|ideas|news\/videos|programmes)\//

	# Titles of entries to reject whatever their GUID looks like.
	REJECT_TITLES_MATCHING = /^(BBC News app)$/

	# Load the original RSS. URI.open is called explicitly: since Ruby 3.0 open-uri no longer
	# extends Kernel#open, so a bare open('https://...') is treated as a local file path and fails.
	# The block form closes the download handle as soon as the document has been parsed.
	rss = URI.open('https://feeds.bbci.co.uk/news/rss.xml?edition=uk') { |feed| Nokogiri::XML(feed) }

	# Comment out every <item> whose guid or title matches one of the reject patterns; "--" is
	# rewritten so the original markup can sit inside an XML comment.
	rss.css('item').select { |item|
	  item.css('guid').text =~ REJECT_GUIDS_MATCHING ||
	    item.css('title').text =~ REJECT_TITLES_MATCHING
	}.each { |item| item.swap("<!-- [REJECTED] #{item.to_s.gsub(/--/, '[hyphen][hyphen]')} -->") }

	# Strip the anchors off the <guid>s: BBC News "republishes" stories by using guids with #0, #1, #2 etc, which results in duplicates in feed readers
	rss.css('guid').each { |g| g.content = g.content.gsub(/#.*$/, '') }

	# Now there might be duplicate <guid>s, which is usually harmless but isn't pretty (and violates the spec).
	# For each distinct guid value, keep the first <item> carrying it and comment out the rest.
	rss.css('guid').map(&:text).uniq.each do |guid|
	  matching_items = rss.css('item').select { |item| item.css('guid').text == guid }

	  # drop(1) gives [] when nothing matched; [1..-1] would give nil there and crash the loop below.
	  matching_items.drop(1).each do |item|
	    item.swap("<!-- [DUPLICATE] #{item.to_s.gsub(/--/, '[hyphen][hyphen]')} -->")
	  end
	end

	# Tag us as the generator, keeping the upstream generator text after "Was:".
	# <generator> is an optional channel element, so skip the tagging when the feed has none.
	generator = rss.css('generator').first
	if generator
	  generator.content = "Dan Q's 'BBC News without the crap but with the sport' <https://danq.me/2024/03/09/bbc-news-without-the-crap/> generator. Was: #{generator.content}"
	end

	# Point the href of every <atom:link> element at the place this copy will be served from.
	rss.xpath('//atom:link').attr('href', '[URL TO WHERE YOU PLAN TO HOST THE FILE]')

	# Serialise the filtered document to disk.
	File.open('bbc-news-with-sport.xml', 'w') { |f| f.puts(rss.to_s) }