Created
August 16, 2018 12:58
-
-
Save buren/0c15aea508913b6ba76089bdb55463aa to your computer and use it in GitHub Desktop.
Given a CSV file with expected redirects - verify that each redirect is performed correctly.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# USAGE:
# $ check_redirects --help
# or
# $ ruby check_redirects.rb --help
require 'bundler/inline' | |
gemfile do | |
source 'https://rubygems.org' | |
gem 'honey_format', '~> 0.18' | |
gem 'http', '~> 3.3' | |
end | |
require 'optparse' | |
require 'honey_format' | |
require 'http' | |
# Checks HTTP redirects: requests a URL and compares the response's
# Location header with the redirect target that was expected.
class RedirectChecker
  # results - one [success, url, location, expected] entry per #check call.
  # domain  - reader kept for interface compatibility; nothing in this class
  #           assigns @domain, so it returns nil.
  attr_reader :results, :domain

  def initialize
    # Start with an empty list so #results is usable before the first check.
    @results = []
  end

  # Requests from_url and records whether it redirects as expected.
  #
  # @param from_url [String] URL to request; whitespace is stripped and
  #   'http://' is prepended when no http(s) scheme is present
  # @param expected_to [String, nil] expected Location header value; nil or a
  #   blank string means "no redirect expected"
  # @return [Array] [success, url, location, expected] - the same array is
  #   appended to #results. location/expected are nil when absent.
  def check(from_url, expected_to)
    url = to_url(from_url)
    expected = normalize_expected(expected_to)
    # NOTE(review): http.rb documents its entry point as `HTTP`; confirm the
    # `Http` constant resolves with the gem version pinned by the script.
    response = Http.get(url)
    location = utf8_location(response)
    success = location == expected # both nil - no redirect is the success case
    report_failure(url, expected, location) unless success
    [success, url, location, expected].tap { |r| @results << r }
  end

  private

  # Blank/nil expectations become nil so they can equal a missing Location.
  def normalize_expected(expected_to)
    expected = remove_spaces(expected_to)
    expected.empty? ? nil : expected.force_encoding('UTF-8')
  end

  # Returns the Location header tagged as UTF-8, or nil when there is none.
  # Paths can contain scary characters: 'sa-far-du-ersatt\xE2\x80\xA6marens-flygstrul/'
  def utf8_location(response)
    location = response.headers['Location']
    # Work on a copy: force_encoding mutates its receiver and would raise on
    # a frozen header value.
    location && location.dup.force_encoding('UTF-8')
  end

  # Prints a human-readable line describing a failed expectation.
  def report_failure(url, expected, location)
    want = expected ? "should redirect to #{expected}" : 'should not redirect'
    got = location ? "instead redirected to #{location}" : "didn't redirect"
    puts "[FAILED] #{url} #{want} but #{got}."
  end

  # Removes every whitespace character; nil becomes an empty string.
  def remove_spaces(string)
    string.to_s.gsub(/[[:space:]]/, '')
  end

  # Strips whitespace and prepends scheme unless the URL already has one.
  def to_url(url, scheme: 'http://')
    url = remove_spaces(url)
    return url if url.start_with?('http://', 'https://')

    "#{scheme}#{url}"
  end
end
# Builds the default results path next to the input file.
# A trailing ".csv" (any letter case) is replaced by "-redirect-results.csv";
# for any other name the suffix is appended. The result therefore never
# equals input_path, so the input file cannot be overwritten by the results.
def default_output_path(input_path)
  "#{input_path.sub(/\.csv\z/i, '')}-redirect-results.csv"
end

# --- Command line parsing ---------------------------------------------------
options = {}
OptionParser.new do |parser|
  parser.banner = 'Usage: ruby check_redirects.rb --help'
  parser.default_argv = ARGV

  parser.on('--input=val0', String, 'CSV file path (required) - must be a file with two columns: from_url, to_url') do |string|
    options[:input] = string
  end

  parser.on('--output=val0', String, 'CSV output path (optional)') do |string|
    options[:output] = string
  end

  parser.on('-h', '--help', 'How to use') do
    puts parser
    exit
  end
end.parse!

input_path = options[:input] || raise(ArgumentError, '--input required - CSV file path must be provided')
output_path = options[:output] || default_output_path(input_path)

# --- Run the checks ---------------------------------------------------------
puts "=== [CheckRedirect] Reading input file #{input_path} ==="
checker = RedirectChecker.new
input_csv = HoneyFormat::CSV.new(File.read(input_path))

puts "=== [CheckRedirect] Going to check #{input_csv.rows.length} redirects ==="
header = %w[success from redirected_to expected_redirect]
# Each row is expected to expose from_url and to_url (the input CSV columns).
rows = input_csv.rows.map { |row| checker.check(row.from_url, row.to_url) }

# --- Write the report -------------------------------------------------------
output_csv = HoneyFormat::Matrix.new(rows, header: header).to_csv
File.write(output_path, output_csv)
puts "=== [CheckRedirect] Wrote output to #{output_path} ==="
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment