Skip to content

Instantly share code, notes, and snippets.

@KJTsanaktsidis
Created February 13, 2024 00:39
Show Gist options
  • Save KJTsanaktsidis/f85be084d61aca54f8493ab63fe0707f to your computer and use it in GitHub Desktop.
Save KJTsanaktsidis/f85be084d61aca54f8493ab63fe0707f to your computer and use it in GitHub Desktop.
utf8_validation_benchmark.rb
# frozen_string_literal: true
# Declare this script's gem dependencies inline through Bundler, so that no
# separate Gemfile is needed to run the benchmark.
puts "setting up gems..."
require 'bundler/inline'
gemfile do
gem 'activesupport', '~> 7'
gem 'benchmark-ips', '~> 2'
# Use the gemspec found in the current working directory.
# NOTE(review): presumably the script is meant to be run from the root of an
# Oj checkout, so that `require 'oj'` below loads the build under test --
# confirm.
gemspec path: '.'
end
require 'benchmark/ips'
# NOTE(review): SecureRandom is not referenced anywhere in the visible script.
require 'securerandom'
require 'oj'
require 'active_support/all'
# NOTE(review): presumably this makes Oj handle the ActiveSupport::JSON.encode
# calls benchmarked below -- confirm against Oj's Rails documentation.
Oj.optimize_rails
# Use a fixed seed so that runs against different Oj builds all benchmark the
# same generated strings.
rng = Random.new(129_021_596_923_479_670_633_526_415_686_503_548_047)
# Target size of every generated testcase string.
ONE_MEGABYTE = 1 * 1024 * 1024
# Alphabet for the printable testcase: ASCII letters followed by digits. Each
# character is listed exactly once so that a uniformly random index picks every
# character with equal probability (the digit '0' used to be listed twice).
PRINTABLE_CHARS = [*'a'..'z', *'A'..'Z', *'0'..'9'].freeze
# NOTE: the generators below build their strings one character at a time.
# That is slow, but it keeps them simple.
#
# Testcase: ONE_MEGABYTE characters drawn at random from PRINTABLE_CHARS, i.e.
# printable characters that never need HTML escaping.
puts "generating testcase: long_7bit_printable_string..."
long_7bit_printable_string = +''
until long_7bit_printable_string.length >= ONE_MEGABYTE
  random_index = rng.rand(PRINTABLE_CHARS.size)
  long_7bit_printable_string << PRINTABLE_CHARS[random_index]
end
# Testcase: ONE_MEGABYTE random ASCII characters (codepoints 0..127). Some of
# them might need HTML escaping, but every one is a single byte in UTF-8.
puts "generating testcase: long_ascii_string..."
long_ascii_string = +''
until long_ascii_string.length >= ONE_MEGABYTE
  ascii_codepoint = rng.rand(128)
  # Appending an Integer to a String appends the character with that codepoint.
  long_ascii_string << ascii_codepoint
end
# Testcase: ONE_MEGABYTE copies of '<', so every character needs HTML escaping.
puts "generating testcase: long_angle_bracket_string..."
long_angle_bracket_string = '<' * ONE_MEGABYTE
# Testcase: at least ONE_MEGABYTE bytes of random Unicode characters, most of
# which are multibyte in UTF-8. The final character may push the size a few
# bytes past ONE_MEGABYTE.
puts "generating testcase: long_utf8_multibyte_string..."
long_utf8_multibyte_string = +''
while long_utf8_multibyte_string.bytesize < ONE_MEGABYTE
  codepoint = rng.rand(0x110000)
  # U+D800..U+DFFF are UTF-16 surrogates. pack('U') would happily encode them,
  # but the resulting bytes are not valid UTF-8, so skip those draws to keep
  # the testcase a valid UTF-8 string. Skipped draws still come from the seeded
  # rng, so the generated string stays deterministic.
  next if (0xD800..0xDFFF).cover?(codepoint)

  long_utf8_multibyte_string << [codepoint].pack('U')
end
puts "starting benchmark.."
# Report-name prefix => generated string, in the order they are benchmarked.
testcases = {
  'long_7bit_printable_string' => long_7bit_printable_string,
  'long_ascii_string' => long_ascii_string,
  'long_angle_bracket_string' => long_angle_bracket_string,
  'long_utf8_multibyte_string' => long_utf8_multibyte_string
}
# Mode label used in the report name => value assigned to
# ActiveSupport::JSON::Encoding.escape_html_entities_in_json for that report.
escape_modes = { 'RailsXEsc' => true, 'RailsEsc' => false }

Benchmark.ips do |bm|
  testcases.each do |testcase_name, testcase_string|
    escape_modes.each do |mode_name, escape_html|
      bm.report("#{testcase_name} with #{mode_name} mode") do
        # The flag is assigned inside the measured block, so it is set
        # immediately before every encode call.
        ActiveSupport::JSON::Encoding.escape_html_entities_in_json = escape_html
        ActiveSupport::JSON.encode(testcase_string)
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment