Skip to content

Instantly share code, notes, and snippets.

@GustavoCaso
Last active October 22, 2018 13:32
Show Gist options
  • Save GustavoCaso/524861c48e8b2f7b9b8ec3fffba24190 to your computer and use it in GitHub Desktop.
Save GustavoCaso/524861c48e8b2f7b9b8ec3fffba24190 to your computer and use it in GitHub Desktop.
Benchmark for Analyzer
require 'bundler/inline'
require 'benchmark'
gemfile do
source 'https://rubygems.org'
gem 'pry'
end
# Number of payloads generated per batch, taken from the first CLI argument.
# The fallback has to be applied BEFORE the conversion: nil.to_i is 0 and 0 is
# truthy in Ruby, so `ARGV[0].to_i || 100_000` could never reach the default.
number_of_job_payload = (ARGV[0] || 100_000).to_i
# Shared counting logic for the benchmarked analyzers.
#
# Subclasses implement #group_payloads_attributes_by_job, which extracts
# [attribute_key, attribute_value] pairs from raw payload strings and yields
# them one job at a time. This class tallies how often each value occurs per
# attribute key in @counts_per_attribute.
class Base
  # @param where_conditions [Hash] attribute key => object responding to
  #   #include? that holds the accepted (unquoted) values for that key
  # @param group_by [Array<String>] attribute names to extract and count
  def initialize(where_conditions: {}, group_by: %w[job_class shop_id api_client_id target_hostname])
    @where_conditions = where_conditions
    @group_by = group_by
    # attribute key => { attribute value => occurrences }
    @counts_per_attribute = Hash.new { |hash, key| hash[key] = Hash.new(0) }
  end

  # Processes every batch in turn.
  #
  # @param batches [Enumerable] collection of batches, each passed to #process
  def process_payloads(batches)
    batches.each do |job_payloads|
      process(job_payloads)
    end
  end

  # Counts the attributes of every job in the batch that satisfies the where
  # conditions.
  #
  # @param job_payloads [Object] batch handed to #group_payloads_attributes_by_job
  def process(job_payloads)
    group_payloads_attributes_by_job(job_payloads) do |job_payload_attributes|
      # Values arrive as captured from the raw payload, so string values still
      # carry their surrounding double quotes.
      sanitized_attributes = job_payload_attributes.map do |attribute_key, attribute_value|
        [attribute_key, attribute_value.delete('"')]
      end

      # Decide for the whole job before counting anything. Bailing out in the
      # middle of the attribute loop would leave the attributes that precede
      # the failing one counted for a job that is supposed to be excluded.
      next unless satisfies_where_conditions?(sanitized_attributes)

      sanitized_attributes.each do |attribute_key, attribute_value|
        @counts_per_attribute[attribute_key][attribute_value] += 1
      end
    end
  end

  # Extracts attribute pairs from the payloads and yields them per job.
  #
  # @raise [NotImplementedError] always; subclasses must override
  def group_payloads_attributes_by_job(job_payloads)
    raise NotImplementedError
  end

  # Regexp used to scan payloads. When "shop_id" is requested it is matched
  # only through #shop_id_regex and removed from the generic attribute
  # alternation; the two patterns are then combined with Regexp.union.
  #
  # @return [Regexp]
  def pattern_to_extract_attributes
    if @group_by.include?("shop_id")
      # Work on a copy so the caller's group_by list is not mutated.
      group_by_dup = @group_by.clone
      group_by_dup.delete("shop_id")
      Regexp.union(shop_id_regex, attributes_regex(group_by_dup))
    else
      attributes_regex(@group_by)
    end
  end

  # Matches a shop_id that is the first key of the first "arguments" element,
  # capturing the key and its (quoted string or numeric) value.
  #
  # @return [Regexp]
  def shop_id_regex
    /arguments":\[{\"(shop_id)\":(\"[a-zA-Z0-9:\."-]*\"|\d*)/
  end

  # Matches any of the given attribute names as a quoted JSON key, capturing
  # the key and its (quoted string or numeric) value.
  #
  # @param attributes [Array<String>] attribute names to match
  # @return [Regexp]
  def attributes_regex(attributes)
    /\"(#{Regexp.union(*attributes)})\":(\"[a-zA-Z0-9:\."-]*\"|\d*)/
  end

  private

  # True when every attribute that has a where condition carries an accepted
  # value. Attributes without a condition always pass.
  def satisfies_where_conditions?(sanitized_attributes)
    sanitized_attributes.all? do |attribute_key, attribute_value|
      accepted_values = @where_conditions[attribute_key]
      !accepted_values || accepted_values.include?(attribute_value)
    end
  end
end
# Scans the whole batch as one joined string, then cuts the flat list of
# attribute pairs back into per-job groups of equal size.
class AnalyzerGlobalScan < Base
  # Yields one array of [attribute_key, attribute_value] pairs per job.
  #
  # The per-job group size is derived by dividing the number of extracted
  # pairs by the number of payloads, so every payload is assumed to contribute
  # the same number of pairs.
  #
  # @param job_payloads [Array<String>] raw payload strings of one batch
  def group_payloads_attributes_by_job(job_payloads)
    # An empty batch would otherwise divide by zero below.
    return if job_payloads.empty?

    batch_of_payloads = job_payloads.join(", ")
    # scan returns one array of capture groups per match; groups that did not
    # take part in the match are nil, so flatten + compact + each_slice(2)
    # rebuilds clean [key, value] pairs.
    batch_payloads_attributes = batch_of_payloads.scan(pattern_to_extract_attributes).flatten.compact.each_slice(2).to_a
    # Nothing matched: there is nothing to yield, and a slice size of 0 would
    # make each_slice raise ArgumentError.
    return if batch_payloads_attributes.empty?

    attributes_per_payload = batch_payloads_attributes.size / job_payloads.size
    batch_payloads_attributes.each_slice(attributes_per_payload) do |job_payload|
      yield job_payload
    end
  end
end
# Scans every payload on its own and yields its attribute pairs directly.
class AnalyzerEachScan < Base
  # Yields one array of [attribute_key, attribute_value] pairs per payload.
  #
  # @param payloads [Array<String>] raw payload strings of one batch
  def group_payloads_attributes_by_job(payloads)
    # The pattern only depends on @group_by, so build it once per batch
    # instead of recompiling the regexp for every single payload.
    extraction_pattern = pattern_to_extract_attributes

    payloads.each do |payload|
      scanned_results = payload.scan(extraction_pattern)
      # Each match is an array of capture groups in which the groups that did
      # not take part are nil. Flatten and compact to drop them, then regroup
      # by two (attribute_key, attribute_value).
      yield scanned_results.flatten.compact.each_slice(2).to_a
    end
  end
end
# Scans the whole batch as one joined string and starts a new per-job group
# every time a job_class pair shows up.
class AnalyzerGlobalScanCustomIteration < Base
  # Yields one array of [attribute_key, attribute_value] pairs per job.
  #
  # @param job_payloads [Array<String>] raw payload strings of one batch
  def group_payloads_attributes_by_job(job_payloads)
    matches = job_payloads.join(", ").scan(pattern_to_extract_attributes)

    jobs = []
    matches.each do |captures|
      # Drop the capture groups that did not take part in this match.
      pair = captures.compact
      # A job_class pair marks the beginning of the next job.
      jobs << [] if pair.include?('job_class')
      jobs.last << pair
    end

    jobs.each { |job_attributes| yield job_attributes }
  end
end
# Same grouping idea as the custom iteration analyzer, but with a single
# two-capture regexp and explicit removal of repeated pairs within a job.
class AnalyzerGlobalScanCustomIterationBasicRegex < Base
  # Yields one array of [attribute_key, attribute_value] pairs per job.
  #
  # @param job_payloads [Array<String>] raw payload strings of one batch
  def group_payloads_attributes_by_job(job_payloads)
    pairs = job_payloads.join(", ").scan(pattern_to_extract_attributes)

    jobs = []
    pairs.each do |pair|
      if pair.include?('job_class')
        # Every job payload is expected to carry a job_class, so it is used
        # as the marker that opens a new group.
        jobs << [pair]
      elsif !jobs.last.include?(pair)
        # Skipping pairs already present handles a shop_id that appears more
        # than once in the same payload.
        jobs.last << pair
      end
    end

    jobs.each { |job_attributes| yield job_attributes }
  end

  # Plain key/value pattern over all group_by attributes.
  #
  # @return [Regexp]
  def pattern_to_extract_attributes
    /\"(#{Regexp.union(*@group_by)})\":(\"[a-zA-Z0-9:\."-]*\"|\d*)/
  end
end
# Scans the whole batch, flattens all matches into one space-separated string
# and splits that string in front of every "job_class" to recover the jobs.
class AnalyzerSplitByJobClass < Base
  # Yields one array of [attribute_key, attribute_value] pairs per job.
  #
  # @param job_payloads [Array<String>] raw payload strings of one batch
  def group_payloads_attributes_by_job(job_payloads)
    batch_of_payloads = job_payloads.join(", ")
    batch_of_payloads.scan(pattern_to_extract_attributes).join(' ').split(/(?=job_class)/).each do |attributes|
      # Pair the tokens up BEFORE removing duplicates. De-duplicating the flat
      # token list drops a value that two different attributes share, which
      # shifts every following key/value pair.
      yield attributes.split.each_slice(2).to_a.uniq
    end
  end

  # Plain key/value pattern over all group_by attributes.
  #
  # @return [Regexp]
  def pattern_to_extract_attributes
    /\"(#{Regexp.union(*@group_by)})\":(\"[a-zA-Z0-9:\."-]*\"|\d*)/
  end
end
# Job class names the generated payloads are sampled from. Frozen so the
# shared constant cannot be mutated by accident.
JOB_CLASSES = %w[
  Appscale::Jobs::AnalyzerTest::WebhookQueueJob
  Appscale::Jobs::AnalyzerTest::Whatever
  Appscale::Jobs::AnalyzerTest::ILikeThisOne
  Appscale::Jobs::AnalyzerTest::ImBackBaby
].freeze
# Builds one serialized job payload string for the given job class. The class
# name is interpolated twice: as the outer "class" and as the inner
# "job_class".
#
# @param job_class [String] job class name to embed
# @return [String] the payload string
def generate_job_payload(job_class)
  # %() literals keep the embedded double quotes readable; the fragments are
  # concatenated without any separator.
  [
    %({"class":"#{job_class}","args":[{"job_class":"#{job_class}",),
    %("job_id":"657a094f-d9ee-4f0a-92ea-bf8314482390","provider_job_id":null,),
    %("queue_name":"webhook","priority":null,),
    %("arguments":[{"shop_id":690933842,"_aj_symbol_keys":["shop_id"]}],),
    %("executions":0,"locale":"en","log_level":0,"attempt":0,"request_id":null,),
    %("queue_start":1540041993.7910662,"expected_run_time":1540041993.791,),
    %("pod_id":0,"privacy_level":null,"feature_set":null,"shop_id":690933842,),
    %("queued_by_shopify_version":"0aec3a435b6de9f4da41bc511e2257727f4cf6ef",),
    %("queued_by_section":"NilSectionGlobals","queued_with_readonly_master":false}]})
  ].join
end
# Builds a batch of payload strings, each for a randomly sampled job class.
#
# @param number_of_job_payloads [Integer] how many payloads to generate
# @return [Array<String>] the generated payloads
def genarate_job_payloads(number_of_job_payloads)
  number_of_job_payloads.times.map { generate_job_payload(JOB_CLASSES.sample) }
end
# Two independently generated batches, shared by every analyzer.
batches = Array.new(2) { genarate_job_payloads(number_of_job_payload) }

# Report label => analyzer under test, in the order they are reported.
analyzers = {
  'global_scan:' => AnalyzerGlobalScan.new,
  'global_scan_custom_iteration:' => AnalyzerGlobalScanCustomIteration.new,
  'global_scan_custom_iteration_basic_regex:' => AnalyzerGlobalScanCustomIterationBasicRegex.new,
  'split_by_job_class:' => AnalyzerSplitByJobClass.new,
  'each_scan:' => AnalyzerEachScan.new
}

Benchmark.bmbm(28) do |x|
  analyzers.each do |label, analyzer|
    x.report(label) { analyzer.process_payloads(batches) }
  end
end
@GustavoCaso
Copy link
Author

After talking with Moe, he pointed out that some method names are not very descriptive.

Also, you can now invoke the benchmark with the number of job payloads that you want to process:

ruby benchmark.rb 10000

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment