Skip to content

Instantly share code, notes, and snippets.

@chinying
Created December 5, 2019 03:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chinying/bb69394bf3a73ff55682b98f6cc6d2ca to your computer and use it in GitHub Desktop.
Save chinying/bb69394bf3a73ff55682b98f6cc6d2ca to your computer and use it in GitHub Desktop.
require "jwt"
require "json"
# Time zone handed to Time.parse when converting log timestamps to epoch seconds.
LOCATION_SG = Time::Location.load("Asia/Singapore")
# Layout of the timestamps found in the logs, eg. 2019-11-21T08:24:24.702Z
# NOTE(review): the trailing "Z" is matched as a literal character here and the value is
# interpreted in LOCATION_SG; if the logs are really UTC the resulting epochs are shifted — confirm.
VAULT_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%LZ"
# A sample download record: {timestamp in epoch seconds, dataset id}.
alias SampleIdWithTimestamp = {Int32, String}
# Reads `filename` and returns its contents as an array of lines.
#
# The file is opened in block form, so the handle is closed as soon as the
# lines have been collected; the block's value is the method's result.
def get_file_lines(filename)
  File.open(filename) { |io| io.each_line.to_a }
end
# Collects the distinct `email` claims of every passwordless-login completion in `logs`.
#
# A line counts as a login when it contains
# `/auth/passwordless/complete?token=<jwt>`; lines without a token are ignored.
#
# The token is decoded with `verify: false, validate: false`, i.e. the signature and
# claims are NOT checked, so the returned emails are only as trustworthy as the logs.
# Nothing is rescued here: a token that JWT.decode rejects aborts the scan.
#
# Returns the set of `email` payload values seen across all matching lines.
def get_unique_logins(logs : Array(String)) : Set(JSON::Any)
  emails = Set(JSON::Any).new
  logs.each do |line|
    # JWT segments are base64url encoded, so `-` and `_` have to be accepted next to
    # the alphanumerics and the `.` separators. The named group also removes the need
    # to split on "=" and index into the result.
    matched = line.match(/\/auth\/passwordless\/complete\?token=(?<token>[0-9A-Za-z_.-]+)/)
    next unless matched

    payload, _header = JWT.decode(matched["token"], verify: false, validate: false)
    emails.add(payload["email"])
  end
  emails
end
# Extracts every successful sample download recorded in `logs`.
#
# A line is counted when it contains both a bracketed timestamp
# (eg. `[2019-11-21T08:24:24.702Z`) and `/datasets/<id>/sample/download 200`.
#
# Returns one `{epoch_seconds, dataset_id}` tuple per matching line, in log order.
# The timestamp is parsed with VAULT_TIMESTAMP_FORMAT in LOCATION_SG; a timestamp that
# passes the regex but does not fit the format makes Time.parse raise.
def get_sample_downloads(logs : Array(String)) : Array(SampleIdWithTimestamp)
  samples = Array(SampleIdWithTimestamp).new
  logs.each do |log|
    # The pattern already pins the status to 200. A separate substring filter on "500"
    # is deliberately not applied: it would also discard successful downloads whose
    # timestamp or dataset id merely contains "500" (eg. "...:24.500Z").
    sample = /\/datasets\/(?<id>data_[0-9a-z-]*)\/sample\/download 200/.match(log)
    next unless sample

    stamp = /\[(?<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.]*Z)/.match(log)
    next unless stamp

    parsed = Time.parse(stamp["timestamp"], VAULT_TIMESTAMP_FORMAT, LOCATION_SG)
    # SampleIdWithTimestamp stores the epoch as Int32; to_i32 raises on overflow.
    samples << {parsed.to_unix.to_i32, sample["id"]}
  end
  samples
end
# Returns the ids of all requests that were submitted successfully.
#
# Only lines mentioning "/requests/" are inspected; from those, the `req_...` id of
# every `POST /requests/<id>/submit 200` entry is collected. Duplicates collapse
# because the result is a Set.
def get_requests(logs : Array(String)) : Set(String)
  submitted = Set(String).new
  logs.each do |line|
    next unless line.includes?("/requests/")

    if found = line.match(/POST \/requests\/(?<id>req_[0-9a-z-]*)\/submit 200/)
      submitted << found["id"]
    end
  end
  submitted
end
# Builds and prints the usage report for a set of log files.
#
# `filenames` lists the log files to scan; it defaults to the four weekly logs so that
# a bare `main` call behaves as before. For each file the sample downloads, login
# emails and submitted request ids are gathered, then three sections are printed:
# sample downloads, unique logins and unique requests, each followed by its count.
def main(filenames : Array(String) = ["w1.log", "w2.log", "w3.log", "w4.log"])
  all_sample_downloads = Array(SampleIdWithTimestamp).new
  all_unique_logins = Set(JSON::Any).new
  all_unique_requests = Set(String).new

  filenames.each do |f|
    puts "running for #{f}"
    logs = get_file_lines(f)
    all_sample_downloads.concat(get_sample_downloads(logs))
    # Merge in place instead of building a new set for every file.
    all_unique_logins.concat(get_unique_logins(logs))
    all_unique_requests.concat(get_requests(logs))
  end

  puts "\nSample downloads"
  # Downloads sharing the same epoch second are treated as one download; the dataset
  # id printed for each group is the first one recorded for that second.
  downloads_by_timestamp = all_sample_downloads.group_by { |tuple| tuple[0] }
  puts downloads_by_timestamp.values.map { |entries| entries[0][1] }
  puts downloads_by_timestamp.size

  puts "\nUnique logins"
  puts all_unique_logins
  puts all_unique_logins.size

  puts "\nUnique Requests"
  puts all_unique_requests
  puts all_unique_requests.size
end
# Returns the log lines surrounding position `idx`: up to five lines before it, the
# line itself, and up to five lines after it.
#
# The window start is clamped to 0. Without the clamp an `idx` below 5 would yield a
# negative start, which Array#[] interprets as an offset from the END of the array and
# therefore returns the wrong lines. The range form of Array#[] already stops at the
# last element, so no clamp is applied to the upper bound.
def get_lines_around(logs : Array(String), idx : Int)
  first = Math.max(idx - 5, 0)
  logs[first..(idx + 5)]
end
# Script entry point: produce the report when this file is run.
main
# logs = get_file_lines("w3.log")
# samples = Array({Int32, String}).new
# logs.each_with_index do |line, idx|
# has_timestamp = /\[(?<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.]*Z)/.match(line)
# has_sample = /\/datasets\/(?<id>data_[0-9a-z-]*)\/sample\/download 200/.match(line)
# if has_timestamp && has_sample
# timestamp = has_timestamp.try &.["timestamp"]
# timestamp_in_epoch = Time.parse(timestamp, "%Y-%m-%dT%H:%M:%S.%LZ", LOCATION_SG).to_s("%s").to_i
# samples << {timestamp_in_epoch, has_sample.try &.["id"]}
# end
# end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment