Created
December 5, 2019 03:47
-
-
Save chinying/bb69394bf3a73ff55682b98f6cc6d2ca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "jwt" | |
require "json" | |
LOCATION_SG = Time::Location.load("Asia/Singapore") | |
# eg. 2019-11-21T08:24:24.702Z | |
VAULT_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%LZ" | |
alias SampleIdWithTimestamp = {Int32, String} | |
# Reads *filename* and returns its contents as an array of lines
# (trailing newlines stripped, same as IO#each_line produced before).
#
# The original opened the file and copied `each_line.to_a` into an outer
# array; `File.read_lines` does exactly that in one stdlib call and still
# closes the file deterministically.
def get_file_lines(filename) : Array(String)
  File.read_lines(filename)
end
# Extracts the set of unique login emails from passwordless-auth completion
# log lines. The magic-link token is a JWT passed as `?token=...`; it is
# decoded without signature verification because only the `email` claim is
# needed for counting.
def get_unique_logins(logs : Array(String)) : Set(JSON::Any)
  emails = Set(JSON::Any).new
  filtered = logs.select(&.includes?("/auth/passwordless/complete"))
  filtered.each do |line|
    # [A-Za-z0-9_\-=.] covers base64url JWT segments plus '.' separators and
    # '=' padding. The original class [0-9a-zA-z=.] accidentally spanned A-z
    # (letting in "[ \ ] ^ _ `") and missed '-', a legal base64url character.
    # Capturing the token directly also avoids split("=")[1], which truncated
    # tokens at the first '=' padding character.
    matched = line.match(/\/auth\/passwordless\/complete\?token=(?<token>[A-Za-z0-9_\-=.]*)/)
    if matched
      payload, _header = JWT.decode(matched["token"], verify: false, validate: false)
      emails.add(payload["email"])
    end
  end
  emails
end
# Scans log lines for successful (HTTP 200) dataset sample downloads and
# returns one {epoch seconds, dataset id} tuple per matching line.
# Lines containing "500" are discarded up front.
def get_sample_downloads(logs : Array(String)) : Array(SampleIdWithTimestamp)
  samples = Array(SampleIdWithTimestamp).new
  candidates = logs.select(&.includes?("/sample/download"))
    .reject(&.includes?("500"))
  candidates.each do |log|
    ts_match = /\[(?<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.]*Z)/.match(log)
    id_match = /\/datasets\/(?<id>data_[0-9a-z-]*)\/sample\/download 200/.match(log)
    if ts_match && id_match
      # Both matches are flow-typed non-nil inside this branch, so the
      # original `.try` calls were redundant and are dropped.
      # NOTE(review): the timestamp ends in "Z" (UTC-looking) but is parsed
      # in the Asia/Singapore location — confirm this 8-hour shift is
      # intentional (it is consistent across runs, so grouping still works).
      epoch = Time.parse(ts_match["timestamp"], VAULT_TIMESTAMP_FORMAT, LOCATION_SG).to_s("%s").to_i
      samples << {epoch, id_match["id"]}
    end
  end
  samples
end
# Collects the unique ids of requests that were submitted successfully
# (POST /requests/:id/submit returning 200).
def get_requests(logs : Array(String)) : Set(String)
  submitted = logs.select(&.includes?("/requests/")).compact_map do |line|
    /POST \/requests\/(?<id>req_[0-9a-z-]*)\/submit 200/.match(line).try &.["id"]
  end
  Set(String).new(submitted)
end
# Entry point: aggregates login, sample-download and request-submission stats
# across the four weekly log files, then prints each summary with its count.
def main
  sample_downloads = Array(SampleIdWithTimestamp).new
  unique_logins = Set(JSON::Any).new
  unique_requests = Set(String).new

  %w[w1.log w2.log w3.log w4.log].each do |filename|
    puts "running for #{filename}"
    lines = get_file_lines(filename)
    sample_downloads.concat(get_sample_downloads(lines))
    unique_logins |= get_unique_logins(lines)
    unique_requests |= get_requests(lines)
  end

  puts "\nSample downloads"
  # Downloads sharing an epoch-second timestamp are collapsed to one entry;
  # only the first dataset id of each group is printed.
  downloads_by_timestamp = sample_downloads.group_by { |tuple| tuple[0] }
  puts downloads_by_timestamp.values.map { |group| group[0][1] }
  puts downloads_by_timestamp.size

  puts "\nUnique logins"
  puts unique_logins
  puts unique_logins.size

  puts "\nUnique Requests"
  puts unique_requests
  puts unique_requests.size
end
# Returns up to 5 lines of context on either side of *idx* (inclusive).
#
# The lower bound is clamped at 0: the original `logs[(idx - 5)..(idx + 5)]`
# produced a negative start index whenever idx < 5, which Crystal's Array#[]
# interprets as counting from the END of the array — returning a tiny slice
# from the tail instead of the head of the file. The upper bound needs no
# clamp; range-end past the array size is already truncated by Array#[].
def get_lines_around(logs : Array(String), idx : Int)
  lower = idx - 5 < 0 ? 0 : idx - 5
  logs[lower..(idx + 5)]
end
main

# Scratch/debug snippet kept for reference: the same sample-download
# extraction as get_sample_downloads, run inline against one log file.
# logs = get_file_lines("w3.log")
# samples = Array({Int32, String}).new
# logs.each_with_index do |line, idx|
#   has_timestamp = /\[(?<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.]*Z)/.match(line)
#   has_sample = /\/datasets\/(?<id>data_[0-9a-z-]*)\/sample\/download 200/.match(line)
#   if has_timestamp && has_sample
#     timestamp = has_timestamp.try &.["timestamp"]
#     timestamp_in_epoch = Time.parse(timestamp, "%Y-%m-%dT%H:%M:%S.%LZ", locationSG).to_s("%s").to_i
#     samples << {timestamp_in_epoch, has_sample.try &.["id"]}
#   end
# end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment