Skip to content

Instantly share code, notes, and snippets.

@perilstar
Created May 6, 2018 05:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save perilstar/93c32f8109b4a44dddca6ca30e4a67aa to your computer and use it in GitHub Desktop.
Save perilstar/93c32f8109b4a44dddca6ca30e4a67aa to your computer and use it in GitHub Desktop.
Minecraft username scraper
# Pulls probable player names out of a Minecraft chat log.
#
# State lives on the module itself: +input+ loads the log lines, the
# +filter_*+ and +strip_*+ steps narrow and clean those lines in place,
# +find_patterns+ collects the names and +output+ writes them to a file.
module Scraper
  # Chat-line formats. Capture group 1 of every pattern is the player name.
  PATTERNS = [
    # Hypixel: an optional bracketed rank/tag, then the name, followed by
    # " joined", " left", ":" or " found".
    /^(?:\[(?:VIP|MVP|HELPER|MOD|YT|MOJANG|ADMIN|OWNER|Mystery Box|\?)\+?\]\s)?([\w]{3,16})(?: joined| left|:| found).*/
  ].freeze

  # Words that fit the name shape but are dropped by +find_patterns+ unless
  # the caller asks to keep them.
  UNLIKELY_NAMES = [
    /^you$/i,
    /^winners?$/i,
    /^kits?$/i,
    /^owners?$/i,
    /^members?$/i,
    /^flags?$/i,
    /^usernames?$/i,
    /^regions?$/i,
    /^scores?$/i,
    /^teams?$/i,
    /^bounds?$/i,
    # NOTE(review): unlike its neighbours this entry requires the trailing
    # "s", so "coord" / "coordinate" are not filtered - confirm intended.
    /^co-?ord(inate)?s$/i,
    /^sides?$/i,
    /^objectives?$/i,
    /^points?$/i,
    /^checkpoints?$/i
  ].freeze

  # Working set of log lines and the names extracted from them, held as
  # module-level instance variables.
  @lines = []
  @matches = []

  # Loads +filename+ and replaces the current working set with its lines.
  #
  # The file's bytes are interpreted as ISO-8859-1 and every line is
  # transcoded to UTF-8 (line terminators are kept).
  #
  # @param filename [String] path of the log file to read
  # @return [Array<String>] the loaded lines
  # @raise [SystemCallError] when the file cannot be read
  def self.input(filename)
    # File.read opens and closes the file in one call, so no handle is left
    # dangling until garbage collection.
    log = File.read(filename).force_encoding("ISO-8859-1")
    @lines = log.each_line.map { |line| line.encode("UTF-8") }
  end

  # Keeps only the lines that contain the literal "[CHAT]" marker.
  #
  # @return [Array<String>] the remaining lines
  def self.filter_chat
    @lines.keep_if { |line| line.match?(/\[CHAT\]/) }
  end

  # Keeps only the lines that contain a run of 3 to 16 word characters
  # (letters, digits, underscore) that is not preceded or followed by
  # another word character.
  #
  # @return [Array<String>] the remaining lines
  def self.filter_possible_usernames
    @lines.keep_if { |line| line.match?(/(^|[^\w_])([\w_]{3,16})($|[^\w_])/) }
  end

  # Removes everything up to and including the last "[CHAT] " on each line,
  # then trims surrounding whitespace.
  #
  # @return [Array<String>] the rewritten lines
  def self.strip_thread_info
    @lines = @lines.map { |line| line.gsub(/.*\[CHAT\] /, "").strip }
  end

  # Removes every "§" followed by one word character from each line.
  #
  # @return [Array<String>] the rewritten lines
  def self.strip_formatting
    @lines = @lines.map { |line| line.gsub(/§\w/, "") }
  end

  # Writes the collected names to +filename+, one per line, without a
  # trailing newline. An existing file is overwritten.
  #
  # @param filename [String] path of the file to write
  # @raise [SystemCallError] when the file cannot be written
  def self.output(filename)
    # File.write closes the handle even when the write itself raises.
    File.write(filename, @matches.join("\n"))
  end

  # Rebuilds the list of names from the current lines: capture group 1 of
  # every PATTERNS match, de-duplicated and sorted.
  #
  # @param keep_unlikelies [Boolean] when falsy (the default), names that
  #   match any UNLIKELY_NAMES entry are discarded
  # @return [Array<String>] the resulting names
  def self.find_patterns(keep_unlikelies = false)
    names = PATTERNS.flat_map do |pattern|
      # String#[] with a capture index yields nil for non-matching lines.
      @lines.map { |line| line[pattern, 1] }
    end
    names = names.compact.uniq.sort

    unless keep_unlikelies
      names.reject! { |name| UNLIKELY_NAMES.any? { |unlikely| name.match?(unlikely) } }
    end

    @matches = names
  end
end
# Command-line entry point.
#
# Arguments: [input_log] [output_file] [all]
#   input_log   - log to read; defaults to "./latest.log"
#   output_file - file to write names to; defaults to "./out.txt"
#   all         - optional trailing flag (any case, optionally written as
#                 "-all" or "--all") that keeps names normally discarded as
#                 unlikely
#
# The flag must match the whole last argument. An unanchored match would
# also swallow a file name that merely contains "all" (e.g. "wall.txt").
keep_unlikelies = !ARGV.empty? && ARGV.last.match?(/\A-{0,2}all\z/i)
# Drop the flag so only the file arguments remain.
ARGV.pop if keep_unlikelies

file_in, file_out = ARGV
file_in ||= "./latest.log"
file_out ||= "./out.txt"

Scraper.input(file_in)
Scraper.filter_chat
Scraper.strip_thread_info
Scraper.filter_possible_usernames
Scraper.strip_formatting
Scraper.find_patterns(keep_unlikelies)
Scraper.output(file_out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment