Created
May 6, 2018 05:12
-
-
Save perilstar/93c32f8109b4a44dddca6ca30e4a67aa to your computer and use it in GitHub Desktop.
Minecraft username scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extracts probable player names from the chat lines of a log file.
#
# All state is kept on the module itself, so the methods are meant to be
# called in sequence: +input+, the +filter_*+ / +strip_*+ steps,
# +find_patterns+ and finally +output+.
module Scraper
  # Working set of log lines, and the names found by the last find_patterns.
  @lines = []
  @matches = []

  # Chat-line shapes whose first capture group is the player name.
  PATTERNS = [
    # Hypixel: an optional bracketed rank tag, then a 3-16 character name
    # followed by " joined", " left", ":" or " found".
    /^(?:\[(?:VIP|MVP|HELPER|MOD|YT|MOJANG|ADMIN|OWNER|Mystery Box|\?)\+?\]\s)?([\w]{3,16})(?: joined| left|:| found).*/
  ].freeze

  # Words that fit the name patterns but are dropped from the results unless
  # the caller asks to keep them.
  UNLIKELY_NAMES = [
    /^you$/i,
    /^winners?$/i,
    /^kits?$/i,
    /^owners?$/i,
    /^members?$/i,
    /^flags?$/i,
    /^usernames?$/i,
    /^regions?$/i,
    /^scores?$/i,
    /^teams?$/i,
    /^bounds?$/i,
    /^co-?ord(inate)?s$/i,
    /^sides?$/i,
    /^objectives?$/i,
    /^points?$/i,
    /^checkpoints?$/i
  ].freeze

  # A run of 3 to 16 word characters (letters, digits, underscore) that is not
  # preceded or followed by another word character.
  POSSIBLE_USERNAME = /(?<!\w)\w{3,16}(?!\w)/

  # Loads +filename+ as the new working set, replacing any lines loaded before.
  # The bytes are interpreted as ISO-8859-1 and every line is stored as UTF-8.
  #
  # File.read closes the file itself, so no handle is left open.
  # Returns the raw (ISO-8859-1 tagged) file contents.
  def self.input(filename)
    log = File.read(filename).force_encoding("ISO-8859-1")
    @lines.clear
    log.each_line { |line| @lines << line.encode("UTF-8") }
  end

  # Keeps only chat messages, i.e. lines containing the literal "[CHAT]" tag
  # (this drops warnings and other log output).
  def self.filter_chat
    @lines.keep_if { |line| line.include?("[CHAT]") }
  end

  # Keeps only lines that contain at least one candidate username
  # (see POSSIBLE_USERNAME).
  def self.filter_possible_usernames
    @lines.keep_if { |line| line =~ POSSIBLE_USERNAME }
  end

  # Removes everything up to and including the last "[CHAT] " on each line,
  # then trims surrounding whitespace.
  def self.strip_thread_info
    @lines = @lines.map { |line| line.gsub(/.*\[CHAT\] /, "").strip }
  end

  # Removes every "§" together with the single word character that follows it
  # (presumably in-game colour/format codes).
  def self.strip_formatting
    @lines = @lines.map { |line| line.gsub(/§\w/, "") }
  end

  # Writes the names found by the last find_patterns call to +filename+, one
  # per line with no trailing newline. File.write closes the file even when
  # writing fails. Returns nil.
  def self.output(filename)
    File.write(filename, @matches.join("\n"))
    nil
  end

  # Collects the first capture group of every PATTERNS match over the working
  # lines, de-duplicated and sorted.
  #
  # keep_unlikelies - when false or nil (the default), names matching
  #                   UNLIKELY_NAMES are discarded; any other value keeps them.
  #
  # Returns the resulting array of names.
  def self.find_patterns(keep_unlikelies = false)
    names = PATTERNS.flat_map do |pattern|
      # String#[] with a capture index yields nil when the line does not match.
      @lines.map { |line| line[pattern, 1] }
    end
    names = names.compact.uniq.sort

    unless keep_unlikelies
      names = names.reject { |name| UNLIKELY_NAMES.any? { |unlikely| name =~ unlikely } }
    end

    @matches = names
  end
end
# Command-line entry point.
#
# Usage: ruby <this script> [log_file] [out_file] [all]
#
# A trailing "all" (also "-all" or "--all", case-insensitive) keeps names that
# match Scraper::UNLIKELY_NAMES. The flag must be the whole argument: a path
# that merely contains "all" (e.g. "all_names.txt") is treated as a path and
# is not swallowed as the flag.
keep_unlikelies = !(ARGV.last.to_s =~ /\A-{0,2}all\z/i).nil?
ARGV.pop if keep_unlikelies

# Missing positional arguments fall back to files in the current directory.
file_in, file_out = ARGV
file_in ||= "./latest.log"
file_out ||= "./out.txt"

Scraper.input(file_in)
Scraper.filter_chat
Scraper.strip_thread_info
Scraper.filter_possible_usernames
Scraper.strip_formatting
Scraper.find_patterns(keep_unlikelies)
Scraper.output(file_out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment