Skip to content

Instantly share code, notes, and snippets.

@pzb
Created June 11, 2017 18:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pzb/5aba13a67bd9fa64b3769397c842889b to your computer and use it in GitHub Desktop.
Save pzb/5aba13a67bd9fa64b3769397c842889b to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# Only used for tests
require 'simpleidn'
# One node of the trie that stores Public Suffix List rules.
#
# Rules are stored label by label starting from the TLD, so the rule
# "co.uk" lives at root -> "uk" -> "co". A node whose path spells a complete
# rule is a "terminus"; a terminus may additionally be an exception rule
# (written with a leading "!" in the list).
class PSLNode
  # Label used by wildcard rules; it matches any single domain label.
  WILDCARD = '*'.freeze

  def initialize
    @children = {}
    @terminus = false
    @exception = false
  end

  # Inserts a rule below this node.
  #
  # x     - Array of the rule's remaining labels, TLD side first. It is not
  #         modified.
  # excpt - true when the rule is an exception rule.
  #
  # Raises RuntimeError ('Duplicate rule') when the same rule is added twice.
  def add_rule(x, excpt)
    lbl, *rest = x
    if lbl.nil?
      raise 'Duplicate rule' if @terminus
      @terminus = true
      @exception = excpt
      return
    end
    (@children[lbl] ||= PSLNode.new).add_rule(rest, excpt)
  end

  # Computes the registrable domain for a list of domain labels.
  #
  # Every rule reachable from this node that matches the leading labels of
  # +lbls+ is considered: an exception rule prevails over any other rule,
  # otherwise the rule with the most labels prevails. The registrable domain
  # is the public suffix named by that rule plus one more label.
  #
  # lbls         - Array of domain labels, TLD first. It is not modified.
  # matched_lbls - Array of labels to prepend to the result (defaults to
  #                none). It is not modified.
  #
  # Returns an Array of labels (TLD first), or nil when no rule matches or
  # when the domain has no label beyond its public suffix.
  def get_regdom(lbls, matched_lbls = [])
    exception, rule_len = prevailing_rule(lbls, 0)
    return nil if rule_len.nil?
    # An exception rule names the registrable domain itself, so the public
    # suffix is the rule minus its last (leftmost in the domain) label.
    suffix_len = exception ? rule_len - 1 : rule_len
    return nil if lbls.size <= suffix_len
    matched_lbls + lbls.first(suffix_len + 1)
  end

  # Yields every rule stored at or below this node.
  #
  # prefix - String already accumulated for this node. Each label is
  #          prepended as "label.", so the yielded names end with a dot
  #          (e.g. "co.uk.").
  # block  - called with (name, exception_flag) for each rule; children are
  #          visited in sorted label order.
  def walk(prefix = '', &block)
    block.call(prefix, @exception) if @terminus
    @children.keys.sort.each do |label|
      @children[label].walk("#{label}.#{prefix}", &block)
    end
  end

  protected

  # Finds the prevailing rule among those that pass through this node and
  # match +lbls+.
  #
  # lbls  - Array of domain labels, TLD first.
  # depth - number of leading labels consumed to reach this node.
  #
  # Returns [exception_flag, rule_length] or nil when nothing matches.
  def prevailing_rule(lbls, depth)
    best = @terminus ? [@exception ? true : false, depth] : nil
    return best if depth >= lbls.size
    # Try the literal label and the wildcard: a dead end on one path must
    # not hide a rule that matches on the other (or on this node itself).
    [lbls[depth], WILDCARD].uniq.each do |key|
      child = @children[key]
      next if child.nil?
      best = stronger_rule(best, child.prevailing_rule(lbls, depth + 1))
    end
    best
  end

  private

  # Picks the rule that prevails: exception rules first, then the longest.
  def stronger_rule(current, candidate)
    return current if candidate.nil?
    return candidate if current.nil?
    cur_exception, cur_len = current
    cand_exception, cand_len = candidate
    return (cur_exception ? current : candidate) if cur_exception != cand_exception
    cand_len > cur_len ? candidate : current
  end
end
# Public Suffix List lookup built on a PSLNode trie.
#
# Rules are added in their textual list form ("co.uk", "*.ck", "!www.ck")
# and domains are looked up as dotted strings.
class PSLTree
  def initialize
    @root = PSLNode.new
    # Default rule, as per spec
    @root.add_rule(['*'], false)
  end

  # Adds one rule in list syntax.
  #
  # rulestr - String such as "co.uk" or "*.ck"; a leading "!" marks an
  #           exception rule. The string is not modified.
  def add_rule(rulestr)
    exception = rulestr.start_with?('!')
    rulestr = rulestr[1..-1] if exception
    # The trie is keyed from the TLD inwards, hence the reversal.
    @root.add_rule(rulestr.split('.').reverse, exception)
  end

  # Returns the registrable domain of +dom+ as a lower-cased String, or nil
  # when there is none.
  #
  # dom - dotted domain name. It is not modified (a frozen String is fine).
  #
  # A name with an empty label (leading dot, "a..b") or with no labels at
  # all yields nil. A single trailing dot is ignored because String#split
  # drops trailing empty fields.
  def get_regdom(dom)
    lbls = dom.downcase.split('.')
    return nil if lbls.empty? || lbls.any?(&:empty?)
    r = @root.get_regdom(lbls.reverse)
    return r if r.nil?
    r.reverse.join('.')
  end

  # walk and test are not used in the core code

  # Yields (name, exception_flag) for every stored rule by delegating to the
  # root node's walk.
  def walk(&block)
    @root.walk(&block)
  end

  # Runs one test vector and prints the outcome.
  #
  # input    - domain to look up; converted with SimpleIDN.to_unicode first.
  # expected - expected registrable domain, also converted with
  #            SimpleIDN.to_unicode; the string 'null' is treated as nil.
  #
  # Returns true when the lookup result equals the expectation.
  def test(input, expected)
    ex = SimpleIDN.to_unicode(expected)
    ex = nil if ex == 'null'
    rd = get_regdom(SimpleIDN.to_unicode(input))
    result = (rd == ex)
    puts "test(#{input}, #{expected}) = #{result} (#{rd})"
    result
  end
end
# Text found in the comment line that opens the private-domains section.
PRIVATE_DELIM = '===BEGIN PRIVATE DOMAINS==='.freeze
# A line starting with this prefix is entirely a comment.
COMMENT_PREFIX = '//'.freeze
SPACE_RE = /\p{Space}/
SPACE_LINE = /\A\p{Space}*\z/

# Fail with a readable message instead of a TypeError from a nil path.
abort "usage: #{$PROGRAM_NAME} PUBLIC_SUFFIX_LIST_FILE" if ARGV.empty?

psl = PSLTree.new
# Becomes true once the private-domains marker has been seen. Rules from
# both sides of the marker are loaded into the same tree.
privreg = false
## The Public Suffix List consists of a series of lines, separated by \n.
# File.foreach is used instead of IO.foreach so that a path starting with
# "|" is never run as a command, and the encoding is pinned to UTF-8 so IDN
# rules are read correctly whatever the process locale is.
File.foreach(ARGV[0], "\n", encoding: 'UTF-8') do |l|
  if !privreg && l.include?(PRIVATE_DELIM)
    privreg = true
    next
  end
  ## Each line is only read up to the first whitespace; entire lines can also be commented using //.
  ## Each line which is not entirely whitespace or begins with a comment contains a rule.
  next if l.start_with?(COMMENT_PREFIX)
  next if l =~ SPACE_LINE
  rule = l.split(SPACE_RE).first
  # A line that starts with whitespace has nothing before the first space,
  # so there is no rule to add.
  next if rule.nil? || rule.empty?
  psl.add_rule(rule)
end

# Dump every loaded rule, exception rules prefixed with "!".
psl.walk do |name, is_exception|
  marker = is_exception ? '!' : ''
  puts "#{marker}#{name}"
end
# The upstream vectors spell "no registrable domain" as null.
null = nil
# The following applies to lines below this point.
# From: https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt
# Any copyright is dedicated to the Public Domain.
# https://creativecommons.org/publicdomain/zero/1.0/
#
# Each pair is [input domain, expected registrable domain]; the pairs are
# checked in the order listed.
[
  # Mixed case.
  ['COM', null],
  ['example.COM', 'example.com'],
  ['WwW.example.COM', 'example.com'],
  # Leading dot.
  ['.com', null],
  ['.example', null],
  ['.example.com', null],
  ['.example.example', null],
  # Unlisted TLD.
  ['example', null],
  ['example.example', 'example.example'],
  ['b.example.example', 'example.example'],
  ['a.b.example.example', 'example.example'],
  # Listed, but non-Internet, TLD.
  #checkPublicSuffix('local', null);
  #checkPublicSuffix('example.local', null);
  #checkPublicSuffix('b.example.local', null);
  #checkPublicSuffix('a.b.example.local', null);
  # TLD with only 1 rule.
  ['biz', null],
  ['domain.biz', 'domain.biz'],
  ['b.domain.biz', 'domain.biz'],
  ['a.b.domain.biz', 'domain.biz'],
  # TLD with some 2-level rules.
  ['com', null],
  ['example.com', 'example.com'],
  ['b.example.com', 'example.com'],
  ['a.b.example.com', 'example.com'],
  ['uk.com', null],
  ['example.uk.com', 'example.uk.com'],
  ['b.example.uk.com', 'example.uk.com'],
  ['a.b.example.uk.com', 'example.uk.com'],
  ['test.ac', 'test.ac'],
  # TLD with only 1 (wildcard) rule.
  ['mm', null],
  ['c.mm', null],
  ['b.c.mm', 'b.c.mm'],
  ['a.b.c.mm', 'b.c.mm'],
  # More complex TLD.
  ['jp', null],
  ['test.jp', 'test.jp'],
  ['www.test.jp', 'test.jp'],
  ['ac.jp', null],
  ['test.ac.jp', 'test.ac.jp'],
  ['www.test.ac.jp', 'test.ac.jp'],
  ['kyoto.jp', null],
  ['test.kyoto.jp', 'test.kyoto.jp'],
  ['ide.kyoto.jp', null],
  ['b.ide.kyoto.jp', 'b.ide.kyoto.jp'],
  ['a.b.ide.kyoto.jp', 'b.ide.kyoto.jp'],
  ['c.kobe.jp', null],
  ['b.c.kobe.jp', 'b.c.kobe.jp'],
  ['a.b.c.kobe.jp', 'b.c.kobe.jp'],
  ['city.kobe.jp', 'city.kobe.jp'],
  ['www.city.kobe.jp', 'city.kobe.jp'],
  # TLD with a wildcard rule and exceptions.
  ['ck', null],
  ['test.ck', null],
  ['b.test.ck', 'b.test.ck'],
  ['a.b.test.ck', 'b.test.ck'],
  ['www.ck', 'www.ck'],
  ['www.www.ck', 'www.ck'],
  # US K12.
  ['us', null],
  ['test.us', 'test.us'],
  ['www.test.us', 'test.us'],
  ['ak.us', null],
  ['test.ak.us', 'test.ak.us'],
  ['www.test.ak.us', 'test.ak.us'],
  ['k12.ak.us', null],
  ['test.k12.ak.us', 'test.k12.ak.us'],
  ['www.test.k12.ak.us', 'test.k12.ak.us'],
  # IDN labels.
  ['食狮.com.cn', '食狮.com.cn'],
  ['食狮.公司.cn', '食狮.公司.cn'],
  ['www.食狮.公司.cn', '食狮.公司.cn'],
  ['shishi.公司.cn', 'shishi.公司.cn'],
  ['公司.cn', null],
  ['食狮.中国', '食狮.中国'],
  ['www.食狮.中国', '食狮.中国'],
  ['shishi.中国', 'shishi.中国'],
  ['中国', null],
  # Same as above, but punycoded.
  ['xn--85x722f.com.cn', 'xn--85x722f.com.cn'],
  ['xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'],
  ['www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'],
  ['shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn'],
  ['xn--55qx5d.cn', null],
  ['xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s'],
  ['www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s'],
  ['shishi.xn--fiqs8s', 'shishi.xn--fiqs8s'],
  ['xn--fiqs8s', null],
  # Extra vector that is not part of the upstream file.
  ['www.goog', 'goog']
].each do |domain, registrable|
  psl.test(domain, registrable)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment