Skip to content

Instantly share code, notes, and snippets.

@makenowjust
Created May 3, 2023 14:32
Show Gist options
  • Save makenowjust/d190cf1caf7c08e2592ddf8d6558d08b to your computer and use it in GitHub Desktop.
Save makenowjust/d190cf1caf7c08e2592ddf8d6558d08b to your computer and use it in GitHub Desktop.
require 'fileutils'
# Unicode release whose data files this script downloads and parses.
UNICODE_VERSION = "15.0.0".freeze
# Local cache root: a directory named after the Unicode version, next to this script.
BASE_DIR = "#{__dir__}/#{UNICODE_VERSION}".freeze
# NOTE(review): not referenced by any code visible in this file (downloads are
# stored relative to BASE_DIR) — confirm whether it is still needed.
UCD_DIR = "#{BASE_DIR}/ucd".freeze
# Remote directory the data files are fetched from.
BASE_URL = "https://www.unicode.org/Public/#{UNICODE_VERSION}/ucd".freeze
# Ensures +file+ (a path relative to BASE_URL) exists locally under BASE_DIR,
# fetching it with curl when it is not already cached.
#
# @param file [String] relative path of the data file, e.g. "PropertyAliases.txt"
# @return [nil]
# @raise [RuntimeError] when curl cannot be run or the download fails
def download_file(file)
  path = "#{BASE_DIR}/#{file}"
  return if File.exist?(path)

  FileUtils.mkdir_p(File.dirname(path))
  url = "#{BASE_URL}/#{file}"

  # Argument-vector form: nothing is interpreted by a shell. `-f` makes curl
  # exit non-zero on HTTP errors instead of saving the error page as data.
  return if system("curl", "-fL", "-o", path, url)

  # Never leave a partial or empty file behind: the next run would treat it as cached.
  FileUtils.rm_f(path)
  raise "failed to download #{url}"
end
# Returns the lines of the cached copy of +file+ with line terminators
# removed, downloading the file first via download_file.
#
# @param file [String] path of the data file relative to BASE_DIR / BASE_URL
# @return [Array<String>]
def load_file(file)
  download_file(file)
  local_path = "#{BASE_DIR}/#{file}"
  contents = File.read(local_path)
  contents.lines(chomp: true)
end
# Parses the part of PropertyAliases.txt that follows the "# Binary Properties"
# header line.
#
# Each data line has the form `short ; long [; other ...]`. Anything from `#`
# to the end of the line is treated as a comment and ignored, so comment lines
# and trailing comments never end up in the result.
#
# NOTE(review): every data line after the header is collected, which assumes
# the binary section is the last one in the file — confirm against the data.
#
# @return [Array<(String, Array<String>)>] pairs of [long_name, [short_name, *other_aliases]]
def load_binary_property
  in_binary_section = false
  load_file("PropertyAliases.txt").each_with_object([]) do |line, data|
    in_binary_section ||= line.match?(/# Binary Properties/)
    next unless in_binary_section

    short, long, *others = line.sub(/#.*/, "").split(";").map(&:strip)
    # Comment-only, blank, or malformed lines have no usable short/long pair.
    next if long.nil? || short.empty? || long.empty?

    data << [long, [short, *others.reject(&:empty?)]]
  end
end
# Shared parser for PropertyValueAliases.txt rows of the form
# `property ; short ; long [; other]  # optional comment`.
#
# Whitespace around the first `;` is not fixed, so rows are matched however
# they are aligned. Text from `#` onwards never becomes part of a field.
#
# @param property [String] property abbreviation in the first field, e.g. "gc"
# @param short_field [Regexp] pattern the second field (short value name) must match
# @return [Array<(String, Array<String>)>] pairs of [long_name, [short_name, other_alias?]]
def load_property_value_aliases(property, short_field = /[^;#]+/)
  row = /^#{Regexp.escape(property)}\s*;\s*(#{short_field})\s*;([^;#]+)(?:;([^;#]+))?/
  load_file("PropertyValueAliases.txt").each_with_object([]) do |line, data|
    m = line.match(row)
    next unless m

    short, long, other = m.captures.map { |field| field&.strip }
    # `other` is nil when the row has no third name; drop blank fields as well.
    data << [long, [short, other].compact.reject(&:empty?)]
  end
end

# General_Category (`gc`) values.
# @return [Array<(String, Array<String>)>] see load_property_value_aliases
def load_general_category_value
  load_property_value_aliases("gc")
end

# Script (`sc`) values.
# @return [Array<(String, Array<String>)>] see load_property_value_aliases
def load_script_value
  load_property_value_aliases("sc")
end

# Age (`age`) values. Only rows whose short name looks like `<digits>.<digits>`
# are returned; other rows are skipped.
# @return [Array<(String, Array<String>)>] see load_property_value_aliases
def load_age_value
  load_property_value_aliases("age", /\d+\.\d+/)
end

# Block (`blk`) values.
# @return [Array<(String, Array<String>)>] see load_property_value_aliases
def load_block_value
  load_property_value_aliases("blk")
end
# Load every table before printing anything.
binary_properties = load_binary_property
general_category_values = load_general_category_value
script_values = load_script_value
age_values = load_age_value
block_values = load_block_value

# Prints a Scala `Map[String, String]` literal that maps the canonical name
# and each of its aliases to the canonical name, followed by a blank line.
print_alias_map = lambda do |scala_name, entries|
  puts "val #{scala_name}: Map[String, String] = Map("
  entries.each do |canonical, aliases|
    ([canonical] + aliases).uniq.each do |key|
      puts " #{key.inspect} -> #{canonical.inspect},"
    end
  end
  puts ")"
  puts
end

# Prints a Scala `Set[String]` literal with one element per value, followed
# by a blank line.
print_value_set = lambda do |scala_name, values|
  puts "val #{scala_name}: Set[String] = Set("
  values.each { |value| puts " #{value.inspect}," }
  puts ")"
  puts
end

print_alias_map.call("BinaryPropertyNames", binary_properties)
print_alias_map.call("GeneralCategoryValues", general_category_values)
print_alias_map.call("ScriptValues", script_values)
# Ages are emitted by their first alias (the short name); blocks by their long name.
print_value_set.call("AgeValues", age_values.map { |_, aliases| aliases.first })
print_value_set.call("BlockValues", block_values.map(&:first))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment