Skip to content

Instantly share code, notes, and snippets.

@rphsoftware
Created July 19, 2020 15:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rphsoftware/0e275005dd77eb4e15a875569f001dca to your computer and use it in GitHub Desktop.
Save rphsoftware/0e275005dd77eb4e15a875569f001dca to your computer and use it in GitHub Desktop.
Program that generates a bit-flag binary file recording which Unicode characters are deemed uppercase (according to the official Unicode Consortium data file)
use std::{fs, env};
use minidom::Element;
use std::fs::File;
use std::io::Write;
/// Builds a bitmap of uppercase Unicode code points and writes it to `out.bin`.
///
/// Reads `ucd.all.flat.xml` from the current working directory, visits every child of each
/// top-level `repertoire` element, and for each child carrying both a `cp` attribute and
/// `Upper="Y"` sets bit `cp % 8` of byte `cp / 8` in a 139264-byte table. The table is then
/// written verbatim to `out.bin`.
///
/// Children without a `cp` attribute are skipped, as are children whose `Upper` attribute is
/// missing or not `"Y"`.
/// NOTE(review): entries described by a range rather than a single `cp` are therefore never
/// flagged — confirm that no such entry is uppercase in the input file.
///
/// # Panics
///
/// Panics if the current directory cannot be determined, the input file cannot be read or
/// parsed, a `cp` value is not valid hexadecimal or does not fit in the table, or the output
/// file cannot be created or written.
fn main() {
    // One bit for every value in 0..0x110000, packed eight per byte (139264 bytes).
    const BITMAP_BYTES: usize = 0x11_0000 / 8;

    let mut path = env::current_dir().expect("Something went wrong");
    println!("{}", path.as_os_str().to_string_lossy());
    path.push("ucd.all.flat.xml");
    println!("{}", path.as_os_str().to_string_lossy());

    let contents = fs::read_to_string(path).expect("File read error");
    let root: Element = contents.parse().unwrap();

    let mut data = [0u8; BITMAP_BYTES];

    let entries = root
        .children()
        .filter(|child| child.name() == "repertoire")
        .flat_map(|repertoire| repertoire.children());

    for entry in entries {
        // Only entries that name a single code point and are flagged uppercase matter;
        // a missing `Upper` attribute is treated as "not uppercase" instead of panicking.
        if let (Some(cp), Some("Y")) = (entry.attr("cp"), entry.attr("Upper")) {
            // Unsigned parse: a negative value can never be a valid table position.
            let code_point = u32::from_str_radix(cp, 16)
                .expect("An error occured while decoding hex number");
            let index = (code_point >> 3) as usize;
            let bit = code_point & 7;

            let byte = data
                .get_mut(index)
                .expect("Code point does not fit in the bitmap");
            // OR rather than add, so a repeated code point cannot carry into the
            // neighbouring bit or overflow the byte.
            *byte |= 1u8 << bit;
            println!("{} {} {} {}", code_point, index, bit, *byte);
        }
    }

    let mut buffer = File::create("out.bin").expect("Failed to create file");
    // `write_all` retries short writes and reports an error if no progress can be made,
    // where a manual `write` loop would spin forever on `Ok(0)`.
    buffer.write_all(&data).expect("Failed to write data");
    buffer.flush().expect("Failed to flush the file down the toilet");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment