Skip to content

Instantly share code, notes, and snippets.

@shepmaster
Last active April 14, 2023 01:13
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shepmaster/fb7f4c9519a074ea7186ca7b75afb9dd to your computer and use it in GitHub Desktop.
Save shepmaster/fb7f4c9519a074ea7186ca7b75afb9dd to your computer and use it in GitHub Desktop.
SNAFU-modified version of https://sabrinajewson.org/blog/errors
//! This crate provides types for UCD’s `Blocks.txt`.
/// Lookup table from Unicode code points to the names of the blocks that contain them.
///
/// Instances are built by parsing `Blocks.txt` data (see the `FromStr` impl,
/// `Blocks::from_file` and `Blocks::download`) and queried with `Blocks::block_of`.
#[derive(Debug, Clone)]
pub struct Blocks {
    /// Inclusive code-point range of each block, paired with the block's name.
    ///
    /// `block_of` binary-searches this vector, so the ranges are expected to be
    /// in ascending order and not to overlap.
    ranges: Vec<(RangeInclusive<u32>, String)>,
}
impl Blocks {
pub fn block_of(&self, c: char) -> &str {
self.ranges
.binary_search_by(|(range, _)| {
if *range.end() < u32::from(c) {
cmp::Ordering::Less
} else if u32::from(c) < *range.start() {
cmp::Ordering::Greater
} else {
cmp::Ordering::Equal
}
})
.map(|i| &*self.ranges[i].1)
.unwrap_or("No_Block")
}
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, FromFileError> {
let path = path.as_ref();
let tmp0 = fs::read_to_string(path).context(from_file_error::ReadFileSnafu { path })?;
Self::from_str(&tmp0).context(from_file_error::ParseSnafu { path })
}
pub fn download(agent: &ureq::Agent) -> Result<Self, DownloadError> {
let response = agent
.get(LATEST_URL)
.call()
.context(download_error::RequestSnafu)?;
Self::from_str(
&response
.into_string()
.context(download_error::ReadBodySnafu)?,
)
.context(download_error::ParseSnafu)
}
}
impl FromStr for Blocks {
    type Err = ParseError;

    /// Parses the textual contents of a `Blocks.txt` file.
    ///
    /// Everything from the first `#` on a line is treated as a comment. Lines that
    /// are empty or contain only whitespace once the comment is removed are
    /// skipped. Every remaining line must look like `START..END; Name`, where
    /// `START` and `END` are hexadecimal code points.
    ///
    /// The parsed ranges are sorted by their start so that the binary search in
    /// `Blocks::block_of` sees ascending ranges even if the input lists the
    /// blocks out of order.
    ///
    /// # Errors
    ///
    /// Returns a `ParseError` for the first malformed line. Its `line` field is
    /// the zero-based index of that line in `s`, counting comment and blank lines.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut ranges = s
            .lines()
            .enumerate()
            .map(|(i, line)| {
                let data = line.split_once('#').map(|(data, _)| data).unwrap_or(line);
                // Trim before the emptiness check so that whitespace-only lines
                // (including indentation in front of a comment) are skipped
                // instead of being rejected for lacking a semicolon.
                (i, data.trim())
            })
            .filter(|(_, line)| !line.is_empty())
            .map(|(i, line)| {
                // The immediately-invoked closure lets `?` collect the per-line
                // failure kinds, which are then wrapped with the line index.
                (|| {
                    let (range, name) = line.split_once(';').context(NoSemicolonSnafu)?;
                    let (range, name) = (range.trim(), name.trim());
                    let (start, end) = range.split_once("..").context(NoDotDotSnafu)?;
                    let start = u32::from_str_radix(start, 16).context(ParseIntSnafu)?;
                    let end = u32::from_str_radix(end, 16).context(ParseIntSnafu)?;
                    Ok((start..=end, name.to_owned()))
                })()
                .context(ParseSnafu { line: i })
            })
            .collect::<Result<Vec<_>, ParseError>>()?;

        // `block_of` binary-searches `ranges`; a stable sort by range start keeps
        // that lookup valid and leaves already-ordered input untouched.
        ranges.sort_by_key(|(range, _)| *range.start());

        Ok(Self { ranges })
    }
}
/// Error returned by `Blocks::download`.
///
/// Every variant displays the same top-level message; the specific cause is
/// carried in the variant's `source` field. Because of `#[snafu(module)]` the
/// generated context selectors are referenced through the `download_error`
/// module (e.g. `download_error::RequestSnafu`).
#[derive(Debug, Snafu)]
#[non_exhaustive]
#[snafu(module)]
pub enum DownloadError {
/// The request for `LATEST_URL` failed.
#[snafu(display("failed to download Blocks.txt from the Unicode website"))]
Request {
// The `ureq::Error` is boxed via `Box::new` when this variant is built.
// NOTE(review): presumably done to keep `DownloadError` small — confirm.
#[snafu(source(from(ureq::Error, Box::new)))]
source: Box<ureq::Error>,
},
/// The response body could not be read into a string.
#[snafu(display("failed to download Blocks.txt from the Unicode website"))]
ReadBody { source: io::Error },
/// The response body was read but could not be parsed as `Blocks.txt` data.
#[snafu(display("failed to download Blocks.txt from the Unicode website"))]
Parse { source: ParseError },
}
/// Error returned by `Blocks::from_file`.
///
/// Both variants display the same message naming the file; the specific cause
/// is carried in the variant's `source` field. Because of `#[snafu(module)]`
/// the generated context selectors are referenced through the
/// `from_file_error` module (e.g. `from_file_error::ReadFileSnafu`).
#[derive(Debug, Snafu)]
#[non_exhaustive]
#[snafu(module)]
pub enum FromFileError {
/// The file at `path` could not be read into a string.
#[snafu(display("error reading `{}`", path.display()))]
ReadFile { source: io::Error, path: Box<Path> },
/// The file at `path` was read but its contents could not be parsed.
#[snafu(display("error reading `{}`", path.display()))]
Parse { source: ParseError, path: Box<Path> },
}
/// Error produced when `Blocks.txt` data cannot be parsed.
///
/// Identifies the offending line; what was wrong with it is described by
/// `source`. The display message reports the line one-based (`line + 1`).
#[derive(Debug, Snafu)]
#[non_exhaustive]
#[snafu(display("invalid Blocks.txt data on line {}", self.line + 1))]
pub struct ParseError {
/// Zero-based index of the offending line within the parsed input; comment
/// and blank lines are counted.
pub line: usize,
/// What was wrong with the line.
pub source: ParseErrorKind,
}
/// The specific reason a single line of `Blocks.txt` data was rejected.
///
/// `#[non_exhaustive]` is applied to each variant rather than to the enum, so
/// code outside this crate cannot construct the variants and must use `..`
/// when matching on their fields.
#[derive(Debug, Snafu)]
pub enum ParseErrorKind {
/// The line contains no `;` separating the range from the block name.
#[non_exhaustive]
#[snafu(display("no semicolon"))]
NoSemicolon,
/// The part of the line before the `;` contains no `..`.
#[non_exhaustive]
#[snafu(display("no `..` in range"))]
NoDotDot,
/// The start or the end of the range failed to parse as a base-16 `u32`.
#[non_exhaustive]
#[snafu(display("one end of range is not a valid hexadecimal integer"))]
ParseInt { source: ParseIntError },
}
#[cfg(test)]
mod tests {
    use crate::Blocks;

    /// Parses the `Blocks.txt` file bundled next to the crate sources and checks
    /// a few lookups: three code points expected in "Latin-1 Supplement" and one
    /// expected to fall outside every block.
    #[test]
    fn real_unicode() {
        let blocks: Blocks = include_str!("../Blocks.txt").parse().unwrap();
        let expectations = [
            ('\u{0080}', "Latin-1 Supplement"),
            ('½', "Latin-1 Supplement"),
            ('\u{00FF}', "Latin-1 Supplement"),
            ('\u{EFFFF}', "No_Block"),
        ];
        for (c, expected) in expectations {
            assert_eq!(blocks.block_of(c), expected);
        }
    }
}
/// URL that `Blocks::download` fetches `Blocks.txt` from.
pub const LATEST_URL: &str = "https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt";
use std::cmp;
use std::fs;
use std::io;
use std::num::ParseIntError;
use std::ops::RangeInclusive;
use std::path::Path;
use std::str::FromStr;
use snafu::prelude::*;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment