Created
August 4, 2023 02:35
-
-
Save crlf0710/87480a71ff9b9d35bd6da0054b0d0645 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// FIXME: finish documenting every public item, then drop this allowance.
#![allow(missing_docs)]
/// One unit of source text as seen by `conversion_to_plain_text`.
///
/// Every method takes `&self`, including the ones whose names describe an
/// edit, so an implementation that really rewrites text has to do so through
/// interior mutability or an external buffer.
///
/// Abbreviations used in the method names are the Unicode bidi controls:
/// LRM = U+200E LEFT-TO-RIGHT MARK, RLM = U+200F RIGHT-TO-LEFT MARK,
/// FSI = U+2068 FIRST STRONG ISOLATE, PDI = U+2069 POP DIRECTIONAL ISOLATE,
/// PDF = U+202C POP DIRECTIONAL FORMATTING.
///
/// The lifetime parameter `'a` is not used by any method of this trait; it is
/// only threaded through by `AtomList::Atom<'a>`.
pub trait Atom<'a> {
    /// Returns the text of this atom.
    fn as_str(&self) -> &str;

    /// Reports whether this atom is whitespace.
    fn is_whitespace(&self) -> bool;

    /// Reports whether this atom is the content of a comment.
    fn is_comment_content(&self) -> bool;

    /// Reports whether an LRM may be placed in front of this atom.
    fn can_prepend_lrm(&self) -> bool;

    /// Presumably inserts an LRM in front of this atom's text
    /// (exact effect is up to the implementor).
    fn prepend_lrm(&self);

    /// Presumably inserts an FSI in front of this atom's text
    /// (exact effect is up to the implementor).
    fn prepend_fsi(&self);

    /// Presumably strips every LRM and RLM from this atom's text
    /// (exact effect is up to the implementor).
    fn remove_lrm_and_rlm(&self);

    // FIXME: split this into smaller functions
    /// Presumably appends the PDI/PDF code points needed to close whatever
    /// isolates or embeddings this atom leaves open (exact effect is up to
    /// the implementor).
    fn append_pdi_and_pdf(&self);
}
/// Stub: always panics through `todo!()`.
///
/// Presumably meant to return `true` when `ch` is `None` or a code point
/// whose Unicode Bidi_Class is L (left-to-right) — TODO confirm once
/// implemented. Deciding Bidi_Class=L needs a Unicode property table, which
/// the standard library does not provide.
fn is_l_class_or_none(ch: Option<char>) -> bool {
    todo!()
}
/// Returns `true` when `ch` is a code point whose Unicode Bidi_Class is
/// B (Paragraph Separator).
///
/// The class has exactly seven members: U+000A, U+000D, U+001C..=U+001E,
/// U+0085 and U+2029. `None` (no code point at all) is not a paragraph
/// separator and yields `false`.
fn is_b_class(ch: Option<char>) -> bool {
    matches!(
        ch,
        Some('\u{000A}' | '\u{000D}' | '\u{001C}'..='\u{001E}' | '\u{0085}' | '\u{2029}')
    )
}
/// Returns `true` when `ch` is U+2068 FIRST STRONG ISOLATE (FSI).
///
/// `None` (no code point at all) yields `false`.
fn is_fsi(ch: Option<char>) -> bool {
    const FIRST_STRONG_ISOLATE: char = '\u{2068}';
    ch == Some(FIRST_STRONG_ISOLATE)
}
fn atom_first_cp(atom: &dyn Atom) -> Option<char> { | |
atom.as_str().chars().nth(0) | |
} | |
fn atom_last_cp(atom: &dyn Atom) -> Option<char> { | |
atom.as_str().chars().last() | |
} | |
fn atom_find_first_cp_in_10_class(atom: &dyn Atom) -> Option<char> { | |
todo!() | |
} | |
fn atom_find_first_cp_in_8_class(atom: &dyn Atom) -> Option<char> { | |
todo!() | |
} | |
fn atom_find_first_cp_in_5_class(atom: &dyn Atom) -> Option<char> { | |
todo!() | |
} | |
fn atom_has_unmatched_isolate_or_embedding_initiators(atom: &dyn Atom) -> bool { | |
todo!() | |
} | |
pub trait AtomList { | |
type Atom<'a>: Atom<'a> | |
where | |
Self: 'a; | |
type AtomIter<'a>: Iterator<Item = Self::Atom<'a>> | |
where | |
Self: 'a; | |
fn atoms(&self) -> Self::AtomIter<'_>; | |
fn record_conversion_error(&self); | |
} | |
pub fn conversion_to_plain_text<L, I>(lines: I) | |
where | |
L: AtomList, | |
I: IntoIterator<Item = L>, | |
{ | |
let mut needs_lrm = false; | |
'outer: for line in lines { | |
let mut atom_list = line.atoms().peekable(); | |
let mut last_atom = None; | |
loop { | |
let Some(atom) = atom_list.next() else { | |
break; | |
}; | |
if atom.is_whitespace() { | |
atom.remove_lrm_and_rlm(); | |
} | |
if needs_lrm { | |
if atom.can_prepend_lrm() { | |
atom.prepend_lrm(); | |
needs_lrm = false; | |
} else { | |
let first_cp_in_10_class = atom_find_first_cp_in_10_class(&atom); | |
if !is_l_class_or_none(first_cp_in_10_class) { | |
line.record_conversion_error(); | |
continue 'outer; | |
} | |
} | |
} | |
if atom.is_comment_content() { | |
let first_cp = atom_first_cp(&atom); | |
if !is_fsi(first_cp) { | |
let first_cp_in_8_class = atom_find_first_cp_in_8_class(&atom); | |
if !is_l_class_or_none(first_cp_in_8_class) { | |
atom.prepend_fsi(); | |
} | |
} | |
if let Some(next_atom) = atom_list.peek() { | |
let next_first_cp = atom_first_cp(next_atom); | |
if !is_b_class(next_first_cp) { | |
// FIXME: | |
atom.append_pdi_and_pdf(); | |
} | |
} | |
} | |
if let Some(next_atom) = atom_list.peek() { | |
let next_first_cp = atom_first_cp(next_atom); | |
if !is_b_class(next_first_cp) { | |
if atom_has_unmatched_isolate_or_embedding_initiators(&atom) { | |
line.record_conversion_error(); | |
continue 'outer; | |
} | |
let first_cp_in_5_class = atom_find_first_cp_in_5_class(&atom); | |
if !is_l_class_or_none(first_cp_in_5_class) { | |
needs_lrm = true; | |
} | |
} | |
} | |
last_atom = Some(atom); | |
} | |
if let Some(last_atom) = last_atom { | |
let last_cp = atom_last_cp(&last_atom); | |
if !is_b_class(last_cp) { | |
needs_lrm = false; | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment