Skip to content

Instantly share code, notes, and snippets.

/file.rs Secret

Created June 30, 2014 18:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/86f6ac802285dea84902 to your computer and use it in GitHub Desktop.
Save anonymous/86f6ac802285dea84902 to your computer and use it in GitHub Desktop.
extern crate libc;
/// Display width of a single character, measured in terminal columns.
pub trait CharWidth {
    /// Returns the number of columns the character occupies, or `None` when
    /// it is a control character that has no meaningful width.
    fn width(self) -> Option<usize>;
}

/// Display width of a string, measured in terminal columns.
pub trait StringWidth {
    /// Returns the total number of columns the string occupies.
    fn width(self) -> usize;
}

impl CharWidth for char {
    /// Delegates to the free function [`width`].
    fn width(self) -> Option<usize> {
        width(self)
    }
}

impl<'a> StringWidth for &'a str {
    /// Sums the widths of all characters in the string.
    ///
    /// Characters for which [`CharWidth::width`] returns `None` (control
    /// characters) are counted as one column each.
    fn width(self) -> usize {
        self.chars().map(|c| c.width().unwrap_or(1)).sum()
    }
}

// One might be tempted to use libc's wcwidth function here. But that would be a mistake
// since glibc's wcwidth is completely broken and doesn't even pass the test below.
/// Returns the number of terminal columns occupied by `c`.
///
/// * `None` for control characters: U+0001..=U+001F and U+007F..=U+009F.
/// * `Some(0)` for NUL and for the non-spacing characters listed in the table
///   below.
/// * `Some(2)` for the wide (East Asian) ranges listed in the table below.
/// * `Some(1)` for everything else, including printable ASCII.
///
/// All ranges in the table are inclusive on both ends.
pub fn width(c: char) -> Option<usize> {
    let cp = u32::from(c);
    // Fast path for ascii
    if (0x20..=0x7e).contains(&cp) {
        return Some(1);
    }
    // C0 controls (NUL excluded, it is handled in the table), DEL and C1 controls.
    if (cp > 0x00 && cp < 0x20) || (cp >= 0x7f && cp < 0xa0) {
        return None;
    }
    let columns = match cp {
        0x00000 => 0,
        // Non-spacing characters (Mn + Me + Cf - 0x00AD)
        0x00300..=0x0036f => 0,
        0x00483..=0x00489 => 0,
        0x00591..=0x005bd => 0,
        0x005bf => 0,
        0x005c1..=0x005c2 => 0,
        0x005c4..=0x005c5 => 0,
        0x005c7 => 0,
        0x00600..=0x00604 => 0,
        0x00610..=0x0061a => 0,
        0x0061c => 0,
        0x0064b..=0x0065f => 0,
        0x00670 => 0,
        0x006d6..=0x006dd => 0,
        0x006df..=0x006e4 => 0,
        0x006e7..=0x006e8 => 0,
        0x006ea..=0x006ed => 0,
        0x0070f => 0,
        0x00711 => 0,
        0x00730..=0x0074a => 0,
        0x007a6..=0x007b0 => 0,
        0x007eb..=0x007f3 => 0,
        0x00816..=0x00819 => 0,
        0x0081b..=0x00823 => 0,
        0x00825..=0x00827 => 0,
        0x00829..=0x0082d => 0,
        0x00859..=0x0085b => 0,
        0x008e4..=0x008fe => 0,
        0x00900..=0x00902 => 0,
        0x0093a => 0,
        0x0093c => 0,
        0x00941..=0x00948 => 0,
        0x0094d => 0,
        0x00951..=0x00957 => 0,
        0x00962..=0x00963 => 0,
        0x00981 => 0,
        0x009bc => 0,
        0x009c1..=0x009c4 => 0,
        0x009cd => 0,
        0x009e2..=0x009e3 => 0,
        0x00a01..=0x00a02 => 0,
        0x00a3c => 0,
        0x00a41..=0x00a42 => 0,
        0x00a47..=0x00a48 => 0,
        0x00a4b..=0x00a4d => 0,
        0x00a51 => 0,
        0x00a70..=0x00a71 => 0,
        0x00a75 => 0,
        0x00a81..=0x00a82 => 0,
        0x00abc => 0,
        0x00ac1..=0x00ac5 => 0,
        0x00ac7..=0x00ac8 => 0,
        0x00acd => 0,
        0x00ae2..=0x00ae3 => 0,
        0x00b01 => 0,
        0x00b3c => 0,
        0x00b3f => 0,
        0x00b41..=0x00b44 => 0,
        0x00b4d => 0,
        0x00b56 => 0,
        0x00b62..=0x00b63 => 0,
        0x00b82 => 0,
        0x00bc0 => 0,
        0x00bcd => 0,
        0x00c3e..=0x00c40 => 0,
        0x00c46..=0x00c48 => 0,
        0x00c4a..=0x00c4d => 0,
        0x00c55..=0x00c56 => 0,
        0x00c62..=0x00c63 => 0,
        0x00cbc => 0,
        0x00cbf => 0,
        0x00cc6 => 0,
        0x00ccc..=0x00ccd => 0,
        0x00ce2..=0x00ce3 => 0,
        0x00d41..=0x00d44 => 0,
        0x00d4d => 0,
        0x00d62..=0x00d63 => 0,
        0x00dca => 0,
        0x00dd2..=0x00dd4 => 0,
        0x00dd6 => 0,
        0x00e31 => 0,
        0x00e34..=0x00e3a => 0,
        0x00e47..=0x00e4e => 0,
        0x00eb1 => 0,
        0x00eb4..=0x00eb9 => 0,
        0x00ebb..=0x00ebc => 0,
        0x00ec8..=0x00ecd => 0,
        0x00f18..=0x00f19 => 0,
        0x00f35 => 0,
        0x00f37 => 0,
        0x00f39 => 0,
        0x00f71..=0x00f7e => 0,
        0x00f80..=0x00f84 => 0,
        0x00f86..=0x00f87 => 0,
        0x00f8d..=0x00f97 => 0,
        0x00f99..=0x00fbc => 0,
        0x00fc6 => 0,
        0x0102d..=0x01030 => 0,
        0x01032..=0x01037 => 0,
        0x01039..=0x0103a => 0,
        0x0103d..=0x0103e => 0,
        0x01058..=0x01059 => 0,
        0x0105e..=0x01060 => 0,
        0x01071..=0x01074 => 0,
        0x01082 => 0,
        0x01085..=0x01086 => 0,
        0x0108d => 0,
        0x0109d => 0,
        0x0135d..=0x0135f => 0,
        0x01712..=0x01714 => 0,
        0x01732..=0x01734 => 0,
        0x01752..=0x01753 => 0,
        0x01772..=0x01773 => 0,
        0x017b4..=0x017b5 => 0,
        0x017b7..=0x017bd => 0,
        0x017c6 => 0,
        0x017c9..=0x017d3 => 0,
        0x017dd => 0,
        0x0180b..=0x0180e => 0,
        0x018a9 => 0,
        0x01920..=0x01922 => 0,
        0x01927..=0x01928 => 0,
        0x01932 => 0,
        0x01939..=0x0193b => 0,
        0x01a17..=0x01a18 => 0,
        0x01a1b => 0,
        0x01a56 => 0,
        0x01a58..=0x01a5e => 0,
        0x01a60 => 0,
        0x01a62 => 0,
        0x01a65..=0x01a6c => 0,
        0x01a73..=0x01a7c => 0,
        0x01a7f => 0,
        0x01b00..=0x01b03 => 0,
        0x01b34 => 0,
        0x01b36..=0x01b3a => 0,
        0x01b3c => 0,
        0x01b42 => 0,
        0x01b6b..=0x01b73 => 0,
        0x01b80..=0x01b81 => 0,
        0x01ba2..=0x01ba5 => 0,
        0x01ba8..=0x01ba9 => 0,
        0x01bab => 0,
        0x01be6 => 0,
        0x01be8..=0x01be9 => 0,
        0x01bed => 0,
        0x01bef..=0x01bf1 => 0,
        0x01c2c..=0x01c33 => 0,
        0x01c36..=0x01c37 => 0,
        0x01cd0..=0x01cd2 => 0,
        0x01cd4..=0x01ce0 => 0,
        0x01ce2..=0x01ce8 => 0,
        0x01ced => 0,
        0x01cf4 => 0,
        0x01dc0..=0x01de6 => 0,
        0x01dfc..=0x01dff => 0,
        0x0200b..=0x0200f => 0,
        0x0202a..=0x0202e => 0,
        0x02060..=0x02064 => 0,
        0x02066..=0x0206f => 0,
        0x020d0..=0x020f0 => 0,
        0x02cef..=0x02cf1 => 0,
        0x02d7f => 0,
        0x02de0..=0x02dff => 0,
        0x0302a..=0x0302d => 0,
        0x03099..=0x0309a => 0,
        0x0a66f..=0x0a672 => 0,
        0x0a674..=0x0a67d => 0,
        0x0a69f => 0,
        0x0a6f0..=0x0a6f1 => 0,
        0x0a802 => 0,
        0x0a806 => 0,
        0x0a80b => 0,
        0x0a825..=0x0a826 => 0,
        0x0a8c4 => 0,
        0x0a8e0..=0x0a8f1 => 0,
        0x0a926..=0x0a92d => 0,
        0x0a947..=0x0a951 => 0,
        0x0a980..=0x0a982 => 0,
        0x0a9b3 => 0,
        0x0a9b6..=0x0a9b9 => 0,
        0x0a9bc => 0,
        0x0aa29..=0x0aa2e => 0,
        0x0aa31..=0x0aa32 => 0,
        0x0aa35..=0x0aa36 => 0,
        0x0aa43 => 0,
        0x0aa4c => 0,
        0x0aab0 => 0,
        0x0aab2..=0x0aab4 => 0,
        0x0aab7..=0x0aab8 => 0,
        0x0aabe..=0x0aabf => 0,
        0x0aac1 => 0,
        0x0aaec..=0x0aaed => 0,
        0x0aaf6 => 0,
        0x0abe5 => 0,
        0x0abe8 => 0,
        0x0abed => 0,
        0x0fb1e => 0,
        0x0fe00..=0x0fe0f => 0,
        0x0fe20..=0x0fe26 => 0,
        0x0feff => 0,
        0x0fff9..=0x0fffb => 0,
        0x101fd => 0,
        0x10a01..=0x10a03 => 0,
        0x10a05..=0x10a06 => 0,
        0x10a0c..=0x10a0f => 0,
        0x10a38..=0x10a3a => 0,
        0x10a3f => 0,
        0x11001 => 0,
        0x11038..=0x11046 => 0,
        0x11080..=0x11081 => 0,
        0x110b3..=0x110b6 => 0,
        0x110b9..=0x110ba => 0,
        0x110bd => 0,
        0x11100..=0x11102 => 0,
        0x11127..=0x1112b => 0,
        0x1112d..=0x11134 => 0,
        0x11180..=0x11181 => 0,
        0x111b6..=0x111be => 0,
        0x116ab => 0,
        0x116ad => 0,
        0x116b0..=0x116b5 => 0,
        0x116b7 => 0,
        0x16f8f..=0x16f92 => 0,
        0x1d167..=0x1d169 => 0,
        0x1d173..=0x1d182 => 0,
        0x1d185..=0x1d18b => 0,
        0x1d1aa..=0x1d1ad => 0,
        0x1d242..=0x1d244 => 0,
        0xe0001 => 0,
        0xe0020..=0xe007f => 0,
        0xe0100..=0xe01ef => 0,
        // Hangul Jamo init. consonants
        0x01100..=0x0115f => 2,
        0x02329 => 2,
        0x0232a => 2,
        // CJK ... Yi
        // U+303F is carved out of this range by the guard and therefore falls
        // through to the default of one column. The zero-width entries inside
        // this range (U+302A..=U+302D, U+3099..=U+309A) are matched earlier.
        0x02e80..=0x0a4cf if cp != 0x0303f => 2,
        // Hangul Syllables
        0x0ac00..=0x0d7a3 => 2,
        // CJK Compatibility Ideographs
        0x0f900..=0x0faff => 2,
        // Vertical forms
        0x0fe10..=0x0fe19 => 2,
        // CJK Compatibility Forms
        0x0fe30..=0x0fe6f => 2,
        // Fullwidth Forms
        0x0ff00..=0x0ff60 => 2,
        0x0ffe0..=0x0ffe6 => 2,
        0x20000..=0x2fffd => 2,
        0x30000..=0x3fffd => 2,
        _ => 1,
    };
    Some(columns)
}
/// Checks that plain ASCII text takes one column per character and that CJK
/// ideographs take two columns each.
#[test]
fn test_string_width() {
    let cases = [("test", 4), ("你好", 4)];
    for &(text, expected) in cases.iter() {
        assert_eq!(text.width(), expected);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment