Skip to content

Instantly share code, notes, and snippets.

@np
Created June 16, 2011 22:18
Show Gist options
  • Save np/1030431 to your computer and use it in GitHub Desktop.
Save np/1030431 to your computer and use it in GitHub Desktop.
{-# LANGUAGE PatternGuards #-}
import Yi.Char.Unicode
import Data.Char
import Data.List
import qualified System.IO.UTF8 as U8
genLine :: String -> String -> String
genLine name s = "<Multi_key> " ++ keysFromString name ++ " : \"" ++ s ++ "\""
genLines :: [(String,String)] -> String
genLines = unlines . map (uncurry genLine)
keysFromString :: String -> String
keysFromString = intercalate " " . map keyFromChar
keyFromCharData :: [(Char,String)]
keyFromCharData =
[('\'', "apostrophe")
,('`', "grave")
,('<', "less")
,('>', "greater")
,('|', "bar")
,('(', "parenleft")
,(')', "parenright")
,('[', "bracketleft")
,(']', "bracketright")
,('{', "braceleft")
,('}', "braceright")
,('+', "plus")
,('-', "minus")
,('^', "asciicircum")
,('.', "period")
,('=', "equal")
,('~', "asciitilde")
,('/', "slash")
,('\\', "backslash")
,('?', "question")
,('!', "exclam")
,('_', "underscore")
,(':', "colon")
,('*', "asterisk")
,('"', "quotedbl")
,('#', "numbersign")
,(',', "comma")
,(' ', "space")
{-
,('', "")
-}
]
keyFromChar :: Char -> String
keyFromChar c | isAscii c && isAlphaNum c = ['<',c,'>']
| Just s <- lookup c keyFromCharData = '<' : s ++ ">"
| otherwise = error $ "keyFromChar: `" ++ c : "'"
disambMore = filter ((/= "fake") . snd) . disamb . ((".>","fake"):)
main :: IO ()
main =
U8.writeFile ".XCompose" . genLines . disambMore
$ greek ++ symbols ++ subscripts ++ superscripts
module Yi.Char.Unicode (greek, symbols, subscripts, superscripts, checkAmbs, disamb) where
import Data.List (isPrefixOf)
import Control.Applicative
greek :: [(String, String)]
greek = [(name, unicode) | (_,name,unicode) <- greekData] ++
[ ([leading,shorthand],unicode)
| (Just shorthand,_,unicode) <- greekData
, leading <- ['\'', 'g'] ]
-- | Triples: (shorthand, name, unicode)
greekData :: [(Maybe Char, String, String)]
greekData = [(Just 'a', "alpha", "α")
,(Just 'b', "beta", "β")
,(Just 'g', "gamma", "γ")
,(Just 'G', "Gamma", "Γ")
,(Just 'd', "delta", "δ")
,(Just 'D', "Delta", "Δ")
,(Just 'e' , "epsilon", "ε")
,(Just 'z', "zeta", "ζ")
,(Just 'N' , "eta", "η") -- N is close to n which is graphically close
,(Just 'E' , "eta", "η") -- E is close to e which is the start of eta
,(Nothing , "theta", "θ")
,(Nothing , "Theta", "Θ")
,(Just 'i', "iota", "ι")
,(Just 'k', "kapa", "κ")
,(Just 'l', "lambda", "λ")
,(Just 'L', "Lambda", "Λ")
,(Just 'm', "mu", "μ")
,(Just 'n', "nu", "ν")
,(Just 'x', "xi", "ξ")
,(Just 'o', "omicron", "ο")
,(Just 'p' , "pi", "π")
,(Just 'P' , "Pi", "Π")
,(Just 'r', "rho", "ρ")
,(Just 's', "sigma", "σ")
,(Just 'S', "Sigma", "Σ")
,(Just 't', "tau", "τ")
,(Just 'f' , "phi", "φ")
,(Just 'F' , "Phi", "Φ")
,(Just 'c', "chi", "χ")
,(Just 'C', "Chi", "Χ")
,(Nothing , "psi", "ψ")
,(Nothing , "Psi", "Ψ")
,(Just 'w', "omega", "ω")
,(Just 'O', "Omega", "Ω")
]
accents :: [(String,String)]
accents = concat
[a 'a' 'á' 'à' 'â' 'ä' 'ã' 'å' ' '
,a 'c' 'ć' ' ' 'ĉ' ' ' ' ' ' ' 'ç'
,a 'e' 'é' 'è' 'ê' 'ë' ' ' ' ' ' '
,a 'i' 'í' 'ì' 'î' 'ï' 'ĩ' ' ' ' '
,a 'n' 'ń' ' ' ' ' ' ' 'ñ' ' ' 'ņ'
,a 'o' 'ó' 'ò' 'ô' 'ö' 'õ' ' ' ' '
,a 'u' 'ú' 'ù' 'û' 'ü' ' ' ' ' ' '
,a 'y' 'ý' ' ' 'ŷ' 'ÿ' ' ' ' ' ' '
]
where a k acute agrave acircum adiar atild aring aced =
filter ((/=" ").snd)
[b 'e' acute
,b '`' agrave
,b 'i' acircum
,b 'u' adiar
,b '~' atild
,b 'o' aring
,b ',' aced] where b p l = ([p,k],[l])
parens :: [((String,String),(String,String))]
parens =
-- parens
[a "<" "⟨" ">" "⟩"
,a "<<" "⟪" ">>" "⟫"
-- These two confuse gnome-terminal.
,a "|(" "〖" ")|" "〗"
,a "{|" "⦃" "|}" "⦄"
,a "{{" "⦃" "}}" "⦄"
,a "[[" "⟦" "]]" "⟧"
,a "|_" "⌊" "_|" "⌋"
,a "r|_" "⌈" "r_|" "⌉"
]
where a x y z t = ((x,y),(z,t))
symParen :: ((String,String),(String,String)) -> (String,String)
symParen ((x,y),(_,z)) = ('s':x, y ++ z)
symbols :: [(String, String)]
symbols =
map fst parens ++ map snd parens ++ map symParen parens ++
accents ++
[("99", "«")
,("00", "»")
,("90", "«»")
-- quantifiers
,("forall", "∀")
,("exists", "∃")
,("rA", "∀") -- reversed A
,("rE", "∃") -- reversed E
,("/rE", "∄")
-- operators
,("<|","◃")
-- ,("<|","◁") alternative
,("|>","▹")
-- ,("|>", "▷")
,("><","⋈")
,("<)", "◅")
,("(>", "▻")
,("v","∨")
,("u","∪")
,("V","⋁")
,("+u","⊎")
,("u+","⊎")
,("u[]","⊔")
,("n[]","⊓")
,("^","∧")
,("/\\", "∧")
,("\\/", "∨")
,("o","∘")
,(".","·")
,("...", "…")
,("c...", "⋯") -- 'c' for centered
,("x","×")
,("neg","¬")
,("-.","∸")
--- arrows
,("<-","←")
,("->","→")
,("|->","↦")
,("<-|","↤")
,("<--","⟵")
,("-->","⟶")
,("|-->","⟼")
,("|^", "↑")
,("==>","⟹")
,("=>","⇒")
,("<=","⇐")
,("<=>","⇔")
,("~>","↝")
,("<~","↜")
,("~->","⇝")
,("<-~","⇜")
,("<-<", "↢")
,(">->", "↣")
,("<->", "↔")
,("|<-", "⇤")
,("->|", "⇥")
,(">>=","↠")
,("->>","↠")
,("/-", "↼")
,("\\-", "↽")
,("-/", "⇁")
,("-\\", "⇀")
,("-|->", "⇸")
--- relations
,("c=","⊆")
,("/c=","⊈")
,("c","⊂")
,("/c","⊄")
,("c-","∈")
,("/c-","∉")
,("c/=","⊊")
,("rc=","⊇") -- r for reversed
,("rc","⊃") -- r for reversed
,("rc-","∋") -- r for reversed
,("r/c-","∌") -- r for reversed
,("rc/=","⊋") -- r for reversed
,(">=","≥")
,("=<","≤")
,("/>=","≱")
,("/=<","≰")
,("c[]","⊏")
,("rc[]","⊐")
,("c[]=","⊑")
,("rc[]=","⊒")
,("/c[]=","⋢")
,("/rc[]=","⋣")
,("c[]/=","⋤")
,("rc[]/=","⋥")
---- equal signs
,("=def","≝")
,("=?","≟")
,("=o","≗")
,("==","≡")
,("~~","≈")
,("~-","≃")
,("~=","≅")
,("~","∼")
,("/=","≠")
,("/==","≢")
,(":=","≔")
,("=:","≕")
-- misc
,("_|_","⊥")
,("Top","⊤")
,("l","ℓ") -- same as cl
,("::","∷")
,(":", "∶")
,("0", "∅")
,("r8","∞")
,("*", "★") -- or "⋆"
,("/'l","ƛ")
,("d","∂")
,("#b","♭") -- music bemol
,("#f","♮") -- music flat
,("##","♯") -- music #
,("Hot","♨")
,("Cut","✂")
,("Pen","✎")
,("Tick","✓")
-- Currency Symbols
,("B|","฿")
,("ob","ⓑ")
,("e=","€") -- alternatives =C =c E= =E =e
,("L-","£") -- alternatives -L
,("Y=","¥") -- alternatives =Y
,("ox","¤") -- currency sign
,("xo","¤") -- currency sign
{-
CE "₠" # EURO-CURRENCY SIGN
C/ "₡" # COLON SIGN
/C "₡" # COLON SIGN
Cr "₢" # CRUZEIRO SIGN
Fr "₣" # FRENCH FRANC SIGN
L= "₤" # LIRA SIGN
=L "₤" # LIRA SIGN
m/ "₥" # MILL SIGN
/m "₥" # MILL SIGN
N= "₦" # NAIRA SIGN
=N "₦" # NAIRA SIGN
Pt "₧" # PESETA SIGN
Rs "₨" # RUPEE SIGN
W= "₩" # WON SIGN
=W "₩" # WON SIGN
"₪" # NEW SHEQEL SIGN
d- "₫" # DONG SIGN
"₭" # KIP SIGN
"₮" # TUGRIK SIGN
"₯" # DRACHMA SIGN
"₰" # GERMAN PENNY SIGN
"₱" # PESO SIGN
"₲" # GUARANI SIGN
"₳" # AUSTRAL SIGN
"₴" # HRYVNIA SIGN
"₵" # CEDI SIGN
|c "¢" # CENT SIGN
c| "¢" # CENT SIGN
c/ "¢" # CENT SIGN
/c "¢" # CENT SIGN
-}
-- dashes
,("-","−")
-- quotes
,("\"","“”")
,("r`","′")
-- turnstyles
,("|-", "⊢")
,("|/-", "⊬")
,("-|", "⊣")
,("|=", "⊨")
,("|/=", "⊭")
,("||-", "⊩")
-- circled/squared operators
-- ⊝ ⍟ ⎊ ⎉
] ++
-- maybe this makes too much combinations and is wasteful?
[ (xy,[z])
| x <- ['o','O']
, (y,z) <- [('+','⊕')
,('-','⊖')
,('x','⊗')
,('/','⊘')
,('*','⊛')
,('=','⊜')
,('.','⊙')
]
, not (x=='o' && y=='=')
, xy <- [[x,y],[y,x]]
]
++
[ -- I'm not yet happy with those
("Oo","⊚")
,("oo","°")
,("o^","°")
]
++
[
("[+]","⊞")
,("[-]","⊟")
,("[x]","⊠")
,("[.]","⊡")
,("[]","∎")
,("[ ]","☐")
,("[Tick]","☑")
-- circles "◎◍◐◑◒◓◔◕◖◗◠◡◴◵◶◷⚆⚇⚈⚉"
--,("ob","●")
--,("ow","○")
,("o..","◌")
,("oO","◯")
,("!!","‼")
,("??","⁇")
,("?!","⁈")
,("?b!","‽") -- 'b' like backspace
,("!?","⁉")
,("r?", "¿") -- 'r' for reversed
,("r!", "¡") -- 'r' for reversed
,("eth","ð")
] ++ [ (leading:l, [u]) | leading <- ['|','b'], (l,u) <-
[("N",'ℕ')
,("H",'ℍ')
,("P",'ℙ')
,("R",'ℝ')
,("C",'ℂ')
,("D",'ⅅ')
,("Q",'ℚ')
,("Z",'ℤ')
,("gg",'ℽ')
,("gG",'ℾ')
,("gP",'ℿ')
,("gS",'⅀')
]
] ++ [
-- c for cal
("cP","℘")
,("cL","ℒ")
,("cR","ℛ")
,("cN","𝒩")
,("cE","ℰ")
,("cF","ℱ")
,("cH","ℋ")
,("cI","ℐ")
,("cM","ℳ")
,("ce","ℯ")
,("cg","ℊ")
,("co","ℴ")
,("cl","ℓ")
]
checkAmbs :: [(String, String)] -> [(String, String)]
checkAmbs table = check
where ambs = [ (x, y)
| v@(x, _) <- table
, w@(y, _) <- table
, v /= w
, x `isPrefixOf` y ]
check | null ambs = table
| otherwise = error $ "checkAmbs: ambiguous declarations for " ++ show ambs
disamb :: [(String, String)] -> [(String, String)]
disamb table = map f table
where f v@(x, vx) =
let ambs = [ w
| w@(y, _) <- table
, v /= w
, x `isPrefixOf` y ]
in if null ambs then v else (x ++ " ", vx)
-- More:
-- arrows: ⇆
-- turnstyles: ⊦ ⊧
-- subscript: ₔ
zipscripts :: Char -> String -> String -> [(String, String)]
zipscripts c ascii unicode
= zip (fmap ((c:) . pure) ascii) (fmap pure unicode)
subscripts, superscripts :: [(String, String)]
subscripts = zipscripts '_' "0123456789+-=()aeioruvx"
"₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑᵢₒᵣᵤᵥₓ"
superscripts = zipscripts '^' -- NOTE that qCFQSVXYZ are missing
"0123456789+-=()abcdefghijklmnoprstuvwxyzABDEGHIJKLMNOPRTUW"
"⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘᵛʷˣʸᶻᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂ"
{-
∣ ⋆ ⁺ ∙ ∥   ⁻ • ✶ ↯ ≺ ⋎ ″ ≳ ≲ ◁ ∗ ≊ ◇ ⌝ ⌜ ≉ ≰ ‿ ≋ ⨀ ⊴ ≮ □ ⇛ ⊸ ≯
⇿ ⇨ ↓ ↡ ↛ ⇓ ⇑
⋐ ⁆ ⁅ ϕ ⊪ ◂ ≴ ≁ ÷ ◈ ⑵ ⑴ ∩ ̂ ≻ ⇾ ↟ ► ≇ ∔ ▶ ≛ ⇦ ⦈ ⦇ ⑶ ⋃ ⋂ ≵ ½ ’ —
ö ɐ ∁ Μ ʇ ɥ ǝ ı ó í å à é
-}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment