Skip to content

Instantly share code, notes, and snippets.

@NV
Created December 1, 2009 00:38
Show Gist options
  • Save NV/245940 to your computer and use it in GitHub Desktop.
Save NV/245940 to your computer and use it in GitHub Desktop.
Script for tweet shortening using unicode ligatures and other compound symbols
// ==UserScript==
// @name ligatweet
// @namespace http://leaverou.me/demos/ligatweet/
// @description Script for tweet shortening using unicode ligatures and other compound symbols
// @include htt*://twitter.com/*
// @author Lea Verou (UserJS by Nikita Vasilyev)
// @version 1.0
// @license Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php
// ==/UserScript==
(function(){
// Replacement table consumed by ligatweetize().
//
// Shape: an array of tiers; each tier is an array of groups; each group maps a
// regular-expression source string (hence the escaped dots and question marks)
// to the 4-digit hexadecimal code point of the single symbol that replaces it.
// Tiers and groups are tried in order and shortening stops as soon as the
// tweet fits, so earlier entries are preferred over later ones.
var ligatures = [
    // From most discreet to most noticeable, from more characters to less characters
    [{
        'viii': '2177'
    }, {
        // U+2026 HORIZONTAL ELLIPSIS. The table used to hold 0085, a C1 control
        // code that only shows up as an ellipsis when an HTML parser applies its
        // legacy Windows-1252 remapping to the numeric character reference.
        '\\.\\.\\.': '2026',
        '\\b1/3\\b': '2153',
        '\\b2/3\\b': '2154',
        '\\b1/8\\b': '215B',
        '\\b3/8\\b': '215C',
        '\\b5/8\\b': '215D',
        '\\b7/8\\b': '215E',
        'iii': '2172',
        'vii': '2176',
        'xii': '217B',
        '<=>': '21D4',
        '10\\.': '2491',
        '11\\.': '2492',
        '12\\.': '2493',
        '13\\.': '2494',
        '14\\.': '2495',
        '15\\.': '2496',
        '16\\.': '2497',
        '17\\.': '2498',
        '18\\.': '2499',
        '19\\.': '249a',
        '20\\.': '249b',
        'ffi': 'FB03',
        'ffl': 'FB04'
    }, {
        '--': '2014', // U+2014 EM DASH (was the C1 control code 0097)
        'AE': '00C6', 'ae': '00E6',
        'oe': '0153', 'OE': '0152', // U+0153 is the real "oe" ligature (was the C1 control code 009C)
        'IJ': '0132', 'ij': '0133',
        "L'": '013D',
        // NOTE(review): U+013E is a lowercase l with caron while the key is a
        // capital I plus apostrophe - presumably a visual lookalike; confirm.
        "I'": '013E',
        'LJ': '01c7', 'Lj': '01c8', 'lj': '01c9',
        'NJ': '01ca', 'Nj': '01cb', 'nj': '01cc',
        'DZ': '01f1', 'Dz': '01f2', 'dz': '01f3',
        'ts': '02A6',
        'tf': '02A7',
        'tc': '02A8',
        'fn': '02A9',
        'ls': '02Aa',
        'lz': '02Ab',
        'Hb': '040A',
        'bl': '042B',
        'IO': '042E',
        'io': '044E',
        'Oy': '0478',
        'oy': '0479',
        'ue': '1D6B',
        'll': '2016', // or 0965?
        '!!': '203C',
        '\\?\\?': '2047',
        '\\?!': '2048',
        '!\\?': '2049',
        'Rs': '20A8',
        'tb': '2114',
        'ii': '2171',
        'iv': '2173',
        'vi': '2175',
        'ix': '2178',
        'xi': '217A',
        '<-': '2190', '->': '2192',
        '=>': '21D2',
        '<<': '226A', '>>': '226B',
        '1\\.': '2488',
        '2\\.': '2489',
        '3\\.': '248a',
        '4\\.': '248b',
        '5\\.': '248c',
        '6\\.': '248d',
        '7\\.': '248e',
        '8\\.': '248f',
        '9\\.': '2490',
        'ff': 'FB00',
        'fi': 'FB01',
        'fl': 'FB02',
        'ft': 'FB05',
        'st': 'FB06',
        ' !': 'FE15',
        ' \\?': 'FE16'
    }],
    [{
        '\\b1/5\\b': '2155',
        '\\b2/5\\b': '2156',
        '\\b3/5\\b': '2157',
        '\\b4/5\\b': '2158',
        '\\b1/6\\b': '2159',
        '\\b5/6\\b': '215A',
        'VIII': '2167', // ROMAN NUMERAL EIGHT (2166 is SEVEN and belongs to 'VII')
        'kcal': '3389',
        'a\\.m\\.': '33C2',
        'K\\.K\\.': '33CD',
        'p\\.m\\.': '33D8'
    }, {
        '\\b1/4\\b': '00BC',
        '\\b1/2\\b': '00BD',
        '\\b3/4\\b': '00BE',
        'Pts': '20A7',
        'TEL': '2121',
        //'FAX': '213B', //unsupported by IE, Chrome
        // The thirds, fifths, sixths and eighths are not repeated here: the
        // groups above replace every occurrence globally before this group is
        // reached, so repeating them could never match anything.
        'III': '2162',
        'VII': '2166',
        'XII': '216B',
        'hPa': '3371',
        'bar': '3374',
        'cal': '3388',
        'kHz': '3391', 'MHz': '3392', 'GHz': '3393', 'THz': '3394',
        'kPa': '33AA', 'MPa': '33AB', 'GPa': '33AC',
        'rad': '33AD',
        'Co\\.': '33C7',
        'log': '33D2',
        'mil': '33D5',
        'mol': '33D6',
        'PPM': '33D9'
    }, {
        'hu': '0195',
        'Hu': '01f6',
        'd3': '02A4',
        'IE': '0464',
        'ie': '0465',
        'du': '0502',
        'un': '057F',
        //'tt': '07DA', //unsupported by Chrome
        'oc': '1142',
        //'oo': '1147', // Bad hinting
        'oi': '13BA',
        'oo': '13C7',
        'II': '2161',
        'IV': '2163',
        'VI': '2165',
        'IX': '2168',
        'XI': '216A',
        'CD': '2180',
        //'\\.\\.': '28C0', //unsupported by IE,Chrome
        'XX': '3037',
        'da': '3372',
        'AU': '3373',
        'oV': '3375',
        'pc': '3376',
        'nA': '3381', 'mA': '3383', 'kA': '3384',
        'KB': '3385', 'MB': '3386', 'GB': '3387',
        'pF': '338A', 'nF': '338B',
        'mg': '338E', 'kg': '338F',
        'Hz': '3390',
        'ml': '3396', 'kl': '3398',
        'fm': '3399',
        'nm': '339A', 'mm': '339C', 'cm': '339D', 'km': '339E',
        'Pa': '33A9',
        'ps': '33B0', 'ns': '33B1', 'ms': '33B3',
        'pV': '33B4', 'nV': '33B5', 'mV': '33B7', 'kV': '33B8', 'MV': '33B9',
        'pW': '33BA', 'nW': '33BB', 'mW': '33BD', 'kW': '33BE', 'MW': '33BF',
        'Bq': '33C3',
        'cc': '33C4',
        'cd': '33C5',
        'dB': '33C8',
        'Gy': '33C9',
        'ha': '33CA',
        'HP': '33CB',
        'in': '33CC',
        'KM': '33CE',
        'kt': '33CF',
        'lm': '33D0',
        'ln': '33D1',
        'lx': '33D3',
        'mb': '33D4',
        'pH': '33D7',
        'PR': '33DA',
        'sr': '33DB',
        'Sv': '33DC',
        'Wb': '33DD'
    }]
];
/**
 * Shortens a tweet by swapping character sequences for single compound
 * symbols (ligatures, fractions, unit signs, ...) until it fits.
 *
 * @usernames, #hashtags and http(s) links are never altered. Patterns are
 * applied one at a time, in table order, and processing stops as soon as the
 * tweet is no longer than `goal`; a tweet that already fits is returned as is.
 *
 * The result is meant to be consumed as HTML: every substituted symbol is
 * emitted as a hexadecimal character reference built from the table value
 * ("&#x" + value + ";"). A symbol the author typed literally is emitted in
 * the same form if the identical symbol was also substituted somewhere else.
 *
 * @param {string} tweet The text to shorten.
 * @param {number} [goal=140] Maximum length, counting each substituted symbol
 *     as one character.
 * @param {boolean} [insensitive] Match the patterns case-insensitively.
 * @param {Array} [table] Replacement table shaped like `ligatures` (tiers of
 *     groups mapping a regex source to a 4-digit hex code point). Defaults to
 *     the `ligatures` table of this script.
 * @returns {string} The shortened tweet, or '' when `tweet` is empty.
 */
function ligatweetize(tweet, goal, insensitive, table) {
    if (!tweet) {
        return '';
    }

    var limit = goal || 140;
    if (tweet.length <= limit) {
        // Nothing to gain, so do not touch the text at all.
        return tweet;
    }

    var tiers = table || ligatures;
    var regFlag = 'g' + (insensitive ? 'i' : '');

    // Extract links, #hashtags and @usernames, so that they remain intact.
    var intactRegex = /(@\S*|#\S*|https?:\/\/\S+)/g;
    var intactArray = tweet.match(intactRegex) || [];

    // Every "@" has just been swallowed by intactRegex, so "@#" cannot occur in
    // the remaining text and is safe to use as a placeholder.
    var text = tweet.replace(intactRegex, '@#');

    // Length difference between the real tweet and the placeholder version;
    // adding it back gives the length of the tweet as it will be returned.
    var hiddenLength = tweet.length - text.length;

    // Substituted symbol -> the character reference to emit for it.
    var inserted = {};

    outerloop: for (var i = 0; i < tiers.length; i++) {
        for (var j = 0; j < tiers[i].length; j++) {
            var group = tiers[i][j];
            for (var combo in group) {
                if (!Object.prototype.hasOwnProperty.call(group, combo)) {
                    continue;
                }
                // Why not join all needles into 1 regex? Because then we could
                // not tell which replacement belongs to a match under the 'i' flag.
                var hex = group[combo];
                // Work with the real symbol instead of its character reference:
                // it counts as one character in the length check, and later
                // patterns cannot match inside it (as 'CD' would in "&#x33CD;").
                var glyph = String.fromCharCode(parseInt(hex, 16));
                var shortened = text.replace(new RegExp(combo, regFlag), glyph);
                if (shortened !== text) {
                    inserted[glyph] = '&#x' + hex + ';';
                    text = shortened;
                }
                if (text.length + hiddenLength <= limit) {
                    break outerloop;
                }
            }
        }
    }

    // Emit the substituted symbols as character references. This happens before
    // the protected pieces are put back, so those are never re-encoded.
    text = text.replace(/[^\x00-\x7F]/g, function (ch) {
        return Object.prototype.hasOwnProperty.call(inserted, ch) ? inserted[ch] : ch;
    });

    // Replace the placeholders with the @usernames, #hashtags and links removed
    // previously, in their original order.
    return text.replace(/@#/g, function () {
        return intactArray.shift();
    });
}
// Hook the shortener up to the element with id "status".
// NOTE(review): presumably Twitter's tweet <textarea> - confirm against the page.
var status = document.getElementById('status');
// The script runs on every twitter.com page; without this guard it would throw
// on pages that have no such element.
if (status) {
    status.addEventListener('input', function input() {
        if (status.value.length > 140) {
            // ligatweetize() returns character references, so the text goes
            // through innerHTML to let the browser decode them.
            status.innerHTML = ligatweetize(status.value);
            // Copy the decoded text into the live value when it is available
            // as the element's default value.
            if (status.defaultValue) {
                status.value = status.defaultValue;
            }
        }
    }, true);
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment