Skip to content

Instantly share code, notes, and snippets.

@mjethani
Last active September 22, 2015 22:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mjethani/2f78005ebef97633f2ca to your computer and use it in GitHub Desktop.
Save mjethani/2f78005ebef97633f2ca to your computer and use it in GitHub Desktop.
An ECMAScript 6 version of Peter Norvig's spelling corrector
npm install -g babel

git clone https://gist.github.com/2f78005ebef97633f2ca.git
cd 2f78005ebef97633f2ca

curl -O http://norvig.com/big.txt
babel-node test

http://norvig.com/spell-correct.html

$ grep -v ^$ spell.js | wc -l
      32
Copyright (c) 2015 Manish Jethani
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
import * as fs from 'fs';
function nonempty(array) { return array && array.length > 0 ? array : null; }
function unique(array) { return [ ...new Set(array) ]; }
function words(text) { return text.toLowerCase().match(/[a-z]+/g); }
function train(features) {
return features.reduce((m, f) => (m[f] = (m[f] | 0) + 1, m), Object.create(null));
}
export const NWORDS = train(words(fs.readFileSync('big.txt', 'utf8')));
const alphabet = [ ...'abcdefghijklmnopqrstuvwxyz' ];
function edits1(word) {
let splits = [ ...word ].map((c, i) => [ word.slice(0, i), word.slice(i) ]);
let deletes = splits.map(([ a, b ]) => a + b.slice(1));
let transposes = splits.slice(0, -1).map(([ a, b ]) => a + b[1] + b[0] + b.slice(2));
let replaces = splits.reduce((r, [ a, b ]) =>
[ ...r, ...alphabet.map(c => a + c + b.slice(1)) ], []);
let inserts = [ ...splits, [ word, '' ] ].reduce((r, [ a, b ]) =>
[ ...r, ...alphabet.map(c => a + c + b) ], []);
return unique([ ...deletes, ...transposes, ...replaces, ...inserts ]);
}
function knownEdits2(word) {
return unique(edits1(word).reduce((r, e1) =>
[ ...r, ...edits1(e1).filter(e2 => NWORDS[e2]) ], []));
}
function known(words) { return unique([ ...words ].filter(w => NWORDS[w])); }
function best(candidates) {
return candidates.reduce((m, x) =>
(NWORDS[x] || 1) > m[1] ? [ x, NWORDS[x] || 1 ] : m, [ , -1 ])[0];
}
export function correct(word) {
return best(nonempty(known([ word ])) || nonempty(known(edits1(word))) ||
nonempty(knownEdits2(word)) || [ word ]);
}
import * as spell from './spell';
function spelltest(tests, { bias = 0, verbose = false } = {}) {
let n = 0, bad = 0, unknown = 0, start = Date.now();
if (bias) {
for (let target of Object.keys(tests)) {
spell.NWORDS[target] = (spell.NWORDS[target] || 1) + bias;
}
}
for (let [ target, wrongs ] of Object.keys(tests).map(x => [ x, tests[x] ])) {
for (let wrong of wrongs.split(' ')) {
n++;
let w = spell.correct(wrong);
if (w !== target) {
bad++;
unknown += !spell.NWORDS[target];
if (verbose) {
console.log(`correct(${wrong}) => ${w} (${spell.NWORDS[w] || 1})` +
`; expected ${target} (${spell.NWORDS[target] || 1})`);
}
}
}
}
return {
bad, bias, unknown, secs: Math.round((Date.now() - start) / 1000),
pct: (100 - 100 * bad / n) | 0, n
};
}
const tests1 = {
'access': 'acess',
'accessing': 'accesing',
'accommodation': 'accomodation acommodation acomodation',
'account': 'acount',
'address': 'adress adres',
'addressable': 'addresable',
'arranged': 'aranged arrainged',
'arrangeing': 'aranging',
'arrangement': 'arragment',
'articles': 'articals',
'aunt': 'annt anut arnt',
'auxiliary': 'auxillary',
'available': 'avaible',
'awful': 'awfall afful',
'basically': 'basicaly',
'beginning': 'begining',
'benefit': 'benifit',
'benefits': 'benifits',
'between': 'beetween',
'bicycle': 'bicycal bycicle bycycle',
'biscuits': 'biscits biscutes biscuts bisquits buiscits buiscuts',
'built': 'biult',
'cake': 'cak',
'career': 'carrer',
'cemetery': 'cemetary semetary',
'centrally': 'centraly',
'certain': 'cirtain',
'challenges': 'chalenges chalenges',
'chapter': 'chaper chaphter chaptur',
'choice': 'choise',
'choosing': 'chosing',
'clerical': 'clearical',
'committee': 'comittee',
'compare': 'compair',
'completely': 'completly',
'consider': 'concider',
'considerable': 'conciderable',
'contented': 'contenpted contende contended contentid',
'curtains': 'cartains certans courtens cuaritains curtans curtians curtions',
'decide': 'descide',
'decided': 'descided',
'definitely': 'definately difinately',
'definition': 'defenition',
'definitions': 'defenitions',
'description': 'discription',
'desiccate': 'desicate dessicate dessiccate',
'diagrammatically': 'diagrammaticaally',
'different': 'diffrent',
'driven': 'dirven',
'ecstasy': 'exstacy ecstacy',
'embarrass': 'embaras embarass',
'establishing': 'astablishing establising',
'experience': 'experance experiance',
'experiences': 'experances',
'extended': 'extented',
'extremely': 'extreamly',
'fails': 'failes',
'families': 'familes',
'february': 'febuary',
'further': 'futher',
'gallery': 'galery gallary gallerry gallrey',
'hierarchal': 'hierachial',
'hierarchy': 'hierchy',
'inconvenient': 'inconvienient inconvient inconvinient',
'independent': 'independant independant',
'initial': 'intial',
'initials': 'inetials inistals initails initals intials',
'juice': 'guic juce jucie juise juse',
'latest': 'lates latets latiest latist',
'laugh': 'lagh lauf laught lugh',
'level': 'leval',
'levels': 'levals',
'liaison': 'liaision liason',
'lieu': 'liew',
'literature': 'litriture',
'loans': 'lones',
'locally': 'localy',
'magnificent': 'magnificnet magificent magnifcent magnifecent magnifiscant magnifisent magnificant',
'management': 'managment',
'meant': 'ment',
'minuscule': 'miniscule',
'minutes': 'muinets',
'monitoring': 'monitering',
'necessary': 'neccesary necesary neccesary necassary necassery neccasary',
'occurrence': 'occurence occurence',
'often': 'ofen offen offten ofton',
'opposite': 'opisite oppasite oppesite oppisit oppisite opposit oppossite oppossitte',
'parallel': 'paralel paralell parrallel parralell parrallell',
'particular': 'particulaur',
'perhaps': 'perhapse',
'personnel': 'personnell',
'planned': 'planed',
'poem': 'poame',
'poems': 'poims pomes',
'poetry': 'poartry poertry poetre poety powetry',
'position': 'possition',
'possible': 'possable',
'pretend': 'pertend protend prtend pritend',
'problem': 'problam proble promblem proplen',
'pronunciation': 'pronounciation',
'purple': 'perple perpul poarple',
'questionnaire': 'questionaire',
'really': 'realy relley relly',
'receipt': 'receit receite reciet recipt',
'receive': 'recieve',
'refreshment': 'reafreshment refreshmant refresment refressmunt',
'remember': 'rember remeber rememmer rermember',
'remind': 'remine remined',
'scarcely': 'scarcly scarecly scarely scarsely',
'scissors': 'scisors sissors',
'separate': 'seperate',
'singular': 'singulaur',
'someone': 'somone',
'sources': 'sorces',
'southern': 'southen',
'special': 'speaical specail specal speical',
'splendid': 'spledid splended splened splended',
'standardizing': 'stanerdizing',
'stomach': 'stomac stomache stomec stumache',
'supersede': 'supercede superceed',
'there': 'ther',
'totally': 'totaly',
'transferred': 'transfred',
'transportability': 'transportibility',
'triangular': 'triangulaur',
'understand': 'undersand undistand',
'unexpected': 'unexpcted unexpeted unexspected',
'unfortunately': 'unfortunatly',
'unique': 'uneque',
'useful': 'usefull',
'valuable': 'valubale valuble',
'variable': 'varable',
'variant': 'vairiant',
'various': 'vairious',
'visited': 'fisited viseted vistid vistied',
'visitors': 'vistors',
'voluntary': 'volantry',
'voting': 'voteing',
'wanted': 'wantid wonted',
'whether': 'wether',
'wrote': 'rote wote'
};
const tests2 = {
'forbidden': 'forbiden',
'decisions': 'deciscions descisions',
'supposedly': 'supposidly',
'embellishing': 'embelishing',
'technique': 'tecnique',
'permanently': 'perminantly',
'confirmation': 'confermation',
'appointment': 'appoitment',
'progression': 'progresion',
'accompanying': 'acompaning',
'applicable': 'aplicable',
'regained': 'regined',
'guidelines': 'guidlines',
'surrounding': 'serounding',
'titles': 'tittles',
'unavailable': 'unavailble',
'advantageous': 'advantageos',
'brief': 'brif',
'appeal': 'apeal',
'consisting': 'consisiting',
'clerk': 'cleark clerck',
'component': 'componant',
'favourable': 'faverable',
'separation': 'seperation',
'search': 'serch',
'receive': 'recieve',
'employees': 'emploies',
'prior': 'piror',
'resulting': 'reulting',
'suggestion': 'sugestion',
'opinion': 'oppinion',
'cancellation': 'cancelation',
'criticism': 'citisum',
'useful': 'usful',
'humour': 'humor',
'anomalies': 'anomolies',
'would': 'whould',
'doubt': 'doupt',
'examination': 'eximination',
'therefore': 'therefoe',
'recommend': 'recomend',
'separated': 'seperated',
'successful': 'sucssuful succesful',
'apparent': 'apparant',
'occurred': 'occureed',
'particular': 'paerticulaur',
'pivoting': 'pivting',
'announcing': 'anouncing',
'challenge': 'chalange',
'arrangements': 'araingements',
'proportions': 'proprtions',
'organized': 'oranised',
'accept': 'acept',
'dependence': 'dependance',
'unequalled': 'unequaled',
'numbers': 'numbuers',
'sense': 'sence',
'conversely': 'conversly',
'provide': 'provid',
'arrangement': 'arrangment',
'responsibilities': 'responsiblities',
'fourth': 'forth',
'ordinary': 'ordenary',
'description': 'desription descvription desacription',
'inconceivable': 'inconcievable',
'data': 'dsata',
'register': 'rgister',
'supervision': 'supervison',
'encompassing': 'encompasing',
'negligible': 'negligable',
'allow': 'alow',
'operations': 'operatins',
'executed': 'executted',
'interpretation': 'interpritation',
'hierarchy': 'heiarky',
'indeed': 'indead',
'years': 'yesars',
'through': 'throut',
'committee': 'committe',
'inquiries': 'equiries',
'before': 'befor',
'continued': 'contuned',
'permanent': 'perminant',
'choose': 'chose',
'virtually': 'vertually',
'correspondence': 'correspondance',
'eventually': 'eventully',
'lonely': 'lonley',
'profession': 'preffeson',
'they': 'thay',
'now': 'noe',
'desperately': 'despratly',
'university': 'unversity',
'adjournment': 'adjurnment',
'possibilities': 'possablities',
'stopped': 'stoped',
'mean': 'meen',
'weighted': 'wagted',
'adequately': 'adequattly',
'shown': 'hown',
'matrix': 'matriiix',
'profit': 'proffit',
'encourage': 'encorage',
'collate': 'colate',
'disaggregate': 'disaggreagte disaggreaget',
'receiving': 'recieving reciving',
'proviso': 'provisoe',
'umbrella': 'umberalla',
'approached': 'aproached',
'pleasant': 'plesent',
'difficulty': 'dificulty',
'appointments': 'apointments',
'base': 'basse',
'conditioning': 'conditining',
'earliest': 'earlyest',
'beginning': 'begining',
'universally': 'universaly',
'unresolved': 'unresloved',
'length': 'lengh',
'exponentially': 'exponentualy',
'utilized': 'utalised',
'set': 'et',
'surveys': 'servays',
'families': 'familys',
'system': 'sysem',
'approximately': 'aproximatly',
'their': 'ther',
'scheme': 'scheem',
'speaking': 'speeking',
'repetitive': 'repetative',
'inefficient': 'ineffiect',
'geneva': 'geniva',
'exactly': 'exsactly',
'immediate': 'imediate',
'appreciation': 'apreciation',
'luckily': 'luckeley',
'eliminated': 'elimiated',
'believe': 'belive',
'appreciated': 'apreciated',
'readjusted': 'reajusted',
'were': 'wer where',
'feeling': 'fealing',
'and': 'anf',
'false': 'faulse',
'seen': 'seeen',
'interrogating': 'interogationg',
'academically': 'academicly',
'relatively': 'relativly relitivly',
'traditionally': 'traditionaly',
'studying': 'studing',
'majority': 'majorty',
'build': 'biuld',
'aggravating': 'agravating',
'transactions': 'trasactions',
'arguing': 'aurguing',
'sheets': 'sheertes',
'successive': 'sucsesive sucessive',
'segment': 'segemnt',
'especially': 'especaily',
'later': 'latter',
'senior': 'sienior',
'dragged': 'draged',
'atmosphere': 'atmospher',
'drastically': 'drasticaly',
'particularly': 'particulary',
'visitor': 'vistor',
'session': 'sesion',
'continually': 'contually',
'availability': 'avaiblity',
'busy': 'buisy',
'parameters': 'perametres',
'surroundings': 'suroundings seroundings',
'employed': 'emploied',
'adequate': 'adiquate',
'handle': 'handel',
'means': 'meens',
'familiar': 'familer',
'between': 'beeteen',
'overall': 'overal',
'timing': 'timeing',
'committees': 'comittees commitees',
'queries': 'quies',
'econometric': 'economtric',
'erroneous': 'errounous',
'decides': 'descides',
'reference': 'refereence refference',
'intelligence': 'inteligence',
'edition': 'ediion ediition',
'are': 'arte',
'apologies': 'appologies',
'thermawear': 'thermawere thermawhere',
'techniques': 'tecniques',
'voluntary': 'volantary',
'subsequent': 'subsequant subsiquent',
'currently': 'curruntly',
'forecast': 'forcast',
'weapons': 'wepons',
'routine': 'rouint',
'neither': 'niether',
'approach': 'aproach',
'available': 'availble',
'recently': 'reciently',
'ability': 'ablity',
'nature': 'natior',
'commercial': 'comersial',
'agencies': 'agences',
'however': 'howeverr',
'suggested': 'sugested',
'career': 'carear',
'many': 'mony',
'annual': 'anual',
'according': 'acording',
'receives': 'recives recieves',
'interesting': 'intresting',
'expense': 'expence',
'relevant': 'relavent relevaant',
'table': 'tasble',
'throughout': 'throuout',
'conference': 'conferance',
'sensible': 'sensable',
'described': 'discribed describd',
'union': 'unioun',
'interest': 'intrest',
'flexible': 'flexable',
'refered': 'reffered',
'controlled': 'controled',
'sufficient': 'suficient',
'dissension': 'desention',
'adaptable': 'adabtable',
'representative': 'representitive',
'irrelevant': 'irrelavent',
'unnecessarily': 'unessasarily',
'applied': 'upplied',
'apologised': 'appologised',
'these': 'thees thess',
'choices': 'choises',
'will': 'wil',
'procedure': 'proceduer',
'shortened': 'shortend',
'manually': 'manualy',
'disappointing': 'dissapoiting',
'excessively': 'exessively',
'comments': 'coments',
'containing': 'containg',
'develop': 'develope',
'credit': 'creadit',
'government': 'goverment',
'acquaintances': 'aquantences',
'orientated': 'orentated',
'widely': 'widly',
'advise': 'advice',
'difficult': 'dificult',
'investigated': 'investegated',
'bonus': 'bonas',
'conceived': 'concieved',
'nationally': 'nationaly',
'compared': 'comppared compased',
'moving': 'moveing',
'necessity': 'nessesity',
'opportunity': 'oppertunity oppotunity opperttunity',
'thoughts': 'thorts',
'equalled': 'equaled',
'variety': 'variatry',
'analysis': 'analiss analsis analisis',
'patterns': 'pattarns',
'qualities': 'quaties',
'easily': 'easyly',
'organization': 'oranisation oragnisation',
'the': 'thw hte thi',
'corporate': 'corparate',
'composed': 'compossed',
'enormously': 'enomosly',
'financially': 'financialy',
'functionally': 'functionaly',
'discipline': 'disiplin',
'announcement': 'anouncement',
'progresses': 'progressess',
'except': 'excxept',
'recommending': 'recomending',
'mathematically': 'mathematicaly',
'source': 'sorce',
'combine': 'comibine',
'input': 'inut',
'careers': 'currers carrers',
'resolved': 'resoved',
'demands': 'diemands',
'unequivocally': 'unequivocaly',
'suffering': 'suufering',
'immediately': 'imidatly imediatly',
'accepted': 'acepted',
'projects': 'projeccts',
'necessary': 'necasery nessasary nessisary neccassary',
'journalism': 'journaism',
'unnecessary': 'unessessay',
'night': 'nite',
'output': 'oputput',
'security': 'seurity',
'essential': 'esential',
'beneficial': 'benificial benficial',
'explaining': 'explaning',
'supplementary': 'suplementary',
'questionnaire': 'questionare',
'employment': 'empolyment',
'proceeding': 'proceding',
'decision': 'descisions descision',
'per': 'pere',
'discretion': 'discresion',
'reaching': 'reching',
'analysed': 'analised',
'expansion': 'expanion',
'although': 'athough',
'subtract': 'subtrcat',
'analysing': 'aalysing',
'comparison': 'comparrison',
'months': 'monthes',
'hierarchal': 'hierachial',
'misleading': 'missleading',
'commit': 'comit',
'auguments': 'aurgument',
'within': 'withing',
'obtaining': 'optaning',
'accounts': 'acounts',
'primarily': 'pimarily',
'operator': 'opertor',
'accumulated': 'acumulated',
'extremely': 'extreemly',
'there': 'thear',
'summarys': 'sumarys',
'analyse': 'analiss',
'understandable': 'understadable',
'safeguard': 'safegaurd',
'consist': 'consisit',
'declarations': 'declaratrions',
'minutes': 'muinutes muiuets',
'associated': 'assosiated',
'accessibility': 'accessability',
'examine': 'examin',
'surveying': 'servaying',
'politics': 'polatics',
'annoying': 'anoying',
'again': 'agiin',
'assessing': 'accesing',
'ideally': 'idealy',
'scrutinized': 'scrutiniesed',
'simular': 'similar',
'personnel': 'personel',
'whereas': 'wheras',
'when': 'whn',
'geographically': 'goegraphicaly',
'gaining': 'ganing',
'requested': 'rquested',
'separate': 'seporate',
'students': 'studens',
'prepared': 'prepaired',
'generated': 'generataed',
'graphically': 'graphicaly',
'suited': 'suted',
'variable': 'varible vaiable',
'building': 'biulding',
'required': 'reequired',
'necessitates': 'nessisitates',
'together': 'togehter',
'profits': 'proffits'
};
if (require.main === module) {
console.log(spelltest(tests1));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment