Created
December 4, 2012 16:13
-
-
Save Xophmeister/4205666 to your computer and use it in GitHub Desktop.
Linguistic White Noise Generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import itertools | |
import bisect | |
BNC = [ | |
('the', 6187267), | |
('be', 4239632), | |
('of', 3093444), | |
('and', 2687863), | |
('a', 2186369), | |
('in', 1924315), | |
('to', 1620850), | |
('have', 1375636), | |
('it', 1090186), | |
('to', 1039323), | |
('for', 887877), | |
('i', 884599), | |
('that', 760399), | |
('you', 695498), | |
('he', 681255), | |
('on', 680739), | |
('with', 675027), | |
('do', 559596), | |
('at', 534162), | |
('by', 517171), | |
('not', 465486), | |
('this', 461945), | |
('but', 459622), | |
('from', 434532), | |
('they', 433441), | |
('his', 426896), | |
('that', 384313), | |
('she', 380257), | |
('or', 373808), | |
('which', 372031), | |
('as', 364164), | |
('we', 358039), | |
('an', 343063), | |
('say', 333518), | |
('will', 297281), | |
('would', 272345), | |
('can', 266116), | |
('if', 261089), | |
('their', 260919), | |
('go', 249540), | |
('what', 249466), | |
('there', 239460), | |
('all', 230737), | |
('get', 220940), | |
('her', 218258), | |
('make', 217268), | |
('who', 205432), | |
('as', 201968), | |
('out', 201819), | |
('up', 195426), | |
('see', 191661), | |
('know', 185534), | |
('time', 183427), | |
('take', 179220), | |
('them', 173414), | |
('some', 171174), | |
('could', 168387), | |
('so', 167324), | |
('him', 165014), | |
('year', 163930), | |
('into', 163469), | |
('its', 163081), | |
('then', 160652), | |
('think', 153881), | |
('my', 152619), | |
('come', 151871), | |
('than', 147618), | |
('more', 146029), | |
('about', 144554), | |
('now', 143801), | |
('last', 140063), | |
('your', 138334), | |
('me', 138151), | |
('no', 137026), | |
('other', 135185), | |
('give', 131417), | |
('just', 128517), | |
('should', 128393), | |
('these', 125442), | |
('people', 125430), | |
('also', 124884), | |
('well', 124451), | |
('any', 123655), | |
('only', 122128), | |
('new', 115523), | |
('very', 114911), | |
('when', 113655), | |
('may', 113024), | |
('way', 112636), | |
('look', 111058), | |
('like', 110090), | |
('use', 108820), | |
('her', 108710), | |
('such', 108524), | |
('how', 103508), | |
('because', 103003), | |
('when', 102621), | |
('as', 101583), | |
('good', 100652), | |
('find', 98899), | |
('man', 97985), | |
('our', 95001), | |
('want', 94293), | |
('day', 92699), | |
('between', 91141), | |
('even', 90473), | |
('there', 89890), | |
('many', 89659), | |
('those', 88862), | |
('one', 86364), | |
('after', 85939), | |
('down', 84446), | |
('yeah', 83382), | |
('so', 79028), | |
('thing', 77612), | |
('tell', 77245), | |
('through', 75588), | |
('back', 75494), | |
('still', 72774), | |
('must', 72059), | |
('child', 71008), | |
('here', 70947), | |
('over', 70676), | |
('too', 70164), | |
('put', 69978), | |
('own', 69459), | |
('on', 68362), | |
('no', 67999), | |
('work', 67842), | |
('become', 67219), | |
('more', 67198), | |
('old', 66999), | |
('government', 66894), | |
('mean', 66556), | |
('part', 65773), | |
('leave', 64447), | |
('life', 64423), | |
('great', 64369), | |
('where', 64118), | |
('case', 63577), | |
('woman', 63087), | |
('over', 62993), | |
('seem', 62445), | |
('same', 62402), | |
('us', 62350), | |
('work', 62248), | |
('need', 62201), | |
('feel', 62185), | |
('system', 61912), | |
('each', 61492), | |
('might', 61446), | |
('may', 61446), | |
('much', 61338), | |
('ask', 60879), | |
('group', 60689), | |
('number', 60607), | |
('yes', 60592), | |
('however', 60498), | |
('another', 60182), | |
('again', 59829), | |
('world', 59094), | |
('area', 58449), | |
('show', 58152), | |
('course', 57776), | |
('company', 57754), | |
('shall', 57056), | |
('under', 56638), | |
('problem', 56483), | |
('against', 56208), | |
('never', 55899), | |
('all', 55704), | |
('most', 54966), | |
('service', 54468), | |
('try', 54422), | |
('call', 53396), | |
('hand', 53265), | |
('party', 52979), | |
('high', 52703), | |
('about', 52561), | |
('something', 52452), | |
('school', 52227), | |
('in', 51652), | |
('small', 51626), | |
('place', 51537), | |
('before', 51259), | |
('why', 50877), | |
('while', 50548), | |
('away', 50294), | |
('keep', 50092), | |
('point', 49187), | |
('house', 49022), | |
('different', 48373), | |
('country', 48177), | |
('really', 48062), | |
('provide', 47923), | |
('week', 47512), | |
('hold', 47234), | |
('large', 47185), | |
('member', 47141), | |
('off', 46285), | |
('always', 46228), | |
('next', 46221), | |
('follow', 46145), | |
('without', 45867), | |
('turn', 45487), | |
('end', 45160), | |
('within', 45042), | |
('local', 44920), | |
('where', 44496), | |
('during', 44013), | |
('bring', 43894), | |
('most', 43792), | |
('word', 43750), | |
('begin', 43740), | |
('although', 43635), | |
('example', 43402), | |
('next', 43139), | |
('family', 42773), | |
('rather', 42341), | |
('fact', 42241), | |
('like', 41909), | |
('social', 41617), | |
('write', 41497), | |
('state', 41351), | |
('percent', 41205), | |
('quite', 41169), | |
('both', 41162), | |
('start', 41029), | |
('run', 40858), | |
('long', 40492), | |
('right', 40460), | |
('set', 40381), | |
('help', 40265), | |
('every', 40114), | |
('home', 39850), | |
('month', 39819), | |
('side', 39626), | |
('night', 39315), | |
('important', 39265), | |
('eye', 39192), | |
('head', 39000), | |
('information', 38656), | |
('question', 38608), | |
('business', 38204), | |
('play', 38053), | |
('power', 37963), | |
('money', 37892), | |
('change', 37884), | |
('move', 37836), | |
('interest', 37744), | |
('order', 37736), | |
('book', 37675), | |
('often', 37640), | |
('development', 37386), | |
('young', 37278), | |
('national', 37231), | |
('pay', 36665), | |
('hear', 36575), | |
('room', 36360), | |
('whether', 36169), | |
('water', 35767), | |
('form', 35758), | |
('car', 35295), | |
('other', 35164), | |
('yet', 35058), | |
('perhaps', 35039), | |
('meet', 34970), | |
('level', 34885), | |
('until', 34807), | |
('though', 34801), | |
('policy', 34775), | |
('include', 34753), | |
('believe', 34603), | |
('council', 34496), | |
('already', 34292), | |
('possible', 34178), | |
('nothing', 34064), | |
('line', 33888), | |
('allow', 33687), | |
('need', 33660), | |
('effect', 33423), | |
('big', 33300), | |
('use', 32998), | |
('lead', 32923), | |
('stand', 32899), | |
('idea', 32798), | |
('study', 32786), | |
('lot', 32733), | |
('live', 32675), | |
('job', 32484), | |
('since', 32404), | |
('name', 32309), | |
('result', 32259), | |
('body', 32231), | |
('happen', 32075), | |
('friend', 31927), | |
('right', 31873), | |
('least', 31713), | |
('right', 31630), | |
('almost', 31588), | |
('much', 31284), | |
('carry', 31258), | |
('authority', 31231), | |
('long', 31143), | |
('early', 31110), | |
('view', 31102), | |
('himself', 31082), | |
('public', 30983), | |
('together', 30960), | |
('talk', 30930), | |
('report', 30857), | |
('after', 30855), | |
('only', 30775), | |
('before', 30731), | |
('bit', 30675), | |
('face', 30624), | |
('sit', 30599), | |
('market', 30596), | |
('appear', 30595), | |
('continue', 30466), | |
('able', 30454), | |
('political', 30366), | |
('later', 30326), | |
('hour', 30218), | |
('rate', 30179), | |
('law', 30169), | |
('door', 30166), | |
('court', 29976), | |
('office', 29943), | |
('let', 29768), | |
('war', 29722), | |
('produce', 29490), | |
('reason', 29194), | |
('less', 29147), | |
('minister', 29141), | |
('subject', 29091), | |
('person', 28981), | |
('term', 28896), | |
('particular', 28887), | |
('full', 28836), | |
('involve', 28772), | |
('sort', 28760), | |
('require', 28711), | |
('suggest', 28665), | |
('far', 28626), | |
('towards', 28600), | |
('anything', 28321), | |
('period', 28300), | |
('consider', 28271), | |
('read', 28216), | |
('change', 28210), | |
('society', 28150), | |
('process', 28035), | |
('mother', 27784), | |
('offer', 27697), | |
('late', 27673), | |
('voice', 27665), | |
('both', 27644), | |
('once', 27632), | |
('police', 27508), | |
('kind', 27485), | |
('lose', 27484), | |
('add', 27367), | |
('probably', 27303), | |
('expect', 27221), | |
('ever', 27195), | |
('available', 27184), | |
('no', 27178), | |
('price', 27166), | |
('little', 26953), | |
('action', 26894), | |
('issue', 26889), | |
('far', 26856), | |
('remember', 26748), | |
('position', 26625), | |
('low', 26616), | |
('cost', 26556), | |
('little', 26553), | |
('matter', 26304), | |
('community', 26289), | |
('remain', 26244), | |
('figure', 26191), | |
('type', 26034), | |
('research', 26018), | |
('actually', 25990), | |
('education', 25987), | |
('fall', 25966), | |
('speak', 25788), | |
('few', 25781), | |
('today', 25775), | |
('enough', 25635), | |
('open', 25614), | |
('bad', 25608), | |
('buy', 25582), | |
('programme', 25444), | |
('minute', 25440), | |
('moment', 25371), | |
('girl', 25366), | |
('age', 25340), | |
('centre', 25272), | |
('stop', 25066), | |
('control', 25041), | |
('value', 25024), | |
('send', 24816), | |
('health', 24527), | |
('decide', 24380), | |
('main', 24370), | |
('win', 24310), | |
('understand', 24252), | |
('decision', 24233), | |
('develop', 24205), | |
('class', 24148), | |
('industry', 24144), | |
('receive', 24111), | |
('back', 24095), | |
('several', 24002), | |
('return', 23976), | |
('build', 23931), | |
('spend', 23799), | |
('force', 23785), | |
('condition', 23742), | |
('itself', 23712), | |
('paper', 23694), | |
('themselves', 23673), | |
('off', 23629), | |
('major', 23629), | |
('describe', 23533), | |
('agree', 23497), | |
('economic', 23484), | |
('increase', 23474), | |
('upon', 23409), | |
('learn', 23394), | |
('general', 23340), | |
('century', 23259), | |
('therefore', 23218), | |
('father', 23216), | |
('section', 23188), | |
('patient', 23106), | |
('around', 23106), | |
('activity', 23105), | |
('road', 23103), | |
('table', 23092), | |
('including', 23082), | |
('church', 23008), | |
('reach', 22992), | |
('real', 22982), | |
('lie', 22959), | |
('mind', 22926), | |
('likely', 22899), | |
('among', 22864), | |
('team', 22781), | |
('experience', 22751), | |
('death', 22712), | |
('soon', 22703), | |
('act', 22657), | |
('sense', 22601), | |
('staff', 22363), | |
('certain', 22297), | |
('student', 22237), | |
('half', 22232), | |
('around', 22180), | |
('language', 22117), | |
('walk', 22101), | |
('die', 22087), | |
('special', 22040), | |
('difficult', 22033), | |
('international', 22010), | |
('particularly', 22002), | |
('department', 21939), | |
('management', 21884), | |
('morning', 21845), | |
('draw', 21778), | |
('hope', 21763), | |
('across', 21763), | |
('plan', 21727), | |
('product', 21712), | |
('city', 21596), | |
('early', 21585), | |
('committee', 21575), | |
('ground', 21504), | |
('letter', 21488), | |
('create', 21470), | |
('evidence', 21454), | |
('foot', 21339), | |
('clear', 21260), | |
('boy', 21205), | |
('game', 21181), | |
('food', 21044), | |
('role', 21016), | |
('practice', 20950), | |
('bank', 20946), | |
('else', 20919), | |
('support', 20904), | |
('sell', 20902), | |
('event', 20839), | |
('building', 20770), | |
('range', 20726), | |
('behind', 20694), | |
('sure', 20678), | |
('report', 20660), | |
('pass', 20633), | |
('black', 20609), | |
('stage', 20586), | |
('meeting', 20544), | |
('sometimes', 20517), | |
('thus', 20488), | |
('accept', 20373), | |
('town', 20170), | |
('art', 20168), | |
('further', 20138), | |
('club', 20113), | |
('cause', 20091), | |
('arm', 20089), | |
('history', 20064), | |
('parent', 20060), | |
('land', 20001), | |
('trade', 19928), | |
('watch', 19869), | |
('white', 19865), | |
('situation', 19856), | |
('whose', 19833), | |
('ago', 19808), | |
('teacher', 19744), | |
('record', 19639), | |
('manager', 19636), | |
('relation', 19628), | |
('common', 19565), | |
('strong', 19558), | |
('whole', 19542), | |
('field', 19542), | |
('free', 19539), | |
('break', 19512), | |
('yesterday', 19459), | |
('support', 19344), | |
('window', 19340), | |
('account', 19260), | |
('explain', 19218), | |
('stay', 19207), | |
('few', 19160), | |
('wait', 19152), | |
('usually', 19151), | |
('difference', 19138), | |
('material', 19057), | |
('air', 19046), | |
('wife', 19039), | |
('cover', 19015), | |
('apply', 18982), | |
('project', 18916), | |
('raise', 18913), | |
('sale', 18900), | |
('relationship', 18866), | |
('indeed', 18858), | |
('light', 18853), | |
('claim', 18828), | |
('form', 18757), | |
('base', 18742), | |
('care', 18717), | |
('someone', 18681), | |
('everything', 18675), | |
('certainly', 18647), | |
('rule', 18579), | |
('home', 18540), | |
('cut', 18511), | |
('grow', 18433), | |
('similar', 18432), | |
('story', 18418), | |
('quality', 18415), | |
('tax', 18313), | |
('worker', 18247), | |
('nature', 18223), | |
('structure', 18201), | |
('data', 18188), | |
('necessary', 18107), | |
('pound', 18058), | |
('method', 18044), | |
('unit', 17958), | |
('central', 17947), | |
('bed', 17947), | |
('union', 17902), | |
('movement', 17880), | |
('board', 17878), | |
('true', 17836), | |
('well', 17809), | |
('simply', 17756), | |
('contain', 17732), | |
('especially', 17694), | |
('open', 17662), | |
('short', 17635), | |
('personal', 17622), | |
('detail', 17584), | |
('model', 17553), | |
('bear', 17461), | |
('single', 17415), | |
('join', 17331), | |
('reduce', 17226), | |
('establish', 17224), | |
('herself', 17197), | |
('wall', 17194), | |
('face', 17193), | |
('easy', 17065), | |
('private', 17022), | |
('computer', 16976), | |
('former', 16973), | |
('hospital', 16898), | |
('chapter', 16860), | |
('scheme', 16804), | |
('theory', 16776), | |
('choose', 16731), | |
('wish', 16647), | |
('property', 16646), | |
('achieve', 16628), | |
('financial', 16622), | |
('poor', 16579), | |
('officer', 16555), | |
('up', 16509), | |
('charge', 16503), | |
('director', 16487), | |
('drive', 16477), | |
('deal', 16402), | |
('place', 16392), | |
('approach', 16331), | |
('chance', 16288), | |
('application', 16281), | |
('seek', 16270), | |
('cos', 16247), | |
('foreign', 16234), | |
('along', 16233), | |
('top', 16176), | |
('amount', 16138), | |
('son', 16077), | |
('operation', 16040), | |
('fail', 16038), | |
('human', 16008), | |
('opportunity', 16002), | |
('simple', 15911), | |
('leader', 15903), | |
('look', 15872), | |
('share', 15840), | |
('production', 15837), | |
('recent', 15812), | |
('firm', 15767), | |
('picture', 15748), | |
('source', 15747), | |
('security', 15742), | |
('serve', 15722), | |
('according', 15722), | |
('end', 15688), | |
('contract', 15672), | |
('wide', 15644), | |
('occur', 15635), | |
('agreement', 15627), | |
('better', 15626), | |
('kill', 15620), | |
('act', 15620), | |
('site', 15602), | |
('either', 15599), | |
('labour', 15552), | |
('plan', 15538), | |
('various', 15502), | |
('since', 15496), | |
('test', 15491), | |
('eat', 15446), | |
('loss', 15442), | |
('close', 15439), | |
('represent', 15416), | |
('love', 15398), | |
('colour', 15370), | |
('clearly', 15349), | |
('shop', 15292), | |
('benefit', 15251), | |
('animal', 15250), | |
('heart', 15242), | |
('election', 15227), | |
('purpose', 15159), | |
('standard', 15156), | |
('due', 15140), | |
('secretary', 15106), | |
('rise', 15106), | |
('date', 15044), | |
('hard', 15034), | |
('music', 15024), | |
('hair', 15020), | |
('prepare', 14961), | |
('factor', 14960), | |
('other', 14959), | |
('anyone', 14956), | |
('pattern', 14934), | |
('manage', 14894), | |
('piece', 14873), | |
('discuss', 14861), | |
('prove', 14812), | |
('front', 14758), | |
('evening', 14739), | |
('royal', 14711), | |
('tree', 14692), | |
('population', 14664), | |
('fine', 14650), | |
('plant', 14638), | |
('pressure', 14635), | |
('response', 14627), | |
('catch', 14627), | |
('street', 14622), | |
('pick', 14622), | |
('performance', 14620), | |
('knowledge', 14609), | |
('despite', 14592), | |
('design', 14565), | |
('page', 14546), | |
('enjoy', 14527), | |
('individual', 14487), | |
('suppose', 14482), | |
('rest', 14440), | |
('instead', 14437), | |
('wear', 14434), | |
('basis', 14420), | |
('size', 14416), | |
('environment', 14403), | |
('per', 14402), | |
('fire', 14379), | |
('series', 14348), | |
('success', 14330), | |
('natural', 14304), | |
('wrong', 14274), | |
('near', 14248), | |
('round', 14244), | |
('thought', 14214), | |
('list', 14212), | |
('argue', 14196), | |
('final', 14178), | |
('future', 14174), | |
('introduce', 14155), | |
('analysis', 14149), | |
('enter', 14141), | |
('space', 14119), | |
('arrive', 14093), | |
('ensure', 14032), | |
('demand', 14025), | |
('statement', 13990), | |
('to', 13989), | |
('attention', 13968), | |
('love', 13921), | |
('principle', 13877), | |
('pull', 13852), | |
('set', 13691), | |
('doctor', 13684), | |
('choice', 13682), | |
('refer', 13673), | |
('feature', 13669), | |
('couple', 13668), | |
('step', 13625), | |
('following', 13592), | |
('thank', 13531), | |
('machine', 13518), | |
('income', 13509), | |
('training', 13503), | |
('present', 13475), | |
('association', 13471), | |
('film', 13466), | |
('region', 13452), | |
('effort', 13386), | |
('player', 13337), | |
('everyone', 13337), | |
('present', 13308), | |
('award', 13274), | |
('village', 13249), | |
('control', 13240), | |
('organisation', 13237), | |
('whatever', 13236), | |
('news', 13223), | |
('nice', 13183), | |
('difficulty', 13177), | |
('modern', 13156), | |
('cell', 13153), | |
('close', 13106), | |
('current', 13105), | |
('legal', 13103), | |
('energy', 13083), | |
('finally', 13014), | |
('degree', 12996), | |
('mile', 12952), | |
('means', 12942), | |
('growth', 12938), | |
('whom', 12926), | |
('treatment', 12903), | |
('sound', 12902), | |
('above', 12889), | |
('task', 12883), | |
('provision', 12876), | |
('affect', 12867), | |
('please', 12862), | |
('red', 12857), | |
('happy', 12854), | |
('behaviour', 12853), | |
('concerned', 12852), | |
('point', 12844), | |
('function', 12819), | |
('identify', 12801), | |
('resource', 12790), | |
('defence', 12760), | |
('garden', 12746), | |
('floor', 12745), | |
('technology', 12713), | |
('style', 12697), | |
('feeling', 12671), | |
('science', 12644), | |
('relate', 12634), | |
('doubt', 12628), | |
('horse', 12623), | |
('force', 12606), | |
('answer', 12596), | |
('compare', 12591), | |
('suffer', 12584), | |
('individual', 12583), | |
('forward', 12582), | |
('announce', 12582), | |
('user', 12549), | |
('fund', 12523), | |
('character', 12511), | |
('risk', 12466), | |
('normal', 12451), | |
('myself', 12444), | |
('nor', 12442), | |
('dog', 12406), | |
('obtain', 12382), | |
('quickly', 12381), | |
('army', 12379), | |
('indicate', 12369), | |
('forget', 12353), | |
('station', 12328), | |
('glass', 12296), | |
('cup', 12294), | |
('previous', 12286), | |
('husband', 12263), | |
('recently', 12249), | |
('publish', 12242), | |
('serious', 12232), | |
('anyway', 12232), | |
('visit', 12213), | |
('capital', 12188), | |
('either', 12167), | |
('note', 12166), | |
('season', 12161), | |
('argument', 12125), | |
('listen', 12080), | |
('show', 12079), | |
('responsibility', 12078), | |
('significant', 12073), | |
('deal', 12067), | |
('prime', 12031), | |
('economy', 12003), | |
('element', 11997), | |
('finish', 11977), | |
('duty', 11931), | |
('fight', 11929), | |
('train', 11907), | |
('maintain', 11881), | |
('attempt', 11877), | |
('leg', 11858), | |
('investment', 11850), | |
('save', 11827), | |
('throughout', 11810), | |
('design', 11810), | |
('suddenly', 11795), | |
('brother', 11757), | |
('improve', 11753), | |
('avoid', 11750), | |
('wonder', 11735), | |
('tend', 11734), | |
('title', 11720), | |
('hotel', 11683), | |
('aspect', 11643), | |
('increase', 11628), | |
('help', 11606), | |
('industrial', 11599), | |
('express', 11591), | |
('summer', 11563), | |
('determine', 11551), | |
('generally', 11537), | |
('daughter', 11522), | |
('exist', 11515), | |
('used', 11513), | |
('share', 11505), | |
('baby', 11503), | |
('nearly', 11484), | |
('smile', 11477), | |
('sorry', 11453), | |
('sea', 11430), | |
('skill', 11423), | |
('claim', 11412), | |
('treat', 11394), | |
('remove', 11385), | |
('concern', 11368), | |
('university', 11367), | |
('labour', 11356), | |
('left', 11343), | |
('dead', 11341), | |
('discussion', 11315), | |
('specific', 11306), | |
('customer', 11292), | |
('box', 11292), | |
('outside', 11276), | |
('state', 11247), | |
('conference', 11243), | |
('whole', 11228), | |
('total', 11152), | |
('profit', 11119), | |
('division', 11119), | |
('throw', 11110), | |
('procedure', 11100), | |
('fill', 11098), | |
('king', 11045), | |
('assume', 11044), | |
('image', 11024), | |
('oil', 11022), | |
('obviously', 11014), | |
('unless', 11011), | |
('appropriate', 11010), | |
('circumstance', 11009), | |
('military', 10998), | |
('proposal', 10963), | |
('mention', 10950), | |
('client', 10948), | |
('sector', 10937), | |
('direction', 10905), | |
('admit', 10905), | |
('though', 10876), | |
('replace', 10873), | |
('basic', 10860), | |
('hard', 10843), | |
('instance', 10809), | |
('sign', 10808), | |
('original', 10808), | |
('successful', 10803), | |
('okay', 10798), | |
('reflect', 10764), | |
('aware', 10764), | |
('measure', 10760), | |
('attitude', 10758), | |
('yourself', 10746), | |
('disease', 10736), | |
('exactly', 10729), | |
('above', 10719), | |
('commission', 10712), | |
('intend', 10708), | |
('beyond', 10705), | |
('seat', 10691), | |
('president', 10678), | |
('encourage', 10664), | |
('addition', 10664), | |
('goal', 10655), | |
('round', 10635), | |
('miss', 10627), | |
('popular', 10600), | |
('affair', 10561), | |
('technique', 10548), | |
('respect', 10540), | |
('drop', 10537), | |
('professional', 10527), | |
('less', 10522), | |
('once', 10511), | |
('item', 10503), | |
('fly', 10483), | |
('reveal', 10482), | |
('version', 10481), | |
('maybe', 10472), | |
('ability', 10468), | |
('operate', 10462), | |
('good', 10451), | |
('campaign', 10441), | |
('heavy', 10439), | |
('advice', 10437), | |
('institution', 10399), | |
('top', 10398), | |
('discover', 10366), | |
('surface', 10361), | |
('library', 10356), | |
('pupil', 10320), | |
('record', 10315), | |
('refuse', 10312), | |
('prevent', 10286), | |
('advantage', 10285), | |
('dark', 10264), | |
('teach', 10248), | |
('memory', 10221), | |
('culture', 10196), | |
('blood', 10176), | |
('cost', 10167), | |
('majority', 10146), | |
('answer', 10140), | |
('variety', 10138), | |
('press', 10129), | |
('depend', 10125), | |
('bill', 10125), | |
('competition', 10119), | |
('ready', 10110), | |
('general', 10101), | |
('access', 10099), | |
('hit', 10098), | |
('stone', 10076), | |
('useful', 10071), | |
] | |
# Number of words emitted per run.
wordCount = 10000

# Split the (word, frequency) pairs in BNC into two parallel tuples.
# NOTE(review): BNC presumably holds British National Corpus lemma
# frequencies -- confirm against the data's origin.
words, weights = zip(*BNC)

# Running totals of the frequencies: cDist[i] is the summed weight of
# words[0] through words[i], so cDist[-1] is the total weight of the table.
cDist = list(itertools.accumulate(weights))

# Loop invariants, computed once.
_total = cDist[-1]
# Upper bound for the bisect search.  Capping it at the last index means a
# float product that rounds up to _total can never index past the end.
_last = len(cDist) - 1

for _ in range(wordCount):
    # Pick a uniform point in [0, _total).  The first cumulative total that
    # is strictly greater than the point selects the word, so each entry is
    # drawn with probability weight / _total.
    x = random.random() * _total
    print(words[bisect.bisect(cDist, x, 0, _last)], end=' ')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment