Skip to content

Instantly share code, notes, and snippets.

@coding37
Created February 15, 2018 11:59
Show Gist options
  • Save coding37/a5705142fe1943b93a8cef4988b3ba5f to your computer and use it in GitHub Desktop.
Save coding37/a5705142fe1943b93a8cef4988b3ba5f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Imports for the notebook: standard library first, then third-party.\n",
"import csv\n",
"import datetime\n",
"import json\n",
"import os\n",
"from collections import defaultdict\n",
"\n",
"import nltk\n",
"from nltk.stem.lancaster import LancasterStemmer\n",
"\n",
"# Lancaster stemmer instance (presumably used by later cells to stem tokens -- confirm).\n",
"stemmer = LancasterStemmer()\n",
"\n",
"# Column-oriented view of the CSV: header name -> list of that column's values,\n",
"# appended in row order. A defaultdict is used so each column's list is created\n",
"# on first append.\n",
"columns = defaultdict(list)\n",
"\n",
"# newline='' is what the csv module documents for file objects handed to a\n",
"# reader, so quoted fields that contain line breaks are parsed correctly.\n",
"with open('ISEAR.csv', newline='') as f:\n",
"    reader = csv.DictReader(f)  # yields each row as {column name: value, ...}\n",
"    for row in reader:\n",
"        # Fan the row out into the per-column lists.\n",
"        for k, v in row.items():\n",
"            columns[k].append(v)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Select the 'class' column (labels) and the 'sentence' column (texts) from the\n",
"# parsed CSV; both are lists in the CSV's row order.\n",
"label, sent = columns['class'], columns['sentence']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7516 documents\n",
"7 classes ['guilt', 'joy', 'anger', 'sadness', 'fear', 'disgust', 'shame']\n",
"5862 unique stemmed words ['dun', 'mal', 'concert', 'rejoin', 'joy', 'spent', 'needless', 'shop-lifting', '20', '3,000', 'ad', 'van', 'sleepless', 'brittain', 'zomb', 'text-book', 'steet', 'flet', 'spank', 'individ', 'teas', 'val', 'unavoid', 'allright', 'if', 'attain', 'remov', 'parapsycholog', 'apolog', '7:00', 'sky', 'bound', 'dent', 'suff', 'excit', 'mangrov', 'slid', 'disregard', 'paranorm', 'entir', 'mistook', 'hundr', 'zesco', 'city', 'self-initiative', 'calcutt', 'juic', 'mous', 'smooch', 'springboard', 'has', 'excap', 'out-pati', 'chichir', 'atmosph', 'fail', 'blee', 'street', 'prefect', 'contraceiv', 'inflam', 'phys', 'ressembl', \"o'clock\", 'frong-seat-passenger', 'styl', 'sociolog', \"'s\", 'opposit', 'under-graduate', 'his', 'custody', 'spok', 'assert', 'prep', 'sharply', 'gathh', 'eas', 'outskirt', 'psych', 'ghand', 'regard', 'incult', 'group', 'ext', 'absail', 'morn', 'swimming-pool', 'trick', 'equip', 'outland', 'imposs', 'full-scale', 'econ', 'infinit', 'black-beetle', 'gound', 'tackel', 'lt2', 'lov', 'rock-climbing', 'horseplay', 'limp', 'point', 'doudt', 'lav', 'part', 'charact', 'diificult', 'carp', 'h.k', 'twenty', 'black', 'admit', 'ass', 'boph', 'collid', 'september,1970', 'high-school', 'board', 'goe', 'verg', 'intrud', 'omit', 'grow', 'cigarat', 'homosex', 'nun', '1979', 'co-passengers', 'luck', 'inat', 'motel', 'video-tape', 'aust', 'might', 'flash', 'nasogast', 'perceiv', 'cadet', 'c.u.h.k', 'martin', '12/83', 'agress', 'must', 'unzasu', 'church', 'presid', '1983/1984', 'ent', 'carress', 'carnav', 'exit', 'earn', 'such', 'purest', 'inherit', 'bir', 'kwatch', 'listless', 'desert', 'curt', 'lif', 'peopl', 'expery', 'curios', 'rely', 'divorc', 'turkey', 'gym', 'vice-chairman', 'spid', 'anxy', 'law', 'flock', 'upsurg', 'stack', 'nor', 'stud', 'evalu', 'newsworthy', 'man', 'rar', 'lightn', 'arrog', 'nev.', 'load', 'fidg', 'planet', 'cliff', 'reprehend', 'tap', 'spectac', 'teen', 'togeth', 'wom', 'trac', 'forfeit', 'dog', 'scraped', 'affect', 
'sit', 'of', 'interrupt', '15-16', 'ceremony', 'defect', 'refug', 'insec', 'consist', 'threads', 'classm', 'beach', 'dummy', 'custom', 'unfamili', 'vent', 'flunk', 'misty', 'carrot', 'kit', 'x', 'judo', 'loos', 'climb', 'mou', 'injust', 'prelimin', 'hang', 'rady', 'inject', 'workm', 'pain', 'emot', 'polit', 'sick-bed', 'rop', 'feld', 'spoilt', 'thief', 'headlight', 'altern', 'sev', 'fortnight', 'vigo', 'remind', 'dans', 'curtain', '263', 'abdomin', 'accord', 'jon', 'ref', 'award', 'snatch', 'contud', 'menstru', 'bread', 'passeng', 'worthy', 'clebr', 'radio/tv', 'yastrebetz', 'ski-trip', 'cri', 'inst', 'outsid', 'rush', 'den', 'b.a', 'kandal', 'usurp', 'book-shop', 'possess', 'parlia', 'chew', 'inter-college', 'cham', 'girlfriend', 'salv', 'becam', 'circ', 'plain', 'res', 'jiany', 'lovesick', 'poem', 'lier', 'uncov', 'torn', 'weaknesses/fault', 'illoy', 'applaint', 'display', 'clev', 'armrest', 'g-10', 'zig-zag', 'count', 'soldy', 'fim', 'eng', 'categ', 'e.g', 'suspect', 'lev', 'grown', 'capac', 'build', 'seat', 'respect', 'spaghett', 'soweto', '14', 'wil', 'usc', 'years-course', 'attir', 'cours', 'friend', 'au-pair-girl', 'rid', 'abdom', 'student-house', 'unhappy', 'allow', 'teacher-parent', 'someth', 'indefin', 'everybody', 'co-resident', 'grav', 'virt', 'mercy', 'potty', 'partn', 'chunk', 'sensit', 'psychy', 'convert', 'bark', 'blantyr', 'friends/partner', 'overwhelm', 'ice-hockey', 'reply', 'lyr', 'sour', 'vehic', 'out-of-the-way', 'accel', 'sportshal', 'renew', 'aa', 'pibl', 'forward', 'carefr', 'motherland', 'day', 'transgress', 'also', 'lodg', 'blunt', 'ident', \"couldn't/did\", '..', 'wher', '2300', 'moscow', 'soup', 'declin', 'rout', 'world', 'humanoid', 'fac', 'nazi-germany', 'insidy', 'kasungu', 'stab', 'won', 'crech', 'road', 'meet', 'personnel', 'occas', 'morocco', 'spat', 'unscrew', 'sistem', 'reass', 'vain', 'whol', 'crockery', 'franto', 'indoor-hockey', 'prec', 'chikol', 'swallow', '1a', 'rais', 'ideas/belief', 'physiotherapy', '1980', 'sibl', 
'herd', 'a.m', 'cherry', 'moist', 'lead', 'kindergart', 'wor', 'whhen', 'text', 'string', 'bird', 'reprimand', 'hardw', 'gaug', 'mankind', 'speak', 'wind-surf', 'elsewh', 'tabl', 'back', 'fem', 'over-estimating', 'swerv', '79', 'vicin', 'inebry', 'granny', 'hereaft', 'crow', 'knob', '35', 'feebl', 'month', 'conqu', 'anyth', 'unfortun', 'precy', 'penultim', \"surfin'usa\", 'overtak', 'man-hole', 'parcel', 'exboyfriend', 'she', 'sent', 'unsatisfact', 'jumb', 'incom', 'hitch-hiking', 'asham', 'amus', 'mov', 'corrid', 'nsim', 'undu', 'strapping', 'madrid', 'c119', 'bef', 'staff', 'remuer', 'doct', 'bench', 'self-pity', 'car', 'upset', 'perform', 'queensland', 'fractoin', 'milit', 'wallet', 'tru', 'sart', 'stroke', 'slut', 'grass-parakeet', 'slattern', 'decemb', 'uncomfort', 'nicaragu', 'accept', 'longlast', 'colon', 'volley', 'vagu', 'beath', 'golf', 'persoon', 'choir', 'threaten', 'bal', 'reconcil', 'meantim', 'appl', 'spotlight', 'slic', 'gynecolog', '10-year', 'bath', 'hah', 'ice-cream', 'unforget', 'philistin', 'zealand', 'vitosh', 'biarritz', 'obvius', '5b', 'day-dreaming', '20s', 'christ', 'unpres', 'hardest', 'cv', 'room-mates', 'galic', 'am', 'frank', 'expost', 'good', 'tog', 'goddess', 'mainland', 'greec', 'worm', 'gain', 'unthought', 'gul', 'm.phil', 'institut', 'shadow', 'domin', 'bus', 'leukem', 'g.v', 'abov', 'southsid', \"'ve\", '22nd', 'uncertainty', 'kil', 'heart-attack', 'thank-you', 'pepol', 'hint', 'puberty', 'refus', 'noon', 'bring', 'closest', 'actioon', 'cook', 'aspirin', 'recont', 'sunday', 'serv', 'apprehend', 'yourn', 'sharp', 'connect', 'roam', 'inh', 'bom', 'bacam', 's.s.c', 's.paulo', 'kidney', 'croquet', 'easy-going', 'loggerhead', 'til', 'spend', 'nearest', 'scoot', 'misconduc', 'mischievy', 'priviledg', 'newslet', 'idiot', 'assault', 'lost', 'meadow', 'cathol', 'cant', 'cauty', 'shoplift', 'masturb', 'fulfil', 'construct', 'quart', 'switch', 'off-hand', 'lasr', 'lung-cancer', 'coff', 'est', 'whistl', '11.30', 'progress', 'effect', 
'undeserv', 'angry', 'prop', 'both', 'adultery', 'overpow', 'backward', 'p.u.c', 'instac', 'saddest', 'convey', 'persist', 'battl', 'beyond', 'flimsy', 'fianct', 'wristwatch', 'thrown', 'unsteady', 'uncal', 'gon', 'dash', 'nb', 'are', '10th', 'uncondit', 'surnam', '400', 'learnt', 'awhil', 'f', 'voic', 'whimsy', 'cheekbon', 'pre-marital', 'karate-teacher', 'loook', 'person', 'when', 'concert-tour', 'marathon', 'opt', 'mailbox', 'renount', 'breast', 'moreov', 'blu', 'victim', 'greatest', 'runeberg', 'sorrow', 'discredit', 'utrecht', 'x-rays', 'pair', 'talk', 'witch', 'towel', 'recr', 'box', 'correspond', 'check-up', 'appart', 'set-backs', 'path', 'hand-made', 'lumb', 'cornet', 'abandon', 'janu', 'book-report', 'run-out', 'overcook', 'unmot', 'hom', 'ath', 'proba', 'unrela', 'lent', 'gear-lev', 'sak', 'just', 'enthusiasm', 'now', 'scop', 'maim', 'socc', 'cheek', 'feel', 'lay-out', 'glant', 'fir', 'thrushing', 'educ', 'sr.', 'barrack', 'drunkard', 'flight', 'microbiolog', 'inappropry', 'horrot', 'strictly', 'drumskin', 'nasty', 'sweetheart', 'discovery', 'encout', 'warn', 'keny', '.the', 'swel', 'handicap', 'init', 'bedsheet', 'opportuinty', 'and/or', 'bled', 'stor', 'tiresom', 'competitress', 'highschool', 'guit', 'tremend', 'hen', 'dribbl', 't.b', 'm.', 'four-year', 'queu', 'tour', 'harass', 'san', 'camp', 'notebook', 'fam', 'tv/video', 'pool', 'prud', 'contempt', 'ethiop', 'heap', '49ers', 'exerc', 'recov', 'boot', 'python', 'i.', 'very', '4', 'estrang', 'depot', 'victor', 'necess', 'icy', 'misbehav', 'niggl', 'gloom', 'begg', 'me', 'fiv', 'petrol', 'runaround', 'fel', 'propos', 'cak', 'fatty', 'lun', 'agree', 'grey', 'substitut', 'scenery', 'ray', 'intend', 'pry', 'video-tapes', 'court', 'inevit', 'kms', 'expel', 'bus-stop', 'select', 'inhabit', 'pok', 'stepfath', 'rainco', 'told', 'project', 'onto', 'prospect', 'stigm', 'monopol', 'eag', 'km/h', 'banan', 'each', 'melbourn', 'nathend', 'aloud', 'span', 'bil', 'colleagu', 'precip', '..i', 'unknow', 'seemad', 
'threatening', 'joiy', 'landscap', '2-4', 'colt', 'train', 'print', 'unaw', 'bak', 'exempl', 'regret', '3-4', 'envelop', 'candy', 'appear', 'up', 'chang', 'collaps', 'bowel', 'tampon', 'aphas', 'irrelev', 'motorcyc', 'dust', 'mce', 'chin', 'august,1983', 'brazil', 'roar', 'cop', 'anyway', 'breath', 'singl', 'nuis', 'meas', 'tok', 'meat', 'kwach', 'century', 'resum', 'roadsid', 'easy', 'deer', 'springs', 'draught', 'b.e', 'conduc', 'cher', '50p', 'ai', 'genet', 'underestim', 'vivid', 'sang', 'poorest', 'rumo', 'templ', 'rud', 'upmost', 'decapit', 'threw', 'suspend', 'perfom', 'skul', 'appet', 'tempt', 'dnce', 'yugoslav', 'healthy', 'replac', 'everyday', 'great-britain', 'bunk', 'misunderstand', 'drew', 'loaf', 'intox', 'increas', 'u.s.', 'calumny', 'dutch', 'tyr', 'pretext', 'rath', '2/3', 'girllfriend', 'tooth-paste', 'command', 'meddl', 'farth', 'mishap', 'answ', 'nutrit', 'proof', 'insert', 'kitw', 'roach', 'tv-program', 'displeas', 'grass', 'f.b', 'mood', 'dist', 'enquir', 'peak', 'half', 'though', 'drug', 'reconnect', 'nocturn', 'complain', 'tought', 'ev', 'gipsy', 'guide-rail', 'night', 'sag', 'cult', 'second', 'showjump', 'sympathy', 'orph', 'ser', 'i.c.c', 'summon', 'rol', 'pra', 'dusgust', 'fog', 'angrgy', 'beg', 'hav', 'unsupport', 'salt', 'benson', 'pessim', 'prom', 'prohibit', 'privileg', 'spluttered', 'stiiborn', 'ruin', 'thay', 'biolog', 'bad', '6-7', 'undernea', 'unprep', 'pharm', 'exhibit', 'dismiss', 'crit', 'reoccur', 'jewel', 'famy', 'cos', 'aggrss', 'spont', 'discount', 'freeterm', 'ingratitud', 'mummy', 'non-rioting', 'uninterest', 'hunt', 'singing-lesson', 'inappl', 'weapon', 'toilet', 'cauliflow', 'mourn', 'pour', 'resign', ')', 'self-discipline', 'furtherm', 'despoil', 'bord', 'amount', '21/30', 'tiny', 'deaf', 'refrig', 'extend', 'afth', 'unhealthy', 'comprom', 'over-night', 'wat', 'overs', 'seasid', 'overreact', 'past', 'mango', '6', 'aeropl', 'reward', 'charm', 'glimps', '9-10', 'en', 'sil', 'fastidy', 'foet', 'reg', 'paret', 'track-team', 
'queen', 'uniform', 'ok', 'facil', 'chef', 'trampl', 'crush', 'valid', 'pinch', 'then', 'absent-mindedness', 'arcad', 'dissappoint', 'ii', 'engl', 'kettl', 'sat', 'meal', 'struggle', '1400', 'volitil', 'report', 'ten', 'shift', 'girl', 'nearby', 'end', 'feedom', 'year', 'cost', 'troublesom', 'dial', 'entitl', 'would-be', 'northeast', 'thaw', 'wheel', 'ballet', 'concet', 'obstac', 'brigad', 'c.s', 'respir', 'pg310', 'solo', 'trembl', 'sink', 'unr', 'curatao', '3.30', 'ethn', 'bloop', 'unexpress', 'lousy', 'stricken', 'ashamesd', 'ca', 'yield', 'crter', 'scriptures', 'effort', 'frontwindow', 'overtook', 'harrass', 'mil', 'met', '53', 'imp', 'downstair', 'comsomol', 'finac', '17', 'gat', '95', 'shel', 'self', 'pedolog', 'screwing', 'breakfast', 'expert', 'song', 'adept', 'stat', 'newcast', 'cold', 'proud', 'bomb', 'fut', 'legitim', 'aquaint', 'k10', 'inspir', 'collect', 'comp', 'keep', 'hungry', 'push', 'forget', 'subjug', 'blank', 'faint', 'macho', 'gest', 'bald', 'mag', 'overcam', 'fish', 'dalla', 'baobab', 'introduc', 'hometown', 'bash', 'lymph', 'hitchhik', 'govern', 'doom', 'gold', 'chas', 'thorough', 'ther', 'non', 'ros', 'sudy', 'scor', 'pub', 'spac', 'stuck', 'panty', 'urolog', 'piss', \"n't\", 'harmon', 'blad', 'cush', 'imec', \"friends'fianct\", 'sist', 'neck', 'swifty', 'squand', 'stream', 'unemploy', 'post-graduate', 'arth', 'indipend', 'askad', 'riv', 'brut', 'discuss', 'langu', 'confess', 'disillud', 'outward', 'deputy', 'piano', 'schhol', 'discountinu', 'cemetery', 'walk', 'av', 'fault', 'lab', '45', 'refil', 'stir', 'many', 'concern', 'p.m.', 'class-mate', 'driend', 'can', 'ambit', 'psycho', 'el', 'interview', 'forgot', 'market', 'windshield', 'confus', 'list', 'lizzy', 'sanct', 'fantast', 'grew', 'afr', '8-10', 'rugby', 'fortuit', 'chair', 'complex', 'ventil', 'atic', 'shop-keeper', 'filmshow', 'rustl', 'marry', 'rec', 'nam', 'candid', '16-year', '22', 'treas', 'lack', 'truffaut', 'careless', 'gril', 'siz', 'kept', 'transport', 'step-dad', 'offend', 
'christmas', 'howev', 'deceit', '30th', '70', 'shut', 'visit', 'sand', 'rift', 'double-crossing', 'forcep', 'cor', 'latest', 'insect', 'non-existent', 'warm', 'c+', 'sophom', 'urin', 'dai', 'lit', 'ant', 'field', 'band', 'fit', 'outdo', 'block', '5', 'overboard', 'cert', 'town', 'squeak', 'disheart', 'don', 'reluct', 'strongly', 'govr', 'daddy', 'moth', 'thought', 'penit', 'gay', 'mattress', 'ory', 'upr', 'aros', 'journ', 'imprison', 'preach', 'ortograph', 'honey-moon', 'immens', 'throwing', 'mix', 'nh', 'persuad', 'kingdom', 'bean', 'ago', 'anim', 'comfid', 'loath', 'oper', 'would', 'manip', 'plow', 'fear', 'bureaucr', 'jacko-lantern', 'thermo', 'tomorrow', 'hash', 'surround', 'de', 'hot', 'profit', 'took', 'startl', 'kal', 'absent-minded', 'staircas', 'pharmacolog', 'drag', 'yavann', 'incorp', 'flat', 'agricult', 'kong', 'injury', 'boss', 'murray', 'brok', 'thirsty', 'dur', 'boyfriend', 'fil', 'lik', 'entry', 'asfixy', 'impos', 'wath', 'expend', 'sign', 'bun', 'bluff', 'teach', 'zon', 'formalin', 'leaf', 'everytim', 'ineptitud', 'badminton', 'hel', 'basket', 'pict', 'eve,1984', 'breakdown', 'seclud', 'concert5', 'alright', 'nevertheless', 'wash', 'evrey', 'prid', 'caus', 'tthe', 'supermarket', 'disgu', 'sec', 'caf', 'preserv', 'superv', 'foot-ball', 'gam', 'lifestyl', 'im', 'end-of-term', 'eleg', 'sneak', 'candy-bar', 'cum', 'borrow', 'detain', 'parents-in-law', 'n.z', 'pen', 'mug', 'lout', 'un', 'reachad', 'sob', 'distroy', 'recogn', 'stereo', 'ski-jumping', 'schoolar', 'dorm', 'horizon', 'vampir', 'party', 'anym', ':', 'anct', 'tor', 'diamond', 'hollow', 'slighty', 'sing', 'eld', 'pilf', '180¦', 'question', 'mit', 'griev', 'c', 'belov', 'investig', 'unrestrain', 'tol', 'shotgun', 'uns', 'nothig', 'crack', 'liberty', 'clear', 'sue', 'computer-assignment', 'immobl', 'wip', 'inch', 'auth', 'spirit', 'margin', 'oldest', 'wastepap', 'transl', '380', 'anyhow', 'overstrain', 'adv', 'disagree', 'uncertain', 'salesperson', 'cafeter', 'wednesday', 'army', 'room', 
'accquaint', 'enthusiast', 'two-year', 'doing', \"''\", 'holocaust', 'p', 'pre-enrollment', 'bend', 'driv', 'post', 'fry', 'ran', 'bottl', 'esp', 'yel', 'pop', 'constraint', 'swim', 'ferry-boat', 'defam', 'gre', 'sydney', 'c.o.u', 'dig', 'park', 'innoc', 'beid', 'stillbir', 'landlady', 'carry', 'lad', 'termin', '6.00', 'bless', 'insult', 'consult', 'misfortun', 'p.m', 'splinters', 'observ', 'shak', 'pakeh', 'sweatheart', 'pupil', 'episod', 'gard', '1960', 'overdos', 'caterpill', 'different', 'thatch', 'unforg', 'mom', 'home-town', 'photo', 'boy', 'inv', 'beam', 'forgo', 'pts', '-that', 'juny', 'rainy', 'hassl', 'ir', 'feal', 'roughneck', 'onevery', 'stain', 'entertain', 'shortcom', 'sometim', 'research', 'misus', 'calam', 'niec', 'pend', 'fountain', 'fist', 'brib', 'hoot', 'footbal', 'ex-husband', 'norway', 'tim', 'foul', 'love-letter', 'vuln', 'card', 'fairy', 'glassy', 'counter-attack', 'furlough', 'until', 'perspir', 'b.ed', 'compel', 'fath', 'distinct', 'bat', 'ist', 'sur', 'strived', 'youngest', 'molest', 'who', 'preson', 'priv', 'slash', 'reprov', \"'\", 'ins', 'good-for-nothing', 'pow', 'reun', 'intead', 'cinem', 'kitch', 'prim', 'tray', 'drast', 'tee', 'misdiagnos', 'laundry', 'traf', 'abort', 'caught', 'dancing-tavern', 'catarrh', 'king', 'bago', 'expos', 'mess', 'farewel', 'fina', 'invalid', 'destruct', 'for', 'middl', 'bee', 'instal', 'msce', 'appeard', 'fol', 'unap', 'parents/friend', 'nitsch', '1/10', 'malicy', 'presum', 'barrel', 'upset/sad', 'aw', 'calib', 'book', 'foud', 'footstep', 'background', 'rub', 'liv', 'blund', 'dreamt', 'dynasty', 'rose-bush', 'vig', 'religy', 'shoot', 'we', 'convok', 'myself', 'bikin', 'embark', 'whatch', 'witch-craft', 'basket-ball', 'toast', 'discomfort', 'thin', 'mod', 'idol', 'penth', \"boss'car\", 'continy', 'explod', '18', '2.30', 'curricul', 'tot', 'what', 'gulf', 'freud', 'dol', 'deseas', 'toy', 'emerg', 'transfer', 'meningit', 'it/them', 'cheeky', '6.30', 'lingadz', 'headm', 'carcass', 'divid', 'cab', 'frat', 
'vand', 'myst', 'bed-ridden', 'vomit', 'littl', 'but', 'talky', '1000', 'sri', 'entrust', 'august', 'heavy', 'stand', 'yukky', 'degr', 'ass-kissing', 'absurd', 'k20', 'co-operation', 'buis', 'unru', 'nerv', 'saddn', 'gallow', 'excret', 'fao', 'pm', 'discov', 'div', 'assess', 'fas', 'blat', 'inconsist', 'decid', 'jianhau', 'ex-girlfriend', '25th', 'maryhuan', 'contact', 'audiec', 'smoknig', 'compuls', 'join', 'scratch', 'depress', 'geron', 'prefer', 'untoward', 'bor', 'ashamad', 'pract', 'senseless', 'bond', 'scat', 'priest', 'thrilled', 'fetid', 'hand-bag', 'vulg', 'downcast', 'platform', 'squirrel', 'close-up', 'throusers', 'chairm', 'grunt', 'attend', 'origin', 'hyp', 'h.s.s', 'swamp', 'soc', 'send', 'lady', 'policy', 'thursday', 'hum', 'p.t.s', 'tie', 'nos', 'thouhght', 'phon', 'avert', 'situaiton', 'k150', 'ear-ring', 'popidol', 'porno', 'bias', 'min', 'sho', 'gratitud', 'liqu', 'perm', 'profound', 'shit', 'bosom', 'handl', 'trag', 'u.e', 'swan', 'kafu', 'spring', 'shim', 'bia', 'otherw', 'overcom', 'yourself', 'pornograph', 'excel', 'bonfir', 'rost', 'goodtim', 'barbecu', 'support', 'bird-cage', 'principl', 'life-jacket', 'pes', 'determin', 'playground', 'emin', 'om', 'cycle-track', 'quet', 'flurry', 'stitch', 'aim', 'adjust', 'compass', 'defenseless', 'auckland', 'racket', 'scleral', 'recommend', 'kor', 'grnadfather', 'brant', 'driveway', 'nephew', 'lowland', 'eight', 'snowfal', 'yellow', 'afraid', 'bit', 'fif', 'quarel', 'coop', 'vic', 'chronical', 'slippery', 'thoroughf', 'cot', 'fuck', 'comput', 'smooth', 'deep', 'esc', 'neut', 'uplift', 'permit', 'good-humour', 'lord', 'perhap', 'harm', 'splashed', 'becom', 'porcelain', 'cleans', 'gorg', 'fool', 'pradesh', 'danub', 'furth', 'm.sc', 'impaty', 'chi', 'hairdress', 'salo', 'nowh', 'innertub', 'mantlepiec', 'eq', 'vind', 'dim', 'espec', 'mortu', 'pav', 'unmarry', 'bisshop', 'worthless', 'read', '140', 'marijuan', 'tarmac', 'gambl', 'head', 'raggy', 'southern', 'slap', 'pick-pocket', 'three', 'enlight', 
'masterb', 'gross', 'almanach', 'amb', 'bathroom', 'seventeen', 'cousin', 'suvenir', 'kne', 'class', 'scuffl', 'guine', 'grab', 'attack', 'conserv', 'slop', 'begin', 'himself', 'jalon', 'bridg', 'hor', 'diff', 'rib', 'strength', 'ordin', 'split', 'grandfath', 'ne', 'unfound', '100', 'rock', 'hamburg', 'telegram', 'snack', 'withrew', 'frag', 'lied', 'long', 'compet', 'scand', 'defacto', 'accost', 'flu', 'slov', 'terty', 'self-respect', 'burgl', 'escort', 'crim', 'misery', 'montros', 'inter-city', 'nicknam', 'white-out', 'story', 'timid', '50', 'ramp', 'dict', 'hamp', 'door', 'guinea-pig', 'from', 'own', 'work', 'devot', 'way', 'incorrect', 'obvy', 'separt', '26', 'lucky', 'high-heeled', 'spot', 'unnerv', 'hypocrit', 'indecid', '71.5', 'moony', 'screen', 'nineteen', 'ski-jump', 'energy', 'meant', 'fras', 'nut', 'brisb', 'hasty', 'pond', 'reflect', 'mon', 'inadequ', 'delinqu', 'nev', 'fre', 'reason', 'chick', 'fatsy', 'intercours', 'jos', 'zero', 'hygy', 'int', 'ral', 'naray', 'faht', 'ma', 'bkok', 'infect', 'adopt', 'haul', 'crocodil', 'polterg', 'libr', 'sept', 'resort', 'photocop', 'discontinu', 'between', 'knot', 'term', '13', 'peculi', 'falt', 'co-operate', 'stupid', 'kanishk', 'dard', 'studennt', 'seny', 'bitchy', 'life-partner', 'soon', 'explain', 'hiperton', 'distast', 'ndol', 'itch', 'quit', 'windy', 'collab', 'olympiad', 'near', 'catch', 'anybody', 'deadlin', '11th', 'rusty', 'boredom', 'shov', 'ground', 'thug', 'xma', 'anasthes', '1984', 'thou', 'janit', 'holdiday', 'throat', 'striven', 'slow-worm', 'underground', 'defend', 'cellulos', 'a', 'fact', 'supply', 'environ', 'interact', 'winnebago', 'loud', 'young', 'flam', 'pitty', 'account', 'eigh', 'shirk', 'caes', 'scal', 'bachel', 'jean', 'coach', 'kuikk', 'a.m.', 'puls', 'ty.b.sc', 'first', '1500', 'month-end', 'stut', '1977', 'fun', 'being', 'arm', 'non-sense', 'jealousy', 'sid', 'sauc', 'examn', 'lift', 'seem', 'fastly-driven', 'clerk', 'flap', 'trifl', 'suspens', 'conscy', 'dil', 'heart-rending', 'oft', 
'universy', 'protect', 'alarm', 'phantast', 'henry', 'sav', 'excess', 'competiot', 'it', 'us', 'guanggho', 'left', 'norm', 'search', 'tragedy', 'spinach', 'crumb', 'graduaty', 'embezzel', 'whos', '23', 'greas', 'placent', 'coev', 'dust-bin', 'sucess', 'everyboby', 'conf', 'refec', 'model', 'station-hall', 'malipeng', 'dirty', 'beacaus', 'hesit', 'skin', 'bict', 'wardrob', 'negro', 'auxy', 'acquir', 'susp', 'incest', 'prescrib', 'force-fed', 'understand', 'beat', 'accompl', 'speech', 'stol', 'stuff', 'crak', 'mean-acting', 'malaw', 'tal', 'eachoth', 'compart', 'telephon', 'neverthless', 'to', 'jew', 'spil', 'bloodsuck', 'sport', 'beforehand', 'rain', 's', 'neuropsycholog', 'prakash', 'gush', 'parson', 'errand', 'pushbik', 'spec', 'self-blaming', 'gap', 'sin', 'rearrang', 'flaunt', 'ar', 'consol', 'eveyon', 'diet', 'zomby', 'right', 'cre', 'v.c.r', 'ask', 'overturn', 'outlook', 'indec', 'near-accident', 'mountain', 'yet', 'restless', 'map', 'drinknig', 'neglig', 'props', 'cattl', 'sland', 'alimony', 'somersault', 'tetan', 'fin', 'mt', 'ticket', 'grand', 'countrysid', 'weekend', 'preclin', 'fri', 'stood', 'produc', 'unjust', 'first-day', 'duck', 'exhaust', 'eleven', 'pri', 'his/her', 'selfconfid', 'interv', 'doorm', 'period', 'western', 'nappy', 'oiut', 'utt', 'mmemb', 'rip', 'pulp', 'hi', 'peac', 'immedy', 'tedy', 'ey', 'dumb', 'guilt-conscious', 'tum', 'wand', 'dispropot', 'deny', 'poul', 'gynaecolog', 'unintelig', 'yeaterday', '...', 'kankaanpss', 'isol', 'powerless', 'eg', 'unwit', 'whit', 'volum', 'defy', 'darl', 'sel', 'pseudo-girlfriend', 'maid', 'wii', 'm.a', 'fridg', 'strike', 'sweet', 'play', 'comrad', 'snackb', '1975', '3.00', 'discrimin', 'craft', 'twaddl', 'draw', 'nowaday', 'shout', 'chap', 'benea', 'in-law', 'cloud-burst', 'neurot', 'swing', 'irrevoc', 'die', 'disq', 'part-time', 'express', 'techn', 'rom', 'impress', 'gruel', 'useless', 'balcony', 'cel', 'daughter-in-law', 'appoint', 'oversea', 'wors', 'born', 'commit', 'petty-minded', 'jerk', 
'stationery', 'cast', 'spain', 'hurt', 'bureau', 'siucid', 'empty', 'i.e.', 'ste', 'bar-person', 'aggress', '-rooms', 'roadway', 'step', 'newspap', 'snap', 'tonsil', 'besid', 'sgts', 'bitch', 'ord', 'desp', 'rich', 'bount', 'grocery', 'halloween', 'indy', 'insign', 'laz', 'sanator', 'may', 'raflect', 'fascin', 'sheet', 'backdo', 'ego', 'risk', 'trnsgressions', 'som', 'hospit', 'hook', 'lat', 'nonch', 'lizard', 'reproach', 'seagul', 'spoil', 'scrub', 'zambez', 'und', 'hold-up', 'concret', 'glaz', 'acceiv', 'champ', 'entrail', 'gun-point', \"'ll\", 'disappoint', 'paranoid', 'bookshop', 'frump', 'exalt', 'kiosk', 'repaint', 'hero', 'delay', 'suicid', 'fest', 'u.e.f.a', '21st', 'meat-factory', 'tvavn', 'bowl', 'interrel', 'sisit', 'provoc', 'poison', 'remot', 'roe', 'mem', 'hist', 'apply', 'co-worker', 'definit', 'away', 'hol', 'unsiut', 'froz', 'limburg', 'next-door', 'doorway', 'indign', 'cod-liver', 'sarcast', 'bay', '5-7', 'grandmoth', 'went', 'unattract', 'station-mistress', 'as', 'dont', 'jok', 'semi-trailer', 'stress', 'aisl', 'pear', 'dear', 'feat', 'tub', 'icebox', 'cyc', 'show', 'texa', 'erect', 'terr', 'enrol', 'belong', 'scholarship', 'ex-flatmate', 'neg', 'freshm', 'phis', 'remain', 'deprec', 'ghost', 'malt', 'embarass', 'alloc', 'dirt', 'etc.', 'tril', 'was', 'two-month', 'issu', 'sorry', 'likew', 'paint', 'you', 'disconnect', 'catastroph', 'today', 'total', 'structure', 'punct', 'round-about', 'recagn', 'wherev', '2c', 'raio', 'pettico', 'moss', 'belogradchik', 'indig', 'sol', 'withdraw', '31', 'fuel', 'eighty', 'war', 'cas', 'kiss', 'pertain', 'altough', 'betel', 'demand', 'murd', 'anaesthes', 'sof', 'kaw', 'knock', 'adapt', \"parents'wishe\", 'handkerchief', 'veron', 'york', 'thorn', 'dormit', 'cardr', 'uncoop', 'yesterday', 'incin', 'fast', 'blindworm', 'challeng', 'shopkeep', 'undergo', 'whom', 'airport', 'welcom', '000', '1983', 'nin', 'stay', 'stag', 'f.', 'avail', 'implod', 'despair', 'soppos', 'gland', 'landlord', 'eith', 'robbery', 'doubt', 
'swear', 'tough', 'bandwagon', 'tir', 'happpy', 'unseen', 'misjudg', 'negoty', 'ford', 'slaght', '20000', 'inhum', 'cash', 'seiz', 'scold', 'televid', 'bed', 'bouquet', 'rapid', 'stick', '7', 'rattl', 'bootlick', 'questionair', 'swam', 'esteem', 'dusty', 'mystery', 'alley', 'pet', 'bristl', '150', 'pew', 'uneth', 'forthcom', 'wonderful', 'moto', 'expound', 'trade-entranc', 'colleg', 'senil', 'dee', 'skid-row', 'implicit', 'reinforc', 'foxtail', 'stinky', 'ear', 'her', 'already', 'forgav', 'hemorh', 'doorbel', 'invad', 'broke-down', 'housekeep', 'gold-medal', 'shock', 'californ', 'unnecess', 'grand-mother', 'nois', 'gas', 'naughty', 'dekk', 'mask', '11', 'tourna', 'mingl', 'expir', 'saleswom', 'indisciplin', 'prison', 'cs', 'ex-boyfriend', 'sutu', 'room-mate', 'toild', 'tempera', 'wear', 'interf', 'whan', 'soppy', '3rd', 'chok', 'plot', 'aspect', 'result', 'protest', 'hir', 'vertigo', '1972', 'off', 'paw-paws', 'writ', 'liquid', 'dung', 'etc', 'per', 'plant', 'sustain', 'claim', 'eclips', 'high', 'head-master', 'minut', 'tag', 'overhast', 'prejud', 'prevy', 'march', 'typhoid', 'jail', 'bad-manner', 'impot', 'shin', 'acquaint', 'alcool', 'pedest', 'studet', 'area', 'tactless', 'nonacadem', 'n', 'reef', 'evok', 'chao', 'soembody', 'gout', 'red-tapism', '``', 'o', 'illit', 'video', 'ut', 'noisy', 'acc', 'dilig', 'mex', 'gar', 'pe', 'flop', 'hid', 'steep', 'avoid', 'glad', 'particul', '10.00', 'phonecal', 'troph', 'streets', 'faith', 'roof', 'm160', 'half-teasingly', 'indep', 'barley', 'country', 'time-tabl', 'p3', 'bliss', 'fruit', 'slam', 'overt', 'sicil', 'plan', 'somath', 'act', 'delicy', 'diabet', 'said', 'axam', 'school', 'grin', 'dark', 'bawl', 'aft', 'unkind', 'packet', 'desir', 'tatoo', 'tank', 'pil', 'unlock', 'stap-bank', 'indo', 'abs', 'disco', 'e', 'flatm', 'shirt', 'walkm', 'socio-linguistic', 'stamp', 'whre', 'malays', 'tonight', 'goos', 'soil', 'almond', 'woodwork', 'low', 'left-handed', 'han', 'hyena', 'slaught', '1976', 'ex-colleague', 'bewitch', 
'inspect', 'degrad', 'homecom', 'headmast', 'upward', 'stiff', 'import', 'congrat', 'main', 'beetl', 'novel', 'classnot', 'intact', 'prize-competition', 'disproport', 'paramilit', 'bug', '16', '1966', 'enough', 'unit', '84', 'expect', 'wallaby', 'fai', 'endless', 'side-issues', 'chines', 'approx', 'honest', 'duby', 'confid', 'lind', 'compens', 'mayb', 'untim', 'intim', 'specim', 'foresaw', 'squaw', 'nak', 'id', 'plovdiv-bourgas', 'headmistress', 'end-of-semester', 'tham', 'revolv', 'apoplect', 'wel', '12-year-old', '2-3', 'piggy', 'basebal', '2.00', 'obsc', 'imput', 'glutton', 'improp', 'beauty', 'parfum', 'tut', 'tart', 'journey', 'unz', '-down', 'hous', 'zair', 'gift', 'doubl', 'proc', 'fat', 'fountain-p', 'one-way', 'look', 'difficult', 'most', 'yr', 'tort', 'guard-man', 'imf', '4-0', 'corps', '210', 't.v', 'harb', 'thick', 'behind', 'boyfry', 'burn', 'frock', 'attract', 'anyon', 'widow', 'deceiv', 'undergon', 'collegu', 'reberbahn', 'malar', 'rejoyc', 'calc', 'squadron', 'exact', 'zoophiliac', 'fer', 'thourgh', 'accum', 'drawn', '+2', 'surg', 'leagu', 'onboard', 'spokesm', 'tract', 'varn', 'composit', 'iron', 'kilo', 'returnrd', 'reag', 'hostess', 'broad', 'sop', 'schools', 'sam', 'forbid', 'comfort', 'infrastuct', 'viol', 'scot', 'eyesight', 's.', 'stubborn', 'indefinit', 'succ', 'over-protecting', 'k80', 'childr', 'flor', 'def', 'flush', 'sung', 'transmit', '12:00', 'abhor', 'equ', 'larg', 'consolid', 'transp', 'amaz', 'colloqu', 'ship', 'lorry', 'nap', 'tight', 'muc', 'punk-girls', 'mech', 'bapt', 'fant', 'sought', 'insipid', 'unrequit', 'interest', 'marshmellow', 'slit', 'incid', 'cal', 'improv', 'out', 'fizzl', 'dullest', 'intersect', 'put', 'addit', 'anythong', 'sleep', 'hong', 'fron', 'od', 'ex', 'dar', 'undertook', 'thiev', 'dissuad', 'sniff', 'blush', 'adress', 'curv', 'bolt', 'mauritan', 'above-board', 'request', 'mus', 'trail', 'brush-off', 'fost', 'disast', 'behalf', 'cob', 'lot', 'volunt', 'hamst', 'paid', 'humy', 'studyy', 'contribut', 'shovel', 
'dog.it', '10/11', 'oncom', 'row', 'floot', 'addict', 'soch', 'with', 'help', 'thing', 'plight', 'pee', 'confirm', 'crash', 'squ', 'schoolmates', 'lan', 'new', 'vice-principal', 'migr', 'employ', 'asthm', 'trib', 'tel', 'fent', 'fuvest', 'inter-department', 'arithmet', 'red-tape', 'pud', 'smoo', 'around', 'time-limit', 'grandm', 'cli', 'key', 'overdu', '20th', 'embrac', 'ug', 'record', 'bag', 'vir', 'striking', 'alik', 'trust', 'guess', 'flew', 'bertoluch', 'unm', 'casset', 'kid', 'everyth', 'co-op', 'spect', 'evenb', 'incontin', 'coffin', 'fiant', 'rep', 'label', 'brothel', 'loc', 'cia', 'waitress', 'sold', 'uneasy', 'mother-in-law', 'idl', 'spee', 'scrupulous', 'aunty', 'afford', 'm.s.c', 'cm', 'ourselv', 'seek', 'filt', 'spel', '[', 'gestic', 'exhil', 'madm', 'celib', 'withdrew', 'phas', 'overdraw', 'allot', 'depart', 'fuss', 'make-up', 'stak', 'quiet', 'achiev', 'forest', 'tre', 'wed', 'fought', 'decompsit', 'colo', 'press', 'callend', 'condemn', 'grad', 'universt', 'fatigu', 'alway', 'fawcet', 'people/press', 'sarcasm', 'finch', 'erron', 'revers', 'dant', 'whale-hunting', 'defin', 'v.s.p', 'phenomen', 'schoolmate', 'crawl', 'tobacco', 'shaft', 'christian', 'dannielsson', 'worry-wort', 'memb', 'carneval-party', 'export', 'prpare', 'voc', 'sourc', 'gradu', 'steady', 'lap', 'threated', 'reduc', 'submit', 'fellow', 'anbd', 'asleep', 'fashon', 'rumph', 'controvers', 'frieend', 'mainstay', 'puppy', 'forty', 'profess', 'stal', 'thund', 'soap', 'undef', 'maqgd', 'wild', 'hut', 'happiest', 'ilh', 'moon', 'prob', 'plu', 'distress', 'money', 'plea', 'veh', 'mum', 'exam', '40-50', 'tram', 'flow', 'mis', 'london', 'shelf', 'unwil', 'carpet', 'hitl', 'he', 'drink', 'react', 'medit', 'abus', 'indiff', 'knowledg', 'postcard', 'tri', 'bankrupt', 'oesophag', 'belittl', 'u.i.t', 'theref', 'psycholog', 'gen', 'choo-choo', 'tolm', 'oblig', 'op', 'child', 'annoy', 'mock', 'crib', 'demonst', 'mchesi', 'irrespons', 'subtl', 'muddy', 'brea', 'so-called', 'virgin', 'kind', 'compat', 
'incomprehend', 'stict', 'thirteen', 'difficul', 'gig', 'pal', 'wond', 'himsellf', 'twentie', 'etern', 'lawn', 'acquaplan', 'spread', 'unexpect', 'genuin', 'imprt', 'boast', 'menst', 'apologet', 'tyb', 'sham', 'aote', 'slow', 'vil', 'climax', 'mamb', 'pos', 'dawn', 'discharg', 'chechoslovak', 'concours', 'bombay', 'fran', 'brief', 'stool', 'virtu', 'vas', 'narrow-mindedness', 'bik', 'overheard', 'annivers', 'stam', 'closefriend', 'continu', 'property', 'instruct', 'pollut', 'buay', 'lady-like', 'reminisc', 'mark-books', 'convuls', 'down', 'lp', 'conceiv', 'seal', 'undam', 'se', 'jeep', 'swindl', 'learn', 'peoply', 'paty', 'banquet', 'merry', 'educatioon', 'mexico', 'intens', '9', 'e.g.', 'short-cut', 'delict', 'p.v.c', 'discord', 'tread', 'disconight', 'standard', 'bold', 'skip', 'repress', 'subject', 'junk', 'yatch', 'thrashes', 'tyrol', 'separet', 'sundry', 'aspir', 'jol', 'peel', 'wd', 'scrubber', 'channel', 'attribut', 'hearty', 'blak', 'crook', 'vow', 'liar', 'dea', 'blow', 'strangers', 'fart', 'subordin', 'butchery', 'capt', 'folkl', 'student-like', 'phantasy', 'rush-hour', 'survey', 'flat-mate', 'lanscap', 'hostil', 'nod', 'mold', 'outset', 'prevail', 'b.b.q', 'perc', 'hal', 'foretel', 'quick', 'neglect', 'embarrass', 'spoon', 'mozart', 'unc', 'triv', 'hitch-hiked', 'hostel', 'deficy', 'incauty', 'ticket-collector', 'wind', 'non-present', 'm.c.e', 'prime-minister', 'jaw', 'mathem', 'n/a', 'permisss', 'crept', 'sproul', 'duty', 'fantasy', 'abrupt', 'z.c.b.c', 'wrong', 'tact', 'thereby', 'grenad', 'unworthy', 'feelig', 'chos', 'gooey', 'cross', 'across', 'unoff', 'repres', 'exprey', 'bhop', 'furnit', 'napl', 'flirt', 'bolst', 'into', 'shameless', 'comparison', 'desappear', 'beer', 'hello', 'hip', 'dishonest', 'freedom', 'season', 'priz', 'subscrib', 'struggling', 'g220', 'oil', 'enemy', 'cerebr', 'ecograph', 'convoc', 'fasch', 'ita', 'chief', 'smal', 'buttock', 'blew', 'catty', 'fetch', 'incoh', 'secod', '8pm', 'cano', 'car-accident', 'schoolwork', 'get', 
'cho', 'guffaw', 'self-control', 'excurs', 'transit', 'suit', 'miscarry', 'occur', 'japanes', 'omin', 'mad', 'two', 'paw-paw', 'marriot', 'fold', 'gum', 'forev', 'remark', 'amerik', 'maurin', 'daylight', 'cannel', 'publ', 'theft', 'cassm', 'chilumb', 'hour', 'diagnos', 'advert', 'drown', 'bulgar', 'weight', 'red-hand', 'chagrin', 'bucket', 'luxury', 'shed', 'adult', 'tent', 'auto', 'ars', 'toward', 'throw', 'physiolog', 'mustang', 'feedback', 'confind', 'conveny', 'lick', 'overnight', 'merchand', 'licens', 'beast', 'walkway', '-8', 'vice-versa', 'impertin', 'dul', 'lowest', 'accid', 'conservatoir', 'devil', 'withdrawn', 'annd', 'dislik', 'fierc', 'want', 'zamb', 'strong', 'school-mate', 'floricult', 'bru', 'exac', 'built', 'cancel', 'unknown', 'eco-activists', 'jersey', 'sail', 'friend/son', 'missunderstand', 'chameleon', 'hind', 'bang', 'tv', 'whilst', 'ye', 'gentlem', 'mer', 'broth', 'lesson', 'breakup', 'goal', 'calm', 'beh', 'revolt', 'grip', 'rhodes', 'tom', 'go', 'rap', 'brav', 'zhu', 'resprct', 'tussl', 'seesaw', 'transfus', 'k100', 'nightm', 'non-smoking', 'withold', 'wo', 'guil', 'poverty', 'pre-university', '500', 'overal', 'terrac', 'parapleg', 'hik', 'congreg', 'coin', 'hair', 'dumbfound', 'horr', 'awak', 'enclos', 'surgeon', 'plac', 'rack', 'u.t.h', 'instru', 'macaron', 'breathless', 'convict', 'slope-it', 'concid', 'that', 'youngst', 'self-esteem', 'recal', 'grand-uncle', 'morbid', 'tamp', 'invit', 'apprach', 'extrem', 'could', 'drunk', 'sou', 'retir', 'epilept', 'west', 'lug', 'youth', 'paulo', 'wrinkl', 'anthony', 'shown', 'incred', '20-35', 'brought', 'about', 'dev', 'thrashed', 'road-block', 'led', 'certain', 'counsel', 'f4', 'hing', 'engin', 'resta', 'unsult', 'somewh', 'council', 'porch', '6th', 'sixteen', 'disfig', 'ankl', 'in-laws', 'popcorn', 'l.s.h.s', 'nudist-beach', 'depend', 'top', 'among', 'freeway', 'favo', 'thank', 'gunshot', 'inconsid', 'proport', 'holy', 'tavern', 'lying', 'trunk', 'drov', 'instead', 'phillipin', 'fav', 'desagr', 
'cardiac', 'pur', 'did', 'dandy', 'bav', 'pre-arranged', 'patron', 'lock', 'lend', 'tong', 'kaley', 'ndirand', 'busy', 'satisfact', 'trous', 'less', 'shar', 'twelv', 'wer', 'unsuccess', 'fom', 'sleepy', 'slur', 'sad', 'nationality/colour', 'ward', 'slak', 'appont', 'jam', 'overload', 'doe', 'mislead', 'miss', 'avenu', 'didcov', 'roulet', 'worry', 'alamed', 'describ', 'pric', 'find', 'guilty-conscious', 'mail', 'uncontrol', 'km', 'luncheonet', 'bid', 'emotional/spirit', 'someboby', 'known', 'wad', 'pip', 'smel', 'short', 'brother/sist', \"q'land\", 'joyful', 'frog', 'balloon', 'trait', 'credit', '1/2', 'irrevers', 'fev', 'stank', '28th', 'hemi-paralyzed', 'eldest', 'par', 'antth', '2nd', \"'d\", 'chirimb', 'surrend', 'stomach', '9,000', 'bump', 'pick-pockets', 'intransig', 'thumb', 'french', 'sped', 'the', 'again', 'ring', 'judg', 'raid', 'test', 'pullov', 'mann', 'distract', 'involv', 'respond', 'actress', 'held', 'hold', 'muldoon', 'couter-argued', 'policem', 'oll', 'thereaft', 'leav', 'mast', 'over-weight', 'expl', 'a+', 'ta', 'dostoersky', 'clo', 'yr.', 'saliv', 'thi', 'labo', 'enjoy', 'ter', 'infidel', 'crap', 'rememb', 'red-cheek', 'nobody', 'provok', 'refr', 'inquiry', 'overflow', 'toad', 'crippl', 'celebr', '40', 'lip', 'spons', 'camer', 'oth', 'severl', 'ap', 'yo', 'smack', 'simil', 'indulg', 'fus', 'any', 'overslept', 'infl', 'biochem', 'wnat', 'match', 'fals', 'expens', 'ed120', 'dread', 'loss', 'sheen', 'mr.', 'volley-ball', 'relay', 'municip', 'suggest', 'check', 'startw', 'codf', 'approxim', 'greet', 'vac', 'surf', 'ach', 'oneself', 'conclud', 'six', 'nav', 'pag', 'latrin', 'mistak', 'proposit', 'afric', 'ascy', 'promot', 'roch', 'l', 'helsink', 'overjoy', '9.30/10', '4-letter', 'postm', 'grovel', 'school-girl', 'egot', 'green', 'infr', 'rotterdam', 'below', 'rum', 'rebuk', 'jimmy', 'jumbo-jet', 'grac', 'drain', 'year-old', 'defenceless', 'guest', 'thrashing', 'half-heartedly', 'fed', 'perosn', 'pre-kindergarten', 'limb', 'hug', 'palsy', 'requir', 
'vienn', 'mini-bus', 'diazepam', 'skat', 'lak', 'threat', 'basketbal', 'academy', 'hedg', 'heart', 'snarl', 'suspicy', 'incompet', 'overpr', 'elucid', 'noise-maker', 'unsavoury', 'riot', 'runway', 'pakeha', 'believ', 'clin', 'exploit', 'smear', 'tast', 'cat', 'ctit', 'twist', 'stockholm', 'pelv', 'mass', 'pret', 'kapent', 'chasm', 'round', 'tuberculos', 'no.2', 'rog', 'copenh', 'triangul', 'misbehavio', 'nov', 'sess', '300', 'lic', 'sympathet', 'england', 'sketch', 'demonstrationand', 'wit', 'hepatit', 'foil', 'equivoc', 'dying', 'cow', '70th', 'someon', 'is', 'anticip', 'dissert', 'midterm', 'star', 'sea', 'exposit', 'fak', 'rear', 'publica', 'tail', 'undertak', 'm.a.-i', 'cutlery', 'herb', 'destin', 'dat', 'mow', 'anaem', 'item', 'seeth', 'plumb', 'ven', 'telegraph', 'hs', 'delivery', 'common', 'fict', 'smok', 'bunch', 'patern', 'cit', 'favorit', '$', 'pregn', 'aborigin', 'coutry', 'tow', 'snot', 'cury', 'greasy', 'kiosqu', 'austral', 'fright', 'worthwhil', 'byk', 'confront', 'gummy', 'boil', 'comit', 'supr', 'excus', 'imperfect', 'diseas', 'strongest', 'hit-and-run', 'intoduc', 'industry', 'stop', 'vict', 'subsequ', 'northsid', 'tongue-tied', 'indirect', '1985', 'predica', 'senty', 'woman-officer-in-charge', 'urg', 'west-indies', 'random', 'monk', 'ign', 'oppress', 'crud', 'swol', 'rumb', 'incestu', \"'re\", 'attackad', 'whims', 'crew', 'alert', 'affair', 'snob', 'photograph', 'bloody', 'missil', 'swiss', 'los', 'letterbox', 'sum', 'poss', 'synchronica', 'wal', 'highway', 'feet', 'defec', 'applaus', 'fortun', 'kore', 'fragil', 'avy', 'awkward', 'ton', 'scraps', 'searg', 'stad', 'descend', 'discothequ', 'clar', 'snowstorm', 'unsubsid', 'week', 'eighteen', 'bulg', 'semest', 'uneduc', 'revid', 'squeez', 'mary', 'melanchol', 'happy', 'unload', 'imagin', 'wamkulu', 'zip', 'rob', 'rel', 'org', 'allud', 'dissecting-course', 'sleet-storm', 'maggot', 'veterin', 'view', 'nag', 'birthray', 'intercollegy', 'ruch', 'sor', 'over-excited', 'brain', 'expose/explor', 'thre', 
'had', 'fond', 'jeal', 'els', 'unansw', 'opin', 'hil', 'pla', 'chipat', 'cru', 'const', 'red-had', 'storm', 'angl', 'hit', 'famin', 'annount', 'amsterdam', 'enquiry', 'travel', 'sup', 'eleph', 'occass', 'seal-hunters', 'adject', 'mr', 'lamp', 'doorknob', 'lunch', 'loo', 'pul', 'win', 'workplac', 'milk', 'four', 'suppress', '7-8', 'chain', 'pithch', 'chock', 'subst', 'spi', 'flut', 'good-looking', 'hyperact', 'startee', 'uncleany', 'pant', 'house-work', 'territ', 'ski-race', 'scream', 'spit', 'inter-house', 'jubl', 'lol', 'revult', 'witty', 'marit', 'etiop', 'disapprov', 'adh', 'impuls', 'der', 'two-timing', 's.y.bajaj', 'cramp', 'reel', 'p.t', 'joint', 'applaud', 'pit', 'screamed', 'pity', 'height', 'monz', 'bush', 'unint', 'cue', 'reign', 'detry', 'tai', 'sint', 'candit', 'tea', 'smart', 'gut', 'than', 'vietnam', 'involunt', 'ves', 'life-saving', 'embarras', 'schoolfriend', 'adjac', 'feac', 'lash', 'stretched', 'ag', 'quarrel', 'flesh', 'nic', 'perspect', 'meddlesom', 'copperbelt', 'trans-tasman', 'chant', 'recently-completed', 'sick', 'jim', '(', 'satisfy', 'edit', 'eindhov', 'misrepres', 'sub-questions', 'saun', 'jest', 'pre-set', 'orchestr', 'ball-point-pen', 'ric', 'deal', 'object', 'genit', 'hent', 'whe', 'lam', 'diego', 'mark', 'slaughter-house', 'bullet', '!', 'loung', 'starv', 'overshadow', 'plast', 'monst', 'moody', 'peolp', 'rag', 'guid', 'wallow', 'anti-pornography', 'caress', 'panel', 'hobby', 'coury', 'step-father', 'supervid', 'com', 'che', 'enterpr', 'relationshop', 'resourc', 'insid', 'wildest', 'mirac', 'valu', 'apt', 'misunderstood', 'triffl', 'sulk', 'unesthet', 'take-off', 'forehead', 'expuls', 'savagery', 'habit', 'spin', 'bibl', 'admin', 'symphony', 'intellig', 'flick', 'alon', 'chronic', 'amok', 'wino', 'one-man', 'reserv', '5th', 'germ', 'subway', 'univers', 'solicit', 'workshift', 'ullev', 'spitoon', 'disqual', 'tchaikovsdy', 'ditch', 'co-owners', 'adolesc', 'bypass', 'naz', 'sank', 'europ', 'bul', 'loan', 'derog', 'assembl', 'rent', 
'ardm', 'insight', 'struggles', '9-5', '11-months', 'straight', 'limit', 'through', 'frigth', 'inter-national', 'tun', 'disloy', 'intimid', 'glow', 'sleev', 'diary', 'bath-tub', 'splash', 'study', 'jump', 'pretenty', 'includ', 'feir', 'gav', \"'cos\", 'withhim', 'documentay', 'kamuzu', 'john', 'gath', 'chopstick', 'decad', 'sex', 'kaikoh', 'ment', 'men', 'mangwan', 'dodg', 'cancc', 'soul', 'civil', 'scrapped', 'house/garden', '10', 'duc', 'going', 'empathy', 'g', 'basin', 'dep', 'perturb', 'felow', 'lazy', 'panhandl', 'wrot', 'sens', 'rescu', 'ill-considered', 'benefit', 'aid', 'unleash', 'worknig', 'waist', 'distort', 'far', 'guy', 'dign', 'fled', 'beco', 'convint', 'mort', 'b+', 'tax', 'neat/tidy', 'incens', 'shap', 'accidenta', 'bracelet', 'billiard', 'bahavio', 'berkeley', 'canad', 'interst', 'hallway', 'presc', 'sco', 'te', 'sient', 'diesel', 'eyelid', 'tradit', 'cross-country', 'untru', 'thos', 'angel', 'paralys', 'step-brother', 'desk', 'radio', 'track', 'unst', 'rang', 'bon', 'mana', 'spar', 'earthquak', 'unimport', 'karapiro', 'sciss', 'highest', 'cov', 'pint', 'afflu', 'tumbl', 'grevy', 'whisp', 'seg', 'someeon', 'hiss', '1', 'skunk', 'lilleham', 'febru', 'grant', 'dress', 'cap', 'autumn', 'spree', 'placc', 'reb', 'fan', 'del', 'troubl', 'statu', 'curs', 'brand', 'certainty', 'loudest', 'mat', 'unconscy', 'berlin', 'uc', '120', 'imcompet', 'acquaintac', 'imit', 'septemb', '4.30', 'whash', 'smash', 'hors', 'arbit', 'job', 'farm', 'hitchcock', 'spreading', 'oppos', 'brasil', 'regul', 'bicyc', 'amt', 'strict', 'woulld', 'girdl', 'undesir', 'slog', 'backyard', 'audit', 'exag', 'cell', 'mellit', 'valley', 'reunit', 'corn', 'surv', 'med', 'nurs', 'flood', 'real', 'graph', 'absc', 'twin', 'mr.w', 'barg', 'dac', 'self-assured', 'dogm', 'fossick', 'compress', 'unwar', 'tramway-carriage', 'scout', 'pizz', 'wurm', 'becaus', 'scoundrel', 'fluid', 'inform', 'cool', ']', '1st', 'fal', 'flag', 'execut', 'carton', 'consum', 'mean', 'campground', 'prostitut', '30s', 
'extermin', 'regatt', 'adequ', 'stranded', 'solo-parent', 'motorway', 'baby', 'acknowledg', 'rous', 'frightenee', 'rejo', 'barry', 'paddl', 'grat', 'boy-friend', 'maneuv', 'weary', 'tramp', 'baby-sitting', '1968', 'blackboard', 'mmy', 'aunt', 'accus', 'say', 'measl', 'jackpot', 'fellatio', 'pocket', 'hard', 'din', 'obstin', 'railway', 'vary', 'ready', 'hop', 'repuls', 'sampl', 'supery', 'chil', 'champagn', 'pump', 'lusak', 'key-ring', 'de-facto', 'guilt', 'neith', 'felt', 'alow', 'tongu', 'tennis-club', '6-8', 'hitch', 'surpl', 'skirt', 'intern', 'fellow-students', 'nitht', 'gossip', 'nuant', '2000', 'coron', 'chocol', 'submerg', 'il', 'rest', 'pricip', 'toddl', 'flunkey', 'toefel', 'creepy', 'land', 'obtain', 'log', 'commun', 'encount', 'cubic', 'william', 'rev', 'raw', 'mund', 'mud', 'unfriend', 'axcurs', 'at', 'dehum', 'ide', 'i.e', 'begun', 'econom', 'desol', 'flute-teacher', 'predict', 'deserv', 'flar', 'exclud', 'paedophiliac', 'essay', 'jel', 'foodst', 'latin', 'ax', 'vars', 'shark', 'pretty', 'machin', 'quadrupl', 'woodland', 'burp', 'heat', 'tin', 'dazzl', 'self-centered', 'son', 'thwarted', 'faec', 'shiny', 'wood', 'complaint', 'against', 'sloppy', 'harbo', 'chat', 'note-book', 'dialect', 'naust', 'stong', 'nkhoma', 'bum', 'truck', 'pasolin', 'meryl', 'flo', 'phlegm', 'fornit', 'boo', 'approv', 'resolv', 'run', 'fund', 'protegt', 'e.n.t', 'songbook', 'verb', 'det', 'psalm', 'crab', 'cup', 'milo', 'brew', 'god', 'mist', 'b', 'cookery', 'cough', 'turb', 'againt', 'burd', '1bs', 'schure', 'fashion-show', 'good-bye', 'gypsy', 'sportwom', 'melody', 'reciproc', 'psychotherap', 'mob', 'savio', 'col', 'trig', 'bright', 'debt', 'honey', 'aquar', 'culprit', 'servil', 'april', 'counterfeit', 'coconut', 'somebody', 'weep', 'contin', 'guarus', 'dead', 'sametim', 'next', 'relax', 'spittl', 'petty', 'few', 'hotel', 'netherland', 'whitesand', 'respons', 'stagn', 'pig', 'harsh', 'cent', 'germany', 'asy', 'autopilot', '102', 'potato', 'pass', 'via', 'sugarc', 'fob', 
'grandp', 'abud', 'volleybal', 'impract', 'whereupon', 'dis', 'slightest', 'bet', 'math', 'prov', 'unlimit', 'surrog', 'apron', 'haunt', 'interc', 'welf', 'autonom', 'grand-father', 'ledg', 'discont', 'bitum', 'leading-strings', 'ruff', 'their', 'suppl', 'tuesday', 'wedlock', 'jungl', 'rebuff', 'cruel', 'mouth', 'bas', 'advers', 'host', 'mild', 'dril', 'dizzy', 'gymnast', 'window', 'correct', 'additon', 'passerby', 'asid', 'lawy', 'screams', 'wellington', '60', 'quiz', 'feign', 'flaw', 'disappear', 'novemb', 'target', 'quietest', 'gree', 'shop-assistant', '1/8', 'address', '7th', 'drank', 'task', 'rot', 'cucumb', 'risky', 'reliev', 'gradmoth', 'aren', 'pick-up', 'gandh', 'strikers', 'they', 'maor', 'recruit', 'pot', 'imply', 'caretak', 'zemb', 'idea', 'mellon', 'exc', 'pest', 'unescort', 'serg', 'morin', 'anoth', 'break', 'gravy', 'resullt', 'cid', 'heart-broken', '19', 'matern', 'program', 'blurt', 'cry', 'delh', 'prosp', 'been', 'struck', 'anatom', 'gulp', 'switchboard', 'xii', 'sister-in-law', 'fram', 'textbook', 'death', 'ghost/murder', 'lin', 'pold', 'okay', 'p110', 'best', 'peptalk', 'diarrhoe', 'suppos', 'clown', 'snak', 'medicin', 'or', 'stink', 'ceas', 'iqam', 'academ', 'succumb', 'fury', 'hear', 'artic', 'rul', 'syphilit', 'apprecy', 'princip', 'sud', 'distrust', 'island', 'pantry', 'loudmou', 'util', \"'84\", 'surfac', 'depl', 'downhil', 'wth', 'hond', 'nijeg', 'pap', 'obstruct', 'zeeland', 'whereby', 'process', 'norveg', 'apart', 'condit', 'last', 'sunlight', 'sadistint', '8:00', 'ahead', 'choos', 'plead', 'await', 'smould', 'ret', 'budgy', 'atroph', 'cambridg', 'sunny', 'which', 'enco', 'enorm', 'knif', 'pref', ',', 'shal', 'prem', 'monday', 'mangoch', 'tramway', 'glycerin', 'scholer', 'octob', 'sacr', 'super-natural', 'surgery', 'class-work', 'crop', 'vet', 'too', 'fee', '7:20', 'kniv', 'deck', 'classroom', 'and', 'intermingl', 'sort', 'gang', 'lee', 'afternoon', 'assocy', '4-5', 'sect', 'trash', 'throughout', 'step-mother', 'ind', 'halp', 'overhear', 
'prolong', 'swept', 'app', 'analys', 'old-fashion', 'massacr', 's.a.', '110km/h', 'fight', 'detest', 'delight', 'watch', 'skid', 'pill', 'threateningly', 'democr', 'sierr', 'mahjong', 'u.g.c', 'aly', 'south-africa', 'carn', 'unexplain', 'pay', 'recit', 'bought', 'insufficiet', 'reach', 'roast', 'graz', 'guity', 'fishy', 'team', 'dam', 'misdemeano', 'messy', 'cla', 'mir', '8-week', 'them', 'homesick', 'gluttony', 'paddock', 'noise-makers', 'drip', 'thoughtless', 'level', 'almost', 'pitch', 'purs', 'handb', 'choc', 'maiz', 'slimy', 'follow', 'dang', 'protrud', 'domest', 'reclus', 'tanzan', 'israel', 'seldom', 'polynes', 'assult', 'refer', 'fawn', 'favourit', 'ov', '31st', 'semin', 'dyestuff', 'twic', 'occlud', 'unw', 'torch', 'que', 'coordin', 'funct', 'our', 'purpos', 'turku', 'floyd', 'prepccupy', 'tak', 'circumst', 'ic', 'neurolog', 'nomin', 'pretend', 'dustbin', 'bedroom', 'indir', 'paddy', 'afterward', 'butch', 'mongu', 'stov', 'thhe', 'denigr', 'apparat', 'monet', 'threatened', 'fisherm', 'hemorrh', 'ded', 'behavy', 'tho', 'convers', 'daght', 'damp', 'santand', 'temp', 'polythecn', 'driving-test', 'passport', 'devast', 'dish', 'red', 'everywh', 'firm', 'asphix', 'restrict', 'pleas', 'inter-rail-trip', 'ang', 'comprehend', 'saw', 'instructy', 'marstrand', 'lusaka-kabwe', 'faul', 'bel', 'dram', 'recognit', 'quest', 'britain', 'lie', 'redempt', 'trip', 'pol', 'russ', 'mangl', '%', 'abroad', 'fract', 'consequ', 'exampl', 'repay', 'pillow', 'strictness', 'creak', 'jr.', 'occupy', 'patrol', 'district', 'narrow', 'appp', 'i', 'footballmatch', 'mak', 'brightest', 'denount', 'utensil', 'sek', 'rail', 'burnt', 'appeas', 'comply', 'rough', 'adk', 'lard', 'mid', 'strange', 'primary-school', 'hand', 'fratern', '49er', 'necrophiliac', 'do', 'jog', 'got', 'authorit', 'unfair', 'ano', 'misinterpret', 'benidorm', 'taught', 'co-operative', 'playm', 'oi', 'undrink', 'friday', 'resid', 'lung', 'rod', 'sun', 'pregiud', 'ucb', 'attempt', 'bear', 'beggg', 'apartheid', 'audy', 
'cring', 'nursery', 'ifk', 'stabl', 'cocktail', 'how', 'lank', 'seen', 'melt', 'love/lik', 'soemon', 'weath', 'delib', 'found', 'snow', 'shepherd', 'pray', 'unfaith', 'crooky', 'cowel', 'coat', 'leath', 'hiroshim', 'clutch', 'contest', 'mith', 'column', 'ingraty', 'undang', 'settl', 'amnesty', 'ston', 'him', 'frust', 'engross', 'dump', 'drieberg', 'wait', 'downtown', 'strolling', 'big', 'homework', 'wax', 'shor', 'stronger', 'sen', 'control', 'anf', 'team-mate', 'hopeless', 'pit-latrine', 'traff', 'mard', 'calumn', 'in', 'low-spirited', 'traum', 'boycot', '&', 'bacaus', '80', 'quei', 'everythy', 'understood', 'dad', 'rikshaw', 'reveng', 'examin', 'branch', 'sadness-unpleasant', 'flip', 'perfect', 'marocco', 'thirty', 'maintain', 'hindsight', 'guard', 'py', 'sunshin', 'als', 'front', 'cupboard', 'chem', 'exarc', 'zalu', 'wee', 'dwel', 'pink', 'sery', 'admon', 'notch', 'wherea', 'etiquet', 'gear', '200', 'b.sc', 'bust', 'knew', 'audiov', 'lonaval', 'hat', 't', 'influ', 'whatsoev', 'break-up', 'jetty', 'corrupt', 'rabid', 'absolv', 'o.k', 'nieghbo', 'wound', 'grief', 'assum', 'long-time', 'undress', 'whenev', 'coincid', 'unsuit', 'disbeliev', 'my', 'fix', 'peep', 'ought', 'vis', 'feed-back', 'viscy', 'neighb', 'repair', 'peer', 'resist', 'wheth', 'bach', 'brookfield', 'estim', 'pick', 'relatio', 'cought', 'upon', 'uncom', 'babysit', 'malfunct', 'luth', 'unrest', 'ont', 'deject', 'constern', 'company', 'roll-cal', 'ris', 'poon', 'akin', 'chip', 'tiss', 'inspit', 'grumbl', 'k43', 'mot', 'deg', 'everyon', 'cous', 'canteen', 'free', 'blanket', 'abut', 'ten-year-old', 'house-hunting', 'wet', 'cons', 'vvery', 'musc', ';', 'releas', 'deeply', 'sug', 'behav', 'light', 'therapy', 'food', 'frant', 'sut', 'competit', 'query', 'fairground', 'screaming', 'f.3', 'try', '12.00', 'surpass', 'set', 'oxyg', 'luca', 'cav', 'anaunt', 'cigaret', 'abnorm', '8', 'daught', '5-10', 'disciplin', 'destroy', 'violin', 'charg', 'questionnair', 'helpless', 'atteck', 'mainroad', 'escap', 'filthy', 
'tv-chairs', 'godmoth', 'rigid', '6-2', '4th/5th', 'heckl', 'county', 'fliperam', 'bey', 'tip', 'pouch', 'cloth', 'chewing-gum', 'lliv', 'belt', 'should', 'girl-friend', 'innuendo', 'see', 'subvert', 'hung', 'raft', 'illegitim', 's.y.j.c', 't.e', 'birthday', 'an', 'chor', 'fad', 'unbear', 'ferry', 'felf', 'unconsid', 'exceiv', 'effet', 'nons', 'sal', 'wharf', 'sororoty', 'amongst', 'lect', 'yanu', 'heard', 'particip', 'misconceiv', 'streep', 'disturb', 'fur', '3', 'grand-daughter', 'obey', 'orang', 'abl', 'art', 'movy', 'undescrib', 'congest', 'nijmeg', 'disagr', 'shy', 'coast', 'whip', 'ulc', 'laugh', '30', 'nevad', 'kilomet', 'st.', 'insist', 'drum', 'graveyard', 'naiv', 'elab', 'meanwhil', 'silv', 'holiday', 'aprox', 'relig', 'diamet', 'fash', 'loop', 'toot', 'simpl', 'belch', 'develop', 'gravel', 'cabin', 'sweep', 'suffernig', 'slim', 'denmark', 'assign', 'jar', 'bourga', 'success', 'third', 'pres', 'husband', 'rat', 'pack', 'punch', 'blood', 'snow-covered', 'heal', 'glov', 'spars', 'cozy', '2', 'inadvert', 'decreas', 'chaot', 'suburb', 'demol', 'picn', 'rav', 'mutton', 'postgradu', 'excr', 'seep', 'nud', 'awok', 'whin', 'ful', 'puddl', 'conflict', 'syphil', 'depr', 'week-end', 'misl', 'non-injury', 'cart', 'u.s.a', 'motorbik', 'americ', 'hollow-cheeked', 'arrang', 'pereson', 'stil', 'worst', 'anyht', 'blind', '1982', 'slip', 'incit', 'h.s.c', 'plat', 'crochet', 'kick', '12', 'untidy', 'unpunct', 'humo', 'unind', 'lay', 'elaps', 'contain', 'shot', 'shook', 'cuddl', 'manifest', 'nest', 'burst', 'verba', 'reform', 'black-out', 'schizophrenic', 'body', 'maj', 'alcohol', 'civo', '6-12', 'f.5', 'detect', 'plug', 'dinner-service', 'buy', 'rabbit', 'dec', 'so', 'shabby', '5/6', 'lump', 'fing', 'footpa', 'commerc', 'socy', 'elev', 'robert', 'shelt', 'al', 'pencil', 'unmind', 'frost', 'self-iterested', 'inert', 'clumsy', 'upstair', '10t', 'disconsol', 'downgrad', 'system', 'assist', 'wing', 'panick', 'disgust', 'hainburg', 'surpr', 'wok', 'geograph', 'suffoc', 'numb', 
'frict', 'foreign', 'housesit', 'saf', 'facul', 'interfer', 'forg', 'keen', '--', 'twelve', 'gook', 'accompany', 'fig', 'vert', 'much', 'astrologyc', 'rank', 'on', 'fair', 'overindulg', 'steak', 'threats', 'deb', 'blam', 'ejac', 'chelston', 'cont', 'amidoph', 'monit', 'endeavo', 'mid-term', 'er', 'assassin', 'due', 'facto', 'nonsens', 'sep', 'bin', 'bank', 'onlyon', 'occipit', 'coward', 'nichola', 'accquir', 'petr', 'cur', 'least', 'utc', 'fag', 'over-crowding', 'terrfy', 'instil', 'mar', 'why', 'appropry', 'neighbo', 'crowd', 'sci', '24', 'wav', 'decl', 'mid-night', 'thes', 'clos', 'ec110', 'foggy', 'lust', 'wayn', 'airpl', 'mang', 'antihygy', 'lion', 'growl', 'c.u', 'know', 'vit', 'impost', 'doll', 'dependiong', 'unimpart', 'auditor', 'mororcyc', 'tablet', 'dry', 'themselv', 'club', 'syndrom', 'emphas', 'unwel', 'rotatee', 'toady', 'meaningless', 'solv', 'phrase', 'east', 'fot', 'pan', 'underwear', 'situ', 'scen', 'thousand', 'dean', 'pacy', 'rehears', 'loft', '3000/', 'majong', 'piec', 'outd', 'couch', 'soft', 'titl', 'mut', 'ban', 'bur', 'adjoin', 'muddl', 'kg', 'reject', 'skil', 'evil', 'admir', 'paul', 'thrust', 'postpon', 'oh', '13th', 'cockroach', 'pun', 'accredit', 'drought', 'opportun', 'nat', 'famili', 'mishang', '5-1', 'penury', 'info', 'be', 'inacceiv', 'pastry', 'saus', 'pact', 'argu', 'quot', 'lok', 'humil', '2a', 'johnny', 'biscuit', 'cour', 'exchang', 'noth', 'whatev', 'handleb', 'unlik', 'old', '9th', 'pooh', 'sew', '20.00', 'refect', 'air', 'turn', 'pect', 'self-insight', 'vineg', 'bicycle-accident', 'prompt', 'sout', 'kerb', 'impregn', 'insinu', 'prev', 'humbl', '12th', 'cam', 'wierd', 'architect', 'shop', 'chung', 'putrid', 'moonlight', 'bandit', 'contract', 'pursu', 'within', 'brac', 'clock', 'spous', 'wish', 'although', 'stroll', 'simply', 'secret', 'cheap', 'return', 'whil', 'by', 'receiv', 'moral/religious', 'chees', 'crossroad', 'glass-eye', '12:30', 'tied', 'allerg', 'luapul', 'ow', 'pilot', 'g.p.a', 'dissatisfy', '1978', 'k30', 'd', 
'31/2', 'hyst', 'longest', 'punjab', '15', 'reperbahn', 'touch', 'goodby', 'togeht', 'agr', 'glass', 'pluck', 'design', 'ger', 'diagnost', 'word', 'papu', 'wak', 'negative/i', 'along', 'brother-in-law', 'chapel', 'bottom', 'practit', 'crocokil', 'wept', 'think', 'belg', 'impud', 'hurry', 'form', 'exacut', 'jury', 'regardless', 'thu', 'long-term', 'aids/homosexuality', 'acccout', 'suprev', 'unthink', 'cut', 'leg', 'enact', 'method', 'died', 'net', 'boat', 'puzzl', 'recur', 'disobey', 'glu', 'antagon', 'hap', 'submarin', 'no', 'brainfunct', 'frog-burgers', 'typ', 'mor', 'nuclea', 'undon', 'camera', 'rs', 'unpass', 'salvad', 'nause', 'somet', 'mind', 'm', 'nail', 'start', 'magazin', 'nee', 'gospel', 'docu', 'tear', 'direct', 'uncanny', 'frequ', 'slight', 'arnhem', 'yard', 'gun', 'athlet', 'adm', 'la', 'merit', 'busha', 'chyn', 'stray', 'stairway', '16-year-old', 'unreason', 'mop', 'vip', 'wast', 'sufficy', 'stair', 'aftaid', 'penet', 'ski', 'lean', 'itself', 'posit', 'debut', 'shil', 'poor', 'stroked', 'scary', 'transf', 'every', 'interpret', 'approach', 'captain', 'arrest', 'crutch', 'clubbed/spik', 'exasp', 'obedy', 'accomod', 'tend', 'chess', 'not', '5c', 'malos', 'hyen', 'broadcast', 'rug', 'scorn', 'problem', 'gutttenburg', 'occ', 'overtim', 'weak', 'cris', 'misplac', 'lon', 'detail', 'sights', 'provid', 'terrible/defensiv', 'grotesqu', 'headquart', 'independ', 'disp', 'tens', 'duly', 'mend', 'ecst', 'wif', 'breadcrumb', 'herself', 'saturday', 'rac', 'without', 'foot', 'crazy', 'funny', 'garl', 'physics-examination', 'wint', 'ridic', 'hypocrisy', 'disclos', 'suck', 'film', 'fly', 'panicky', 'despit', 'stranger', 'coleagu', 'plycholog', 'bar', 'carpark', 'intol', 'slept', 'unpleas', '18th', 'bridesmaid', 'consid', 'housem', 'carinth', 'procrastin', 'uneq', 'gottenburg', 'olymp', '21', 'dissect', \"'m\", 'self-confidence', 'interupt', 'wid', 'illust', 'snout', 'midair', 'kabw', 'pat', 'midnight', 'jes', 'eat', 'elect', 'underw', 'wich', 'galso', 'brak', 'discard', 
'swed', 'extr', 'behavio', 'seing', 'scar', 'rupt', 'tackl', 'cle', 'giv', 'crimin', 'wellpay', 'chico', 'inter-cultural', 'sunset', 'coupl', 'horrend', 'double-crossed', 'leopard', 'mim', 'lilongw', 'complet', 'whenevr', 'parano', 'jay', 'lettuc', 'hindr', 'scruples', 'tidy', 'drop', 'abomin', 'plung', 'somehow', 'firecrack', 'gothenburg', 'dr.', 'u.k.', 'passs', 'irrit', 'ultim', 'kidnap', '10yrs', 'strangely', 'finland', 'sight', 'ashtray', 'let', 'neigbo', 'threatens', 'qual', 'theoret', 'pointless', 'breackdown', 'eel', 'forc', 'dress-rehearsal', 'dream', 'east-germany', 'compound', 'unbeliev', 'outright', 'nuclear', 'attitud', 'agit', 'betray', 'smil', 'fresh', 'sound', 'hern', 'rehabilit', 'blown', 'impolit', 'guttenburg', 'doz', 'purchas', 'coplain', 'p.a', 'aircraft', 'drift', 'cricket', 'fur-seals', 'latim', 'long-distance', 'spun', '24th', 'ess', 'immigr', 'bent', 'integr', 'ghost-like']\n"
]
}
],
"source": [
"words = []\n",
"classes = []\n",
"documents = []\n",
"ignore_words = ['?','@','-','.','_','/']\n",
"# loop through each sentence in our training data\n",
"for x in range(len(label)):\n",
" # tokenize each word in the sentence\n",
" w = nltk.word_tokenize(sent[x])\n",
" # add to our words list\n",
" words.extend(w)\n",
" # add to documents in our corpus\n",
" documents.append((w, label[x]))\n",
" # add to our classes list\n",
" if label[x] not in classes:\n",
" classes.append(label[x])\n",
"# stem and lower each word and remove duplicates\n",
"words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]\n",
"words = list(set(words))\n",
"classes = list(set(classes))\n",
"print (len(documents), \"documents\")\n",
"print (len(classes), \"classes\", classes)\n",
"print (len(words), \"unique stemmed words\", words)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# words 5862\n",
"# classes 7\n"
]
}
],
"source": [
"# create our training data\n",
"training = []\n",
"output = []\n",
"# create an empty array for our output\n",
"output_empty = [0] * len(classes)\n",
"# training set, bag of words for each sentence\n",
"for doc in documents:\n",
" # initialize our bag of words\n",
" bag = []\n",
" # list of tokenized words for the pattern\n",
" pattern_words = doc[0]\n",
" # stem each word\n",
" pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n",
" # create our bag of words array\n",
" for w in words:\n",
" bag.append(1) if w in pattern_words else bag.append(0)\n",
" training.append(bag)\n",
" # output is a '0' for each tag and '1' for current tag\n",
" output_row = list(output_empty)\n",
" output_row[classes.index(doc[1])] = 1\n",
" output.append(output_row)\n",
"print (\"# words\", len(words))\n",
"print (\"# classes\", len(classes))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# MAIN_CODE\n",
"import numpy as np\n",
"import time\n",
"\n",
"# compute sigmoid nonlinearity\n",
"def sigmoid(x):\n",
" output = 1/(1+np.exp(-x))\n",
" return output\n",
"def sigmoid_output_to_derivative(output):\n",
" return output*(1-output)\n",
" \n",
"def clean_up_sentence(sentence):\n",
" # tokenize the pattern\n",
" sentence_words = nltk.word_tokenize(sentence)\n",
" # stem each word\n",
" sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n",
" return sentence_words\n",
"\n",
"# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence\n",
"def bow(sentence, words, show_details=False):\n",
" # tokenize the pattern\n",
" sentence_words = clean_up_sentence(sentence)\n",
" # bag of words\n",
" bag = [0]*len(words) \n",
" for s in sentence_words:\n",
" for i,w in enumerate(words):\n",
" if w == s: \n",
" bag[i] = 1\n",
" if show_details:\n",
" print (\"found in bag: %s\" % w)\n",
"\n",
" return(np.array(bag))\n",
"\n",
"def think(sentence, show_details=False):\n",
" x = bow(sentence.lower(), words, show_details)\n",
" if show_details:\n",
" print (\"sentence:\", sentence, \"\\n bow:\", x)\n",
" # input layer is our bag of words\n",
" l0 = x\n",
" # matrix multiplication of input and hidden layer\n",
" l1 = sigmoid(np.dot(l0, synapse_0))\n",
" # output layer\n",
" l2 = sigmoid(np.dot(l1, synapse_1))\n",
" return l2"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# ANN and Gradient Descent code from https://iamtrask.github.io//2015/07/27/python-network-part2/\n",
"def train(X, y, hidden_neurons=10, alpha=1, epochs=50000, dropout=False, dropout_percent=0.5):\n",
"\n",
" print (\"Training with %s neurons, alpha:%s, dropout:%s %s\" % (hidden_neurons, str(alpha), dropout, dropout_percent if dropout else '') )\n",
" print (\"Input matrix: %sx%s Output matrix: %sx%s\" % (len(X),len(X[0]),1, len(classes)) )\n",
" np.random.seed(1)\n",
"\n",
" last_mean_error = 1\n",
" # randomly initialize our weights with mean 0\n",
" synapse_0 = 2*np.random.random((len(X[0]), hidden_neurons)) - 1\n",
" synapse_1 = 2*np.random.random((hidden_neurons, len(classes))) - 1\n",
"\n",
" prev_synapse_0_weight_update = np.zeros_like(synapse_0)\n",
" prev_synapse_1_weight_update = np.zeros_like(synapse_1)\n",
"\n",
" synapse_0_direction_count = np.zeros_like(synapse_0)\n",
" synapse_1_direction_count = np.zeros_like(synapse_1)\n",
" \n",
" for j in iter(range(epochs+1)):\n",
" # Feed forward through layers 0, 1, and 2\n",
" print(j)\n",
" layer_0 = X\n",
" layer_1 = sigmoid(np.dot(layer_0, synapse_0))\n",
" \n",
" if(dropout):\n",
" layer_1 *= np.random.binomial([np.ones((len(X),hidden_neurons))],1-dropout_percent)[0] * (1.0/(1-dropout_percent))\n",
"\n",
" layer_2 = sigmoid(np.dot(layer_1, synapse_1))\n",
"\n",
" # how much did we miss the target value?\n",
" layer_2_error = y - layer_2\n",
" # if this current iteration's error is greater than the last iteration, break out\n",
" if np.mean(np.abs(layer_2_error)) < last_mean_error:\n",
" print (\"delta after \"+str(j)+\" iterations:\" + str(np.mean(np.abs(layer_2_error))) )\n",
" last_mean_error = np.mean(np.abs(layer_2_error))\n",
" else:\n",
" print (\"break:\", np.mean(np.abs(layer_2_error)), \">\", last_mean_error )\n",
" break\n",
" \n",
" # in what direction is the target value?\n",
" # were we really sure? if so, don't change too much.\n",
" layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)\n",
"\n",
" # how much did each l1 value contribute to the l2 error (according to the weights)?\n",
" layer_1_error = layer_2_delta.dot(synapse_1.T)\n",
"\n",
" # in what direction is the target l1?\n",
" # were we really sure? if so, don't change too much.\n",
" layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)\n",
" \n",
" synapse_1_weight_update = (layer_1.T.dot(layer_2_delta))\n",
" synapse_0_weight_update = (layer_0.T.dot(layer_1_delta))\n",
" \n",
" if(j > 0):\n",
" synapse_0_direction_count += np.abs(((synapse_0_weight_update > 0)+0) - ((prev_synapse_0_weight_update > 0) + 0))\n",
" synapse_1_direction_count += np.abs(((synapse_1_weight_update > 0)+0) - ((prev_synapse_1_weight_update > 0) + 0)) \n",
" \n",
" synapse_1 += alpha * synapse_1_weight_update\n",
" synapse_0 += alpha * synapse_0_weight_update\n",
" \n",
" prev_synapse_0_weight_update = synapse_0_weight_update\n",
" prev_synapse_1_weight_update = synapse_1_weight_update\n",
"\n",
" now = datetime.datetime.now()\n",
"\n",
" # persist synapses\n",
" synapse = {'synapse0': synapse_0.tolist(), 'synapse1': synapse_1.tolist(),\n",
" 'datetime': now.strftime(\"%Y-%m-%d %H:%M\"),\n",
" 'words': words,\n",
" 'classes': classes\n",
" }\n",
" synapse_file = \"synapses.json\"\n",
"\n",
" with open(synapse_file, 'w') as outfile:\n",
" json.dump(synapse, outfile, indent=4, sort_keys=True)\n",
" print (\"saved synapses to:\", synapse_file)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training with 4000 neurons, alpha:0.1, dropout:False \n",
"Input matrix: 7516x5862 Output matrix: 1x7\n",
"0\n",
"delta after 0 iterations:0.497132358066\n",
"1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Home\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:7: RuntimeWarning: overflow encountered in exp\n",
" import sys\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"delta after 1 iterations:0.142857142857\n",
"2\n",
"break: 0.142857142857 > 0.142857142857\n",
"saved synapses to: synapses.json\n",
"processing time: 175.07154989242554 seconds\n"
]
}
],
"source": [
"X = np.array(training)\n",
"y = np.array(output)\n",
"\n",
"start_time = time.time()\n",
"train(X, y, hidden_neurons=4000, alpha=0.1, epochs=15, dropout=False, dropout_percent=0.2)\n",
"\n",
"elapsed_time = time.time() - start_time\n",
"print (\"processing time:\", elapsed_time, \"seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# load our calculated synapse values\n",
"synapse_file = 'synapses.json' \n",
"with open(synapse_file) as data_file: \n",
" synapse = json.load(data_file) \n",
" synapse_0 = np.asarray(synapse['synapse0']) \n",
" synapse_1 = np.asarray(synapse['synapse1'])\n",
"\n",
"def classify(sentence, show_details=False):\n",
" results = think(sentence, show_details)\n",
" print (\"Result: \",results)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Classes: ['guilt', 'joy', 'anger', 'sadness', 'fear', 'disgust', 'shame']\n",
"found in bag: pass\n",
"found in bag: the\n",
"found in bag: last\n",
"found in bag: exam\n",
"sentence: Passed the last exam \n",
" bow: [0 0 0 ..., 0 0 0]\n",
"Result: [ 0. 0. 0. 0. 0. 0. 0.]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Home\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:7: RuntimeWarning: overflow encountered in exp\n",
" import sys\n"
]
}
],
"source": [
"print(\"Classes: \",classes)#the output classes\n",
"classify(\"Passed the last exam\",show_details=True)#testing the sentence present in training data.Class for this sentence is 'joy'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment