Created
February 27, 2015 03:45
-
-
Save mattalhonte/80ab7ed302473e8114a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:32839fddcfb80693665b2ca10367c475856d92f5dd6b8ce6a9e604f30ab1e43f" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd\n", | |
"import nltk\n", | |
"from nltk.util import ngrams\n", | |
"\n", | |
"# Download the NLTK 'punkt' and 'stopwords' packages\n", | |
"for nltkPackage in ('punkt', 'stopwords'):\n", | |
"    nltk.download(nltkPackage)\n", | |
"\n", | |
"# Load the dataset from the project folder (machine-specific absolute path)\n", | |
"%cd C:\\Users\\Matt\\Dropbox\\Python Workspace\\CROW\\CROL-PDF\n", | |
"data = pd.read_csv(\"procPublicationRequest_Oct-Dec_2014_clean - procPublicationRequest_Oct-Dec_2014_clean.csv\")\n", | |
"\n", | |
"# Pull the \"human_readable\" column out as a plain list\n", | |
"human_readableList = list(data['human_readable'])\n", | |
"\n", | |
"# Coerce every value to a string\n", | |
"strReadable = [str(entry) for entry in human_readableList]\n", | |
"\n", | |
"# Break each entry into whitespace-separated words\n", | |
"listOfLists = [entry.split() for entry in strReadable]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"[nltk_data] Downloading package punkt to\n", | |
"[nltk_data] C:\\Users\\Matt\\AppData\\Roaming\\nltk_data...\n", | |
"[nltk_data] Package punkt is already up-to-date!\n", | |
"[nltk_data] Downloading package stopwords to\n", | |
"[nltk_data] C:\\Users\\Matt\\AppData\\Roaming\\nltk_data...\n", | |
"[nltk_data] Package stopwords is already up-to-date!\n", | |
"C:\\Users\\Matt\\Dropbox\\Python Workspace\\CROW\\CROL-PDF\n" | |
] | |
} | |
], | |
"prompt_number": 48 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#While we're here, let's build the raw text of the corpus so it can be written to a text file\n", | |
"# str.join assembles the string in a single pass instead of re-copying it for every entry.\n", | |
"# Each entry is preceded by a newline, so the text starts with an empty line.\n", | |
"myCorpus = ''.join(\"\\n\" + myEntry for myEntry in strReadable)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 49 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Save the corpus as 'rawCorpus.txt', the file name the next cell reads back.\n", | |
"# The with-block closes the file even if the write fails.\n", | |
"with open('rawCorpus.txt', 'w') as f:\n", | |
"    f.write(myCorpus)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 50 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Now we have a data file that'll probably be a little faster to mess with (maybe?)\n", | |
"# Read the whole file into memory; the with-block closes the handle once the text is loaded.\n", | |
"with open('rawCorpus.txt') as file:\n", | |
"    t = file.read()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 51 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Tokenize the raw text (the next cell wraps the tokens in an NLTK Text)\n", | |
"myCorpusTokenized = nltk.word_tokenize(t)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 52 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Wrap the token list in an NLTK Text\n", | |
"corpusText = nltk.Text(myCorpusTokenized)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 53 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# With a bigger body of text to work from, we can look at more interesting patterns in phrasing\n", | |
"corpusText.collocations()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"New York; substantially similar; similar titles; titles within; HEREBY\n", | |
"GIVEN; within agency; York City; sidewalk caf; 10:00 A.M.; proposed\n", | |
"contract; Annual Contracting; Contracting Plan; agency intends; 2015\n", | |
"Annual; public hearing; square foot; unenclosed sidewalk; COMMUNITY\n", | |
"BOARD; four years; End date\n" | |
] | |
} | |
], | |
"prompt_number": 54 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Count how often each token occurs in the corpus\n", | |
"corpusFreqDist = nltk.FreqDist(corpusText)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 55 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Most common words!\n", | |
"corpusFreqDist.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 36, | |
"text": [ | |
"[(',', 8033),\n", | |
" ('the', 4860),\n", | |
" ('of', 4570),\n", | |
" ('.', 2727),\n", | |
" ('and', 2338),\n", | |
" ('to', 2179),\n", | |
" (')', 1806),\n", | |
" ('(', 1747),\n", | |
" (':', 1714),\n", | |
" ('in', 1520),\n", | |
" ('a', 1259),\n", | |
" ('at', 1207),\n", | |
" ('for', 1125),\n", | |
" ('New', 1107),\n", | |
" ('York', 988),\n", | |
" ('Street', 910),\n", | |
" ('on', 875),\n", | |
" ('be', 674),\n", | |
" ('City', 665),\n", | |
" ('The', 641),\n", | |
" ('proposed', 561),\n", | |
" ('by', 519),\n", | |
" ('an', 513),\n", | |
" ('is', 501),\n", | |
" ('contract', 498),\n", | |
" ('Manhattan', 470),\n", | |
" ('will', 461),\n", | |
" ('agency', 397),\n", | |
" ('that', 396),\n", | |
" ('$', 382),\n", | |
" ('2014', 379),\n", | |
" ('from', 369),\n", | |
" ('Borough', 363),\n", | |
" ('Floor', 360),\n", | |
" ('within', 354),\n", | |
" ('Avenue', 332),\n", | |
" ('NY', 329),\n", | |
" ('date', 321),\n", | |
" ('1', 321),\n", | |
" (\"'s\", 311),\n", | |
" ('Board', 292),\n", | |
" ('public', 285),\n", | |
" ('similar', 280),\n", | |
" ('or', 279),\n", | |
" ('Services', 278),\n", | |
" ('Department', 274),\n", | |
" ('titles', 272),\n", | |
" ('substantially', 272),\n", | |
" ('as', 264),\n", | |
" ('with', 252)]" | |
] | |
} | |
], | |
"prompt_number": 36 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Time for a little cleanup: lower-casing everything is usually a good idea,\n", | |
"# because then \"Public Hearing\" will equal \"PUBLIC HEARING\"\n", | |
"lowerTokens = [token.lower() for token in myCorpusTokenized]\n", | |
"lowerText = nltk.Text(lowerTokens)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 56 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Already saved some doubling-up! Note the 5649 mentions of \"the\", instead of 4860 like in the last list\n", | |
"nltk.FreqDist(lowerText).most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 57, | |
"text": [ | |
"[(',', 8033),\n", | |
" ('the', 5649),\n", | |
" ('of', 4822),\n", | |
" ('.', 2727),\n", | |
" ('to', 2409),\n", | |
" ('and', 2392),\n", | |
" (')', 1806),\n", | |
" ('(', 1747),\n", | |
" (':', 1714),\n", | |
" ('in', 1686),\n", | |
" ('a', 1419),\n", | |
" ('for', 1261),\n", | |
" ('at', 1215),\n", | |
" ('new', 1191),\n", | |
" ('york', 1003),\n", | |
" ('street', 988),\n", | |
" ('on', 888),\n", | |
" ('is', 704),\n", | |
" ('city', 698),\n", | |
" ('be', 674),\n", | |
" ('agency', 647),\n", | |
" ('contract', 623),\n", | |
" ('proposed', 583),\n", | |
" ('an', 533),\n", | |
" ('public', 532),\n", | |
" ('by', 522),\n", | |
" ('services', 510),\n", | |
" ('manhattan', 479),\n", | |
" ('floor', 469),\n", | |
" ('will', 461),\n", | |
" ('that', 425),\n", | |
" ('borough', 414),\n", | |
" ('notice', 401),\n", | |
" ('board', 396),\n", | |
" ('hearing', 385),\n", | |
" ('$', 382),\n", | |
" ('from', 379),\n", | |
" ('2014', 379),\n", | |
" ('date', 363),\n", | |
" ('within', 356),\n", | |
" ('avenue', 336),\n", | |
" ('ny', 329),\n", | |
" ('district', 325),\n", | |
" (\"'s\", 324),\n", | |
" ('1', 321),\n", | |
" ('application', 315),\n", | |
" ('community', 307),\n", | |
" ('a.m.', 291),\n", | |
" ('personnel', 290),\n", | |
" ('department', 285)]" | |
] | |
} | |
], | |
"prompt_number": 57 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Bigrams: every pair of adjacent lower-cased tokens\n", | |
"corpusBigrams = list(ngrams(lowerTokens, 2))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 58 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Count the bigrams and show the 50 most frequent\n", | |
"corpusBigramFreqs = nltk.FreqDist(corpusBigrams)\n", | |
"corpusBigramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 59, | |
"text": [ | |
"[(('of', 'the'), 1267),\n", | |
" (('new', 'york'), 999),\n", | |
" (('in', 'the'), 666),\n", | |
" (('street', ','), 633),\n", | |
" (('.', 'the'), 491),\n", | |
" ((',', 'new'), 460),\n", | |
" (('the', 'proposed'), 454),\n", | |
" (('agency', ':'), 409),\n", | |
" ((',', 'and'), 392),\n", | |
" (('borough', 'of'), 378),\n", | |
" (('york', ','), 372),\n", | |
" ((',', '2014'), 363),\n", | |
" (('contract', ':'), 343),\n", | |
" (('for', 'the'), 335),\n", | |
" (('to', 'the'), 334),\n", | |
" (('york', 'city'), 321),\n", | |
" (('date', 'of'), 317),\n", | |
" (('on', 'the'), 316),\n", | |
" ((',', 'ny'), 314),\n", | |
" (('will', 'be'), 308),\n", | |
" (('public', 'hearing'), 280),\n", | |
" (('substantially', 'similar'), 276),\n", | |
" (('in', 'substantially'), 272),\n", | |
" (('personnel', 'in'), 272),\n", | |
" (('similar', 'titles'), 272),\n", | |
" (('titles', 'within'), 269),\n", | |
" (('department', 'of'), 266),\n", | |
" (('floor', ','), 264),\n", | |
" (('within', 'agency'), 264),\n", | |
" (('pursuant', 'to'), 259),\n", | |
" (('at', 'the'), 252),\n", | |
" (('for', 'a'), 241),\n", | |
" (('proposed', 'contract'), 228),\n", | |
" (('of', 'manhattan'), 226),\n", | |
" (('the', 'new'), 221),\n", | |
" (('notice', 'is'), 217),\n", | |
" (('the', 'following'), 214),\n", | |
" (('of', 'a'), 213),\n", | |
" (('is', 'hereby'), 213),\n", | |
" (('hereby', 'given'), 212),\n", | |
" (('--', '--'), 210),\n", | |
" (('manhattan', ','), 208),\n", | |
" ((',', 'manhattan'), 207),\n", | |
" ((',', 'at'), 204),\n", | |
" (('the', 'borough'), 202),\n", | |
" (('the', 'agency'), 201),\n", | |
" (('(', 's'), 196),\n", | |
" (('s', ')'), 196),\n", | |
" (('office', 'of'), 189),\n", | |
" (('of', 'services'), 188)]" | |
] | |
} | |
], | |
"prompt_number": 59 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Trigrams: runs of three adjacent tokens, counted, top 50 shown\n", | |
"corpusTrigrams = list(ngrams(lowerTokens, 3))\n", | |
"corpusTrigramFreqs = nltk.FreqDist(corpusTrigrams)\n", | |
"corpusTrigramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 38, | |
"text": [ | |
"[((',', 'new', 'york'), 457),\n", | |
" (('new', 'york', ','), 372),\n", | |
" (('new', 'york', 'city'), 321),\n", | |
" (('of', 'the', 'proposed'), 307),\n", | |
" (('date', 'of', 'the'), 274),\n", | |
" (('in', 'substantially', 'similar'), 272),\n", | |
" (('personnel', 'in', 'substantially'), 272),\n", | |
" (('substantially', 'similar', 'titles'), 272),\n", | |
" (('similar', 'titles', 'within'), 269),\n", | |
" (('within', 'agency', ':'), 264),\n", | |
" (('titles', 'within', 'agency'), 264),\n", | |
" (('borough', 'of', 'manhattan'), 223),\n", | |
" (('notice', 'is', 'hereby'), 213),\n", | |
" (('is', 'hereby', 'given'), 212),\n", | |
" (('the', 'new', 'york'), 206),\n", | |
" (('--', '--', '--'), 204),\n", | |
" (('the', 'proposed', 'contract'), 201),\n", | |
" (('(', 's', ')'), 196),\n", | |
" (('proposed', 'contract', ':'), 189),\n", | |
" (('in', 'the', 'borough'), 187),\n", | |
" (('the', 'borough', 'of'), 187),\n", | |
" (('hereby', 'given', 'that'), 185),\n", | |
" (('york', ',', 'ny'), 173),\n", | |
" (('agency', 'intends', 'to'), 169),\n", | |
" (('the', 'agency', 'intends'), 166),\n", | |
" (('of', 'new', 'york'), 165),\n", | |
" (('end', 'date', 'of'), 145),\n", | |
" ((',', 'borough', 'of'), 144),\n", | |
" (('to', 'utilize', ':'), 137),\n", | |
" (('intends', 'to', 'utilize'), 137),\n", | |
" (('a', 'term', 'of'), 136),\n", | |
" (('headcount', 'of', 'personnel'), 136),\n", | |
" (('of', 'personnel', 'in'), 136),\n", | |
" (('for', 'a', 'term'), 136),\n", | |
" (('start', 'date', 'of'), 133),\n", | |
" (('not', 'included', 'in'), 128),\n", | |
" (('annual', 'contracting', 'plan'), 126),\n", | |
" (('contracting', 'plan', 'and'), 126),\n", | |
" (('s', ')', 'not'), 126),\n", | |
" (('plan', 'and', 'schedule'), 126),\n", | |
" ((')', 'not', 'included'), 126),\n", | |
" (('and', 'operate', 'an'), 125),\n", | |
" ((',', 'and', 'operate'), 124),\n", | |
" (('caf', 'for', 'a'), 124),\n", | |
" (('fy', '2015', 'annual'), 124),\n", | |
" (('2015', 'annual', 'contracting'), 124),\n", | |
" (('maintain', ',', 'and'), 124),\n", | |
" (('sidewalk', 'caf', 'for'), 124),\n", | |
" (('years', '.', ')'), 123),\n", | |
" (('unenclosed', 'sidewalk', 'caf'), 120)]" | |
] | |
} | |
], | |
"prompt_number": 38 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Same again with runs of four adjacent tokens\n", | |
"corpus4grams = list(ngrams(lowerTokens, 4))\n", | |
"corpus4gramFreqs = nltk.FreqDist(corpus4grams)\n", | |
"corpus4gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 39, | |
"text": [ | |
"[(('personnel', 'in', 'substantially', 'similar'), 272),\n", | |
" (('in', 'substantially', 'similar', 'titles'), 272),\n", | |
" (('substantially', 'similar', 'titles', 'within'), 269),\n", | |
" ((',', 'new', 'york', ','), 269),\n", | |
" (('titles', 'within', 'agency', ':'), 264),\n", | |
" (('similar', 'titles', 'within', 'agency'), 264),\n", | |
" (('date', 'of', 'the', 'proposed'), 264),\n", | |
" (('notice', 'is', 'hereby', 'given'), 212),\n", | |
" (('the', 'new', 'york', 'city'), 199),\n", | |
" (('of', 'the', 'proposed', 'contract'), 199),\n", | |
" (('--', '--', '--', '--'), 198),\n", | |
" (('the', 'proposed', 'contract', ':'), 186),\n", | |
" (('is', 'hereby', 'given', 'that'), 185),\n", | |
" (('in', 'the', 'borough', 'of'), 183),\n", | |
" (('new', 'york', ',', 'ny'), 173),\n", | |
" (('the', 'agency', 'intends', 'to'), 166),\n", | |
" (('agency', 'intends', 'to', 'utilize'), 137),\n", | |
" (('intends', 'to', 'utilize', ':'), 137),\n", | |
" (('for', 'a', 'term', 'of'), 136),\n", | |
" (('headcount', 'of', 'personnel', 'in'), 136),\n", | |
" (('of', 'personnel', 'in', 'substantially'), 136),\n", | |
" (('start', 'date', 'of', 'the'), 131),\n", | |
" (('end', 'date', 'of', 'the'), 131),\n", | |
" (('s', ')', 'not', 'included'), 126),\n", | |
" (('(', 's', ')', 'not'), 126),\n", | |
" (('annual', 'contracting', 'plan', 'and'), 126),\n", | |
" ((')', 'not', 'included', 'in'), 126),\n", | |
" (('contracting', 'plan', 'and', 'schedule'), 126),\n", | |
" (('sidewalk', 'caf', 'for', 'a'), 124),\n", | |
" ((',', 'and', 'operate', 'an'), 124),\n", | |
" (('fy', '2015', 'annual', 'contracting'), 124),\n", | |
" (('caf', 'for', 'a', 'term'), 124),\n", | |
" (('maintain', ',', 'and', 'operate'), 124),\n", | |
" (('2015', 'annual', 'contracting', 'plan'), 124),\n", | |
" (('the', 'borough', 'of', 'manhattan'), 123),\n", | |
" (('unenclosed', 'sidewalk', 'caf', 'for'), 120),\n", | |
" (('new', 'york', 'city', 'charter'), 116),\n", | |
" (('hereby', 'given', 'that', 'the'), 116),\n", | |
" (('operate', 'an', 'unenclosed', 'sidewalk'), 112),\n", | |
" (('an', 'unenclosed', 'sidewalk', 'caf'), 112),\n", | |
" (('and', 'operate', 'an', 'unenclosed'), 112),\n", | |
" (('22', 'reade', 'street', ','), 109),\n", | |
" (('city', 'of', 'new', 'york'), 108),\n", | |
" (('in', 'the', 'matter', 'of'), 106),\n", | |
" (('the', 'city', 'of', 'new'), 105),\n", | |
" (('of', 'services', 'sought', ':'), 104),\n", | |
" (('solicitation', 'the', 'agency', 'intends'), 103),\n", | |
" (('method', 'of', 'solicitation', 'the'), 103),\n", | |
" (('of', 'solicitation', 'the', 'agency'), 103),\n", | |
" (('a', 'term', 'of', 'four'), 102)]" | |
] | |
} | |
], | |
"prompt_number": 39 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Runs of five adjacent tokens, counted, top 50 shown\n", | |
"corpus5grams = list(ngrams(lowerTokens, 5))\n", | |
"corpus5gramFreqs = nltk.FreqDist(corpus5grams)\n", | |
"corpus5gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 40, | |
"text": [ | |
"[(('personnel', 'in', 'substantially', 'similar', 'titles'), 272),\n", | |
" (('in', 'substantially', 'similar', 'titles', 'within'), 269),\n", | |
" (('similar', 'titles', 'within', 'agency', ':'), 264),\n", | |
" (('substantially', 'similar', 'titles', 'within', 'agency'), 264),\n", | |
" (('--', '--', '--', '--', '--'), 192),\n", | |
" (('of', 'the', 'proposed', 'contract', ':'), 186),\n", | |
" (('date', 'of', 'the', 'proposed', 'contract'), 186),\n", | |
" (('notice', 'is', 'hereby', 'given', 'that'), 185),\n", | |
" ((',', 'new', 'york', ',', 'ny'), 152),\n", | |
" (('agency', 'intends', 'to', 'utilize', ':'), 137),\n", | |
" (('headcount', 'of', 'personnel', 'in', 'substantially'), 136),\n", | |
" (('of', 'personnel', 'in', 'substantially', 'similar'), 136),\n", | |
" (('the', 'agency', 'intends', 'to', 'utilize'), 134),\n", | |
" (('start', 'date', 'of', 'the', 'proposed'), 131),\n", | |
" (('end', 'date', 'of', 'the', 'proposed'), 131),\n", | |
" (('(', 's', ')', 'not', 'included'), 126),\n", | |
" (('annual', 'contracting', 'plan', 'and', 'schedule'), 126),\n", | |
" (('s', ')', 'not', 'included', 'in'), 126),\n", | |
" (('maintain', ',', 'and', 'operate', 'an'), 124),\n", | |
" (('caf', 'for', 'a', 'term', 'of'), 124),\n", | |
" (('2015', 'annual', 'contracting', 'plan', 'and'), 124),\n", | |
" (('fy', '2015', 'annual', 'contracting', 'plan'), 124),\n", | |
" (('sidewalk', 'caf', 'for', 'a', 'term'), 124),\n", | |
" (('in', 'the', 'borough', 'of', 'manhattan'), 121),\n", | |
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a'), 120),\n", | |
" (('is', 'hereby', 'given', 'that', 'the'), 116),\n", | |
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n", | |
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n", | |
" ((',', 'and', 'operate', 'an', 'unenclosed'), 112),\n", | |
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n", | |
" (('the', 'city', 'of', 'new', 'york'), 104),\n", | |
" (('method', 'of', 'solicitation', 'the', 'agency'), 103),\n", | |
" (('of', 'solicitation', 'the', 'agency', 'intends'), 103),\n", | |
" (('solicitation', 'the', 'agency', 'intends', 'to'), 103),\n", | |
" (('a', 'term', 'of', 'four', 'years'), 102),\n", | |
" (('for', 'a', 'term', 'of', 'four'), 102),\n", | |
" (('of', 'four', 'years', '.', ')'), 101),\n", | |
" (('term', 'of', 'four', 'years', '.'), 101),\n", | |
" (('the', 'borough', 'of', 'manhattan', '('), 100),\n", | |
" ((',', '22', 'reade', 'street', ','), 100),\n", | |
" (('borough', 'of', 'manhattan', '(', 'to'), 99),\n", | |
" (('to', 'maintain', ',', 'and', 'operate'), 98),\n", | |
" (('floor', ',', 'new', 'york', ','), 96),\n", | |
" (('solicitation', '(', 's', ')', 'not'), 94),\n", | |
" (('titles', 'within', 'agency', ':', 'none'), 93),\n", | |
" (('none', 'headcount', 'of', 'personnel', 'in'), 92),\n", | |
" (('agency', ':', 'none', 'headcount', 'of'), 92),\n", | |
" ((':', 'none', 'headcount', 'of', 'personnel'), 92),\n", | |
" (('within', 'agency', ':', 'none', 'headcount'), 91),\n", | |
" (('continue', 'to', 'maintain', ',', 'and'), 89)]" | |
] | |
} | |
], | |
"prompt_number": 40 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Runs of six adjacent tokens, counted, top 50 shown\n", | |
"corpus6grams = list(ngrams(lowerTokens, 6))\n", | |
"corpus6gramFreqs = nltk.FreqDist(corpus6grams)\n", | |
"corpus6gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 41, | |
"text": [ | |
"[(('personnel', 'in', 'substantially', 'similar', 'titles', 'within'), 269),\n", | |
" (('substantially', 'similar', 'titles', 'within', 'agency', ':'), 264),\n", | |
" (('in', 'substantially', 'similar', 'titles', 'within', 'agency'), 264),\n", | |
" (('--', '--', '--', '--', '--', '--'), 186),\n", | |
" (('date', 'of', 'the', 'proposed', 'contract', ':'), 186),\n", | |
" (('headcount', 'of', 'personnel', 'in', 'substantially', 'similar'), 136),\n", | |
" (('of', 'personnel', 'in', 'substantially', 'similar', 'titles'), 136),\n", | |
" (('the', 'agency', 'intends', 'to', 'utilize', ':'), 134),\n", | |
" (('(', 's', ')', 'not', 'included', 'in'), 126),\n", | |
" (('2015', 'annual', 'contracting', 'plan', 'and', 'schedule'), 124),\n", | |
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of'), 124),\n", | |
" (('fy', '2015', 'annual', 'contracting', 'plan', 'and'), 124),\n", | |
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term'), 120),\n", | |
" (('notice', 'is', 'hereby', 'given', 'that', 'the'), 116),\n", | |
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'), 112),\n", | |
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n", | |
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n", | |
" (('maintain', ',', 'and', 'operate', 'an', 'unenclosed'), 112),\n", | |
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n", | |
" (('method', 'of', 'solicitation', 'the', 'agency', 'intends'), 103),\n", | |
" (('of', 'solicitation', 'the', 'agency', 'intends', 'to'), 103),\n", | |
" (('solicitation', 'the', 'agency', 'intends', 'to', 'utilize'), 103),\n", | |
" (('caf', 'for', 'a', 'term', 'of', 'four'), 102),\n", | |
" (('for', 'a', 'term', 'of', 'four', 'years'), 102),\n", | |
" (('term', 'of', 'four', 'years', '.', ')'), 101),\n", | |
" (('a', 'term', 'of', 'four', 'years', '.'), 101),\n", | |
" (('in', 'the', 'borough', 'of', 'manhattan', '('), 100),\n", | |
" (('the', 'borough', 'of', 'manhattan', '(', 'to'), 99),\n", | |
" (('to', 'maintain', ',', 'and', 'operate', 'an'), 98),\n", | |
" (('end', 'date', 'of', 'the', 'proposed', 'contract'), 96),\n", | |
" (('solicitation', '(', 's', ')', 'not', 'included'), 94),\n", | |
" (('similar', 'titles', 'within', 'agency', ':', 'none'), 93),\n", | |
" (('agency', ':', 'none', 'headcount', 'of', 'personnel'), 92),\n", | |
" (('none', 'headcount', 'of', 'personnel', 'in', 'substantially'), 92),\n", | |
" ((':', 'none', 'headcount', 'of', 'personnel', 'in'), 92),\n", | |
" (('titles', 'within', 'agency', ':', 'none', 'headcount'), 91),\n", | |
" (('within', 'agency', ':', 'none', 'headcount', 'of'), 91),\n", | |
" (('start', 'date', 'of', 'the', 'proposed', 'contract'), 90),\n", | |
" (('continue', 'to', 'maintain', ',', 'and', 'operate'), 89),\n", | |
" (('borough', 'of', 'manhattan', '(', 'to', 'continue'), 87),\n", | |
" (('of', 'manhattan', '(', 'to', 'continue', 'to'), 86),\n", | |
" (('(', 'to', 'continue', 'to', 'maintain', ','), 86),\n", | |
" (('to', 'continue', 'to', 'maintain', ',', 'and'), 86),\n", | |
" (('similar', 'titles', 'within', 'agency', ':', '0'), 83),\n", | |
" (('spector', 'hall', ',', '22', 'reade', 'street'), 82),\n", | |
" (('hall', ',', '22', 'reade', 'street', ','), 82),\n", | |
" (('in', 'spector', 'hall', ',', '22', 'reade'), 81),\n", | |
" (('manhattan', '(', 'to', 'continue', 'to', 'maintain'), 74),\n", | |
" (('floor', ',', 'new', 'york', ',', 'ny'), 72),\n", | |
" ((',', 'new', 'york', ',', 'ny', '10007'), 66)]" | |
] | |
} | |
], | |
"prompt_number": 41 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Runs of seven adjacent tokens, counted, top 50 shown\n", | |
"corpus7grams = list(ngrams(lowerTokens, 7))\n", | |
"corpus7gramFreqs = nltk.FreqDist(corpus7grams)\n", | |
"corpus7gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 42, | |
"text": [ | |
"[(('personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency'),\n", | |
" 264),\n", | |
" (('in', 'substantially', 'similar', 'titles', 'within', 'agency', ':'), 264),\n", | |
" (('--', '--', '--', '--', '--', '--', '--'), 180),\n", | |
" (('headcount', 'of', 'personnel', 'in', 'substantially', 'similar', 'titles'),\n", | |
" 136),\n", | |
" (('of', 'personnel', 'in', 'substantially', 'similar', 'titles', 'within'),\n", | |
" 133),\n", | |
" (('fy', '2015', 'annual', 'contracting', 'plan', 'and', 'schedule'), 124),\n", | |
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of'), 120),\n", | |
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n", | |
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term'), 112),\n", | |
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'), 112),\n", | |
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n", | |
" (('maintain', ',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n", | |
" (('of', 'solicitation', 'the', 'agency', 'intends', 'to', 'utilize'), 103),\n", | |
" (('solicitation', 'the', 'agency', 'intends', 'to', 'utilize', ':'), 103),\n", | |
" (('method', 'of', 'solicitation', 'the', 'agency', 'intends', 'to'), 103),\n", | |
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four'), 102),\n", | |
" (('caf', 'for', 'a', 'term', 'of', 'four', 'years'), 102),\n", | |
" (('a', 'term', 'of', 'four', 'years', '.', ')'), 101),\n", | |
" (('for', 'a', 'term', 'of', 'four', 'years', '.'), 101),\n", | |
" (('in', 'the', 'borough', 'of', 'manhattan', '(', 'to'), 99),\n", | |
" (('end', 'date', 'of', 'the', 'proposed', 'contract', ':'), 96),\n", | |
" (('solicitation', '(', 's', ')', 'not', 'included', 'in'), 94),\n", | |
" (('substantially', 'similar', 'titles', 'within', 'agency', ':', 'none'), 93),\n", | |
" (('none', 'headcount', 'of', 'personnel', 'in', 'substantially', 'similar'),\n", | |
" 92),\n", | |
" (('agency', ':', 'none', 'headcount', 'of', 'personnel', 'in'), 92),\n", | |
" ((':', 'none', 'headcount', 'of', 'personnel', 'in', 'substantially'), 92),\n", | |
" (('to', 'maintain', ',', 'and', 'operate', 'an', 'unenclosed'), 91),\n", | |
" (('titles', 'within', 'agency', ':', 'none', 'headcount', 'of'), 91),\n", | |
" (('similar', 'titles', 'within', 'agency', ':', 'none', 'headcount'), 91),\n", | |
" (('within', 'agency', ':', 'none', 'headcount', 'of', 'personnel'), 91),\n", | |
" (('start', 'date', 'of', 'the', 'proposed', 'contract', ':'), 90),\n", | |
" (('continue', 'to', 'maintain', ',', 'and', 'operate', 'an'), 89),\n", | |
" (('the', 'borough', 'of', 'manhattan', '(', 'to', 'continue'), 87),\n", | |
" (('to', 'continue', 'to', 'maintain', ',', 'and', 'operate'), 86),\n", | |
" (('(', 'to', 'continue', 'to', 'maintain', ',', 'and'), 86),\n", | |
" (('borough', 'of', 'manhattan', '(', 'to', 'continue', 'to'), 86),\n", | |
" (('substantially', 'similar', 'titles', 'within', 'agency', ':', '0'), 83),\n", | |
" (('spector', 'hall', ',', '22', 'reade', 'street', ','), 82),\n", | |
" (('in', 'spector', 'hall', ',', '22', 'reade', 'street'), 81),\n", | |
" (('manhattan', '(', 'to', 'continue', 'to', 'maintain', ','), 74),\n", | |
" (('of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain'), 74),\n", | |
" (('notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor'), 65),\n", | |
" (('s', ')', 'not', 'included', 'in', 'the', 'fy'), 63),\n", | |
" (('hereby', 'given', 'that', 'the', 'mayor', 'will', 'be'), 63),\n", | |
" (('schedule', 'that', 'is', 'published', 'pursuant', 'to', 'new'), 63),\n", | |
" (('contracting', 'plan', 'and', 'schedule', 'notice', 'is', 'hereby'), 63),\n", | |
" (('plan', 'and', 'schedule', 'notice', 'is', 'hereby', 'given'), 63),\n", | |
" (('annual', 'contracting', 'plan', 'and', 'schedule', 'notice', 'is'), 63),\n", | |
" (('to', 'new', 'york', 'city', 'charter', '312', '('), 63),\n", | |
" (('annual', 'contracting', 'plan', 'and', 'schedule', 'that', 'is'), 63)]" | |
] | |
} | |
], | |
"prompt_number": 42 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Runs of eight adjacent tokens, counted, top 50 shown\n", | |
"corpus8grams = list(ngrams(lowerTokens, 8))\n", | |
"corpus8gramFreqs = nltk.FreqDist(corpus8grams)\n", | |
"corpus8gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 43, | |
"text": [ | |
"[(('personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':'),\n", | |
" 264),\n", | |
" (('--', '--', '--', '--', '--', '--', '--', '--'), 174),\n", | |
" (('headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within'),\n", | |
" 133),\n", | |
" (('of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency'),\n", | |
" 129),\n", | |
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'), 112),\n", | |
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n", | |
" (('maintain', ',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf'),\n", | |
" 112),\n", | |
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of'), 112),\n", | |
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term'), 112),\n", | |
" (('of', 'solicitation', 'the', 'agency', 'intends', 'to', 'utilize', ':'),\n", | |
" 103),\n", | |
" (('method',\n", | |
" 'of',\n", | |
" 'solicitation',\n", | |
" 'the',\n", | |
" 'agency',\n", | |
" 'intends',\n", | |
" 'to',\n", | |
" 'utilize'),\n", | |
" 103),\n", | |
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years'), 102),\n", | |
" (('caf', 'for', 'a', 'term', 'of', 'four', 'years', '.'), 101),\n", | |
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four'), 101),\n", | |
" (('for', 'a', 'term', 'of', 'four', 'years', '.', ')'), 101),\n", | |
" (('in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none'),\n", | |
" 93),\n", | |
" (('agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially'),\n", | |
" 92),\n", | |
" (('none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles'),\n", | |
" 92),\n", | |
" ((':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar'),\n", | |
" 92),\n", | |
" (('titles', 'within', 'agency', ':', 'none', 'headcount', 'of', 'personnel'),\n", | |
" 91),\n", | |
" (('to', 'maintain', ',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk'),\n", | |
" 91),\n", | |
" (('similar', 'titles', 'within', 'agency', ':', 'none', 'headcount', 'of'),\n", | |
" 91),\n", | |
" (('substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount'),\n", | |
" 91),\n", | |
" (('within', 'agency', ':', 'none', 'headcount', 'of', 'personnel', 'in'), 91),\n", | |
" (('in', 'the', 'borough', 'of', 'manhattan', '(', 'to', 'continue'), 87),\n", | |
" (('the', 'borough', 'of', 'manhattan', '(', 'to', 'continue', 'to'), 86),\n", | |
" (('(', 'to', 'continue', 'to', 'maintain', ',', 'and', 'operate'), 86),\n", | |
" (('to', 'continue', 'to', 'maintain', ',', 'and', 'operate', 'an'), 86),\n", | |
" (('in', 'substantially', 'similar', 'titles', 'within', 'agency', ':', '0'),\n", | |
" 83),\n", | |
" (('continue', 'to', 'maintain', ',', 'and', 'operate', 'an', 'unenclosed'),\n", | |
" 82),\n", | |
" (('in', 'spector', 'hall', ',', '22', 'reade', 'street', ','), 81),\n", | |
" (('borough', 'of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain'), 74),\n", | |
" (('manhattan', '(', 'to', 'continue', 'to', 'maintain', ',', 'and'), 74),\n", | |
" (('of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain', ','), 74),\n", | |
" (('notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor', 'will'), 63),\n", | |
" (('contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant'),\n", | |
" 63),\n", | |
" (('annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby'),\n", | |
" 63),\n", | |
" (('to', 'new', 'york', 'city', 'charter', '312', '(', 'a'), 63),\n", | |
" (('(', 's', ')', 'not', 'included', 'in', 'the', 'fy'), 63),\n", | |
" (('annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published'),\n", | |
" 63),\n", | |
" (('pursuant', 'to', 'new', 'york', 'city', 'charter', '312', '('), 63),\n", | |
" (('that', 'is', 'published', 'pursuant', 'to', 'new', 'york', 'city'), 63),\n", | |
" (('city', 'charter', '312', '(', 'a', ')', ':', 'agency'), 63),\n", | |
" (('schedule', 'notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor'), 63),\n", | |
" (('published', 'pursuant', 'to', 'new', 'york', 'city', 'charter', '312'),\n", | |
" 63),\n", | |
" (('schedule', 'that', 'is', 'published', 'pursuant', 'to', 'new', 'york'),\n", | |
" 63),\n", | |
" (('contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given'),\n", | |
" 63),\n", | |
" (('york', 'city', 'charter', '312', '(', 'a', ')', ':'), 63),\n", | |
" (('and', 'schedule', 'notice', 'is', 'hereby', 'given', 'that', 'the'), 63),\n", | |
" (('plan', 'and', 'schedule', 'notice', 'is', 'hereby', 'given', 'that'), 63)]" | |
] | |
} | |
], | |
"prompt_number": 43 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"corpus9grams = list(ngrams(lowerTokens,9))\n", | |
"corpus9gramFreqs = nltk.FreqDist(corpus9grams)\n", | |
"corpus9gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 44, | |
"text": [ | |
"[(('--', '--', '--', '--', '--', '--', '--', '--', '--'), 168),\n", | |
" (('of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':'),\n", | |
" 129),\n", | |
" (('headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency'),\n", | |
" 129),\n", | |
" (('maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for'),\n", | |
" 112),\n", | |
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'),\n", | |
" 112),\n", | |
" (('and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for',\n", | |
" 'a',\n", | |
" 'term'),\n", | |
" 112),\n", | |
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of'),\n", | |
" 112),\n", | |
" (('method',\n", | |
" 'of',\n", | |
" 'solicitation',\n", | |
" 'the',\n", | |
" 'agency',\n", | |
" 'intends',\n", | |
" 'to',\n", | |
" 'utilize',\n", | |
" ':'),\n", | |
" 103),\n", | |
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years', '.'), 101),\n", | |
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years'),\n", | |
" 101),\n", | |
" (('caf', 'for', 'a', 'term', 'of', 'four', 'years', '.', ')'), 101),\n", | |
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four'),\n", | |
" 94),\n", | |
" (('personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none'),\n", | |
" 93),\n", | |
" ((':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles'),\n", | |
" 92),\n", | |
" (('agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar'),\n", | |
" 92),\n", | |
" (('within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially'),\n", | |
" 91),\n", | |
" (('to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf'),\n", | |
" 91),\n", | |
" (('substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of'),\n", | |
" 91),\n", | |
" (('similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel'),\n", | |
" 91),\n", | |
" (('titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in'),\n", | |
" 91),\n", | |
" (('in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount'),\n", | |
" 91),\n", | |
" (('none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within'),\n", | |
" 89),\n", | |
" (('(', 'to', 'continue', 'to', 'maintain', ',', 'and', 'operate', 'an'), 86),\n", | |
" (('in', 'the', 'borough', 'of', 'manhattan', '(', 'to', 'continue', 'to'),\n", | |
" 86),\n", | |
" (('personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" '0'),\n", | |
" 83),\n", | |
" (('continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk'),\n", | |
" 82),\n", | |
" (('to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed'),\n", | |
" 79),\n", | |
" (('of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain', ',', 'and'),\n", | |
" 74),\n", | |
" (('manhattan',\n", | |
" '(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate'),\n", | |
" 74),\n", | |
" (('borough', 'of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain', ','),\n", | |
" 74),\n", | |
" (('the',\n", | |
" 'borough',\n", | |
" 'of',\n", | |
" 'manhattan',\n", | |
" '(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain'),\n", | |
" 74),\n", | |
" (('and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that',\n", | |
" 'the',\n", | |
" 'mayor'),\n", | |
" 63),\n", | |
" (('schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city'),\n", | |
" 63),\n", | |
" (('to', 'new', 'york', 'city', 'charter', '312', '(', 'a', ')'), 63),\n", | |
" (('contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that'),\n", | |
" 63),\n", | |
" (('city', 'charter', '312', '(', 'a', ')', ':', 'agency', ':'), 63),\n", | |
" (('pursuant', 'to', 'new', 'york', 'city', 'charter', '312', '(', 'a'), 63),\n", | |
" (('notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor', 'will', 'be'),\n", | |
" 63),\n", | |
" (('that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city',\n", | |
" 'charter'),\n", | |
" 63),\n", | |
" (('york', 'city', 'charter', '312', '(', 'a', ')', ':', 'agency'), 63),\n", | |
" (('and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york'),\n", | |
" 63),\n", | |
" (('plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that',\n", | |
" 'the'),\n", | |
" 63),\n", | |
" (('annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given'),\n", | |
" 63),\n", | |
" (('schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that',\n", | |
" 'the',\n", | |
" 'mayor',\n", | |
" 'will'),\n", | |
" 63),\n", | |
" (('plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new'),\n", | |
" 63),\n", | |
" (('contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to'),\n", | |
" 63),\n", | |
" (('new', 'york', 'city', 'charter', '312', '(', 'a', ')', ':'), 63),\n", | |
" (('published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city',\n", | |
" 'charter',\n", | |
" '312',\n", | |
" '('),\n", | |
" 63),\n", | |
" (('is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city',\n", | |
" 'charter',\n", | |
" '312'),\n", | |
" 63),\n", | |
" (('annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant'),\n", | |
" 63)]" | |
] | |
} | |
], | |
"prompt_number": 44 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"corpus5grams = ngrams(lowerTokens,5)\n", | |
"corpus5gramFreqs = nltk.FreqDist(corpus5grams)\n", | |
"corpus5gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 47, | |
"text": [ | |
"[(('personnel', 'in', 'substantially', 'similar', 'titles'), 272),\n", | |
" (('in', 'substantially', 'similar', 'titles', 'within'), 269),\n", | |
" (('similar', 'titles', 'within', 'agency', ':'), 264),\n", | |
" (('substantially', 'similar', 'titles', 'within', 'agency'), 264),\n", | |
" (('--', '--', '--', '--', '--'), 192),\n", | |
" (('of', 'the', 'proposed', 'contract', ':'), 186),\n", | |
" (('date', 'of', 'the', 'proposed', 'contract'), 186),\n", | |
" (('notice', 'is', 'hereby', 'given', 'that'), 185),\n", | |
" ((',', 'new', 'york', ',', 'ny'), 152),\n", | |
" (('agency', 'intends', 'to', 'utilize', ':'), 137),\n", | |
" (('headcount', 'of', 'personnel', 'in', 'substantially'), 136),\n", | |
" (('of', 'personnel', 'in', 'substantially', 'similar'), 136),\n", | |
" (('the', 'agency', 'intends', 'to', 'utilize'), 134),\n", | |
" (('start', 'date', 'of', 'the', 'proposed'), 131),\n", | |
" (('end', 'date', 'of', 'the', 'proposed'), 131),\n", | |
" (('(', 's', ')', 'not', 'included'), 126),\n", | |
" (('annual', 'contracting', 'plan', 'and', 'schedule'), 126),\n", | |
" (('s', ')', 'not', 'included', 'in'), 126),\n", | |
" (('maintain', ',', 'and', 'operate', 'an'), 124),\n", | |
" (('caf', 'for', 'a', 'term', 'of'), 124),\n", | |
" (('2015', 'annual', 'contracting', 'plan', 'and'), 124),\n", | |
" (('fy', '2015', 'annual', 'contracting', 'plan'), 124),\n", | |
" (('sidewalk', 'caf', 'for', 'a', 'term'), 124),\n", | |
" (('in', 'the', 'borough', 'of', 'manhattan'), 121),\n", | |
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a'), 120),\n", | |
" (('is', 'hereby', 'given', 'that', 'the'), 116),\n", | |
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n", | |
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n", | |
" ((',', 'and', 'operate', 'an', 'unenclosed'), 112),\n", | |
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n", | |
" (('the', 'city', 'of', 'new', 'york'), 104),\n", | |
" (('method', 'of', 'solicitation', 'the', 'agency'), 103),\n", | |
" (('of', 'solicitation', 'the', 'agency', 'intends'), 103),\n", | |
" (('solicitation', 'the', 'agency', 'intends', 'to'), 103),\n", | |
" (('a', 'term', 'of', 'four', 'years'), 102),\n", | |
" (('for', 'a', 'term', 'of', 'four'), 102),\n", | |
" (('of', 'four', 'years', '.', ')'), 101),\n", | |
" (('term', 'of', 'four', 'years', '.'), 101),\n", | |
" (('the', 'borough', 'of', 'manhattan', '('), 100),\n", | |
" ((',', '22', 'reade', 'street', ','), 100),\n", | |
" (('borough', 'of', 'manhattan', '(', 'to'), 99),\n", | |
" (('to', 'maintain', ',', 'and', 'operate'), 98),\n", | |
" (('floor', ',', 'new', 'york', ','), 96),\n", | |
" (('solicitation', '(', 's', ')', 'not'), 94),\n", | |
" (('titles', 'within', 'agency', ':', 'none'), 93),\n", | |
" (('none', 'headcount', 'of', 'personnel', 'in'), 92),\n", | |
" (('agency', ':', 'none', 'headcount', 'of'), 92),\n", | |
" ((':', 'none', 'headcount', 'of', 'personnel'), 92),\n", | |
" (('within', 'agency', ':', 'none', 'headcount'), 91),\n", | |
" (('continue', 'to', 'maintain', ',', 'and'), 89)]" | |
] | |
} | |
], | |
"prompt_number": 47 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#...and let's stop here for now\n", | |
"corpus10grams = list(ngrams(lowerTokens,10))\n", | |
"corpus10gramFreqs = nltk.FreqDist(corpus10grams)\n", | |
"corpus10gramFreqs.most_common(50)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 45, | |
"text": [ | |
"[(('--', '--', '--', '--', '--', '--', '--', '--', '--', '--'), 162),\n", | |
" (('headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':'),\n", | |
" 129),\n", | |
" (('maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for',\n", | |
" 'a'),\n", | |
" 112),\n", | |
" ((',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for',\n", | |
" 'a',\n", | |
" 'term'),\n", | |
" 112),\n", | |
" (('and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for',\n", | |
" 'a',\n", | |
" 'term',\n", | |
" 'of'),\n", | |
" 112),\n", | |
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years', '.', ')'),\n", | |
" 101),\n", | |
" (('unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for',\n", | |
" 'a',\n", | |
" 'term',\n", | |
" 'of',\n", | |
" 'four',\n", | |
" 'years',\n", | |
" '.'),\n", | |
" 100),\n", | |
" (('operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for',\n", | |
" 'a',\n", | |
" 'term',\n", | |
" 'of',\n", | |
" 'four'),\n", | |
" 94),\n", | |
" (('an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for',\n", | |
" 'a',\n", | |
" 'term',\n", | |
" 'of',\n", | |
" 'four',\n", | |
" 'years'),\n", | |
" 94),\n", | |
" (('agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles'),\n", | |
" 92),\n", | |
" (('in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of'),\n", | |
" 91),\n", | |
" (('substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel'),\n", | |
" 91),\n", | |
" (('titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially'),\n", | |
" 91),\n", | |
" (('within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar'),\n", | |
" 91),\n", | |
" (('personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount'),\n", | |
" 91),\n", | |
" (('similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in'),\n", | |
" 91),\n", | |
" (('to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf',\n", | |
" 'for'),\n", | |
" 91),\n", | |
" ((':',\n", | |
" 'none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within'),\n", | |
" 89),\n", | |
" (('none',\n", | |
" 'headcount',\n", | |
" 'of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency'),\n", | |
" 85),\n", | |
" (('of',\n", | |
" 'personnel',\n", | |
" 'in',\n", | |
" 'substantially',\n", | |
" 'similar',\n", | |
" 'titles',\n", | |
" 'within',\n", | |
" 'agency',\n", | |
" ':',\n", | |
" '0'),\n", | |
" 83),\n", | |
" (('continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk',\n", | |
" 'caf'),\n", | |
" 82),\n", | |
" (('(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed'),\n", | |
" 79),\n", | |
" (('to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an',\n", | |
" 'unenclosed',\n", | |
" 'sidewalk'),\n", | |
" 79),\n", | |
" (('the',\n", | |
" 'borough',\n", | |
" 'of',\n", | |
" 'manhattan',\n", | |
" '(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ','),\n", | |
" 74),\n", | |
" (('manhattan',\n", | |
" '(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate',\n", | |
" 'an'),\n", | |
" 74),\n", | |
" (('in',\n", | |
" 'the',\n", | |
" 'borough',\n", | |
" 'of',\n", | |
" 'manhattan',\n", | |
" '(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain'),\n", | |
" 74),\n", | |
" (('borough',\n", | |
" 'of',\n", | |
" 'manhattan',\n", | |
" '(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and'),\n", | |
" 74),\n", | |
" (('of',\n", | |
" 'manhattan',\n", | |
" '(',\n", | |
" 'to',\n", | |
" 'continue',\n", | |
" 'to',\n", | |
" 'maintain',\n", | |
" ',',\n", | |
" 'and',\n", | |
" 'operate'),\n", | |
" 74),\n", | |
" (('published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city',\n", | |
" 'charter',\n", | |
" '312',\n", | |
" '(',\n", | |
" 'a'),\n", | |
" 63),\n", | |
" (('new', 'york', 'city', 'charter', '312', '(', 'a', ')', ':', 'agency'), 63),\n", | |
" (('annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to'),\n", | |
" 63),\n", | |
" (('is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city',\n", | |
" 'charter',\n", | |
" '312',\n", | |
" '('),\n", | |
" 63),\n", | |
" (('contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that',\n", | |
" 'the'),\n", | |
" 63),\n", | |
" (('schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that',\n", | |
" 'the',\n", | |
" 'mayor',\n", | |
" 'will',\n", | |
" 'be'),\n", | |
" 63),\n", | |
" (('and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city'),\n", | |
" 63),\n", | |
" (('plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york'),\n", | |
" 63),\n", | |
" (('york', 'city', 'charter', '312', '(', 'a', ')', ':', 'agency', ':'), 63),\n", | |
" (('pursuant', 'to', 'new', 'york', 'city', 'charter', '312', '(', 'a', ')'),\n", | |
" 63),\n", | |
" (('to', 'new', 'york', 'city', 'charter', '312', '(', 'a', ')', ':'), 63),\n", | |
" (('schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city',\n", | |
" 'charter'),\n", | |
" 63),\n", | |
" (('that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new',\n", | |
" 'york',\n", | |
" 'city',\n", | |
" 'charter',\n", | |
" '312'),\n", | |
" 63),\n", | |
" (('annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that'),\n", | |
" 63),\n", | |
" (('and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that',\n", | |
" 'the',\n", | |
" 'mayor',\n", | |
" 'will'),\n", | |
" 63),\n", | |
" (('plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is',\n", | |
" 'hereby',\n", | |
" 'given',\n", | |
" 'that',\n", | |
" 'the',\n", | |
" 'mayor'),\n", | |
" 63),\n", | |
" (('contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant',\n", | |
" 'to',\n", | |
" 'new'),\n", | |
" 63),\n", | |
" (('included',\n", | |
" 'in',\n", | |
" 'fy',\n", | |
" '2015',\n", | |
" 'annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice'),\n", | |
" 62),\n", | |
" (('fy',\n", | |
" '2015',\n", | |
" 'annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published'),\n", | |
" 62),\n", | |
" (('in',\n", | |
" 'the',\n", | |
" 'fy',\n", | |
" '2015',\n", | |
" 'annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that'),\n", | |
" 62),\n", | |
" (('in',\n", | |
" 'fy',\n", | |
" '2015',\n", | |
" 'annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'notice',\n", | |
" 'is'),\n", | |
" 62),\n", | |
" (('2015',\n", | |
" 'annual',\n", | |
" 'contracting',\n", | |
" 'plan',\n", | |
" 'and',\n", | |
" 'schedule',\n", | |
" 'that',\n", | |
" 'is',\n", | |
" 'published',\n", | |
" 'pursuant'),\n", | |
" 62)]" | |
] | |
} | |
], | |
"prompt_number": 45 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment