Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mattalhonte/d421dbc87a14838d7fbf to your computer and use it in GitHub Desktop.
Save mattalhonte/d421dbc87a14838d7fbf to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:8b15f11410dc4f1327477ae7b83f3f78ba791f7c91a6c1f0eab14dfbc791219c"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import nltk\n",
"from nltk.util import ngrams\n",
"nltk.download('punkt')\n",
"nltk.download('stopwords')\n",
"\n",
"#Importing the dataset\n",
"%cd C:\\Users\\Matt\\Dropbox\\Python Workspace\\CROW\\CROL-PDF\n",
"data = pd.read_csv(\"procPublicationRequest_Oct-Dec_2014_clean - procPublicationRequest_Oct-Dec_2014_clean.csv\")\n",
"\n",
"#Snagging the \"human_readable\" column\n",
"#Missing values are replaced with empty strings first: str() on a NaN yields the literal word 'nan',\n",
"#which would otherwise show up as a token in the word counts. The list keeps one entry per row.\n",
"human_readableList = list(data['human_readable'].fillna(''))\n",
"\n",
"#Turn the values into strings\n",
"strReadable = [str(a) for a in human_readableList]\n",
"\n",
"#Split into individual words\n",
"listOfLists = [a.split() for a in strReadable]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] C:\\Users\\Matt\\AppData\\Roaming\\nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"[nltk_data] Downloading package stopwords to\n",
"[nltk_data] C:\\Users\\Matt\\AppData\\Roaming\\nltk_data...\n",
"[nltk_data] Package stopwords is already up-to-date!\n",
"C:\\Users\\Matt\\Dropbox\\Python Workspace\\CROW\\CROL-PDF\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#While we're here, let's output the raw words to a text file\n",
"#Every entry is preceded by a newline. A single join builds the same string as appending\n",
"#entry by entry, without re-copying the growing corpus on each pass.\n",
"myCorpus = ''.join(\"\\n\" + myEntry for myEntry in strReadable)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Write the corpus to 'rawCorpus.txt', the file name the following cell opens for reading.\n",
"#The with-block closes the handle even if the write raises.\n",
"with open('rawCorpus.txt', 'w') as f:\n",
"    f.write(myCorpus)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Now we have a data file that'll probably be a little faster to mess with (maybe?)\n",
"#Read the whole file into t; the with-block closes the handle as soon as the read finishes\n",
"with open('rawCorpus.txt') as file:\n",
"    t = file.read()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Let's tokenize it so it can be turned into an NLTK Text object\n",
"myCorpusTokenized = nltk.word_tokenize(t)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpusText = nltk.Text(myCorpusTokenized)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Now that we've got a bigger body of text, we can look at more interesting patterns in phrasing\n",
"corpusText.collocations()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"New York; substantially similar; similar titles; titles within; HEREBY\n",
"GIVEN; within agency; York City; sidewalk caf; 10:00 A.M.; proposed\n",
"contract; Annual Contracting; Contracting Plan; agency intends; 2015\n",
"Annual; public hearing; square foot; unenclosed sidewalk; COMMUNITY\n",
"BOARD; four years; End date\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpusFreqDist = nltk.FreqDist(corpusText)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Most common words!\n",
"list(corpusFreqDist.most_common(50))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 36,
"text": [
"[(',', 8033),\n",
" ('the', 4860),\n",
" ('of', 4570),\n",
" ('.', 2727),\n",
" ('and', 2338),\n",
" ('to', 2179),\n",
" (')', 1806),\n",
" ('(', 1747),\n",
" (':', 1714),\n",
" ('in', 1520),\n",
" ('a', 1259),\n",
" ('at', 1207),\n",
" ('for', 1125),\n",
" ('New', 1107),\n",
" ('York', 988),\n",
" ('Street', 910),\n",
" ('on', 875),\n",
" ('be', 674),\n",
" ('City', 665),\n",
" ('The', 641),\n",
" ('proposed', 561),\n",
" ('by', 519),\n",
" ('an', 513),\n",
" ('is', 501),\n",
" ('contract', 498),\n",
" ('Manhattan', 470),\n",
" ('will', 461),\n",
" ('agency', 397),\n",
" ('that', 396),\n",
" ('$', 382),\n",
" ('2014', 379),\n",
" ('from', 369),\n",
" ('Borough', 363),\n",
" ('Floor', 360),\n",
" ('within', 354),\n",
" ('Avenue', 332),\n",
" ('NY', 329),\n",
" ('date', 321),\n",
" ('1', 321),\n",
" (\"'s\", 311),\n",
" ('Board', 292),\n",
" ('public', 285),\n",
" ('similar', 280),\n",
" ('or', 279),\n",
" ('Services', 278),\n",
" ('Department', 274),\n",
" ('titles', 272),\n",
" ('substantially', 272),\n",
" ('as', 264),\n",
" ('with', 252)]"
]
}
],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Let's clean things up a little bit. Changing everything to lower-case is usually a good idea. \n",
"#\"Public Hearing\" will equal \"PUBLIC HEARING\"\n",
"lowerTokens = [w.lower() for w in myCorpusTokenized]\n",
"lowerText = nltk.Text(lowerTokens)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"list(nltk.FreqDist(lowerText).most_common())\n",
"#Already saved some doubling-up! Note the 5649 mentions of \"the\", instead of 4860 like in the last list"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 19,
"text": [
"[(',', 8033),\n",
" ('the', 5649),\n",
" ('of', 4822),\n",
" ('.', 2727),\n",
" ('to', 2409),\n",
" ('and', 2392),\n",
" (')', 1806),\n",
" ('(', 1747),\n",
" (':', 1714),\n",
" ('in', 1686),\n",
" ('a', 1419),\n",
" ('for', 1261),\n",
" ('at', 1215),\n",
" ('new', 1191),\n",
" ('york', 1003),\n",
" ('street', 988),\n",
" ('on', 888),\n",
" ('is', 704),\n",
" ('city', 698),\n",
" ('be', 674),\n",
" ('agency', 647),\n",
" ('contract', 623),\n",
" ('proposed', 583),\n",
" ('an', 533),\n",
" ('public', 532),\n",
" ('by', 522),\n",
" ('services', 510),\n",
" ('manhattan', 479),\n",
" ('floor', 469),\n",
" ('will', 461),\n",
" ('that', 425),\n",
" ('borough', 414),\n",
" ('notice', 401),\n",
" ('board', 396),\n",
" ('hearing', 385),\n",
" ('$', 382),\n",
" ('from', 379),\n",
" ('2014', 379),\n",
" ('date', 363),\n",
" ('within', 356),\n",
" ('avenue', 336),\n",
" ('ny', 329),\n",
" ('district', 325),\n",
" (\"'s\", 324),\n",
" ('1', 321),\n",
" ('application', 315),\n",
" ('community', 307),\n",
" ('a.m.', 291),\n",
" ('personnel', 290),\n",
" ('department', 285),\n",
" ('similar', 282),\n",
" ('or', 279),\n",
" ('substantially', 276),\n",
" ('titles', 274),\n",
" ('as', 274),\n",
" ('office', 266),\n",
" ('pursuant', 260),\n",
" ('with', 253),\n",
" ('lot', 247),\n",
" ('meets', 240),\n",
" ('brooklyn', 238),\n",
" ('room', 237),\n",
" ('2015', 236),\n",
" (';', 236),\n",
" ('project', 235),\n",
" ('not', 233),\n",
" ('building', 233),\n",
" ('block', 229),\n",
" ('given', 220),\n",
" ('following', 219),\n",
" ('--', 218),\n",
" ('hereby', 216),\n",
" ('no', 215),\n",
" ('located', 210),\n",
" ('term', 209),\n",
" ('solicitation', 205),\n",
" ('-', 201),\n",
" ('s', 197),\n",
" ('schedule', 195),\n",
" ('10:00', 192),\n",
" ('years', 190),\n",
" ('this', 187),\n",
" ('plan', 185),\n",
" ('commission', 184),\n",
" ('section', 183),\n",
" ('10007', 182),\n",
" ('end', 181),\n",
" ('intends', 181),\n",
" ('meeting', 179),\n",
" ('annual', 178),\n",
" ('#', 177),\n",
" ('nan', 176),\n",
" ('square', 176),\n",
" ('east', 176),\n",
" ('method', 174),\n",
" ('west', 167),\n",
" ('information', 161),\n",
" ('december', 160),\n",
" ('&', 158),\n",
" ('llc', 158),\n",
" ('site', 157),\n",
" ('development', 156),\n",
" (\"''\", 150),\n",
" ('maintain', 148),\n",
" ('2', 148),\n",
" ('may', 148),\n",
" ('other', 147),\n",
" ('30', 147),\n",
" ('charter', 145),\n",
" ('5', 145),\n",
" ('hall', 144),\n",
" ('days', 144),\n",
" ('p.m.', 144),\n",
" ('``', 144),\n",
" ('sought', 144),\n",
" ('matter', 143),\n",
" ('sidewalk', 143),\n",
" ('call', 142),\n",
" ('22', 142),\n",
" ('each', 142),\n",
" ('held', 142),\n",
" ('start', 141),\n",
" ('are', 141),\n",
" ('headcount', 138),\n",
" ('between', 138),\n",
" ('shall', 138),\n",
" ('utilize', 137),\n",
" ('included', 136),\n",
" ('mayor', 136),\n",
" ('fy', 134),\n",
" ('contracting', 134),\n",
" ('caf', 133),\n",
" ('order', 133),\n",
" ('operate', 129),\n",
" ('month', 127),\n",
" ('program', 127),\n",
" ('3', 124),\n",
" ('four', 122),\n",
" ('october', 121),\n",
" ('environmental', 121),\n",
" ('lease', 121),\n",
" ('unenclosed', 120),\n",
" ('nyc', 120),\n",
" ('please', 120),\n",
" ('business', 120),\n",
" ('inc.', 119),\n",
" ('feet', 119),\n",
" ('june', 118),\n",
" ('description', 118),\n",
" ('has', 118),\n",
" ('none', 118),\n",
" ('212', 117),\n",
" ('zoning', 116),\n",
" ('queens', 116),\n",
" ('should', 115),\n",
" ('property', 114),\n",
" ('continue', 113),\n",
" ('extension', 112),\n",
" ('construction', 110),\n",
" ('reade', 109),\n",
" ('november', 107),\n",
" ('commencing', 107),\n",
" ('period', 107),\n",
" ('corporation', 101),\n",
" ('through', 99),\n",
" ('july', 99),\n",
" ('historic', 98),\n",
" ('been', 98),\n",
" ('approximately', 98),\n",
" ('foot', 97),\n",
" ('design', 97),\n",
" ('park', 96),\n",
" ('spector', 95),\n",
" ('use', 95),\n",
" ('housing', 95),\n",
" ('island', 95),\n",
" ('two', 93),\n",
" ('amount', 93),\n",
" ('zoned', 92),\n",
" ('contact', 91),\n",
" ('wednesday', 91),\n",
" ('administration', 91),\n",
" ('website', 91),\n",
" ('bronx', 89),\n",
" ('task', 89),\n",
" ('9th', 89),\n",
" ('existing', 89),\n",
" ('certificate', 88),\n",
" ('0', 88),\n",
" ('10', 87),\n",
" ('built', 87),\n",
" ('written', 87),\n",
" ('review', 86),\n",
" ('any', 86),\n",
" ('january', 85),\n",
" ('amendment', 84),\n",
" ('broadway', 84),\n",
" ('subject', 83),\n",
" ('have', 83),\n",
" ('tuesday', 83),\n",
" ('which', 82),\n",
" ('acquisition', 82),\n",
" ('intent', 82),\n",
" ('oer', 82),\n",
" ('area', 81),\n",
" ('appropriateness', 81),\n",
" ('40', 80),\n",
" ('applicant', 80),\n",
" ('than', 79),\n",
" ('management', 78),\n",
" ('prior', 78),\n",
" ('n.y.', 77),\n",
" ('time', 77),\n",
" ('rules', 75),\n",
" ('citywide', 75),\n",
" ('nature', 75),\n",
" ('received', 75),\n",
" ('tenant', 74),\n",
" ('style', 74),\n",
" ('facility', 72),\n",
" ('times', 72),\n",
" ('additional', 72),\n",
" ('line', 72),\n",
" ('7', 72),\n",
" ('north', 71),\n",
" ('conference', 71),\n",
" ('state', 71),\n",
" ('2nd', 71),\n",
" ('year', 71),\n",
" ('remediation', 70),\n",
" ('premises', 70),\n",
" ('municipal', 69),\n",
" ('such', 69),\n",
" ('6', 69),\n",
" ('service', 69),\n",
" ('published', 69),\n",
" ('request', 68),\n",
" ('100', 68),\n",
" ('five', 68),\n",
" ('south', 67),\n",
" ('preservation', 67),\n",
" ('law', 67),\n",
" ('thursday', 67),\n",
" ('special', 66),\n",
" ('monthly', 66),\n",
" ('owner', 66),\n",
" ('technology', 66),\n",
" ('place', 66),\n",
" ('including', 66),\n",
" ('permit', 65),\n",
" ('centre', 65),\n",
" ('31', 65),\n",
" ('later', 65),\n",
" ('inspection', 65),\n",
" ('4', 64),\n",
" ('space', 64),\n",
" ('location', 64),\n",
" ('designed', 64),\n",
" ('under', 63),\n",
" ('system', 63),\n",
" ('rector', 63),\n",
" ('provide', 63),\n",
" ('312', 63),\n",
" ('scheduled', 63),\n",
" ('all', 63),\n",
" ('third', 63),\n",
" ('submitted', 63),\n",
" ('chairman', 63),\n",
" ('would', 62),\n",
" ('children', 62),\n",
" ('procurement', 62),\n",
" ('8', 62),\n",
" ('15', 61),\n",
" ('bonds', 61),\n",
" ('c', 61),\n",
" ('visit', 60),\n",
" ('issue', 59),\n",
" ('staten', 59),\n",
" ('ave', 59),\n",
" ('available', 59),\n",
" ('center', 58),\n",
" ('first', 58),\n",
" ('real', 58),\n",
" ('comments', 58),\n",
" ('cb', 58),\n",
" ('cleanup', 57),\n",
" ('also', 57),\n",
" ('matters', 57),\n",
" ('cc', 57),\n",
" ('final', 56),\n",
" ('one', 56),\n",
" ('must', 56),\n",
" ('requirements', 56),\n",
" ('main', 55),\n",
" ('calendar', 55),\n",
" ('concession', 54),\n",
" ('million', 54),\n",
" ('landmarks', 54),\n",
" ('below', 54),\n",
" ('9:30', 53),\n",
" ('vcp', 53),\n",
" ('health', 53),\n",
" ('boulevard', 53),\n",
" ('road', 53),\n",
" ('construct', 52),\n",
" ('monday', 52),\n",
" ('issuing', 51),\n",
" ('per', 51),\n",
" ('hearings', 51),\n",
" ('planning', 51),\n",
" ('affected', 50),\n",
" ('administrative', 50),\n",
" ('manager', 50),\n",
" ('architect', 50),\n",
" ('bounded', 50),\n",
" ('assigned', 49),\n",
" ('its', 49),\n",
" ('voluntary', 49),\n",
" ('e', 49),\n",
" ('before', 49),\n",
" ('used', 48),\n",
" ('preliminary', 48),\n",
" ('warranted', 48),\n",
" ('events', 47),\n",
" ('school', 47),\n",
" ('20', 47),\n",
" ('telecommunications', 47),\n",
" ('court', 47),\n",
" ('parking', 47),\n",
" ('can', 46),\n",
" ('otherwise', 46),\n",
" ('unless', 46),\n",
" ('water', 46),\n",
" ('authority', 46),\n",
" ('2016', 45),\n",
" ('land', 45),\n",
" ('map', 45),\n",
" ('original', 45),\n",
" ('work', 45),\n",
" ('cost', 44),\n",
" ('11', 44),\n",
" ('maintenance', 44),\n",
" ('sign', 44),\n",
" ('fort', 44),\n",
" ('parkway', 44),\n",
" ('if', 43),\n",
" ('installation', 43),\n",
" ('renewal', 43),\n",
" (\"'\", 43),\n",
" ('25', 43),\n",
" ('12th', 43),\n",
" ('fourth', 43),\n",
" ('side', 43),\n",
" ('protection', 43),\n",
" ('code', 42),\n",
" ('resources', 42),\n",
" ('16', 42),\n",
" ('hamilton', 42),\n",
" ('changes', 42),\n",
" ('human', 42),\n",
" ('vendor', 42),\n",
" ('william', 42),\n",
" ('hours', 41),\n",
" ('residential', 41),\n",
" ('consent', 41),\n",
" ('commercial', 41),\n",
" ('renewed/extended', 40),\n",
" ('draft', 40),\n",
" ('disposition', 40),\n",
" ('three', 40),\n",
" ('pm', 40),\n",
" ('approval', 40),\n",
" ('individuals', 40),\n",
" ('requesting', 40),\n",
" ('contractor', 39),\n",
" ('operation', 39),\n",
" ('sections', 39),\n",
" ('negotiated', 39),\n",
" ('after', 39),\n",
" ('general', 39),\n",
" ('language', 39),\n",
" ('hold', 39),\n",
" ('september', 39),\n",
" ('rent', 39),\n",
" ('12', 38),\n",
" ('interpreters', 38),\n",
" ('lots', 38),\n",
" ('education', 38),\n",
" ('addition', 38),\n",
" ('cd', 38),\n",
" ('st', 37),\n",
" ('up', 37),\n",
" ('parcel', 37),\n",
" ('selected', 37),\n",
" ('related', 37),\n",
" ('allow', 37),\n",
" ('extend', 37),\n",
" ('nycha', 37),\n",
" ('9', 37),\n",
" ('determined', 36),\n",
" ('10004', 36),\n",
" ('18', 36),\n",
" ('policy', 36),\n",
" ('buildings', 36),\n",
" ('r6', 36),\n",
" ('along', 36),\n",
" ('19', 36),\n",
" ('150', 35),\n",
" ('address', 35),\n",
" ('these', 35),\n",
" ('@', 35),\n",
" ('pier', 35),\n",
" ('accordance', 35),\n",
" ('revocable', 35),\n",
" ('corner', 35),\n",
" ('unit', 35),\n",
" ('jobs', 35),\n",
" ('http', 35),\n",
" ('funds', 34),\n",
" ('21', 34),\n",
" ('president', 34),\n",
" ('adjacent', 34),\n",
" ('provided', 34),\n",
" ('performed', 34),\n",
" ('assistant', 34),\n",
" ('2018', 34),\n",
" ('landlord', 34),\n",
" ('transportation', 34),\n",
" ('bsa', 34),\n",
" ('church', 34),\n",
" ('plaza', 34),\n",
" ('28', 33),\n",
" ('17', 33),\n",
" ('29', 33),\n",
" ('revenue', 33),\n",
" ('hpd', 33),\n",
" ('rear', 33),\n",
" ('title', 33),\n",
" ('limited', 33),\n",
" ('civil', 33),\n",
" ('note', 33),\n",
" ('24', 32),\n",
" ('b', 32),\n",
" ('6th', 32),\n",
" ('copy', 32),\n",
" ('modifications', 32),\n",
" ('14', 32),\n",
" ('approximate', 32),\n",
" ('reason', 32),\n",
" ('197-c', 32),\n",
" ('int', 32),\n",
" ('relation', 32),\n",
" ('reasonable', 31),\n",
" ('engineer', 31),\n",
" ('parks', 31),\n",
" ('permits', 31),\n",
" ('payable', 31),\n",
" ('council', 31),\n",
" ('tax', 31),\n",
" ('recreation', 31),\n",
" ('plans', 31),\n",
" ('action', 31),\n",
" ('posted', 31),\n",
" ('portion', 31),\n",
" ('comment', 31),\n",
" ('3rd', 31),\n",
" ('you', 31),\n",
" ('ddc', 31),\n",
" ('award', 31),\n",
" ('friday', 30),\n",
" ('23', 30),\n",
" ('establishment', 30),\n",
" ('systems', 30),\n",
" ('commissioner', 30),\n",
" ('into', 30),\n",
" ('local', 30),\n",
" ('august', 30),\n",
" ('youth', 30),\n",
" ('253', 29),\n",
" ('central', 29),\n",
" ('february', 29),\n",
" ('hudson', 29),\n",
" ('10038', 28),\n",
" ('march', 28),\n",
" ('taxes', 28),\n",
" ('van', 28),\n",
" ('river', 28),\n",
" ('type', 28),\n",
" ('%', 28),\n",
" ('not-for-profit', 28),\n",
" ('dollars', 28),\n",
" ('federal', 28),\n",
" ('33', 28),\n",
" ('listed', 28),\n",
" ('replace', 28),\n",
" ('home', 28),\n",
" ('i', 28),\n",
" ('install', 27),\n",
" ('seven', 27),\n",
" ('ii', 27),\n",
" ('open', 27),\n",
" ('13', 27),\n",
" ('company', 27),\n",
" ('projects', 27),\n",
" ('part', 27),\n",
" ('awards', 27),\n",
" ('proceeds', 27),\n",
" ('certain', 27),\n",
" ('next', 27),\n",
" ('transactions', 27),\n",
" ('extended', 27),\n",
" ('second', 27),\n",
" ('yard', 27),\n",
" ('sealed', 26),\n",
" ('it', 26),\n",
" ('means', 26),\n",
" ('conditions', 26),\n",
" ('speak', 26),\n",
" ('proposal', 26),\n",
" ('grant', 26),\n",
" ('units', 26),\n",
" ('budget', 26),\n",
" ('bid', 26),\n",
" ('then', 26),\n",
" ('facilities', 26),\n",
" ('release', 26),\n",
" ('trust', 26),\n",
" ('months', 26),\n",
" ('long', 26),\n",
" ('was', 26),\n",
" ('contracts', 26),\n",
" ('upon', 26),\n",
" ('verizon', 25),\n",
" ('variance', 25),\n",
" ('ave.', 25),\n",
" ('there', 25),\n",
" ('2019', 25),\n",
" ('reconstruction', 25),\n",
" ('bond', 25),\n",
" ('competitive', 25),\n",
" ('small', 25),\n",
" ('necessary', 25),\n",
" ('improvements', 25),\n",
" ('1:30', 25),\n",
" ('retail', 25),\n",
" ('number', 25),\n",
" ('range', 25),\n",
" ('17th', 25),\n",
" ('10006', 24),\n",
" ('201', 24),\n",
" ('division', 24),\n",
" ('rothkrug', 24),\n",
" ('spaces', 24),\n",
" ('sapo', 24),\n",
" ('wednesdays', 24),\n",
" ('fms', 24),\n",
" ('twice', 24),\n",
" ('tdd', 24),\n",
" ('provision', 24),\n",
" ('pier55', 24),\n",
" ('4th', 24),\n",
" ('make', 24),\n",
" ('present', 24),\n",
" ('tuesdays', 24),\n",
" ('option', 24),\n",
" ('committee', 24),\n",
" ('repair', 24),\n",
" ('group', 23),\n",
" ('minutes', 23),\n",
" ('principal', 23),\n",
" ('flushing', 23),\n",
" ('resolution', 23),\n",
" ('copies', 23),\n",
" ('gross', 23),\n",
" ('chapter', 23),\n",
" ('55', 23),\n",
" ('windows', 23),\n",
" ('landscape', 23),\n",
" ('2017', 23),\n",
" ('appeals', 23),\n",
" ('pay', 23),\n",
" ('holidays', 23),\n",
" ('a.m', 23),\n",
" ('6:00', 23),\n",
" ('garage', 23),\n",
" ('insurance', 23),\n",
" ('email', 23),\n",
" ('those', 23),\n",
" ('amended', 23),\n",
" ('industrial', 23),\n",
" ('renew', 23),\n",
" ('estimated', 23),\n",
" ('projected', 22),\n",
" ('250', 22),\n",
" ('rowhouse', 22),\n",
" ('required', 22),\n",
" ('include', 22),\n",
" ('attend', 22),\n",
" ('financing', 22),\n",
" ('ms.', 22),\n",
" ('future', 22),\n",
" ('architectural', 22),\n",
" ('2:30', 22),\n",
" ('issuance', 22),\n",
" ('users', 22),\n",
" ('10th', 22),\n",
" ('14th', 22),\n",
" ('but', 22),\n",
" ('28th', 22),\n",
" ('care', 22),\n",
" ('economic', 22),\n",
" ('institution', 22),\n",
" ('am', 22),\n",
" ('more', 21),\n",
" ('designation', 21),\n",
" ('dot', 21),\n",
" ('inc', 21),\n",
" ('2024', 21),\n",
" ('uses', 21),\n",
" ('and/or', 21),\n",
" ('support', 21),\n",
" ('phase', 21),\n",
" ('expense', 21),\n",
" ('restaurant', 21),\n",
" ('total', 21),\n",
" ('who', 21),\n",
" ('laws', 21),\n",
" ('affairs', 21),\n",
" ('receipts', 21),\n",
" ('5th', 21),\n",
" ('probation', 21),\n",
" ('tax-exempt', 21),\n",
" ('costs', 21),\n",
" ('corp.', 21),\n",
" ('interior', 21),\n",
" ('dpr', 21),\n",
" ('your', 21),\n",
" ('so', 21),\n",
" ('788-7490', 21),\n",
" ('regarding', 21),\n",
" ('physical', 21),\n",
" ('32', 21),\n",
" ('36', 21),\n",
" ('assistance', 21),\n",
" ('without', 21),\n",
" ('control', 21),\n",
" ('writing', 20),\n",
" ('obtained', 20),\n",
" ('renew/extend', 20),\n",
" ('compensation', 20),\n",
" ('equipment', 20),\n",
" ('police', 20),\n",
" ('capital', 20),\n",
" ('provides', 20),\n",
" ('agenda', 20),\n",
" ('full', 20),\n",
" ('meetings', 20),\n",
" ('except', 20),\n",
" ('2013', 20),\n",
" ('opportunity', 20),\n",
" ('current', 20),\n",
" ('landmark', 20),\n",
" ('requiring', 20),\n",
" ('42-09', 20),\n",
" ('renovation', 20),\n",
" ('terms', 20),\n",
" ('extent', 20),\n",
" ('exempt', 20),\n",
" ('april', 20),\n",
" ('11th', 20),\n",
" ('alterations', 20),\n",
" ('rentable', 20),\n",
" ('borrower', 20),\n",
" ('interest', 20),\n",
" ('2,000,000', 20),\n",
" ('26', 19),\n",
" ('washington', 19),\n",
" ('iii', 19),\n",
" ('materials', 19),\n",
" ('house', 19),\n",
" ('engineers', 19),\n",
" ('build', 19),\n",
" ('equal', 19),\n",
" ('paper', 19),\n",
" ('engineering', 19),\n",
" ('11101', 19),\n",
" ('718', 19),\n",
" ('determine', 19),\n",
" ('gsf', 19),\n",
" ('equivalent', 19),\n",
" ('45', 19),\n",
" ('properties', 19),\n",
" ('person', 19),\n",
" ('offices', 19),\n",
" ('greenwich', 19),\n",
" ('7th', 19),\n",
" ('facilitate', 19),\n",
" ('amounts', 19),\n",
" ('revival', 19),\n",
" ('pin', 19),\n",
" ('further', 19),\n",
" ('renewal/extension', 19),\n",
" ('relay', 19),\n",
" ('level', 19),\n",
" ('p.m', 19),\n",
" ('agreement', 19),\n",
" ('licensed', 19),\n",
" ('base', 18),\n",
" ('bay', 18),\n",
" ('day', 18),\n",
" ('training', 18),\n",
" ('maximum', 18),\n",
" ('report', 18),\n",
" ('143', 18),\n",
" ('article', 18),\n",
" ('dwelling', 18),\n",
" ('rights', 18),\n",
" ('21st', 18),\n",
" ('10013', 18),\n",
" ('10:30', 18),\n",
" ('requests', 18),\n",
" ('shown', 18),\n",
" ('during', 18),\n",
" ('rooftop', 18),\n",
" ('culture', 18),\n",
" ('wage', 18),\n",
" ('1st', 18),\n",
" ('meet', 18),\n",
" ('chester', 18),\n",
" ('fifth', 18),\n",
" ('mortgage', 18),\n",
" ('richmond', 18),\n",
" ('terminated', 18),\n",
" ('11201', 17),\n",
" ('2020', 17),\n",
" ('providing', 17),\n",
" ('statement', 17),\n",
" ('dep', 17),\n",
" ('installments', 17),\n",
" ('security', 17),\n",
" ('seeking', 17),\n",
" ('grand', 17),\n",
" ('lessee', 17),\n",
" ('60', 17),\n",
" ('concept', 17),\n",
" ('persons', 17),\n",
" ('begin', 17),\n",
" ('epin', 17),\n",
" ('ground', 17),\n",
" ('franchise', 17),\n",
" ('authorizing', 17),\n",
" ('scheduling', 17),\n",
" ('follows', 17),\n",
" ('options', 17),\n",
" ('entering', 17),\n",
" ('beach', 17),\n",
" ('aka', 17),\n",
" ('intersection', 17),\n",
" ('54th', 17),\n",
" ('530', 17),\n",
" ('advance', 17),\n",
" ('42', 17),\n",
" ('1/1/15', 17),\n",
" ('items', 17),\n",
" ('made', 17),\n",
" ('recording', 17),\n",
" ('need', 17),\n",
" ('telephone', 17),\n",
" ('ten', 17),\n",
" ('sampling', 17),\n",
" ('associate', 17),\n",
" ('5:30', 16),\n",
" ('establish', 16),\n",
" ('participate', 16),\n",
" ('3-04', 16),\n",
" ('once', 16),\n",
" ('potential', 16),\n",
" ('2022', 16),\n",
" ('infrastructure', 16),\n",
" ('value', 16),\n",
" ('27', 16),\n",
" ('areas', 16),\n",
" ('sent', 16),\n",
" ('does', 16),\n",
" ('conjunction', 16),\n",
" ('beaver', 16),\n",
" ('7:00', 16),\n",
" ('above', 16),\n",
" ('includes', 16),\n",
" ('financings', 16),\n",
" ('point', 16),\n",
" ('do', 16),\n",
" ('2011', 16),\n",
" ('forest', 16),\n",
" ('adams', 16),\n",
" ('interested', 16),\n",
" ('needed', 16),\n",
" ('proposals', 16),\n",
" ('melrose', 16),\n",
" ('preceding', 16),\n",
" ('comptroller', 16),\n",
" ('publication', 16),\n",
" ('llp', 16),\n",
" ('306-6088', 16),\n",
" ('benefit', 16),\n",
" ('intern', 16),\n",
" ('standards', 16),\n",
" ('behalf', 16),\n",
" ('madison', 16),\n",
" ('containing', 16),\n",
" ('here', 16),\n",
" ('//www.nyc.gov/html/nycha/html/about/boardmeeting_schedule.shtml', 15),\n",
" ('noted', 15),\n",
" ('joralemon', 15),\n",
" ('having', 15),\n",
" ('neighborhood', 15),\n",
" ('southeasterly', 15),\n",
" ('architecture', 15),\n",
" ('9:15', 15),\n",
" ('forth', 15),\n",
" ('members', 15),\n",
" ('organizations', 15),\n",
" ('name', 15),\n",
" ('retirement', 15),\n",
" ('over', 15),\n",
" ('streets', 15),\n",
" ('22nd', 15),\n",
" ('110', 15),\n",
" ('associates', 15),\n",
" ('assessment', 15),\n",
" ('significant', 15),\n",
" ('permitted', 15),\n",
" ('gotham', 15),\n",
" ('generally', 15),\n",
" ('equipping', 15),\n",
" ('benefits', 15),\n",
" ('rehabilitation', 15),\n",
" ('fleming', 15),\n",
" ('library', 15),\n",
" ('fee', 15),\n",
" ('st.', 15),\n",
" ('their', 15),\n",
" ('approved', 15),\n",
" ('family', 15),\n",
" ('dates', 15),\n",
" ('county', 15),\n",
" ('zr', 15),\n",
" ('junction', 15),\n",
" ('child', 15),\n",
" ('gold', 15),\n",
" ('scope', 15),\n",
" ('59-17', 15),\n",
" ('according', 15),\n",
" ('finance', 15),\n",
" ('applications', 15),\n",
" ('8:00', 15),\n",
" ('rfp', 15),\n",
" ('73-36', 15),\n",
" ('morning', 15),\n",
" ('based', 15),\n",
" ('sole', 15),\n",
" ('__________', 15),\n",
" ('average', 15),\n",
" ('rental', 15),\n",
" ('practicable', 15),\n",
" ('take', 15),\n",
" ('chris', 14),\n",
" ('both', 14),\n",
" ('heights', 14),\n",
" ('2:00', 14),\n",
" ('village', 14),\n",
" ('2023', 14),\n",
" ('accessory', 14),\n",
" ('last', 14),\n",
" ('824', 14),\n",
" ('set', 14),\n",
" ('intergovernmental', 14),\n",
" ('elections', 14),\n",
" ('estate', 14),\n",
" ('50', 14),\n",
" ('various', 14),\n",
" ('mental', 14),\n",
" ('districts', 14),\n",
" ('consisting', 14),\n",
" ('386-0315', 14),\n",
" ('purposes', 14),\n",
" ('annum', 14),\n",
" ('fitness', 14),\n",
" ('w', 14),\n",
" ('acquisitions', 14),\n",
" ('conduct', 14),\n",
" ('2000', 14),\n",
" ('counsel', 14),\n",
" ('dycd', 14),\n",
" ('improvement', 14),\n",
" ('amsterdam', 14),\n",
" ('dispositions', 14),\n",
" ('process', 14),\n",
" ('335', 14),\n",
" ('activities', 14),\n",
" ('8th', 14),\n",
" ('internal', 14),\n",
" ('modify', 14),\n",
" ('consulting', 14),\n",
" ('together', 14),\n",
" ('212-788-3071', 14),\n",
" ('contrary', 14),\n",
" ('noticed', 14),\n",
" ('41', 14),\n",
" ('e-pin', 13),\n",
" ('previously', 13),\n",
" ('chamber', 13),\n",
" ('higher', 13),\n",
" ('2025', 13),\n",
" ('employees', 13),\n",
" ('act', 13),\n",
" ('acquired', 13),\n",
" ('hourly', 13),\n",
" ('our', 13),\n",
" ('exemption', 13),\n",
" ('12/31/15', 13),\n",
" ('senior', 13),\n",
" ('commencement', 13),\n",
" ('accommodation', 13),\n",
" ('text', 13),\n",
" ('ocean', 13),\n",
" ('noise', 13),\n",
" ('hygiene', 13),\n",
" ('air', 13),\n",
" ('enter', 13),\n",
" ('found', 13),\n",
" ('network', 13),\n",
" ('testing', 13),\n",
" ('j', 13),\n",
" ('1:00', 13),\n",
" ('2021', 13),\n",
" ('legalize', 13),\n",
" ('fund', 13),\n",
" ('change', 13),\n",
" ('sessions', 13),\n",
" ('urban', 13),\n",
" ('result', 13),\n",
" ('northern', 13),\n",
" ('arrange', 13),\n",
" ('waiver', 13),\n",
" ('501', 13),\n",
" ('jamaica', 13),\n",
" ('upper', 13),\n",
" ('analysis', 13),\n",
" ('identified', 13),\n",
" ('expansion', 13),\n",
" ('source', 13),\n",
" ('homeless', 13),\n",
" ('customarily', 13),\n",
" ('contractors', 13),\n",
" ('safety', 13),\n",
" ('consumer', 13),\n",
" ('taxation', 13),\n",
" ('implementation', 13),\n",
" ('boroughs', 13),\n",
" ('respect', 13),\n",
" ('mixed', 13),\n",
" ('r6b', 12),\n",
" ('80th', 12),\n",
" ('2203', 12),\n",
" ('10021', 12),\n",
" ('were', 12),\n",
" ('mondays', 12),\n",
" ('180', 12),\n",
" ('www.nyc.gov/landmarks', 12),\n",
" ('said', 12),\n",
" ('indicated', 12),\n",
" ('thereby', 12),\n",
" ('additonal', 12),\n",
" ('councilman', 12),\n",
" ('station', 12),\n",
" ('exceed', 12),\n",
" ('resiliency', 12),\n",
" ('alter', 12),\n",
" ('parole', 12),\n",
" ('roof', 12),\n",
" ('among', 12),\n",
" ('603', 12),\n",
" ('57', 12),\n",
" ('bulletin', 12),\n",
" ('individual', 12),\n",
" ('therefore', 12),\n",
" ('weekly', 12),\n",
" ('pavilion', 12),\n",
" ('revision', 12),\n",
" ('thereafter', 12),\n",
" ('access', 12),\n",
" ('desk', 12),\n",
" ('ordered', 12),\n",
" ('herein', 12),\n",
" ('bi-weekly', 12),\n",
" ('continuation', 12),\n",
" ('agendas', 12),\n",
" ('columbus', 12),\n",
" ('//www.nyc.gov/html/ccrb/html/meeting.html', 12),\n",
" ...]"
]
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Let's see some bigrams!\n",
"corpusBigrams = list(ngrams(lowerTokens,2))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpusBigramFreqs = nltk.FreqDist(corpusBigrams)\n",
"corpusBigramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 37,
"text": [
"[(('of', 'the'), 1267),\n",
" (('new', 'york'), 999),\n",
" (('in', 'the'), 666),\n",
" (('street', ','), 633),\n",
" (('.', 'the'), 491),\n",
" ((',', 'new'), 460),\n",
" (('the', 'proposed'), 454),\n",
" (('agency', ':'), 409),\n",
" ((',', 'and'), 392),\n",
" (('borough', 'of'), 378),\n",
" (('york', ','), 372),\n",
" ((',', '2014'), 363),\n",
" (('contract', ':'), 343),\n",
" (('for', 'the'), 335),\n",
" (('to', 'the'), 334),\n",
" (('york', 'city'), 321),\n",
" (('date', 'of'), 317),\n",
" (('on', 'the'), 316),\n",
" ((',', 'ny'), 314),\n",
" (('will', 'be'), 308),\n",
" (('public', 'hearing'), 280),\n",
" (('substantially', 'similar'), 276),\n",
" (('in', 'substantially'), 272),\n",
" (('personnel', 'in'), 272),\n",
" (('similar', 'titles'), 272),\n",
" (('titles', 'within'), 269),\n",
" (('department', 'of'), 266),\n",
" (('floor', ','), 264),\n",
" (('within', 'agency'), 264),\n",
" (('pursuant', 'to'), 259),\n",
" (('at', 'the'), 252),\n",
" (('for', 'a'), 241),\n",
" (('proposed', 'contract'), 228),\n",
" (('of', 'manhattan'), 226),\n",
" (('the', 'new'), 221),\n",
" (('notice', 'is'), 217),\n",
" (('the', 'following'), 214),\n",
" (('of', 'a'), 213),\n",
" (('is', 'hereby'), 213),\n",
" (('hereby', 'given'), 212),\n",
" (('--', '--'), 210),\n",
" (('manhattan', ','), 208),\n",
" ((',', 'manhattan'), 207),\n",
" ((',', 'at'), 204),\n",
" (('the', 'borough'), 202),\n",
" (('the', 'agency'), 201),\n",
" (('(', 's'), 196),\n",
" (('s', ')'), 196),\n",
" (('office', 'of'), 189),\n",
" (('of', 'services'), 188)]"
]
}
],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Let's see tri-grams!\n",
"corpusTrigrams = list(ngrams(lowerTokens,3))\n",
"corpusTrigramFreqs = nltk.FreqDist(corpusTrigrams)\n",
"corpusTrigramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 38,
"text": [
"[((',', 'new', 'york'), 457),\n",
" (('new', 'york', ','), 372),\n",
" (('new', 'york', 'city'), 321),\n",
" (('of', 'the', 'proposed'), 307),\n",
" (('date', 'of', 'the'), 274),\n",
" (('in', 'substantially', 'similar'), 272),\n",
" (('personnel', 'in', 'substantially'), 272),\n",
" (('substantially', 'similar', 'titles'), 272),\n",
" (('similar', 'titles', 'within'), 269),\n",
" (('within', 'agency', ':'), 264),\n",
" (('titles', 'within', 'agency'), 264),\n",
" (('borough', 'of', 'manhattan'), 223),\n",
" (('notice', 'is', 'hereby'), 213),\n",
" (('is', 'hereby', 'given'), 212),\n",
" (('the', 'new', 'york'), 206),\n",
" (('--', '--', '--'), 204),\n",
" (('the', 'proposed', 'contract'), 201),\n",
" (('(', 's', ')'), 196),\n",
" (('proposed', 'contract', ':'), 189),\n",
" (('in', 'the', 'borough'), 187),\n",
" (('the', 'borough', 'of'), 187),\n",
" (('hereby', 'given', 'that'), 185),\n",
" (('york', ',', 'ny'), 173),\n",
" (('agency', 'intends', 'to'), 169),\n",
" (('the', 'agency', 'intends'), 166),\n",
" (('of', 'new', 'york'), 165),\n",
" (('end', 'date', 'of'), 145),\n",
" ((',', 'borough', 'of'), 144),\n",
" (('to', 'utilize', ':'), 137),\n",
" (('intends', 'to', 'utilize'), 137),\n",
" (('a', 'term', 'of'), 136),\n",
" (('headcount', 'of', 'personnel'), 136),\n",
" (('of', 'personnel', 'in'), 136),\n",
" (('for', 'a', 'term'), 136),\n",
" (('start', 'date', 'of'), 133),\n",
" (('not', 'included', 'in'), 128),\n",
" (('annual', 'contracting', 'plan'), 126),\n",
" (('contracting', 'plan', 'and'), 126),\n",
" (('s', ')', 'not'), 126),\n",
" (('plan', 'and', 'schedule'), 126),\n",
" ((')', 'not', 'included'), 126),\n",
" (('and', 'operate', 'an'), 125),\n",
" ((',', 'and', 'operate'), 124),\n",
" (('caf', 'for', 'a'), 124),\n",
" (('fy', '2015', 'annual'), 124),\n",
" (('2015', 'annual', 'contracting'), 124),\n",
" (('maintain', ',', 'and'), 124),\n",
" (('sidewalk', 'caf', 'for'), 124),\n",
" (('years', '.', ')'), 123),\n",
" (('unenclosed', 'sidewalk', 'caf'), 120)]"
]
}
],
"prompt_number": 38
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#We can keep going!\n",
"corpus4grams = list(ngrams(lowerTokens,4))\n",
"corpus4gramFreqs = nltk.FreqDist(corpus4grams)\n",
"corpus4gramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 39,
"text": [
"[(('personnel', 'in', 'substantially', 'similar'), 272),\n",
" (('in', 'substantially', 'similar', 'titles'), 272),\n",
" (('substantially', 'similar', 'titles', 'within'), 269),\n",
" ((',', 'new', 'york', ','), 269),\n",
" (('titles', 'within', 'agency', ':'), 264),\n",
" (('similar', 'titles', 'within', 'agency'), 264),\n",
" (('date', 'of', 'the', 'proposed'), 264),\n",
" (('notice', 'is', 'hereby', 'given'), 212),\n",
" (('the', 'new', 'york', 'city'), 199),\n",
" (('of', 'the', 'proposed', 'contract'), 199),\n",
" (('--', '--', '--', '--'), 198),\n",
" (('the', 'proposed', 'contract', ':'), 186),\n",
" (('is', 'hereby', 'given', 'that'), 185),\n",
" (('in', 'the', 'borough', 'of'), 183),\n",
" (('new', 'york', ',', 'ny'), 173),\n",
" (('the', 'agency', 'intends', 'to'), 166),\n",
" (('agency', 'intends', 'to', 'utilize'), 137),\n",
" (('intends', 'to', 'utilize', ':'), 137),\n",
" (('for', 'a', 'term', 'of'), 136),\n",
" (('headcount', 'of', 'personnel', 'in'), 136),\n",
" (('of', 'personnel', 'in', 'substantially'), 136),\n",
" (('start', 'date', 'of', 'the'), 131),\n",
" (('end', 'date', 'of', 'the'), 131),\n",
" (('s', ')', 'not', 'included'), 126),\n",
" (('(', 's', ')', 'not'), 126),\n",
" (('annual', 'contracting', 'plan', 'and'), 126),\n",
" ((')', 'not', 'included', 'in'), 126),\n",
" (('contracting', 'plan', 'and', 'schedule'), 126),\n",
" (('sidewalk', 'caf', 'for', 'a'), 124),\n",
" ((',', 'and', 'operate', 'an'), 124),\n",
" (('fy', '2015', 'annual', 'contracting'), 124),\n",
" (('caf', 'for', 'a', 'term'), 124),\n",
" (('maintain', ',', 'and', 'operate'), 124),\n",
" (('2015', 'annual', 'contracting', 'plan'), 124),\n",
" (('the', 'borough', 'of', 'manhattan'), 123),\n",
" (('unenclosed', 'sidewalk', 'caf', 'for'), 120),\n",
" (('new', 'york', 'city', 'charter'), 116),\n",
" (('hereby', 'given', 'that', 'the'), 116),\n",
" (('operate', 'an', 'unenclosed', 'sidewalk'), 112),\n",
" (('an', 'unenclosed', 'sidewalk', 'caf'), 112),\n",
" (('and', 'operate', 'an', 'unenclosed'), 112),\n",
" (('22', 'reade', 'street', ','), 109),\n",
" (('city', 'of', 'new', 'york'), 108),\n",
" (('in', 'the', 'matter', 'of'), 106),\n",
" (('the', 'city', 'of', 'new'), 105),\n",
" (('of', 'services', 'sought', ':'), 104),\n",
" (('solicitation', 'the', 'agency', 'intends'), 103),\n",
" (('method', 'of', 'solicitation', 'the'), 103),\n",
" (('of', 'solicitation', 'the', 'agency'), 103),\n",
" (('a', 'term', 'of', 'four'), 102)]"
]
}
],
"prompt_number": 39
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus5grams = list(ngrams(lowerTokens,5))\n",
"corpus5gramFreqs = nltk.FreqDist(corpus5grams)\n",
"corpus5gramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 40,
"text": [
"[(('personnel', 'in', 'substantially', 'similar', 'titles'), 272),\n",
" (('in', 'substantially', 'similar', 'titles', 'within'), 269),\n",
" (('similar', 'titles', 'within', 'agency', ':'), 264),\n",
" (('substantially', 'similar', 'titles', 'within', 'agency'), 264),\n",
" (('--', '--', '--', '--', '--'), 192),\n",
" (('of', 'the', 'proposed', 'contract', ':'), 186),\n",
" (('date', 'of', 'the', 'proposed', 'contract'), 186),\n",
" (('notice', 'is', 'hereby', 'given', 'that'), 185),\n",
" ((',', 'new', 'york', ',', 'ny'), 152),\n",
" (('agency', 'intends', 'to', 'utilize', ':'), 137),\n",
" (('headcount', 'of', 'personnel', 'in', 'substantially'), 136),\n",
" (('of', 'personnel', 'in', 'substantially', 'similar'), 136),\n",
" (('the', 'agency', 'intends', 'to', 'utilize'), 134),\n",
" (('start', 'date', 'of', 'the', 'proposed'), 131),\n",
" (('end', 'date', 'of', 'the', 'proposed'), 131),\n",
" (('(', 's', ')', 'not', 'included'), 126),\n",
" (('annual', 'contracting', 'plan', 'and', 'schedule'), 126),\n",
" (('s', ')', 'not', 'included', 'in'), 126),\n",
" (('maintain', ',', 'and', 'operate', 'an'), 124),\n",
" (('caf', 'for', 'a', 'term', 'of'), 124),\n",
" (('2015', 'annual', 'contracting', 'plan', 'and'), 124),\n",
" (('fy', '2015', 'annual', 'contracting', 'plan'), 124),\n",
" (('sidewalk', 'caf', 'for', 'a', 'term'), 124),\n",
" (('in', 'the', 'borough', 'of', 'manhattan'), 121),\n",
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a'), 120),\n",
" (('is', 'hereby', 'given', 'that', 'the'), 116),\n",
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n",
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n",
" ((',', 'and', 'operate', 'an', 'unenclosed'), 112),\n",
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n",
" (('the', 'city', 'of', 'new', 'york'), 104),\n",
" (('method', 'of', 'solicitation', 'the', 'agency'), 103),\n",
" (('of', 'solicitation', 'the', 'agency', 'intends'), 103),\n",
" (('solicitation', 'the', 'agency', 'intends', 'to'), 103),\n",
" (('a', 'term', 'of', 'four', 'years'), 102),\n",
" (('for', 'a', 'term', 'of', 'four'), 102),\n",
" (('of', 'four', 'years', '.', ')'), 101),\n",
" (('term', 'of', 'four', 'years', '.'), 101),\n",
" (('the', 'borough', 'of', 'manhattan', '('), 100),\n",
" ((',', '22', 'reade', 'street', ','), 100),\n",
" (('borough', 'of', 'manhattan', '(', 'to'), 99),\n",
" (('to', 'maintain', ',', 'and', 'operate'), 98),\n",
" (('floor', ',', 'new', 'york', ','), 96),\n",
" (('solicitation', '(', 's', ')', 'not'), 94),\n",
" (('titles', 'within', 'agency', ':', 'none'), 93),\n",
" (('none', 'headcount', 'of', 'personnel', 'in'), 92),\n",
" (('agency', ':', 'none', 'headcount', 'of'), 92),\n",
" ((':', 'none', 'headcount', 'of', 'personnel'), 92),\n",
" (('within', 'agency', ':', 'none', 'headcount'), 91),\n",
" (('continue', 'to', 'maintain', ',', 'and'), 89)]"
]
}
],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus6grams = list(ngrams(lowerTokens,6))\n",
"corpus6gramFreqs = nltk.FreqDist(corpus6grams)\n",
"corpus6gramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 41,
"text": [
"[(('personnel', 'in', 'substantially', 'similar', 'titles', 'within'), 269),\n",
" (('substantially', 'similar', 'titles', 'within', 'agency', ':'), 264),\n",
" (('in', 'substantially', 'similar', 'titles', 'within', 'agency'), 264),\n",
" (('--', '--', '--', '--', '--', '--'), 186),\n",
" (('date', 'of', 'the', 'proposed', 'contract', ':'), 186),\n",
" (('headcount', 'of', 'personnel', 'in', 'substantially', 'similar'), 136),\n",
" (('of', 'personnel', 'in', 'substantially', 'similar', 'titles'), 136),\n",
" (('the', 'agency', 'intends', 'to', 'utilize', ':'), 134),\n",
" (('(', 's', ')', 'not', 'included', 'in'), 126),\n",
" (('2015', 'annual', 'contracting', 'plan', 'and', 'schedule'), 124),\n",
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of'), 124),\n",
" (('fy', '2015', 'annual', 'contracting', 'plan', 'and'), 124),\n",
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term'), 120),\n",
" (('notice', 'is', 'hereby', 'given', 'that', 'the'), 116),\n",
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'), 112),\n",
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n",
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n",
" (('maintain', ',', 'and', 'operate', 'an', 'unenclosed'), 112),\n",
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n",
" (('method', 'of', 'solicitation', 'the', 'agency', 'intends'), 103),\n",
" (('of', 'solicitation', 'the', 'agency', 'intends', 'to'), 103),\n",
" (('solicitation', 'the', 'agency', 'intends', 'to', 'utilize'), 103),\n",
" (('caf', 'for', 'a', 'term', 'of', 'four'), 102),\n",
" (('for', 'a', 'term', 'of', 'four', 'years'), 102),\n",
" (('term', 'of', 'four', 'years', '.', ')'), 101),\n",
" (('a', 'term', 'of', 'four', 'years', '.'), 101),\n",
" (('in', 'the', 'borough', 'of', 'manhattan', '('), 100),\n",
" (('the', 'borough', 'of', 'manhattan', '(', 'to'), 99),\n",
" (('to', 'maintain', ',', 'and', 'operate', 'an'), 98),\n",
" (('end', 'date', 'of', 'the', 'proposed', 'contract'), 96),\n",
" (('solicitation', '(', 's', ')', 'not', 'included'), 94),\n",
" (('similar', 'titles', 'within', 'agency', ':', 'none'), 93),\n",
" (('agency', ':', 'none', 'headcount', 'of', 'personnel'), 92),\n",
" (('none', 'headcount', 'of', 'personnel', 'in', 'substantially'), 92),\n",
" ((':', 'none', 'headcount', 'of', 'personnel', 'in'), 92),\n",
" (('titles', 'within', 'agency', ':', 'none', 'headcount'), 91),\n",
" (('within', 'agency', ':', 'none', 'headcount', 'of'), 91),\n",
" (('start', 'date', 'of', 'the', 'proposed', 'contract'), 90),\n",
" (('continue', 'to', 'maintain', ',', 'and', 'operate'), 89),\n",
" (('borough', 'of', 'manhattan', '(', 'to', 'continue'), 87),\n",
" (('of', 'manhattan', '(', 'to', 'continue', 'to'), 86),\n",
" (('(', 'to', 'continue', 'to', 'maintain', ','), 86),\n",
" (('to', 'continue', 'to', 'maintain', ',', 'and'), 86),\n",
" (('similar', 'titles', 'within', 'agency', ':', '0'), 83),\n",
" (('spector', 'hall', ',', '22', 'reade', 'street'), 82),\n",
" (('hall', ',', '22', 'reade', 'street', ','), 82),\n",
" (('in', 'spector', 'hall', ',', '22', 'reade'), 81),\n",
" (('manhattan', '(', 'to', 'continue', 'to', 'maintain'), 74),\n",
" (('floor', ',', 'new', 'york', ',', 'ny'), 72),\n",
" ((',', 'new', 'york', ',', 'ny', '10007'), 66)]"
]
}
],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus7grams = list(ngrams(lowerTokens,7))\n",
"corpus7gramFreqs = nltk.FreqDist(corpus7grams)\n",
"corpus7gramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 42,
"text": [
"[(('personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency'),\n",
" 264),\n",
" (('in', 'substantially', 'similar', 'titles', 'within', 'agency', ':'), 264),\n",
" (('--', '--', '--', '--', '--', '--', '--'), 180),\n",
" (('headcount', 'of', 'personnel', 'in', 'substantially', 'similar', 'titles'),\n",
" 136),\n",
" (('of', 'personnel', 'in', 'substantially', 'similar', 'titles', 'within'),\n",
" 133),\n",
" (('fy', '2015', 'annual', 'contracting', 'plan', 'and', 'schedule'), 124),\n",
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of'), 120),\n",
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n",
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term'), 112),\n",
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'), 112),\n",
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n",
" (('maintain', ',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n",
" (('of', 'solicitation', 'the', 'agency', 'intends', 'to', 'utilize'), 103),\n",
" (('solicitation', 'the', 'agency', 'intends', 'to', 'utilize', ':'), 103),\n",
" (('method', 'of', 'solicitation', 'the', 'agency', 'intends', 'to'), 103),\n",
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four'), 102),\n",
" (('caf', 'for', 'a', 'term', 'of', 'four', 'years'), 102),\n",
" (('a', 'term', 'of', 'four', 'years', '.', ')'), 101),\n",
" (('for', 'a', 'term', 'of', 'four', 'years', '.'), 101),\n",
" (('in', 'the', 'borough', 'of', 'manhattan', '(', 'to'), 99),\n",
" (('end', 'date', 'of', 'the', 'proposed', 'contract', ':'), 96),\n",
" (('solicitation', '(', 's', ')', 'not', 'included', 'in'), 94),\n",
" (('substantially', 'similar', 'titles', 'within', 'agency', ':', 'none'), 93),\n",
" (('none', 'headcount', 'of', 'personnel', 'in', 'substantially', 'similar'),\n",
" 92),\n",
" (('agency', ':', 'none', 'headcount', 'of', 'personnel', 'in'), 92),\n",
" ((':', 'none', 'headcount', 'of', 'personnel', 'in', 'substantially'), 92),\n",
" (('to', 'maintain', ',', 'and', 'operate', 'an', 'unenclosed'), 91),\n",
" (('titles', 'within', 'agency', ':', 'none', 'headcount', 'of'), 91),\n",
" (('similar', 'titles', 'within', 'agency', ':', 'none', 'headcount'), 91),\n",
" (('within', 'agency', ':', 'none', 'headcount', 'of', 'personnel'), 91),\n",
" (('start', 'date', 'of', 'the', 'proposed', 'contract', ':'), 90),\n",
" (('continue', 'to', 'maintain', ',', 'and', 'operate', 'an'), 89),\n",
" (('the', 'borough', 'of', 'manhattan', '(', 'to', 'continue'), 87),\n",
" (('to', 'continue', 'to', 'maintain', ',', 'and', 'operate'), 86),\n",
" (('(', 'to', 'continue', 'to', 'maintain', ',', 'and'), 86),\n",
" (('borough', 'of', 'manhattan', '(', 'to', 'continue', 'to'), 86),\n",
" (('substantially', 'similar', 'titles', 'within', 'agency', ':', '0'), 83),\n",
" (('spector', 'hall', ',', '22', 'reade', 'street', ','), 82),\n",
" (('in', 'spector', 'hall', ',', '22', 'reade', 'street'), 81),\n",
" (('manhattan', '(', 'to', 'continue', 'to', 'maintain', ','), 74),\n",
" (('of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain'), 74),\n",
" (('notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor'), 65),\n",
" (('s', ')', 'not', 'included', 'in', 'the', 'fy'), 63),\n",
" (('hereby', 'given', 'that', 'the', 'mayor', 'will', 'be'), 63),\n",
" (('schedule', 'that', 'is', 'published', 'pursuant', 'to', 'new'), 63),\n",
" (('contracting', 'plan', 'and', 'schedule', 'notice', 'is', 'hereby'), 63),\n",
" (('plan', 'and', 'schedule', 'notice', 'is', 'hereby', 'given'), 63),\n",
" (('annual', 'contracting', 'plan', 'and', 'schedule', 'notice', 'is'), 63),\n",
" (('to', 'new', 'york', 'city', 'charter', '312', '('), 63),\n",
" (('annual', 'contracting', 'plan', 'and', 'schedule', 'that', 'is'), 63)]"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus8grams = list(ngrams(lowerTokens,8))\n",
"corpus8gramFreqs = nltk.FreqDist(corpus8grams)\n",
"corpus8gramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 43,
"text": [
"[(('personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':'),\n",
" 264),\n",
" (('--', '--', '--', '--', '--', '--', '--', '--'), 174),\n",
" (('headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within'),\n",
" 133),\n",
" (('of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency'),\n",
" 129),\n",
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'), 112),\n",
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n",
" (('maintain', ',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf'),\n",
" 112),\n",
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of'), 112),\n",
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term'), 112),\n",
" (('of', 'solicitation', 'the', 'agency', 'intends', 'to', 'utilize', ':'),\n",
" 103),\n",
" (('method',\n",
" 'of',\n",
" 'solicitation',\n",
" 'the',\n",
" 'agency',\n",
" 'intends',\n",
" 'to',\n",
" 'utilize'),\n",
" 103),\n",
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years'), 102),\n",
" (('caf', 'for', 'a', 'term', 'of', 'four', 'years', '.'), 101),\n",
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four'), 101),\n",
" (('for', 'a', 'term', 'of', 'four', 'years', '.', ')'), 101),\n",
" (('in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none'),\n",
" 93),\n",
" (('agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially'),\n",
" 92),\n",
" (('none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles'),\n",
" 92),\n",
" ((':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar'),\n",
" 92),\n",
" (('titles', 'within', 'agency', ':', 'none', 'headcount', 'of', 'personnel'),\n",
" 91),\n",
" (('to', 'maintain', ',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk'),\n",
" 91),\n",
" (('similar', 'titles', 'within', 'agency', ':', 'none', 'headcount', 'of'),\n",
" 91),\n",
" (('substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount'),\n",
" 91),\n",
" (('within', 'agency', ':', 'none', 'headcount', 'of', 'personnel', 'in'), 91),\n",
" (('in', 'the', 'borough', 'of', 'manhattan', '(', 'to', 'continue'), 87),\n",
" (('the', 'borough', 'of', 'manhattan', '(', 'to', 'continue', 'to'), 86),\n",
" (('(', 'to', 'continue', 'to', 'maintain', ',', 'and', 'operate'), 86),\n",
" (('to', 'continue', 'to', 'maintain', ',', 'and', 'operate', 'an'), 86),\n",
" (('in', 'substantially', 'similar', 'titles', 'within', 'agency', ':', '0'),\n",
" 83),\n",
" (('continue', 'to', 'maintain', ',', 'and', 'operate', 'an', 'unenclosed'),\n",
" 82),\n",
" (('in', 'spector', 'hall', ',', '22', 'reade', 'street', ','), 81),\n",
" (('borough', 'of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain'), 74),\n",
" (('manhattan', '(', 'to', 'continue', 'to', 'maintain', ',', 'and'), 74),\n",
" (('of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain', ','), 74),\n",
" (('notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor', 'will'), 63),\n",
" (('contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant'),\n",
" 63),\n",
" (('annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby'),\n",
" 63),\n",
" (('to', 'new', 'york', 'city', 'charter', '312', '(', 'a'), 63),\n",
" (('(', 's', ')', 'not', 'included', 'in', 'the', 'fy'), 63),\n",
" (('annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published'),\n",
" 63),\n",
" (('pursuant', 'to', 'new', 'york', 'city', 'charter', '312', '('), 63),\n",
" (('that', 'is', 'published', 'pursuant', 'to', 'new', 'york', 'city'), 63),\n",
" (('city', 'charter', '312', '(', 'a', ')', ':', 'agency'), 63),\n",
" (('schedule', 'notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor'), 63),\n",
" (('published', 'pursuant', 'to', 'new', 'york', 'city', 'charter', '312'),\n",
" 63),\n",
" (('schedule', 'that', 'is', 'published', 'pursuant', 'to', 'new', 'york'),\n",
" 63),\n",
" (('contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given'),\n",
" 63),\n",
" (('york', 'city', 'charter', '312', '(', 'a', ')', ':'), 63),\n",
" (('and', 'schedule', 'notice', 'is', 'hereby', 'given', 'that', 'the'), 63),\n",
" (('plan', 'and', 'schedule', 'notice', 'is', 'hereby', 'given', 'that'), 63)]"
]
}
],
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus9grams = list(ngrams(lowerTokens,9))\n",
"corpus9gramFreqs = nltk.FreqDist(corpus9grams)\n",
"corpus9gramFreqs.most_common(50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 44,
"text": [
"[(('--', '--', '--', '--', '--', '--', '--', '--', '--'), 168),\n",
" (('of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':'),\n",
" 129),\n",
" (('headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency'),\n",
" 129),\n",
" (('maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for'),\n",
" 112),\n",
" ((',', 'and', 'operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a'),\n",
" 112),\n",
" (('and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for',\n",
" 'a',\n",
" 'term'),\n",
" 112),\n",
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of'),\n",
" 112),\n",
" (('method',\n",
" 'of',\n",
" 'solicitation',\n",
" 'the',\n",
" 'agency',\n",
" 'intends',\n",
" 'to',\n",
" 'utilize',\n",
" ':'),\n",
" 103),\n",
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years', '.'), 101),\n",
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years'),\n",
" 101),\n",
" (('caf', 'for', 'a', 'term', 'of', 'four', 'years', '.', ')'), 101),\n",
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four'),\n",
" 94),\n",
" (('personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none'),\n",
" 93),\n",
" ((':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles'),\n",
" 92),\n",
" (('agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar'),\n",
" 92),\n",
" (('within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially'),\n",
" 91),\n",
" (('to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf'),\n",
" 91),\n",
" (('substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of'),\n",
" 91),\n",
" (('similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel'),\n",
" 91),\n",
" (('titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in'),\n",
" 91),\n",
" (('in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount'),\n",
" 91),\n",
" (('none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within'),\n",
" 89),\n",
" (('(', 'to', 'continue', 'to', 'maintain', ',', 'and', 'operate', 'an'), 86),\n",
" (('in', 'the', 'borough', 'of', 'manhattan', '(', 'to', 'continue', 'to'),\n",
" 86),\n",
" (('personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" '0'),\n",
" 83),\n",
" (('continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk'),\n",
" 82),\n",
" (('to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed'),\n",
" 79),\n",
" (('of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain', ',', 'and'),\n",
" 74),\n",
" (('manhattan',\n",
" '(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate'),\n",
" 74),\n",
" (('borough', 'of', 'manhattan', '(', 'to', 'continue', 'to', 'maintain', ','),\n",
" 74),\n",
" (('the',\n",
" 'borough',\n",
" 'of',\n",
" 'manhattan',\n",
" '(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain'),\n",
" 74),\n",
" (('and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that',\n",
" 'the',\n",
" 'mayor'),\n",
" 63),\n",
" (('schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city'),\n",
" 63),\n",
" (('to', 'new', 'york', 'city', 'charter', '312', '(', 'a', ')'), 63),\n",
" (('contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that'),\n",
" 63),\n",
" (('city', 'charter', '312', '(', 'a', ')', ':', 'agency', ':'), 63),\n",
" (('pursuant', 'to', 'new', 'york', 'city', 'charter', '312', '(', 'a'), 63),\n",
" (('notice', 'is', 'hereby', 'given', 'that', 'the', 'mayor', 'will', 'be'),\n",
" 63),\n",
" (('that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city',\n",
" 'charter'),\n",
" 63),\n",
" (('york', 'city', 'charter', '312', '(', 'a', ')', ':', 'agency'), 63),\n",
" (('and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york'),\n",
" 63),\n",
" (('plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that',\n",
" 'the'),\n",
" 63),\n",
" (('annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given'),\n",
" 63),\n",
" (('schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that',\n",
" 'the',\n",
" 'mayor',\n",
" 'will'),\n",
" 63),\n",
" (('plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new'),\n",
" 63),\n",
" (('contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to'),\n",
" 63),\n",
" (('new', 'york', 'city', 'charter', '312', '(', 'a', ')', ':'), 63),\n",
" (('published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city',\n",
" 'charter',\n",
" '312',\n",
" '('),\n",
" 63),\n",
" (('is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city',\n",
" 'charter',\n",
" '312'),\n",
" 63),\n",
" (('annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant'),\n",
" 63)]"
]
}
],
"prompt_number": 44
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus5grams = ngrams(lowerTokens,5)\n",
"corpus5gramFreqs = nltk.FreqDist(corpus5grams)\n",
"corpus5gramFreqs.most_common()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 34,
"text": [
"[(('personnel', 'in', 'substantially', 'similar', 'titles'), 272),\n",
" (('in', 'substantially', 'similar', 'titles', 'within'), 269),\n",
" (('similar', 'titles', 'within', 'agency', ':'), 264),\n",
" (('substantially', 'similar', 'titles', 'within', 'agency'), 264),\n",
" (('--', '--', '--', '--', '--'), 192),\n",
" (('of', 'the', 'proposed', 'contract', ':'), 186),\n",
" (('date', 'of', 'the', 'proposed', 'contract'), 186),\n",
" (('notice', 'is', 'hereby', 'given', 'that'), 185),\n",
" ((',', 'new', 'york', ',', 'ny'), 152),\n",
" (('agency', 'intends', 'to', 'utilize', ':'), 137),\n",
" (('headcount', 'of', 'personnel', 'in', 'substantially'), 136),\n",
" (('of', 'personnel', 'in', 'substantially', 'similar'), 136),\n",
" (('the', 'agency', 'intends', 'to', 'utilize'), 134),\n",
" (('start', 'date', 'of', 'the', 'proposed'), 131),\n",
" (('end', 'date', 'of', 'the', 'proposed'), 131),\n",
" (('(', 's', ')', 'not', 'included'), 126),\n",
" (('annual', 'contracting', 'plan', 'and', 'schedule'), 126),\n",
" (('s', ')', 'not', 'included', 'in'), 126),\n",
" (('maintain', ',', 'and', 'operate', 'an'), 124),\n",
" (('caf', 'for', 'a', 'term', 'of'), 124),\n",
" (('2015', 'annual', 'contracting', 'plan', 'and'), 124),\n",
" (('fy', '2015', 'annual', 'contracting', 'plan'), 124),\n",
" (('sidewalk', 'caf', 'for', 'a', 'term'), 124),\n",
" (('in', 'the', 'borough', 'of', 'manhattan'), 121),\n",
" (('unenclosed', 'sidewalk', 'caf', 'for', 'a'), 120),\n",
" (('is', 'hereby', 'given', 'that', 'the'), 116),\n",
" (('an', 'unenclosed', 'sidewalk', 'caf', 'for'), 112),\n",
" (('operate', 'an', 'unenclosed', 'sidewalk', 'caf'), 112),\n",
" ((',', 'and', 'operate', 'an', 'unenclosed'), 112),\n",
" (('and', 'operate', 'an', 'unenclosed', 'sidewalk'), 112),\n",
" (('the', 'city', 'of', 'new', 'york'), 104),\n",
" (('method', 'of', 'solicitation', 'the', 'agency'), 103),\n",
" (('of', 'solicitation', 'the', 'agency', 'intends'), 103),\n",
" (('solicitation', 'the', 'agency', 'intends', 'to'), 103),\n",
" (('a', 'term', 'of', 'four', 'years'), 102),\n",
" (('for', 'a', 'term', 'of', 'four'), 102),\n",
" (('of', 'four', 'years', '.', ')'), 101),\n",
" (('term', 'of', 'four', 'years', '.'), 101),\n",
" (('the', 'borough', 'of', 'manhattan', '('), 100),\n",
" ((',', '22', 'reade', 'street', ','), 100),\n",
" (('borough', 'of', 'manhattan', '(', 'to'), 99),\n",
" (('to', 'maintain', ',', 'and', 'operate'), 98),\n",
" (('floor', ',', 'new', 'york', ','), 96),\n",
" (('solicitation', '(', 's', ')', 'not'), 94),\n",
" (('titles', 'within', 'agency', ':', 'none'), 93),\n",
" (('none', 'headcount', 'of', 'personnel', 'in'), 92),\n",
" (('agency', ':', 'none', 'headcount', 'of'), 92),\n",
" ((':', 'none', 'headcount', 'of', 'personnel'), 92),\n",
" (('within', 'agency', ':', 'none', 'headcount'), 91),\n",
" (('continue', 'to', 'maintain', ',', 'and'), 89),\n",
" (('of', 'manhattan', '(', 'to', 'continue'), 87),\n",
" (('to', 'continue', 'to', 'maintain', ','), 86),\n",
" (('manhattan', '(', 'to', 'continue', 'to'), 86),\n",
" (('(', 'to', 'continue', 'to', 'maintain'), 86),\n",
" (('of', 'the', 'new', 'york', 'city'), 83),\n",
" (('titles', 'within', 'agency', ':', '0'), 83),\n",
" (('spector', 'hall', ',', '22', 'reade'), 82),\n",
" (('hall', ',', '22', 'reade', 'street'), 82),\n",
" (('in', 'spector', 'hall', ',', '22'), 81),\n",
" (('description', 'of', 'services', 'sought', ':'), 80),\n",
" (('at', 'the', 'call', 'of', 'the'), 75),\n",
" (('street', ',', 'new', 'york', ','), 75),\n",
" (('new', 'york', ',', 'ny', '10007'), 68),\n",
" (('hereby', 'given', 'that', 'the', 'mayor'), 65),\n",
" (('of', 'the', 'city', 'of', 'new'), 64),\n",
" (('and', 'schedule', 'notice', 'is', 'hereby'), 63),\n",
" (('charter', '312', '(', 'a', ')'), 63),\n",
" (('contracting', 'plan', 'and', 'schedule', 'notice'), 63),\n",
" (('not', 'included', 'in', 'the', 'fy'), 63),\n",
" (('to', 'new', 'york', 'city', 'charter'), 63),\n",
" (('is', 'published', 'pursuant', 'to', 'new'), 63),\n",
" (('schedule', 'notice', 'is', 'hereby', 'given'), 63),\n",
" (('york', 'city', 'charter', '312', '('), 63),\n",
" (('and', 'schedule', 'that', 'is', 'published'), 63),\n",
" (('plan', 'and', 'schedule', 'notice', 'is'), 63),\n",
" (('312', '(', 'a', ')', ':'), 63),\n",
" (('city', 'charter', '312', '(', 'a'), 63),\n",
" (('(', 'a', ')', ':', 'agency'), 63),\n",
" (('published', 'pursuant', 'to', 'new', 'york'), 63),\n",
" (('plan', 'and', 'schedule', 'that', 'is'), 63),\n",
" (('a', ')', ':', 'agency', ':'), 63),\n",
" ((',', 'new', 'york', ',', 'n.y.'), 63),\n",
" (('new', 'york', 'city', 'charter', '312'), 63),\n",
" (('that', 'the', 'mayor', 'will', 'be'), 63),\n",
" ((')', 'not', 'included', 'in', 'the'), 63),\n",
" (('pursuant', 'to', 'new', 'york', 'city'), 63),\n",
" (('that', 'is', 'published', 'pursuant', 'to'), 63),\n",
" (('contracting', 'plan', 'and', 'schedule', 'that'), 63),\n",
" (('given', 'that', 'the', 'mayor', 'will'), 63),\n",
" (('schedule', 'that', 'is', 'published', 'pursuant'), 63),\n",
" ((')', 'not', 'included', 'in', 'fy'), 63),\n",
" (('not', 'included', 'in', 'fy', '2015'), 62),\n",
" (('the', 'fy', '2015', 'annual', 'contracting'), 62),\n",
" (('included', 'in', 'the', 'fy', '2015'), 62),\n",
" ((',', 'borough', 'of', 'manhattan', ','), 62),\n",
" (('in', 'fy', '2015', 'annual', 'contracting'), 62),\n",
" (('in', 'the', 'fy', '2015', 'annual'), 62),\n",
" (('call', 'of', 'the', 'chairman', '.'), 62),\n",
" (('included', 'in', 'fy', '2015', 'annual'), 62),\n",
" (('for', 'the', 'period', 'july', '1'), 61),\n",
" (('the', 'period', 'july', '1', ','), 61),\n",
" (('meets', 'in', 'spector', 'hall', ','), 60),\n",
" (('new', 'york', ',', 'new', 'york'), 53),\n",
" (('the', 'new', 'york', 'city', 'charter'), 53),\n",
" (('the', 'call', 'of', 'the', 'chairman'), 51),\n",
" (('ave', 'in', 'the', 'borough', 'of'), 51),\n",
" (('reade', 'street', ',', 'main', 'floor'), 50),\n",
" (('street', ',', 'main', 'floor', ','), 50),\n",
" (('22', 'reade', 'street', ',', 'main'), 50),\n",
" (('of', 'environmental', 'remediation', '(', 'oer'), 50),\n",
" (('environmental', 'remediation', '(', 'oer', ')'), 50),\n",
" (('new', 'york', 'city', 'office', 'of'), 50),\n",
" (('office', 'of', 'environmental', 'remediation', '('), 50),\n",
" (('in', 'the', 'matter', 'of', 'a'), 49),\n",
" ((':', 'task', 'order', 'personnel', 'in'), 49),\n",
" (('remediation', '(', 'oer', ')', 'has'), 49),\n",
" (('(', 'oer', ')', 'has', 'received'), 49),\n",
" (('order', 'personnel', 'in', 'substantially', 'similar'), 49),\n",
" (('the', 'new', 'york', 'city', 'office'), 49),\n",
" (('oer', ')', 'has', 'received', 'an'), 49),\n",
" (('assigned', 'to', 'this', 'project', '.'), 49),\n",
" (('received', 'an', 'nyc', 'voluntary', 'cleanup'), 49),\n",
" (('utilize', ':', 'task', 'order', 'personnel'), 49),\n",
" (('nyc', 'voluntary', 'cleanup', 'program', '('), 49),\n",
" (('is', 'assigned', 'to', 'this', 'project'), 49),\n",
" (('program', '(', 'vcp', ')', 'application'), 49),\n",
" (('task', 'order', 'personnel', 'in', 'substantially'), 49),\n",
" (('city', 'office', 'of', 'environmental', 'remediation'), 49),\n",
" (('york', 'city', 'office', 'of', 'environmental'), 49),\n",
" (('has', 'received', 'an', 'nyc', 'voluntary'), 49),\n",
" (('cleanup', 'program', '(', 'vcp', ')'), 49),\n",
" ((')', 'has', 'received', 'an', 'nyc'), 49),\n",
" (('intends', 'to', 'utilize', ':', 'task'), 49),\n",
" (('voluntary', 'cleanup', 'program', '(', 'vcp'), 49),\n",
" (('an', 'nyc', 'voluntary', 'cleanup', 'program'), 49),\n",
" (('to', 'utilize', ':', 'task', 'order'), 49),\n",
" ((',', 'times', 'and', 'location', 'as'), 48),\n",
" (('times', 'and', 'location', 'as', 'warranted'), 48),\n",
" (('administration', 'for', 'children', \"'s\", 'services'), 48),\n",
" (('days', ',', 'times', 'and', 'location'), 48),\n",
" (('and', 'location', 'as', 'warranted', '.'), 48),\n",
" (('other', 'days', ',', 'times', 'and'), 48),\n",
" (('for', 'a', 'site', 'located', 'at'), 48),\n",
" (('(', 'vcp', ')', 'application', 'from'), 48),\n",
" (('is', 'hereby', 'given', 'that', 'a'), 48),\n",
" (('zoning', 'district', '.', 'premises', 'affected'), 48),\n",
" ((',', 'and', 'other', 'days', ','), 48),\n",
" (('and', 'other', 'days', ',', 'times'), 48),\n",
" (('the', 'following', 'solicitation', '(', 's'), 47),\n",
" (('mayor', 'will', 'be', 'issuing', 'the'), 47),\n",
" (('of', 'intent', 'to', 'issue', 'new'), 47),\n",
" (('to', 'issue', 'new', 'solicitation', '('), 47),\n",
" (('new', 'solicitation', '(', 's', ')'), 47),\n",
" (('following', 'solicitation', '(', 's', ')'), 47),\n",
" (('issuing', 'the', 'following', 'solicitation', '('), 47),\n",
" (('will', 'be', 'issuing', 'the', 'following'), 47),\n",
" (('notice', 'of', 'intent', 'to', 'issue'), 47),\n",
" (('public', 'hearing', 'will', 'be', 'held'), 47),\n",
" (('the', 'mayor', 'will', 'be', 'issuing'), 47),\n",
" (('intent', 'to', 'issue', 'new', 'solicitation'), 47),\n",
" (('be', 'issuing', 'the', 'following', 'solicitation'), 47),\n",
" (('issue', 'new', 'solicitation', '(', 's'), 47),\n",
" (('agency', ':', '0', 'agency', ':'), 46),\n",
" (('new', 'york', ',', 'n.y.', '10007'), 46),\n",
" ((',', 'manhattan', 'certificate', 'of', 'appropriateness'), 46),\n",
" (('.', 'the', 'new', 'york', 'city'), 45),\n",
" (('project', '.', 'the', 'new', 'york'), 45),\n",
" (('this', 'project', '.', 'the', 'new'), 45),\n",
" (('within', 'agency', ':', '0', 'agency'), 45),\n",
" (('to', 'this', 'project', '.', 'the'), 45),\n",
" ((')', ':', 'agency', ':', 'department'), 44),\n",
" ((':', 'agency', ':', 'department', 'of'), 44),\n",
" ((',', 'manhattan', ',', 'new', 'york'), 44),\n",
" (('city', 'of', 'new', 'york', ','), 43),\n",
" ((':', 'department', 'of', 'information', 'technology'), 41),\n",
" (('new', 'york', 'city', 'department', 'of'), 41),\n",
" (('agency', ':', 'department', 'of', 'information'), 41),\n",
" (('of', 'the', 'proposed', 'renewed/extended', 'contract'), 40),\n",
" (('date', 'of', 'the', 'proposed', 'renewed/extended'), 40),\n",
" (('the', 'proposed', 'renewed/extended', 'contract', ':'), 40),\n",
" (('public', 'notice', 'is', 'hereby', 'given'), 39),\n",
" (('manhattan', 'certificate', 'of', 'appropriateness', 'a'), 38),\n",
" (('at', '40', 'rector', 'street', ','), 38),\n",
" ((',', 'new', 'york', '10007', ','), 38),\n",
" (('the', 'matter', 'of', 'an', 'application'), 37),\n",
" (('a', 'public', 'hearing', 'will', 'be'), 37),\n",
" ((',', 'new', 'york', ',', 'new'), 37),\n",
" (('main', 'floor', ',', 'manhattan', ','), 36),\n",
" (('a.m.', ',', 'and', 'other', 'days'), 36),\n",
" (('scheduled', 'for', 'public', 'hearing', 'by'), 36),\n",
" (('of', 'the', 'chairman', '.', 'board'), 36),\n",
" (('commencing', '10:00', 'a.m.', ',', 'and'), 36),\n",
" (('for', 'public', 'hearing', 'by', 'community'), 36),\n",
" (('the', 'following', 'matters', 'have', 'been'), 36),\n",
" (('10:00', 'a.m.', ',', 'and', 'other'), 36),\n",
" ((',', 'main', 'floor', ',', 'manhattan'), 36),\n",
" (('the', 'chairman', '.', 'board', 'of'), 36),\n",
" (('given', 'that', 'the', 'following', 'matters'), 36),\n",
" (('been', 'scheduled', 'for', 'public', 'hearing'), 36),\n",
" (('matters', 'have', 'been', 'scheduled', 'for'), 36),\n",
" ((',', 'commencing', '10:00', 'a.m.', ','), 36),\n",
" (('hereby', 'given', 'that', 'the', 'following'), 36),\n",
" (('that', 'the', 'following', 'matters', 'have'), 36),\n",
" (('have', 'been', 'scheduled', 'for', 'public'), 36),\n",
" ((',', 'municipal', 'building', ',', 'manhattan'), 36),\n",
" (('municipal', 'building', ',', 'manhattan', ','), 36),\n",
" (('following', 'matters', 'have', 'been', 'scheduled'), 36),\n",
" (('in', 'the', 'borough', 'of', 'brooklyn'), 35),\n",
" (('22', 'reade', 'street', ',', 'new'), 35),\n",
" (('in', 'the', 'matter', 'of', 'an'), 35),\n",
" (('reade', 'street', ',', 'new', 'york'), 35),\n",
" (('days', 'prior', 'to', 'the', 'public'), 35),\n",
" (('public', 'hearing', 'by', 'community', 'board'), 35),\n",
" (('by', 'community', 'board', ':', 'borough'), 35),\n",
" (('hearing', 'by', 'community', 'board', ':'), 35),\n",
" (('individuals', 'requesting', 'sign', 'language', 'interpreters'), 35),\n",
" ((',', 'in', 'the', 'borough', 'of'), 35),\n",
" (('community', 'board', ':', 'borough', 'of'), 35),\n",
" (('requesting', 'sign', 'language', 'interpreters', 'should'), 35),\n",
" (('matter', 'of', 'an', 'application', 'submitted'), 34),\n",
" ((',', 'owner', '.', 'subject', 'application'), 33),\n",
" (('the', 'matter', 'of', 'a', 'proposed'), 33),\n",
" (('hearing', 'will', 'be', 'held', 'at'), 33),\n",
" (('of', 'an', 'application', 'submitted', 'by'), 33),\n",
" (('at', '10:00', 'a.m.', 'on', 'the'), 32),\n",
" ((':', '0', 'agency', ':', 'department'), 32),\n",
" (('to', 'the', 'public', 'hearing', '.'), 32),\n",
" (('prior', 'to', 'the', 'public', 'hearing'), 32),\n",
" (('nature', 'of', 'services', 'performed', 'under'), 32),\n",
" (('s', ')', 'the', 'agency', 'intends'), 32),\n",
" (('new', 'start', 'date', 'of', 'the'), 32),\n",
" (('0', 'agency', ':', 'department', 'of'), 32),\n",
" (('of', 'the', 'procurement', 'policy', 'board'), 32),\n",
" (('new', 'end', 'date', 'of', 'the'), 32),\n",
" (('performed', 'under', 'the', 'contract', ':'), 32),\n",
" (('of', 'services', 'performed', 'under', 'the'), 32),\n",
" (('services', 'performed', 'under', 'the', 'contract'), 32),\n",
" (('the', 'new', 'york', 'city', 'department'), 32),\n",
" (('contract', '(', 's', ')', 'not'), 32),\n",
" (('floor', ',', 'borough', 'of', 'manhattan'), 32),\n",
" (('(', 's', ')', 'the', 'agency'), 32),\n",
" (('reason', '(', 's', ')', 'the'), 32),\n",
" ((')', 'the', 'agency', 'intends', 'to'), 32),\n",
" (('the', 'nature', 'of', 'services', 'performed'), 31),\n",
" (('sign', 'language', 'interpreters', 'should', 'contact'), 31),\n",
" (('to', 'the', 'nature', 'of', 'services'), 31),\n",
" (('the', 'mayor', \"'s\", 'office', 'of'), 31),\n",
" (('sought', 'to', 'the', 'nature', 'of'), 31),\n",
" (('modifications', 'sought', 'to', 'the', 'nature'), 31),\n",
" (('the', 'administration', 'for', 'children', \"'s\"), 31),\n",
" (('language', 'interpreters', 'should', 'contact', 'the'), 30),\n",
" (('york', '.', 'site', 'no', '.'), 30),\n",
" (('new', 'york', '.', 'site', 'no'), 30),\n",
" ((',', 'new', 'york', '.', 'site'), 30),\n",
" ((',', 'brooklyn', ',', 'new', 'york'), 30),\n",
" (('manhattan', ',', 'new', 'york', '10007'), 29),\n",
" (('st', 'in', 'the', 'borough', 'of'), 29),\n",
" (('department', 'of', 'information', 'technology', '&'), 29),\n",
" (('business', 'days', 'prior', 'to', 'the'), 29),\n",
" (('the', 'procurement', 'policy', 'board', 'rules'), 29),\n",
" (('for', 'children', \"'s\", 'services', ','), 28),\n",
" ((',', '2nd', 'floor', ',', 'new'), 28),\n",
" ((':', 'in', 'the', 'matter', 'of'), 28),\n",
" (('of', 'information', 'technology', '&', 'telecommunications'), 28),\n",
" (('2nd', 'floor', ',', 'new', 'york'), 28),\n",
" (('that', 'a', 'public', 'hearing', 'will'), 28),\n",
" (('york', ',', 'n.y.', '10007', ','), 28),\n",
" (('contact', 'the', 'mayor', \"'s\", 'office'), 27),\n",
" (('should', 'contact', 'the', 'mayor', \"'s\"), 27),\n",
" (('interpreters', 'should', 'contact', 'the', 'mayor'), 27),\n",
" (('square', 'foot', 'parcel', 'of', 'land'), 27),\n",
" (('william', 'street', ',', '9th', 'floor'), 26),\n",
" (('given', 'that', 'a', 'public', 'hearing'), 26),\n",
" ((',', 'at', 'the', 'call', 'of'), 26),\n",
" (('borough', 'of', 'manhattan', ',', 'on'), 26),\n",
" (('five', '(', '5', ')', 'years'), 26),\n",
" (('150', 'william', 'street', ',', '9th'), 26),\n",
" ((')', 'business', 'days', 'prior', 'to'), 26),\n",
" (('extension', 'new', 'start', 'date', 'of'), 26),\n",
" (('llc', 'for', 'a', 'site', 'located'), 25),\n",
" (('hereby', 'given', 'that', 'a', 'public'), 25),\n",
" ((',', 'between', 'the', 'hours', 'of'), 25),\n",
" (('.', '(', 'preliminary', 'and', 'final'), 24),\n",
" (('telecommunications', 'description', 'of', 'services', 'sought'), 24),\n",
" (('(', 'preliminary', 'and', 'final', ')'), 24),\n",
" ((',', 'at', '10:00', 'a.m.', ','), 24),\n",
" (('of', 'each', 'month', ',', 'at'), 24),\n",
" (('procurement', 'policy', 'board', 'rules', '.'), 24),\n",
" (('on', 'the', 'following', ':', 'in'), 24),\n",
" (('month', 'at', 'the', 'call', 'of'), 24),\n",
" (('10007', ',', '(', '212', ')'), 24),\n",
" (('nature', 'of', 'services', 'sought', ':'), 24),\n",
" (('the', 'following', ':', 'in', 'the'), 24),\n",
" (('street', ',', '9th', 'floor', ','), 24),\n",
" ((',', 'brooklyn', 'certificate', 'of', 'appropriateness'), 24),\n",
" (('following', ':', 'in', 'the', 'matter'), 24),\n",
" (('new', 'york', ',', 'ny', '10004'), 24),\n",
" (('department', 'of', 'parks', 'and', 'recreation'), 24),\n",
" ((',', 'maintain', ',', 'and', 'operate'), 24),\n",
" (('new', 'york', ',', 'ny', '10006'), 24),\n",
" (('new', 'york', 'city', 'charter', 'for'), 23),\n",
" (('preliminary', 'and', 'final', ')', '('), 23),\n",
" (('information', 'technology', '&', 'telecommunications', 'description'), 23),\n",
" (('.', 'start', 'date', 'of', 'the'), 23),\n",
" (('technology', '&', 'telecommunications', 'description', 'of'), 23),\n",
" (('&', 'telecommunications', 'description', 'of', 'services'), 23),\n",
" (('.', 'notice', 'of', 'intent', 'to'), 23),\n",
" (('and', 'final', ')', '(', 'cc'), 23),\n",
" (('at', 'the', 'new', 'york', 'city'), 23),\n",
" (('a.m.', 'on', 'the', 'following', ':'), 23),\n",
" (('contract', ':', 'none', 'reason', '('), 22),\n",
" (('term', 'of', 'two', 'years', '.'), 22),\n",
" (('for', 'a', 'term', 'of', 'two'), 22),\n",
" (('new', 'york', 'city', 'charter', ','), 22),\n",
" (('under', 'the', 'contract', ':', 'none'), 22),\n",
" (('of', 'the', 'proposed', 'extended', 'contract'), 22),\n",
" ((':', 'none', 'reason', '(', 's'), 22),\n",
" (('none', 'reason', '(', 's', ')'), 22),\n",
" (('intends', 'to', 'utilize', ':', 'amendment'), 22),\n",
" (('building', ',', 'manhattan', ',', 'new'), 22),\n",
" (('will', 'be', 'held', 'at', 'the'), 22),\n",
" (('york', ',', 'ny', '10004', ','), 22),\n",
" (('of', 'two', 'years', '.', ')'), 22),\n",
" (('pursuant', 'to', 'sections', '197-c', 'and'), 22),\n",
" ((')', 'of', 'the', 'procurement', 'policy'), 22),\n",
" (('a', 'term', 'of', 'two', 'years'), 22),\n",
" (('the', 'contract', ':', 'none', 'reason'), 22),\n",
" ((',', 'zoned', 'r6', 'community', 'district'), 22),\n",
" (('the', 'proposed', 'extended', 'contract', ':'), 22),\n",
" (('new', 'york', '10007', ',', 'at'), 22),\n",
" (('date', 'of', 'the', 'proposed', 'extended'), 22),\n",
" (('9th', 'floor', ',', 'borough', 'of'), 21),\n",
" (('the', 'state', 'of', 'new', 'york'), 21),\n",
" (('public', 'hearings', 'unit', ',', '253'), 21),\n",
" (('of', 'new', 'york', ',', 'as'), 21),\n",
" ((',', 'public', 'hearings', 'unit', ','), 21),\n",
" (('(', '212', ')', '788-7490', ','), 21),\n",
" (('788-7490', ',', 'no', 'later', 'than'), 21),\n",
" ((')', '788-7490', ',', 'no', 'later'), 21),\n",
" (('foot', 'parcel', 'of', 'land', 'located'), 21),\n",
" (('unit', ',', '253', 'broadway', ','), 21),\n",
" (('to', 'utilize', ':', 'amendment', 'extension'), 21),\n",
" (('hearings', 'unit', ',', '253', 'broadway'), 21),\n",
" (('services', ',', 'public', 'hearings', 'unit'), 21),\n",
" (('street', ',', 'brooklyn', ',', 'new'), 21),\n",
" ((',', '9th', 'floor', ',', 'borough'), 21),\n",
" (('parcel', 'of', 'land', 'located', 'at'), 21),\n",
" (('212', ')', '788-7490', ',', 'no'), 21),\n",
" (('services', 'personnel', 'in', 'substantially', 'similar'), 21),\n",
" (('nan', 'notice', 'of', 'intent', 'to'), 20),\n",
" (('of', 'housing', 'preservation', 'and', 'development'), 20),\n",
" (('utilize', ':', 'amendment', 'extension', 'new'), 20),\n",
" (('amendment', 'extension', 'new', 'start', 'date'), 20),\n",
" (('the', 'agency', 'intends', 'to', 'renew/extend'), 20),\n",
" (('york', ',', 'ny', '10007', ','), 20),\n",
" (('to', 'renew/extend', 'the', 'contract', ':'), 20),\n",
" ((',', 'borough', 'of', 'manhattan', '.'), 20),\n",
" (('department', 'of', 'housing', 'preservation', 'and'), 20),\n",
" (('intends', 'to', 'renew/extend', 'the', 'contract'), 20),\n",
" (('agency', 'intends', 'to', 'renew/extend', 'the'), 20),\n",
" (('matter', 'of', 'a', 'proposed', 'contract'), 20),\n",
" (('of', 'a', 'proposed', 'contract', 'between'), 20),\n",
" ((',', '150', 'william', 'street', ','), 20),\n",
" ((',', '2014', 'special', 'permit', '('), 20),\n",
" ((':', 'amendment', 'extension', 'new', 'start'), 20),\n",
" (('.', 'a', 'copy', 'of', 'the'), 20),\n",
" (('commencing', 'at', '10:00', 'a.m.', 'on'), 20),\n",
" (('street', ',', 'manhattan', ',', 'ny'), 20),\n",
" (('10:00', 'a.m.', 'on', 'the', 'following'), 20),\n",
" (('a', 'proposed', 'contract', 'between', 'the'), 20),\n",
" (('nan', 'notice', 'is', 'hereby', 'given'), 19),\n",
" (('york', 'city', 'charter', 'for', 'the'), 19),\n",
" (('children', \"'s\", 'services', ',', 'office'), 19),\n",
" (('been', 'selected', 'by', 'means', 'of'), 19),\n",
" ((\"'s\", 'services', ',', 'office', 'of'), 19),\n",
" (('later', 'than', 'seven', '(', '7'), 19),\n",
" (('than', 'seven', '(', '7', ')'), 19),\n",
" ((',', 'borough', 'of', 'brooklyn', '.'), 19),\n",
" (('seven', '(', '7', ')', 'business'), 19),\n",
" (('7', ')', 'business', 'days', 'prior'), 19),\n",
" (('(', 'preliminary', ')', '(', 'cc'), 19),\n",
" (('available', 'for', 'public', 'inspection', 'at'), 19),\n",
" ((',', 'no', 'later', 'than', 'seven'), 19),\n",
" (('no', 'later', 'than', 'seven', '('), 19),\n",
" (('201', 'of', 'the', 'new', 'york'), 19),\n",
" ((',', 'manhattan', ',', 'ny', '10007'), 19),\n",
" ((',', 'long', 'island', 'city', ','), 19),\n",
" (('.', '(', 'preliminary', ')', '('), 19),\n",
" (('.', 'tdd', 'users', 'should', 'call'), 19),\n",
" (('renewal/extension', 'the', 'agency', 'intends', 'to'), 19),\n",
" (('manhattan', ',', 'ny', '10007', ','), 19),\n",
" (('users', 'should', 'call', 'verizon', 'relay'), 19),\n",
" ((',', 'ny', '.', 'site', 'no'), 19),\n",
" (('mayor', \"'s\", 'office', 'of', 'contract'), 19),\n",
" (('for', 'public', 'inspection', 'at', 'the'), 19),\n",
" (('ny', '.', 'site', 'no', '.'), 19),\n",
" (('.', 'application', 'is', 'to', 'construct'), 19),\n",
" (('agency', ':', 'department', 'of', 'parks'), 19),\n",
" (('(', '7', ')', 'business', 'days'), 19),\n",
" (('.', 'application', 'is', 'to', 'replace'), 19),\n",
" (('197-c', 'and', '201', 'of', 'the'), 19),\n",
" (('tdd', 'users', 'should', 'call', 'verizon'), 19),\n",
" (('of', 'the', 'state', 'of', 'new'), 19),\n",
" (('of', 'renewal/extension', 'the', 'agency', 'intends'), 19),\n",
" (('and', '201', 'of', 'the', 'new'), 19),\n",
" (('of', 'services', 'personnel', 'in', 'substantially'), 19),\n",
" (('method', 'of', 'renewal/extension', 'the', 'agency'), 19),\n",
" (('for', 'the', 'city', 'of', 'new'), 18),\n",
" (('sections', '197-c', 'and', '201', 'of'), 18),\n",
" ((\"'s\", 'office', 'of', 'contract', 'services'), 18),\n",
" (('parks', 'and', 'recreation', 'nature', 'of'), 18),\n",
" (('within', 'agency', ':', '0', 'notice'), 18),\n",
" (('brooklyn', 'certificate', 'of', 'appropriateness', 'a'), 18),\n",
" (('office', 'of', 'contract', 'services', ','), 18),\n",
" (('dollars', '(', '$', '2,000,000', ')'), 18),\n",
" (('at', '100', 'church', 'street', ','), 18),\n",
" (('two', 'million', 'dollars', '(', '$'), 18),\n",
" (('contract', 'services', ',', 'public', 'hearings'), 18),\n",
" (('of', 'contract', 'services', ',', 'public'), 18),\n",
" ((':', 'department', 'of', 'parks', 'and'), 18),\n",
" ((',', '(', '212', ')', '788-7490'), 18),\n",
" (('of', 'parks', 'and', 'recreation', 'nature'), 18),\n",
" (('million', 'dollars', '(', '$', '2,000,000'), 18),\n",
" ((',', 'manhattan', '.', '(', 'preliminary'), 18),\n",
" (('to', 'sections', '197-c', 'and', '201'), 18),\n",
" (('.', 'individuals', 'requesting', 'sign', 'language'), 18),\n",
" (('inspection', 'at', 'the', 'new', 'york'), 17),\n",
" (('district', '2', ',', 'manhattan', 'certificate'), 17),\n",
" (('street', ',', '6th', 'floor', ','), 17),\n",
" (('rules', '.', 'a', 'copy', 'of'), 17),\n",
" ((':', 'individuals', 'requesting', 'sign', 'language'), 17),\n",
" (('note', ':', 'individuals', 'requesting', 'sign'), 17),\n",
" ((',', 'for', 'the', 'provision', 'of'), 17),\n",
" (('five', '(', '5', ')', 'business'), 17),\n",
" (('of', 'brooklyn', '.', 'community', 'board'), 17),\n",
" (('city', 'of', 'new', 'york', '('), 17),\n",
" (('city', 'of', 'new', 'york', 'and'), 17),\n",
" (('2', ',', 'manhattan', 'certificate', 'of'), 17),\n",
" (('(', '5', ')', 'business', 'days'), 17),\n",
" (('brooklyn', '.', 'community', 'board', '#'), 17),\n",
" (('york', ',', 'new', 'york', '10007'), 17),\n",
" (('one', 'centre', 'street', ',', 'room'), 17),\n",
" (('the', 'new', 'york', 'city', 'housing'), 17),\n",
" ((',', '42-09', '28th', 'street', ','), 17),\n",
" (('street', ',', '12th', 'floor', ','), 17),\n",
" (('street', 'in', 'the', 'borough', 'of'), 17),\n",
" (('franchise', 'and', 'concession', 'review', 'committee'), 17),\n",
" (('community', 'district', '2', ',', 'manhattan'), 17),\n",
" (('borough', 'of', 'brooklyn', '.', 'community'), 17),\n",
" (('end', 'of', 'each', 'month', '.'), 17),\n",
" (('to', 'continue', 'to', ',', 'maintain'), 16),\n",
" (('of', '(', 'a', ')', 'contract'), 16),\n",
" (('proposed', 'contractor', 'has', 'been', 'selected'), 16),\n",
" (('notice', 'of', 'intent', 'to', 'extend'), 16),\n",
" (('intent', 'to', 'extend', 'contract', '('), 16),\n",
" ((',', 'staten', 'island', ',', 'new'), 16),\n",
" (('entering', 'into', 'the', 'following', 'extension'), 16),\n",
" (('york', ',', 'n.y.', '10007', '.'), 16),\n",
" (('borough', 'of', 'manhattan', '.', 'community'), 16),\n",
" (('the', 'proposed', 'contractor', 'has', 'been'), 16),\n",
" (('extension', '(', 's', ')', 'of'), 16),\n",
" (('to', ',', 'maintain', ',', 'and'), 16),\n",
" (('in', 'equal', 'monthly', 'installments', 'at'), 16),\n",
" (('payable', 'in', 'equal', 'monthly', 'installments'), 16),\n",
" (('following', 'extension', '(', 's', ')'), 16),\n",
" (('street', ',', 'manhattan', '.', '('), 16),\n",
" (('equal', 'monthly', 'installments', 'at', 'the'), 16),\n",
" (('the', 'end', 'of', 'each', 'month'), 16),\n",
" (('(', '3', ')', 'of', 'the'), 16),\n",
" (('will', 'be', 'entering', 'into', 'the'), 16),\n",
" (('york', ',', 'as', 'tenant', ','), 16),\n",
" (('to', 'utilize', ':', 'competitive', 'sealed'), 16),\n",
" ((')', 'of', '(', 'a', ')'), 16),\n",
" ((',', 'lessee', '.', 'subject', 'application'), 16),\n",
" (('the', 'mayor', 'will', 'be', 'entering'), 16),\n",
" (('monthly', 'installments', 'at', 'the', 'end'), 16),\n",
" (('at', 'the', 'end', 'of', 'each'), 16),\n",
" (('(', '5', ')', 'years', ','), 16),\n",
" (('c', ')', '(', '3', ')'), 16),\n",
" ((',', 'no', 'later', 'than', 'five'), 16),\n",
" (('of', 'each', 'month', '.', 'the'), 16),\n",
" (('public', 'hearing', '.', 'tdd', 'users'), 16),\n",
" (('city', 'and', 'state', 'mortgage', 'recording'), 16),\n",
" (('staten', 'island', ',', 'new', 'york'), 16),\n",
" (('hearing', '.', 'tdd', 'users', 'should'), 16),\n",
" (('a', ')', 'contract', '(', 's'), 16),\n",
" (('mayor', 'will', 'be', 'entering', 'into'), 16),\n",
" (('(', 'c', ')', '(', '3'), 16),\n",
" ((')', 'contract', '(', 's', ')'), 16),\n",
" (('manhattan', '.', 'community', 'board', '#'), 16),\n",
" (('into', 'the', 'following', 'extension', '('), 16),\n",
" ((')', '(', '3', ')', 'of'), 16),\n",
" (('of', 'manhattan', '.', 'community', 'board'), 16),\n",
" (('the', 'following', 'extension', '(', 's'), 16),\n",
" (('intends', 'to', 'utilize', ':', 'competitive'), 16),\n",
" (('extend', 'contract', '(', 's', ')'), 16),\n",
" ((',', 'in', 'spector', 'hall', ','), 16),\n",
" (('original', 'principal', 'amount', 'of', '$'), 16),\n",
" (('s', ')', 'of', '(', 'a'), 16),\n",
" (('and', 'state', 'mortgage', 'recording', 'taxes'), 16),\n",
" (('(', 'a', ')', 'contract', '('), 16),\n",
" (('continue', 'to', ',', 'maintain', ','), 16),\n",
" (('(', 's', ')', 'of', '('), 16),\n",
" (('the', 'borough', 'of', 'brooklyn', '('), 16),\n",
" (('recreation', 'nature', 'of', 'services', 'sought'), 16),\n",
" (('church', 'street', ',', '12th', 'floor'), 16),\n",
" (('at', '10:00', 'a.m.', 'in', 'the'), 16),\n",
" (('installments', 'at', 'the', 'end', 'of'), 16),\n",
" (('notice', 'is', 'hereby', 'given', 'of'), 16),\n",
" (('than', 'five', '(', '5', ')'), 16),\n",
" (('100', 'church', 'street', ',', '12th'), 16),\n",
" (('the', 'public', 'hearing', '.', 'tdd'), 16),\n",
" (('(', 'to', 'continue', 'to', ','), 16),\n",
" (('york', ',', 'ny', '10007', '('), 16),\n",
" (('and', 'recreation', 'nature', 'of', 'services'), 16),\n",
" (('.', 'application', 'is', 'to', 'install'), 16),\n",
" (('new', 'york', 'city', 'housing', 'authority'), 16),\n",
" (('new', 'york', ',', 'as', 'tenant'), 16),\n",
" (('to', 'extend', 'contract', '(', 's'), 16),\n",
" (('no', 'later', 'than', 'five', '('), 16),\n",
" (('be', 'entering', 'into', 'the', 'following'), 16),\n",
" (('of', 'intent', 'to', 'extend', 'contract'), 16),\n",
" (('later', 'than', 'five', '(', '5'), 16),\n",
" (('room', '#', '143', ',', 'new'), 15),\n",
" (('here', 'and', 'on', 'nycha', \"'s\"), 15),\n",
" (('for', 'additional', 'information', ',', 'please'), 15),\n",
" (('at',\n",
" 'http',\n",
" ':',\n",
" '//www.nyc.gov/html/nycha/html/about/boardmeeting_schedule.shtml',\n",
" 'to'),\n",
" 15),\n",
" (('room', ',', 'borough', 'of', 'manhattan'), 15),\n",
" (('on', 'nycha', \"'s\", 'website', 'at'), 15),\n",
" ((',', 'training', 'room', '#', '143'), 15),\n",
" ((',', '33', 'beaver', 'street', ','), 15),\n",
" (('nycha', \"'s\", 'website', 'or', 'contact'), 15),\n",
" (('2nd', 'floor', 'conference', 'room', ','), 15),\n",
" (('within', 'agency', ':', '0', 'nan'), 15),\n",
" (('additional', 'information', ',', 'please', 'visit'), 15),\n",
" (('brooklyn', ',', 'new', 'york', '.'), 15),\n",
" (('schedule', 'will', 'be', 'posted', 'here'), 15),\n",
" ((',', 'as', 'tenant', ',', 'of'), 15),\n",
" (('posted', 'here', 'and', 'on', 'nycha'), 15),\n",
" (('beaver', 'street', ',', '21st', 'floor'), 15),\n",
" (('of', 'a', 'public', 'hearing', ','), 15),\n",
" (('please', 'visit', 'nycha', \"'s\", 'website'), 15),\n",
" (('a', 'reasonable', 'time', 'before', 'the'), 15),\n",
" (('services', ',', 'office', 'of', 'procurement'), 15),\n",
" (('.', 'for', 'additional', 'information', ','), 15),\n",
" (('board', 'of', 'standards', 'and', 'appeals'), 15),\n",
" ((\"'s\", 'website', 'or', 'contact', '('), 15),\n",
" (('meeting', '.', 'for', 'additional', 'information'), 15),\n",
" (('//www.nyc.gov/html/nycha/html/about/boardmeeting_schedule.shtml',\n",
" 'to',\n",
" 'the',\n",
" 'extent',\n",
" 'practicable'),\n",
" 15),\n",
" (('a.m.', 'in', 'the', 'board', 'room'), 15),\n",
" (('http',\n",
" ':',\n",
" '//www.nyc.gov/html/nycha/html/about/boardmeeting_schedule.shtml',\n",
" 'to',\n",
" 'the'),\n",
" 15),\n",
" (('york', 'city', 'charter', ',', 'will'), 15),\n",
" (('on', 'the', '12th', 'floor', 'of'), 15),\n",
" (('10:00', 'a.m.', 'in', 'the', 'board'), 15),\n",
" (('training', 'room', '#', '143', ','), 15),\n",
" (('board', 'room', 'on', 'the', '12th'), 15),\n",
" (('from', 'the', 'date', 'of', 'approval'), 15),\n",
" (('north', ',', 'new', 'york', ','), 15),\n",
" (('on', 'the', 'following', 'matters', ':'), 15),\n",
" (('3-04', '(', 'b', ')', '('), 15),\n",
" (('in', 'the', 'board', 'room', 'on'), 15),\n",
" (('hereby', 'given', 'of', 'a', 'public'), 15),\n",
" (('(', 'unless', 'otherwise', 'noted', ')'), 15),\n",
" (('hearing', ',', 'tuesday', 'morning', ','), 15),\n",
" (('street', ',', 'manhattan', ',', 'new'), 15),\n",
" (('the', 'new', 'york', 'city', 'administration'), 15),\n",
" (('reasonable', 'time', 'before', 'the', 'meeting'), 15),\n",
" (('services', 'start', 'date', 'of', 'the'), 15),\n",
" (('borough', 'of', 'brooklyn', 'community', 'board'), 15),\n",
" (('street', ',', '2nd', 'floor', 'conference'), 15),\n",
" ((',', 'in', 'accordance', 'with', 'section'), 15),\n",
" (('the', 'extent', 'practicable', 'at', 'a'), 15),\n",
" (('unless', 'otherwise', 'noted', ')', '.'), 15),\n",
" (('#', '143', ',', 'new', 'york'), 15),\n",
" (('will', 'be', 'posted', 'here', 'and'), 15),\n",
" (('12th', 'floor', ',', 'training', 'room'), 15),\n",
" (('public', 'hearing', ',', 'tuesday', 'morning'), 15),\n",
" (('given', 'of', 'a', 'public', 'hearing'), 15),\n",
" ((',', '2014', ',', '10:00', 'a.m.'), 15),\n",
" (('information', ',', 'please', 'visit', 'nycha'), 15),\n",
" (('floor', 'of', '250', 'broadway', ','), 15),\n",
" (('should', 'call', 'verizon', 'relay', 'services'), 15),\n",
" (('website',\n",
" 'at',\n",
" 'http',\n",
" ':',\n",
" '//www.nyc.gov/html/nycha/html/about/boardmeeting_schedule.shtml'),\n",
" 15),\n",
" (('broadway', ',', 'new', 'york', ','), 15),\n",
" (('reade', 'street', ',', '2nd', 'floor'), 15),\n",
" (('street', ',', '21st', 'floor', ','), 15),\n",
" (('website', 'or', 'contact', '(', '212'), 15),\n",
" (('conference', 'room', ',', 'borough', 'of'), 15),\n",
" (('visit', 'nycha', \"'s\", 'website', 'or'), 15),\n",
" (('at', 'a', 'reasonable', 'time', 'before'), 15),\n",
" ((':',\n",
" '//www.nyc.gov/html/nycha/html/about/boardmeeting_schedule.shtml',\n",
" 'to',\n",
" 'the',\n",
" 'extent'),\n",
" 15),\n",
" (('foot', 'facility', 'located', 'on', 'a'), 15),\n",
" (('room', 'on', 'the', '12th', 'floor'), 15),\n",
" (('application', 'is', 'to', 'construct', 'a'), 15),\n",
" ((',', 'on', 'the', 'following', 'matters'), 15),\n",
" (('.', 'any', 'changes', 'to', 'the'), 15),\n",
" (('nycha', \"'s\", 'website', 'at', 'http'), 15),\n",
" (('(', 'final', ')', '(', 'cc'), 15),\n",
" (('(', 'b', ')', '(', '2'), 15),\n",
" (('or', 'contact', '(', '212', ')'), 15),\n",
" (('at', 'one', 'centre', 'street', ','), 15),\n",
" (('12th', 'floor', 'of', '250', 'broadway'), 15),\n",
" (('section', '3-04', '(', 'b', ')'), 15),\n",
" (('250', 'broadway', ',', 'new', 'york'), 15),\n",
" ((',', 'bronx', ',', 'new', 'york'), 15),\n",
" (('to', 'the', 'schedule', 'will', 'be'), 15),\n",
" (('and', 'on', 'nycha', \"'s\", 'website'), 15),\n",
" ((',', '2nd', 'floor', 'conference', 'room'), 15),\n",
" (('the', '12th', 'floor', 'of', '250'), 15),\n",
" (('floor', 'conference', 'room', ',', 'borough'), 15),\n",
" (('changes', 'to', 'the', 'schedule', 'will'), 15),\n",
" (('to', 'section', '3-04', '(', 'b'), 15),\n",
" (('square', 'foot', 'facility', 'located', 'on'), 15),\n",
" ((',', 'please', 'visit', 'nycha', \"'s\"), 15),\n",
" (('floor', ',', 'training', 'room', '#'), 15),\n",
" (('pursuant', 'to', 'section', '3-04', '('), 15),\n",
" (('extent', 'practicable', 'at', 'a', 'reasonable'), 15),\n",
" ((':', 'borough', 'of', 'brooklyn', 'community'), 15),\n",
" (('be', 'posted', 'here', 'and', 'on'), 15),\n",
" (('.', '(', 'final', ')', '('), 15),\n",
" (('of', '250', 'broadway', ',', 'new'), 15),\n",
" (('any', 'changes', 'to', 'the', 'schedule'), 15),\n",
" (('b', ')', '(', '2', ')'), 15),\n",
" (('a', 'public', 'hearing', ',', 'tuesday'), 15),\n",
" (('practicable', 'at', 'a', 'reasonable', 'time'), 15),\n",
" (('time', 'before', 'the', 'meeting', '.'), 15),\n",
" (('33', 'beaver', 'street', ',', '21st'), 15),\n",
" (('to', 'the', 'extent', 'practicable', 'at'), 15),\n",
" (('call', 'verizon', 'relay', 'services', '.'), 15),\n",
" (('is', 'hereby', 'given', 'of', 'a'), 15),\n",
" (('143', ',', 'new', 'york', ','), 15),\n",
" (('the', 'board', 'room', 'on', 'the'), 15),\n",
" (('22', 'reade', 'street', ',', '2nd'), 15),\n",
" ((',', '12th', 'floor', ',', 'training'), 15),\n",
" (('board', ':', 'borough', 'of', 'brooklyn'), 15),\n",
" ((\"'s\", 'website', 'at', 'http', ':'), 15),\n",
" (('the', 'schedule', 'will', 'be', 'posted'), 15),\n",
" (('lease', 'may', 'be', 'obtained', 'at'), 14),\n",
" (('agency', ':', '0', 'notice', 'of'), 14),\n",
" ((',', 'n.y.', '10007', ',', '('), 14),\n",
" (('please', 'contact', 'chris', 'fleming', 'at'), 14),\n",
" (('section', '824', 'of', 'the', 'new'), 14),\n",
" (('may', 'be', 'obtained', 'at', 'one'), 14),\n",
" (('the', 'date', 'of', 'approval', 'by'), 14),\n",
" (('project', 'manager', ',', 'associate', 'project'), 14),\n",
" (('an', 'inspection', ',', 'please', 'contact'), 14),\n",
" (('further', 'information', ',', 'including', 'public'), 14),\n",
" (('fleming', 'at', '(', '212', ')'), 14),\n",
" (('contact', 'chris', 'fleming', 'at', '('), 14),\n",
" (('center', ',', '42-09', '28th', 'street'), 14),\n",
" (('in', 'accordance', 'with', 'section', '824'), 14),\n",
" (('a.m.', ',', '22', 'reade', 'street'), 14),\n",
" (('agency', ':', 'department', 'of', 'design'), 14),\n",
" (('policy', 'board', 'rules', '.', 'a'), 14),\n",
" (('new', 'york', 'city', 'administration', 'for'), 14),\n",
" ((',', 'new', 'york', 'having', 'an'), 14),\n",
" (('be', 'obtained', 'at', 'one', 'centre'), 14),\n",
" (('date', 'of', 'approval', 'by', 'the'), 14),\n",
" (('10007', '.', 'to', 'schedule', 'an'), 14),\n",
" (('agency', ':', 'ddc', 'description', 'of'), 14),\n",
" (('dispositions', 'public', 'hearing', ',', 'in'), 14),\n",
" (('as', 'tenant', ',', 'of', 'approximately'), 14),\n",
" (('manager', ',', 'associate', 'project', 'manager'), 14),\n",
" ((',', 'n.y.', '10007', '.', 'to'), 14),\n",
" ((':', '0', 'agency', ':', 'ddc'), 14),\n",
" (('street', ',', '2nd', 'floor', ','), 14),\n",
" (('n.y.', '10007', ',', 'on', 'the'), 14),\n",
" (('construction', 'description', 'of', 'services', 'sought'), 14),\n",
" (('public', 'hearing', ',', 'in', 'accordance'), 14),\n",
" (('schedule', 'an', 'inspection', ',', 'please'), 14),\n",
" (('department', 'of', 'citywide', 'administrative', 'services'), 14),\n",
" (('.', 'notice', 'is', 'hereby', 'given'), 14),\n",
" (('island', ',', 'new', 'york', 'having'), 14),\n",
" (('n.y.', '10007', '.', 'to', 'schedule'), 14),\n",
" (('to', 'schedule', 'an', 'inspection', ','), 14),\n",
" (('proposed', 'lease', 'may', 'be', 'obtained'), 14),\n",
" (('city', 'charter', ',', 'will', 'be'), 14),\n",
" (('borough', 'of', 'brooklyn', '(', 'to'), 14),\n",
" (('acquisitions', 'and', 'dispositions', 'public', 'hearing'), 14),\n",
" (('at', '(', '212', ')', '386-0315'), 14),\n",
" (('agency', ':', 'department', 'of', 'environmental'), 14),\n",
" (('information', ',', 'including', 'public', 'inspection'), 14),\n",
" (('.', 'further', 'information', ',', 'including'), 14),\n",
" (('.', 'the', 'contract', 'term', 'shall'), 14),\n",
" (('state', 'mortgage', 'recording', 'taxes', '.'), 14),\n",
" ((',', 'will', 'be', 'held', 'on'), 14),\n",
" (('inspection', ',', 'please', 'contact', 'chris'), 14),\n",
" (('broadway', ',', '2nd', 'floor', ','), 14),\n",
" ((',', 'ny', '10007', ',', 'at'), 14),\n",
" (('york', 'city', 'administration', 'for', 'children'), 14),\n",
" ((')', '(', '2', ')', '('), 14),\n",
" (('the', 'proposed', 'lease', 'may', 'be'), 14),\n",
" (('2000', 'north', ',', 'new', 'york'), 14),\n",
" (('york', ',', 'ny', '10007', '.'), 14),\n",
" (('room', '2000', 'north', ',', 'new'), 14),\n",
" (('a', 'real', 'property', 'acquisitions', 'and'), 14),\n",
" (('city', 'administration', 'for', 'children', \"'s\"), 14),\n",
" (('hearing', ',', 'in', 'accordance', 'with'), 14),\n",
" ((',', '253', 'broadway', ',', '2nd'), 14),\n",
" (('824', 'of', 'the', 'new', 'york'), 14),\n",
" (('.', 'to', 'schedule', 'an', 'inspection'), 14),\n",
" (('386-0315', '.', 'individuals', 'requesting', 'sign'), 14),\n",
" ((',', '2014', 'at', '10:00', 'a.m.'), 14),\n",
" (('real', 'property', 'acquisitions', 'and', 'dispositions'), 14),\n",
" (('with', 'section', '824', 'of', 'the'), 14),\n",
" (('253', 'broadway', ',', '2nd', 'floor'), 14),\n",
" (('inspection', 'of', 'the', 'proposed', 'lease'), 14),\n",
" (('method', ',', 'pursuant', 'to', 'section'), 14),\n",
" (('department', 'of', 'design', 'and', 'construction'), 14),\n",
" (('n.y.', '10007', ',', '(', '212'), 14),\n",
" (('including', 'public', 'inspection', 'of', 'the'), 14),\n",
" ((',', 'please', 'contact', 'chris', 'fleming'), 14),\n",
" (('of', 'approval', 'by', 'the', 'mayor'), 14),\n",
" (('having', 'an', 'approximate', 'original', 'principal'), 14),\n",
" (('chris', 'fleming', 'at', '(', '212'), 14),\n",
" ((',', 'pursuant', 'to', 'section', '3-04'), 14),\n",
" (('obtained', 'at', 'one', 'centre', 'street'), 14),\n",
" (('street', ',', 'room', '2000', 'north'), 14),\n",
" ((':', 'department', 'of', 'environmental', 'protection'), 14),\n",
" ((',', '9th', 'floor', 'north', ','), 14),\n",
" (('212', ')', '386-0315', '.', 'individuals'), 14),\n",
" (('ddc', 'description', 'of', 'services', 'sought'), 14),\n",
" (('given', 'that', 'a', 'real', 'property'), 14),\n",
" ((')', '386-0315', '.', 'individuals', 'requesting'), 14),\n",
" ((',', 'flushing', ',', 'new', 'york'), 14),\n",
" (('property', 'acquisitions', 'and', 'dispositions', 'public'), 14),\n",
" (('public', 'inspection', 'of', 'the', 'proposed'), 14),\n",
" (('10007', ',', 'on', 'the', 'following'), 14),\n",
" ((',', 'n.y.', '10007', ',', 'on'), 14),\n",
" (('hereby', 'given', 'that', 'a', 'real'), 14),\n",
" ((',', 'including', 'public', 'inspection', 'of'), 14),\n",
" (('and', 'dispositions', 'public', 'hearing', ','), 14),\n",
" (('gotham', 'center', ',', '42-09', '28th'), 14),\n",
" (('new', 'york', 'having', 'an', 'approximate'), 14),\n",
" (('of', 'the', 'proposed', 'lease', 'may'), 14),\n",
" (('building', ',', 'manhattan', ',', 'ny'), 14),\n",
" (('charter', ',', 'will', 'be', 'held'), 14),\n",
" (('(', '212', ')', '386-0315', '.'), 14),\n",
" (('the', 'department', 'of', 'citywide', 'administrative'), 14),\n",
" (('city', 'hall', ',', 'third', 'floor'), 14),\n",
" (('that', 'a', 'real', 'property', 'acquisitions'), 14),\n",
" (('york', 'having', 'an', 'approximate', 'original'), 14),\n",
" ((':', 'ddc', 'description', 'of', 'services'), 14),\n",
" (('at', 'gotham', 'center', ',', '42-09'), 14),\n",
" ((',', 'room', '2000', 'north', ','), 14),\n",
" (('accordance', 'with', 'section', '824', 'of'), 14),\n",
" ((',', 'llc', 'for', 'a', 'site'), 14),\n",
" (('centre', 'street', ',', 'room', '2000'), 14),\n",
" (('be', 'terminated', 'by', 'the', 'tenant'), 13),\n",
" (('state', 'of', 'new', 'york', ','), 13),\n",
" (('public', 'hearing', 'notice', 'is', 'hereby'), 13),\n",
" (('children', \"'s\", 'services', 'of', 'the'), 13),\n",
" (('exempt', 'from', 'federal', 'taxation', 'pursuant'), 13),\n",
" (('contact', '(', '212', ')', '306-6088'), 13),\n",
" (('special', 'permit', '(', '73-36', ')'), 13),\n",
" (('pursuant', 'to', 'section', '501', '('), 13),\n",
" (('contractor', 'has', 'been', 'selected', 'by'), 13),\n",
" (('board', 'rules', '.', 'a', 'copy'), 13),\n",
" (('held', 'at', 'the', 'administration', 'for'), 13),\n",
" (('llc', 'pursuant', 'to', 'sections', '197-c'), 13),\n",
" (('services', 'of', 'the', 'city', 'of'), 13),\n",
" (('to', 'section', '501', '(', 'c'), 13),\n",
" (('method', 'of', 'original', 'contract', ':'), 13),\n",
" (('york', 'city', 'economic', 'development', 'corporation'), 13),\n",
" (('in', 'the', 'matter', 'of', 'the'), 13),\n",
" ((',', 'ny', '10007', 'at', '9:15'), 13),\n",
" (('section', '501', '(', 'c', ')'), 13),\n",
" (('approximate', 'original', 'principal', 'amount', 'of'), 13),\n",
" (('at', 'the', 'administration', 'for', 'children'), 13),\n",
" (('.', 'hourly', 'wage', 'average', 'and'), 13),\n",
" (('may', 'be', 'terminated', 'by', 'the'), 13),\n",
" (('(', '212', ')', '306-6088', '.'), 13),\n",
" (('an', 'application', 'submitted', 'by', 'the'), 13),\n",
" (('date', 'of', 'original', 'contract', ':'), 13),\n",
" ((\"'s\", 'services', 'of', 'the', 'city'), 13),\n",
" (('of', 'design', 'and', 'construction', 'description'), 13),\n",
" (('between', 'the', 'administration', 'for', 'children'), 13),\n",
" (('contract', 'is', 'available', 'for', 'public'), 13),\n",
" (('may', 'determine', '.', 'the', 'proposed'), 13),\n",
" (('from', 'federal', 'taxation', 'pursuant', 'to'), 13),\n",
" (('and', 'construction', 'description', 'of', 'services'), 13),\n",
" (('for', 'children', \"'s\", 'services', 'of'), 13),\n",
" (('new', 'york', ',', 'ny', '10038'), 13),\n",
" (('taxation', 'pursuant', 'to', 'section', '501'), 13),\n",
" (('hourly', 'wage', 'average', 'and', 'range'), 13),\n",
" (('of', 'the', 'general', 'municipal', 'law'), 13),\n",
" (('new', 'york', 'city', 'economic', 'development'), 13),\n",
" (('be', 'held', 'at', 'the', 'administration'), 13),\n",
" (('the', 'contract', 'term', 'shall', 'be'), 13),\n",
" (('is', 'available', 'for', 'public', 'inspection'), 13),\n",
" (('wage', 'average', 'and', 'range', ':'), 13),\n",
" (('end', 'date', 'of', 'original', 'contract'), 13),\n",
" (('york', ',', 'ny', '10007', 'at'), 13),\n",
" ((',', 'associate', 'project', 'manager', ','), 13),\n",
" (('selected', 'by', 'means', 'of', 'the'), 13),\n",
" (('corporation', 'exempt', 'from', 'federal', 'taxation'), 13),\n",
" (('501', '(', 'c', ')', '('), 13),\n",
" (('in', 'the', 'borough', 'of', 'queens'), 13),\n",
" ((':', 'department', 'of', 'design', 'and'), 13),\n",
" (('hearing', 'notice', 'is', 'hereby', 'given'), 13),\n",
" (('ny', '10007', 'at', '9:15', 'a.m.'), 13),\n",
" (('design', 'and', 'construction', 'description', 'of'), 13),\n",
" (('federal', 'taxation', 'pursuant', 'to', 'section'), 13),\n",
" (('an', 'approximate', 'original', 'principal', 'amount'), 13),\n",
" (('10:00', 'a.m.', 'on', 'the', 'second'), 12),\n",
" (('commission', 'meets', 'at', 'its', 'office'), 12),\n",
" (('of', 'public', 'hearing', 'notice', 'is'), 12),\n",
" (('as', 'needed', 'in', 'room', '2203'), 12),\n",
" (('manhattan', ',', 'weekly', ',', 'on'), 12),\n",
" (('the', 'first', 'tuesday', 'of', 'july'), 12),\n",
" (('bi-weekly', ',', 'on', 'wednesdays', ','), 12),\n",
" (('at', '10:00', 'a.m.', ',', '22'), 12),\n",
" ((')', 'at', '10:00', 'a.m.', 'in'), 12),\n",
" (('three', 'tuesday', \"'s\", 'each', 'month'), 12),\n",
" (('40', 'rector', 'street', ',', '6th'), 12),\n",
" (('the', 'chairman', '.', 'housing', 'authority'), 12),\n",
" (('p.m', '.', 'the', 'annual', 'meeting'), 12),\n",
" (('of', 'the', 'president', '.', 'manhattan'), 12),\n",
" (('tuesday', \"'s\", 'each', 'month', ','), 12),\n",
" (('accommodation', 'in', 'order', 'to', 'participate'), 12),\n",
" (('hearings', 'as', 'needed', 'in', 'room'), 12),\n",
" (('of', 'the', 'administrative', 'code', 'of'), 12),\n",
" (('936', ',', 'municipal', 'building', ','), 12),\n",
" (('floor', ',', 'manhattan', ',', 'bi-weekly'), 12),\n",
" (('warranted', '.', 'landmarks', 'preservation', 'commission'), 12),\n",
" (('december', '.', 'annual', 'meeting', 'held'), 12),\n",
" (('municipal', 'building', ',', '9th', 'floor'), 12),\n",
" (('human', 'rights', 'meets', 'on', '10th'), 12),\n",
" (('preceding', 'a', 'tuesday', 'public', 'hearing'), 12),\n",
" (('otherwise', 'ordered', 'by', 'the', 'commission'), 12),\n",
" (('floor', 'north', ',', '1', 'centre'), 12),\n",
" (('control', 'board', 'meets', 'at', '100'), 12),\n",
" (('between', 'the', 'hours', 'of', '10'), 12),\n",
" (('on', '10th', 'floor', 'in', 'the'), 12),\n",
" (('hearing', 'room', ',', 'municipal', 'building'), 12),\n",
" (('january', ',', 'february', ',', 'march'), 12),\n",
" ((',', 'ny', '10007', ',', 'twice'), 12),\n",
" (('10', 'am', 'and', '4', 'pm'), 12),\n",
" (('1:30', 'p.m.', 'and', 'at', 'the'), 12),\n",
" (('meets', 'at', '100', 'church', 'street'), 12),\n",
" ((',', 'on', 'fourth', 'monday', 'in'), 12),\n",
" (('as', 'warranted', '.', 'landmarks', 'preservation'), 12),\n",
" (('wednesday', 'of', 'each', 'month', '('), 12),\n",
" (('is', 'held', 'on', 'the', 'first'), 12),\n",
" (('board', 'generally', 'meets', 'at', '10:00'), 12),\n",
" (('tuesday', 'public', 'hearing', 'in', 'the'), 12),\n",
" ((')', 'in', 'the', 'borough', 'of'), 12),\n",
" (('as', 'warranted', '.', 'civilian', 'complaint'), 12),\n",
" (('.', 'manhattan', ',', 'monthly', 'on'), 12),\n",
" (('avenue', 'in', 'the', 'borough', 'of'), 12),\n",
" ((\"'s\", 'central', 'office', ',', '40'), 12),\n",
" (('customarily', 'held', 'on', 'mondays', 'preceding'), 12),\n",
" (('to', 'june', '30', ',', '2025'), 12),\n",
" (('lease', 'for', 'the', 'city', 'of'), 12),\n",
" (('10:00', 'a.m.', ',', 'unless', 'otherwise'), 12),\n",
" (('north', ',', '1', 'centre', 'street'), 12),\n",
" (('in', 'rem', 'foreclosure', 'release', 'board'), 12),\n",
" (('the', 'hall', 'of', 'the', 'board'), 12),\n",
" ((',', '335', 'adams', 'street', ','), 12),\n",
" (('meets', 'in', 'room', '936', ','), 12),\n",
" (('the', 'hours', 'of', '10', 'am'), 12),\n",
" (('10007', '(', 'unless', 'otherwise', 'noted'), 12),\n",
" (('october', ',', 'november', 'and', 'december'), 12),\n",
" (('floor', ',', 'and', 'other', 'days'), 12),\n",
" (('.', 'design', 'commission', 'meets', 'at'), 12),\n",
" (('application', 'desk', 'at', '(', '212'), 12),\n",
" (('ny', '10006', ',', 'on', 'the'), 12),\n",
" (('the', 'commission', '.', 'for', 'current'), 12),\n",
" (('10007', 'at', '9:15', 'a.m.', 'once'), 12),\n",
" (('meets', 'at', '40', 'rector', 'street'), 12),\n",
" (('office', ',', '40', 'rector', 'street'), 12),\n",
" (('2014', 'special', 'permit', '(', '73-36'), 12),\n",
" (('april', ',', 'june', ',', 'september'), 12),\n",
" (('.', 'board', 'of', 'standards', 'and'), 12),\n",
" (('standards', 'and', 'appeals', 'meets', 'at'), 12),\n",
" (('new', 'york', ',', 'n.y.', '10004'), 12),\n",
" (('on', 'the', '9th', 'floor', 'of'), 12),\n",
" ((',', 'monthly', 'on', 'wednesdays', ','), 12),\n",
" (('proposed', 'contract', 'between', 'the', 'department'), 12),\n",
" (('wednesday', 'of', 'each', 'month', 'at'), 12),\n",
" (('in', 'the', 'hearing', 'room', ','), 12),\n",
" (('floor', ',', '335', 'adams', 'street'), 12),\n",
" (('in', 'room', '603', ',', 'municipal'), 12),\n",
" ((',', '7th', 'floor', ',', 'new'), 12),\n",
" (('matter', 'of', 'a', 'proposed', 'revocable'), 12),\n",
" (('borough', 'of', 'manhattan', ',', 'in'), 12),\n",
" (('wednesdays', ',', 'commencing', '10:00', 'a.m.'), 12),\n",
" (('in', 'the', 'schedule', ',', 'or'), 12),\n",
" ((',', 'june', ',', 'september', ','), 12),\n",
" (('on', 'the', 'third', 'thursday', 'of'), 12),\n",
" (('or', 'consult', 'the', 'bulletin', 'board'), 12),\n",
" (('1:30', 'p.m.', 'contract', 'awards', 'public'), 12),\n",
" (('commissioner', '.', 'environmental', 'control', 'board'), 12),\n",
" (('method', 'of', 'extension', 'the', 'agency'), 12),\n",
" (('meeting', 'dates', ',', 'times', 'and'), 12),\n",
" (('schedule', ',', 'please', 'visit', 'nyc.gov/designcommission'), 12),\n",
" ((',', 'n.y.', '11101', ',', 'at'), 12),\n",
" (('.', 'headcount', 'of', 'personnel', 'in'), 12),\n",
" ((',', 'on', 'the', 'fourth', 'wednesday'), 12),\n",
" (('month', 'at', '40', 'rector', 'street'), 12),\n",
" (('higher', 'education', 'meets', 'at', '535'), 12),\n",
" (('review', 'sessions', 'begin', 'at', '9:30'), 12),\n",
" (('intends', 'to', 'utilize', ':', 'request'), 12),\n",
" (('a.m.', 'and', 'are', 'customarily', 'held'), 12),\n",
" ((',', 'hearing', 'room', '``', 'e'), 12),\n",
" (('third', 'floor', ',', 'new', 'york'), 12),\n",
" (('of', 'manhattan', ',', 'in', 'the'), 12),\n",
" (('meets', 'at', '10:00', 'a.m.', 'on'), 12),\n",
" (('//www.nyc.gov/html/ccrb/html/meeting.html',\n",
" 'for',\n",
" 'additional',\n",
" 'information',\n",
" 'and'),\n",
" 12),\n",
" (('weekly', ',', 'on', 'thursday', ','), 12),\n",
" (('information', ',', 'please', 'call', 'the'), 12),\n",
" (('location', 'as', 'warranted', '.', 'real'), 12),\n",
" (('the', 'fourth', 'wednesday', 'of', 'each'), 12),\n",
" (('rector', 'street', ',', 'new', 'york'), 12),\n",
" (('nyc.gov/designcommission', 'or', 'call', '212-788-3071', '.'), 12),\n",
" (('for', 'meeting', 'schedule', ',', 'please'), 12),\n",
" (('on', 'tuesdays', 'at', '10:00', 'a.m.'), 12),\n",
" (('at', 'www.nyc.gov/landmarks', '.', 'employees', \"'\"), 12),\n",
" (('board', 'of', 'higher', 'education', 'meets'), 12),\n",
" (('e', \"''\", 'on', 'tuesdays', 'at'), 12),\n",
" (('system', 'meets', 'in', 'the', 'boardroom'), 12),\n",
" (('avenue', ',', 'staten', 'island', ','), 12),\n",
" (('each', 'month', 'at', 'the', 'call'), 12),\n",
" (('noticed', 'by', 'the', 'commission', '.'), 12),\n",
" ((',', 'march', ',', 'april', ','), 12),\n",
" (('meets', 'on', '10th', 'floor', 'in'), 12),\n",
" (('on', 'wednesdays', ',', 'commencing', '10:00'), 12),\n",
" ((',', 'april', ',', 'june', ','), 12),\n",
" (('6th', 'floor', ',', 'hearing', 'room'), 12),\n",
" (('for', 'a', 'monthly', 'business', 'meeting'), 12),\n",
" (('employees', \"'\", 'retirement', 'system', 'meets'), 12),\n",
" (('as', 'warranted', '.', 'real', 'property'), 12),\n",
" (('city', 'hall', ',', 'manhattan', ','), 12),\n",
" (('rector', 'street', ',', '2nd', 'floor'), 12),\n",
" (('each', 'month', ',', 'commencing', 'at'), 12),\n",
" (('business', 'meeting', 'on', 'the', 'third'), 12),\n",
" (('at', '10:00', 'a.m.', ',', 'quarterly'), 12),\n",
" ((',', 'each', 'month', 'at', 'the'), 12),\n",
" (('10th', 'floor', 'in', 'the', 'commission'), 12),\n",
" (('6:00', 'p.m', '.', 'the', 'annual'), 12),\n",
" ((',', 'payable', 'in', 'equal', 'monthly'), 12),\n",
" ((',', 'ny', '10007', '.', 'for'), 12),\n",
" (('month', 'in', 'councilman', \"'s\", 'chamber'), 12),\n",
" (('a.m.', 'once', 'a', 'month', 'at'), 12),\n",
" (('10004', '.', 'commission', 'on', 'human'), 12),\n",
" (('the', 'last', 'wednesday', 'of', 'each'), 12),\n",
" (('administrative', 'services', 'division', 'of', 'citywide'), 12),\n",
" (('except', 'august', ')', 'at', '10:00'), 12),\n",
" (('street', ',', 'in', 'the', 'borough'), 12),\n",
" (('and', '4', 'pm', '.', 'please'), 12),\n",
" (('the', '9th', 'floor', 'of', '40'), 12),\n",
" (('of', 'a', 'proposed', 'revocable', 'consent'), 12),\n",
" (('in', 'the', 'hall', 'of', 'the'), 12),\n",
" ((',', 'october', ',', 'november', 'and'), 12),\n",
" ((',', 'at', '1:30', 'p.m.', 'contract'), 12),\n",
" (('schedule', ',', 'or', 'additonal', 'information'), 12),\n",
" (('education', 'meets', 'at', '535', 'east'), 12),\n",
" (('.', 'citywide', 'administrative', 'services', 'division'), 12),\n",
" ((',', 'twice', 'monthly', 'on', 'wednesday'), 12),\n",
" (('fourth', 'monday', 'in', 'january', ','), 12),\n",
" (('environmental', 'control', 'board', 'meets', 'at'), 12),\n",
" (('of', 'citywide', 'administrative', 'services', 'may'), 12),\n",
" (('of', 'each', 'month', 'at', '40'), 12),\n",
" (('10:00', 'a.m.', ',', 'quarterly', 'or'), 12),\n",
" (('location', 'as', 'warranted', '.', 'franchise'), 12),\n",
" (('at', '10:30', 'a.m.', 'board', 'of'), 12),\n",
" (('awards', 'public', 'hearing', 'meets', 'in'), 12),\n",
" (('preservation', 'commission', 'meets', 'in', 'the'), 12),\n",
" (('scheduled', 'for', 'the', 'last', 'wednesday'), 12),\n",
" (('current', 'meeting', 'dates', ',', 'times'), 12),\n",
" ((',', 'at', '5:30', 'p.m.', ','), 12),\n",
" ((',', 'times', 'and', 'agendas', ','), 12),\n",
" (('september', ',', 'october', ',', 'november'), 12),\n",
" (('of', '40', 'rector', 'street', '.'), 12),\n",
" (('new', 'york', ',', 'ny', '10041'), 12),\n",
" (('for', 'current', 'meeting', 'dates', ','), 12),\n",
" (('ny', '10006', '.', 'visit', 'http'), 12),\n",
" ((',', 'on', 'wednesdays', ',', 'commencing'), 12),\n",
" (('twice', 'a', 'month', 'in', 'councilman'), 12),\n",
" (('and', 'disposition', 'meets', 'in', 'spector'), 12),\n",
" (('on', 'tuesdays', ',', 'commencing', '10:00'), 12),\n",
" (('as', 'warranted', '.', 'franchise', 'and'), 12),\n",
" (('by', 'the', 'commission', '.', 'for'), 12),\n",
" (('revision', 'of', 'awards', 'meets', 'in'), 12),\n",
" ((\"''\", 'on', 'tuesdays', 'at', '10:00'), 12),\n",
" (('on', 'thursday', ',', 'commencing', '10:00'), 12),\n",
" (('board', 'meets', 'in', 'spector', 'hall'), 12),\n",
" (('the', 'commission', \"'s\", 'central', 'office'), 12),\n",
" (('month', 'at', '6:00', 'p.m', '.'), 12),\n",
" (('extension', 'the', 'agency', 'intends', 'to'), 12),\n",
" (('health', 'insurance', 'board', 'meets', 'in'), 12),\n",
" (('.', 'board', 'of', 'higher', 'education'), 12),\n",
" (('compensation', 'payable', 'to', 'the', 'city'), 12),\n",
" (('at', 'an', 'annual', 'rent', 'of'), 12),\n",
" (('on', 'tuesday', ',', 'at', '1:30'), 12),\n",
" ((',', 'please', 'visit', 'nyc.gov/designcommission', 'or'), 12),\n",
" (('design', 'commission', 'meets', 'at', 'city'), 12),\n",
" (('terms', 'and', 'conditions', 'for', 'compensation'), 12),\n",
" (('city', ',', 'n.y.', '11101', ','), 12),\n",
" (('otherwise', 'noted', ')', '.', 'any'), 12),\n",
" ...]"
]
}
],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Final n-gram pass for now: slide a 10-token window over lowerTokens\n",
"# (defined in an earlier cell; presumably the lowercased corpus tokens -- TODO confirm),\n",
"# tally each distinct window, and display the 50 most frequent ones.\n",
"corpus10grams = list(ngrams(lowerTokens, n=10))\n",
"corpus10gramFreqs = nltk.FreqDist(corpus10grams)\n",
"corpus10gramFreqs.most_common(n=50)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 45,
"text": [
"[(('--', '--', '--', '--', '--', '--', '--', '--', '--', '--'), 162),\n",
" (('headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':'),\n",
" 129),\n",
" (('maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for',\n",
" 'a'),\n",
" 112),\n",
" ((',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for',\n",
" 'a',\n",
" 'term'),\n",
" 112),\n",
" (('and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for',\n",
" 'a',\n",
" 'term',\n",
" 'of'),\n",
" 112),\n",
" (('sidewalk', 'caf', 'for', 'a', 'term', 'of', 'four', 'years', '.', ')'),\n",
" 101),\n",
" (('unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for',\n",
" 'a',\n",
" 'term',\n",
" 'of',\n",
" 'four',\n",
" 'years',\n",
" '.'),\n",
" 100),\n",
" (('operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for',\n",
" 'a',\n",
" 'term',\n",
" 'of',\n",
" 'four'),\n",
" 94),\n",
" (('an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for',\n",
" 'a',\n",
" 'term',\n",
" 'of',\n",
" 'four',\n",
" 'years'),\n",
" 94),\n",
" (('agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles'),\n",
" 92),\n",
" (('in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of'),\n",
" 91),\n",
" (('substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel'),\n",
" 91),\n",
" (('titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially'),\n",
" 91),\n",
" (('within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar'),\n",
" 91),\n",
" (('personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount'),\n",
" 91),\n",
" (('similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in'),\n",
" 91),\n",
" (('to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf',\n",
" 'for'),\n",
" 91),\n",
" ((':',\n",
" 'none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within'),\n",
" 89),\n",
" (('none',\n",
" 'headcount',\n",
" 'of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency'),\n",
" 85),\n",
" (('of',\n",
" 'personnel',\n",
" 'in',\n",
" 'substantially',\n",
" 'similar',\n",
" 'titles',\n",
" 'within',\n",
" 'agency',\n",
" ':',\n",
" '0'),\n",
" 83),\n",
" (('continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk',\n",
" 'caf'),\n",
" 82),\n",
" (('(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed'),\n",
" 79),\n",
" (('to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an',\n",
" 'unenclosed',\n",
" 'sidewalk'),\n",
" 79),\n",
" (('the',\n",
" 'borough',\n",
" 'of',\n",
" 'manhattan',\n",
" '(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ','),\n",
" 74),\n",
" (('manhattan',\n",
" '(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate',\n",
" 'an'),\n",
" 74),\n",
" (('in',\n",
" 'the',\n",
" 'borough',\n",
" 'of',\n",
" 'manhattan',\n",
" '(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain'),\n",
" 74),\n",
" (('borough',\n",
" 'of',\n",
" 'manhattan',\n",
" '(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and'),\n",
" 74),\n",
" (('of',\n",
" 'manhattan',\n",
" '(',\n",
" 'to',\n",
" 'continue',\n",
" 'to',\n",
" 'maintain',\n",
" ',',\n",
" 'and',\n",
" 'operate'),\n",
" 74),\n",
" (('published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city',\n",
" 'charter',\n",
" '312',\n",
" '(',\n",
" 'a'),\n",
" 63),\n",
" (('new', 'york', 'city', 'charter', '312', '(', 'a', ')', ':', 'agency'), 63),\n",
" (('annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to'),\n",
" 63),\n",
" (('is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city',\n",
" 'charter',\n",
" '312',\n",
" '('),\n",
" 63),\n",
" (('contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that',\n",
" 'the'),\n",
" 63),\n",
" (('schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that',\n",
" 'the',\n",
" 'mayor',\n",
" 'will',\n",
" 'be'),\n",
" 63),\n",
" (('and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city'),\n",
" 63),\n",
" (('plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york'),\n",
" 63),\n",
" (('york', 'city', 'charter', '312', '(', 'a', ')', ':', 'agency', ':'), 63),\n",
" (('pursuant', 'to', 'new', 'york', 'city', 'charter', '312', '(', 'a', ')'),\n",
" 63),\n",
" (('to', 'new', 'york', 'city', 'charter', '312', '(', 'a', ')', ':'), 63),\n",
" (('schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city',\n",
" 'charter'),\n",
" 63),\n",
" (('that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new',\n",
" 'york',\n",
" 'city',\n",
" 'charter',\n",
" '312'),\n",
" 63),\n",
" (('annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that'),\n",
" 63),\n",
" (('and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that',\n",
" 'the',\n",
" 'mayor',\n",
" 'will'),\n",
" 63),\n",
" (('plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is',\n",
" 'hereby',\n",
" 'given',\n",
" 'that',\n",
" 'the',\n",
" 'mayor'),\n",
" 63),\n",
" (('contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant',\n",
" 'to',\n",
" 'new'),\n",
" 63),\n",
" (('included',\n",
" 'in',\n",
" 'fy',\n",
" '2015',\n",
" 'annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice'),\n",
" 62),\n",
" (('fy',\n",
" '2015',\n",
" 'annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published'),\n",
" 62),\n",
" (('in',\n",
" 'the',\n",
" 'fy',\n",
" '2015',\n",
" 'annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that'),\n",
" 62),\n",
" (('in',\n",
" 'fy',\n",
" '2015',\n",
" 'annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'notice',\n",
" 'is'),\n",
" 62),\n",
" (('2015',\n",
" 'annual',\n",
" 'contracting',\n",
" 'plan',\n",
" 'and',\n",
" 'schedule',\n",
" 'that',\n",
" 'is',\n",
" 'published',\n",
" 'pursuant'),\n",
" 62)]"
]
}
],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment