sonalisharma/Wordcraft_NLP

## Wordcraft_NLP
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Sample Sentences"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#Sample sentences\n",
      "\"\"\"\n",
      "\"I went to the farm yesterday.\",\n",
      "\"Every morning, the animals wake up when the rooster crows.\",\n",
      "\"There was a duck on the pond.\",\n",
      "\"Old McDonald had a farm.\",\n",
      "\"The chickens were laying eggs.\",\n",
      "\"Cows live in the shed.\",\n",
      "\"Pigs live in a pigsty.\",\n",
      "\"The goats are playing on the farm.\",\n",
      "\"The chicken are pecking at their food, pecking and walking.\",\n",
      "\"The farmer and his wife live in a farmhouse.\",\n",
      "\"The dog helps the farmer herd the sheep.\",\n",
      "\"The cow goes moo and gives milk for children to drink.\",\n",
      "\"The pig loves to take mud baths.\",\n",
      "\"Sheep give us wool.\",\n",
      "\"The horse decided to take a walk to meet his friend the duck.\",\n",
      "\"On his way, the horse met a goat.\",\n",
      "\"The cat like lying out in the sun near the barn.\",\n",
      "\"When spring comes, it is time to give the sheep a haircut.\",\n",
      "\"When I visit the farm, I like riding the horse.\",\n",
      "\"The farmer and his wife live happily on the farm.\",\n",
      "\"The farmer feeds his horse, with hay.\",\n",
      "\"Horses and cows both eat hay.\"]\n",
      "\"\"\"\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 1,
       "text": [
        "'\\n\"I went to the farm yesterday.\",\\n\"Every morning, the animals wake up when the rooster crows.\",\\n\"There was a duck on the pond.\",\\n\"Old McDonald had a farm.\",\\n\"The chickens were laying eggs.\",\\n\"Cows live in the shed.\",\\n\"Pigs live in a pigsty.\",\\n\"The goats are playing on the farm.\",\\n\"The chicken are pecking at their food, pecking and walking.\",\\n\"The farmer and his wife live in a farmhouse.\",\\n\"The dog helps the farmer herd the sheep.\",\\n\"The cow goes moo and gives milk for children to drink.\",\\n\"The pig loves to take mud baths.\",\\n\"Sheep give us wool.\",\\n\"The horse decided to take a walk to meet his friend the duck.\",\\n\"On his way, the horse met a goat.\",\\n\"The cat like lying out in the sun near the barn.\",\\n\"When spring comes, it is time to give the sheep a haircut.\",\\n\"When I visit the farm, I like riding the horse.\",\\n\"The farmer and his wife live happily on the farm.\",\\n\"The farmer feeds his horse, with hay.\",\\n\"Horses and cows both eat hay.\"]\\n'"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Import libraries"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from collections import defaultdict\n",
      "import nltk\n",
      "from nltk.corpus import wordnet as wn\n",
      "import en"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Define Seed lists"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Seed sound\n",
      "# convert to list \n",
      "sounds = {\"cow\":\"moooooooo\",\"piggy\":\"oink\",\"sheep\":\"baa\",\"goat\":\"beh\",\"duck\":\"quack\",\"horse\":\"neigh\",\"chicken\":\"cluck\",\"dog\":\"bark\",\n",
      "          \"dog\":\"growl\",\"cat\":\"meow\",\"pig\":\"squeal\",\"pig\":\"grunt\",\"cat\":\"mew\"}\n",
      "\n",
      "#Seed locations\n",
      "seed_loc = [\"near\",\"front\",\"behind\",\"inside\",\"top\",\"below\",\"on\",\"above\",\"far\"]\n",
      "\n",
      "# Read the seed adjectives into {category: [adjective, ...]}.\n",
      "# Every usable row of the file has the form `adjective,category`.\n",
      "seed_adj_list = defaultdict(list)\n",
      "with open('data/adj_list.csv', 'r') as adj_file:\n",
      "    # splitlines() accepts CR, LF and CRLF line endings, so the whole file is read\n",
      "    # no matter which line-ending convention it was saved with.\n",
      "    adj_rows = adj_file.read().splitlines()\n",
      "for adj_row in adj_rows:\n",
      "    fields = [field.strip() for field in adj_row.split(',')]\n",
      "    # Blank or incomplete rows are skipped instead of raising IndexError.\n",
      "    if len(fields) < 2 or not fields[0] or not fields[1]:\n",
      "        continue\n",
      "    seed_adj_list[fields[1]].append(fields[0])\n",
      "print seed_adj_list\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "grunt\n",
        "defaultdict(<type 'list'>, {'emotion': ['angry', 'animated', 'annoyed', 'ashamed', 'bewildered', 'blushing', 'bored', 'depressed', 'excited', 'fierce', 'happy', 'sad', 'scared'], 'color': ['black', 'black-and-white', 'blue', 'brown', 'colorful', 'pink', 'purple', 'red', 'yellow'], 'position': ['near'], 'size': ['big', 'broad', 'chubby', 'extra-large', 'extra-small', 'fat', 'giant', 'half', 'huge', 'hulking', 'large', 'lean', 'little', 'obese', 'skinny', 'slim', 'small', 'tall', 'teeny', 'teeny-tiny', 'thin', 'tiny']})\n"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Approach 1: Chunking using NodeBox"
     ]
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "a) Printing entire chunk"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "s = \"The cat was sleeping in the farm. The dog looked sad\"\n",
      "chunks = defaultdict(list)\n",
      "for i,chunk in enumerate(en.sentence.chunk(s)):\n",
      "    print chunk\n",
      "    #print len(chunk)\n",
      "    #print chunk[0]\n",
      "    #print chunk[1]\n",
      "    #print chunk[2]\n",
      "#    chunks[str(i)+\"_\"+chunk[0]].append(chunk[1])\n",
      "#matches = en.sentence.find(s,  \"NN VBG\")\n",
      "#print matches\n",
      "#print chunks"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "['S', ['NP', ('The', 'DT'), ('cat', 'NN')], ['VA', ['VP', ('was', 'VBD'), ('sleeping', 'VBG')], ['PP', ('in', 'IN'), ['NP', ('the', 'DT'), ('farm', 'NN')]]]]\n",
        "('.', '.')\n",
        "['S', ['NP', ('The', 'DT'), ('dog', 'NN')], ['VA', ['VP', ('looked', 'VBD')], ['NP', ('sad', 'JJ')]]]\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "b) Traversing the chunk tree and separating paths"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import string\n",
      "s1 = \"The happy cat was sleeping in the farm and the dog was jumping around in joy\"\n",
      "s2 =\"The happy girl was sleeping in front of the farm  with her cat which looked sad\"\n",
      "\n",
      "sent1 =\"Here is a duck that goes quack quack. Ducks waddle and swim in ponds. The happy girl was sleeping in front of the farm\"\n",
      "#sent = \"The cats were sleeping in a big farm and the dog was on top of the big house\"\n",
      "sent = \"The happy cat was sleeping in the farm and the dog was jumping around in joy\"\n",
      "noun_matches = en.sentence.find(sent,\"NN\")\n",
      "print noun_matches\n",
      "\n",
      "#sent1 = sent.translate(sent.maketrans(\"\",\"\"), string.punctuation)\n",
      "#print sent1\n",
      "tokens = nltk.word_tokenize(sent)\n",
      "\n",
      "sent_sounds = [(key,word) for key,value in sounds.items() for word in tokens if (word==value) ]\n",
      "print sent_sounds\n",
      "\n",
      "tag = \"\"\n",
      "tag_words = []\n",
      "chunked_sentence = list()\n",
      "def callback(chunk, token, tag):\n",
      "    if chunk != None :\n",
      "        if en.sentence.tag(chunk)[0][0] =='S':\n",
      "            tag = \"S\"\n",
      "        if en.sentence.tag(chunk)[0][0] =='NP':\n",
      "            print 'NP'\n",
      "            tag = 'NP'\n",
      "        if en.sentence.tag(chunk)[0][0] =='PP':\n",
      "            print 'PP'\n",
      "            tag = 'PP'\n",
      "        if en.sentence.tag(chunk)[0][0] =='VP':\n",
      "            print 'VP'\n",
      "            tag = 'VP'\n",
      "        chunked_sentence.append(tag)\n",
      "        tag=\"\"\n",
      "    if chunk == None :\n",
      "        tag_words.append((token, tag))\n",
      "        print token, tag,\"(\"+en.sentence.tag_description(tag)[0]+\")\"\n",
      "        chunked_sentence.append((token,tag))\n",
      "en.sentence.traverse(sent, callback)\n",
      "print chunked_sentence\n",
      "\n",
      "print en.sentence.tag(sent)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[[('cat', 'NN')], [('farm', 'NN')], [('dog', 'NN')], [('joy', 'NN')]]\n",
        "[]\n",
        "NP\n",
        "The DT (determiner)\n",
        "happy JJ (adjective)\n",
        "cat NN (noun, singular or mass)\n",
        "VP\n",
        "was VBD (verb, past tense)\n",
        "sleeping VBG (verb, gerund or present participle)\n",
        "PP\n",
        "in IN (conjunction, subordinating or preposition)\n",
        "NP\n",
        "the DT (determiner)\n",
        "farm NN (noun, singular or mass)\n",
        "and CC (conjunction, coordinating)\n",
        "NP\n",
        "the DT (determiner)\n",
        "dog NN (noun, singular or mass)\n",
        "VP\n",
        "was VBD (verb, past tense)\n",
        "jumping VBG (verb, gerund or present participle)\n",
        "around IN (conjunction, subordinating or preposition)\n",
        "PP\n",
        "in IN (conjunction, subordinating or preposition)\n",
        "NP\n",
        "joy NN (noun, singular or mass)\n",
        "['S', 'NP', ('The', 'DT'), ('happy', 'JJ'), ('cat', 'NN'), None, 'VP', ('was', 'VBD'), ('sleeping', 'VBG'), 'PP', ('in', 'IN'), 'NP', ('the', 'DT'), ('farm', 'NN'), ('and', 'CC'), 'S', 'NP', ('the', 'DT'), ('dog', 'NN'), 'VP', ('was', 'VBD'), ('jumping', 'VBG'), ('around', 'IN'), 'PP', ('in', 'IN'), 'NP', ('joy', 'NN')]\n",
        "The/DT happy/JJ cat/NN was/VBD sleeping/VBG in/IN the/DT farm/NN and/CC the/DT dog/NN was/VBD jumping/VBG around/IN in/IN joy/NN\n"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "c) Trial and Error to fetch Noun attributes (verbs, adjective, locations)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Walk the tagged words of `sent` and credit every verb, adjective and preposition\n",
      "# to a noun. The result maps noun -> list of {\"verb\", \"adj\", \"location\"} records.\n",
      "verbs = []\n",
      "adj = []\n",
      "location = []\n",
      "noun = []\n",
      "final_dict = defaultdict(list)\n",
      "# Penn Treebank verb tags (the earlier list contained the misspellings \"VGB\" and \"VG\").\n",
      "verb_tags = (\"VB\", \"VBD\", \"VBG\", \"VBN\", \"VBP\", \"VBZ\")\n",
      "for word,tag in en.sentence.tag(sent):\n",
      "    print word,tag\n",
      "    if tag in (\"NN\", \"NNS\"):\n",
      "        # A new noun closes the previous one: store what has been collected and start\n",
      "        # afresh. Words seen before the very first noun stay pending and end up in\n",
      "        # that first noun's record.\n",
      "        if noun:\n",
      "            final_dict[noun[0]].append({\"verb\":verbs,\"adj\":adj,\"location\":location})\n",
      "            verbs = []\n",
      "            adj = []\n",
      "            location = []\n",
      "        noun = [word]\n",
      "    elif tag in verb_tags:\n",
      "        #Put additional filter to filter out words like were etc\n",
      "        verbs.append(word)\n",
      "    elif tag == \"JJ\":\n",
      "        adj.append(word)\n",
      "    elif tag == \"IN\":\n",
      "        location.append(word)\n",
      "\n",
      "# Store the last noun as well; without this step it never reaches final_dict.\n",
      "if noun:\n",
      "    final_dict[noun[0]].append({\"verb\":verbs,\"adj\":adj,\"location\":location})\n",
      "\n",
      "print final_dict\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "The DT\n",
        "happy JJ\n",
        "cat NN\n",
        "was VBD\n",
        "sleeping VBG\n",
        "in IN\n",
        "the DT\n",
        "farm NN\n",
        "and CC\n",
        "the DT\n",
        "dog NN\n",
        "was VBD\n",
        "jumping VBG\n",
        "around IN\n",
        "in IN\n",
        "joy NN\n",
        "defaultdict(<type 'list'>, {'farm': [{'verb': [], 'adj': [], 'location': []}], 'dog': [{'verb': ['was', 'jumping'], 'adj': [], 'location': ['around', 'in']}], 'cat': [{'verb': ['was', 'sleeping'], 'adj': ['happy'], 'location': ['in']}]})\n"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Approach 2: Custom method to create sentence chunks"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Here I identify all nouns in a sentence and find the associated adjectives, verbs, location attributes.\n",
      "# NOTE: create_chunks() is defined in the \"Step1\" cell below and create_attributes() in the\n",
      "# \"Step2\" cell; both cells must have been run before this one.\n",
      "sentences = [\"The happy cat was sleeping in the farm and a big dog was jumping around in joy\"]\n",
      "# More inputs to try (add them to `sentences`):\n",
      "#   \"The happy girl was sleeping in front of the farm  with her cat which looked sad\"\n",
      "#   \"Here is a duck that goes quack quack. Ducks waddle and swim in ponds\"\n",
      "#   \"The happy girl was sleeping in front of the farm\"\n",
      "#   \"The cats were sleeping in a big farm and the dog was on top of the big house\"\n",
      "#   \"The farmer feeds his horse, with hay\"\n",
      "for sent in sentences:\n",
      "    #This method creates sentence chunks\n",
      "    create_chunks(sent)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[{'adj': ['happy'], 'verbs': ['was', 'sleeping'], 'noun': 'cat', 'location': []}, {'adj': [], 'verbs': [], 'noun': 'farm', 'location': []}, {'adj': ['big'], 'verbs': ['was', 'jumping'], 'noun': 'dog', 'location': []}, {'adj': [], 'verbs': [], 'noun': 'joy', 'location': []}]\n"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Step1: Create sentence chunks by splitting the sentence by Preposition and Conjunction"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "I create sentence chunks by splitting the sentence on prepositions and conjunctions. From each chunk I take the noun \n",
      "and find all adjectives, verbs and location words. This is done for each chunk and every noun found in the chunk.\n",
      "For every noun, I collect all associated adjectives, verbs and locations and put them all in a dictionary."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\n",
      "def create_chunks(sent):\n",
      "    \"\"\"Split a sentence into chunks and extract the noun attributes of each chunk.\n",
      "\n",
      "    sent -- the sentence to process (the callers in this notebook pass a string).\n",
      "\n",
      "    The sentence is tagged with en.sentence.tag(). A chunk is closed after every\n",
      "    word tagged CC and after every word tagged IN that is not listed in seed_loc;\n",
      "    the closing word stays in the chunk as its last element. Whatever follows the\n",
      "    last split point forms the final chunk (which may be empty).\n",
      "\n",
      "    Returns the value of create_attributes() for the chunks, so the result is\n",
      "    available to the caller.\n",
      "    \"\"\"\n",
      "    chunks = []\n",
      "    current = []\n",
      "    for word,tag in en.sentence.tag(sent):\n",
      "        current.append((word,tag))\n",
      "        if tag == \"CC\" or (tag == \"IN\" and word not in seed_loc):\n",
      "            chunks.append(current)\n",
      "            current = []\n",
      "    chunks.append(current)\n",
      "    return create_attributes(chunks)\n",
      " "
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 17
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Step2: Go through chunks and get nouns attributes"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def create_attributes(chunks):\n",
      "    \"\"\"Build one attribute record per chunk that contains a noun.\n",
      "\n",
      "    chunks -- list of chunks; each chunk is a list of (word, tag) pairs.\n",
      "\n",
      "    For every chunk with at least one NN/NNS word a dict with the keys\n",
      "    \"noun\", \"verbs\", \"adj\" and \"location\" is produced. \"noun\" is the first\n",
      "    noun of the chunk, \"verbs\" holds the words carrying a verb tag, \"adj\" the\n",
      "    JJ words and \"location\" the IN words that are listed in seed_loc.\n",
      "    Chunks without a noun produce no record.\n",
      "\n",
      "    The list of records is printed (as before) and also returned so that\n",
      "    callers can work with it.\n",
      "    \"\"\"\n",
      "    # Penn Treebank verb tags; the earlier list had the misspellings \"VGB\"/\"VG\"\n",
      "    # and missed base, participle and non-3rd-person forms such as \"live\" (VBP).\n",
      "    verb_tags = (\"VB\", \"VBD\", \"VBG\", \"VBN\", \"VBP\", \"VBZ\")\n",
      "    final_dict = []\n",
      "    for data in chunks:\n",
      "        # Fresh accumulators for every chunk.\n",
      "        noun = []\n",
      "        verbs = []\n",
      "        adj = []\n",
      "        location = []\n",
      "        for word,tag in data:\n",
      "            if tag in (\"NN\", \"NNS\"):\n",
      "                noun.append(word)\n",
      "            elif tag in verb_tags:\n",
      "                #Put additional filter to filter out words like were etc\n",
      "                verbs.append(word)\n",
      "            elif tag == \"JJ\":\n",
      "                adj.append(word)\n",
      "            elif tag == \"IN\" and word in seed_loc:\n",
      "                location.append(word)\n",
      "        if noun:\n",
      "            final_dict.append({\"noun\":noun[0],\"verbs\":verbs,\"adj\":adj,\"location\":location})\n",
      "\n",
      "    # print(x) with a single argument behaves the same as the print statement.\n",
      "    print(final_dict)\n",
      "    return final_dict\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import en\n",
      "print en.sentence.tag_description(\"WDT\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "('determiner', 'which, whatever, whichever')\n"
       ]
      }
     ],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sample sentences about the farm, kept as a list so that the cell is valid Python\n",
      "# and the sentences can be fed to the chunkers.\n",
      "farm_sentences = [\n",
      "    \"The cat likes lying out in the sun near the barn\",\n",
      "    \"The happy cat was sleeping in the farm\",\n",
      "    \"There is a cat in the field by the farm\",\n",
      "    \"I went to the farm yesterday.\",\n",
      "    \"Every morning, the animals wake up when the rooster crows.\",\n",
      "    \"There was a duck on the pond.\",\n",
      "    \"Old McDonald had a farm.\",\n",
      "    \"The chickens were laying eggs.\",\n",
      "    \"Cows live in the shed.\",\n",
      "    \"Pigs live in a pigsty.\",\n",
      "    \"The goats are playing on the farm.\",\n",
      "    \"The chicken are pecking at their food, pecking and walking.\",\n",
      "    \"The farmer and his wife live in a farmhouse.\",\n",
      "    \"The dog helps the farmer herd the sheep.\",\n",
      "    \"The cow goes moo and gives milk for children to drink.\",\n",
      "    \"The pig loves to take mud baths.\",\n",
      "    \"Sheep give us wool.\",\n",
      "    \"The horse decided to take a walk to meet his friend the duck.\",\n",
      "    \"On his way, the horse met a goat.\",\n",
      "    \"The cat like lying out in the sun near the barn.\",\n",
      "    \"When spring comes, it is time to give the sheep a haircut.\",\n",
      "    \"When I visit the farm, I like riding the horse.\",\n",
      "    \"The farmer and his wife live happily on the farm.\",\n",
      "    \"The farmer feeds his horse, with hay.\",\n",
      "    \"Horses and cows both eat hay.\",\n",
      "]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sentences = [\"The happy cat was sleeping in the farm and the dog was jumping with joy\"]\n",
      "             \n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 58
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import nltk\n",
      "#print sentences[0]\n",
      "grammar = r\"\"\"\n",
      "  NP:   # chunk determiner/possessive, adjectives and nouns             # chunk sequences of proper nouns\n",
      "      {<NN|NNP|NNS>+.*<RP>*<VBG|VBZ|VBP>+<RP>*}\n",
      "      {<RP>*<VBG|VBZ|VBP>+<RP>*.*<NN|NNP|NNS>+}\n",
      "      {<NN|NNP|NNS>+.*<JJ>+.*<NN|NNP|NNS>*}\n",
      "      {<JJ>+.*<NN|NNP|NNS>+}\n",
      "      {<DT|IN>+.*<NN|NNP|NNS>+}\n",
      "\"\"\"\n",
      "cp = nltk.RegexpParser(grammar)\n",
      "for sent in sentences:\n",
      "    #print nltk.pos_tag(nltk.word_tokenize(sent))\n",
      "    result = cp.parse(nltk.pos_tag(nltk.word_tokenize(sent)))\n",
      "    print \"-------------\"\n",
      "    print result\n",
      "    print \"-------------\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "-------------\n",
        "(S\n",
        "  (NP The/DT happy/NN cat/NN)\n",
        "  was/VBD\n",
        "  sleeping/VBG\n",
        "  (NP in/IN the/DT farm/NN)\n",
        "  and/CC\n",
        "  (NP the/DT dog/NN)\n",
        "  was/VBD\n",
        "  jumping/VBG\n",
        "  (NP with/IN joy/NN))\n",
        "-------------\n"
       ]
      }
     ],
     "prompt_number": 116
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from collections import defaultdict\n",
      "dictionary = defaultdict(list)\n",
      "newdictionary = defaultdict(list)\n",
      "from pattern.en import parsetree\n",
      "for sent in sentences:\n",
      "    s = parsetree(sent)\n",
      "    for j,sentence in enumerate(s): \n",
      "        for i,chunk in enumerate(sentence.chunks):\n",
      "            for word in chunk.words:\n",
      "                #print word.string+\"-->\"+word.tag,\n",
      "                print word.string,word.tag,chunk.head\n",
      "                newdictionary[chunk.part_of_speech].append((word.string,word.tag))\n",
      "                dictionary[str(j)+\"_\"+str(i)+\"_chunk\"].append((word.string,word.tag))\n",
      "            \n",
      "         #print word.string\n",
      "         #print word.tag\n",
      "print dictionary\n",
      "#print newdictionary"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "The DT Word(u'cat/NN')\n",
        "happy JJ Word(u'cat/NN')\n",
        "cat NN Word(u'cat/NN')\n",
        "was VBD Word(u'sleeping/VBG')\n",
        "sleeping VBG Word(u'sleeping/VBG')\n",
        "in IN Word(u'in/IN')\n",
        "the DT Word(u'farm/NN')\n",
        "farm NN Word(u'farm/NN')\n",
        "the DT Word(u'dog/NN')\n",
        "dog NN Word(u'dog/NN')\n",
        "was VBD Word(u'jumping/VBG')\n",
        "jumping VBG Word(u'jumping/VBG')\n",
        "with IN Word(u'with/IN')\n",
        "joy NN Word(u'joy/NN')\n",
        "defaultdict(<type 'list'>, {'0_2_chunk': [(u'in', u'IN')], '0_7_chunk': [(u'joy', u'NN')], '0_4_chunk': [(u'the', u'DT'), (u'dog', u'NN')], '0_3_chunk': [(u'the', u'DT'), (u'farm', u'NN')], '0_1_chunk': [(u'was', u'VBD'), (u'sleeping', u'VBG')], '0_6_chunk': [(u'with', u'IN')], '0_0_chunk': [(u'The', u'DT'), (u'happy', u'JJ'), (u'cat', u'NN')], '0_5_chunk': [(u'was', u'VBD'), (u'jumping', u'VBG')]})\n"
       ]
      }
     ],
     "prompt_number": 101
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sounds = {\"cow\":\"moooooooo\",\"piggy\":\"oink\",\"sheep\":\"baa\",\"goat\":\"beh\",\"duck\":\"quack\",\"horse\":\"neigh\",\"chicken\":\"cluck\",\"dog\":\"bark\",\n",
      "          \"dog\":\"growl\",\"cat\":\"meow\",\"pig\":\"squeal\",\"pig\":\"grunt\",\"cat\":\"mew\"}\n",
      "location = []\n",
      "verbs = defaultdict(list)\n",
      "# The chunk keys are unique, so sorting the (key, words) pairs orders them by key.\n",
      "data = sorted(dictionary.items())\n",
      "print data\n",
      "for chunk_key, chunk_words in data:\n",
      "    for w in chunk_words:\n",
      "        # w is a (word, tag) pair; report the words that are keys of `sounds`.\n",
      "        if w[0].lower() in sounds:\n",
      "            print \"----------------\"\n",
      "            print w\n",
      "            print \"----------------\"\n",
      "\n",
      "# Visualness of Noun\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[('0_0_chunk', [(u'The', u'DT'), (u'cat', u'NN')]), ('0_1_chunk', [(u'likes', u'VBZ'), (u'lying', u'VBG')]), ('0_2_chunk', [(u'out', u'IN'), (u'in', u'IN')]), ('0_3_chunk', [(u'the', u'DT'), (u'sun', u'NN')]), ('0_4_chunk', [(u'near', u'IN')]), ('0_5_chunk', [(u'the', u'DT'), (u'barn', u'NN')])]\n",
        "----------------\n",
        "(u'cat', u'NN')\n",
        "----------------\n"
       ]
      }
     ],
     "prompt_number": 50
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 105,
       "text": [
        "['baa',\n",
        " 'neigh',\n",
        " 'grunt',\n",
        " 'oink',\n",
        " 'moooo',\n",
        " 'quack',\n",
        " 'cluck',\n",
        " 'mew',\n",
        " 'beh',\n",
        " 'growl']"
       ]
      }
     ],
     "prompt_number": 105
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 25
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from nltk.corpus import stopwords\n",
      "meta = stopwords.words('english')\n",
      "doc = \"Mr. duckbird was in front of my house. My farm has a big red colored house. There is a cow that goes Moooooo and makes the milk for children to drink\"\n",
      "tokens = nltk.word_tokenize(doc)\n",
      "words = []\n",
      "for t in tokens:\n",
      "    if (t not in meta):\n",
      "        words.append(t)\n",
      "keywordSummary = en.content.keywords(doc, top=10, nouns=True, singularize=True, filters=meta)\n",
      "docCategories = en.content.categorise(doc)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 74
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for item in docCategories:\n",
      "    #if (item.type == 'emotions'):\n",
      "    itemNameKey = item.name.partition(' ')\n",
      "    print \"###### %s \\n\" %(item.name)\n",
      "    print \"- Total no of words in this category: %d\" %(item.count)\n",
      "    #print \"- Normalized Count: %f\" %(item.count/len(w))\n",
      "    print \"- Type of category: %s\" %(item.type)\n",
      "    print \"- Set of Words in this category: %s\" %item.words"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "###### orality \n",
        "\n",
        "- Total no of words in this category: 2\n",
        "- Type of category: need\n",
        "- Set of Words in this category: ['milk', 'drink']\n",
        "###### concreteness \n",
        "\n",
        "- Total no of words in this category: 1\n",
        "- Type of category: regressive cognition\n",
        "- Set of Words in this category: ['front']\n",
        "###### cold \n",
        "\n",
        "- Total no of words in this category: 1\n",
        "- Type of category: sensation\n",
        "- Set of Words in this category: ['children']\n",
        "###### vision \n",
        "\n",
        "- Total no of words in this category: 1\n",
        "- Type of category: sensation\n",
        "- Set of Words in this category: ['colored']\n",
        "###### instrumental behavior \n",
        "\n",
        "- Total no of words in this category: 1\n",
        "- Type of category: secondary\n",
        "- Set of Words in this category: ['makes']\n"
       ]
      }
     ],
     "prompt_number": 75
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#print en.basic.words"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 53
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for i in en.sentence.chunk(doc):\n",
      "    print i"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "['NP', ('Mr', 'NNP')]\n",
        "('.', '.')\n",
        "['S', ['NP', ('duckbird', 'NN')], ['VA', ['VP', ('was', 'VBD')], ['PP', ('at', 'IN'), ['NP', ('my', 'PRP$'), ('farm', 'NN')]]]]\n",
        "('.', '.')\n",
        "['S', ['NP', ('My', 'PRP$'), ('farm', 'NN')], ['VA', ['VP', ('has', 'VBZ')], ['NP', ('a', 'DT'), ('big', 'JJ'), ('red', 'JJ'), ('colored', 'JJ'), ('house', 'NN')]]]\n",
        "('.', '.')\n",
        "('There', 'EX')\n",
        "['VA', ['VP', ('is', 'VBZ')], ['NP', ('a', 'DT'), ('cow', 'NN')]]\n",
        "('that', 'IN')\n",
        "['VA', ['VP', ('goes', 'VBZ')], ['NP', ('Moooooo', 'NN')]]\n",
        "('and', 'CC')\n",
        "['VA', ['VP', ('makes', 'VBZ')], ['NP', ('the', 'DT'), ('milk', 'NN')]]\n",
        "['S', ['PP', ('for', 'IN'), ['NP', ('children', 'NNS')]], ['VA', ['VP', ('to', 'TO')], ['NP', ('drink', 'NN')]]]\n"
       ]
      }
     ],
     "prompt_number": 64
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}