@ak314
Created April 7, 2017 12:31
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<style>\n",
"/*\n",
"Placeholder for custom user CSS\n",
"\n",
"mainly to be overridden in profile/static/custom/custom.css\n",
"\n",
"This will always be an empty file in IPython\n",
"*/\n",
"\n",
"@import url('http://fonts.googleapis.com/css?family=Crimson+Text');\n",
"@import url('http://fonts.googleapis.com/css?family=Kameron');\n",
"@import url('http://fonts.googleapis.com/css?family=Lato:200');\n",
"@import url('http://fonts.googleapis.com/css?family=Lato:300');\n",
"@import url('http://fonts.googleapis.com/css?family=Lato:400');\n",
"\n",
"\n",
"/* Change code font */\n",
".CodeMirror pre {\n",
" font-family: Monaco;\n",
" font-size: 14pt;\n",
"}\n",
"\n",
"div.output pre{\n",
" font-family: Monaco;\n",
" font-size: 14pt;\n",
"}\n",
"\n",
"div.output_html td{\n",
" font-family: Monaco;\n",
" font-size: 10pt;\n",
"}\n",
"\n",
"div.prompt{\n",
" font-family: Monaco;\n",
" font-size: 10pt;\n",
"}\n",
"\n",
"div.completions select{\n",
" font-family: Monaco;\n",
" font-size: 12pt;\n",
"}\n",
"\n",
"div.container pre{\n",
" font-family: Monaco;\n",
" font-size: 12pt;\n",
"}\n",
"\n",
"div.tooltiptext pre{\n",
" font-family: Monaco;\n",
" font-size: 10pt;\n",
"}\n",
"\n",
"div.input_area {\n",
" border-color: rgba(0,0,0,0.10);\n",
" background: rbga(0,0,0,0.5);\n",
"}\n",
"\n",
"div.text_cell {\n",
" max-width: 105ex; /* instead of 100%, */\n",
"}\n",
"\n",
"div.text_cell_render {\n",
" font-family: lato;\n",
" font-size: 14pt;\n",
" line-height: 145%; /* added for some line spacing of text. */\n",
"}\n",
"\n",
"div.text_cell_render code{\n",
" font-family: Monaco;\n",
" font-size: 2pt;\n",
"}\n",
"\n",
"div.text_cell_render h1,\n",
"div.text_cell_render h2,\n",
"div.text_cell_render h3,\n",
"div.text_cell_render h4,\n",
"div.text_cell_render h5,\n",
"div.text_cell_render h6 {\n",
" font-family: lato, 'HelveticaNeue-Light';\n",
" font-weight: 300;\n",
"}\n",
"\n",
"div.text_cell_render h1 {\n",
" font-size: 30pt;\n",
"}\n",
"\n",
"div.text_cell_render h2 {\n",
" font-size: 24pt;\n",
"}\n",
"\n",
"div.text_cell_render h3 {\n",
" font-size: 28pt;\n",
"}\n",
"\n",
".rendered_html pre,\n",
".rendered_html code {\n",
" font-size: medium;\n",
"}\n",
"\n",
".rendered_html ol {\n",
" list-style:decimal;\n",
" margin: 1em 2em;\n",
"}\n",
"\n",
".prompt.input_prompt {\n",
" color: rgba(0,0,0,0.5);\n",
"}\n",
"\n",
".cell.command_mode.selected {\n",
" border-color: rgba(0,0,0,0.1);\n",
"}\n",
"\n",
".cell.edit_mode.selected {\n",
" border-color: rgba(0,0,0,0.15);\n",
" box-shadow: 0px 0px 5px #f0f0f0;\n",
" -webkit-box-shadow: 0px 0px 5px #f0f0f0;\n",
"}\n",
"\n",
"div.output_scroll {\n",
" -webkit-box-shadow: inset 0 2px 8px rgba(0,0,0,0.1);\n",
" box-shadow: inset 0 2px 8px rgba(0,0,0,0.1);\n",
" border-radious: 2px;\n",
"}\n",
"\n",
"#menubar .navbar-inner {\n",
" background: #fff;\n",
" -webkit-box-shadow: none;\n",
" box-shadow: none;\n",
" border-radius: 0;\n",
" border: none;\n",
" font-family: lato;\n",
" font-weight: 400;\n",
"}\n",
"\n",
".navbar-fixed-top .navbar-inner,\n",
".navbar-static-top .navbar-inner {\n",
" box-shadow: none;\n",
" -webkit-box-shadow: none;\n",
" border: none;\n",
"}\n",
"\n",
"div#notebook_panel {\n",
" box-shadow: none;\n",
" -webkit-box-shadow: none;\n",
" border-top: none;\n",
"}\n",
"\n",
"div#notebook {\n",
" border-top: 1px solid rgba(0,0,0,0.15);\n",
"}\n",
"\n",
"#menubar .navbar .navbar-inner,\n",
".toolbar-inner {\n",
" padding-left: 0;\n",
" padding-right: 0;\n",
"}\n",
"\n",
"#checkpoint_status,\n",
"#autosave_status {\n",
" color: rgba(0,0,0,0.5);\n",
"}\n",
"\n",
"#header {\n",
" font-family: lato;\n",
"}\n",
"\n",
"#notebook_name {\n",
" font-weight: 200;\n",
"}\n",
"</style>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython import utils \n",
"from IPython.core.display import HTML \n",
"import os\n",
"config_filename = \"custom.css\"\n",
"styles = \"<style>\\n%s\\n</style>\" % (open(config_filename,'r').read())\n",
"HTML(styles)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# WikiTrivia code"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Some init"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import requests\n",
"import time\n",
"from time import sleep\n",
"import json\n",
"from random import shuffle\n",
"import codecs\n",
"import os\n",
"import urllib\n",
"import dill\n",
"import errno\n",
"\n",
"import credentials # python module containing the definition API_KEY = \"your_own_api_key\"\n",
"\n",
"import networkx as nx"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# some initial data\n",
"\n",
"# qid of the series, folder where stuff is saved, path where analysis is saved\n",
"SERIES_QID, RESULTS_FOLDER, SAVED_ANALYSES, SAVED_GRAPH = (\n",
" \"Q886\", \"simpsons\", \"analyses/simpsons_syntax.pickle\", \"graph/simpsons_graph.pickle\"\n",
")\n",
"#SERIES_QID, RESULTS_FOLDER, SAVED_ANALYSES, SAVED_GRAPH = (\"Q8539\", \"bigbangtheory\", \"analyses/bbt_syntax.pickle\", \"graph/bbt_graph.pickle\")\n",
"#SERIES_QID, RESULTS_FOLDER, SAVED_ANALYSES = (\"Q5930\", \"familyguy\", \"analyses/familyguy_syntax.pickle\")\n",
"\n",
"EPISODE_SERIES_PREDICATE = \"P179\"\n",
"CHARACTER_SERIES_PREDICATE = \"P1441\"\n",
"\n",
"GENDER_PREDICATE = \"P21\"\n",
"MALE_GENDER = \"Q6581097\"\n",
"FEMALE_GENDER = \"Q6581072\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def read_resource(path):\n",
" with open(path, \"rb\") as file_in:\n",
" objj = dill.load(file_in)\n",
" return objj\n",
"\n",
"def write_resource(resource, path):\n",
" try:\n",
" os.makedirs(os.path.dirname(path))\n",
" except OSError as exc: # in case the fodler already exists\n",
" if exc.errno != errno.EEXIST:\n",
" raise\n",
" with open(path, \"wb\") as file_out:\n",
" dill.dump(resource, file_out)"
]
},
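{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of the save/load helpers above. This is only an illustrative sketch; the `tmp/roundtrip_example.pickle` path is an arbitrary placeholder."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# illustrative roundtrip of the helpers above (the path is just a placeholder)\n",
"write_resource({\"hello\": \"world\"}, \"tmp/roundtrip_example.pickle\")\n",
"print read_resource(\"tmp/roundtrip_example.pickle\")"
]
},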
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# classes to store and manipulate analysis results\n",
"\n",
"class AnalysisResult(object):\n",
" def __init__(self, sentences, references=None):\n",
" self.sentences = sentences\n",
" if not references:\n",
" self.references = [None] * len(self.sentences)\n",
" else:\n",
" self.references = references\n",
" \n",
" @staticmethod\n",
" def merge(one, two):\n",
" return AnalysisResult(one.sentences + two.sentences, one.references + two.references)\n",
"\n",
" \n",
"class AnalysisNode(object):\n",
" def __init__(self, token_analysis):\n",
" self.text = token_analysis[\"text\"][\"content\"]\n",
" self.lemma = token_analysis[\"lemma\"]\n",
" self.pos = token_analysis[\"partOfSpeech\"][\"tag\"]\n",
" self.match = token_analysis[\"text\"][\"beginOffset\"]\n",
" self.data = token_analysis[\"partOfSpeech\"]\n",
" self.father = None\n",
" self.left_children = []\n",
" self.right_children = []\n",
" self.entity = None\n",
"\n",
" def traverse(self, block_nodes=frozenset()):\n",
" left_nodes = [node for link in self.left_children for node in link.node.traverse(block_nodes=block_nodes)]\n",
" right_nodes = [node for link in self.right_children for node in link.node.traverse(block_nodes=block_nodes)]\n",
" current = [self] if self not in block_nodes else []\n",
" return left_nodes + current + right_nodes\n",
" \n",
" def string(self, block_nodes=frozenset()):\n",
" return \" \".join(self.strings(block_nodes=block_nodes))\n",
" \n",
" def strings(self, block_nodes=frozenset()):\n",
" return [node.text for node in self.traverse(block_nodes=block_nodes)]\n",
" \n",
" def __str__(self):\n",
" return \"[%s, (%s), %s, %s]\" % (self.text, self.lemma, self.pos, self.entity)\n",
"\n",
" \n",
"class AnalysisLink(object):\n",
" def __init__(self, node, link_type):\n",
" self.node = node\n",
" self.link_type = link_type\n",
"\n",
"\n",
"class NEREntity(object):\n",
" def __init__(self, name, typee, mentions=[], link=None):\n",
" self._name = name\n",
" self._typee = typee\n",
" self._mentions = []\n",
" self._link = link\n",
" \n",
" def __str__(self):\n",
" return \"{%s / %s / %s }\" % (self._name, self._typee, self._link)\n",
"\n",
"class Mention(object):\n",
" def __init__(self, text, match):\n",
" self._text = text\n",
" self._match = match"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# function to retrieve relations for a given \"predicate\", knowing the start or end node\n",
"def get_linked_nodes(subject_id, predicate_id, object_id, raw_json=False, with_articles=False, gender=None):\n",
" query = 'SELECT ?cid ?article WHERE {'\n",
" if not subject_id and object_id is not None:\n",
" query += ' ?cid wdt:%s wd:%s .' % (predicate_id, object_id)\n",
" elif subject_id is not None and not object_id:\n",
" query += ' wd:%s wdt:%s ?cid .' % (subject_id, predicate_id)\n",
" else:\n",
" raise ValueError(\"no parameters on which to perform the query\")\n",
" if gender:\n",
" query += ' ?cid wdt:%s wd:%s .' % (\"P21\", gender)\n",
" query += '''\n",
" OPTIONAL {\n",
" ?article schema:about ?cid .\n",
" ?article schema:inLanguage \"en\" .\n",
" FILTER (SUBSTR(str(?article), 1, 25) = \"https://en.wikipedia.org/\")\n",
" }\n",
" }\n",
" '''\n",
" headers = {\"Accept\": \"application/sparql-results+json\"}\n",
" response = requests.get(\"https://query.wikidata.org/bigdata/namespace/wdq/sparql?query=%s\" % query, headers=headers)\n",
" if raw_json:\n",
" return response.json()\n",
" jsonn = response.json()\n",
" if \"results\" not in jsonn or \"bindings\" not in jsonn[\"results\"]:\n",
" return []\n",
" if with_articles:\n",
" return (\n",
" [item[\"cid\"][\"value\"].split(\"/\")[-1] for item in response.json()[\"results\"][\"bindings\"]],\n",
" [item[\"article\"][\"value\"] for item in response.json()[\"results\"][\"bindings\"] if \"article\" in item]\n",
" )\n",
" return [item[\"cid\"][\"value\"].split(\"/\")[-1] for item in response.json()[\"results\"][\"bindings\"]]"
]
},
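{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sketch of how `get_linked_nodes` can be called (it needs network access to the Wikidata SPARQL endpoint): the cell below asks for all characters linked to the selected series, then repeats the query restricted to female characters, also returning the Wikipedia links."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# sketch: characters of the selected series (requires access to the Wikidata endpoint)\n",
"character_qids = get_linked_nodes(None, CHARACTER_SERIES_PREDICATE, SERIES_QID)\n",
"print len(character_qids), character_qids[:5]\n",
"\n",
"# same query, restricted to female characters and also returning wikipedia links\n",
"female_qids, female_links = get_linked_nodes(\n",
"    None, CHARACTER_SERIES_PREDICATE, SERIES_QID, with_articles=True, gender=FEMALE_GENDER)\n",
"print len(female_qids), female_links[:3]"
]
},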
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# functions to generate and manipulate local wikidata subgraph\n",
"\n",
"def generate_graph_for_father_and_predicate(father, predicate):\n",
" G = nx.DiGraph()\n",
" male_qids, male_names = get_linked_nodes(None, predicate, father, with_articles=True, gender=MALE_GENDER)\n",
" female_qids, female_names = get_linked_nodes(None, predicate, father, with_articles=True, gender=FEMALE_GENDER)\n",
" qids, names = get_linked_nodes(None, predicate, father, with_articles=True)\n",
" for qid, name in zip(qids, names):\n",
"\n",
" mod_name = urllib.unquote(name).replace(\" \", \"_\").replace(\"https\", \"http\")\n",
"\n",
" G.add_edge(qid, father, p=predicate)\n",
" G.add_edge(father, qid, p=\"-\"+predicate)\n",
" G.add_edge(qid, mod_name, p=\"name\")\n",
" G.add_edge(mod_name, qid, p=\"qid\")\n",
" \n",
" if qid in female_qids:\n",
" G.add_edge(qid, \"gender_female\", p=\"gender\")\n",
" elif qid in male_qids:\n",
" G.add_edge(qid, \"gender_male\", p=\"gender\")\n",
" \n",
" return G\n",
"\n",
"def neighbors_for_type_for_node(kgraph, link_type, node_name):\n",
" if not node_name in kgraph.nodes():\n",
" return None\n",
" neighbors = [node \n",
" for node, props in kgraph[node_name].iteritems() \n",
" if props.get(\"p\") == link_type]\n",
" return neighbors if neighbors else None\n",
"\n",
"def neighbor_for_type_for_node(kgraph, link_type, node_name):\n",
" neighs = neighbors_for_type_for_node(kgraph, link_type, node_name)\n",
" if neighs:\n",
" return neighs[0]\n",
" return None\n",
"\n",
"def get_qid_from_name(kgraph, name):\n",
" if not name in kgraph.nodes():\n",
" return None\n",
" qids = [node \n",
" for node, props in kgraph[name].iteritems() \n",
" if props.get(\"p\") == \"qid\"]\n",
" return qids[0] if qids else None\n",
"\n",
"def get_name_from_qid(kgraph, qid):\n",
" if not qid in kgraph.nodes():\n",
" return None\n",
" names = [node \n",
" for node, props in kgraph[qid].iteritems() \n",
" if props.get(\"p\") == \"name\"]\n",
" return names[0] if names else None"
]
},
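{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal illustration of the lookup helpers above on a tiny hand-built graph (hypothetical toy data, not fetched from Wikidata); the real `kgraph` is generated further down with `generate_graph_for_father_and_predicate`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# toy graph just to illustrate the lookup helpers above (hypothetical data)\n",
"toy_graph = nx.DiGraph()\n",
"toy_graph.add_edge(\"Q000\", \"http://en.wikipedia.org/wiki/Some_Character\", p=\"name\")\n",
"toy_graph.add_edge(\"http://en.wikipedia.org/wiki/Some_Character\", \"Q000\", p=\"qid\")\n",
"toy_graph.add_edge(\"Q000\", \"gender_female\", p=\"gender\")\n",
"\n",
"print get_name_from_qid(toy_graph, \"Q000\")\n",
"print get_qid_from_name(toy_graph, \"http://en.wikipedia.org/wiki/Some_Character\")\n",
"print neighbor_for_type_for_node(toy_graph, \"gender\", \"Q000\")"
]
},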
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def generate_entities_structures(entities):\n",
" entity_map = {}\n",
" entity_matches = {}\n",
" for entity in entities[\"entities\"]:\n",
" for mention in entity[\"mentions\"]:\n",
" if not entity[\"name\"] in entity_map:\n",
" link = None\n",
" if \"metadata\" in entity and \"wikipedia_url\" in entity[\"metadata\"]:\n",
" link = entity[\"metadata\"][\"wikipedia_url\"]\n",
" entity_map[entity[\"name\"]] = NEREntity(entity[\"name\"], entity[\"type\"], mentions=[Mention(mention[\"text\"][\"content\"], mention[\"text\"][\"beginOffset\"])], link=link)\n",
" entity_matches[mention[\"text\"][\"beginOffset\"]] = NEREntity(entity[\"name\"], entity[\"type\"], mentions=[Mention(mention[\"text\"][\"content\"], mention[\"text\"][\"beginOffset\"])], link=link)\n",
" return entity_matches, entity_map"
]
},
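{
"cell_type": "markdown",
"metadata": {},
"source": [
"A hand-crafted miniature of the input expected by `generate_entities_structures`; it only mirrors the fields read by the function above and is not a complete analyzeEntities response."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# minimal hand-crafted input mirroring the fields read above (not a full API response)\n",
"sample_entities = {\n",
"    \"entities\": [\n",
"        {\n",
"            \"name\": \"Homer Simpson\",\n",
"            \"type\": \"PERSON\",\n",
"            \"metadata\": {\"wikipedia_url\": \"http://en.wikipedia.org/wiki/Homer_Simpson\"},\n",
"            \"mentions\": [{\"text\": {\"content\": \"Homer\", \"beginOffset\": 0}}]\n",
"        }\n",
"    ]\n",
"}\n",
"sample_matches, sample_map = generate_entities_structures(sample_entities)\n",
"print sample_matches[0]\n",
"print sample_map[\"Homer Simpson\"]"
]
},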
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# function to retrieve NLP analysis for a given text from remote API \n",
"\n",
"# document = {\"text\": \"A simple example.\", \"link\": \"Something\"}\n",
"def analyze_text(document):\n",
" base = \"https://language.googleapis.com\"\n",
" \n",
" doc = {\"type\": \"PLAIN_TEXT\", \"content\": document[\"text\"]}\n",
" request_data = {\"document\": doc, \"encodingType\": \"UTF8\"}\n",
"\n",
" analysis_endpoint = \"/v1/documents:analyzeSyntax\"\n",
" analysis_url = base + analysis_endpoint + \"?key=\" + credentials.NLP_API_KEY\n",
" \n",
" entities_endpoint = \"/v1/documents:analyzeEntities\"\n",
" entities_url = base + entities_endpoint + \"?key=\" + credentials.NLP_API_KEY\n",
" \n",
" \n",
" # syntax\n",
" response = requests.post(analysis_url, data=json.dumps(request_data))\n",
" analysis_results = json.loads(response.text)\n",
" \n",
" nodes = [AnalysisNode(token) for token in analysis_results.get(\"tokens\")]\n",
"\n",
" roots = []\n",
" references = []\n",
" \n",
" # collect the syntax links\n",
" for index, token in enumerate(nodes):\n",
" data = analysis_results.get(\"tokens\")[index]\n",
" father_node = nodes[data.get(\"dependencyEdge\").get(\"headTokenIndex\")]\n",
" token.father = AnalysisLink(father_node, data.get(\"dependencyEdge\").get(\"label\"))\n",
" if data.get(\"dependencyEdge\").get(\"label\") != \"ROOT\":\n",
" if token.match < father_node.match:\n",
" father_node.left_children.append(AnalysisLink(token, data.get(\"dependencyEdge\").get(\"label\")))\n",
" else:\n",
" father_node.right_children.append(AnalysisLink(token, data.get(\"dependencyEdge\").get(\"label\")))\n",
" else:\n",
" roots.append(token)\n",
" if document.get(\"link\"):\n",
" references.append(document.get(\"link\"))\n",
" else:\n",
" None\n",
" \n",
" # entities\n",
" entities_response = requests.post(entities_url, data=json.dumps(request_data))\n",
" entities = json.loads(entities_response.text)\n",
" entity_matches, _ = generate_entities_structures(entities)\n",
" \n",
" for node in nodes:\n",
" if node.match in entity_matches:\n",
" node.entity = entity_matches.get(node.match)\n",
"\n",
" return AnalysisResult(roots, references)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Quick example"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Homer, (Homer), NOUN, {Homer Simpson / PERSON / http://en.wikipedia.org/wiki/Homer_Simpson }] NN\n",
"[Simpson, (Simpson), NOUN, None] NSUBJ\n",
"[stole, (steal), VERB, None] ROOT\n",
"[Ned, (Ned), NOUN, {Ned / PERSON / http://en.wikipedia.org/wiki/Ned_Flanders }] POSS\n",
"['s, ('s), PRT, None] PS\n",
"[air, (air), NOUN, {air conditioner / OTHER / None }] NN\n",
"[conditioner, (conditioner), NOUN, None] DOBJ\n",
"[., (.), PUNCT, None] P\n",
"[Homer, (Homer), NOUN, {Homer Simpson / PERSON / http://en.wikipedia.org/wiki/Homer_Simpson }] NSUBJ\n",
"[did, (do), VERB, None] ROOT\n",
"[it, (it), PRON, None] DOBJ\n",
"[again, (again), ADV, None] ADVMOD\n",
"[., (.), PUNCT, None] P\n"
]
}
],
"source": [
"document = {\n",
" \"text\": \"Homer Simpson stole Ned's air conditioner. Homer did it again.\", \n",
" \"link\": \"Some/path/ola\",\n",
"}\n",
"result = analyze_text(document)\n",
"for sentence in result.sentences:\n",
" for node in sentence.traverse():\n",
" print node, node.father.link_type"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{u'article': {u'type': u'uri',\n",
" u'value': u'https://en.wikipedia.org/wiki/Homer%20Goes%20to%20College'},\n",
" u'cid': {u'type': u'uri', u'value': u'http://www.wikidata.org/entity/Q94318'}}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# retrieve list of qids that are episodes of the series, together with their wikipedia link\n",
"predicate = EPISODE_SERIES_PREDICATE\n",
"supertype = SERIES_QID\n",
"\n",
"query = \"\"\"\n",
"SELECT ?cid ?article WHERE {\n",
" ?cid wdt:%s wd:%s .\n",
" OPTIONAL {\n",
" ?article schema:about ?cid .\n",
" ?article schema:inLanguage \"en\" .\n",
" FILTER (SUBSTR(str(?article), 1, 25) = \"https://en.wikipedia.org/\")\n",
" }\n",
"} \n",
"\"\"\" % (predicate, supertype)\n",
"\n",
"headers = {\"Accept\": \"application/sparql-results+json\"}\n",
"response = requests.get(\"https://query.wikidata.org/bigdata/namespace/wdq/sparql?query=%s\" % query, headers=headers)\n",
"res_obj = response.json()\n",
"\n",
"res_obj.get(\"results\").get(\"bindings\")[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Retrieve needed data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### to dump texts of wikipedia xml exports obtained via api GET request, parsed by using the wikiextractor lib USING wikiextractor"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"export_api = \"https://en.wikipedia.org/wiki/Special:Export/\"\n",
"FILE_NAME = \"test.page\"\n",
"\n",
"for result in res_obj[\"results\"][\"bindings\"][:20]:\n",
" # pass\n",
" try:\n",
" print result[\"cid\"][\"value\"], result[\"article\"][\"value\"]\n",
" page_name = result[\"article\"][\"value\"].split(\"/\")[-1]\n",
" print \" \", export_api + page_name\n",
" # print result[\"item\"][\"value\"], result[\"itemLabel\"][\"value\"]\n",
"\n",
" result_text = requests.get(export_api + result[\"article\"][\"value\"].split(\"/\")[-1]).text\n",
" with codecs.open(FILE_NAME, \"w\", encoding=\"utf-8\") as file_out:\n",
" file_out.write(result_text)\n",
" os.system(\"python wikiextractor/WikiExtractor.py test.page -s -o %s/%s\" % (RESULTS_FOLDER, page_name))\n",
" except Exception as e:\n",
" print str(e)\n",
" sleep(1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### extra processing step to get a section of the document, here set to retrieve \"plot\" and \"cultural references\" for selected pages"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"PLOT = \"Plot.\"\n",
"CULTURAL_REFERENCES = \"Cultural references.\\n\"\n",
"\n",
"PRODUCTION = \"Production.\"\n",
"RECEPTION = \"Reception.\"\n",
"CRITICAL_RECEPTION = \"Critical reception.\"\n",
"\n",
"documents_to_analyze = []\n",
"for folder in os.listdir(RESULTS_FOLDER):\n",
" for filee in os.listdir(\"%s/%s/AA\" % (RESULTS_FOLDER, folder)):\n",
" with codecs.open(\"%s/%s/AA/%s\" % (RESULTS_FOLDER, folder, filee)) as file_in:\n",
" total_text = \"\"\n",
" \n",
" text = file_in.read()\n",
" print folder\n",
" beginning1 = text.find(PLOT) + len(PLOT)\n",
" end1 = min([ndx for ndx in (\n",
" text.find(PRODUCTION),\n",
" text.find(RECEPTION),\n",
" text.find(CRITICAL_RECEPTION),\n",
" text.find(CULTURAL_REFERENCES),\n",
" text.find(\"</doc>\"),\n",
" len(text),\n",
" ) if ndx > beginning1])\n",
" beginning2 = text.find(CULTURAL_REFERENCES) + len(CULTURAL_REFERENCES)\n",
" end2 = min([ndx for ndx in (\n",
" text.find(PRODUCTION),\n",
" text.find(RECEPTION),\n",
" text.find(CRITICAL_RECEPTION),\n",
" text.find(\"</doc>\"),\n",
" len(text),\n",
" ) if ndx > beginning2])\n",
" total_text += text[beginning1:end1] + \"\\n\\n\"\n",
" if text.find(CULTURAL_REFERENCES) > 0:\n",
" print \"----------\"\n",
" print text[beginning2:end2]\n",
" total_text += text[beginning2:end2] + \"\\n\\n\"\n",
" \n",
" documents_to_analyze.append({\"text\": total_text, \"link\": \"https://en.wikipedia.org/wiki/%s\" % folder})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# price estimate\n",
"units = 0\n",
"for doc in documents_to_analyze:\n",
" units += len(doc[\"text\"]) / 1000\n",
"print float(units) / 1000 * 1.5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# launch analysis of preprocessed documents\n",
"analyzed_documents = [analyze_text(document) for document in documents_to_analyze]\n",
"merged = analyzed_documents[0]\n",
"for ad in analyzed_documents[1:]:\n",
" merged = AnalysisResult.merge(merged, ad)\n",
"\n",
"write_resource(merged, SAVED_ANALYSES)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# generate knowledge subgraph for the chosen topic\n",
"kgraph = generate_graph_for_father_and_predicate(SERIES_QID, CHARACTER_SERIES_PREDICATE)\n",
"write_resource(kgraph, SAVED_GRAPH)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pre-analyzed quiz generation"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# reload saved analyses\n",
"merged_analyses = read_resource(SAVED_ANALYSES)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# generate knowledge subgraph for the chosen topic\n",
"kgraph = read_resource(SAVED_GRAPH)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class Quiz(object):\n",
" def __init__(self):\n",
" self.explanation = None\n",
" self.question = None\n",
" self.correct_answers = []\n",
" self.wrong_answers = []\n",
" \n",
" def __unicode__(self):\n",
" return \"S: %s\\nQ: %s\\nA: %s\\nW: %s\" % (\n",
" self.explanation,\n",
" self.question,\n",
" \", \".join(self.correct_answers),\n",
" \", \".join(self.wrong_answers),\n",
" )\n",
" \n",
" def __str__(self):\n",
" return unicode(self).encode(\"utf-8\")\n",
" \n",
"\n",
"class PersonSubjectQuestioner(object):\n",
" TO_BE_REMOVED_KEYWORDS = {\"also\", \"then\", \"meanwhile\", \"later\", \"however\", \"soon\", \"afterwards\"}\n",
" \n",
" def __init__(self):\n",
" pass\n",
" \n",
" # TODO extra parameters here should be moved in context or something else\n",
" def process(self, analysis, kgraph):\n",
" questioner_results = []\n",
" for ndx in range(len(analysis.sentences)):\n",
" quiz = Quiz()\n",
"\n",
" sentence_root = analysis.sentences[ndx]\n",
"\n",
" if sentence_root.pos != \"VERB\":\n",
" continue\n",
"\n",
" # get the subject\n",
" subj = None\n",
" listt = [node \n",
" for node in sentence_root.traverse()\n",
" if node.father and (node.father.link_type == \"NSUBJ\" or node.father.link_type == \"NSUBJPASS\")]\n",
"\n",
" if not listt:\n",
" continue\n",
" \n",
" objs = [node\n",
" for node in sentence_root.traverse()\n",
" if node.father and (node.father.link_type == \"DOBJ\")]\n",
" bad_dobj = False\n",
" if objs:\n",
" for obj in objs:\n",
" if obj.father.node == sentence_root and obj.text.lower() in {\"him\", \"her\", \"them\"}:\n",
" bad_dobj = True\n",
" if obj.father.node == sentence_root and \"the\" in obj.strings():\n",
" bad_dobj = True\n",
" if bad_dobj:\n",
" continue\n",
"\n",
" for subjj in listt:\n",
" if subjj.father.node == sentence_root:\n",
" subj = subjj\n",
" if not subj:\n",
" continue\n",
" if subj.text.lower() in {\"you\", \"we\"}:\n",
" continue\n",
"\n",
" # collect entities in the subject nodes\n",
" subject_nodes = subj.traverse()\n",
" if any(node.father.link_type not in {\"NSUBJ\", \"NN\", \"NSUBJPASS\"} for node in subject_nodes):\n",
" continue\n",
" \n",
" the_entities = [node.entity for node in subj.traverse() if node.entity]\n",
"\n",
" if the_entities and the_entities[0]._typee == \"PERSON\" and the_entities[0]._link:\n",
"\n",
" quiz.explanation = sentence_root.string()\n",
"\n",
" lemmas_to_be_filtered = list(\n",
" filter(lambda nodee: nodee.lemma.lower() in PersonSubjectQuestioner.TO_BE_REMOVED_KEYWORDS,\n",
" sentence_root.traverse()))\n",
"\n",
" # add punctuation just-after-a-lemma-to-be-removed to list of stuff to be removed\n",
" # e.g. \"He is clever, too, and very smart.\" -> I want to remove \"too,\"\n",
" lemmas_to_be_filtered += list(\n",
" map(\n",
" lambda nodee: nodee[1],\n",
" filter(\n",
" lambda nodee: nodee[0] in lemmas_to_be_filtered and nodee[1].pos == \"PUNCT\",\n",
" zip(sentence_root.traverse()[:-1], sentence_root.traverse()[1:])\n",
" )\n",
" )\n",
" )\n",
"\n",
" parataxis_and_conj_roots = [\n",
" node \n",
" for root in sentence_root.traverse()\n",
" for node in root.traverse()\n",
" if (\n",
" root.father.link_type == \"PARATAXIS\" \n",
" or root.father.link_type == \"CONJ\") \n",
" and root.pos == \"VERB\" and \"NSUBJ\" in set(\n",
" [link.link_type for link in root.left_children]\n",
" )\n",
" ]\n",
"\n",
" post_who_string = sentence_root.string(\n",
" block_nodes=set(subject_nodes + lemmas_to_be_filtered + parataxis_and_conj_roots),\n",
" )\n",
"\n",
" # generate the question and clean it\n",
" if any(link.link_type not in {\"NSUBJ\", \"NSUBJPASS\"} for link in sentence_root.left_children):\n",
" qtext = \" \" + \"who, \" + \" \" + post_who_string[0].lower() + post_who_string[1:]\n",
" else:\n",
" qtext = \" \" + \"who\" + \" \" + post_who_string\n",
"\n",
" if qtext[-1] in {\".\", \",\", \";\"}:\n",
" qtext = qtext[:-1].strip()\n",
" while qtext[-1:] in {\".\", \",\", \";\"}:\n",
" qtext = qtext[:-1].strip() # assuming space-separated tokens\n",
" for particle in [\" and\", \" so\", \" but\"]:\n",
" if qtext.endswith(particle):\n",
" qtext = qtext[:-len(particle)].strip()\n",
" while qtext[-1:] in {\".\", \",\", \";\"}:\n",
" qtext = qtext[:-1].strip() # assuming space-separated tokens\n",
" \n",
" episode_context = 'In the episode \"%s\",' % urllib.unquote(analysis.references[ndx].split(\"/\")[-1])#.replace(\"%20\", \" \")\n",
" qtext = episode_context + \" \" + qtext + \"?\"\n",
" quiz.question = qtext\n",
"\n",
" # generate the correct answer\n",
" correct_answer_link = the_entities[0]._link\n",
" correct_answer = correct_answer_link.split(\"/\")[-1].replace(\"_\", \" \")\n",
" quiz.correct_answers.append(correct_answer)\n",
"\n",
" # start checks over wikidata identity of the correct answer to generate wrong answers\n",
" qid = neighbor_for_type_for_node(kgraph, \"qid\", correct_answer_link)\n",
" if qid and (\"gender_female\" in kgraph[qid] or \"gender_male\" in kgraph[qid]):\n",
" if \"gender_female\" in kgraph[qid]:\n",
" gender = \"gender_female\"\n",
" else:\n",
" gender = \"gender_male\"\n",
" \n",
" # generate wrong answers (distractors)\n",
" predicate = CHARACTER_SERIES_PREDICATE\n",
"\n",
" type_qid = neighbor_for_type_for_node(kgraph, predicate, qid)\n",
"\n",
" other_qid_same_type = neighbors_for_type_for_node(\n",
" kgraph, \"-\" + predicate, type_qid,\n",
" )\n",
" shuffle(other_qid_same_type)\n",
" other_qid_same_type = filter(lambda x : gender in kgraph[x], other_qid_same_type)\n",
" \n",
" random_sample = []\n",
" for other_qid in other_qid_same_type:\n",
" wrong_name = neighbor_for_type_for_node(kgraph, \"name\", other_qid).split(\"/\")[-1].replace(\"_\", \" \")\n",
" if \"list of\" in wrong_name.lower():\n",
" continue\n",
" if \"the simpsons\" in wrong_name.lower():\n",
" continue\n",
" if \"database\" in wrong_name.lower():\n",
" continue\n",
" random_sample.append(wrong_name)\n",
" for wrong in random_sample[:3]:\n",
" if wrong == correct_answer:\n",
" quiz.wrong_answers.append(random_sample[-1])\n",
" else:\n",
" quiz.wrong_answers.append(wrong)\n",
"\n",
" if len(quiz.wrong_answers) < 3:\n",
" continue\n",
"\n",
" questioner_results.append(quiz)\n",
"\n",
" return questioner_results"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"S: Marge throws a dinner party to escape from the doldrums at the Simpson house , so she decides to invite the Flanders , the Lovejoys , the Hibberts and the Van Houtens .\n",
"Q: In the episode \"A Milhouse Divided\", who throws a dinner party to escape from the doldrums at the Simpson house?\n",
"A: Marge Simpson\n",
"W: Maggie Simpson, Lisa Simpson, Edna Krabappel\n",
"\n",
"S: Homer tries to perform selfless gestures for Marge , such as making soothing ocean noises to lull her to sleep and cutting her hair , but they only serve to annoy her .\n",
"Q: In the episode \"A Milhouse Divided\", who tries to perform selfless gestures for Marge , such as making soothing ocean noises to lull her to sleep and cutting her hair?\n",
"A: Homer Simpson\n",
"W: Barney Gumble, Grampa Simpson, Lionel Hutz\n",
"\n",
"S: Deciding at that point that Marge deserves a fresh start , Homer secretly files for a divorce .\n",
"Q: In the episode \"A Milhouse Divided\", who, deciding at that point that Marge deserves a fresh start , secretly files for a divorce?\n",
"A: Homer Simpson\n",
"W: Mayor Quimby, Groundskeeper Willie, Barney Gumble\n",
"\n",
"S: Lisa starts to video her family doing their everyday activities .\n",
"Q: In the episode \"Any Given Sundance\", who starts to video her family doing their everyday activities?\n",
"A: Lisa Simpson\n",
"W: Marge Simpson, Edna Krabappel, Maggie Simpson\n",
"\n",
"S: Lisa shows her family in all of its dysfunction .\n",
"Q: In the episode \"Any Given Sundance\", who shows her family in all of its dysfunction?\n",
"A: Lisa Simpson\n",
"W: Maggie Simpson, Edna Krabappel, Marge Simpson\n",
"\n",
"S: Marge cleans up after them and Lisa wishes her a \" Happy Birthday , \" embarrassing the entire family .\n",
"Q: In the episode \"Any Given Sundance\", who cleans up after them?\n",
"A: Marge Simpson\n",
"W: Edna Krabappel, Maggie Simpson, Lisa Simpson\n",
"\n",
"S: Lisa feels sorry for what she did to the family , and while deep in thought , Jim Jarmusch approaches her and says he can relate because his movies are also about \" social misfits experiencing the dark side of the American dream \" .\n",
"Q: In the episode \"Any Given Sundance\", who feels sorry for what she did to the family?\n",
"A: Lisa Simpson\n",
"W: Marge Simpson, Maggie Simpson, Edna Krabappel\n",
"\n",
"S: Lisa however , feels that she may have , deep inside , humiliated her family on purpose .\n",
"Q: In the episode \"Any Given Sundance\", who, feels that she may have , deep inside , humiliated her family on purpose?\n",
"A: Lisa Simpson\n",
"W: Edna Krabappel, Marge Simpson, Maggie Simpson\n",
"\n",
"S: When his film ends , Lisa learns that although her family may embarrass her , humiliate her , or infuriate her , there are other families with tougher problems .\n",
"Q: In the episode \"Any Given Sundance\", who, when his film ends , learns that although her family may embarrass her , humiliate her , or infuriate her , there are other families with tougher problems?\n",
"A: Lisa Simpson\n",
"W: Edna Krabappel, Marge Simpson, Maggie Simpson\n",
"\n",
"S: Marge delivers a heart - felt plea to the residents to let the Simpsons live in the one place they call home , but they refuse , with Quimby declaring her the \" worst Simpson \" for always trying to see the family in a positive light .\n",
"Q: In the episode \"At Long Last Leave\", who delivers a heart - felt plea to the residents to let the Simpsons live in the one place they call home?\n",
"A: Marge Simpson\n",
"W: Maggie Simpson, Lisa Simpson, Edna Krabappel\n"
]
}
],
"source": [
"questioner = PersonSubjectQuestioner()\n",
"quizzes = questioner.process(merged_analyses, kgraph)\n",
"for quiz in quizzes[:10]:\n",
" print\n",
" print quiz"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Lambda code for Alexa"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\"\"\"\n",
"Simple Alexa Skill to demonstrate quizzing.\n",
"\"\"\"\n",
"\n",
"from __future__ import print_function\n",
"import random\n",
"\n",
"\n",
"quizzes = [\n",
" {\n",
" \"question\": \"In which city do the Simpsons live?\",\n",
" \"correct_answer\": \"Springfield\",\n",
" \"wrong_answers\": [\"Chicago\", \"New York\", \"Florence\"]\n",
" },\n",
" {\n",
" \"question\": \"In the episode \\\"Bart the Fink\\\", who kills off his pseudonym in a boating accident in order to collect the life insurance, thus ending his tax woes?\",\n",
" \"correct_answer\": \"Krusty the Clown\",\n",
" \"wrong_answers\": [\"Reverend Lovejoy\", \"Kang and Kodos\", \"Lionel Hutz\"]\n",
" },\n",
" {\n",
" \"question\": \"In the episode \\\"Brush with Greatness\\\", who, once home, announces to his family that he will go on a diet and exercise more?\",\n",
" \"correct_answer\": \"Homer Simpson\",\n",
" \"wrong_answers\": [\"Chief Wiggum\", \"Martin Prince\", \"Principal Skinner\"]\n",
" }\n",
"]\n",
"\n",
"opening_sentences = [\n",
" \"Sure, I have one for you!\",\n",
" \"Here you are one!\",\n",
" \"I've just found a new one! Here it is ...\"\n",
"]\n",
"\n",
"def build_random_question_sentence():\n",
" quiz = random.choice(quizzes)\n",
" total_answers = [quiz[\"correct_answer\"]] + quiz[\"wrong_answers\"]\n",
" random.shuffle(total_answers)\n",
" \n",
" answers_sentence = \"\"\n",
" correct_number = None\n",
" correct_answer = quiz[\"correct_answer\"]\n",
" for index, ans in enumerate(total_answers):\n",
" answers_sentence += \"%s %s %s \" % (str(index + 1), ans, \"...\")\n",
" if ans == correct_answer:\n",
" correct_number = str(index + 1)\n",
" \n",
" return (\n",
" \"%s %s %s\" % (\n",
" random.choice(opening_sentences),\n",
" quiz[\"question\"] + \" ... \",\n",
" answers_sentence,\n",
" ),\n",
" correct_number,\n",
" correct_answer,\n",
" )\n",
" \n",
"\n",
"\n",
"# speech response builder\n",
"def build_speechlet_response(title, output, reprompt_text, should_end_session):\n",
" return {\n",
" 'outputSpeech': {\n",
" 'type': 'PlainText',\n",
" 'text': output\n",
" },\n",
" 'card': {\n",
" 'type': 'Simple',\n",
" 'title': \"SessionSpeechlet - \" + title,\n",
" 'content': \"SessionSpeechlet - \" + output\n",
" },\n",
" 'reprompt': {\n",
" 'outputSpeech': {\n",
" 'type': 'PlainText',\n",
" 'text': reprompt_text\n",
" }\n",
" },\n",
" 'shouldEndSession': should_end_session\n",
" }\n",
"\n",
"\n",
"def build_response(session_attributes, speechlet_response):\n",
" return {\n",
" 'version': '1.0',\n",
" 'sessionAttributes': session_attributes,\n",
" 'response': speechlet_response\n",
" }\n",
"\n",
"\n",
"# --------------- Functions that control the skill's behavior ------------------\n",
"\n",
"def get_question():\n",
" \"\"\" Immediately ask a question, populating session attributes so that a\n",
" specific correct answer is expected.\n",
" \"\"\"\n",
"\n",
" card_title = \"Question\"\n",
" speech_output, correct_number, correct_answer = build_random_question_sentence()\n",
" # If the user either does not reply to the welcome message or says something\n",
" # that is not understood, they will be prompted again with this text.\n",
" reprompt_text = \"You can answer the question by saying 'number two'.\"\n",
" should_end_session = False\n",
" session_attributes = {\"correct_number\": correct_number, \"correct_answer\": correct_answer}\n",
" return build_response(\n",
" session_attributes, \n",
" build_speechlet_response(\n",
" card_title, \n",
" speech_output, \n",
" reprompt_text, \n",
" should_end_session\n",
" ),\n",
" )\n",
"\n",
"\n",
"def handle_session_end_request():\n",
" card_title = \"Session Ended\"\n",
" speech_output = \"Thank you for playing!\"\n",
" should_end_session = True\n",
" return build_response({}, build_speechlet_response(\n",
" card_title, \n",
" speech_output, \n",
" None, \n",
" should_end_session\n",
" ))\n",
"\n",
"\n",
"# --------------- Events ------------------\n",
"\n",
"def on_session_started(session_started_request, session):\n",
" \"\"\" Called when the session starts \"\"\"\n",
"\n",
" print(\"on_session_started requestId=\" + session_started_request['requestId']\n",
" + \", sessionId=\" + session['sessionId'])\n",
"\n",
"\n",
"def on_launch(launch_request, session):\n",
"\n",
" print(\"on_launch requestId=\" + launch_request['requestId'] +\n",
" \", sessionId=\" + session['sessionId'])\n",
" return get_question()\n",
"\n",
"\n",
"def check_answer_and_conclude(intent, session):\n",
" session_attributes = {}\n",
" card_title = \"Answer\"\n",
" if session.get('attributes', {}) and \"correct_number\" in session.get('attributes', {}) and \"correct_answer\" in session.get('attributes', {}) and 'AnswerNumber' in intent['slots']:\n",
" answer_number = intent['slots']['AnswerNumber']['value']\n",
" correct_number = session['attributes']['correct_number']\n",
" correct_answer = session['attributes']['correct_answer']\n",
" if answer_number == correct_number:\n",
" speech_output = \"You're right! Great job!\"\n",
" else:\n",
" speech_output = \"I'm sorry. The correct answer was \" + correct_answer +\".\" \n",
" return build_response(session_attributes, build_speechlet_response(\n",
" card_title, speech_output, None, True))\n",
" else:\n",
" speech_output = \"I'm not sure what your answer was. \"\n",
" should_end_session = False\n",
" return build_response(session_attributes, build_speechlet_response(\n",
" card_title, speech_output, None, True))\n",
"\n",
"\n",
"def on_intent(intent_request, session):\n",
" \"\"\" called when an intent has been detected \"\"\"\n",
"\n",
" print(\"on_intent requestId=\" + intent_request['requestId'] +\n",
" \", sessionId=\" + session['sessionId'])\n",
"\n",
" intent = intent_request['intent']\n",
" intent_name = intent_request['intent']['name']\n",
"\n",
" # dispatch based on the intent\n",
" if intent_name == \"Answer\":\n",
" return check_answer_and_conclude(intent, session)\n",
" elif intent_name == \"AMAZON.HelpIntent\":\n",
" return get_question()\n",
" elif intent_name == \"AMAZON.CancelIntent\" or intent_name == \"AMAZON.StopIntent\":\n",
" return handle_session_end_request()\n",
" else:\n",
" raise ValueError(\"Invalid intent\")\n",
"\n",
"\n",
"def on_session_ended(session_ended_request, session):\n",
" \"\"\" called when the user ends the session.\n",
" not called when the skill returns should_end_session=true\n",
" \"\"\"\n",
" print(\"on_session_ended requestId=\" + session_ended_request['requestId'] +\n",
" \", sessionId=\" + session['sessionId'])\n",
"\n",
"\n",
"# --------------- Main handler ------------------\n",
"\n",
"def lambda_handler(event, context):\n",
" \"\"\" Route the incoming request based on type (LaunchRequest, IntentRequest,\n",
" etc.) The JSON body of the request is provided in the event parameter.\n",
" \"\"\"\n",
" print(\"event.session.application.applicationId=\" +\n",
" event['session']['application']['applicationId'])\n",
"\n",
" \"\"\"\n",
" Uncomment this if statement and populate with your skill's application ID to\n",
" prevent someone else from configuring a skill that sends requests to this\n",
" function.\n",
" \"\"\"\n",
" # if (event['session']['application']['applicationId'] !=\n",
" # \"amzn1.echo-sdk-ams.app.[unique-value-here]\"):\n",
" # raise ValueError(\"Invalid Application ID\")\n",
"\n",
" if event['session']['new']:\n",
" on_session_started({'requestId': event['request']['requestId']},\n",
" event['session'])\n",
"\n",
" if event['request']['type'] == \"LaunchRequest\":\n",
" return on_launch(event['request'], event['session'])\n",
" elif event['request']['type'] == \"IntentRequest\":\n",
" return on_intent(event['request'], event['session'])\n",
" elif event['request']['type'] == \"SessionEndedRequest\":\n",
" return on_session_ended(event['request'], event['session'])\n"
]
},
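{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal, hand-crafted `LaunchRequest` event for a local smoke test of `lambda_handler`; only the fields the handlers above actually read are populated, so this is a local sketch rather than a real Alexa payload."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# hand-crafted LaunchRequest event for a local smoke test of lambda_handler\n",
"# (only the fields read by the handlers above are populated)\n",
"test_event = {\n",
"    \"session\": {\n",
"        \"new\": True,\n",
"        \"sessionId\": \"local-session-id\",\n",
"        \"application\": {\"applicationId\": \"local-application-id\"},\n",
"        \"attributes\": {}\n",
"    },\n",
"    \"request\": {\n",
"        \"type\": \"LaunchRequest\",\n",
"        \"requestId\": \"local-request-id\"\n",
"    }\n",
"}\n",
"print(lambda_handler(test_event, None))"
]
},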
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@cknoll

cknoll commented Feb 20, 2021

This looks really interesting. I want to include that in https://github.com/cknoll/semantic-python-overview. Is there any follow-up project for this? Or is there more information available?

@ak314

ak314 commented Mar 20, 2021

hi @cknoll, thanks for your interest! Feel free to include it. However, there's currently no public follow-up work on this.
