jerielizabeth/iliff_named_entites_per_decade.ipynb

## iliff_named_entites_per_decade.ipynb
{
  "cells": [
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:24.176877",
          "end_time": "2017-01-27T17:25:25.442707"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "import nltk\nimport os\nfrom collections import Counter\nimport operator",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:25.943146",
          "end_time": "2017-01-27T17:25:25.950279"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "def extract_entity_names(t):\n    entity_names = []\n    \n    \"\"\" Cycle through the different tags.\n    First identify those with a lable, then isolate those labled 'PERSON'. This could also be\n    GPE (GeoPolitical Entity) or ORGANIZATION. \n    Combine and return the names, or check if there are nested attributes.\n    \"\"\"\n    \n    if hasattr(t, 'label') and t.label:\n        if t.label() == 'PERSON':\n            entity_names.append(' '.join([child[0] for child in t]))\n        else:\n            for child in t:\n                entity_names.extend(extract_entity_names(child))\n\n    return( entity_names)",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:27.085843",
          "end_time": "2017-01-27T17:25:27.098006"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "def identify_chunks(directory, file):\n    with open(os.path.join(directory, file)) as f:\n        content = f.read()\n    \"\"\"First break the content into sentences, using the sentence tokenizer in NLTK.\"\"\"\n    sentences = nltk.sent_tokenize(content)\n    \n    \"\"\"Then break each sentence into word tokens, using the word tokenizer in NLTK. \"\"\"\n    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]\n    \n    \"\"\"Tag each token with a part of speech tag.\"\"\"\n    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]\n    \n    \"\"\"Finally, use the ne \"chunker\" to identify the named entities\"\"\"\n    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=False)\n    \n    return(chunked_sentences)",
      "execution_count": 3,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:28.163626",
          "end_time": "2017-01-27T17:25:28.204531"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "def process_chunks(chunks):\n    entity_names = []\n    for tree in chunks:\n        entity_names.extend(extract_entity_names(tree))\n    return(entity_names)",
      "execution_count": 4,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:29.089016",
          "end_time": "2017-01-27T17:25:29.093643"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "def get_entity_frequency(entity_names):\n    counts = Counter(entity_names)\n    return(dict(counts))",
      "execution_count": 5,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:30.236004",
          "end_time": "2017-01-27T17:25:30.240120"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "def process_file(directory, file):\n    \n    chunks = identify_chunks(directory, file)\n    entity_names = process_chunks(chunks)\n    entity_summary = get_entity_frequency(entity_names)\n    \n    return({'doc_id': file, 'entities': entity_summary})",
      "execution_count": 6,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:30.987331",
          "end_time": "2017-01-27T17:25:30.996201"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "def get_decade_data(decade):\n    decade_statistics = []\n    for file in corpus:\n        if file.startswith(decade):\n            file_data = process_file(directory, file)\n            decade_statistics.append(file_data)\n    return(decade_statistics)",
      "execution_count": 7,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:51:23.469632",
          "end_time": "2017-01-27T17:51:23.492059"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "def summary_data(decade_data):\n    '''Generates overview on the named entities for a collection of data. Creates a dictionary (entity_summary) \n    from all the reported entities/frequencies\n    and records the entity (as key) and the total count for that entity (as value).\n    '''\n    all_entities = [ document['entities'] for document in decade_data]\n\n    inp = [dict(x) for x in all_entities]\n    \n    entity_summary = Counter()\n    for y in inp:\n        entity_summary += Counter(y)\n    entity_dict = dict(entity_summary)\n    \n    return(sorted(entity_summary.items(), key=operator.itemgetter(1)))",
      "execution_count": 15,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:32.517791",
          "end_time": "2017-01-27T17:25:32.520754"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "directory = \"/Users/jeriwieringa/Documents/nlp-group/iliff_review/data/ir_txt/\"",
      "execution_count": 9,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:25:33.373209",
          "end_time": "2017-01-27T17:25:33.387250"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "corpus = [f for f in os.listdir(directory) if not f.startswith('.') and os.path.isfile(os.path.join(directory, f))]",
      "execution_count": 10,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:51:28.694901",
          "end_time": "2017-01-27T17:51:57.365970"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "fifties_decade = get_decade_data(\"5\")\nfifties_summary = summary_data(fifties_decade)",
      "execution_count": 16,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T18:04:09.970702",
          "end_time": "2017-01-27T18:04:09.985387"
        },
        "trusted": true,
        "collapsed": false,
        "scrolled": true
      },
      "cell_type": "code",
      "source": "fifties_summary[-50:]",
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "[('Bowne', 3),\n ('Plato', 3),\n ('Knudson', 3),\n ('Existentialism', 3),\n ('Simon', 3),\n ('Value', 3),\n ('Empty', 3),\n ('Sir', 3),\n ('Holt', 3),\n ('Einstein', 3),\n ('Williams', 3),\n ('Donahue', 3),\n ('Karl', 3),\n ('Mr.', 4),\n ('Cimarron', 4),\n ('Christ', 4),\n ('Cardinas', 4),\n ('Hume', 4),\n ('Thomas Harwood', 4),\n ('Cruz Vega', 4),\n ('Emerson', 4),\n ('Schleiermacher', 4),\n ('Bernhardt', 4),\n ('Augustine', 5),\n ('Whitehead', 5),\n ('Mills', 5),\n ('Parkinson', 5),\n ('Garden', 5),\n ('Macmillan', 5),\n ('Glencoe', 5),\n ('Christian Theology', 5),\n ('Adam', 6),\n ('Durkheim', 6),\n ('Shapley', 6),\n ('Aristotle', 6),\n ('Galileo', 6),\n ('William James', 6),\n ('James', 6),\n ('John', 6),\n ('Kant', 7),\n ('Vol', 10),\n ('Paul', 11),\n ('Harper', 12),\n ('Jesus', 14),\n ('Man', 15),\n ('Etzler', 16),\n ('Freud', 18),\n ('Popper', 21),\n ('Thoreau', 21),\n ('God', 78)]"
          },
          "metadata": {},
          "execution_count": 25
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:54:09.526562",
          "end_time": "2017-01-27T17:58:37.622907"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "sixties = get_decade_data(\"6\")\nsixties_summary = summary_data(sixties)",
      "execution_count": 18,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T18:04:18.420398",
          "end_time": "2017-01-27T18:04:18.431740"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "sixties_summary[-50:]",
      "execution_count": 26,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "[('History', 24),\n ('Jews', 25),\n ('Robinson', 25),\n ('Macmillan', 25),\n ('Douglas', 28),\n ('Sweden', 28),\n ('Jeremiah', 28),\n ('Nabopolassar', 29),\n ('Church', 29),\n ('Ogden', 29),\n ('Adam Pastor', 30),\n ('Daniel', 30),\n ('Teilhard', 31),\n ('Pastor', 31),\n ('Job', 33),\n ('Him', 34),\n ('Mark', 35),\n ('Calvin', 36),\n ('Matthew', 36),\n ('Jason Lee', 36),\n ('Aristotle', 36),\n ('Pietism', 38),\n ('John Wesley', 39),\n ('Williams', 41),\n ('Harwood', 41),\n ('Wieman', 42),\n ('Thomas', 44),\n ('Bultmann', 45),\n ('Emerson', 46),\n ('Martin Rist', 47),\n ('Man', 47),\n ('Babylon', 50),\n ('Harper', 52),\n ('Iliff Review', 52),\n ('Solomon', 54),\n ('David', 56),\n ('Wesley', 57),\n ('Lee', 63),\n ('Niebuhr', 66),\n ('Vol', 78),\n ('Tillich', 95),\n ('Scott', 100),\n ('Luther', 102),\n ('Jesus Christ', 102),\n ('Christ', 105),\n ('Lincoln', 133),\n ('John', 163),\n ('Paul', 241),\n ('Jesus', 299),\n ('God', 631)]"
          },
          "metadata": {},
          "execution_count": 26
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T17:58:37.674962",
          "end_time": "2017-01-27T18:03:37.084161"
        },
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "seventies = get_decade_data(\"7\")\nseventies_summary = summary_data(seventies)",
      "execution_count": 20,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T18:04:43.270229",
          "end_time": "2017-01-27T18:04:43.280674"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "seventies_summary[-50:]",
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "[('Barrett', 28),\n ('Francis', 29),\n ('Faith', 31),\n ('Salzburg', 32),\n ('Whitehead', 32),\n ('Wittgenstein', 36),\n ('Knox', 36),\n ('Mencius', 36),\n ('Martin', 37),\n ('Kierkegaard', 39),\n ('Dostoevsky', 42),\n ('Rahner', 42),\n ('Job', 46),\n ('Macmillan', 46),\n ('Sartre', 49),\n ('St. Francis', 49),\n ('Wesley', 50),\n ('Husserl', 50),\n ('Pistorius', 51),\n ('Camus', 53),\n ('Hegel', 53),\n ('Smith', 58),\n ('Gordon', 59),\n ('Williams', 60),\n ('Jesus Christ', 60),\n ('Church', 61),\n ('Jung', 62),\n ('Campbell', 64),\n ('Van', 64),\n ('Bonhoeffer', 64),\n ('Niebuhr', 66),\n ('Dasein', 66),\n ('Man', 66),\n ('Bultmann', 68),\n ('Row', 69),\n ('Christ', 76),\n ('Luther', 76),\n ('Bernhardt', 82),\n ('John', 90),\n ('Harper', 93),\n ('Heidegger', 100),\n ('Iliff', 114),\n ('Wieman', 119),\n ('Paul', 127),\n ('Vol', 134),\n ('Lowell', 179),\n ('Tillich', 189),\n ('Ricoeur', 200),\n ('Jesus', 317),\n ('God', 737)]"
          },
          "metadata": {},
          "execution_count": 27
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T18:04:58.904488",
          "end_time": "2017-01-27T18:08:40.712988"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "eighties = get_decade_data(\"8\")\neighties_summary = summary_data(eighties)",
      "execution_count": 28,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2017-01-27T18:08:40.715068",
          "end_time": "2017-01-27T18:08:40.723500"
        },
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "eighties_summary[-50:]",
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "[('Himself', 23),\n ('Petersen', 23),\n ('Mapple', 24),\n ('Charles', 24),\n ('Williams', 25),\n ('Bernhardt', 25),\n ('King', 25),\n ('Macmillan', 26),\n ('Martin', 26),\n ('Wilbanks', 27),\n ('Sontag', 28),\n ('Milligan', 29),\n ('Order', 29),\n ('Bob', 29),\n ('Faith', 29),\n ('Holler', 30),\n ('Tracy', 32),\n ('Jews', 32),\n ('Campbell', 32),\n ('Strong', 33),\n ('Jesus Christ', 34),\n ('Whitehead', 35),\n ('Luther', 37),\n ('Smith', 38),\n ('Rist', 39),\n ('Tillich', 40),\n ('James', 42),\n ('Haggai', 44),\n ('Herbert', 44),\n ('Melville', 45),\n ('Schweitzer', 45),\n ('Gandhi', 50),\n ('Vol', 51),\n ('Thomas', 57),\n ('Fowler', 60),\n ('Harper', 62),\n ('Row', 63),\n ('Freud', 71),\n ('Christ', 71),\n ('Pannenberg', 74),\n ('Black Elk', 78),\n ('Wieman', 84),\n ('John', 99),\n ('Potthoff', 103),\n ('Oecolampadius', 107),\n ('Matthew', 126),\n ('Paul', 152),\n ('David', 185),\n ('Jesus', 230),\n ('God', 722)]"
          },
          "metadata": {},
          "execution_count": 29
        }
      ]
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      },
      "nbconvert_exporter": "python",
      "version": "3.5.2",
      "mimetype": "text/x-python",
      "file_extension": ".py",
      "pygments_lexer": "ipython3",
      "name": "python"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "iliff named entities per decade",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
	{
	"cells": [
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:24.176877",
	"end_time": "2017-01-27T17:25:25.442707"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "import nltk\nimport os\nfrom collections import Counter\nimport operator",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:25.943146",
	"end_time": "2017-01-27T17:25:25.950279"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "def extract_entity_names(t):\n entity_names = []\n \n \"\"\" Cycle through the different tags.\n First identify those with a lable, then isolate those labled 'PERSON'. This could also be\n GPE (GeoPolitical Entity) or ORGANIZATION. \n Combine and return the names, or check if there are nested attributes.\n \"\"\"\n \n if hasattr(t, 'label') and t.label:\n if t.label() == 'PERSON':\n entity_names.append(' '.join([child[0] for child in t]))\n else:\n for child in t:\n entity_names.extend(extract_entity_names(child))\n\n return( entity_names)",
	"execution_count": 2,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:27.085843",
	"end_time": "2017-01-27T17:25:27.098006"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "def identify_chunks(directory, file):\n with open(os.path.join(directory, file)) as f:\n content = f.read()\n \"\"\"First break the content into sentences, using the sentence tokenizer in NLTK.\"\"\"\n sentences = nltk.sent_tokenize(content)\n \n \"\"\"Then break each sentence into word tokens, using the word tokenizer in NLTK. \"\"\"\n tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]\n \n \"\"\"Tag each token with a part of speech tag.\"\"\"\n tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]\n \n \"\"\"Finally, use the ne \"chunker\" to identify the named entities\"\"\"\n chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=False)\n \n return(chunked_sentences)",
	"execution_count": 3,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:28.163626",
	"end_time": "2017-01-27T17:25:28.204531"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "def process_chunks(chunks):\n entity_names = []\n for tree in chunks:\n entity_names.extend(extract_entity_names(tree))\n return(entity_names)",
	"execution_count": 4,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:29.089016",
	"end_time": "2017-01-27T17:25:29.093643"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "def get_entity_frequency(entity_names):\n counts = Counter(entity_names)\n return(dict(counts))",
	"execution_count": 5,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:30.236004",
	"end_time": "2017-01-27T17:25:30.240120"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "def process_file(directory, file):\n \n chunks = identify_chunks(directory, file)\n entity_names = process_chunks(chunks)\n entity_summary = get_entity_frequency(entity_names)\n \n return({'doc_id': file, 'entities': entity_summary})",
	"execution_count": 6,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:30.987331",
	"end_time": "2017-01-27T17:25:30.996201"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "def get_decade_data(decade):\n decade_statistics = []\n for file in corpus:\n if file.startswith(decade):\n file_data = process_file(directory, file)\n decade_statistics.append(file_data)\n return(decade_statistics)",
	"execution_count": 7,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:51:23.469632",
	"end_time": "2017-01-27T17:51:23.492059"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "def summary_data(decade_data):\n '''Generates overview on the named entities for a collection of data. Creates a dictionary (entity_summary) \n from all the reported entities/frequencies\n and records the entity (as key) and the total count for that entity (as value).\n '''\n all_entities = [ document['entities'] for document in decade_data]\n\n inp = [dict(x) for x in all_entities]\n \n entity_summary = Counter()\n for y in inp:\n entity_summary += Counter(y)\n entity_dict = dict(entity_summary)\n \n return(sorted(entity_summary.items(), key=operator.itemgetter(1)))",
	"execution_count": 15,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:32.517791",
	"end_time": "2017-01-27T17:25:32.520754"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "directory = \"/Users/jeriwieringa/Documents/nlp-group/iliff_review/data/ir_txt/\"",
	"execution_count": 9,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:25:33.373209",
	"end_time": "2017-01-27T17:25:33.387250"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "corpus = [f for f in os.listdir(directory) if not f.startswith('.') and os.path.isfile(os.path.join(directory, f))]",
	"execution_count": 10,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:51:28.694901",
	"end_time": "2017-01-27T17:51:57.365970"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "fifties_decade = get_decade_data(\"5\")\nfifties_summary = summary_data(fifties_decade)",
	"execution_count": 16,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T18:04:09.970702",
	"end_time": "2017-01-27T18:04:09.985387"
	},
	"trusted": true,
	"collapsed": false,
	"scrolled": true
	},
	"cell_type": "code",
	"source": "fifties_summary[-50:]",
	"execution_count": 25,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "[('Bowne', 3),\n ('Plato', 3),\n ('Knudson', 3),\n ('Existentialism', 3),\n ('Simon', 3),\n ('Value', 3),\n ('Empty', 3),\n ('Sir', 3),\n ('Holt', 3),\n ('Einstein', 3),\n ('Williams', 3),\n ('Donahue', 3),\n ('Karl', 3),\n ('Mr.', 4),\n ('Cimarron', 4),\n ('Christ', 4),\n ('Cardinas', 4),\n ('Hume', 4),\n ('Thomas Harwood', 4),\n ('Cruz Vega', 4),\n ('Emerson', 4),\n ('Schleiermacher', 4),\n ('Bernhardt', 4),\n ('Augustine', 5),\n ('Whitehead', 5),\n ('Mills', 5),\n ('Parkinson', 5),\n ('Garden', 5),\n ('Macmillan', 5),\n ('Glencoe', 5),\n ('Christian Theology', 5),\n ('Adam', 6),\n ('Durkheim', 6),\n ('Shapley', 6),\n ('Aristotle', 6),\n ('Galileo', 6),\n ('William James', 6),\n ('James', 6),\n ('John', 6),\n ('Kant', 7),\n ('Vol', 10),\n ('Paul', 11),\n ('Harper', 12),\n ('Jesus', 14),\n ('Man', 15),\n ('Etzler', 16),\n ('Freud', 18),\n ('Popper', 21),\n ('Thoreau', 21),\n ('God', 78)]"
	},
	"metadata": {},
	"execution_count": 25
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:54:09.526562",
	"end_time": "2017-01-27T17:58:37.622907"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "sixties = get_decade_data(\"6\")\nsixties_summary = summary_data(sixties)",
	"execution_count": 18,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T18:04:18.420398",
	"end_time": "2017-01-27T18:04:18.431740"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "sixties_summary[-50:]",
	"execution_count": 26,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "[('History', 24),\n ('Jews', 25),\n ('Robinson', 25),\n ('Macmillan', 25),\n ('Douglas', 28),\n ('Sweden', 28),\n ('Jeremiah', 28),\n ('Nabopolassar', 29),\n ('Church', 29),\n ('Ogden', 29),\n ('Adam Pastor', 30),\n ('Daniel', 30),\n ('Teilhard', 31),\n ('Pastor', 31),\n ('Job', 33),\n ('Him', 34),\n ('Mark', 35),\n ('Calvin', 36),\n ('Matthew', 36),\n ('Jason Lee', 36),\n ('Aristotle', 36),\n ('Pietism', 38),\n ('John Wesley', 39),\n ('Williams', 41),\n ('Harwood', 41),\n ('Wieman', 42),\n ('Thomas', 44),\n ('Bultmann', 45),\n ('Emerson', 46),\n ('Martin Rist', 47),\n ('Man', 47),\n ('Babylon', 50),\n ('Harper', 52),\n ('Iliff Review', 52),\n ('Solomon', 54),\n ('David', 56),\n ('Wesley', 57),\n ('Lee', 63),\n ('Niebuhr', 66),\n ('Vol', 78),\n ('Tillich', 95),\n ('Scott', 100),\n ('Luther', 102),\n ('Jesus Christ', 102),\n ('Christ', 105),\n ('Lincoln', 133),\n ('John', 163),\n ('Paul', 241),\n ('Jesus', 299),\n ('God', 631)]"
	},
	"metadata": {},
	"execution_count": 26
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T17:58:37.674962",
	"end_time": "2017-01-27T18:03:37.084161"
	},
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "seventies = get_decade_data(\"7\")\nseventies_summary = summary_data(seventies)",
	"execution_count": 20,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T18:04:43.270229",
	"end_time": "2017-01-27T18:04:43.280674"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "seventies_summary[-50:]",
	"execution_count": 27,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "[('Barrett', 28),\n ('Francis', 29),\n ('Faith', 31),\n ('Salzburg', 32),\n ('Whitehead', 32),\n ('Wittgenstein', 36),\n ('Knox', 36),\n ('Mencius', 36),\n ('Martin', 37),\n ('Kierkegaard', 39),\n ('Dostoevsky', 42),\n ('Rahner', 42),\n ('Job', 46),\n ('Macmillan', 46),\n ('Sartre', 49),\n ('St. Francis', 49),\n ('Wesley', 50),\n ('Husserl', 50),\n ('Pistorius', 51),\n ('Camus', 53),\n ('Hegel', 53),\n ('Smith', 58),\n ('Gordon', 59),\n ('Williams', 60),\n ('Jesus Christ', 60),\n ('Church', 61),\n ('Jung', 62),\n ('Campbell', 64),\n ('Van', 64),\n ('Bonhoeffer', 64),\n ('Niebuhr', 66),\n ('Dasein', 66),\n ('Man', 66),\n ('Bultmann', 68),\n ('Row', 69),\n ('Christ', 76),\n ('Luther', 76),\n ('Bernhardt', 82),\n ('John', 90),\n ('Harper', 93),\n ('Heidegger', 100),\n ('Iliff', 114),\n ('Wieman', 119),\n ('Paul', 127),\n ('Vol', 134),\n ('Lowell', 179),\n ('Tillich', 189),\n ('Ricoeur', 200),\n ('Jesus', 317),\n ('God', 737)]"
	},
	"metadata": {},
	"execution_count": 27
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T18:04:58.904488",
	"end_time": "2017-01-27T18:08:40.712988"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "eighties = get_decade_data(\"8\")\neighties_summary = summary_data(eighties)",
	"execution_count": 28,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2017-01-27T18:08:40.715068",
	"end_time": "2017-01-27T18:08:40.723500"
	},
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "eighties_summary[-50:]",
	"execution_count": 29,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "[('Himself', 23),\n ('Petersen', 23),\n ('Mapple', 24),\n ('Charles', 24),\n ('Williams', 25),\n ('Bernhardt', 25),\n ('King', 25),\n ('Macmillan', 26),\n ('Martin', 26),\n ('Wilbanks', 27),\n ('Sontag', 28),\n ('Milligan', 29),\n ('Order', 29),\n ('Bob', 29),\n ('Faith', 29),\n ('Holler', 30),\n ('Tracy', 32),\n ('Jews', 32),\n ('Campbell', 32),\n ('Strong', 33),\n ('Jesus Christ', 34),\n ('Whitehead', 35),\n ('Luther', 37),\n ('Smith', 38),\n ('Rist', 39),\n ('Tillich', 40),\n ('James', 42),\n ('Haggai', 44),\n ('Herbert', 44),\n ('Melville', 45),\n ('Schweitzer', 45),\n ('Gandhi', 50),\n ('Vol', 51),\n ('Thomas', 57),\n ('Fowler', 60),\n ('Harper', 62),\n ('Row', 63),\n ('Freud', 71),\n ('Christ', 71),\n ('Pannenberg', 74),\n ('Black Elk', 78),\n ('Wieman', 84),\n ('John', 99),\n ('Potthoff', 103),\n ('Oecolampadius', 107),\n ('Matthew', 126),\n ('Paul', 152),\n ('David', 185),\n ('Jesus', 230),\n ('God', 722)]"
	},
	"metadata": {},
	"execution_count": 29
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3",
	"language": "python"
	},
	"language_info": {
	"codemirror_mode": {
	"version": 3,
	"name": "ipython"
	},
	"nbconvert_exporter": "python",
	"version": "3.5.2",
	"mimetype": "text/x-python",
	"file_extension": ".py",
	"pygments_lexer": "ipython3",
	"name": "python"
	},
	"gist": {
	"id": "",
	"data": {
	"description": "iliff named entities per decade",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}