cmgerber/Criteria_Extraction.ipynb

## Criteria_Extraction.ipynb
{
 "worksheets": [
  {
   "cells": [
    {
     "metadata": {},
     "cell_type": "code",
     "input": "import nltk\nimport cPickle as pickle\nfrom collections import Counter\nfrom random import shuffle",
     "prompt_number": 2,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Load Data",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "#test data\ntest_data = pickle.load(open('data/test_tagged_data.pkl', 'rb'))",
     "prompt_number": 2,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "data = pickle.load(open('data/criteria_text_chunk_3.pkl', 'rb'))",
     "prompt_number": 231,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Initial Concept Term Lists",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "smoker_list = ['Non-smoker', 'smoker', 'Current smoker', 'smoking']\npregnancy_list = ['Pregnancy']\nbirth_control_list = ['Birth control', 'contraception']\ndrug_list = ['Illicit drugs', 'Alcohol abuse', 'illegal', 'illicit', 'drug abuse']\nheart_failure_list = ['Congestive Heart Failure', 'heart failure']\nhiv_list = ['HIV', 'aids', 'human immunodeficiency virus']\nallergy_list = ['Allergies', 'allergy', 'hypersensitivity']",
     "prompt_number": 219,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Initial Predictive Terms",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "smoker_pred_list = ['current']\npregnancy_pred_list = ['potential', 'negative']\nbirth_control_pred_list = ['effective', 'Fertile patients', 'must use effective',\n                           'must use', 'use effective', 'Fertile patients must use',\n                           'fertile']\ndrug_pred_list = ['use', 'abuse']\nheart_failure_pred_list = []\nhiv_pred_list = []\nallergy_pred_list = ['known', 'history', 'suspected', 'known suspected',\n                     'clinically significant']",
     "prompt_number": 220,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Discount Dictionaries",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "#dictionaries keeping track of predictors said no to\nsmoker_pred_dict = {}\npregnancy_pred_dict = {}\nbirth_control_pred_dict = {}\ndrug_pred_dict = {}\nheart_failure_pred_dict = {}\nhiv_pred_dict = {}\nallergy_pred_dict = {}\n\n#dictionaries to keep track of terms said no to\nsmoker_term_dict = {}\npregnancy_term_dict = {}\nbirth_control_term_dict = {}\ndrug_term_dict = {}\nheart_failure_term_dict = {}\nhiv_term_dict = {}\nallergy_term_dict = {}",
     "prompt_number": 221,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "pred_list = [smoker_pred_list, pregnancy_pred_list, birth_control_pred_list, drug_pred_list,\n                   heart_failure_pred_list, hiv_pred_list, allergy_pred_list]\nterm_list = [smoker_list, pregnancy_list, birth_control_list, drug_list, heart_failure_list,\n             hiv_list, allergy_list]\npred_dicts = [smoker_pred_dict, pregnancy_pred_dict, birth_control_pred_dict, drug_pred_dict,\n              heart_failure_pred_dict, hiv_pred_dict, allergy_pred_dict]\nterm_dicts = [smoker_term_dict, pregnancy_term_dict, birth_control_term_dict, drug_term_dict,\n              heart_failure_term_dict, hiv_term_dict, allergy_term_dict]",
     "prompt_number": 222,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Find new predictors",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "def active_learn_predictors(data, term_list, pred_list, pred_dicts):\n    #look for more predictors for each concept by finding sentnces that have \n    #concept terms in them and looking for predictors in those sentences \n\n    def get_pred(text_dict, term_list, pred_dicts, pred_list):\n        pred_options_dict = Counter()\n        for doc in text_dict.values():\n            for subdoc in doc:\n                for sent in subdoc:\n                    #if the sentance has less than 2 words skip it\n                    if len(sent) <= 1:\n                        continue\n                    #crate a sentence rank for judging weight of terms found\n                    sent_rank = 0\n                    for term in term_list:\n                        if term.lower() in ' '.join(zip(*sent)[0]).lower():\n                            sent_rank += 1\n                    result = chunker(sent)\n                    preds = [' '.join(x) for x in [[x[0] for x in term] for term in result]]\n                    preds.append(' '.join([sent[0][0], sent[1][0]]))\n                    #lower case all preds\n                    preds = [x.lower() for x in preds]\n                    preds = preds * sent_rank\n                    pred_options_dict.update(preds)\n\n        #get top 20 predictors that have not been seen before\n        sorted_preds = sorted(pred_options_dict.items(), key=lambda x: x[1], reverse=True)\n        counter = 0\n        top_preds = []\n        for pred in sorted_preds:\n            if pred[0] not in pred_list and pred[0] not in pred_dicts:\n                top_preds.append(pred)\n                counter += 1\n                if counter == 15 or counter == len(sorted_preds):\n                    return top_preds\n        #if there are no preds return empty list\n        return top_preds\n\n    #get chunks for preds\n    def chunker(sent):\n\n        chunk_reg1 = r\"\"\"\n                          CHUNK: {<NN.*><IN>}\n                     
\"\"\"\n        chunk_reg2 = r\"\"\"\n                          CHUNK: {<VB.*><DT>}\n                     \"\"\"\n        chunk_reg3 = r\"\"\"\n                          CHUNK: {<NN.*><VB.*>}\n                     \"\"\"\n        results = []\n\n        for chunk_reg in [chunk_reg1, chunk_reg2, chunk_reg3]:\n            cp = nltk.RegexpParser(chunk_reg)\n\n            tree = cp.parse(sent)\n            for subtree in tree.subtrees():\n                if subtree.label() == 'CHUNK':\n                    results.append(subtree[:])\n        return results\n\n    def human_checker(term, pred_list, top_preds, pred_dict):\n        '''This function loops through the possible predictors and\n        lets human input decide if they actually are or not'''\n        print 'Are the following predictors of these %r?' % (term)\n        if len(top_preds) > 1:\n            for pred in top_preds:\n                print 'Predictor: \\x1b[35m %s \\x1b[0m  Count: \\x1b[36m %d \\x1b[0m' % (pred[0], pred[1])\n                answer_switch = True\n                while answer_switch:\n                    add_pred = raw_input('Is this a predictor of %s? 
(Y, N, exit): ' % (term[0]))\n                    if add_pred.lower() == 'y':\n                        pred_list.append(pred[0])\n                        answer_switch = False\n                    elif add_pred.lower() == 'exit':\n                        #pass switch to exit program\n                        exit_switch = True\n                        return pred_list, pred_dict, exit_switch\n                    elif add_pred.lower() == 'n':\n                        pred_dict[pred[0]] = ''\n                        answer_switch = False\n                    else:\n                        pass\n                    \n        exit_switch = False\n        return pred_list, pred_dict, exit_switch\n\n\n    for idx, term in enumerate(term_list):\n        top_preds = get_pred(data, term, pred_dicts[idx], pred_list[idx])\n        print '\\n**NEW Concept**\\n'\n        pred_list[idx], pred_dicts[idx], exit_switch = human_checker(term, pred_list[idx], top_preds, pred_dicts[idx])\n        #save list and dict\n        #make sure it is not null before saving\n        if pred_list[idx]:\n            pickle.dump(pred_list, open('data/predictor_list.pkl', 'wb'))\n            pickle.dump(pred_dicts, open('data/not_predictor_dict.pkl', 'wb'))\n        else:\n            print 'pred list Null'\n        #if exit, exit program\n        if exit_switch:\n            break\n    print 'Active Learning Complete'\n    return pred_list, pred_dicts\n",
     "prompt_number": 223,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Find new terms",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "def active_learn_terms(data, term_list, pred_list, term_dicts):\n    #look for more terms for each concept by finding sentnces that have \n    #predictors in them and looking for terms in those sentences \n\n    def get_pred(text_dict, term_list, term_dicts, pred_list):\n        term_options_dict = Counter()\n        for doc in text_dict.values():\n            for subdoc in doc:\n                for sent in subdoc:\n                    #skip sentence if it contains less than one word\n                    if len(sent) <= 1:\n                            continue\n                    #crate a sentence rank for judging weight of terms found\n                    sent_rank = 0\n                    for pred in pred_list:\n                        if pred[0].lower() in ' '.join(zip(*sent)[0]).lower():\n                            sent_rank += pred[1]\n                    result = chunker(sent)\n                    terms = [' '.join(x) for x in [[x[0] for x in term] for term in result]]\n                    terms.append(' '.join([sent[0][0], sent[1][0]]))\n                    #lower case all preds\n                    terms = [x.lower() for x in terms]\n                    #add weights to terms by multiplying by sent_rank\n                    terms = terms * sent_rank\n                    term_options_dict.update(terms)\n\n        #get top 20 predictors that have not been seen before\n        sorted_terms = sorted(term_options_dict.items(), key=lambda x: x[1], reverse=True)\n        counter = 0\n        top_terms = []\n        for term in sorted_terms:\n            if term[0] not in term_list and term[0] not in term_dicts:\n                top_terms.append(term)\n                counter += 1\n                if counter == 15 or counter == len(sorted_terms):\n                    return top_terms\n        #if there are no preds return empty list\n        return top_terms\n\n    #get chunks for preds\n    def chunker(sent):\n\n        chunk_reg1 = r\"\"\"\n       
                   CHUNK: {(<NN.*><POS>)?<RB>?<JJ.*>*<NN.*>+}\n                     \"\"\"\n        results = []\n\n        for chunk_reg in [chunk_reg1]:\n            cp = nltk.RegexpParser(chunk_reg)\n\n            tree = cp.parse(sent)\n            for subtree in tree.subtrees():\n                if subtree.label() == 'CHUNK':\n                    results.append(subtree[:])\n        return results\n\n    def human_checker(term_list, top_terms, term_dict):\n        '''This function loops through the possible terms and\n        lets human input decide if they actually are or not'''\n        print 'Are the following terms part of this list: %r?' % (term_list)\n        if len(top_terms) > 1:\n            for term in top_terms:\n                print 'Term: \\x1b[35m %s \\x1b[0m  Count: \\x1b[36m %d \\x1b[0m' % (term[0], (term[1]/7.))\n                answer_switch = True\n                while answer_switch:\n                    add_term = raw_input('Is this similar to %s? (Y, N, exit): ' % (term_list[0]))\n                    if add_term.lower() == 'y':\n                        term_list.append(term[0])\n                        answer_switch = False\n                    elif add_term.lower() == 'exit':\n                        #pass switch to exit program\n                        exit_switch = True\n                        return term_list, term_dict, exit_switch\n                    elif add_term.lower() == 'n':\n                        term_dict[term[0]] = ''\n                        answer_switch = False\n                    else:\n                        pass\n                    \n        exit_switch = False\n        return term_list, term_dict, exit_switch\n\n    #making a pred weight list because of scoping problems in iPyhton notebooks\n    smoker_pred_weight_list = []\n    pregnancy_pred_weight_list = []\n    birth_control_pred_weight_list = []\n    drug_pred_weight_list = []\n    heart_failure_pred_weight_list = []\n    hiv_pred_weight_list = []\n    
allergy_pred_weight_list = []\n    \n    pred_weight_list = [smoker_pred_weight_list, pregnancy_pred_weight_list,\n                 birth_control_pred_weight_list, drug_pred_weight_list,\n                 heart_failure_pred_weight_list, hiv_pred_weight_list, allergy_pred_weight_list]\n    \n    #create a combined list of all preds, create Counter dict\n    tot_pred_list = []\n    for p in pred_list:\n        tot_pred_list += p\n    count_pred = Counter(tot_pred_list)\n\n    #add weights to pred terms and create new pred weight lists\n    for n in xrange(len(pred_list)):\n        for idx in range(len(pred_list[n])):\n            weight  = 7 - (count_pred[pred_list[n][idx]]-1)\n            pred_weight_list[n].append((pred_list[n][idx], weight))\n\n\n\n\n    for idx, term in enumerate(term_list):\n        top_terms = get_pred(data, term, term_dicts[idx], pred_weight_list[idx])\n        print '\\n**NEW Concept**\\n'\n        term_list[idx], term_dicts[idx], exit_switch = human_checker(term, top_terms, term_dicts[idx])\n        #save list and dict\n        #make sure it is not null before saving\n        if pred_list[idx]:\n            pickle.dump(term_list, open('data/term_list.pkl', 'wb'))\n            pickle.dump(term_dicts, open('data/not_term_dict.pkl', 'wb'))\n        else:\n            print 'Term list Null'\n        #if exit, exit program\n        if exit_switch:\n            break\n    print 'Active Learning Complete'\n    return term_list, term_dicts",
     "prompt_number": 224,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "#load in past predictor terms\npred_list = pickle.load(open('data/predictor_list.pkl', 'rb'))\npred_dicts = pickle.load(open('data/not_predictor_dict.pkl', 'rb'))\n#load in past concept terms\nterm_list = pickle.load(open('data/term_list.pkl', 'rb'))\nterm_dicts = pickle.load(open('data/not_term_dict.pkl', 'rb'))",
     "prompt_number": 4,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Active Learn Predictor Terms",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "pred_list, pred_dicts = active_learn_predictors(data, term_list, pred_list, pred_dicts)",
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Active Learn Concept Terms",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "term_list, term_dicts = active_learn_terms(data, term_list, pred_list, term_dicts)",
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Change save names for presentation examples",
     "level": 1
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Display Highlighted Criteria",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "markdown",
     "source": "Display criteria split by Inclusion and Exclusion.\n\nSentences are highlighted with different colors depending on the concept they contain.\n\nEach trial is tagged with two sets of tags: Inclusion and Exclusion."
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "#load trail concept lookup dict\ntrial_concept_lookup = pickle.load(open('data/trial_concept_lookup.pkl', 'rb'))",
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "def criteria_highlight(data, term_list, term_color_lookup, trial_concept_lookup,\n                       concept_lookup):\n    for key, value in data.items():\n        #print a color key\n        print 'Color Legend'\n        for c in xrange(len(term_color_lookup)):\n            print (term_color_lookup[c] + concept_lookup[c] + '\\x1b[0m \\x1b[0m')\n        print\n        print key\n        if key not in trial_concept_lookup:\n            trial_concept_lookup[key] = {'inclusion':set(),\n                                         'exclusion':set()}\n        for group in value:\n            doc = [' '.join(word) for word in [[word[0] for word in sent] for sent in group]]\n            #check each sentence for concept terms\n            for sent_idx in xrange(len(doc)):\n                for concept_idx in xrange(len(term_list)):\n                    for term in term_list[concept_idx]:\n                        if term.lower() in doc[sent_idx].lower():\n                            #tag trial with this concept\n                            #split into inclusion and exclusion sections\n                            if 'inclusion criteria' in doc[0].lower():\n                                trial_concept_lookup[key]['inclusion'].add(concept_lookup[concept_idx])\n                            elif 'exclusion criteria' in doc[0].lower():\n                                trial_concept_lookup[key]['exclusion'].add(concept_lookup[concept_idx])\n                            #if the background is being set to black you have to escape twice\n                            if concept_idx == 6:\n                                doc[sent_idx] = (term_color_lookup[concept_idx] + doc[sent_idx]\n                                             + '\\x1b[0m \\x1b[0m')\n                            else:\n                                doc[sent_idx] = (term_color_lookup[concept_idx] + doc[sent_idx]\n                                                 + '\\x1b[0m')\n                              
  \n            #check to print inclusion or exclusion tags\n            if 'inclusion criteria' in doc[0].lower():\n                if len(trial_concept_lookup[key]['inclusion']) >= 1:\n                    print 'Tags: ', list(trial_concept_lookup[key]['inclusion'])\n                else:\n                    print 'Tags: None'\n                print\n\n            elif 'exclusion criteria' in doc[0].lower():\n                if len(trial_concept_lookup[key]['exclusion']) >= 1:\n                    print 'Tags: ', list(trial_concept_lookup[key]['exclusion'])\n                else:\n                    print 'Tags: None'\n                print\n\n            for sent in doc:\n                print sent\n            print\n        #save lookup dict\n        pickle.dump(trial_concept_lookup, open('data/trial_concept_lookup.pkl', 'wb'))\n        return trial_concept_lookup",
     "prompt_number": 380,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "term_color_lookup = ['\\x1b[41m', '\\x1b[42m', '\\x1b[43m', '\\x1b[44m', '\\x1b[45m', '\\x1b[46m',\n                     '\\x1b[40m \\x1b[37m']\nconcept_lookup = ['Smoking', 'Pregnancy', 'Birth Control', 'Illicit drugs',\n                  'Congestive heart failure', 'HIV', 'Allergies']\n\nshuffled_trials = data.items()\nshuffle(shuffled_trials)\n\nfor trial in shuffled_trials:\n    if trial[0] not in trial_concept_lookup:\n        trial_concept_lookup = criteria_highlight({trial[0]:trial[1]}, term_list,\n                                          term_color_lookup,\n                                          trial_concept_lookup, concept_lookup)\n    break",
     "prompt_number": 400,
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": "Color Legend\n\u001b[41mSmoking\u001b[0m \u001b[0m\n\u001b[42mPregnancy\u001b[0m \u001b[0m\n\u001b[43mBirth Control\u001b[0m \u001b[0m\n\u001b[44mIllicit drugs\u001b[0m \u001b[0m\n\u001b[45mCongestive heart failure\u001b[0m \u001b[0m\n\u001b[46mHIV\u001b[0m \u001b[0m\n\u001b[40m \u001b[37mAllergies\u001b[0m \u001b[0m\n\nNCT01342159\nTags: None\n\nInclusion Criteria :\nDiabetic macular edema ( central macular thickness greater than 300 mm on optical coherence tomography )\n\nTags:  ['Congestive heart failure']\n\nExclusion Criteria :\nhistory of glaucoma or ocular hypertension ( defined as an intraocular pressure higher than 22 mmHg )\nan ocular condition ( other than diabetes ) that , in the opinion of the investigator , might affect macular oedema or alter visual acuity during the course of the study ( e . g .\nretinal vein occlusion , uveitis or other ocular inflammatory disease , neovascular glaucoma , Irvine-Gass Syndrome , etc .\n)\n\u001b[45msystemic corticosteroid therapy history of thromboembolic event ( including myocardial infarction or cerebral vascular accident )\u001b[0m\nmajor surgery within the prior 6 months or planned within the next 28 days\n\n"
      }
     ],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "pickle.dump(term_list, open('data/term_list.pkl', 'wb'))",
     "prompt_number": 328,
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Notes",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "markdown",
     "source": "Notes: If a sentence contains two concepts, the first concept in the list determines the highlight color. Both concepts are still added to the tags.\n\nExample: Tags - Birth Control and Pregnancy:\n\nPositive pregnancy test in women of child bearing potential or who are unwilling to use an acceptable method of contraception . \n\nProblems:\n\nNegatives - 'Not pregnant', or sentences that say the patient was pregnant but no longer is..."
    },
    {
     "metadata": {},
     "cell_type": "heading",
     "source": "Final Concept Terms and Predictor Terms",
     "level": 3
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "term_list",
     "prompt_number": 392,
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 392,
       "metadata": {},
       "text": "[['Non-smoker',\n  'smoker',\n  'Current smoker',\n  'smoking',\n  'tobacco',\n  'nicotine',\n  'cigarettes',\n  u'tobacco products'],\n ['Pregnancy',\n  u'negative pregnancy test',\n  u'pregnancy',\n  u'urine pregnancy test',\n  u'negative serum pregnancy test',\n  u'negative serum',\n  u'negative urine pregnancy test',\n  u'pregnant women',\n  u'pregnant'],\n ['Birth control',\n  'contraception',\n  u'birth control',\n  u'fertile patients',\n  u'effective contraception',\n  u'child-bearing potential',\n  u'abstinence',\n  u'adequate contraception',\n  u'condom',\n  u'iud',\n  u'intrauterine device',\n  u'diaphragm',\n  u'oral contraceptives'],\n ['Illicit drugs',\n  'Alcohol abuse',\n  'illegal',\n  'illicit',\n  'drug abuse',\n  u'alcohol',\n  u'substance abuse',\n  u'alcohol abuse'],\n ['Congestive Heart Failure',\n  'heart failure',\n  u'myocardial infarction',\n  u'congestive heart failure',\n  u'symptomatic congestive heart failure',\n  u'cardiovascular disease',\n  u'heart disease',\n  u'cardiac disease'],\n ['HIV',\n  'aids',\n  'human immunodeficiency virus',\n  u'hiv',\n  u'human immunodeficiency',\n  u'known hiv'],\n ['Allergies',\n  'allergy',\n  'hypersensitivity',\n  u'known hypersensitivity',\n  u'known allergy']]"
      }
     ],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "pred_list",
     "prompt_number": 15,
     "outputs": [
      {
       "text": "[['current',\n  u'history of',\n  u'use of',\n  u'tobacco use',\n  u'patients who',\n  u'smoking of',\n  u'products in',\n  u'user of',\n  u'products within',\n  u'smokers with',\n  u'subjects who',\n  u'products with',\n  u'nicotine containing',\n  u'smoker of',\n  u'forms of'],\n ['potential',\n  'negative',\n  u'women of',\n  u'have a',\n  u'pregnancy or',\n  u'females of',\n  u'test at',\n  u'history of',\n  u'female patients',\n  u'if female',\n  u'females with',\n  u'planning a',\n  u'test for',\n  u'women with',\n  u'female subjects',\n  u'child bearing',\n  u'woman of'],\n ['effective',\n  'Fertile patients',\n  'must use effective',\n  'must use',\n  'use effective',\n  'Fertile patients must use',\n  'fertile',\n  u'women of',\n  u'method of',\n  u'methods of',\n  u'form of',\n  u'use an',\n  u'females of',\n  u'patients of',\n  u'use a',\n  u'dose of',\n  u'use of',\n  u'forms of',\n  u'child bearing',\n  u'female patients',\n  u'female subjects',\n  u'fertile patients',\n  u'using an',\n  u'administration of'],\n ['use',\n  'abuse',\n  u'history of',\n  u'patient is',\n  u'abuse within',\n  u'evidence of',\n  u'abuse in',\n  u'ounces of',\n  u'treatment for',\n  u'use of',\n  u'dose of',\n  u'drugs within',\n  u'administration of',\n  u'drug within',\n  u'consumption of',\n  u'intake of',\n  u'drugs of',\n  u'abuse of',\n  u'subjects who',\n  u'drugs known',\n  u'presence of',\n  u'drinks per',\n  u'dependence on',\n  u'test for',\n  u'drugs with',\n  u'drugs that',\n  u'current or'],\n [u'history of',\n  u'patients with',\n  u'infarction within',\n  u'evidence of',\n  u'uncontrolled intercurrent',\n  u'illness including',\n  u'disease including',\n  u'risk of',\n  u'clinically significant',\n  u'patient has',\n  u'subjects with',\n  u'symptomatic congestive',\n  u'presence of',\n  u'cardiovascular disease',\n  u'diagnosis of',\n  u'subject has',\n  u'symptoms of',\n  u'cardiac disease',\n  u'uncontrolled congestive',\n  u'has 
symptomatic',\n  u'heart disease',\n  u'severe cardiovascular'],\n [u'history of',\n  u'subjects with',\n  u'infection with',\n  u'patients with',\n  u'test for',\n  u'diagnosis of',\n  u'known hiv',\n  u'any confirmed',\n  u'patient has',\n  u'known human',\n  u'presence of',\n  u'positive test',\n  u'hiv positive',\n  u'co-infection with',\n  u'infection including',\n  u'known infection',\n  u'positive for',\n  u'known diagnosis',\n  u'known positive',\n  u'known history',\n  u'subjects who',\n  u'report having'],\n ['known',\n  'history',\n  'suspected',\n  'known suspected',\n  'clinically significant',\n  u'history of',\n  u'patients with',\n  u'known allergy',\n  u'known hypersensitivity',\n  u'subjects with',\n  u'hypersensitivity to',\n  u'allergy or',\n  u'participant has',\n  u'a known',\n  u'allergy that',\n  u'have known',\n  u'intolerance of',\n  u'children with',\n  u'known severe',\n  u'evidence of']]",
       "output_type": "pyout",
       "metadata": {},
       "prompt_number": 15
      }
     ],
     "language": "python",
     "trusted": true,
     "collapsed": false
    },
    {
     "metadata": {},
     "cell_type": "code",
     "input": "",
     "outputs": [],
     "language": "python",
     "trusted": true,
     "collapsed": false
    }
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "name": "",
  "signature": "sha256:9a73a7945899f3ce4d6fef5db4da75f3c8d43b2ad86837d7e5698a70816fb5ed"
 },
 "nbformat": 3
}