crherlihy/cmmi_SourceData_py.ipynb

## cmmi_SourceData_py.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Url that contains high-level model info is: \n",
    "#     https://innovation.cms.gov/initiatives/#views=models\n",
    "# Url that contains JSON info for health care facilities where CMMI models are being tested is:\n",
    "#     https://data.cms.gov/resource/x8pc-u7ta.json\n",
    "# Socrata API endpoint for all hospitals registered with Medicare (acute care = a subset of this list) is:\n",
    "#     https://data.medicare.gov/resource/rbry-mqwu.json\n",
    "\n",
    "path = \"/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/cmmi_src_text.json\"\n",
    "path2 = \"/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/healthCareFacWhereCMMITested.json\"\n",
    "outfile = \"/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/cmmiOut.csv\"\n",
    "outfile2 = \"/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/cmmiOut_Models_withStates.csv\"\n",
    "outfile3 = \"/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/machOutByModel.csv\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/cmmi_src_text.json\n",
      "/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/cmmiOut.csv\n"
     ]
    }
   ],
   "source": [
    "print path\n",
    "print outfile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import csv\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Didn't end up using this, but it might be helpful \n",
    "\n",
    "class cmmiModel:\n",
    "    \"\"\"Object to hold model info\"\"\"\n",
    "    \n",
    "    def __init__(self, kw, num, desc, url, st, mn, stage, auth, dis_sum, cat):        \n",
    "        self.keywords = kw\n",
    "        self.participants = num\n",
    "        self.description = desc\n",
    "        self.url = url\n",
    "        self.states = st\n",
    "        self.modelname = mn\n",
    "        self.stage = stage\n",
    "        self.authority = auth\n",
    "        self.displaysummary = dis_sum\n",
    "        self.category = cat\n",
    "        \n",
    "    def getModelName(self):\n",
    "        return self.modelname\n",
    "    \n",
    "    def getInfo(self):\n",
    "        temp = []\n",
    "        temp.append(self.modelname)\n",
    "        temp.append(self.description)\n",
    "        return temp\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' Christine'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def strSplit(mystring):\n",
    "    return mystring.partition(\":\")[2]\n",
    "\n",
    "strSplit(\"name: Christine\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "import re\n",
    "\n",
    "states = {\n",
    "        'AK': 'Alaska',\n",
    "        'AL': 'Alabama',\n",
    "        'AR': 'Arkansas',\n",
    "        'AS': 'American Samoa',\n",
    "        'AZ': 'Arizona',\n",
    "        'CA': 'California',\n",
    "        'CO': 'Colorado',\n",
    "        'CT': 'Connecticut',\n",
    "        'DC': 'District of Columbia',\n",
    "        'DE': 'Delaware',\n",
    "        'FL': 'Florida',\n",
    "        'GA': 'Georgia',\n",
    "        'GU': 'Guam',\n",
    "        'HI': 'Hawaii',\n",
    "        'IA': 'Iowa',\n",
    "        'ID': 'Idaho',\n",
    "        'IL': 'Illinois',\n",
    "        'IN': 'Indiana',\n",
    "        'KS': 'Kansas',\n",
    "        'KY': 'Kentucky',\n",
    "        'LA': 'Louisiana',\n",
    "        'MA': 'Massachusetts',\n",
    "        'MD': 'Maryland',\n",
    "        'ME': 'Maine',\n",
    "        'MI': 'Michigan',\n",
    "        'MN': 'Minnesota',\n",
    "        'MO': 'Missouri',\n",
    "        'MP': 'Northern Mariana Islands',\n",
    "        'MS': 'Mississippi',\n",
    "        'MT': 'Montana',\n",
    "        'NA': 'National',\n",
    "        'NC': 'North Carolina',\n",
    "        'ND': 'North Dakota',\n",
    "        'NE': 'Nebraska',\n",
    "        'NH': 'New Hampshire',\n",
    "        'NJ': 'New Jersey',\n",
    "        'NM': 'New Mexico',\n",
    "        'NV': 'Nevada',\n",
    "        'NY': 'New York',\n",
    "        'OH': 'Ohio',\n",
    "        'OK': 'Oklahoma',\n",
    "        'OR': 'Oregon',\n",
    "        'PA': 'Pennsylvania',\n",
    "        'PR': 'Puerto Rico',\n",
    "        'RI': 'Rhode Island',\n",
    "        'SC': 'South Carolina',\n",
    "        'SD': 'South Dakota',\n",
    "        'TN': 'Tennessee',\n",
    "        'TX': 'Texas',\n",
    "        'UT': 'Utah',\n",
    "        'VA': 'Virginia',\n",
    "        'VI': 'Virgin Islands',\n",
    "        'VT': 'Vermont',\n",
    "        'WA': 'Washington',\n",
    "        'WI': 'Wisconsin',\n",
    "        'WV': 'West Virginia',\n",
    "        'WY': 'Wyoming'\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# This code block imports /reads from the back-end of the original link David asked us to scrape for.\n",
    "# path = \"/Users/christine/Dropbox/Econometrica/CMMI_DatabaseProject/cmmi_src_text.json\" \n",
    "# https://innovation.cms.gov/initiatives/#views=models \n",
    "#(go to developer tools:\n",
    "# sources / innovation.cms.gov / resources / javascript / InnoModelFilter.js; I c/p the JSON data there to a .txt)\n",
    "\n",
    "\n",
    "class BlankDict(dict):\n",
    "        def __missing__(self, key):\n",
    "            return u'Missing'\n",
    "        \n",
    "def checkStates(modelStates, stateDict):\n",
    "    binaryOut = []\n",
    "\n",
    "    for i in stateDict:\n",
    "        binaryOut.append(0)\n",
    "    \n",
    "    # If an entry contains general info and doesn't refer to an actual model, there won't be any states listed\n",
    "    if 'Missing' in modelStates:\n",
    "        return binaryOut\n",
    "    else:\n",
    "        for state in modelStates:\n",
    "            # print state\n",
    "            dummy = state.strip()\n",
    "            # print state, dummy \n",
    "            temp = sorted(stateDict.keys()).index(dummy)\n",
    "            # print temp\n",
    "            binaryOut[temp] = 1\n",
    "        return binaryOut\n",
    "    \n",
    "                \n",
    "with open(path) as json_data:\n",
    "    cmmiData = json.load(json_data, object_hook=BlankDict)\n",
    "    len(cmmiData) # should = 79\n",
    "\n",
    "with open(outfile, \"wb+\") as csv_file:\n",
    "    csv_writer = csv.writer(csv_file)   \n",
    "    csv_writer.writerow([\"Model_Name\", \"Category\", \"States\", \"Keywords\", \"Num_Participants\", \"Description\", \"url\", \n",
    "                       \"Stage\", \"Authority\", \"Display_Summary\"])\n",
    "    for i in cmmiData:    \n",
    "        csv_writer.writerow([i[u'model_name'].encode('utf8'),\n",
    "                            i[u'category'].encode('utf8'),\n",
    "                            i[u'states'].encode('utf8'),\n",
    "                            i[u'keywords'].encode('utf8'),\n",
    "                            i[u'number_of_participants'].encode('utf8'),\n",
    "                            i[u'description'].encode('utf8'),\n",
    "                            i[u'url'].encode('utf8'),    \n",
    "                            i[u'stage'].encode('utf8'),\n",
    "                            i[u'authority'].encode('utf8'),\n",
    "                            i[u'display_model_summary'].encode('utf8')])\n",
    " \n",
    "\n",
    " # This second file outputs a file in which each row = a model, and there is a column for each state; the model gets a \n",
    "# 1 for each state that includes model participant(s), and a 0 otherwise. \n",
    "\n",
    "with open(outfile2, \"wb+\") as csv_file2:\n",
    "    csv_writer2 = csv.writer(csv_file2)\n",
    "    stateAbbr = sorted(states.keys())    \n",
    "       \n",
    "    header = []\n",
    "    header.append(\"Model_Name\")\n",
    "    header.append(\"Category\")\n",
    "    header.append(\"Stage\")\n",
    "    header.append(\"List of States\")\n",
    "    header.append(\"Number of States\")\n",
    "    # header.append(\"Checksum\")\n",
    "    # header.append(\"Num == check\")\n",
    "    \n",
    "    for state in stateAbbr:\n",
    "        header.append(state)\n",
    "\n",
    "    csv_writer2.writerow(header)\n",
    "    \n",
    "    for j in cmmiData:\n",
    "        getStates = str(j[u'states']).split(\",\")\n",
    "           \n",
    "        binaryStates = checkStates(getStates, states)\n",
    "        \n",
    "        row = []\n",
    "        row.append(j[u'model_name'].encode('utf8'))\n",
    "        row.append(j[u'category'].encode('utf8'))\n",
    "        row.append(j[u'stage'].encode('utf8'))\n",
    "        row.append(j[u'states'])\n",
    "        # row.append(len(getStates))\n",
    "        row.append(sum(binaryStates))\n",
    "        # row.append(len(getStates) == sum(binaryStates))\n",
    "        \n",
    "        '''if(len(getStates)!= sum(binaryStates)):\n",
    "            counter = 0\n",
    "            print sorted(getStates)\n",
    "            for i in sorted(getStates):\n",
    "                print i, binaryStates[counter]\n",
    "                counter += 1'''\n",
    "    \n",
    "        for s in binaryStates:\n",
    "            row.append(s)\n",
    "        \n",
    "        csv_writer2.writerow(row)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "57"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(states)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'BPCI Model 1: Retrospective Acute Care Hospital Stay Only',\n",
       "       u'BPCI Model 2: Retrospective Acute & Post Acute Care Episode',\n",
       "       u'BPCI Model 3: Retrospective Post Acute Care Only',\n",
       "       u'BPCI Model 4: Prospective Acute Care Hospital Stay Only',\n",
       "       u'Community-based Care Transitions Program',\n",
       "       u'Frontier Extended Stay Clinic Demonstration',\n",
       "       u'Graduate Nurse Education Demonstration',\n",
       "       u'Medicaid Emergency Psychiatric Demonstration',\n",
       "       u'Physician Hospital Collaboration Demonstration', u'Pioneer ACO Model',\n",
       "       u'Rural Community Hospital Demonstration', u'Maryland All-Payer Model',\n",
       "       u'Frontier Community Health Integration Project Demonstration',\n",
       "       u'Next Generation ACO Model'],\n",
       "      dtype='object', name=u'Model_Name')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Use the files we have created to subset and select models related to acute care:\n",
    "\n",
    "models = pd.read_csv(outfile, index_col=0, parse_dates=True)\n",
    "len(models) # 79\n",
    "\n",
    "# (1) Remove rows where States == \"Missing\"; these appear to refer to tabs that correspond to general information\n",
    "# about a set of initiatives, rather than a particular model. \n",
    "\n",
    "# This brings us from 79 rows to 46\n",
    "\n",
    "m1 = models.loc[(models.States != \"Missing\"),]\n",
    "m1.head(n=5)\n",
    "len(m1) # 46\n",
    "\n",
    "# (2) \n",
    "\n",
    "def matchString(mystr, row):\n",
    "    mystr = mystr.lower()\n",
    "    result = False\n",
    "    \n",
    "    # Check to see whether acute care is referenced\n",
    "    match = re.search(r'\\bacute care*\\b',mystr)\n",
    "    if match:\n",
    "        row['AcuteCare'] = 1\n",
    "        # print \"match\" # debugging\n",
    "        result = True\n",
    "    \n",
    "    # Check to see if hospital is referenced\n",
    "    match = re.search(r'\\bhospitals*\\b', mystr)\n",
    "    if match:\n",
    "        row['Hospital'] = 1\n",
    "        # print \"match2\" # debugging \n",
    "        result = True\n",
    "    \n",
    "    # Check to see if providers are referenced\n",
    "    match = re.search(r'\\bproviders*\\b', mystr)\n",
    "    if match:\n",
    "        row['Provider'] = 1\n",
    "        # print \"match3\" # debugging \n",
    "        result = True \n",
    "    \n",
    "    return result \n",
    "\n",
    "def findAcuteCareRefs(df):    \n",
    "    columns = m1.columns\n",
    "    index = m1.index\n",
    "    m2 = pd.DataFrame(index=index, columns=columns)\n",
    "    m2['AcuteCare'] = 0\n",
    "    m2['Hospital'] = 0\n",
    "    m2['Provider'] = 0\n",
    "\n",
    "    # Examine keywords and description of each row; if the keywords\n",
    "    for i in range(0,len(df)):\n",
    "        alreadyIn = False # We only want to include a model once, regardless of whether it is a 1 for keyword ^ descr\n",
    "        temp = df.iloc[i]\n",
    "        temp['AcuteCare'] = 0\n",
    "        temp['Hospital'] = 0\n",
    "        temp['Provider'] = 0\n",
    "        keyw = temp['Keywords']\n",
    "        descr = temp['Description']\n",
    "        \n",
    "        # print i, keyw # debugging\n",
    "        \n",
    "        # Add the row to m2 if there is a match in either the 'Keywords' | 'Description' fields\n",
    "        if matchString(keyw, temp):\n",
    "            m2.iloc[i] = temp\n",
    "            alreadyIn = True\n",
    "        if matchString(descr, temp):\n",
    "            if ~alreadyIn:\n",
    "                m2.iloc[i] = temp\n",
    "    # Return m2 (pd.DataFrame object) \n",
    "    return m2\n",
    "\n",
    "# Only keep the observations whose keywords or descriptions reference acute care\n",
    "m2 = findAcuteCareRefs(m1)\n",
    "m2 = m2[m2.Category.notnull()]\n",
    "\n",
    "# This is the subset of models that we should focus on, from the original 79 rows \n",
    "acuteCareModels = m2.index\n",
    "acuteCareModels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hackensack university medical center\n",
      "newark beth israel medical center\n",
      "palisades medical center\n",
      "hunterdon medical center\n",
      "st mary's general hospital\n",
      "holy name medical center\n",
      "clara maass medical center\n",
      "univ medical center of princeton at plainsboro\n",
      "cape regional medical center inc\n",
      "valley hospital\n",
      "cooper university hospital\n",
      "morristown medical center\n",
      "carepoint health-christ hospital\n",
      "chilton medical center\n",
      "st joseph's regional medical center\n",
      "st francis medical center\n",
      "virtua west jersey hospitals berlin\n",
      "robert wood johnson university hospital at rahway\n",
      "carepoint health - bayonne medical center\n",
      "trinitas regional medical center\n",
      "newton medical center\n",
      "our lady of lourdes medical center\n",
      "deborah heart and lung center\n",
      "inspira medical center vineland\n",
      "riverview medical center\n",
      "robert wood johnson university hospital\n",
      "raritan bay medical center perth amboy division\n",
      "carepoint health-hoboken university medical center\n",
      "community medical center\n",
      "capital health medical center - hopewell\n",
      "englewood hospital and medical center\n",
      "shore medical center\n",
      "robert wood johnson university hospital somerset\n",
      "saint clare's hospital\n",
      "overlook medical center\n",
      "ocean medical center\n",
      "hackensack-umc mountainside\n",
      "virtua memorial hospital of burlington county\n",
      "bergen regional medical center\n",
      "st luke's warren hospital\n",
      "lourdes medical center of burlington county\n",
      "atlanticare regional medical center - city campus\n",
      "inspira medical center elmer\n",
      "saint peter's university hospital\n",
      "jersey shore university medical center\n",
      "jersey city medical center\n",
      "monmouth medical center\n",
      "saint barnabas medical center\n",
      "inspira medical center woodbury\n",
      "east orange general hospital\n",
      "monmouth medical center-southern campus\n",
      "kennedy university hospital - stratford div\n",
      "memorial hospital of salem county\n",
      "capital health system-fuld campus\n",
      "saint michael's medical center\n",
      "jfk medical ctr - anthony m. yelencsics community\n",
      "robert wood johnson university hospital hamilton\n",
      "centrastate medical center\n",
      "bayshore community hospital\n",
      "southern ocean medical center\n",
      "hackettstown regional medical center\n",
      "meadowlands hospital medical center\n",
      "university hospital\n",
      "hackensack-umc at pascack valley\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>address</th>\n",
       "      <th>city</th>\n",
       "      <th>county_name</th>\n",
       "      <th>effectiveness_of_care_national_comparison</th>\n",
       "      <th>effectiveness_of_care_national_comparison_footnote</th>\n",
       "      <th>efficient_use_of_medical_imaging_national_comparison</th>\n",
       "      <th>efficient_use_of_medical_imaging_national_comparison_footnote</th>\n",
       "      <th>emergency_services</th>\n",
       "      <th>hospital_name</th>\n",
       "      <th>hospital_overall_rating</th>\n",
       "      <th>...</th>\n",
       "      <th>phone_number</th>\n",
       "      <th>provider_id</th>\n",
       "      <th>readmission_national_comparison</th>\n",
       "      <th>readmission_national_comparison_footnote</th>\n",
       "      <th>safety_of_care_national_comparison</th>\n",
       "      <th>safety_of_care_national_comparison_footnote</th>\n",
       "      <th>state</th>\n",
       "      <th>timeliness_of_care_national_comparison</th>\n",
       "      <th>timeliness_of_care_national_comparison_footnote</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1108 ROSS CLARK CIRCLE</td>\n",
       "      <td>DOTHAN</td>\n",
       "      <td>HOUSTON</td>\n",
       "      <td>Same as the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Same as the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>southeast alabama medical center</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>3347938701</td>\n",
       "      <td>010001</td>\n",
       "      <td>Same as the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Above the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>AL</td>\n",
       "      <td>Same as the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2505 U S HIGHWAY 431 NORTH</td>\n",
       "      <td>BOAZ</td>\n",
       "      <td>MARSHALL</td>\n",
       "      <td>Same as the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Below the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>marshall medical center south</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>2565938310</td>\n",
       "      <td>010005</td>\n",
       "      <td>Above the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Same as the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>AL</td>\n",
       "      <td>Above the National average</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35957</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      address    city county_name  \\\n",
       "0      1108 ROSS CLARK CIRCLE  DOTHAN     HOUSTON   \n",
       "1  2505 U S HIGHWAY 431 NORTH    BOAZ    MARSHALL   \n",
       "\n",
       "  effectiveness_of_care_national_comparison  \\\n",
       "0              Same as the National average   \n",
       "1              Same as the National average   \n",
       "\n",
       "  effectiveness_of_care_national_comparison_footnote  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "\n",
       "  efficient_use_of_medical_imaging_national_comparison  \\\n",
       "0                       Same as the National average     \n",
       "1                         Below the National average     \n",
       "\n",
       "  efficient_use_of_medical_imaging_national_comparison_footnote  \\\n",
       "0                                                NaN              \n",
       "1                                                NaN              \n",
       "\n",
       "  emergency_services                     hospital_name  \\\n",
       "0               True  southeast alabama medical center   \n",
       "1               True     marshall medical center south   \n",
       "\n",
       "  hospital_overall_rating   ...    phone_number provider_id  \\\n",
       "0                       3   ...      3347938701      010001   \n",
       "1                       3   ...      2565938310      010005   \n",
       "\n",
       "  readmission_national_comparison readmission_national_comparison_footnote  \\\n",
       "0    Same as the National average                                      NaN   \n",
       "1      Above the National average                                      NaN   \n",
       "\n",
       "  safety_of_care_national_comparison  \\\n",
       "0         Above the National average   \n",
       "1       Same as the National average   \n",
       "\n",
       "  safety_of_care_national_comparison_footnote state  \\\n",
       "0                                         NaN    AL   \n",
       "1                                         NaN    AL   \n",
       "\n",
       "  timeliness_of_care_national_comparison  \\\n",
       "0           Same as the National average   \n",
       "1             Above the National average   \n",
       "\n",
       "  timeliness_of_care_national_comparison_footnote zip_code  \n",
       "0                                             NaN    36301  \n",
       "1                                             NaN    35957  \n",
       "\n",
       "[2 rows x 28 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Here, we import the names + provider numbers (and other related info) for all Medicare providers, and filter to include\n",
    "# only acute care hospitals. This is the possible universe of providers that we will want to check against. \n",
    "\n",
    "import urllib\n",
    "from operator import itemgetter\n",
    "# The default limit is 1000; as of 1/4/17, there were 4807 providers listed\n",
    "url = \"https://data.medicare.gov/resource/rbry-mqwu.json?&$limit=10000\"\n",
    "response = urllib.urlopen(url)\n",
    "\n",
    "# Medicare Acute Care Providers \n",
    "macp = json.loads(response.read(), object_hook= BlankDict)  \n",
    "len(macp) # should be 4807\n",
    "\n",
    "# Now, we can subset by only selecting Acute Care Hospitals; this brings us from n= 4807 to n = 3370\n",
    "mach = []\n",
    "\n",
    "for provider in macp:\n",
    "    if provider[u'hospital_type']== \"Acute Care Hospitals\":\n",
    "        mach.append(provider)\n",
    "        \n",
    "len(mach) # 3370\n",
    "\n",
    "# QA check: make sure BPCI Model 1 NJ hospitals are included\n",
    "for i in range(0, len(mach)):\n",
    "    mach[i][u'hospital_name'] = mach[i][u'hospital_name'].lower()\n",
    "    if mach[i][u'state'] == \"NJ\":\n",
    "        print mach[i]['hospital_name']\n",
    "\n",
    "mach = sorted(mach, key=itemgetter('provider_id')) # sort by provider ID\n",
    "\n",
    "machDF = pd.DataFrame(mach)\n",
    "machDF.head(n=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1415"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://data.cms.gov/resource/4aqq-tcj3.json?&$limit=10000\"\n",
    "response = urllib.urlopen(url)\n",
    "\n",
    "# CMS Innovation Center Model Participants \n",
    "cmmiParticip = pd.DataFrame(json.loads(response.read(), object_hook= BlankDict))\n",
    "len(cmmiParticip.index) # should be 4024\n",
    "\n",
    "cmmiModels = cmmiParticip.name_of_initiative.unique()\n",
    "\n",
    "columns = cmmiParticip.columns\n",
    "index = cmmiParticip.index\n",
    "cmmiSubset = pd.DataFrame(index=index, columns= columns)\n",
    "\n",
    "# we only want to keep participants in models that are IN our acute care models list, or that contain BPCI (for regex reasons)\n",
    "for i in range(0, len(cmmiParticip.index)):\n",
    "    if cmmiParticip.iloc[i, 7] in acuteCareModels or \"BPCI\" in cmmiParticip.iloc[i, 7]:\n",
    "        cmmiParticip.iloc[i,9] = cmmiParticip.iloc[i,9].lower()\n",
    "        cmmiSubset.iloc[i] = cmmiParticip.iloc[i]\n",
    "\n",
    "cmmiSubset = cmmiSubset[cmmiSubset.category.notnull()]\n",
    "len(cmmiSubset.index) #1415\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>city</th>\n",
       "      <th>facebook</th>\n",
       "      <th>location_1</th>\n",
       "      <th>location_1_address</th>\n",
       "      <th>location_1_city</th>\n",
       "      <th>location_1_state</th>\n",
       "      <th>name_of_initiative</th>\n",
       "      <th>notes</th>\n",
       "      <th>organization_name</th>\n",
       "      <th>phase_2</th>\n",
       "      <th>state</th>\n",
       "      <th>state_based</th>\n",
       "      <th>street_address</th>\n",
       "      <th>twitter</th>\n",
       "      <th>unique_id</th>\n",
       "      <th>website</th>\n",
       "      <th>youtube</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Episode-based Payment Initiatives</td>\n",
       "      <td>Hilo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>{u'type': u'Point', u'coordinates': [-155.0922...</td>\n",
       "      <td>1190 Waianuenue Ave</td>\n",
       "      <td>Hilo</td>\n",
       "      <td>HI</td>\n",
       "      <td>BPCI Initiative: Model 2</td>\n",
       "      <td>Number of Episodes: 29 // Convening Organizati...</td>\n",
       "      <td>quantum healthcare medical associates inc.</td>\n",
       "      <td>Congestive heart failure, Diabetes, Fractures...</td>\n",
       "      <td>HI</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1190 Waianuenue Ave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3050</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Initiatives to Accelerate the Development and ...</td>\n",
       "      <td>Circle</td>\n",
       "      <td>NaN</td>\n",
       "      <td>{u'type': u'Point', u'coordinates': [-81.52311...</td>\n",
       "      <td>605 Sullivan Ave.</td>\n",
       "      <td>Circle</td>\n",
       "      <td>NV</td>\n",
       "      <td>Frontier Community Health Integration Project ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>mccone county health center</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NV</td>\n",
       "      <td>NaN</td>\n",
       "      <td>605 Sullivan Ave.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Primary Care Transformation</td>\n",
       "      <td>Scottsdale</td>\n",
       "      <td>https://www.facebook.com/scottsdalehealthcare</td>\n",
       "      <td>{u'type': u'Point', u'coordinates': [-111.9260...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Scottsdale</td>\n",
       "      <td>AZ</td>\n",
       "      <td>Graduate Nurse Education Demonstration</td>\n",
       "      <td>NaN</td>\n",
       "      <td>scottsdale healthcare medical center</td>\n",
       "      <td>NaN</td>\n",
       "      <td>AZ</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://twitter.com/SHCpublicinfo</td>\n",
       "      <td>1007</td>\n",
       "      <td>http://www.shc.org/</td>\n",
       "      <td>http://www.youtube.com/user/ScottsdaleHealthca...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             category        city  \\\n",
       "1                   Episode-based Payment Initiatives        Hilo   \n",
       "12  Initiatives to Accelerate the Development and ...      Circle   \n",
       "13                        Primary Care Transformation  Scottsdale   \n",
       "\n",
       "                                         facebook  \\\n",
       "1                                             NaN   \n",
       "12                                            NaN   \n",
       "13  https://www.facebook.com/scottsdalehealthcare   \n",
       "\n",
       "                                           location_1   location_1_address  \\\n",
       "1   {u'type': u'Point', u'coordinates': [-155.0922...  1190 Waianuenue Ave   \n",
       "12  {u'type': u'Point', u'coordinates': [-81.52311...    605 Sullivan Ave.   \n",
       "13  {u'type': u'Point', u'coordinates': [-111.9260...                  NaN   \n",
       "\n",
       "   location_1_city location_1_state  \\\n",
       "1             Hilo               HI   \n",
       "12          Circle               NV   \n",
       "13      Scottsdale               AZ   \n",
       "\n",
       "                                   name_of_initiative  \\\n",
       "1                            BPCI Initiative: Model 2   \n",
       "12  Frontier Community Health Integration Project ...   \n",
       "13             Graduate Nurse Education Demonstration   \n",
       "\n",
       "                                                notes  \\\n",
       "1   Number of Episodes: 29 // Convening Organizati...   \n",
       "12                                                NaN   \n",
       "13                                                NaN   \n",
       "\n",
       "                             organization_name  \\\n",
       "1   quantum healthcare medical associates inc.   \n",
       "12                 mccone county health center   \n",
       "13       scottsdale healthcare medical center    \n",
       "\n",
       "                                              phase_2 state state_based  \\\n",
       "1    Congestive heart failure, Diabetes, Fractures...    HI         NaN   \n",
       "12                                                NaN    NV         NaN   \n",
       "13                                                NaN    AZ         NaN   \n",
       "\n",
       "         street_address                            twitter unique_id  \\\n",
       "1   1190 Waianuenue Ave                                NaN      3050   \n",
       "12   605 Sullivan Ave.                                 NaN      1000   \n",
       "13                  NaN  https://twitter.com/SHCpublicinfo      1007   \n",
       "\n",
       "                website                                            youtube  \n",
       "1                   NaN                                                NaN  \n",
       "12                  NaN                                                NaN  \n",
       "13  http://www.shc.org/  http://www.youtube.com/user/ScottsdaleHealthca...  "
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first three rows of the CMMI subset.\n",
    "cmmiSubset.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "163"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Match acute care hospitals from `mach` against the organizations in `cmmiSubset` by name.\n",
    "# The comparison is exact string equality, so any difference in case or surrounding\n",
    "# whitespace prevents a match.\n",
    "machSet = set(hosp[u'hospital_name'] for hosp in mach)  # Medicare Acute Care Hospitals (3251 when last run)\n",
    "cmmiSubsetSet = set(cmmiSubset[u'organization_name'])  # organizations in the CMMI subset (1306 when last run)\n",
    "\n",
    "# Names that appear in both sources.\n",
    "t1 = machSet.intersection(cmmiSubsetSet)\n",
    "len(t1)  # 163 when last run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 address\n",
      "1 city_x\n",
      "2 county_name\n",
      "3 effectiveness_of_care_national_comparison\n",
      "4 effectiveness_of_care_national_comparison_footnote\n",
      "5 efficient_use_of_medical_imaging_national_comparison\n",
      "6 efficient_use_of_medical_imaging_national_comparison_footnote\n",
      "7 emergency_services\n",
      "8 hospital_name\n",
      "9 hospital_overall_rating\n",
      "10 hospital_overall_rating_footnote\n",
      "11 hospital_ownership\n",
      "12 hospital_type\n",
      "13 meets_criteria_for_meaningful_use_of_ehrs\n",
      "14 mortality_national_comparison\n",
      "15 mortality_national_comparison_footnote\n",
      "16 patient_experience_national_comparison\n",
      "17 patient_experience_national_comparison_footnote\n",
      "18 phone_number\n",
      "19 provider_id\n",
      "20 readmission_national_comparison\n",
      "21 readmission_national_comparison_footnote\n",
      "22 safety_of_care_national_comparison\n",
      "23 safety_of_care_national_comparison_footnote\n",
      "24 state_x\n",
      "25 timeliness_of_care_national_comparison\n",
      "26 timeliness_of_care_national_comparison_footnote\n",
      "27 zip_code\n",
      "28 category\n",
      "29 city_y\n",
      "30 facebook\n",
      "31 location_1\n",
      "32 location_1_address\n",
      "33 location_1_city\n",
      "34 location_1_state\n",
      "35 name_of_initiative\n",
      "36 notes\n",
      "37 organization_name\n",
      "38 phase_2\n",
      "39 state_y\n",
      "40 state_based\n",
      "41 street_address\n",
      "42 twitter\n",
      "43 unique_id\n",
      "44 website\n",
      "45 youtube\n",
      "46 _merge\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "163"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a data frame of all Medicare Acute Care Hospitals that have participated in at least one CMMI model.\n",
    "# Rows are joined on exact name equality; `indicator=True` appends a `_merge` column.\n",
    "mergeOut = pd.merge(\n",
    "    machDF,\n",
    "    cmmiSubset,\n",
    "    left_on=\"hospital_name\",\n",
    "    right_on=\"organization_name\",\n",
    "    indicator=True\n",
    ")\n",
    "\n",
    "# List every merged column next to its position (later cells select columns by position).\n",
    "for counter, i in enumerate(mergeOut.columns):\n",
    "    print counter, i\n",
    "\n",
    "# Keep a single row per hospital: the first merged row for each hospital_name.\n",
    "mergeOutClean = mergeOut.drop_duplicates(subset='hospital_name', keep='first')\n",
    "\n",
    "len(mergeOutClean) # should be, and is, 163"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([u'BPCI Initiative: Model 1', u'BPCI Initiative: Model 2',\n",
      "       u'BPCI Initiative: Model 3', u'BPCI Initiative: Model 4',\n",
      "       u'Community-based Care Transitions Program',\n",
      "       u'Frontier Community Health Integration Project Demonstration',\n",
      "       u'Graduate Nurse Education Demonstration',\n",
      "       u'Medicaid Emergency Psychiatric Demonstration',\n",
      "       u'Next Generation ACO Model', u'Pioneer ACO Model', u'hospital_name',\n",
      "       u'provider_id', u'state_x'],\n",
      "      dtype='object')\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 3 BPCI Initiative: Model 3 2\n",
      "BPCI Initiative: Model 4 BPCI Initiative: Model 4 3\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "Graduate Nurse Education Demonstration Graduate Nurse Education Demonstration 6\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 1 BPCI Initiative: Model 1 0\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 4 BPCI Initiative: Model 4 3\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 4 BPCI Initiative: Model 4 3\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "Graduate Nurse Education Demonstration Graduate Nurse Education Demonstration 6\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 4 BPCI Initiative: Model 4 3\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n",
      "BPCI Initiative: Model 2 BPCI Initiative: Model 2 1\n"
     ]
    }
   ],
   "source": [
    "# Row labels of the output table: one row per de-duplicated hospital.\n",
    "index = mergeOutClean.index\n",
    "# Provider identification columns, selected by name rather than by position so the\n",
    "# selection does not silently change if the merged frame's column order changes.\n",
    "provInfoCols = pd.Index([u'hospital_name', u'provider_id', u'state_x'])\n",
    "# Every distinct initiative name in the CMMI subset becomes one indicator column.\n",
    "cmmiSubset2 = cmmiSubset.name_of_initiative.unique()\n",
    "# The union mixes provider and model labels; the provider columns are moved to the front further down.\n",
    "cols = provInfoCols.union(cmmiSubset2)\n",
    "print cols\n",
    "\n",
    "def order(frame,var):\n",
    "    varlist =[w for w in frame.columns if w not in var]\n",
    "    frame = frame[var+varlist]\n",
    "    return frame \n",
    "\n",
    "# Empty table with one row per hospital (`index`) and one column per label in `cols`.\n",
    "machByModel = pd.DataFrame(index=index, columns=cols)\n",
    "# Move the provider columns to positions 0-2, which is where fillInTable writes them.\n",
    "machByModel = order(machByModel,['provider_id', 'hospital_name', 'state_x'])\n",
    "\n",
    "\n",
    "def fillInTable(inputDf, out, modelList):\n",
    "    \n",
    "    modelList = sorted(modelList)\n",
    "    \n",
    "    for i in range(0, len(inputDf)):\n",
    "        out.iloc[i, 0] = inputDf.iloc[i, 19] # provider Id\n",
    "        out.iloc[i, 1] = inputDf.iloc[i, 8] # hospital name\n",
    "        out.iloc[i, 2] = inputDf.iloc[i, 24] # state name\n",
    "        out.iloc[i, 3:len(out)] = 0 # fill in w/0s as placeholders; these are booleans for model participation \n",
    "        \n",
    "        # Here, if the initiative for a row == a particular model, give that observation a 1 for that model. \n",
    "        def checkModel(cell, models):\n",
    "            result = False\n",
    "            counter = -1\n",
    "            counterOut = -1\n",
    "            for model in models:\n",
    "                counter += 1\n",
    "                if cell == model:\n",
    "                    result = True                 \n",
    "                    counterOut = counter \n",
    "                    print cell, model, counterOut\n",
    "            return [result, counterOut]\n",
    "        \n",
    "        temp = inputDf.iloc[i,35] #this is the name of the initiative for each observation\n",
    "        \n",
    "        result = checkModel(temp, modelList)\n",
    "        # print \"result: \" + str(result) # debugging \n",
    "        \n",
    "        # If there was a match, get the index and offset and make that cell a 1\n",
    "        if result[0]:\n",
    "            out.iloc[i, (result[1] + 3)] = 1\n",
    "        \n",
    "    return out\n",
    "\n",
    "# fillInTable writes into machByModel and returns that same object, so machByModelOut\n",
    "# and machByModel refer to one table.\n",
    "# NOTE(review): mergeOutClean keeps only the first merged row per hospital_name\n",
    "# (drop_duplicates(..., keep='first')), so each hospital can be flagged for at most one\n",
    "# initiative here. If a hospital can appear under several initiatives, the table should be\n",
    "# built from mergeOut instead -- TODO confirm against the data.\n",
    "machByModelOut = fillInTable(mergeOutClean, machByModel, cmmiSubset2)\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([u'provider_id', u'hospital_name', u'state_x',\n",
      "       u'BPCI Initiative: Model 1', u'BPCI Initiative: Model 2',\n",
      "       u'BPCI Initiative: Model 3', u'BPCI Initiative: Model 4',\n",
      "       u'Community-based Care Transitions Program',\n",
      "       u'Frontier Community Health Integration Project Demonstration',\n",
      "       u'Graduate Nurse Education Demonstration',\n",
      "       u'Medicaid Emergency Psychiatric Demonstration',\n",
      "       u'Next Generation ACO Model', u'Pioneer ACO Model'],\n",
      "      dtype='object')\n"
     ]
    }
   ],
   "source": [
    "# Show the final column layout of the participation table.\n",
    "print machByModelOut.columns\n",
    "\n",
    "# Write the table to CSV: a header row with the column labels, then one line per hospital.\n",
    "with open(outfile3, \"wb+\") as csv_file:\n",
    "    csv_writer = csv.writer(csv_file)\n",
    "    csv_writer.writerow(machByModelOut.columns)\n",
    "    csv_writer.writerows(machByModelOut.values.tolist())\n",
    "\n",
    "\n",
    "# This is almost what we need, but a few key problems remain:\n",
    "# (1) The data is current and does not reflect or include providers that participated in past models\n",
    "#     or in models they have since exited (for example, only Kansas is included for participation in BPCI Model 1).\n",
    "# (2) Need to validate that this is the correct subset of models and that we've included all that refer to acute care providers.\n",
    "# (3) The element of time is still missing. Need to get start and end dates for each model and THEN assess participation by qtr-yr-prvdr."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}