Skip to content

Instantly share code, notes, and snippets.

@akelleh
Last active February 11, 2019 02:32
Show Gist options
  • Save akelleh/c13b8d42ea6474d725180958fedd6358 to your computer and use it in GitHub Desktop.
Save akelleh/c13b8d42ea6474d725180958fedd6358 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import dowhy.datasets\n",
"from dowhy.do_samplers.kernel_density_sampler import KernelDensitySampler\n",
"from dowhy.do_why import CausalModel\n",
"from dowhy.api.causal_data_frame import CausalDataFrame\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from statsmodels.api import OLS"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Seed NumPy's global RNG before generating the synthetic data so that a fresh\n",
"# Restart & Run All starts from the same random state every time.\n",
"# NOTE(review): assumes dowhy.datasets.linear_dataset draws from NumPy's global RNG -- confirm.\n",
"np.random.seed(0)\n",
"\n",
"# 1000 rows, one common cause, no instruments, binary treatment.\n",
"# beta=5 is presumably the true treatment effect (the estimates later in this\n",
"# notebook come out near 4.86) -- confirm against the dowhy.datasets docs.\n",
"data = dowhy.datasets.linear_dataset(beta=5,\n",
"                                     num_common_causes=1,\n",
"                                     num_instruments=0,\n",
"                                     num_samples=1000,\n",
"                                     treatment_is_binary=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"data['dot_graph'] = 'digraph { v ->y;X0-> v;X0-> y;}'\n",
"\n",
"# Work on a copy: the original cell aliased data['df'] and overwrote 'y' in place,\n",
"# so every re-run of the cell stacked another layer of noise on top of the last one.\n",
"df = data['df'].copy()\n",
"\n",
"# Dedicated seeded generator, so this cell adds exactly the same noise on every run.\n",
"noise_rng = np.random.RandomState(12345)\n",
"# without noise, the variance in Y|X, Z is zero, and mcmc fails.\n",
"df['y'] = df['y'] + noise_rng.normal(size=len(df))\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:dowhy.do_why:Causal Graph not provided. DoWhy will construct a graph based on data inputs.\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"['X0']\n",
"yes\n",
"{'observed': 'yes'}\n",
"Model to find the causal effect of treatment v on outcome y\n",
"{'label': 'Unobserved Confounders', 'observed': 'no'}\n",
"All common causes are observed. Causal effect can be identified.\n",
"McmcSampler\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.do_sampler:Using McmcSampler for do sampling.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"treatments ['v']\n",
"backdoor ['X0']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:pymc3:Auto-assigning NUTS sampler...\n",
"INFO:pymc3:Initializing NUTS using jitter+adapt_diag...\n",
"INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)\n",
"INFO:pymc3:NUTS: [y_sd, beta_y, v_sd, beta_v]\n",
"Sampling 4 chains: 100%|██████████| 8000/8000 [00:05<00:00, 1550.20draws/s]\n"
]
},
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7feaac626550>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAEQCAYAAACk818iAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADXBJREFUeJzt3X2MpeVdxvHrctnNKt2UdDjSlmEcIoihJgUykpqaGsAKSNOGpm0gwbctmSa2piQaxZCg/sdfBjS+jW2lxMpWUGKldLVqSdNYlp1l14ZdWltXKrMpMKxt+pJsednLP/asXYZZduc8d+Y55zffTzJhz8vezy9k+fLsPc+Zx0kEAKjjh/oeAADQFmEHgGIIOwAUQ9gBoBjCDgDFEHYAKIawA0AxhB0AiiHsAFDMGX0c9Oyzz87s7GwfhwaAibVnz57nkgxO9b5ewj47O6vFxcU+Dg0AE8v210/nfWzFAEAxncNu+yLb+074+rbtW1oMBwBYu85bMUm+IukSSbK9SdIhSQ90XRcAMJrWe+xXSfqvJKe1D3SiF154QUtLSzpy5EjjkdrZunWrpqentXnz5r5HAYCTah32GyTdu9oLtuclzUvSzMzMK15fWlrStm3bNDs7K9uNx+ouiQ4fPqylpSWdf/75fY8DACfV7JuntrdIeqek+1Z7PclCkrkkc4PBK6/WOXLkiKampsYy6pJkW1NTU2P9NwoAkNpeFXOtpMeSPDPqAuMa9ePGfT4AkNqG/UadZBsGALB+muyx2z5T0tslfaDFepI0e+unWy0lSXryjuuargf0qfV/HxtdtT40CXuS70maarEWAKAbPnk6dPvtt+vOO+/8/8e33Xab7rrrrh4nAoDREPah7du365577pEkHT16VDt27NBNN93U81QAsHa9/BCwcTQ7O6upqSnt3btXzzzzjC699FJNTbG7BGDyEPYT3Hzzzbr77rv19NNPa/v27X2PAwAjYSvmBNdff7127typ3bt36+qrr+57HAAYydiesfdx+dGWLVt0xRVX6KyzztKmTZvW/fgA0MLYhr0PR48e1SOPPKL77lv1pyIAwERgK2bowIEDuuCCC3TVVVfpwgsv7HscABgZZ+xDF198sQ4ePNj3GADQ2VidsSfpe4RXNe7zAYA0RmHfunWrDh8+PLbxPP7z2Ldu3dr3KADwqsZmK2Z6elpLS0taXl7ue5STOn4HJQAYZ2MT9s2bN3NnIgBoYGy2YgAAbRB2ACiGsANAMYQdAIppEnbbZ9m+3/aXbT9h+2darAsAWLtWV8XcJWlnkvfY3iLpRxqtCwBYo85ht/1aSW+T9KuSlOR5Sc93XRcAMJoWWzHnS1qW9Fe299r+iO0zV77J9rztRduL4/whJACYdC3CfoakyyT9WZJLJX1P0q0r35RkIclckrnBYNDgsACA1bQI+5KkpSS7ho/v17HQAwB60DnsSZ6W9JTti4ZPXSXpQNd1AQCjaXVVzG9I+sTwipiDkn6t0boAgDVqEvYk+yTNtVgLANANnzwFgGIIOwAUQ9gBoBjCDgDFEHYAKIawA0AxhB0AiiHsAFAMYQeAYgg7ABRD2AGgGMIOAMUQdgAohrADQDGEHQCKIewAUAxhB4BimtxByfaTkr4j6SVJLybhbkoA0JNW9zyVpCuSPNdwPQDACNiKAYBiWoU9kv7Z9h7b86u9wfa87UXbi8vLy40OCwBYqVXYfzbJZZKulfRB229b+YYkC0nmkswNBoNGhwUArNQk7EkODf/5rKQHJF3eYl0AwNp1DrvtM21vO/5rSb8g6fGu6wIARtPiqphzJD1g+/h6f5NkZ4N1AQAj6Bz2JAclvbnBLACABrjcEQCKIewAUAxhB4BiCDsAFEPYAaAYwg4AxRB2ACiGsANAMYQdAIoh7ABQDGEHgGIIOwAUQ9gBoBjCDgDFEHYAKIawA0AxhB0AimkWdtubbO+1/WCrNQ
EAa9fyjP3Dkp5ouB4AYARNwm57WtJ1kj7SYj0AwOhanbHfKem3JR092Rtsz9tetL24vLzc6LAAgJU6h932OyQ9m2TPq70vyUKSuSRzg8Gg62EBACfR4oz9rZLeaftJSTskXWn7rxusCwAYQeewJ/ndJNNJZiXdIOnfktzUeTIAwEi4jh0Aijmj5WJJHpb0cMs1AQBrwxk7ABRD2AGgGMIOAMUQdgAohrADQDGEHQCKIewAUAxhB4BiCDsAFEPYAaAYwg4AxRB2ACiGsANAMYQdAIoh7ABQDGEHgGIIOwAU0znstrfaftT2f9jeb/sPWgwGABhNi1vjfV/SlUm+a3uzpC/Y/kySRxqsDQBYo85hTxJJ3x0+3Dz8Std1AQCjabLHbnuT7X2SnpX02SS7VnnPvO1F24vLy8stDgsAWEWTsCd5KcklkqYlXW77p1Z5z0KSuSRzg8GgxWEBAKtoelVMkm9J+pyka1quCwA4fS2uihnYPmv46x+W9HZJX+66LgBgNC2uinmDpI/b3qRj/6P42yQPNlgXADCCFlfFfEnSpQ1mAQA0wCdPAaAYwg4AxRB2ACiGsANAMYQdAIoh7ABQDGEHgGIIOwAUQ9gBoBjCDgDFEHYAKIawA0AxhB0AiiHsAFAMYQeAYgg7ABRD2AGgmBb3PD3P9udsH7C93/aHWwwGABhNi3uevijpN5M8ZnubpD22P5vkQIO1AQBr1PmMPck3kjw2/PV3JD0h6dyu6wIARtN0j932rI7d2HrXKq/N2160vbi8vNzysACAEzQLu+3XSPo7Sbck+fbK15MsJJlLMjcYDFodFgCwQpOw296sY1H/RJK/b7EmAGA0La6KsaSPSnoiyR92HwkA0EWLM/a3SvolSVfa3jf8+sUG6wIARtD5csckX5DkBrMAABrgk6cAUAxhB4BiCDsAFEPYAaAYwg4AxRB2ACiGsANAMYQdAIoh7ABQDGEHgGIIOwAUQ9gBoBjCDgDFEHYAKIawA0AxhB0AiiHsAFBMq5tZf8z2s7Yfb7EeAGB0rc7Y75Z0TaO1AAAdNAl7ks9L+t8WawEAumGPHQCKWbew2563vWh7cXl5eb0OCwAbzrqFPclCkrkkc4PBYL0OCwAbDlsxAFBMq8sd75X0RUkX2V6y/f4W6wIA1u6MFoskubHFOgCA7tiKAYBiCDsAFEPYAaAYwg4AxRB2ACiGsANAMYQdAIoh7ABQDGEHgGIIOwAUQ9gBoBjCDgDFEHYAKIawA0AxhB0AiiHsAFAMYQeAYlrdGu8a21+x/TXbt7ZYEwAwms5ht71J0p9IulbSxZJutH1x13UBAKNpccZ+uaSvJTmY5HlJOyS9q8G6AIARtAj7uZKeOuHx0vA5AEAPzlivA9melzQvSTMzM+t12E5mb/103yOU8uQd1/U9Qhn8u8SraXHGfkjSeSc8nh4+9zJJFpLMJZkbDAYNDgsAWE2LsO+WdKHt821vkXSDpE81WBcAMILOWzFJXrT9IUn/JGmTpI8l2d95MgDASJrssSd5SNJDLdYCAHTDJ08BoBjCDgDFEHYAKIawA0AxhB0AiiHsAFAMYQeAYgg7ABRD2AGgGMIOAMUQdgAohrADQDGEHQCKIewAUMy63RpvEnH7MQCTiDN2ACiGsANAMZ3Cbvu9tvfbPmp7rtVQAIDRdT1jf1zSuyV9vsEsAIAGOn3zNMkTkmS7zTQAgM7YYweAYk55xm77XyS9fpWXbkvyD6d7INvzkuYlaWZm5rQHBACszSnDnuTnWxwoyYKkBUmam5tLizUBAK/EVgwAFONk9JNn29dL+mNJA0nfkrQvydWn8fuWJX195ANjpbMlPdf3EMAq+LPZ1o8lGZzqTZ3CjvFgezEJnyPA2OHPZj/YigGAYgg7ABRD2GtY6HsA4CT4s9kD9tgBoBjO2AGgGMIOAMUQdgAohrBPKNuvs/26vucAMH4I+wSxPWN7x/CTu7skPWr72eFzs/1OBxxj+xzblw2/zu
l7no2Iq2ImiO0vSrpT0v1JXho+t0nSeyXdkuQtfc6Hjc32JZL+XNJrJR0aPj2tYz9u5NeTPNbXbBsNYZ8gtr+a5MK1vgasB9v7JH0gya4Vz79F0l8keXM/k208ne6ghHW3x/afSvq4pKeGz50n6Vck7e1tKuCYM1dGXZKSPGL7zD4G2qg4Y58gtrdIer+kd0k6d/j0kqR/lPTRJN/vazbA9h9J+nFJ9+jlJx6/LOm/k3yor9k2GsIOoBnb1+rlJx6HJH0qyUP9TbXxEPYibL8jyYN9zwGgf1zuWMdP9z0AcDLDex5jnfDN0wlj+ye1+l91f6+/qYBTct8DbCScsU8Q278jaYeO/Ufy6PDLku61fWufswGn8HzfA2wk7LFPENv/KelNSV5Y8fwWSfu5jh3jyvb/JJnpe46Ngq2YyXJU0hv1yhuBv2H4GtAb21862UuS+NEC64iwT5ZbJP2r7a/qB9cJz0i6QBLXCKNv50i6WtI3VzxvSf++/uNsXIR9giTZafsnJF2ul3/zdPfxnx0D9OhBSa9Jsm/lC7YfXv9xNi722AGgGK6KAYBiCDsAFEPYAaAYwg4AxRB2QJLtO2x/8ITHv2/7t/qcCRgVYQeO+aSk953w+H3D54CJw3XsgKQke23/qO03ShpI+maSp071+4BxRNiBH7hP0nskvV6crWOC8QElYMj2myT9paSzJf1ckm/0PBIwEvbYgaEk+yVtk3SIqGOSccYOAMVwxg4AxRB2ACiGsANAMYQdAIoh7ABQDGEHgGIIOwAUQ9gBoJj/A8Sb/MKZv07KAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cdf = CausalDataFrame(df)\n",
"\n",
"# Draw samples under do(v=1) with the MCMC sampler, adjusting for X0.\n",
"# NOTE(review): keep_original_treatment=True presumably leaves the observed 'v'\n",
"# values in the result, which is what lets groupby('v') below yield one bar per\n",
"# treatment value -- confirm.\n",
"do_samples = cdf.causal.do(\n",
"    x={'v': 1},\n",
"    variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},\n",
"    outcome='y',\n",
"    method='mcmc',\n",
"    common_causes=['X0'],\n",
"    keep_original_treatment=True,\n",
"    proceed_when_unidentifiable=True,\n",
")\n",
"\n",
"# Column means per treatment group, then a bar chart of the mean outcome.\n",
"mean_by_treatment = do_samples.groupby('v').mean()\n",
"mean_by_treatment.plot(y='y', kind='bar')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"cdf = CausalDataFrame(df)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n",
"INFO:dowhy.do_sampler:Using McmcSampler for do sampling.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error: Pygraphviz cannot be loaded. No module named 'pygraphviz'\n",
"Trying pydot ...\n",
"['X0']\n",
"yes\n",
"{'observed': 'yes'}\n",
"Model to find the causal effect of treatment v on outcome y\n",
"{'label': 'Unobserved Confounders', 'observed': 'no'}\n",
"All common causes are observed. Causal effect can be identified.\n",
"McmcSampler\n",
"treatments ['v']\n",
"backdoor ['X0']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:pymc3:Auto-assigning NUTS sampler...\n",
"INFO:pymc3:Initializing NUTS using jitter+adapt_diag...\n",
"INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)\n",
"INFO:pymc3:NUTS: [y_sd, beta_y, v_sd, beta_v]\n",
"Sampling 4 chains: 100%|██████████| 8000/8000 [00:05<00:00, 1479.09draws/s]\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'label': 'Unobserved Confounders', 'observed': 'no'}\n",
"All common causes are observed. Causal effect can be identified.\n",
"McmcSampler\n"
]
}
],
"source": [
"def do_with_graph(treatment_value, **sampler_options):\n",
"    '''Sample y under do(v=treatment_value) with MCMC, using the graph in data['dot_graph'].\n",
"\n",
"    Any extra keyword arguments are forwarded unchanged to cdf.causal.do.\n",
"    '''\n",
"    return cdf.causal.do(x={'v': treatment_value},\n",
"                         variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},\n",
"                         outcome='y',\n",
"                         method='mcmc',\n",
"                         dot_graph=data['dot_graph'],\n",
"                         proceed_when_unidentifiable=True,\n",
"                         **sampler_options)\n",
"\n",
"\n",
"cdf_1 = do_with_graph(1)\n",
"# The second intervention passes use_previous_sampler=True; the stored log for this\n",
"# cell shows NUTS sampling only once, so it presumably reuses the first call's sampler.\n",
"cdf_0 = do_with_graph(0, use_previous_sampler=True)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>v</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.679571</td>\n",
" <td>0</td>\n",
" <td>-0.560958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.243537</td>\n",
" <td>0</td>\n",
" <td>-0.041947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.195101</td>\n",
" <td>0</td>\n",
" <td>-0.812799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.923288</td>\n",
" <td>0</td>\n",
" <td>1.973034</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.389773</td>\n",
" <td>0</td>\n",
" <td>1.541949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.345340</td>\n",
" <td>0</td>\n",
" <td>0.475291</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.539989</td>\n",
" <td>0</td>\n",
" <td>0.750167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.294383</td>\n",
" <td>0</td>\n",
" <td>1.966613</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-0.557656</td>\n",
" <td>0</td>\n",
" <td>-1.079506</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.581319</td>\n",
" <td>0</td>\n",
" <td>-1.348016</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 v y\n",
"0 -0.679571 0 -0.560958\n",
"1 -0.243537 0 -0.041947\n",
"2 -0.195101 0 -0.812799\n",
"3 0.923288 0 1.973034\n",
"4 0.389773 0 1.541949\n",
"5 0.345340 0 0.475291\n",
"6 0.539989 0 0.750167\n",
"7 1.294383 0 1.966613\n",
"8 -0.557656 0 -1.079506\n",
"9 -0.581319 0 -1.348016"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cdf_0.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>v</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.679571</td>\n",
" <td>1</td>\n",
" <td>3.072462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.243537</td>\n",
" <td>1</td>\n",
" <td>4.972621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.195101</td>\n",
" <td>1</td>\n",
" <td>3.339304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.923288</td>\n",
" <td>1</td>\n",
" <td>7.598479</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.389773</td>\n",
" <td>1</td>\n",
" <td>6.248243</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.345340</td>\n",
" <td>1</td>\n",
" <td>6.406315</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.539989</td>\n",
" <td>1</td>\n",
" <td>5.946327</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.294383</td>\n",
" <td>1</td>\n",
" <td>7.686153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-0.557656</td>\n",
" <td>1</td>\n",
" <td>3.413582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.581319</td>\n",
" <td>1</td>\n",
" <td>4.661256</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 v y\n",
"0 -0.679571 1 3.072462\n",
"1 -0.243537 1 4.972621\n",
"2 -0.195101 1 3.339304\n",
"3 0.923288 1 7.598479\n",
"4 0.389773 1 6.248243\n",
"5 0.345340 1 6.406315\n",
"6 0.539989 1 5.946327\n",
"7 1.294383 1 7.686153\n",
"8 -0.557656 1 3.413582\n",
"9 -0.581319 1 4.661256"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cdf_1.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:dowhy.do_why:Causal Graph not provided. DoWhy will construct a graph based on data inputs.\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n",
"INFO:dowhy.do_sampler:Using McmcSampler for do sampling.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"['X0']\n",
"yes\n",
"{'observed': 'yes'}\n",
"Model to find the causal effect of treatment v on outcome y\n",
"{'label': 'Unobserved Confounders', 'observed': 'no'}\n",
"All common causes are observed. Causal effect can be identified.\n",
"McmcSampler\n",
"treatments ['v']\n",
"backdoor ['X0']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:pymc3:Auto-assigning NUTS sampler...\n",
"INFO:pymc3:Initializing NUTS using jitter+adapt_diag...\n",
"INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)\n",
"INFO:pymc3:NUTS: [y_sd, beta_y, v_sd, beta_v]\n",
"Sampling 4 chains: 100%|██████████| 8000/8000 [00:06<00:00, 1175.49draws/s]\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'label': 'Unobserved Confounders', 'observed': 'no'}\n",
"All common causes are observed. Causal effect can be identified.\n",
"McmcSampler\n"
]
}
],
"source": [
"def do_with_common_causes(treatment_value, **sampler_options):\n",
"    '''Sample y under do(v=treatment_value) with MCMC, naming X0 as the common cause.\n",
"\n",
"    Any extra keyword arguments are forwarded unchanged to cdf.causal.do.\n",
"    '''\n",
"    return cdf.causal.do(x={'v': treatment_value},\n",
"                         variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},\n",
"                         outcome='y',\n",
"                         method='mcmc',\n",
"                         common_causes=['X0'],\n",
"                         proceed_when_unidentifiable=True,\n",
"                         **sampler_options)\n",
"\n",
"\n",
"# First call explicitly asks for a new sampler; the second one reuses it.\n",
"cdf_1 = do_with_common_causes(1, use_previous_sampler=False)\n",
"cdf_0 = do_with_common_causes(0, use_previous_sampler=True)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAKoAAAAPBAMAAACGiUnsAAAAMFBMVEX///8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAv3aB7AAAAD3RSTlMAMpndq3bvImbNiRBUu0T/6ZgXAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAC5klEQVQ4Ea2Uv2sTYRjHP0nz47zLj1KdXBoDIlSoHTqJ4hlQOwiNg8W6WERQidbDoYhLb7AVXczgIFYw1g4SLDkUhXZpFAdxMYs4KBoUByer1P5KNT7v+57gH+BBvrm77/f9vM/73L0H0CU/eHK8F24FXXUuV9+DfexeKBDJ4fTMePCoeohIf0+1aoLa2VyoVivO0emmsYVkeP1yBjlidS6010nmeO1zh8eeEehweeonSzhnGAusdru9pIMySJwbcu1G4bCx5abmxU8paMrHKXP7gEd8g2yfNYJd1iLekMtBKBAbIZaLSs2uDhpnB8R4CXPGBsO7dFNR7QZOn8QhM0q2nHZJtbRA8prLGtTkNvaiLdlOHTROBe4xC4+NDYZX0dTMr8D2w7B0IOuS2dAikE2u8wtOeAsN4isy7xdVrTrEEc0U+faOB4S25lmepjK4+hAGJrZJzNnNQo7MTy1wVcaelVqbCan1t/iNv0HlQFp60L5ex9iG9wRDtdtT8I7xJsn7OxmvEPmpBacoY2tw17cX2bQqFfphUDtIlTLlWiDdUbbhVULq88/LTTVxn8hQczxHZF0LNkJNF1O1Om94tQELktBB7eAsyrPu3TeKsTUv1WmolsuJkoSjqnGJkl68Fp4pKi+P1DpJTWz9DsOS0EHjxMtwhfhyoG3Dm8dQEwGRjZhqXNIjrR5UyohT0VQYDIQVa8Fe+VPB0OlogNTTXde24b3I55e3y7Va1oD0O9rKtoSabmC1tKQK+V0ni+Lq9quyl2QxKhg6UoBqQswHmf8vD70LEh68jcpuKScadLSskX92QcJlMoisYJXo9oj8kOWroFDEoTsHp6XPnrExPM7jrJGZwirKZp30LF/t2D087TQiYxsMB3M+8SnnljyZdXn9VVBRG/C6Al8DJkIb4ckx2x5QW3dLTy9cnNkJn/IfYP7o/lCwPy759rS4VPNNIZ6TMx3UDmN1+coU1NdF25onif9//AEgEBSydfMdDAAAAABJRU5ErkJggg==\n",
"text/latex": [
"$$4.853004997547894$$"
],
"text/plain": [
"4.853004997547894"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(cdf_1['y'] - cdf_0['y']).mean()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAL8AAAAPBAMAAABHDgNAAAAAMFBMVEX///8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAv3aB7AAAAD3RSTlMAiXZmMs1UEN0i77urRJlR0qN3AAAACXBIWXMAAA7EAAAOxAGVKw4bAAADCElEQVQ4EbVUz0tUURg98/P55vdQi6DBxGiRC5uMkCJo/AfSEGpVGrVqUa9FJCFpi9ok+NpFCxWizMAcWmQk0esn5UIHoZY50i+QGrUazdBe53735Uj7vsV8551z73fmfu+7D/Bt3QgVkq+m78sDEGoMfoJRW13QKNgzk06j1TrQDqM0luPyWrUtWIRWLqRfQ8tCekUkHUKHrYDKZhGns8BJC4i4bgoJ4KVGAdd1VzDiLgCd8K3COIKP3FTlQBTjBq5bWhYS4UFVUxUKDMDXQizZv4hkt/Fkknwosx04DpzVKGEDDg4+ZfoM3EG0iDHuqnMgSmwAsaKWhezMLAG6UNRBmA+QHB9Gkm77aRAjh81Ah0Y+NiVHC8ZtYNKephNg7nYgCnf55rUsJAKqphRKOogvEv/NqkVrBtOj2KANuGJKnYHRZmEy91Ah+EKOSlPoysO/7BkIWTHoKiL+jYu8bFwkFoNd9TnE3J3tiCnEyAPPGj4ohF6r3FRfAPZogzwiPMGqJwtZMehPIagMdDY3scNiELXMVWBL2YJG8PFoo+hnWcQXjHIWl2HUiAEV9if0XcuaXGfAQeNooN/LdaygTsB4gPDX3mGNeEThot1MVY7hArOWD2KglHs4phpNWZMVg39ahAjnyzOYs/fB/0uZzdnANmYgscyfEvCTqwontIFSwg1NP7SsyYoBX27Ye8nhRdNGlA/K4AX7U6BZc7sg4AonSzoNfxG4xFWNKW1AhRFbEtnQZMUgmtcjJTm5tGZwk//73Dz3ZRWi4wovXwsSHL9TMO1xGrydKM3drRGFBiFH5LAmKwaBgXUXLZJHFSuoExTVvxxiV21BCP5mf9j+FgRT8NvNFmZZNeLwy0AlMIhmW8uarBhwGM7njDIkB7LyqdhBgz4EhvDGQoNGCHMSzCL22riWrn6FWMq4pQzybD8V/2Gj1ZM16ZeLpgrhTO1zoMfL70qPgC8jR7MwZyYsBCf4sRMEU9Xry3CK21yXr7g6k+NFe7zClUpJlwqeLGRivPzeK0Txv8YfNHg391I5n3MAAAAASUVORK5CYII=\n",
"text/latex": [
"$$0.03158575264795411$$"
],
"text/plain": [
"0.03158575264795411"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"1.96*(cdf_1['y'] - cdf_0['y']).std() / np.sqrt(len(cdf))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>y</td> <th> R-squared: </th> <td> 0.965</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.965</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.390e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Sun, 10 Feb 2019</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>21:31:10</td> <th> Log-Likelihood: </th> <td> -1423.0</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 1000</td> <th> AIC: </th> <td> 2850.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 998</td> <th> BIC: </th> <td> 2860.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 2</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>X0</th> <td> 1.9351</td> <td> 0.039</td> <td> 50.014</td> <td> 0.000</td> <td> 1.859</td> <td> 2.011</td>\n",
"</tr>\n",
"<tr>\n",
" <th>v</th> <td> 4.8641</td> <td> 0.052</td> <td> 94.093</td> <td> 0.000</td> <td> 4.763</td> <td> 4.966</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td> 2.937</td> <th> Durbin-Watson: </th> <td> 1.945</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.230</td> <th> Jarque-Bera (JB): </th> <td> 2.802</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.123</td> <th> Prob(JB): </th> <td> 0.246</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 3.084</td> <th> Cond. No. </th> <td> 2.20</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.965\n",
"Model: OLS Adj. R-squared: 0.965\n",
"Method: Least Squares F-statistic: 1.390e+04\n",
"Date: Sun, 10 Feb 2019 Prob (F-statistic): 0.00\n",
"Time: 21:31:10 Log-Likelihood: -1423.0\n",
"No. Observations: 1000 AIC: 2850.\n",
"Df Residuals: 998 BIC: 2860.\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"X0 1.9351 0.039 50.014 0.000 1.859 2.011\n",
"v 4.8641 0.052 94.093 0.000 4.763 4.966\n",
"==============================================================================\n",
"Omnibus: 2.937 Durbin-Watson: 1.945\n",
"Prob(Omnibus): 0.230 Jarque-Bera (JB): 2.802\n",
"Skew: 0.123 Prob(JB): 0.246\n",
"Kurtosis: 3.084 Cond. No. 2.20\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ordinary least squares of the observed outcome on the confounder and the treatment.\n",
"# Presumably a baseline: the coefficient on 'v' can be set against the do-sampler\n",
"# difference in means computed earlier in the notebook.\n",
"# No constant column is part of the regressors, so the fit has no intercept term.\n",
"regressors = df[['X0', 'v']]\n",
"model = OLS(endog=df['y'], exog=regressors)\n",
"result = model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>v</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.679571</td>\n",
" <td>1</td>\n",
" <td>3.359819</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.243537</td>\n",
" <td>1</td>\n",
" <td>4.250789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.195101</td>\n",
" <td>1</td>\n",
" <td>4.440511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.923288</td>\n",
" <td>1</td>\n",
" <td>6.522757</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.389773</td>\n",
" <td>1</td>\n",
" <td>5.647217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.345340</td>\n",
" <td>1</td>\n",
" <td>5.382206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.539989</td>\n",
" <td>1</td>\n",
" <td>5.989925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.294383</td>\n",
" <td>1</td>\n",
" <td>7.374283</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-0.557656</td>\n",
" <td>1</td>\n",
" <td>3.751946</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.581319</td>\n",
" <td>1</td>\n",
" <td>3.503088</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 v y\n",
"0 -0.679571 1 3.359819\n",
"1 -0.243537 1 4.250789\n",
"2 -0.195101 1 4.440511\n",
"3 0.923288 1 6.522757\n",
"4 0.389773 1 5.647217\n",
"5 0.345340 1 5.382206\n",
"6 0.539989 1 5.989925\n",
"7 1.294383 1 7.374283\n",
"8 -0.557656 1 3.751946\n",
"9 -0.581319 1 3.503088"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cdf_1.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>v</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.679571</td>\n",
" <td>0</td>\n",
" <td>-0.352479</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.243537</td>\n",
" <td>0</td>\n",
" <td>-0.090221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.195101</td>\n",
" <td>0</td>\n",
" <td>-0.559728</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.923288</td>\n",
" <td>0</td>\n",
" <td>1.813926</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.389773</td>\n",
" <td>0</td>\n",
" <td>0.397714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.345340</td>\n",
" <td>0</td>\n",
" <td>0.917746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.539989</td>\n",
" <td>0</td>\n",
" <td>1.860140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.294383</td>\n",
" <td>0</td>\n",
" <td>1.636664</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-0.557656</td>\n",
" <td>0</td>\n",
" <td>-1.481095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.581319</td>\n",
" <td>0</td>\n",
" <td>-0.859396</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 v y\n",
"0 -0.679571 0 -0.352479\n",
"1 -0.243537 0 -0.090221\n",
"2 -0.195101 0 -0.559728\n",
"3 0.923288 0 1.813926\n",
"4 0.389773 0 0.397714\n",
"5 0.345340 0 0.917746\n",
"6 0.539989 0 1.860140\n",
"7 1.294383 0 1.636664\n",
"8 -0.557656 0 -1.481095\n",
"9 -0.581319 0 -0.859396"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cdf_0.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:dowhy.do_why:Causal Graph not provided. DoWhy will construct a graph based on data inputs.\n",
"INFO:dowhy.causal_identifier:Common causes of treatment and outcome:{'U', 'X0'}\n",
"INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n",
"INFO:dowhy.do_sampler:Using McmcSampler for do sampling.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"['X0']\n",
"yes\n",
"{'observed': 'yes'}\n",
"Model to find the causal effect of treatment v on outcome y\n",
"{'label': 'Unobserved Confounders', 'observed': 'no'}\n",
"All common causes are observed. Causal effect can be identified.\n",
"McmcSampler\n",
"treatments ['v']\n",
"backdoor ['X0']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:pymc3:Auto-assigning NUTS sampler...\n",
"INFO:pymc3:Initializing NUTS using jitter+adapt_diag...\n",
"INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)\n",
"INFO:pymc3:NUTS: [y_sd, beta_y, v_sd, beta_v]\n",
"Sampling 4 chains: 100%|██████████| 8000/8000 [00:07<00:00, 1070.40draws/s]\n"
]
}
],
"source": [
"# NOTE(review): x requests do(v=0), but with keep_original_treatment=True the 'v'\n",
"# column shown by cdf_do.head(10) in the next cell still holds a mix of 0.0 and 1.0\n",
"# -- confirm this is the intended behaviour.\n",
"do_options = dict(\n",
"    variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},\n",
"    outcome='y',\n",
"    method='mcmc',\n",
"    common_causes=['X0'],\n",
"    proceed_when_unidentifiable=True,\n",
"    keep_original_treatment=True,\n",
")\n",
"cdf_do = cdf.causal.do(x={'v': 0}, **do_options)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>v</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.679571</td>\n",
" <td>1.0</td>\n",
" <td>3.765173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.243537</td>\n",
" <td>1.0</td>\n",
" <td>3.465047</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.195101</td>\n",
" <td>1.0</td>\n",
" <td>6.808677</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.923288</td>\n",
" <td>1.0</td>\n",
" <td>7.360935</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.389773</td>\n",
" <td>1.0</td>\n",
" <td>6.875433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.345340</td>\n",
" <td>1.0</td>\n",
" <td>4.591774</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.539989</td>\n",
" <td>0.0</td>\n",
" <td>1.891473</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.294383</td>\n",
" <td>1.0</td>\n",
" <td>7.019950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-0.557656</td>\n",
" <td>1.0</td>\n",
" <td>3.717224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.581319</td>\n",
" <td>0.0</td>\n",
" <td>-1.434013</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 v y\n",
"0 -0.679571 1.0 3.765173\n",
"1 -0.243537 1.0 3.465047\n",
"2 -0.195101 1.0 6.808677\n",
"3 0.923288 1.0 7.360935\n",
"4 0.389773 1.0 6.875433\n",
"5 0.345340 1.0 4.591774\n",
"6 0.539989 0.0 1.891473\n",
"7 1.294383 1.0 7.019950\n",
"8 -0.557656 1.0 3.717224\n",
"9 -0.581319 0.0 -1.434013"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cdf_do.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>v</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.679571</td>\n",
" <td>1.0</td>\n",
" <td>4.107991</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.243537</td>\n",
" <td>1.0</td>\n",
" <td>2.607517</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.195101</td>\n",
" <td>1.0</td>\n",
" <td>4.613115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.923288</td>\n",
" <td>1.0</td>\n",
" <td>6.447638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.389773</td>\n",
" <td>1.0</td>\n",
" <td>5.170365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.345340</td>\n",
" <td>1.0</td>\n",
" <td>5.035614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.539989</td>\n",
" <td>0.0</td>\n",
" <td>0.511874</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.294383</td>\n",
" <td>1.0</td>\n",
" <td>9.080715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-0.557656</td>\n",
" <td>1.0</td>\n",
" <td>3.681738</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-0.581319</td>\n",
" <td>0.0</td>\n",
" <td>-2.756425</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 v y\n",
"0 -0.679571 1.0 4.107991\n",
"1 -0.243537 1.0 2.607517\n",
"2 -0.195101 1.0 4.613115\n",
"3 0.923288 1.0 6.447638\n",
"4 0.389773 1.0 5.170365\n",
"5 0.345340 1.0 5.035614\n",
"6 0.539989 0.0 0.511874\n",
"7 1.294383 1.0 9.080715\n",
"8 -0.557656 1.0 3.681738\n",
"9 -0.581319 0.0 -2.756425"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cdf.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment