libinruan/gist:dad14eec3d194512b4aebc458bad0ae5

## gistfile1.txt
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The reason we use csv module instead of default Python i/o functions.  \n",
    "Here is an example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Let's start with a cursory preview without any modules\n",
    "path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014.csv\"\n",
    "file = open(path)\n",
    "for line in file:\n",
    "    print(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Date,Open,High,Low,Close,Volume,Adj Close\\n'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index each line of a file with list comprehensions\n",
    "file = open(path)\n",
    "lines = [line for line in file]\n",
    "# Then, we can read each line by index; \n",
    "# each line is treated as a single string \n",
    "# (there's a new line character included in the end)\n",
    "lines[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'8/19/2014,585.002622,587.342658,584.002627,586.862643,978600,586.862643\\n'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lines[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Date,Open,High,Low,Close,Volume,Adj Close'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We can use strip to remove any leading and trailing white space\n",
    "lines[0].strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We can further use split function to divide the string into small pieces\n",
    "lines[0].strip().split(',')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'],\n",
       " ['8/19/2014',\n",
       "  '585.002622',\n",
       "  '587.342658',\n",
       "  '584.002627',\n",
       "  '586.862643',\n",
       "  '978600',\n",
       "  '586.862643']]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's read the file once more with trick we just saw\n",
    "file = open(path)\n",
    "dataset = [line.strip().split(',') for line in file]\n",
    "dataset[:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Problem: Data remain treated as string.  \n",
    "Solution: Use csv module."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Dialect', 'DictReader', 'DictWriter', 'Error', 'QUOTE_ALL', 'QUOTE_MINIMAL', 'QUOTE_NONE', 'QUOTE_NONNUMERIC', 'Sniffer', 'StringIO', '_Dialect', '__all__', '__builtins__', '__doc__', '__file__', '__name__', '__package__', '__version__', 'excel', 'excel_tab', 'field_size_limit', 'get_dialect', 'list_dialects', 're', 'reader', 'reduce', 'register_dialect', 'unregister_dialect', 'writer']\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "'''When use a module for the first time, it is always useful to use 'dir'\n",
    "to see what classes and function the module contains'''\n",
    "print(dir(csv))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "'''use newline='' in Python 3.0 becasue depending on your system strings may \n",
    "end with a new line carraige return or both. This ensures the csv \n",
    "module will work correctly across all platfomrs'''\n",
    "file = open(path)\n",
    "reader = csv.reader(file)\n",
    "# use next function to extract the first line which is header containing no data\n",
    "header = next(reader) # The first line is the header\n",
    "data = [row for row in reader] # Read the remaing data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']\n",
      "['8/19/2014', '585.002622', '587.342658', '584.002627', '586.862643', '978600', '586.862643']\n"
     ]
    }
   ],
   "source": [
    "print(header)\n",
    "print(data[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To convert data into appropriate type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[datetime.datetime(2014, 8, 19, 0, 0), 585.002622, 587.342658, 584.002627, 586.862643, 978600, 586.862643]\n"
     ]
    }
   ],
   "source": [
    "# import the datetime class from the datetime module. you'll see why.\n",
    "from datetime import datetime \n",
    "import csv\n",
    "file = open(path)\n",
    "reader = csv.reader(file)\n",
    "header = next(reader)\n",
    "data = []\n",
    "'''row = [Date, Open, High, Low, Close, Volume, Adj. Close ]\n",
    "       'Date' is a datetime; the rest are of type floats \n",
    "       except 'Volume' which is an integer'''\n",
    "''' We use strptime (short for string parse time) \n",
    "        to convert the datetime column'''\n",
    "for row in reader:\n",
    "    date = datetime.strptime(row[0], '%m/%d/%Y')\n",
    "    open_price = float(row[1])\n",
    "    high = float(row[2])\n",
    "    low = float(row[3])\n",
    "    close = float(row[4])\n",
    "    volume = int(row[5])\n",
    "    adj_close = float(row[6])\n",
    "    data.append([date,open_price,high,low,close,volume,adj_close])\n",
    "\n",
    "print(data[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# compute and store daily Google stock returns\n",
    "save_path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014_daily_returns.csv\"\n",
    "file = open(save_path,'w')\n",
    "# create a csv.writer object to store our computed results\n",
    "writer = csv.writer(file)\n",
    "# to write a row call the write row method with a list of values\n",
    "# write the header first\n",
    "writer.writerow([\"Date\",\"Return\"])\n",
    "# our data is luckily already sorted chronologically so\n",
    "# we can simply loop through it.\n",
    "# Since we use stock prices of consecutive days to compute returns, \n",
    "# we stop at the second-to-last row.\n",
    "for i in range(len(data)-1): \n",
    "    todays_row = data[i]\n",
    "    todays_date = todays_row[0]\n",
    "    todays_price = todays_row[-1] # the last element in the list (adj_close)\n",
    "    yesterdays_row = data[i+1] # data is arranged starting recently\n",
    "    yesterdays_price = yesterdays_row[-1]\n",
    "    daily_return = (todays_price-yesterdays_price)/yesterdays_price\n",
    "    \n",
    "    #The reuslt would not be listed in a user friedly way without\n",
    "    #using the string format time method on today's date and pass in the desired format\n",
    "    formatted_date = todays_date.strftime('%m/%d/%Y')\n",
    "    writer.writerow([formatted_date,daily_return])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now, let's turn to pandas module to see how it makes our life better  \n",
    "It only takes four command lines to perfrom the same task."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8/19/2014</td>\n",
       "      <td>585.002622</td>\n",
       "      <td>587.342658</td>\n",
       "      <td>584.002627</td>\n",
       "      <td>586.862643</td>\n",
       "      <td>978600</td>\n",
       "      <td>586.862643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8/18/2014</td>\n",
       "      <td>576.112580</td>\n",
       "      <td>584.512631</td>\n",
       "      <td>576.002598</td>\n",
       "      <td>582.162619</td>\n",
       "      <td>1284100</td>\n",
       "      <td>582.162619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8/15/2014</td>\n",
       "      <td>577.862619</td>\n",
       "      <td>579.382595</td>\n",
       "      <td>570.522603</td>\n",
       "      <td>573.482626</td>\n",
       "      <td>1519100</td>\n",
       "      <td>573.482626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8/14/2014</td>\n",
       "      <td>576.182596</td>\n",
       "      <td>577.902645</td>\n",
       "      <td>570.882599</td>\n",
       "      <td>574.652582</td>\n",
       "      <td>985400</td>\n",
       "      <td>574.652582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8/13/2014</td>\n",
       "      <td>567.312567</td>\n",
       "      <td>575.002602</td>\n",
       "      <td>565.752564</td>\n",
       "      <td>574.782577</td>\n",
       "      <td>1439200</td>\n",
       "      <td>574.782577</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Date        Open        High         Low       Close   Volume  \\\n",
       "0  8/19/2014  585.002622  587.342658  584.002627  586.862643   978600   \n",
       "1  8/18/2014  576.112580  584.512631  576.002598  582.162619  1284100   \n",
       "2  8/15/2014  577.862619  579.382595  570.522603  573.482626  1519100   \n",
       "3  8/14/2014  576.182596  577.902645  570.882599  574.652582   985400   \n",
       "4  8/13/2014  567.312567  575.002602  565.752564  574.782577  1439200   \n",
       "\n",
       "    Adj Close  \n",
       "0  586.862643  \n",
       "1  582.162619  \n",
       "2  573.482626  \n",
       "3  574.652582  \n",
       "4  574.782577  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014.csv\"\n",
    "df = pd.read_csv(path)\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2518 entries, 0 to 2517\n",
      "Data columns (total 7 columns):\n",
      "Date         2518 non-null object\n",
      "Open         2518 non-null float64\n",
      "High         2518 non-null float64\n",
      "Low          2518 non-null float64\n",
      "Close        2518 non-null float64\n",
      "Volume       2518 non-null int64\n",
      "Adj Close    2518 non-null float64\n",
      "dtypes: float64(5), int64(1), object(1)\n",
      "memory usage: 137.8+ KB\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "# See panadas already correctly identifies object type for each data column\n",
    "print(df.info())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "      <th>daily return</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8/19/2014</td>\n",
       "      <td>585.002622</td>\n",
       "      <td>587.342658</td>\n",
       "      <td>584.002627</td>\n",
       "      <td>586.862643</td>\n",
       "      <td>978600</td>\n",
       "      <td>586.862643</td>\n",
       "      <td>0.008073</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8/18/2014</td>\n",
       "      <td>576.112580</td>\n",
       "      <td>584.512631</td>\n",
       "      <td>576.002598</td>\n",
       "      <td>582.162619</td>\n",
       "      <td>1284100</td>\n",
       "      <td>582.162619</td>\n",
       "      <td>0.015136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8/15/2014</td>\n",
       "      <td>577.862619</td>\n",
       "      <td>579.382595</td>\n",
       "      <td>570.522603</td>\n",
       "      <td>573.482626</td>\n",
       "      <td>1519100</td>\n",
       "      <td>573.482626</td>\n",
       "      <td>-0.002036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8/14/2014</td>\n",
       "      <td>576.182596</td>\n",
       "      <td>577.902645</td>\n",
       "      <td>570.882599</td>\n",
       "      <td>574.652582</td>\n",
       "      <td>985400</td>\n",
       "      <td>574.652582</td>\n",
       "      <td>-0.000226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8/13/2014</td>\n",
       "      <td>567.312567</td>\n",
       "      <td>575.002602</td>\n",
       "      <td>565.752564</td>\n",
       "      <td>574.782577</td>\n",
       "      <td>1439200</td>\n",
       "      <td>574.782577</td>\n",
       "      <td>0.021413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8/12/2014</td>\n",
       "      <td>564.522567</td>\n",
       "      <td>565.902572</td>\n",
       "      <td>560.882518</td>\n",
       "      <td>562.732562</td>\n",
       "      <td>1542000</td>\n",
       "      <td>562.732562</td>\n",
       "      <td>-0.009069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>8/11/2014</td>\n",
       "      <td>569.992585</td>\n",
       "      <td>570.492553</td>\n",
       "      <td>566.002578</td>\n",
       "      <td>567.882551</td>\n",
       "      <td>1214700</td>\n",
       "      <td>567.882551</td>\n",
       "      <td>-0.001565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8/8/2014</td>\n",
       "      <td>563.562536</td>\n",
       "      <td>570.252576</td>\n",
       "      <td>560.352561</td>\n",
       "      <td>568.772565</td>\n",
       "      <td>1494700</td>\n",
       "      <td>568.772565</td>\n",
       "      <td>0.009603</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8/7/2014</td>\n",
       "      <td>568.002570</td>\n",
       "      <td>569.892580</td>\n",
       "      <td>561.102543</td>\n",
       "      <td>563.362525</td>\n",
       "      <td>1110900</td>\n",
       "      <td>563.362525</td>\n",
       "      <td>-0.005322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8/6/2014</td>\n",
       "      <td>561.782569</td>\n",
       "      <td>570.702601</td>\n",
       "      <td>560.002541</td>\n",
       "      <td>566.376589</td>\n",
       "      <td>1334300</td>\n",
       "      <td>566.376589</td>\n",
       "      <td>0.002308</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Date        Open        High         Low       Close   Volume  \\\n",
       "0  8/19/2014  585.002622  587.342658  584.002627  586.862643   978600   \n",
       "1  8/18/2014  576.112580  584.512631  576.002598  582.162619  1284100   \n",
       "2  8/15/2014  577.862619  579.382595  570.522603  573.482626  1519100   \n",
       "3  8/14/2014  576.182596  577.902645  570.882599  574.652582   985400   \n",
       "4  8/13/2014  567.312567  575.002602  565.752564  574.782577  1439200   \n",
       "5  8/12/2014  564.522567  565.902572  560.882518  562.732562  1542000   \n",
       "6  8/11/2014  569.992585  570.492553  566.002578  567.882551  1214700   \n",
       "7   8/8/2014  563.562536  570.252576  560.352561  568.772565  1494700   \n",
       "8   8/7/2014  568.002570  569.892580  561.102543  563.362525  1110900   \n",
       "9   8/6/2014  561.782569  570.702601  560.002541  566.376589  1334300   \n",
       "\n",
       "    Adj Close  daily return  \n",
       "0  586.862643      0.008073  \n",
       "1  582.162619      0.015136  \n",
       "2  573.482626     -0.002036  \n",
       "3  574.652582     -0.000226  \n",
       "4  574.782577      0.021413  \n",
       "5  562.732562     -0.009069  \n",
       "6  567.882551     -0.001565  \n",
       "7  568.772565      0.009603  \n",
       "8  563.362525     -0.005322  \n",
       "9  566.376589      0.002308  "
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# use 'shift' method to read the previous trade day's adjusted closing price.\n",
    "df['daily return'] = (df['Adj Close']-df['Adj Close'].shift(-1))/df['Adj Close'].shift(-1)\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.to_csv(\"google_stock_data_20042014_daily_returns_PANDAS.csv\",columns=['Date','daily return'],index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Rearrange Google stock prices series in reverse date order"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1/10/2005</th>\n",
       "      <td>194.500326</td>\n",
       "      <td>198.100339</td>\n",
       "      <td>191.830333</td>\n",
       "      <td>195.060341</td>\n",
       "      <td>15135600</td>\n",
       "      <td>97.432738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2006</th>\n",
       "      <td>464.420786</td>\n",
       "      <td>470.250794</td>\n",
       "      <td>462.040801</td>\n",
       "      <td>469.760804</td>\n",
       "      <td>18262300</td>\n",
       "      <td>234.645756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2007</th>\n",
       "      <td>484.430845</td>\n",
       "      <td>493.550855</td>\n",
       "      <td>482.040813</td>\n",
       "      <td>489.460837</td>\n",
       "      <td>11981700</td>\n",
       "      <td>244.485933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2008</th>\n",
       "      <td>645.011113</td>\n",
       "      <td>657.201125</td>\n",
       "      <td>640.111093</td>\n",
       "      <td>646.731101</td>\n",
       "      <td>12715800</td>\n",
       "      <td>323.042508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2011</th>\n",
       "      <td>614.801082</td>\n",
       "      <td>615.391054</td>\n",
       "      <td>608.561033</td>\n",
       "      <td>614.211048</td>\n",
       "      <td>3170100</td>\n",
       "      <td>306.798725</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Open        High         Low       Close    Volume  \\\n",
       "Date                                                                  \n",
       "1/10/2005  194.500326  198.100339  191.830333  195.060341  15135600   \n",
       "1/10/2006  464.420786  470.250794  462.040801  469.760804  18262300   \n",
       "1/10/2007  484.430845  493.550855  482.040813  489.460837  11981700   \n",
       "1/10/2008  645.011113  657.201125  640.111093  646.731101  12715800   \n",
       "1/10/2011  614.801082  615.391054  608.561033  614.211048   3170100   \n",
       "\n",
       "            Adj Close  \n",
       "Date                   \n",
       "1/10/2005   97.432738  \n",
       "1/10/2006  234.645756  \n",
       "1/10/2007  244.485933  \n",
       "1/10/2008  323.042508  \n",
       "1/10/2011  306.798725  "
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wdf = df # just to keep original df intact\n",
    "print(type(wdf['Date']))\n",
    "wdf.set_index('Date').head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1/10/2005</th>\n",
       "      <td>194.500326</td>\n",
       "      <td>198.100339</td>\n",
       "      <td>191.830333</td>\n",
       "      <td>195.060341</td>\n",
       "      <td>15135600</td>\n",
       "      <td>97.432738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2006</th>\n",
       "      <td>464.420786</td>\n",
       "      <td>470.250794</td>\n",
       "      <td>462.040801</td>\n",
       "      <td>469.760804</td>\n",
       "      <td>18262300</td>\n",
       "      <td>234.645756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2007</th>\n",
       "      <td>484.430845</td>\n",
       "      <td>493.550855</td>\n",
       "      <td>482.040813</td>\n",
       "      <td>489.460837</td>\n",
       "      <td>11981700</td>\n",
       "      <td>244.485933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2008</th>\n",
       "      <td>645.011113</td>\n",
       "      <td>657.201125</td>\n",
       "      <td>640.111093</td>\n",
       "      <td>646.731101</td>\n",
       "      <td>12715800</td>\n",
       "      <td>323.042508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1/10/2011</th>\n",
       "      <td>614.801082</td>\n",
       "      <td>615.391054</td>\n",
       "      <td>608.561033</td>\n",
       "      <td>614.211048</td>\n",
       "      <td>3170100</td>\n",
       "      <td>306.798725</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Open        High         Low       Close    Volume  \\\n",
       "Date                                                                  \n",
       "1/10/2005  194.500326  198.100339  191.830333  195.060341  15135600   \n",
       "1/10/2006  464.420786  470.250794  462.040801  469.760804  18262300   \n",
       "1/10/2007  484.430845  493.550855  482.040813  489.460837  11981700   \n",
       "1/10/2008  645.011113  657.201125  640.111093  646.731101  12715800   \n",
       "1/10/2011  614.801082  615.391054  608.561033  614.211048   3170100   \n",
       "\n",
       "            Adj Close  \n",
       "Date                   \n",
       "1/10/2005   97.432738  \n",
       "1/10/2006  234.645756  \n",
       "1/10/2007  244.485933  \n",
       "1/10/2008  323.042508  \n",
       "1/10/2011  306.798725  "
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wdf = df\n",
    "wdf.sort_values(by='Date',ascending=True,inplace=True)\n",
    "wdf.set_index('Date').head(5)\n",
    "# The result is grouped in the way not what we expect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2004-08-19</th>\n",
       "      <td>100.000168</td>\n",
       "      <td>104.060182</td>\n",
       "      <td>95.960165</td>\n",
       "      <td>100.340176</td>\n",
       "      <td>44871300</td>\n",
       "      <td>50.119968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2004-08-20</th>\n",
       "      <td>101.010175</td>\n",
       "      <td>109.080187</td>\n",
       "      <td>100.500174</td>\n",
       "      <td>108.310183</td>\n",
       "      <td>22942800</td>\n",
       "      <td>54.100990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2004-08-23</th>\n",
       "      <td>110.750191</td>\n",
       "      <td>113.480193</td>\n",
       "      <td>109.050183</td>\n",
       "      <td>109.400185</td>\n",
       "      <td>18342800</td>\n",
       "      <td>54.645447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2004-08-24</th>\n",
       "      <td>111.240189</td>\n",
       "      <td>111.600192</td>\n",
       "      <td>103.570177</td>\n",
       "      <td>104.870176</td>\n",
       "      <td>15319700</td>\n",
       "      <td>52.382705</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2004-08-25</th>\n",
       "      <td>104.960181</td>\n",
       "      <td>108.000187</td>\n",
       "      <td>103.880180</td>\n",
       "      <td>106.000184</td>\n",
       "      <td>9232100</td>\n",
       "      <td>52.947145</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Open        High         Low       Close    Volume  \\\n",
       "Date                                                                   \n",
       "2004-08-19  100.000168  104.060182   95.960165  100.340176  44871300   \n",
       "2004-08-20  101.010175  109.080187  100.500174  108.310183  22942800   \n",
       "2004-08-23  110.750191  113.480193  109.050183  109.400185  18342800   \n",
       "2004-08-24  111.240189  111.600192  103.570177  104.870176  15319700   \n",
       "2004-08-25  104.960181  108.000187  103.880180  106.000184   9232100   \n",
       "\n",
       "            Adj Close  \n",
       "Date                   \n",
       "2004-08-19  50.119968  \n",
       "2004-08-20  54.100990  \n",
       "2004-08-23  54.645447  \n",
       "2004-08-24  52.382705  \n",
       "2004-08-25  52.947145  "
      ]
     },
     "execution_count": 174,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wdf = df\n",
    "# add this command to correct the problem\n",
    "wdf['Date'] =pd.to_datetime(wdf.Date)\n",
    "wdf.sort_values(by='Date',ascending=True,inplace=True)\n",
    "wdf.set_index('Date').head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Locate Google stock price of a specific trading date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8/19/2014</th>\n",
       "      <td>585.002622</td>\n",
       "      <td>587.342658</td>\n",
       "      <td>584.002627</td>\n",
       "      <td>586.862643</td>\n",
       "      <td>978600</td>\n",
       "      <td>586.862643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8/18/2014</th>\n",
       "      <td>576.112580</td>\n",
       "      <td>584.512631</td>\n",
       "      <td>576.002598</td>\n",
       "      <td>582.162619</td>\n",
       "      <td>1284100</td>\n",
       "      <td>582.162619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8/15/2014</th>\n",
       "      <td>577.862619</td>\n",
       "      <td>579.382595</td>\n",
       "      <td>570.522603</td>\n",
       "      <td>573.482626</td>\n",
       "      <td>1519100</td>\n",
       "      <td>573.482626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8/14/2014</th>\n",
       "      <td>576.182596</td>\n",
       "      <td>577.902645</td>\n",
       "      <td>570.882599</td>\n",
       "      <td>574.652582</td>\n",
       "      <td>985400</td>\n",
       "      <td>574.652582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8/13/2014</th>\n",
       "      <td>567.312567</td>\n",
       "      <td>575.002602</td>\n",
       "      <td>565.752564</td>\n",
       "      <td>574.782577</td>\n",
       "      <td>1439200</td>\n",
       "      <td>574.782577</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Open        High         Low       Close   Volume   Adj Close\n",
       "Date                                                                          \n",
       "8/19/2014  585.002622  587.342658  584.002627  586.862643   978600  586.862643\n",
       "8/18/2014  576.112580  584.512631  576.002598  582.162619  1284100  582.162619\n",
       "8/15/2014  577.862619  579.382595  570.522603  573.482626  1519100  573.482626\n",
       "8/14/2014  576.182596  577.902645  570.882599  574.652582   985400  574.652582\n",
       "8/13/2014  567.312567  575.002602  565.752564  574.782577  1439200  574.782577"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wdf = df.set_index('Date')\n",
    "wdf.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'8/19/2014', u'8/18/2014', u'8/15/2014', u'8/14/2014', u'8/13/2014',\n",
       "       u'8/12/2014', u'8/11/2014', u'8/8/2014', u'8/7/2014', u'8/6/2014',\n",
       "       ...\n",
       "       u'9/1/2004', u'8/31/2004', u'8/30/2004', u'8/27/2004', u'8/26/2004',\n",
       "       u'8/25/2004', u'8/24/2004', u'8/23/2004', u'8/20/2004', u'8/19/2004'],\n",
       "      dtype='object', name=u'Date', length=2518)"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wdf.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Open         5.054009e+02\n",
       "High         5.057409e+02\n",
       "Low          4.960509e+02\n",
       "Close        5.002209e+02\n",
       "Volume       6.663800e+06\n",
       "Adj Close    2.498606e+02\n",
       "Name: 8/6/2010, dtype: float64"
      ]
     },
     "execution_count": 196,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# locate an entry by label index\n",
    "wdf.loc['8/6/2010']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Adj Close</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8/6/2010</th>\n",
       "      <td>505.400882</td>\n",
       "      <td>505.740867</td>\n",
       "      <td>496.050856</td>\n",
       "      <td>500.220854</td>\n",
       "      <td>6663800</td>\n",
       "      <td>249.860567</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                Open        High         Low       Close   Volume   Adj Close\n",
       "Date                                                                         \n",
       "8/6/2010  505.400882  505.740867  496.050856  500.220854  6663800  249.860567"
      ]
     },
     "execution_count": 215,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Or using positional indexing (input is in the list by list format)\n",
    "wdf.iloc[[wdf.index.get_loc('8/6/2010')],list(range(len(wdf.columns)))]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}