Skip to content

Instantly share code, notes, and snippets.

@libinruan
Created September 4, 2017 12:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save libinruan/dad14eec3d194512b4aebc458bad0ae5 to your computer and use it in GitHub Desktop.
Save libinruan/dad14eec3d194512b4aebc458bad0ae5 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The reason we use csv module instead of default Python i/o functions. \n",
"Here is an example."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Let's start with a cursory preview without any modules\n",
"path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014.csv\"\n",
"file = open(path)\n",
"for line in file:\n",
" print(line)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Date,Open,High,Low,Close,Volume,Adj Close\\n'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Index each line of a file with list comprehensions\n",
"file = open(path)\n",
"lines = [line for line in file]\n",
"# Then, we can read each line by index; \n",
"# each line is treated as a single string \n",
"# (there's a new line character included in the end)\n",
"lines[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'8/19/2014,585.002622,587.342658,584.002627,586.862643,978600,586.862643\\n'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lines[1]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Date,Open,High,Low,Close,Volume,Adj Close'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We can use strip to remove any leading and trailing white space\n",
"lines[0].strip()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We can further use split function to divide the string into small pieces\n",
"lines[0].strip().split(',')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'],\n",
" ['8/19/2014',\n",
" '585.002622',\n",
" '587.342658',\n",
" '584.002627',\n",
" '586.862643',\n",
" '978600',\n",
" '586.862643']]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Let's read the file once more with trick we just saw\n",
"file = open(path)\n",
"dataset = [line.strip().split(',') for line in file]\n",
"dataset[:2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Problem: Data remain treated as string. \n",
"Solution: Use csv module."
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Dialect', 'DictReader', 'DictWriter', 'Error', 'QUOTE_ALL', 'QUOTE_MINIMAL', 'QUOTE_NONE', 'QUOTE_NONNUMERIC', 'Sniffer', 'StringIO', '_Dialect', '__all__', '__builtins__', '__doc__', '__file__', '__name__', '__package__', '__version__', 'excel', 'excel_tab', 'field_size_limit', 'get_dialect', 'list_dialects', 're', 'reader', 'reduce', 'register_dialect', 'unregister_dialect', 'writer']\n"
]
}
],
"source": [
"import csv\n",
"'''When use a module for the first time, it is always useful to use 'dir'\n",
"to see what classes and function the module contains'''\n",
"print(dir(csv))"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"'''use newline='' in Python 3.0 becasue depending on your system strings may \n",
"end with a new line carraige return or both. This ensures the csv \n",
"module will work correctly across all platfomrs'''\n",
"file = open(path)\n",
"reader = csv.reader(file)\n",
"# use next function to extract the first line which is header containing no data\n",
"header = next(reader) # The first line is the header\n",
"data = [row for row in reader] # Read the remaing data"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']\n",
"['8/19/2014', '585.002622', '587.342658', '584.002627', '586.862643', '978600', '586.862643']\n"
]
}
],
"source": [
"print(header)\n",
"print(data[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To convert data into appropriate type"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[datetime.datetime(2014, 8, 19, 0, 0), 585.002622, 587.342658, 584.002627, 586.862643, 978600, 586.862643]\n"
]
}
],
"source": [
"# import the datetime class from the datetime module. you'll see why.\n",
"from datetime import datetime \n",
"import csv\n",
"file = open(path)\n",
"reader = csv.reader(file)\n",
"header = next(reader)\n",
"data = []\n",
"'''row = [Date, Open, High, Low, Close, Volume, Adj. Close ]\n",
" 'Date' is a datetime; the rest are of type floats \n",
" except 'Volume' which is an integer'''\n",
"''' We use strptime (short for string parse time) \n",
" to convert the datetime column'''\n",
"for row in reader:\n",
" date = datetime.strptime(row[0], '%m/%d/%Y')\n",
" open_price = float(row[1])\n",
" high = float(row[2])\n",
" low = float(row[3])\n",
" close = float(row[4])\n",
" volume = int(row[5])\n",
" adj_close = float(row[6])\n",
" data.append([date,open_price,high,low,close,volume,adj_close])\n",
"\n",
"print(data[0])"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# compute and store daily Google stock returns\n",
"save_path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014_daily_returns.csv\"\n",
"file = open(save_path,'w')\n",
"# create a csv.writer object to store our computed results\n",
"writer = csv.writer(file)\n",
"# to write a row call the write row method with a list of values\n",
"# write the header first\n",
"writer.writerow([\"Date\",\"Return\"])\n",
"# our data is luckily already sorted chronologically so\n",
"# we can simply loop through it.\n",
"# Since we use stock prices of consecutive days to compute returns, \n",
"# we stop at the second-to-last row.\n",
"for i in range(len(data)-1): \n",
" todays_row = data[i]\n",
" todays_date = todays_row[0]\n",
" todays_price = todays_row[-1] # the last element in the list (adj_close)\n",
" yesterdays_row = data[i+1] # data is arranged starting recently\n",
" yesterdays_price = yesterdays_row[-1]\n",
" daily_return = (todays_price-yesterdays_price)/yesterdays_price\n",
" \n",
" #The reuslt would not be listed in a user friedly way without\n",
" #using the string format time method on today's date and pass in the desired format\n",
" formatted_date = todays_date.strftime('%m/%d/%Y')\n",
" writer.writerow([formatted_date,daily_return])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, let's turn to pandas module to see how it makes our life better \n",
"It only takes four command lines to perfrom the same task."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8/19/2014</td>\n",
" <td>585.002622</td>\n",
" <td>587.342658</td>\n",
" <td>584.002627</td>\n",
" <td>586.862643</td>\n",
" <td>978600</td>\n",
" <td>586.862643</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8/18/2014</td>\n",
" <td>576.112580</td>\n",
" <td>584.512631</td>\n",
" <td>576.002598</td>\n",
" <td>582.162619</td>\n",
" <td>1284100</td>\n",
" <td>582.162619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8/15/2014</td>\n",
" <td>577.862619</td>\n",
" <td>579.382595</td>\n",
" <td>570.522603</td>\n",
" <td>573.482626</td>\n",
" <td>1519100</td>\n",
" <td>573.482626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8/14/2014</td>\n",
" <td>576.182596</td>\n",
" <td>577.902645</td>\n",
" <td>570.882599</td>\n",
" <td>574.652582</td>\n",
" <td>985400</td>\n",
" <td>574.652582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8/13/2014</td>\n",
" <td>567.312567</td>\n",
" <td>575.002602</td>\n",
" <td>565.752564</td>\n",
" <td>574.782577</td>\n",
" <td>1439200</td>\n",
" <td>574.782577</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Open High Low Close Volume \\\n",
"0 8/19/2014 585.002622 587.342658 584.002627 586.862643 978600 \n",
"1 8/18/2014 576.112580 584.512631 576.002598 582.162619 1284100 \n",
"2 8/15/2014 577.862619 579.382595 570.522603 573.482626 1519100 \n",
"3 8/14/2014 576.182596 577.902645 570.882599 574.652582 985400 \n",
"4 8/13/2014 567.312567 575.002602 565.752564 574.782577 1439200 \n",
"\n",
" Adj Close \n",
"0 586.862643 \n",
"1 582.162619 \n",
"2 573.482626 \n",
"3 574.652582 \n",
"4 574.782577 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014.csv\"\n",
"df = pd.read_csv(path)\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 2518 entries, 0 to 2517\n",
"Data columns (total 7 columns):\n",
"Date 2518 non-null object\n",
"Open 2518 non-null float64\n",
"High 2518 non-null float64\n",
"Low 2518 non-null float64\n",
"Close 2518 non-null float64\n",
"Volume 2518 non-null int64\n",
"Adj Close 2518 non-null float64\n",
"dtypes: float64(5), int64(1), object(1)\n",
"memory usage: 137.8+ KB\n",
"None\n"
]
}
],
"source": [
"# See panadas already correctly identifies object type for each data column\n",
"print(df.info())"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" <th>daily return</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8/19/2014</td>\n",
" <td>585.002622</td>\n",
" <td>587.342658</td>\n",
" <td>584.002627</td>\n",
" <td>586.862643</td>\n",
" <td>978600</td>\n",
" <td>586.862643</td>\n",
" <td>0.008073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8/18/2014</td>\n",
" <td>576.112580</td>\n",
" <td>584.512631</td>\n",
" <td>576.002598</td>\n",
" <td>582.162619</td>\n",
" <td>1284100</td>\n",
" <td>582.162619</td>\n",
" <td>0.015136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8/15/2014</td>\n",
" <td>577.862619</td>\n",
" <td>579.382595</td>\n",
" <td>570.522603</td>\n",
" <td>573.482626</td>\n",
" <td>1519100</td>\n",
" <td>573.482626</td>\n",
" <td>-0.002036</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8/14/2014</td>\n",
" <td>576.182596</td>\n",
" <td>577.902645</td>\n",
" <td>570.882599</td>\n",
" <td>574.652582</td>\n",
" <td>985400</td>\n",
" <td>574.652582</td>\n",
" <td>-0.000226</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8/13/2014</td>\n",
" <td>567.312567</td>\n",
" <td>575.002602</td>\n",
" <td>565.752564</td>\n",
" <td>574.782577</td>\n",
" <td>1439200</td>\n",
" <td>574.782577</td>\n",
" <td>0.021413</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>8/12/2014</td>\n",
" <td>564.522567</td>\n",
" <td>565.902572</td>\n",
" <td>560.882518</td>\n",
" <td>562.732562</td>\n",
" <td>1542000</td>\n",
" <td>562.732562</td>\n",
" <td>-0.009069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8/11/2014</td>\n",
" <td>569.992585</td>\n",
" <td>570.492553</td>\n",
" <td>566.002578</td>\n",
" <td>567.882551</td>\n",
" <td>1214700</td>\n",
" <td>567.882551</td>\n",
" <td>-0.001565</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8/8/2014</td>\n",
" <td>563.562536</td>\n",
" <td>570.252576</td>\n",
" <td>560.352561</td>\n",
" <td>568.772565</td>\n",
" <td>1494700</td>\n",
" <td>568.772565</td>\n",
" <td>0.009603</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8/7/2014</td>\n",
" <td>568.002570</td>\n",
" <td>569.892580</td>\n",
" <td>561.102543</td>\n",
" <td>563.362525</td>\n",
" <td>1110900</td>\n",
" <td>563.362525</td>\n",
" <td>-0.005322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>8/6/2014</td>\n",
" <td>561.782569</td>\n",
" <td>570.702601</td>\n",
" <td>560.002541</td>\n",
" <td>566.376589</td>\n",
" <td>1334300</td>\n",
" <td>566.376589</td>\n",
" <td>0.002308</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Open High Low Close Volume \\\n",
"0 8/19/2014 585.002622 587.342658 584.002627 586.862643 978600 \n",
"1 8/18/2014 576.112580 584.512631 576.002598 582.162619 1284100 \n",
"2 8/15/2014 577.862619 579.382595 570.522603 573.482626 1519100 \n",
"3 8/14/2014 576.182596 577.902645 570.882599 574.652582 985400 \n",
"4 8/13/2014 567.312567 575.002602 565.752564 574.782577 1439200 \n",
"5 8/12/2014 564.522567 565.902572 560.882518 562.732562 1542000 \n",
"6 8/11/2014 569.992585 570.492553 566.002578 567.882551 1214700 \n",
"7 8/8/2014 563.562536 570.252576 560.352561 568.772565 1494700 \n",
"8 8/7/2014 568.002570 569.892580 561.102543 563.362525 1110900 \n",
"9 8/6/2014 561.782569 570.702601 560.002541 566.376589 1334300 \n",
"\n",
" Adj Close daily return \n",
"0 586.862643 0.008073 \n",
"1 582.162619 0.015136 \n",
"2 573.482626 -0.002036 \n",
"3 574.652582 -0.000226 \n",
"4 574.782577 0.021413 \n",
"5 562.732562 -0.009069 \n",
"6 567.882551 -0.001565 \n",
"7 568.772565 0.009603 \n",
"8 563.362525 -0.005322 \n",
"9 566.376589 0.002308 "
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# use 'shift' method to read the previous trade day's adjusted closing price.\n",
"df['daily return'] = (df['Adj Close']-df['Adj Close'].shift(-1))/df['Adj Close'].shift(-1)\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.to_csv(\"google_stock_data_20042014_daily_returns_PANDAS.csv\",columns=['Date','daily return'],index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Rearrange Google stock prices series in reverse date order"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.series.Series'>\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1/10/2005</th>\n",
" <td>194.500326</td>\n",
" <td>198.100339</td>\n",
" <td>191.830333</td>\n",
" <td>195.060341</td>\n",
" <td>15135600</td>\n",
" <td>97.432738</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2006</th>\n",
" <td>464.420786</td>\n",
" <td>470.250794</td>\n",
" <td>462.040801</td>\n",
" <td>469.760804</td>\n",
" <td>18262300</td>\n",
" <td>234.645756</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2007</th>\n",
" <td>484.430845</td>\n",
" <td>493.550855</td>\n",
" <td>482.040813</td>\n",
" <td>489.460837</td>\n",
" <td>11981700</td>\n",
" <td>244.485933</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2008</th>\n",
" <td>645.011113</td>\n",
" <td>657.201125</td>\n",
" <td>640.111093</td>\n",
" <td>646.731101</td>\n",
" <td>12715800</td>\n",
" <td>323.042508</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2011</th>\n",
" <td>614.801082</td>\n",
" <td>615.391054</td>\n",
" <td>608.561033</td>\n",
" <td>614.211048</td>\n",
" <td>3170100</td>\n",
" <td>306.798725</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume \\\n",
"Date \n",
"1/10/2005 194.500326 198.100339 191.830333 195.060341 15135600 \n",
"1/10/2006 464.420786 470.250794 462.040801 469.760804 18262300 \n",
"1/10/2007 484.430845 493.550855 482.040813 489.460837 11981700 \n",
"1/10/2008 645.011113 657.201125 640.111093 646.731101 12715800 \n",
"1/10/2011 614.801082 615.391054 608.561033 614.211048 3170100 \n",
"\n",
" Adj Close \n",
"Date \n",
"1/10/2005 97.432738 \n",
"1/10/2006 234.645756 \n",
"1/10/2007 244.485933 \n",
"1/10/2008 323.042508 \n",
"1/10/2011 306.798725 "
]
},
"execution_count": 172,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wdf = df # just to keep original df intact\n",
"print(type(wdf['Date']))\n",
"wdf.set_index('Date').head(5)"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1/10/2005</th>\n",
" <td>194.500326</td>\n",
" <td>198.100339</td>\n",
" <td>191.830333</td>\n",
" <td>195.060341</td>\n",
" <td>15135600</td>\n",
" <td>97.432738</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2006</th>\n",
" <td>464.420786</td>\n",
" <td>470.250794</td>\n",
" <td>462.040801</td>\n",
" <td>469.760804</td>\n",
" <td>18262300</td>\n",
" <td>234.645756</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2007</th>\n",
" <td>484.430845</td>\n",
" <td>493.550855</td>\n",
" <td>482.040813</td>\n",
" <td>489.460837</td>\n",
" <td>11981700</td>\n",
" <td>244.485933</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2008</th>\n",
" <td>645.011113</td>\n",
" <td>657.201125</td>\n",
" <td>640.111093</td>\n",
" <td>646.731101</td>\n",
" <td>12715800</td>\n",
" <td>323.042508</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1/10/2011</th>\n",
" <td>614.801082</td>\n",
" <td>615.391054</td>\n",
" <td>608.561033</td>\n",
" <td>614.211048</td>\n",
" <td>3170100</td>\n",
" <td>306.798725</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume \\\n",
"Date \n",
"1/10/2005 194.500326 198.100339 191.830333 195.060341 15135600 \n",
"1/10/2006 464.420786 470.250794 462.040801 469.760804 18262300 \n",
"1/10/2007 484.430845 493.550855 482.040813 489.460837 11981700 \n",
"1/10/2008 645.011113 657.201125 640.111093 646.731101 12715800 \n",
"1/10/2011 614.801082 615.391054 608.561033 614.211048 3170100 \n",
"\n",
" Adj Close \n",
"Date \n",
"1/10/2005 97.432738 \n",
"1/10/2006 234.645756 \n",
"1/10/2007 244.485933 \n",
"1/10/2008 323.042508 \n",
"1/10/2011 306.798725 "
]
},
"execution_count": 173,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wdf = df\n",
"wdf.sort_values(by='Date',ascending=True,inplace=True)\n",
"wdf.set_index('Date').head(5)\n",
"# The result is grouped in the way not what we expect"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2004-08-19</th>\n",
" <td>100.000168</td>\n",
" <td>104.060182</td>\n",
" <td>95.960165</td>\n",
" <td>100.340176</td>\n",
" <td>44871300</td>\n",
" <td>50.119968</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2004-08-20</th>\n",
" <td>101.010175</td>\n",
" <td>109.080187</td>\n",
" <td>100.500174</td>\n",
" <td>108.310183</td>\n",
" <td>22942800</td>\n",
" <td>54.100990</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2004-08-23</th>\n",
" <td>110.750191</td>\n",
" <td>113.480193</td>\n",
" <td>109.050183</td>\n",
" <td>109.400185</td>\n",
" <td>18342800</td>\n",
" <td>54.645447</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2004-08-24</th>\n",
" <td>111.240189</td>\n",
" <td>111.600192</td>\n",
" <td>103.570177</td>\n",
" <td>104.870176</td>\n",
" <td>15319700</td>\n",
" <td>52.382705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2004-08-25</th>\n",
" <td>104.960181</td>\n",
" <td>108.000187</td>\n",
" <td>103.880180</td>\n",
" <td>106.000184</td>\n",
" <td>9232100</td>\n",
" <td>52.947145</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume \\\n",
"Date \n",
"2004-08-19 100.000168 104.060182 95.960165 100.340176 44871300 \n",
"2004-08-20 101.010175 109.080187 100.500174 108.310183 22942800 \n",
"2004-08-23 110.750191 113.480193 109.050183 109.400185 18342800 \n",
"2004-08-24 111.240189 111.600192 103.570177 104.870176 15319700 \n",
"2004-08-25 104.960181 108.000187 103.880180 106.000184 9232100 \n",
"\n",
" Adj Close \n",
"Date \n",
"2004-08-19 50.119968 \n",
"2004-08-20 54.100990 \n",
"2004-08-23 54.645447 \n",
"2004-08-24 52.382705 \n",
"2004-08-25 52.947145 "
]
},
"execution_count": 174,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wdf = df\n",
"# add this command to correct the problem\n",
"wdf['Date'] =pd.to_datetime(wdf.Date)\n",
"wdf.sort_values(by='Date',ascending=True,inplace=True)\n",
"wdf.set_index('Date').head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Locate Google stock price of a specific trading date"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8/19/2014</th>\n",
" <td>585.002622</td>\n",
" <td>587.342658</td>\n",
" <td>584.002627</td>\n",
" <td>586.862643</td>\n",
" <td>978600</td>\n",
" <td>586.862643</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8/18/2014</th>\n",
" <td>576.112580</td>\n",
" <td>584.512631</td>\n",
" <td>576.002598</td>\n",
" <td>582.162619</td>\n",
" <td>1284100</td>\n",
" <td>582.162619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8/15/2014</th>\n",
" <td>577.862619</td>\n",
" <td>579.382595</td>\n",
" <td>570.522603</td>\n",
" <td>573.482626</td>\n",
" <td>1519100</td>\n",
" <td>573.482626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8/14/2014</th>\n",
" <td>576.182596</td>\n",
" <td>577.902645</td>\n",
" <td>570.882599</td>\n",
" <td>574.652582</td>\n",
" <td>985400</td>\n",
" <td>574.652582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8/13/2014</th>\n",
" <td>567.312567</td>\n",
" <td>575.002602</td>\n",
" <td>565.752564</td>\n",
" <td>574.782577</td>\n",
" <td>1439200</td>\n",
" <td>574.782577</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume Adj Close\n",
"Date \n",
"8/19/2014 585.002622 587.342658 584.002627 586.862643 978600 586.862643\n",
"8/18/2014 576.112580 584.512631 576.002598 582.162619 1284100 582.162619\n",
"8/15/2014 577.862619 579.382595 570.522603 573.482626 1519100 573.482626\n",
"8/14/2014 576.182596 577.902645 570.882599 574.652582 985400 574.652582\n",
"8/13/2014 567.312567 575.002602 565.752564 574.782577 1439200 574.782577"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wdf = df.set_index('Date')\n",
"wdf.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'8/19/2014', u'8/18/2014', u'8/15/2014', u'8/14/2014', u'8/13/2014',\n",
" u'8/12/2014', u'8/11/2014', u'8/8/2014', u'8/7/2014', u'8/6/2014',\n",
" ...\n",
" u'9/1/2004', u'8/31/2004', u'8/30/2004', u'8/27/2004', u'8/26/2004',\n",
" u'8/25/2004', u'8/24/2004', u'8/23/2004', u'8/20/2004', u'8/19/2004'],\n",
" dtype='object', name=u'Date', length=2518)"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wdf.index"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Open 5.054009e+02\n",
"High 5.057409e+02\n",
"Low 4.960509e+02\n",
"Close 5.002209e+02\n",
"Volume 6.663800e+06\n",
"Adj Close 2.498606e+02\n",
"Name: 8/6/2010, dtype: float64"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# locate an entry by label index\n",
"wdf.loc['8/6/2010']"
]
},
{
"cell_type": "code",
"execution_count": 215,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Volume</th>\n",
" <th>Adj Close</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8/6/2010</th>\n",
" <td>505.400882</td>\n",
" <td>505.740867</td>\n",
" <td>496.050856</td>\n",
" <td>500.220854</td>\n",
" <td>6663800</td>\n",
" <td>249.860567</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Open High Low Close Volume Adj Close\n",
"Date \n",
"8/6/2010 505.400882 505.740867 496.050856 500.220854 6663800 249.860567"
]
},
"execution_count": 215,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Or using positional indexing (input is in the list by list format)\n",
"wdf.iloc[[wdf.index.get_loc('8/6/2010')],list(range(len(wdf.columns)))]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment