Skip to content

Instantly share code, notes, and snippets.

@chetanambi
Created August 12, 2020 17:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chetanambi/c8f029c7b18759a3abb57a7e55f8ede5 to your computer and use it in GitHub Desktop.
Save chetanambi/c8f029c7b18759a3abb57a7e55f8ede5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import camelot"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"tables = camelot.read_pdf('https://trade.indiabulls.com/pdf/NSE_Holidays_Equity.pdf')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of tables"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2\n"
]
}
],
"source": [
"print(tables.n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Parsing report"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'accuracy': 100.0, 'whitespace': 0.0, 'order': 1, 'page': 1}\n"
]
}
],
"source": [
"print(tables[0].parsing_report)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'accuracy': 100.0, 'whitespace': 0.0, 'order': 1, 'page': 1}\n",
"{'accuracy': 100.0, 'whitespace': 0.0, 'order': 2, 'page': 1}\n"
]
}
],
"source": [
"for i in range(tables.n):\n",
" print(tables[i].parsing_report)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Table shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Table shape=(13, 4)>\n"
]
}
],
"source": [
"print(tables[0])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Table shape=(13, 4)>\n",
"<Table shape=(8, 4)>\n"
]
}
],
"source": [
"for i in range(tables.n):\n",
" print(tables[i])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Export the tables"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"tables[0].to_csv('nse_holiday_list_table1.csv') # other exporters: to_json, to_excel, to_html, to_sqlite"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Export to a pandas DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Sr. No.</td>\n",
" <td>Date</td>\n",
" <td>Day</td>\n",
" <td>Description</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>February 21, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Mahashivratri</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>March 10, 2020</td>\n",
" <td>Tuesday</td>\n",
" <td>Holi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>April 02,2020</td>\n",
" <td>Thursday</td>\n",
" <td>Ram Navami</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>April 06,2020</td>\n",
" <td>Monday</td>\n",
" <td>Mahavir Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>April 10, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Good Friday</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>April 14,2020</td>\n",
" <td>Tuesday</td>\n",
" <td>Dr.Baba Saheb Ambedkar Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>May 01,2020</td>\n",
" <td>Friday</td>\n",
" <td>Maharashtra Day</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8</td>\n",
" <td>May 25, 2020</td>\n",
" <td>Monday</td>\n",
" <td>Id-Ul-Fitr (Ramzan ID)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9</td>\n",
" <td>October 02, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Mahatma Gandhi Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10</td>\n",
" <td>November 16, 2020</td>\n",
" <td>Monday</td>\n",
" <td>Diwali-Balipratipada</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>November 30, 2020</td>\n",
" <td>Monday</td>\n",
" <td>Gurunanak Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>12</td>\n",
" <td>December 25, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Christmas</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3\n",
"0 Sr. No. Date Day Description\n",
"1 1 February 21, 2020 Friday Mahashivratri\n",
"2 2 March 10, 2020 Tuesday Holi\n",
"3 3 April 02,2020 Thursday Ram Navami\n",
"4 4 April 06,2020 Monday Mahavir Jayanti\n",
"5 5 April 10, 2020 Friday Good Friday\n",
"6 6 April 14,2020 Tuesday Dr.Baba Saheb Ambedkar Jayanti\n",
"7 7 May 01,2020 Friday Maharashtra Day\n",
"8 8 May 25, 2020 Monday Id-Ul-Fitr (Ramzan ID)\n",
"9 9 October 02, 2020 Friday Mahatma Gandhi Jayanti\n",
"10 10 November 16, 2020 Monday Diwali-Balipratipada\n",
"11 11 November 30, 2020 Monday Gurunanak Jayanti\n",
"12 12 December 25, 2020 Friday Christmas"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tables[0].df "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sr. No.</th>\n",
" <th>Date</th>\n",
" <th>Day</th>\n",
" <th>Description</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>February 21, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Mahashivratri</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>March 10, 2020</td>\n",
" <td>Tuesday</td>\n",
" <td>Holi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>April 02,2020</td>\n",
" <td>Thursday</td>\n",
" <td>Ram Navami</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>April 06,2020</td>\n",
" <td>Monday</td>\n",
" <td>Mahavir Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>April 10, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Good Friday</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>April 14,2020</td>\n",
" <td>Tuesday</td>\n",
" <td>Dr.Baba Saheb Ambedkar Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>May 01,2020</td>\n",
" <td>Friday</td>\n",
" <td>Maharashtra Day</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8</td>\n",
" <td>May 25, 2020</td>\n",
" <td>Monday</td>\n",
" <td>Id-Ul-Fitr (Ramzan ID)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9</td>\n",
" <td>October 02, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Mahatma Gandhi Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10</td>\n",
" <td>November 16, 2020</td>\n",
" <td>Monday</td>\n",
" <td>Diwali-Balipratipada</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>November 30, 2020</td>\n",
" <td>Monday</td>\n",
" <td>Gurunanak Jayanti</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>12</td>\n",
" <td>December 25, 2020</td>\n",
" <td>Friday</td>\n",
" <td>Christmas</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sr. No. Date Day Description\n",
"1 1 February 21, 2020 Friday Mahashivratri\n",
"2 2 March 10, 2020 Tuesday Holi\n",
"3 3 April 02,2020 Thursday Ram Navami\n",
"4 4 April 06,2020 Monday Mahavir Jayanti\n",
"5 5 April 10, 2020 Friday Good Friday\n",
"6 6 April 14,2020 Tuesday Dr.Baba Saheb Ambedkar Jayanti\n",
"7 7 May 01,2020 Friday Maharashtra Day\n",
"8 8 May 25, 2020 Monday Id-Ul-Fitr (Ramzan ID)\n",
"9 9 October 02, 2020 Friday Mahatma Gandhi Jayanti\n",
"10 10 November 16, 2020 Monday Diwali-Balipratipada\n",
"11 11 November 30, 2020 Monday Gurunanak Jayanti\n",
"12 12 December 25, 2020 Friday Christmas"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp_df = tables[0].df \n",
"temp_df.rename(columns=temp_df.iloc[0]).drop(temp_df.index[0])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Sr. No.</td>\n",
" <td>Date</td>\n",
" <td>Day</td>\n",
" <td>Description</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>January 26, 2020</td>\n",
" <td>Sunday</td>\n",
" <td>Republic Day</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>August 01, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Bakri Id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>August 15, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Independence Day</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>August 22, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Ganesh Chaturthi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>August 30, 2020</td>\n",
" <td>Sunday</td>\n",
" <td>Moharram</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>October 25, 2020</td>\n",
" <td>Sunday</td>\n",
" <td>Dasera</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>November 14, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Diwali-Laxmi Pujan*</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3\n",
"0 Sr. No. Date Day Description\n",
"1 1 January 26, 2020 Sunday Republic Day\n",
"2 2 August 01, 2020 Saturday Bakri Id\n",
"3 3 August 15, 2020 Saturday Independence Day\n",
"4 4 August 22, 2020 Saturday Ganesh Chaturthi\n",
"5 5 August 30, 2020 Sunday Moharram\n",
"6 6 October 25, 2020 Sunday Dasera\n",
"7 7 November 14, 2020 Saturday Diwali-Laxmi Pujan*"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tables[1].df # get a pandas DataFrame!"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sr. No.</th>\n",
" <th>Date</th>\n",
" <th>Day</th>\n",
" <th>Description</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>January 26, 2020</td>\n",
" <td>Sunday</td>\n",
" <td>Republic Day</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>August 01, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Bakri Id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>August 15, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Independence Day</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>August 22, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Ganesh Chaturthi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>August 30, 2020</td>\n",
" <td>Sunday</td>\n",
" <td>Moharram</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>October 25, 2020</td>\n",
" <td>Sunday</td>\n",
" <td>Dasera</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>November 14, 2020</td>\n",
" <td>Saturday</td>\n",
" <td>Diwali-Laxmi Pujan*</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sr. No. Date Day Description\n",
"1 1 January 26, 2020 Sunday Republic Day\n",
"2 2 August 01, 2020 Saturday Bakri Id\n",
"3 3 August 15, 2020 Saturday Independence Day\n",
"4 4 August 22, 2020 Saturday Ganesh Chaturthi\n",
"5 5 August 30, 2020 Sunday Moharram\n",
"6 6 October 25, 2020 Sunday Dasera\n",
"7 7 November 14, 2020 Saturday Diwali-Laxmi Pujan*"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp_df = tables[1].df \n",
"temp_df.rename(columns=temp_df.iloc[0]).drop(temp_df.index[0])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment