Skip to content

Instantly share code, notes, and snippets.

@decisionstats
Created March 16, 2017 15:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save decisionstats/d3cf51e145b581480a42348a8a16177e to your computer and use it in GitHub Desktop.
Save decisionstats/d3cf51e145b581480a42348a8a16177e to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import psycopg2\n",
"import pandas as pd\n",
"import sqlalchemy as sa\n",
"import time\n",
"import seaborn as sns\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandasql in c:\\users\\dell\\anaconda3\\lib\\site-packages\n",
"Requirement already satisfied: pandas in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n",
"Requirement already satisfied: sqlalchemy in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n",
"Requirement already satisfied: numpy in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n",
"Requirement already satisfied: python-dateutil>=2 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas->pandasql)\n",
"Requirement already satisfied: pytz>=2011k in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas->pandasql)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from python-dateutil>=2->pandas->pandasql)\n"
]
}
],
"source": [
"! pip install pandasql"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: psycopg2 in c:\\users\\dell\\anaconda3\\lib\\site-packages\n"
]
}
],
"source": [
"! pip install psycopg2"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"parameters = { \n",
" 'username': 'postgres', \n",
" 'password': 'root',\n",
" 'server': 'localhost',\n",
" 'database': 'ajay'\n",
" }\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"connection= 'postgresql://{username}:{password}@{server}:5432/{database}'.format(**parameters)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"postgresql://postgres:root@localhost:5432/ajay\n"
]
}
],
"source": [
"print (connection)\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"engine = sa.create_engine(connection, encoding=\"utf-8\")\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"insp = sa.inspect(engine)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<sqlalchemy.dialects.postgresql.base.PGInspector object at 0x000000000B2E5278>\n"
]
}
],
"source": [
"print(insp)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['information_schema', 'public']\n"
]
}
],
"source": [
"db_list = insp.get_schema_names()\n",
"print(db_list)\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['iris', 'temp', 'sales']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"engine.table_names()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data3= pd.read_sql_query('select * from \"sales\" limit 10',con=engine)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 10 entries, 0 to 9\n",
"Data columns (total 4 columns):\n",
"customer_id 10 non-null int64\n",
"sales 10 non-null int64\n",
"date 10 non-null object\n",
"product_id 10 non-null int64\n",
"dtypes: int64(3), object(1)\n",
"memory usage: 400.0+ bytes\n"
]
}
],
"source": [
"data3.info()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>sales</th>\n",
" <th>date</th>\n",
" <th>product_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10001</td>\n",
" <td>5230</td>\n",
" <td>2017-02-07</td>\n",
" <td>524</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10002</td>\n",
" <td>2781</td>\n",
" <td>2017-05-12</td>\n",
" <td>469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10003</td>\n",
" <td>2083</td>\n",
" <td>2016-12-18</td>\n",
" <td>917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10004</td>\n",
" <td>214</td>\n",
" <td>2015-01-19</td>\n",
" <td>354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10005</td>\n",
" <td>9407</td>\n",
" <td>2016-09-26</td>\n",
" <td>292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>10006</td>\n",
" <td>4705</td>\n",
" <td>2015-10-17</td>\n",
" <td>380</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>10007</td>\n",
" <td>4729</td>\n",
" <td>2016-01-02</td>\n",
" <td>469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>10008</td>\n",
" <td>7715</td>\n",
" <td>2015-09-12</td>\n",
" <td>480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>10009</td>\n",
" <td>9898</td>\n",
" <td>2015-04-05</td>\n",
" <td>611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10010</td>\n",
" <td>5797</td>\n",
" <td>2015-08-13</td>\n",
" <td>959</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id sales date product_id\n",
"0 10001 5230 2017-02-07 524\n",
"1 10002 2781 2017-05-12 469\n",
"2 10003 2083 2016-12-18 917\n",
"3 10004 214 2015-01-19 354\n",
"4 10005 9407 2016-09-26 292\n",
"5 10006 4705 2015-10-17 380\n",
"6 10007 4729 2016-01-02 469\n",
"7 10008 7715 2015-09-12 480\n",
"8 10009 9898 2015-04-05 611\n",
"9 10010 5797 2015-08-13 959"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data3"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data5= pd.read_sql_query('select * from \"sales\" limit 20',con=engine)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>sales</th>\n",
" <th>date</th>\n",
" <th>product_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10001</td>\n",
" <td>5230</td>\n",
" <td>2017-02-07</td>\n",
" <td>524</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10002</td>\n",
" <td>2781</td>\n",
" <td>2017-05-12</td>\n",
" <td>469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10003</td>\n",
" <td>2083</td>\n",
" <td>2016-12-18</td>\n",
" <td>917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10004</td>\n",
" <td>214</td>\n",
" <td>2015-01-19</td>\n",
" <td>354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10005</td>\n",
" <td>9407</td>\n",
" <td>2016-09-26</td>\n",
" <td>292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>10006</td>\n",
" <td>4705</td>\n",
" <td>2015-10-17</td>\n",
" <td>380</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>10007</td>\n",
" <td>4729</td>\n",
" <td>2016-01-02</td>\n",
" <td>469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>10008</td>\n",
" <td>7715</td>\n",
" <td>2015-09-12</td>\n",
" <td>480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>10009</td>\n",
" <td>9898</td>\n",
" <td>2015-04-05</td>\n",
" <td>611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10010</td>\n",
" <td>5797</td>\n",
" <td>2015-08-13</td>\n",
" <td>959</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10011</td>\n",
" <td>1283</td>\n",
" <td>2016-04-22</td>\n",
" <td>950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>10012</td>\n",
" <td>2751</td>\n",
" <td>2015-01-01</td>\n",
" <td>322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>10013</td>\n",
" <td>4422</td>\n",
" <td>2017-07-11</td>\n",
" <td>965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>10014</td>\n",
" <td>6235</td>\n",
" <td>2015-03-07</td>\n",
" <td>783</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>10015</td>\n",
" <td>7302</td>\n",
" <td>2016-04-06</td>\n",
" <td>792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>10016</td>\n",
" <td>6408</td>\n",
" <td>2016-10-21</td>\n",
" <td>347</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>10017</td>\n",
" <td>1880</td>\n",
" <td>2015-08-23</td>\n",
" <td>187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>10018</td>\n",
" <td>3738</td>\n",
" <td>2017-03-12</td>\n",
" <td>222</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>10019</td>\n",
" <td>900</td>\n",
" <td>2015-07-20</td>\n",
" <td>236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>10020</td>\n",
" <td>5516</td>\n",
" <td>2017-05-10</td>\n",
" <td>828</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id sales date product_id\n",
"0 10001 5230 2017-02-07 524\n",
"1 10002 2781 2017-05-12 469\n",
"2 10003 2083 2016-12-18 917\n",
"3 10004 214 2015-01-19 354\n",
"4 10005 9407 2016-09-26 292\n",
"5 10006 4705 2015-10-17 380\n",
"6 10007 4729 2016-01-02 469\n",
"7 10008 7715 2015-09-12 480\n",
"8 10009 9898 2015-04-05 611\n",
"9 10010 5797 2015-08-13 959\n",
"10 10011 1283 2016-04-22 950\n",
"11 10012 2751 2015-01-01 322\n",
"12 10013 4422 2017-07-11 965\n",
"13 10014 6235 2015-03-07 783\n",
"14 10015 7302 2016-04-06 792\n",
"15 10016 6408 2016-10-21 347\n",
"16 10017 1880 2015-08-23 187\n",
"17 10018 3738 2017-03-12 222\n",
"18 10019 900 2015-07-20 236\n",
"19 10020 5516 2017-05-10 828"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data5"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandasql as pdsql"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"str1=\"select * from data5 limit 5;\""
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df11=pdsql.sqldf(str1)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>sales</th>\n",
" <th>date</th>\n",
" <th>product_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10001</td>\n",
" <td>5230</td>\n",
" <td>2017-02-07</td>\n",
" <td>524</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10002</td>\n",
" <td>2781</td>\n",
" <td>2017-05-12</td>\n",
" <td>469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10003</td>\n",
" <td>2083</td>\n",
" <td>2016-12-18</td>\n",
" <td>917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10004</td>\n",
" <td>214</td>\n",
" <td>2015-01-19</td>\n",
" <td>354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10005</td>\n",
" <td>9407</td>\n",
" <td>2016-09-26</td>\n",
" <td>292</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id sales date product_id\n",
"0 10001 5230 2017-02-07 524\n",
"1 10002 2781 2017-05-12 469\n",
"2 10003 2083 2016-12-18 917\n",
"3 10004 214 2015-01-19 354\n",
"4 10005 9407 2016-09-26 292"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df11"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.frame.DataFrame"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(data5)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data5= pd.read_sql_query('select * from \"sales\" limit 250',con=engine)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>sales</th>\n",
" <th>date</th>\n",
" <th>product_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10001</td>\n",
" <td>5230</td>\n",
" <td>2017-02-07</td>\n",
" <td>524</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10002</td>\n",
" <td>2781</td>\n",
" <td>2017-05-12</td>\n",
" <td>469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10003</td>\n",
" <td>2083</td>\n",
" <td>2016-12-18</td>\n",
" <td>917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10004</td>\n",
" <td>214</td>\n",
" <td>2015-01-19</td>\n",
" <td>354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10005</td>\n",
" <td>9407</td>\n",
" <td>2016-09-26</td>\n",
" <td>292</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id sales date product_id\n",
"0 10001 5230 2017-02-07 524\n",
"1 10002 2781 2017-05-12 469\n",
"2 10003 2083 2016-12-18 917\n",
"3 10004 214 2015-01-19 354\n",
"4 10005 9407 2016-09-26 292"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data5.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"str2=\"select avg(sales) from data5 ;\""
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df111=pdsql.sqldf(str2)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>avg(sales)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5226.868</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" avg(sales)\n",
"0 5226.868"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df111"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment