Skip to content

Instantly share code, notes, and snippets.

@infinite-Joy
Created August 23, 2017 08:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save infinite-Joy/a7cc8a9975f12b33a896eb6c3412448d to your computer and use it in GitHub Desktop.
Save infinite-Joy/a7cc8a9975f12b33a896eb6c3412448d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Last</th>\n",
" <th>Close</th>\n",
" <th>Total Trade Quantity</th>\n",
" <th>Turnover (Lacs)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2017-08-02</td>\n",
" <td>209.10</td>\n",
" <td>209.95</td>\n",
" <td>204.1</td>\n",
" <td>205.80</td>\n",
" <td>206.70</td>\n",
" <td>2935.0</td>\n",
" <td>6.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2017-08-01</td>\n",
" <td>212.00</td>\n",
" <td>214.95</td>\n",
" <td>208.3</td>\n",
" <td>208.30</td>\n",
" <td>209.15</td>\n",
" <td>5094.0</td>\n",
" <td>10.78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2017-07-31</td>\n",
" <td>212.00</td>\n",
" <td>215.00</td>\n",
" <td>210.2</td>\n",
" <td>210.35</td>\n",
" <td>212.00</td>\n",
" <td>6803.0</td>\n",
" <td>14.47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2017-07-28</td>\n",
" <td>208.00</td>\n",
" <td>213.95</td>\n",
" <td>208.0</td>\n",
" <td>211.95</td>\n",
" <td>211.25</td>\n",
" <td>2023.0</td>\n",
" <td>4.28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2017-07-27</td>\n",
" <td>213.05</td>\n",
" <td>215.30</td>\n",
" <td>209.0</td>\n",
" <td>210.95</td>\n",
" <td>210.50</td>\n",
" <td>6714.0</td>\n",
" <td>14.23</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Open High Low Last Close Total Trade Quantity \\\n",
"0 2017-08-02 209.10 209.95 204.1 205.80 206.70 2935.0 \n",
"1 2017-08-01 212.00 214.95 208.3 208.30 209.15 5094.0 \n",
"2 2017-07-31 212.00 215.00 210.2 210.35 212.00 6803.0 \n",
"3 2017-07-28 208.00 213.95 208.0 211.95 211.25 2023.0 \n",
"4 2017-07-27 213.05 215.30 209.0 210.95 210.50 6714.0 \n",
"\n",
" Turnover (Lacs) \n",
"0 6.09 \n",
"1 10.78 \n",
"2 14.47 \n",
"3 4.28 \n",
"4 14.23 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"import numpy as np\n",
"from sklearn import metrics\n",
"\n",
"# Load the HITECHCORP quotes from the CSV next to the notebook and preview them.\n",
"hitech_csv = \"NSE-HITECHCORP.csv\"\n",
"df_hitech = pd.read_csv(hitech_csv)\n",
"df_hitech.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"below are the shapes: \n",
"(51, 8) (55, 8) (53, 8)\n"
]
}
],
"source": [
"# Load the two remaining tickers; df_hitech was read in the first cell.\n",
"df_bhagyanar, df_hudco = (\n",
"    pd.read_csv(csv_name) for csv_name in (\"NSE-BHAGYANGR.csv\", \"NSE-HUDCO.csv\")\n",
")\n",
"\n",
"print('below are the shapes: ')\n",
"print(*(frame.shape for frame in (df_hitech, df_bhagyanar, df_hudco)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 209.1 209.95 204.1 205.8 206.7 2935. 6.09]\n"
]
}
],
"source": [
"# Columns used as features; 'Date' is not part of the feature set.\n",
"column_names = [\n",
"    'Open', 'High', 'Low', 'Last', 'Close',\n",
"    'Total Trade Quantity', 'Turnover (Lacs)',\n",
"]\n",
"\n",
"# One 2-D feature array per ticker, in the order hitech, bhagyanagar, hudco.\n",
"X_hitech, X_bhagyanagar, X_hudco = (\n",
"    frame[column_names].values\n",
"    for frame in (df_hitech, df_bhagyanar, df_hudco)\n",
")\n",
"\n",
"# Stack the three tickers row-wise into a single feature matrix.\n",
"X = np.vstack((X_hitech, X_bhagyanagar, X_hudco))\n",
"\n",
"print(X[0])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"some samples of y\n",
"[1000 1000 1000 1000 1000]\n",
"first x shape and then y shape\n",
"shape of X: (159, 7)\n",
"shape of y: (159,)\n",
"[[ 209.1 209.95 204.1 205.8 206.7 2935. 6.09]\n",
" [ 212. 214.95 208.3 208.3 209.15 5094. 10.78]]\n",
"[1000 1000]\n"
]
}
],
"source": [
"# Class labels: one integer code per ticker (1000 = hitech, 2000 = bhagyanagar,\n",
"# 3000 = hudco), repeated once per row of that ticker's feature block.\n",
"# The counts are taken from the arrays themselves instead of being typed in,\n",
"# so y stays aligned with the row order of X if a CSV gains or loses rows.\n",
"y = np.concatenate([\n",
"    np.full(len(X_hitech), 1000),\n",
"    np.full(len(X_bhagyanagar), 2000),\n",
"    np.full(len(X_hudco), 3000),\n",
"])\n",
"assert len(y) == len(X), 'X and y must have the same number of rows'\n",
"\n",
"print('some samples of y')\n",
"print(y[:5])\n",
"\n",
"print('first x shape and then y shape')\n",
"\n",
"print('shape of X: {}'.format(X.shape))\n",
"print('shape of y: {}'.format(y.shape))\n",
"\n",
"print(X[:2])\n",
"print(y[:2])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 6.83000000e+01 6.83000000e+01 6.78000000e+01 6.79000000e+01\n",
" 6.80000000e+01 1.43373000e+06 9.75450000e+02]\n",
"[ 3000. 3000. 1000. 1000. 2000.]\n",
"[ 225.4 231. 214. 220.7 220.65 126911. 283.93]\n",
"[ 1000. 1000. 1000. 1000. 3000.]\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"def get_train_test(X, y, test_size, random_state=None):\n",
"    \"\"\"Split features and labels into train and test subsets.\n",
"\n",
"    Rows of X and entries of y are shuffled and split together by\n",
"    train_test_split, so every sample keeps its own label and the labels\n",
"    keep their original dtype (no round trip through a float matrix).\n",
"\n",
"    Args:\n",
"        X: 2-D array of features, one row per sample.\n",
"        y: 1-D array-like of labels, same length as X.\n",
"        test_size: forwarded to train_test_split (fraction or row count).\n",
"        random_state: optional seed forwarded to train_test_split; the\n",
"            default None keeps the original unseeded behaviour.\n",
"\n",
"    Returns:\n",
"        Tuple (X_train, y_train, X_test, y_test). Note that this order\n",
"        differs from the one train_test_split itself returns.\n",
"    \"\"\"\n",
"    X_train, X_test, y_train, y_test = train_test_split(\n",
"        X, np.asarray(y), test_size=test_size, random_state=random_state\n",
"    )\n",
"    return X_train, y_train, X_test, y_test\n",
" \n",
"X_train, y_train, X_test, y_test = get_train_test(X, y, test_size=0.2)\n",
"\n",
"# First feature row and first five labels of the train split, then of the test split.\n",
"print(X_train[0], y_train[:5], X_test[0], y_test[:5], sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0.05469505 0. 0.14321643 0.19745815 0.23498935 0.15445189\n",
" 0.21518912]\n",
"normal X_test predictions are below:\n",
"[ 1000. 1000. 1000. 1000. 3000.]\n"
]
}
],
"source": [
"# Baseline model: a shallow random forest fitted on the unscaled features.\n",
"clf = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train, y_train)\n",
"print(clf.feature_importances_)\n",
"\n",
"# Keep the baseline predictions for the comparison against the PCA model.\n",
"normal_predictions = clf.predict(X_test)\n",
"\n",
"print('normal X_test predictions are below:', normal_predictions[:5], sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"NOw we will perform PCA. To do that we need to first scale the dataset\n",
"(127, 7)\n",
"mean is:\n",
"1.55855830679e-16\n"
]
}
],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"print(\"NOw we will perform PCA. To do that we need to first scale the dataset\")\n",
"\n",
"# Learn the scaling statistics from the training split only, then apply the\n",
"# same transformation to the test split. Fitting a second scaler on the test\n",
"# data would put the two splits on different scales and leak test statistics\n",
"# into the evaluation.\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)\n",
"\n",
"# Overall mean of the scaled training data (taken over all entries).\n",
"mu = X_train_scaled.mean()\n",
"\n",
"print(X_train_scaled.shape)\n",
"print('mean is:')\n",
"print(mu)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"explained variance in train\n",
"[ 5.0029508 1.98866868]\n",
"classified feature importances\n",
"[ 0.37365783 0.62634217]\n"
]
}
],
"source": [
"from sklearn.decomposition import PCA\n",
"\n",
"# Fit a two-component PCA on the scaled training data and project it.\n",
"pca = PCA(n_components=2)\n",
"training_features = pca.fit_transform(X_train_scaled)\n",
"print('explained variance in train', pca.explained_variance_, sep='\\n')\n",
"\n",
"# NOTE: this refits the existing `clf` object on the two PCA features, so from\n",
"# here on `clf` expects PCA-projected input.\n",
"clf.fit(training_features, y_train)\n",
"print('classified feature importances', clf.feature_importances_, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"trying out the prediction capabilities\n",
"[[ 2.57645802 0.05547991]\n",
" [ 2.48947141 -0.02405964]]\n",
"explained variance in test\n",
"[ 5.15421156 1.83697385]\n",
"PCA predictions are below:\n",
"[ 1000. 1000. 1000. 1000. 3000.]\n"
]
}
],
"source": [
"print('trying out the prediction capabilities')\n",
"# Project the test data with the PCA that was fitted on the training data.\n",
"# Refitting PCA on the test set would yield different axes than the ones the\n",
"# classifier was trained on, so its predictions would not be meaningful.\n",
"test_features = pca.transform(X_test_scaled)\n",
"print(test_features[:2])\n",
"\n",
"print('explained variance in test')\n",
"# Sample variance of the projected test data along each training component.\n",
"print(np.var(test_features, axis=0, ddof=1))\n",
"\n",
"predictions = clf.predict(test_features)\n",
"\n",
"print('PCA predictions are below:')\n",
"print(predictions[:5])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"##############################################\n",
"normal X_test predictions are below:\n",
"[ 1000. 1000. 1000. 1000. 3000.]\n",
"......................................\n",
"PCA predictions are below:\n",
"[ 1000. 1000. 1000. 1000. 3000.]\n",
"here are the real values\n",
"[ 1000. 1000. 1000. 1000. 3000.]\n",
"################################################\n",
"what is the prediction score...\n",
"normal predictive score: 1.0\n",
"pca predictive score: 0.96875\n"
]
}
],
"source": [
"# First five predictions of each model next to the true test labels.\n",
"print(\n",
"    '##############################################',\n",
"    'normal X_test predictions are below:',\n",
"    normal_predictions[:5],\n",
"    '......................................',\n",
"    'PCA predictions are below:',\n",
"    predictions[:5],\n",
"    'here are the real values',\n",
"    y_test[:5],\n",
"    sep='\\n',\n",
")\n",
"\n",
"print(\n",
"    '################################################',\n",
"    'what is the prediction score...',\n",
"    sep='\\n',\n",
")\n",
"# Accuracy of the raw-feature model and of the PCA model on the test labels.\n",
"normal_pscore, pca_score = (\n",
"    metrics.accuracy_score(y_test, predicted)\n",
"    for predicted in (normal_predictions, predictions)\n",
")\n",
"print(\n",
"    \"normal predictive score: {}\".format(normal_pscore),\n",
"    \"pca predictive score: {}\".format(pca_score),\n",
"    sep='\\n',\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment