Skip to content

Instantly share code, notes, and snippets.

@live-wire
Created March 25, 2018 04:55
Show Gist options
  • Save live-wire/0962d30788d30ba88a37458442b4e680 to your computer and use it in GitHub Desktop.
Save live-wire/0962d30788d30ba88a37458442b4e680 to your computer and use it in GitHub Desktop.
MNIST challenge for EvalAI data preparation
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(42000, 785)\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"\n",
"# Load Data/train.csv into a DataFrame and print its (rows, columns) shape.\n",
"train = pd.read_csv(filepath_or_buffer=\"Data/train.csv\")\n",
"print(train.shape)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Separate the 'label' column from every other column. Both selections stay\n",
"# two-dimensional, so Y is an (n_rows, 1) array rather than a flat vector.\n",
"is_label = train.columns == 'label'\n",
"\n",
"# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;\n",
"# .values returns the same ndarray and is available on every pandas version.\n",
"X = train.loc[:, ~is_label].values\n",
"Y = train.loc[:, is_label].values\n",
"\n",
"# Hold out 30% of the rows; the fixed random_state keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(29400, 784) (12600, 784) (29400, 1) (12600, 1)\n"
]
}
],
"source": [
"# Print the shapes of the four split arrays on one line, separated by spaces.\n",
"print(*(split.shape for split in (X_train, X_test, y_train, y_test)))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Build the training frame: feature columns plus a 'label' column.\n",
"# set_index('label') followed by reset_index() moves 'label' to the front\n",
"# and leaves a fresh 0..n-1 row index.\n",
"z = (\n",
"    pd.DataFrame(X_train)\n",
"    .assign(label=y_train)\n",
"    .set_index('label')\n",
"    .reset_index()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Write the label-first training frame to disk. to_csv keeps its default\n",
"# index=True, so the row index is emitted as the first (unnamed) CSV column.\n",
"z.to_csv(path_or_buf=\"Data/training.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Held-out features, one row per test example.\n",
"zt = pd.DataFrame(X_test)\n",
"\n",
"# Held-out labels as a single-column frame (column 0 before renaming).\n",
"test_labels = pd.DataFrame(y_test)\n",
"\n",
"# Labels in the same row order as zt. index=str converts the row labels to\n",
"# strings, exactly as the original rename call did.\n",
"zt2 = test_labels.rename(index=str, columns={0: \"label\"})\n",
"\n",
"# Row-shuffled copy of the labels. random_state makes the shuffle reproducible\n",
"# across kernel restarts (it was unseeded before), and reset_index(drop=True)\n",
"# renumbers the rows from 0 without creating and then dropping an 'index' column.\n",
"zt3 = (\n",
"    test_labels\n",
"    .sample(frac=1, random_state=42)\n",
"    .reset_index(drop=True)\n",
"    .rename(index=str, columns={0: \"label\"})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"# Write the shuffled label frame to disk.\n",
"# NOTE(review): presumably this serves as a sample submission file - confirm.\n",
"zt3.to_csv(path_or_buf=\"Data/submission.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"# Write the held-out labels (built from y_test, unshuffled) to disk.\n",
"zt2.to_csv(path_or_buf=\"Data/answers.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"# Write the held-out features (built from X_test, no label column) to disk.\n",
"zt.to_csv(path_or_buf=\"Data/testing.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "virtualenvironment3",
"language": "python",
"name": "virtualenvironment3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment