Skip to content

Instantly share code, notes, and snippets.

@tvorogme
Last active December 5, 2016 02:11
Show Gist options
  • Save tvorogme/6dbfb8046b43612959dd2a45b682e15a to your computer and use it in GitHub Desktop.
Save tvorogme/6dbfb8046b43612959dd2a45b682e15a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"from collections import Counter, defaultdict\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tqdm import tqdm_notebook"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"# Folder that holds the competition CSV files.\n",
"location = 'kaggle/'\n",
"\n",
"# pd.read_csv replaces DataFrame.from_csv, which newer pandas releases no longer provide.\n",
"# No index column is requested, so every frame keeps a default integer index.\n",
"events = pd.read_csv(location + 'user_activity.csv')\n",
"structure = pd.read_csv(location + 'structure.csv')\n",
"targets = pd.read_csv(location + 'targets.csv')\n",
"events_test = pd.read_csv(location + 'user_activity_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 497,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Histogram of step_id over users whose target is passed == 1, using one row per user\n",
"# (drop_duplicates keeps the first row found for each user_id).\n",
"passed_user_ids = set(targets[targets.passed == 1].user_id.values)\n",
"passed_user_events = events[events.user_id.isin(passed_user_ids)]\n",
"counter = Counter(passed_user_events.drop_duplicates('user_id').step_id)"
]
},
{
"cell_type": "code",
"execution_count": 498,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Per-step score: the step's count in `counter` divided by a fixed constant; a step that is\n",
"# not in `counter` reads as 0.\n",
"# NOTE(review): 659 is presumably the number of users who passed - confirm, or derive it from `targets`.\n",
"cool_feature = defaultdict(int)\n",
"cool_feature.update((step_id, hits / 659) for step_id, hits in counter.items())"
]
},
{
"cell_type": "code",
"execution_count": 499,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#\n",
"# Sort the events by time\n",
"#\n",
"\n",
"events.sort_values(by='time', inplace=True)\n",
"\n",
"#\n",
"# Sort the course structure by position, then store the ordered step ids as a vector\n",
"#\n",
"structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)\n",
"vec = structure.step_id.values.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 500,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"desc = {y:x for x,y in enumerate(set(events.action))}\n",
"events.action = list(map(lambda x: desc[x], events.action))\n",
"\n",
"\n",
"desc = {y:x for x,y in enumerate(set(events.step_type))}\n",
"events.step_type = list(map(lambda x: desc[x], events.step_type))"
]
},
{
"cell_type": "code",
"execution_count": 501,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"desc = {y:x for x,y in enumerate(set(events_test.action))}\n",
"events_test.action = list(map(lambda x: desc[x], events_test.action))\n",
"\n",
"\n",
"desc = {y:x for x,y in enumerate(set(events_test.step_type))}\n",
"events_test.step_type = list(map(lambda x: desc[x], events_test.step_type))"
]
},
{
"cell_type": "code",
"execution_count": 502,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# NOTE(review): DataFrame.size is rows * columns, not the number of rows - confirm which was meant.\n",
"ALL_CURSES = structure.size\n",
"\n",
"# Test users first, then train users; an id present in both frames appears twice in this list.\n",
"ALL_USERS = list(set(events_test.user_id))\n",
"ALL_USERS += list(set(events.user_id))"
]
},
{
"cell_type": "code",
"execution_count": 503,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
}
],
"source": [
"# Map user_id -> that user's rows, built with a single groupby pass per frame instead of\n",
"# re-filtering the whole frame once for every user.\n",
"user_event_pd_frame = {}\n",
"\n",
"# The test frame goes in first so that, for an id present in both frames, the train rows\n",
"# win - the same precedence as before.\n",
"for source_frame in (events_test, events):\n",
"    for user, user_rows in tqdm_notebook(source_frame.groupby('user_id')):\n",
"        user_event_pd_frame[user] = user_rows\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Per user: a Counter of how often each encoded action value occurs in that user's rows.\n",
"user_actions_count = {\n",
"    user: Counter(user_event_pd_frame[user].action)\n",
"    for user in ALL_USERS\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# step_id -> step_cost lookup taken from the course structure.\n",
"action_costs = dict(zip(structure.step_id, structure.step_cost))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Per user: the rows of `structure` whose step_id occurs in that user's events.\n",
"fast_struct = {}\n",
"for user in tqdm_notebook(ALL_USERS):\n",
"    seen_steps = set(user_event_pd_frame[user].step_id)\n",
"    fast_struct[user] = structure[structure.step_id.isin(seen_steps)]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"from collections import defaultdict\n",
"\n",
"# Per user: the step_id values of the rows whose encoded action equals 2.\n",
"# NOTE(review): the variable name suggests code 2 means 'passed' - confirm against the action encoding.\n",
"user_passed_actions = defaultdict(list)\n",
"\n",
"for user in tqdm_notebook(ALL_USERS):\n",
"    user_rows = user_event_pd_frame[user]\n",
"    user_passed_actions[user] = user_rows[user_rows.action == 2].step_id"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Largest per-user total of step costs over the steps stored in user_passed_actions.\n",
"MAX_STEP_COST_SUM = max(\n",
"    sum(action_costs[step] for step in user_passed_actions[user])\n",
"    for user in ALL_USERS\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Per user: (number of passed steps) / (number of passed steps with a positive cost),\n",
"# or 0 when the user has no passed step with a positive cost.\n",
"costed_passed_user_actions = {}\n",
"\n",
"for user in ALL_USERS:\n",
"    passed_costs = [action_costs[step] for step in user_passed_actions[user]]\n",
"    costed_total = sum(1 for cost in passed_costs if cost > 0)\n",
"    if costed_total:\n",
"        costed_passed_user_actions[user] = len(passed_costs) / costed_total\n",
"    else:\n",
"        costed_passed_user_actions[user] = 0"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Per user: the share of the time units between the first and the last event (inclusive)\n",
"# that carry no event, divided by 100 as before. Users without events get 0.\n",
"# The previous expression evaluated `span - (distinct / span / 100)` because division\n",
"# binds tighter than subtraction, so the feature was practically just the span; it also\n",
"# built a list with one entry per time unit only to take its length.\n",
"# NOTE(review): assumes `time` holds integer timestamps - confirm.\n",
"FuckUpTimePart = {}\n",
"\n",
"for user in tqdm_notebook(ALL_USERS):\n",
"    times = sorted(user_event_pd_frame[user].time)\n",
"    if times:\n",
"        span = int(times[-1] - times[0]) + 1\n",
"        distinct_times = len(set(times))\n",
"        FuckUpTimePart[user] = (span - distinct_times) / span / 100\n",
"    else:\n",
"        FuckUpTimePart[user] = 0"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Widget Javascript not detected. It may not be installed properly. Did you enable the widgetsnbextension? If not, then run \"jupyter nbextension enable --py --sys-prefix widgetsnbextension\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Per user: total step_cost over the rows with encoded action 1 (user_viewed_sum)\n",
"# and over the rows with encoded action 2 (user_passed_sum).\n",
"# NOTE(review): presumably 1 = viewed and 2 = passed - confirm against the action encoding.\n",
"user_viewed_sum = {}\n",
"user_passed_sum = {}\n",
"for totals, action_code in ((user_viewed_sum, 1), (user_passed_sum, 2)):\n",
"    for user in tqdm_notebook(ALL_USERS):\n",
"        user_rows = user_event_pd_frame[user]\n",
"        totals[user] = sum(user_rows[user_rows.action == action_code].step_cost)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"structure.sort_values(['module_position','lesson_position', 'step_position'], inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"step_id_position = structure.step_id.values.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 729,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Every action code that occurs for any user, in a fixed order, so that the per-action\n",
"# counts land in the same feature columns for every user.\n",
"ALL_ACTION_CODES = sorted({code for counts in user_actions_count.values() for code in counts})\n",
"\n",
"\n",
"def gen_features(us_id, test=False):\n",
"    \"\"\"Build the feature vector of one user.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    us_id\n",
"        Key of `user_event_pd_frame` (and of the other per-user dictionaries).\n",
"    test : bool, optional\n",
"        Unused; kept so that existing calls keep working.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        `(us_id, features)` where `features` is a flat list. The describe() block and the\n",
"        step_cost sum appear twice, as in the original layout.\n",
"\n",
"    Notes\n",
"    -----\n",
"    The per-action counts used to be emitted in each user's own Counter order, so users\n",
"    with different action sets produced rows of different length with shifted columns.\n",
"    They are now emitted for every code in ALL_ACTION_CODES, in that order.\n",
"    \"\"\"\n",
"    now_ev = user_event_pd_frame[us_id]\n",
"\n",
"    # describe() is evaluated once and its flattened values are reused for both copies.\n",
"    describe_values = [value for row in now_ev.describe().values for value in row]\n",
"    step_cost_sum = now_ev.step_cost.sum()\n",
"    action_counts = user_actions_count[us_id]\n",
"\n",
"    now_x = list(describe_values)\n",
"\n",
"    # Highest cool_feature score among the steps found in this user's events.\n",
"    now_x.append(max(cool_feature[step] for step in now_ev.step_id))\n",
"    now_x.append(FuckUpTimePart[us_id])\n",
"    now_x.append(user_passed_sum[us_id])\n",
"    now_x.append(user_viewed_sum[us_id])\n",
"    now_x.append(costed_passed_user_actions[us_id])\n",
"    now_x.append(step_cost_sum)\n",
"\n",
"    # A Counter returns 0 for a code the user never produced, so the width is constant.\n",
"    now_x.extend(action_counts[code] for code in ALL_ACTION_CODES)\n",
"\n",
"    now_x.extend(describe_values)\n",
"\n",
"    now_x.append(now_ev.step_type.max())\n",
"    now_x.append(now_ev.step_type.min())\n",
"    now_x.append(step_cost_sum)\n",
"    now_x.append(now_ev.step_cost.mean())\n",
"\n",
"    return (us_id, now_x)"
]
},
{
"cell_type": "code",
"execution_count": 708,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Training data: one feature row per train user, with the matching `passed` label from `targets`.\n",
"X, Y = [], []\n",
"for us_id in tqdm_notebook(set(events.user_id.tolist())):\n",
"    user_id, features = gen_features(us_id)\n",
"    X.append(features)\n",
"    Y.append(targets[targets.user_id == user_id].passed.values[0])"
]
},
{
"cell_type": "code",
"execution_count": 709,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Test data, plus `ind`: the user ids in the same order as the rows of X_test.\n",
"X_test, ind = [], []\n",
"for us_id in tqdm_notebook(set(events_test.user_id.tolist())):\n",
"    user_id, features = gen_features(us_id, True)\n",
"    ind.append(user_id)\n",
"    X_test.append(features)"
]
},
{
"cell_type": "code",
"execution_count": 710,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#\n",
"# X contains NaN features that we need to fill;\n",
"# let's do it with pandas\n",
"# \n",
"X = pd.DataFrame(X)"
]
},
{
"cell_type": "code",
"execution_count": 711,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Backward-fill missing values down each column. DataFrame.bfill() is the same operation\n",
"# as fillna(method='bfill'), which newer pandas releases deprecate.\n",
"# NOTE(review): this copies the value from the next row, i.e. from a different user -\n",
"# confirm that this is intended.\n",
"X = X.bfill()"
]
},
{
"cell_type": "code",
"execution_count": 712,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X = X.fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 713,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Backward-fill each column, then zero whatever is still missing. DataFrame.bfill() is the\n",
"# same operation as fillna(method='bfill'), which newer pandas releases deprecate.\n",
"X_test = pd.DataFrame(X_test).bfill()\n",
"X_test = X_test.fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 714,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Convert the training frame into a float32 numpy array.\n",
"X = X.values.astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 715,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# DataFrame.values replaces as_matrix(), which newer pandas releases no longer provide;\n",
"# it is also what is already used for X.\n",
"X_test = X_test.values.astype(np.float32)\n",
"\n",
"# Labels as a flat int32 vector.\n",
"Y = np.ravel(Y).astype(np.int32)"
]
},
{
"cell_type": "code",
"execution_count": 716,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def feature_normalize(feature_index, features_array):\n",
"    \"\"\"Min-max scale one feature column in place.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    feature_index : int\n",
"        Position of the feature inside every row.\n",
"    features_array : sequence of mutable rows\n",
"        For example a 2-D numpy array; the rows are modified in place.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The same `features_array` object. The column is left untouched when all of its\n",
"    values are equal (the span is 0) or when there are no rows at all.\n",
"    \"\"\"\n",
"    # Empty input: min()/max() would raise, and there is nothing to scale.\n",
"    if len(features_array) == 0:\n",
"        return features_array\n",
"\n",
"    column = [row[feature_index] for row in features_array]\n",
"    lowest = min(column)\n",
"    span = max(column) - lowest\n",
"\n",
"    # The span is the same for every row, so it is checked once instead of per row.\n",
"    if span != 0:\n",
"        for row in features_array:\n",
"            row[feature_index] = (row[feature_index] - lowest) / span\n",
"\n",
"    return features_array"
]
},
{
"cell_type": "code",
"execution_count": 717,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Min-max scale every feature column of X (feature_normalize works in place and returns X).\n",
"for column in range(len(X[0])):\n",
"    X = feature_normalize(column, X)"
]
},
{
"cell_type": "code",
"execution_count": 718,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Min-max scale every feature column of X_test.\n",
"# NOTE(review): X_test is scaled with its own per-column min/max, not with the ones of X -\n",
"# confirm that this is intended.\n",
"for column in range(len(X_test[0])):\n",
"    X_test = feature_normalize(column, X_test)"
]
},
{
"cell_type": "code",
"execution_count": 719,
"metadata": {
"button": false,
"collapsed": false,
"deletable": true,
"new_sheet": false,
"run_control": {
"read_only": false
}
},
"outputs": [],
"source": [
"# train_test_split is imported from sklearn.model_selection because the old\n",
"# sklearn.cross_validation module is not available in current scikit-learn releases.\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import f1_score\n",
"\n",
"# Hold out 10% of the rows for validation; the fixed random_state keeps the split repeatable.\n",
"Xtr, Xval, Ytr, Yval = train_test_split(X, Y, test_size=0.1, random_state=128)"
]
},
{
"cell_type": "code",
"execution_count": 720,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import theano\n",
"import theano.tensor as T\n",
"import lasagne"
]
},
{
"cell_type": "code",
"execution_count": 721,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"input_X = T.vector(\"X\")\n",
"target_y = T.scalar(dtype='int32')"
]
},
{
"cell_type": "code",
"execution_count": 722,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from lasagne.updates import sgd\n",
"from lasagne.nonlinearities import leaky_rectify, softmax, tanh, elu\n",
"from lasagne.layers import InputLayer, DenseLayer\n",
"\n",
"# Three dense layers (100, 60 and 20 units), each wrapped by batch_norm, followed by a\n",
"# 2-unit softmax output. Every layer takes the batch-normalised output `bb` of the previous\n",
"# one; the 60-unit layer used to take the raw `hl`, which left the first batch_norm\n",
"# wrapper out of the network.\n",
"l_in = InputLayer(shape=(None, len(X[0])))\n",
"hl = DenseLayer(incoming=l_in, num_units=100)\n",
"bb = lasagne.layers.batch_norm(hl)\n",
"hl = DenseLayer(incoming=bb, num_units=60)\n",
"bb = lasagne.layers.batch_norm(hl)\n",
"hl = DenseLayer(incoming=bb, num_units=20)\n",
"bb = lasagne.layers.batch_norm(hl)\n",
"l_out = DenseLayer(incoming=bb, num_units=2, nonlinearity=softmax, name='outputlayer')"
]
},
{
"cell_type": "code",
"execution_count": 723,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sym_x = T.matrix('X')\n",
"sym_t = T.ivector('target')\n",
"\n",
"train_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=False)\n",
"eval_out = lasagne.layers.get_output(l_out, {l_in: sym_x}, deterministic=True)\n",
"\n",
"all_params = lasagne.layers.get_all_params(l_out, trainable=True)"
]
},
{
"cell_type": "code",
"execution_count": 724,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cost_train = T.nnet.categorical_crossentropy(train_out, sym_t).mean()\n",
"cost_eval = T.nnet.categorical_crossentropy(eval_out, sym_t).mean()"
]
},
{
"cell_type": "code",
"execution_count": 725,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"all_grads = T.grad(cost_train, all_params)\n",
"updates = lasagne.updates.adagrad(all_grads, all_params, learning_rate=0.1)"
]
},
{
"cell_type": "code",
"execution_count": 726,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Compiled Theano functions:\n",
"#   f_eval  - takes (inputs, targets), returns [cost_eval, eval_out]\n",
"#   f_train - takes (inputs, targets), returns [cost_train, eval_out] and applies `updates`\n",
"#   f_pred  - takes inputs only, returns eval_out\n",
"f_eval = theano.function([sym_x, sym_t], [cost_eval, eval_out])\n",
"f_train = theano.function([sym_x, sym_t], [cost_train, eval_out], updates=updates)\n",
"f_pred = theano.function([sym_x], eval_out)"
]
},
{
"cell_type": "code",
"execution_count": 727,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def iterate_minibatches(inputs, targets, batchsize, inputs_new=None):\n",
"    \"\"\"Yield randomly shuffled mini-batches of exactly `batchsize` rows.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    inputs, targets : arrays of equal length that accept index-array indexing\n",
"    batchsize : int\n",
"        Rows per batch; a final incomplete batch is dropped.\n",
"    inputs_new : array, optional\n",
"        A second input array with the same length as `inputs`.\n",
"\n",
"    Yields\n",
"    ------\n",
"    `(inputs, targets)` slices, or `(inputs, inputs_new, targets)` slices when\n",
"    `inputs_new` is given.\n",
"    \"\"\"\n",
"    assert len(inputs) == len(targets)\n",
"\n",
"    # `is not None` is required: `!= None` on a numpy array compares element by element\n",
"    # and cannot be used as a condition.\n",
"    has_second_input = inputs_new is not None\n",
"    if has_second_input:\n",
"        # This used to compare len(inputs_new) with itself, which can never fail.\n",
"        assert len(inputs_new) == len(inputs)\n",
"\n",
"    indices = np.arange(len(inputs))\n",
"    np.random.shuffle(indices)\n",
"\n",
"    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):\n",
"        excerpt = indices[start_idx:start_idx + batchsize]\n",
"        if has_second_input:\n",
"            yield inputs[excerpt], inputs_new[excerpt], targets[excerpt]\n",
"        else:\n",
"            yield inputs[excerpt], targets[excerpt]"
]
},
{
"cell_type": "code",
"execution_count": 739,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-739-4cf751260849>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterate_minibatches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mf_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mold_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnow_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/a.tvorozhkov/anaconda3/lib/python3.5/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput_subset\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_subset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# Train for up to 1000 epochs and keep the test-set predictions of the epoch with the best\n",
"# validation F1 score.\n",
"# `now_score` used to be compared without ever being assigned; it is now the F1 score on\n",
"# the held-out split (Xval, Yval), computed after every epoch. Training therefore runs on\n",
"# (Xtr, Ytr) only, so that the validation rows are not seen during training.\n",
"old_score = 0\n",
"answer = []\n",
"predicted = []\n",
"for epoch in range(1000):\n",
"    for x, y in iterate_minibatches(Xtr, Ytr, 10):\n",
"        f_train(x, y)\n",
"\n",
"    now_score = f1_score(Yval, f_pred(Xval).argmax(axis=1))\n",
"    if old_score < now_score:\n",
"        old_score = now_score\n",
"        # One forward pass over the test set serves both the probabilities and the labels.\n",
"        predicted = f_pred(X_test)\n",
"        answer = [row.argmax() for row in predicted]"
]
},
{
"cell_type": "code",
"execution_count": 732,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"a = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 733,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"a['passed'] = answer"
]
},
{
"cell_type": "code",
"execution_count": 734,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"a['user_id'] = ind"
]
},
{
"cell_type": "code",
"execution_count": 735,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"a = a.set_index('user_id')"
]
},
{
"cell_type": "code",
"execution_count": 736,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>passed</th>\n",
" </tr>\n",
" <tr>\n",
" <th>user_id</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8193</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16387</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8196</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" passed\n",
"user_id \n",
"8193 0\n",
"16387 0\n",
"8196 0\n",
"5 0\n",
"9 0"
]
},
"execution_count": 736,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.head()"
]
},
{
"cell_type": "code",
"execution_count": 738,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"a.to_csv('red.csv')"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"widgets": {
"state": {
"00cc5f7a46ca4be0bba00e6ceb579bb5": {
"views": [
{
"cell_index": 8
}
]
},
"0c40a99c801d49128662aad82934a1aa": {
"views": [
{
"cell_index": 8
}
]
},
"1c8753f3411d463580879b08e2ddedb2": {
"views": [
{
"cell_index": 6
}
]
},
"2924a75eb0994a4ea19e4a69da2c9369": {
"views": [
{
"cell_index": 9
}
]
},
"29f9da92b18a453f933638b9c2405c2d": {
"views": [
{
"cell_index": 20
}
]
},
"400480f182d74426902a7a22d237df49": {
"views": [
{
"cell_index": 8
}
]
},
"5042d02270ee4d70a11abc437292a005": {
"views": [
{
"cell_index": 10
}
]
},
"9b2eaa75f9cb4ed69f45edcc18a1bcfa": {
"views": [
{
"cell_index": 8
}
]
},
"ad42efd44c2c4869b5165cb9b28fed70": {
"views": [
{
"cell_index": 8
}
]
},
"c5ab72890efb4c47b71cfe4efffc746f": {
"views": [
{
"cell_index": 7
}
]
},
"cb56ba5222384625a58b9fcd5a4edfd6": {
"views": [
{
"cell_index": 21
}
]
},
"dff634abe09c40bb8e328e7bf43c47df": {
"views": [
{
"cell_index": 6
}
]
},
"e53fe8aabd744a11b7d3ce29f3d89cd8": {
"views": [
{
"cell_index": 14
}
]
},
"f199d361089443e0beec5c02c35f5394": {
"views": [
{
"cell_index": 8
}
]
},
"f59ec1ad186d410aadf1e71bf09e6804": {
"views": [
{
"cell_index": 13
}
]
},
"f715a71e5cad4bf89dae0a3b1f0fe3d5": {
"views": [
{
"cell_index": 8
}
]
},
"f73d00d1f55f48c8bd04e41b2f4100ac": {
"views": [
{
"cell_index": 14
}
]
},
"fde5d46ad89d410585f882b641c279b9": {
"views": [
{
"cell_index": 13
}
]
}
},
"version": "1.2.0"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment