Skip to content

Instantly share code, notes, and snippets.

@monajalal
Created November 28, 2018 15:17
Show Gist options
  • Save monajalal/75eb4b2836ee10f59c067f89787e8889 to your computer and use it in GitHub Desktop.
Save monajalal/75eb4b2836ee10f59c067f89787e8889 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"from graphviz import Digraph\n",
"import torch\n",
"from torch.autograd import Variable\n",
"\n",
"\n",
"# make_dot was moved to https://github.com/szagoruyko/pytorchviz\n",
"from torchviz import make_dot"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"\"\"\"\n",
"Transfer Learning Tutorial\n",
"==========================\n",
"**Author**: `Sasank Chilamkurthy <https://chsasank.github.io>`_\n",
"\n",
"In this tutorial, you will learn how to train your network using\n",
"transfer learning. You can read more about transfer learning in the `cs231n\n",
"notes <http://cs231n.github.io/transfer-learning/>`__\n",
"\n",
"Quoting these notes,\n",
"\n",
" In practice, very few people train an entire Convolutional Network\n",
" from scratch (with random initialization), because it is relatively\n",
" rare to have a dataset of sufficient size. Instead, it is common to\n",
" pretrain a ConvNet on a very large dataset (e.g. ImageNet, which\n",
" contains 1.2 million images with 1000 categories), and then use the\n",
" ConvNet either as an initialization or a fixed feature extractor for\n",
" the task of interest.\n",
"\n",
"These two major transfer learning scenarios look as follows:\n",
"\n",
"- **Finetuning the convnet**: Instead of random initialization, we\n",
" initialize the network with a pretrained network, like the one that is\n",
" trained on the imagenet 1000 dataset. The rest of the training looks as\n",
" usual.\n",
"- **ConvNet as fixed feature extractor**: Here, we will freeze the weights\n",
" for all of the network except that of the final fully connected\n",
" layer. This last fully connected layer is replaced with a new one\n",
" with random weights and only this layer is trained.\n",
"\n",
"\"\"\"\n",
"# License: BSD\n",
"# Author: Sasank Chilamkurthy\n",
"\n",
"from __future__ import print_function, division\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"from torch.optim import lr_scheduler\n",
"import numpy as np\n",
"import torchvision\n",
"from torchvision import datasets, models, transforms\n",
"import matplotlib.pyplot as plt\n",
"import time\n",
"import os\n",
"import copy\n",
"\n",
"plt.ion() # interactive mode"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
"#ADAM\n",
"######################################################################\n",
"# Load Data\n",
"# ---------\n",
"#\n",
"# We will use torchvision and torch.utils.data packages for loading the\n",
"# data.\n",
"#\n",
"# The problem we're going to solve today is to train a model to classify\n",
"# **ants** and **bees**. We have about 120 training images each for ants and bees.\n",
"# There are 75 validation images for each class. Usually, this is a very\n",
"# small dataset to generalize upon, if trained from scratch. Since we\n",
"# are using transfer learning, we should be able to generalize reasonably\n",
"# well.\n",
"#\n",
"# This dataset is a very small subset of imagenet.\n",
"#\n",
"# .. Note ::\n",
"# Download the data from\n",
"# `here <https://download.pytorch.org/tutorial/hymenoptera_data.zip>`_\n",
"# and extract it to the current directory.\n",
"\n",
"# Data augmentation and normalization for training\n",
"# Just normalization for validation\n",
"data_transforms = {\n",
"    # Training pipeline: random crop, flip, rotation and colour jitter, then\n",
"    # conversion to a tensor and per-channel normalization.\n",
"    'train': transforms.Compose([\n",
"        transforms.RandomResizedCrop(224),\n",
"        transforms.RandomHorizontalFlip(),\n",
"        transforms.RandomRotation(20),\n",
"        transforms.ColorJitter(0.3, 0.3, 0.3),\n",
"        transforms.ToTensor(),\n",
"        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
"    ]),\n",
"    # Evaluation pipeline: fixed resize + centre crop, same normalization.\n",
"    'test': transforms.Compose([\n",
"        transforms.Resize(256),\n",
"        transforms.CenterCrop(224),\n",
"        transforms.ToTensor(),\n",
"        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
"    ]),\n",
"}\n",
"\n",
"# Root folder holding one 'train' and one 'test' sub-directory.\n",
"data_dir = '10folds/10fold_9'\n",
"\n",
"image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),\n",
"                                          data_transforms[x])\n",
"                  for x in ['train', 'test']}\n",
"\n",
"# Shuffle only the training split: a fixed order for 'test' keeps the\n",
"# evaluation output reproducible and aligned with the dataset's file order.\n",
"dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,\n",
"                                              shuffle=(x == 'train'),\n",
"                                              num_workers=4)\n",
"               for x in ['train', 'test']}\n",
"dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}\n",
"\n",
"# Class names taken from the training set; indexed by predicted label below.\n",
"class_names = image_datasets['train'].classes\n",
"\n",
"# Use the first GPU when CUDA is available, otherwise fall back to the CPU.\n",
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f947f0cc240>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"######################################################################\n",
"# Visualize a few images\n",
"# ^^^^^^^^^^^^^^^^^^^^^^\n",
"# Let's visualize a few training images so as to understand the data\n",
"# augmentations.\n",
"\n",
"def imshow(inp, title=None):\n",
"    \"\"\"Display a normalized image tensor with matplotlib.\n",
"\n",
"    The tensor is converted to a NumPy array with axis 0 moved to the end,\n",
"    the normalization is undone (``std * x + mean``), the values are clipped\n",
"    to [0, 1] and the result is shown, optionally under ``title``.\n",
"    \"\"\"\n",
"    channel_mean = np.array([0.485, 0.456, 0.406])\n",
"    channel_std = np.array([0.229, 0.224, 0.225])\n",
"    picture = inp.numpy().transpose((1, 2, 0))\n",
"    picture = np.clip(channel_std * picture + channel_mean, 0, 1)\n",
"    plt.imshow(picture)\n",
"    if title is not None:\n",
"        plt.title(title)\n",
"    plt.pause(0.001)  # pause a bit so that plots are updated\n",
"\n",
"\n",
"# Fetch a single batch of training images together with their labels.\n",
"inputs, classes = next(iter(dataloaders['train']))\n",
"\n",
"# Title the figure with the class name of every image in the batch.\n",
"batch_titles = [class_names[label] for label in classes]\n",
"\n",
"# Tile the batch into one image grid and show it.\n",
"out = torchvision.utils.make_grid(inputs)\n",
"imshow(out, title=batch_titles)\n"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"######################################################################\n",
"# Training the model\n",
"# ------------------\n",
"#\n",
"# Now, let's write a general function to train a model. Here, we will\n",
"# illustrate:\n",
"#\n",
"# - Scheduling the learning rate\n",
"# - Saving the best model\n",
"#\n",
"# In the following, parameter ``scheduler`` is an LR scheduler object from\n",
"# ``torch.optim.lr_scheduler``.\n",
"\n",
"\n",
"def train_model(model, criterion, optimizer, scheduler, num_epochs=25,\n",
"                phases=('train',)):\n",
"    \"\"\"Train ``model`` and return it.\n",
"\n",
"    Reads the notebook-level ``dataloaders``, ``dataset_sizes`` and ``device``.\n",
"\n",
"    Args:\n",
"        model: network to train. Inputs are moved to ``device`` before the\n",
"            forward pass, so the model is expected to be there already.\n",
"        criterion: loss, called as ``criterion(outputs, labels)``.\n",
"        optimizer: optimizer, stepped once per training batch.\n",
"        scheduler: learning-rate scheduler, stepped once per epoch after the\n",
"            training batches of that epoch.\n",
"        num_epochs: number of passes over the data.\n",
"        phases: keys of ``dataloaders`` to run in every epoch, in order. The\n",
"            phase named 'train' updates the weights; any other phase runs in\n",
"            eval mode with gradients disabled.\n",
"\n",
"    Returns:\n",
"        The trained model. When a non-training phase was run, the weights of\n",
"        the epoch with the best accuracy on such a phase are loaded first.\n",
"    \"\"\"\n",
"    since = time.time()\n",
"\n",
"    # Only populated when an evaluation phase is run; avoids deep-copying the\n",
"    # whole state dict for nothing in the train-only default.\n",
"    best_model_wts = None\n",
"    best_acc = 0.0\n",
"\n",
"    for epoch in range(num_epochs):\n",
"        print('Epoch {}/{}'.format(epoch, num_epochs - 1))\n",
"        print('-' * 10)\n",
"\n",
"        for phase in phases:\n",
"            is_train = phase == 'train'\n",
"            model.train(is_train)  # train(False) is equivalent to eval()\n",
"\n",
"            running_loss = 0.0\n",
"            running_corrects = 0\n",
"\n",
"            for inputs, labels in dataloaders[phase]:\n",
"                inputs = inputs.to(device)\n",
"                labels = labels.to(device)\n",
"\n",
"                optimizer.zero_grad()\n",
"\n",
"                # Track history only while training.\n",
"                with torch.set_grad_enabled(is_train):\n",
"                    outputs = model(inputs)\n",
"                    _, preds = torch.max(outputs, 1)\n",
"                    loss = criterion(outputs, labels)\n",
"\n",
"                    if is_train:\n",
"                        loss.backward()\n",
"                        optimizer.step()\n",
"\n",
"                # loss is a batch mean; weight it by the batch size so the\n",
"                # epoch loss is a per-sample average.\n",
"                running_loss += loss.item() * inputs.size(0)\n",
"                running_corrects += torch.sum(preds == labels.data)\n",
"\n",
"            if is_train:\n",
"                # Step the schedule after the optimizer updates of this epoch;\n",
"                # stepping it first shifts the schedule by one epoch.\n",
"                scheduler.step()\n",
"\n",
"            epoch_loss = running_loss / dataset_sizes[phase]\n",
"            epoch_acc = running_corrects.double() / dataset_sizes[phase]\n",
"\n",
"            print('{} Loss: {:.4f} Acc: {:.4f}'.format(\n",
"                phase, epoch_loss, epoch_acc))\n",
"\n",
"            # Remember the weights of the best evaluation epoch so far.\n",
"            if not is_train and epoch_acc > best_acc:\n",
"                best_acc = epoch_acc\n",
"                best_model_wts = copy.deepcopy(model.state_dict())\n",
"\n",
"        print()\n",
"\n",
"    time_elapsed = time.time() - since\n",
"    print('Training complete in {:.0f}m {:.0f}s'.format(\n",
"        time_elapsed // 60, time_elapsed % 60))\n",
"\n",
"    if best_model_wts is not None:\n",
"        print('Best eval Acc: {:.4f}'.format(best_acc))\n",
"        model.load_state_dict(best_model_wts)\n",
"    return model"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"\n",
"######################################################################\n",
"# Visualizing the model predictions\n",
"# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"#\n",
"# Generic function to display predictions for a few images\n",
"#\n",
"\n",
"def visualize_model(model, num_images=6):\n",
"    \"\"\"Plot the model's predictions for the first ``num_images`` images.\n",
"\n",
"    Images are drawn from ``dataloaders['train']`` and laid out in a\n",
"    two-column grid, each titled with its predicted class name. The model is\n",
"    switched to eval mode for the forward passes and its previous\n",
"    training/eval mode is restored before returning, even on error.\n",
"\n",
"    Args:\n",
"        model: network to run; inputs are moved to ``device`` first.\n",
"        num_images: how many images to show. Values <= 0 show nothing.\n",
"    \"\"\"\n",
"    if num_images <= 0:\n",
"        return\n",
"\n",
"    was_training = model.training\n",
"    model.eval()\n",
"    images_so_far = 0\n",
"    # Round the row count up so an odd num_images still fits in the grid.\n",
"    num_rows = (num_images + 1) // 2\n",
"    plt.figure()\n",
"\n",
"    try:\n",
"        with torch.no_grad():\n",
"            for inputs, _ in dataloaders['train']:\n",
"                inputs = inputs.to(device)\n",
"\n",
"                outputs = model(inputs)\n",
"                _, preds = torch.max(outputs, 1)\n",
"\n",
"                for j in range(inputs.size(0)):\n",
"                    images_so_far += 1\n",
"                    ax = plt.subplot(num_rows, 2, images_so_far)\n",
"                    ax.axis('off')\n",
"                    ax.set_title('predicted: {}'.format(class_names[preds[j]]))\n",
"                    imshow(inputs.cpu().data[j])\n",
"\n",
"                    if images_so_far == num_images:\n",
"                        return\n",
"    finally:\n",
"        model.train(mode=was_training)"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"######################################################################\n",
"# Finetuning the convnet\n",
"# ----------------------\n",
"#\n",
"# Load a pretrained model and reset final fully connected layer.\n",
"#\n",
"\n",
"# Pretrained ResNet-50 backbone (lighter alternative:\n",
"# models.resnet18(pretrained=True)).\n",
"model_ft = models.resnet50(pretrained=True)\n",
"\n",
"# Replace the final fully connected layer with a fresh 9-output classifier.\n",
"num_ftrs = model_ft.fc.in_features\n",
"model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=9)\n",
"\n",
"model_ft = model_ft.to(device)\n",
"\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"# Every parameter of the network is handed to the optimizer.\n",
"optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)\n",
"\n",
"# Multiply the learning rate by 0.1 every 7 scheduler steps.\n",
"exp_lr_scheduler = lr_scheduler.StepLR(\n",
"    optimizer=optimizer_ft, step_size=7, gamma=0.1)\n"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0/24\n",
"----------\n",
"train Loss: 2.1206 Acc: 0.3099\n",
"\n",
"Epoch 1/24\n",
"----------\n",
"train Loss: 2.0338 Acc: 0.3188\n",
"\n",
"Epoch 2/24\n",
"----------\n",
"train Loss: 1.9403 Acc: 0.3291\n",
"\n",
"Epoch 3/24\n",
"----------\n",
"train Loss: 2.0065 Acc: 0.3496\n",
"\n",
"Epoch 4/24\n",
"----------\n",
"train Loss: 1.9442 Acc: 0.3316\n",
"\n",
"Epoch 5/24\n",
"----------\n",
"train Loss: 1.8409 Acc: 0.3713\n",
"\n",
"Epoch 6/24\n",
"----------\n",
"train Loss: 1.7708 Acc: 0.4110\n",
"\n",
"Epoch 7/24\n",
"----------\n",
"train Loss: 1.6016 Acc: 0.4571\n",
"\n",
"Epoch 8/24\n",
"----------\n",
"train Loss: 1.5667 Acc: 0.4686\n",
"\n",
"Epoch 9/24\n",
"----------\n",
"train Loss: 1.4753 Acc: 0.5134\n",
"\n",
"Epoch 10/24\n",
"----------\n",
"train Loss: 1.4763 Acc: 0.5058\n",
"\n",
"Epoch 11/24\n",
"----------\n",
"train Loss: 1.4015 Acc: 0.5134\n",
"\n",
"Epoch 12/24\n",
"----------\n",
"train Loss: 1.4210 Acc: 0.5224\n",
"\n",
"Epoch 13/24\n",
"----------\n",
"train Loss: 1.3903 Acc: 0.5250\n",
"\n",
"Epoch 14/24\n",
"----------\n",
"train Loss: 1.3189 Acc: 0.5519\n",
"\n",
"Epoch 15/24\n",
"----------\n",
"train Loss: 1.3964 Acc: 0.5467\n",
"\n",
"Epoch 16/24\n",
"----------\n",
"train Loss: 1.3757 Acc: 0.5352\n",
"\n",
"Epoch 17/24\n",
"----------\n",
"train Loss: 1.3398 Acc: 0.5608\n",
"\n",
"Epoch 18/24\n",
"----------\n",
"train Loss: 1.3365 Acc: 0.5659\n",
"\n",
"Epoch 19/24\n",
"----------\n",
"train Loss: 1.3535 Acc: 0.5621\n",
"\n",
"Epoch 20/24\n",
"----------\n",
"train Loss: 1.3501 Acc: 0.5365\n",
"\n",
"Epoch 21/24\n",
"----------\n",
"train Loss: 1.3291 Acc: 0.5429\n",
"\n",
"Epoch 22/24\n",
"----------\n",
"train Loss: 1.3449 Acc: 0.5493\n",
"\n",
"Epoch 23/24\n",
"----------\n",
"train Loss: 1.3546 Acc: 0.5467\n",
"\n",
"Epoch 24/24\n",
"----------\n",
"train Loss: 1.3232 Acc: 0.5467\n",
"\n",
"Training complete in 4m 3s\n"
]
}
],
"source": [
"# Fine-tune for 25 epochs; train_model returns the trained model.\n",
"model_ft = train_model(\n",
"    model=model_ft,\n",
"    criterion=criterion,\n",
"    optimizer=optimizer_ft,\n",
"    scheduler=exp_lr_scheduler,\n",
"    num_epochs=25,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"classes list: [6, 3, 6, 2]\n",
"_classes: [7, 4, 7, 3]\n",
"classes list: [2, 6, 3, 4]\n",
"_classes: [3, 7, 4, 5]\n",
"classes list: [1, 1, 2, 2]\n",
"_classes: [2, 2, 3, 3]\n",
"classes list: [6, 2, 2, 6]\n",
"_classes: [7, 3, 3, 7]\n",
"classes list: [1, 0, 6, 8]\n",
"_classes: [2, 1, 7, 9]\n",
"classes list: [2, 4, 1, 2]\n",
"_classes: [3, 5, 2, 3]\n",
"classes list: [6, 6, 5, 5]\n",
"_classes: [7, 7, 6, 6]\n",
"classes list: [8, 2, 1, 4]\n",
"_classes: [9, 3, 2, 5]\n",
"classes list: [1, 4, 2, 2]\n",
"_classes: [2, 5, 3, 3]\n",
"classes list: [5, 2, 3, 4]\n",
"_classes: [6, 3, 4, 5]\n",
"classes list: [2, 1, 5, 1]\n",
"_classes: [3, 2, 6, 2]\n",
"classes list: [6, 2, 2, 4]\n",
"_classes: [7, 3, 3, 5]\n",
"classes list: [5, 3, 6, 1]\n",
"_classes: [6, 4, 7, 2]\n",
"classes list: [2, 2, 2, 1]\n",
"_classes: [3, 3, 3, 2]\n",
"classes list: [8, 1, 2, 0]\n",
"_classes: [9, 2, 3, 1]\n",
"classes list: [1, 7, 8, 2]\n",
"_classes: [2, 8, 9, 3]\n",
"classes list: [4, 6, 2, 2]\n",
"_classes: [5, 7, 3, 3]\n",
"classes list: [4, 2, 2, 8]\n",
"_classes: [5, 3, 3, 9]\n",
"classes list: [6, 2, 1, 7]\n",
"_classes: [7, 3, 2, 8]\n",
"classes list: [8, 2, 4, 4]\n",
"_classes: [9, 3, 5, 5]\n",
"classes list: [6, 6, 6]\n",
"_classes: [7, 7, 7]\n",
"tensor([[ 0., 1., 0., 0., 0., 0., 1., 0., 0.],\n",
" [ 0., 2., 6., 0., 0., 0., 2., 0., 3.],\n",
" [ 0., 2., 21., 0., 0., 0., 3., 0., 0.],\n",
" [ 0., 0., 2., 0., 1., 0., 1., 0., 0.],\n",
" [ 0., 3., 4., 0., 2., 0., 1., 0., 0.],\n",
" [ 1., 0., 1., 0., 0., 1., 2., 0., 0.],\n",
" [ 0., 2., 2., 0., 1., 0., 8., 0., 2.],\n",
" [ 0., 1., 0., 0., 0., 0., 1., 0., 0.],\n",
" [ 0., 1., 4., 0., 1., 0., 0., 0., 0.]])\n",
"tensor([0.0000, 0.1538, 0.8077, 0.0000, 0.2000, 0.2000, 0.5333, 0.0000, 0.0000])\n"
]
},
{
"data": {
"text/plain": [
"\"cat1_acc = cat1_corr/cat1\\ncat2_acc = cat2_corr/cat2\\ncat3_acc = cat3_corr/cat3\\ncat4_acc = cat4_corr/cat4\\ncat5_acc = cat5_corr/cat5\\ncat6_acc = cat6_corr/cat6\\ncat7_acc = cat7_corr/cat7\\ncat8_acc = cat8_corr/cat8\\ncat9_acc = cat9_corr/cat9\\n\\nprint('class 1 accuracy {0}, class 1 correct classifications: {1}, class 1 images: {2}'.format(cat1_acc, cat1_corr, cat1))\\nprint('class 2 accuracy {0}, class 2 correct classifications: {1}, class 2 images: {2}'.format(cat2_acc, cat2_corr, cat2))\\nprint('class 3 accuracy {0}, class 3 correct classifications: {1}, class 3 images: {2}'.format(cat3_acc, cat3_corr, cat3))\\nprint('class 4 accuracy {0}, class 4 correct classifications: {1}, class 4 images: {2}'.format(cat4_acc, cat4_corr, cat4))\\nprint('class 5 accuracy {0}, class 5 correct classifications: {1}, class 5 images: {2}'.format(cat5_acc, cat5_corr, cat5))\\nprint('class 6 accuracy {0}, class 6 correct classifications: {1}, class 6 images: {2}'.format(cat6_acc, cat6_corr, cat6))\\nprint('class 7 accuracy {0}, class 7 correct classifications: {1}, class 7 images: {2}'.format(cat7_acc, cat7_corr, cat7))\\nprint('class 8 accuracy {0}, class 8 correct classifications: {1}, class 8 images: {2}'.format(cat8_acc, cat8_corr, cat8))\\nprint('class 9 accuracy {0}, class 9 correct classifications: {1}, class 9 images: {2}'.format(cat9_acc, cat9_corr, cat9))\\n\\n\\n\\n\\n\\nprint('1', cat1_acc)\\nprint('2', cat2_acc)\\nprint('3', cat3_acc)\\nprint('4', cat4_acc)\\nprint('5', cat5_acc)\\nprint('6', cat6_acc)\\nprint('7', cat7_acc)\\nprint('8', cat8_acc)\\nprint('9', cat9_acc)\""
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nb_classes = 9\n",
"\n",
"# confusion_matrix[t, p] counts test images of true class t predicted as p.\n",
"confusion_matrix = torch.zeros(nb_classes, nb_classes)\n",
"_classes = []          # 1-based true labels of the most recent batch\n",
"_preds = []            # 1-based predictions of the most recent batch\n",
"predicted_labels = []  # 0-based predictions, one list per batch\n",
"\n",
"# Switch to eval mode first: train_model leaves the network in training mode,\n",
"# in which BatchNorm normalizes with per-batch statistics and keeps updating\n",
"# its running averages during what should be a read-only evaluation.\n",
"model_ft.eval()\n",
"\n",
"with torch.no_grad():\n",
"    for inputs, classes in dataloaders['test']:\n",
"        inputs = inputs.to(device)\n",
"        classes = classes.to(device)\n",
"\n",
"        classes_list = classes.cpu().tolist()\n",
"        print('classes list: ', classes_list)\n",
"        _classes[:] = [label + 1 for label in classes_list]\n",
"        print('_classes: ', _classes)\n",
"\n",
"        # Single forward pass per batch.\n",
"        outputs = model_ft(inputs)\n",
"        _, preds = torch.max(outputs, 1)\n",
"        preds_list = preds.cpu().tolist()\n",
"        _preds[:] = [label + 1 for label in preds_list]\n",
"\n",
"        predicted_labels.append(preds_list)\n",
"        for t, p in zip(classes.view(-1), preds.view(-1)):\n",
"            confusion_matrix[t.long(), p.long()] += 1\n",
"\n",
"print(confusion_matrix)\n",
"\n",
"# Per-class accuracy: correct predictions (diagonal) over the number of test\n",
"# images of that class (row sum). A class with no test images yields nan.\n",
"print(confusion_matrix.diag() / confusion_matrix.sum(1))\n"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10folds/9_test_all/2.jpg\n",
"10folds/9_test_all/19.jpg\n",
"10folds/9_test_all/42.jpg\n",
"10folds/9_test_all/44.jpg\n",
"10folds/9_test_all/45.jpg\n",
"10folds/9_test_all/64.jpg\n",
"10folds/9_test_all/76.jpg\n",
"10folds/9_test_all/164.jpg\n",
"10folds/9_test_all/183.jpg\n",
"10folds/9_test_all/194.jpg\n",
"10folds/9_test_all/221.jpg\n",
"10folds/9_test_all/240.jpg\n",
"10folds/9_test_all/332.jpg\n",
"10folds/9_test_all/352.jpg\n",
"10folds/9_test_all/356.jpg\n",
"10folds/9_test_all/358.jpg\n",
"10folds/9_test_all/372.jpg\n",
"10folds/9_test_all/387.jpg\n",
"10folds/9_test_all/399.jpg\n",
"10folds/9_test_all/90007.jpg\n",
"10folds/9_test_all/90038.jpg\n",
"10folds/9_test_all/90054.jpg\n",
"10folds/9_test_all/90097.jpg\n",
"10folds/9_test_all/90202.jpg\n",
"10folds/9_test_all/90258.jpg\n",
"10folds/9_test_all/100034.jpg\n",
"10folds/9_test_all/100044.jpg\n",
"10folds/9_test_all/100080.jpg\n",
"10folds/9_test_all/100083.jpg\n",
"10folds/9_test_all/100088.jpg\n",
"10folds/9_test_all/100103.jpg\n",
"10folds/9_test_all/100146.jpg\n",
"10folds/9_test_all/100153.jpg\n",
"10folds/9_test_all/100198.jpg\n",
"10folds/9_test_all/100222.jpg\n",
"10folds/9_test_all/100242.jpg\n",
"10folds/9_test_all/100263.jpg\n",
"10folds/9_test_all/100280.jpg\n",
"10folds/9_test_all/100293.jpg\n",
"10folds/9_test_all/100342.jpg\n",
"10folds/9_test_all/100366.jpg\n",
"10folds/9_test_all/100373.jpg\n",
"10folds/9_test_all/100375.jpg\n",
"10folds/9_test_all/100379.jpg\n",
"10folds/9_test_all/10001.jpg\n",
"10folds/9_test_all/10017.jpg\n",
"10folds/9_test_all/10022.jpg\n",
"10folds/9_test_all/10053.jpg\n",
"10folds/9_test_all/10058.jpg\n",
"10folds/9_test_all/10060.jpg\n",
"10folds/9_test_all/10085.jpg\n",
"10folds/9_test_all/10132.jpg\n",
"10folds/9_test_all/10149.jpg\n",
"10folds/9_test_all/10157.jpg\n",
"10folds/9_test_all/10160.jpg\n",
"10folds/9_test_all/10184.jpg\n",
"10folds/9_test_all/10213.jpg\n",
"10folds/9_test_all/10214.jpg\n",
"10folds/9_test_all/10236.jpg\n",
"10folds/9_test_all/10238.jpg\n",
"10folds/9_test_all/10274.jpg\n",
"10folds/9_test_all/10285.jpg\n",
"10folds/9_test_all/10316.jpg\n",
"10folds/9_test_all/10318.jpg\n",
"10folds/9_test_all/10321.jpg\n",
"10folds/9_test_all/10331.jpg\n",
"10folds/9_test_all/10388.jpg\n",
"10folds/9_test_all/10389.jpg\n",
"10folds/9_test_all/10392.jpg\n",
"10folds/9_test_all/10410.jpg\n",
"10folds/9_test_all/10479.jpg\n",
"10folds/9_test_all/10484.jpg\n",
"10folds/9_test_all/10509.jpg\n",
"10folds/9_test_all/10543.jpg\n",
"10folds/9_test_all/10577.jpg\n",
"10folds/9_test_all/10578.jpg\n",
"10folds/9_test_all/10590.jpg\n",
"10folds/9_test_all/10596.jpg\n",
"10folds/9_test_all/10604.jpg\n",
"10folds/9_test_all/10608.jpg\n",
"10folds/9_test_all/10659.jpg\n",
"10folds/9_test_all/10697.jpg\n",
"10folds/9_test_all/10701.jpg\n"
]
}
],
"source": [
"input_path = '10folds/9_test_all/'\n",
"df = pd.read_csv('../../10fold_mona/test-9.csv')\n",
"\n",
"# Feature extractor = the network without its last child (the fc layer).\n",
"# Built once, on the same device the images are moved to, and put in eval\n",
"# mode so BatchNorm uses its running statistics for these single-image batches.\n",
"model_ft = model_ft.to(device)\n",
"tf_last_layer_chopped = nn.Sequential(*list(model_ft.children())[:-1]).eval()\n",
"\n",
"# The context manager closes (and flushes) the output file even on error;\n",
"# no_grad avoids building autograd graphs during pure inference.\n",
"with open('fold9_resnet50_feature_vectors.txt', 'a+') as vector_fh, torch.no_grad():\n",
"    for image_id in df['ID']:\n",
"        filename = input_path + str(image_id) + '.jpg'\n",
"        print(filename)\n",
"        # Force 3 channels so grayscale/RGBA files survive Normalize.\n",
"        img = Image.open(filename).convert('RGB')\n",
"        image = normalize(to_tensor(scaler(img))).unsqueeze(0).to(device)\n",
"        output = tf_last_layer_chopped(image)\n",
"        # One row per image; -1 keeps this valid for any feature width.\n",
"        nd_arr = output.cpu().numpy().reshape(1, -1)\n",
"        np.savetxt(vector_fh, nd_arr)"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0. 0.3445 0.76539 0.135 0.16818 0.39333 0.46833 0. 0.10715]\n"
]
}
],
"source": [
"# Per-class accuracy vectors, one per fold (presumably copied from each\n",
"# fold's evaluation output; fold 9 matches the result printed above).\n",
"fold0_acc = np.array([0.0000, 0.4286, 0.6154, 0.0000, 0.0000, 0.8333, 0.3750, 0.0000, 0.2857])\n",
"fold1_acc = np.array([0.0000, 0.5000, 0.8077, 0.0000, 0.1818, 0.6667, 0.3750, 0.0000, 0.2857])\n",
"fold2_acc = np.array([0.0000, 0.2857, 0.7308, 0.2000, 0.0000, 0.3333, 0.4000, 0.0000, 0.0000])\n",
"fold3_acc = np.array([0.0000, 0.3571, 0.7692, 0.4000, 0.1000, 0.5000, 0.3333, 0.0000, 0.1667])\n",
"fold4_acc = np.array([0.0000, 0.2857, 0.7308, 0.2500, 0.3000, 0.1667, 0.2000, 0.0000, 0.0000])\n",
"fold5_acc = np.array([0.0000, 0.3571, 0.7692, 0.0000, 0.1000, 0.5000, 0.6000, 0.0000, 0.1667])\n",
"fold6_acc = np.array([0.0000, 0.5385, 0.8462, 0.5000, 0.2000, 0.3333, 0.6000, 0.0000, 0.1667])\n",
"fold7_acc = np.array([0.0000, 0.2308, 0.7692, 0.0000, 0.4000, 0.0000, 0.6000, 0.0000, 0.0000])\n",
"fold8_acc = np.array([0.0000, 0.3077, 0.8077, 0.0000, 0.2000, 0.4000, 0.6667, 0.0000, 0.0000])\n",
"fold9_acc = np.array([0.0000, 0.1538, 0.8077, 0.0000, 0.2000, 0.2000, 0.5333, 0.0000, 0.0000])\n",
"\n",
"# Element-wise sum over the ten folds (left to right), then the mean.\n",
"acc = (fold0_acc + fold1_acc + fold2_acc + fold3_acc + fold4_acc\n",
"       + fold5_acc + fold6_acc + fold7_acc + fold8_acc + fold9_acc)\n",
"print(acc / 10)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/torchvision-0.2.1-py3.6.egg/torchvision/transforms/transforms.py:188: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.\n"
]
}
],
"source": [
"import glob\n",
"from PIL import Image\n",
"\n",
"# NOTE(review): the other cells read from '10folds/...'; confirm that the\n",
"# leading 'a' in this path is intended.\n",
"input_path = 'a10folds/0_test_all'\n",
"\n",
"# Sorted so the row order of the output file is reproducible; glob returns\n",
"# its matches in arbitrary order.\n",
"filenames = sorted(glob.glob(input_path + '/*.*'))\n",
"\n",
"# transforms.Scale is deprecated; transforms.Resize is its replacement.\n",
"scaler = transforms.Resize((224, 224))\n",
"normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
"to_tensor = transforms.ToTensor()\n",
"\n",
"# Feature extractor = the network without its last child (the fc layer).\n",
"# Built once, on the same device the images are moved to, and put in eval\n",
"# mode so BatchNorm uses its running statistics for these single-image batches.\n",
"model_ft = model_ft.to(device)\n",
"tf_last_layer_chopped = nn.Sequential(*list(model_ft.children())[:-1]).eval()\n",
"\n",
"# only test\n",
"# The context manager closes (and flushes) the output file even on error;\n",
"# no_grad avoids building autograd graphs during pure inference.\n",
"with open('resnet50_feature_vectors_fold0.txt', 'a+') as vector_fh, torch.no_grad():\n",
"    for filename in filenames:\n",
"        # Force 3 channels so grayscale/RGBA files survive Normalize.\n",
"        img = Image.open(filename).convert('RGB')\n",
"        image = normalize(to_tensor(scaler(img))).unsqueeze(0).to(device)\n",
"        output = tf_last_layer_chopped(image)\n",
"        # One row per image; -1 keeps this valid for any feature width.\n",
"        nd_arr = output.cpu().numpy().reshape(1, -1)\n",
"        np.savetxt(vector_fh, nd_arr)\n"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"%rm class_names3_7.txt"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "File b'merged_JanJunAugSep2018_gv_ih_student_coding.csv' does not exist",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-24-af8e1c604e97>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mclass_names_fh\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'class_names2_3_5_7_9.txt'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'a+'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"merged_JanJunAugSep2018_gv_ih_student_coding.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ID\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mfilenames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mglob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mglob\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_path\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/*.*\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 707\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 709\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 816\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 817\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 818\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 819\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 820\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 1047\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1048\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1049\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1050\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1051\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/scratch/sjn-p3/anaconda/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1693\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'allow_leading_cols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex_col\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1694\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1695\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1696\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1697\u001b[0m \u001b[0;31m# XXX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: File b'merged_JanJunAugSep2018_gv_ih_student_coding.csv' does not exist"
]
}
],
"source": [
"import pandas as pd\n",
"import ntpath\n",
"import os\n",
"\n",
"#class_names_fh = open('class_names3_7.txt', 'a+')\n",
"class_names_fh = open('class_names2_3_5_7_9.txt', 'a+')\n",
"\n",
"df = pd.read_csv(\"merged_JanJunAugSep2018_gv_ih_student_coding.csv\")\n",
"df = df.set_index(\"ID\")\n",
"filenames = glob.glob(input_path + \"/*.*\")\n",
"\n",
"for filename in filenames:\n",
" no_extension_filename = ntpath.basename(filename)[:-4]\n",
" if int(no_extension_filename) in df.index:\n",
" class_names_fh.write(str(df.loc[int(no_extension_filename)]['Q3 Theme']) + \"\\n\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment