Skip to content

Instantly share code, notes, and snippets.

@gvanhorn38
Created October 11, 2018 16:40
Show Gist options
  • Save gvanhorn38/a24c9269c260f6a396b814d62524e489 to your computer and use it in GitHub Desktop.
Save gvanhorn38/a24c9269c260f6a396b814d62524e489 to your computer and use it in GitHub Desktop.
Simple iNaturalist Classifier from Features
import json
import os
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
# Locations of the dataset annotation files and the pre-extracted feature files.
DATA_FOLDER = ''  # Fill in

TRAIN_DATASET_FP = os.path.join(DATA_FOLDER, 'train2018.json')
VAL_DATASET_FP = os.path.join(DATA_FOLDER, 'val2018.json')
TRAIN_FEATURES_FP = os.path.join(DATA_FOLDER, 'train2018_prelogits.npz')
VAL_FEATURES_FP = os.path.join(DATA_FOLDER, 'val2018_prelogits.npz')

# Stop right away if any input file is missing.
# NOTE: these checks are plain asserts, so they are skipped under `python -O`.
assert os.path.exists(TRAIN_DATASET_FP), "Train json file not found"
assert os.path.exists(VAL_DATASET_FP), "Validation json file not found"
assert os.path.exists(TRAIN_FEATURES_FP), "Train feature file not found"
assert os.path.exists(VAL_FEATURES_FP), "Validation feature file not found"
# Load in the train and validation datasets
# Data format can be found here: https://github.com/visipedia/inat_comp#annotation-format
# The files are opened in binary mode so that `json` decodes the bytes itself
# instead of relying on the platform's default text encoding.
with open(TRAIN_DATASET_FP, 'rb') as f:
    train_dataset = json.load(f)
with open(VAL_DATASET_FP, 'rb') as f:
    val_dataset = json.load(f)
# The feature files identify each row by image id rather than by class label,
# so build an image id -> category id lookup for each split from the annotations.
train_image_id_to_class_id = dict(
    (annotation['image_id'], annotation['category_id'])
    for annotation in train_dataset['annotations']
)
val_image_id_to_class_id = dict(
    (annotation['image_id'], annotation['category_id'])
    for annotation in val_dataset['annotations']
)
# Load in the features extracted from a NN
# Each .npz archive is read inside a `with` block so that its underlying file
# handle is closed once the needed arrays have been pulled out of it.
# `np.int` was removed in NumPy 1.24; it was only an alias for the builtin
# `int`, so `astype(int)` yields the same dtype on every NumPy version.

# Train Features
with np.load(TRAIN_FEATURES_FP) as train_features:
    train_X = train_features['PreLogits']
    train_image_ids = train_features['ids'].astype(int)
# Look up the class label of every feature row through its image id
train_y = np.array([train_image_id_to_class_id[image_id] for image_id in train_image_ids])

# Val Features
with np.load(VAL_FEATURES_FP) as val_features:
    val_X = val_features['PreLogits']
    val_image_ids = val_features['ids'].astype(int)
val_y = np.array([val_image_id_to_class_id[image_id] for image_id in val_image_ids])
# Fit a one-vs-rest linear SVM on the training features.
model = LinearSVC(
    C=1.0,
    penalty='l2',
    loss='squared_hinge',
    dual=False,
    multi_class='ovr',
    tol=0.001,
    max_iter=100,
    verbose=1,  # print solver progress while fitting
)
model.fit(train_X, train_y)
# Predict labels for the validation features and report the accuracy
val_pred_y = model.predict(val_X)
print("Accuracy: %0.3f" % (accuracy_score(val_y, val_pred_y),))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment