Created: August 22, 2018, 06:17
-
-
Save varun19299/3ed124fc6836b3cbf56fb6cc4cb725e8 to your computer and use it in GitHub Desktop.
Activity Recognition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import cv2 | |
import os | |
import h5py | |
from tqdm import tqdm | |
from keras.preprocessing import image | |
from keras.applications.inception_v3 import InceptionV3, preprocess_input | |
from keras.models import Model, load_model, Sequential | |
from keras.layers import Input, LSTM, Dense, Dropout | |
from keras.utils import to_categorical | |
from keras.applications.imagenet_utils import preprocess_input | |
from keras.optimizers import Adam | |
from keras.callbacks import ModelCheckpoint, TensorBoard,EarlyStopping | |
from keras.utils.io_utils import HDF5Matrix | |
# Pipeline hyper-parameters.
SEQ_LEN = 30       # frames sampled per video clip
MAX_SEQ_LEN = 200  # clips with more frames than this are dropped
BATCH_SIZE = 16    # LSTM training batch size
EPOCHS = 1000      # maximum training epochs
def get_data(path, if_pd=False):
    """Load the dataset index CSV at *path* into a pandas DataFrame.

    ``if_pd`` is currently unused; it is kept so existing callers are
    unaffected.
    """
    return pd.read_csv(path)
def get_class_dict(df):
    """Build label<->index lookup tables from the 'class' column.

    Returns a ``(label_index, index_label)`` pair: class name -> integer id
    and the inverse map. Ids follow first-appearance order in *df*.
    """
    names = list(df['class'].unique())
    ids = np.arange(0, len(names))
    return dict(zip(names, ids)), dict(zip(ids, names))
def clean_data(df, min_frames=None, max_frames=None):
    """Drop rows whose frame count lies outside [min_frames, max_frames].

    Parameters
    ----------
    df : DataFrame with a 'frames' column.
    min_frames, max_frames : optional bounds; when omitted they default to
        the module-level SEQ_LEN / MAX_SEQ_LEN, preserving the original
        behavior for existing callers.

    Returns the filtered DataFrame (a view/copy; *df* is not mutated).
    """
    if min_frames is None:
        min_frames = SEQ_LEN
    if max_frames is None:
        max_frames = MAX_SEQ_LEN
    mask = (df['frames'] >= min_frames) & (df['frames'] <= max_frames)
    return df[mask]
def split_train_test(df):
    """Split *df* into (train, test) using the 'partition' column.

    The first partition value encountered becomes the train split and the
    second becomes the test split; exactly two partitions are assumed.
    """
    labels = df['partition'].unique()
    groups = df.groupby(['partition'])
    return groups.get_group(labels[0]), groups.get_group(labels[1])
def preprocess_image(img):
    """Resize one frame and normalize it for the InceptionV3 extractor.

    BUG FIX: the original resized to 227x227, but InceptionV3 built with
    include_top=True (see main()) requires 299x299 inputs, so predict()
    would reject the batch.

    NOTE(review): ``preprocess_input`` here resolves to the
    keras.applications.imagenet_utils version (imported last at module
    level), which shadows the inception_v3-specific one imported earlier;
    InceptionV3 weights expect its own [-1, 1] preprocessing — confirm
    which was intended.
    """
    img = cv2.resize(img, (299, 299))
    return preprocess_input(img)
def encode_video(row, model, label_index):
    """Extract CNN features for the first SEQ_LEN frames of one video.

    Parameters
    ----------
    row : single-row DataFrame slice (``df.iloc[[i]]``) with 'class' and
        'video_name' columns.
    model : feature-extractor model whose predict() maps a frame batch to
        feature vectors.
    label_index : dict mapping class name -> integer id.

    Returns (features, one-hot label vector).
    Raises IOError if a frame cannot be read from the video file.
    """
    video_path = os.path.join(
        "data", "UCF-101",
        str(row["class"].iloc[0]),
        str(row["video_name"].iloc[0]) + ".avi")
    cap = cv2.VideoCapture(video_path)
    try:
        images = []
        for _ in range(SEQ_LEN):
            ret, frame = cap.read()
            if not ret:
                # clean_data() guarantees >= SEQ_LEN frames, but guard
                # against unreadable/corrupt files instead of crashing
                # inside cv2.resize on a None frame.
                raise IOError("could not read frame from %s" % video_path)
            images.append(preprocess_image(frame))
    finally:
        cap.release()  # the original leaked the capture handle
    features = model.predict(np.array(images))
    index = label_index[row["class"].iloc[0]]
    y_onehot = to_categorical(index, len(label_index))
    return features, y_onehot
def encode_dataset(data, model, label_index, phase):
    """Encode every required-class video in *data* and save to <phase>_8.h5.

    Parameters
    ----------
    data : DataFrame of video rows (one row per clip).
    model : CNN feature extractor passed through to encode_video().
    label_index : dict mapping class name -> integer id.
    phase : dataset name ('train' / 'test'); also used for the HDF5
        dataset keys and the output filename.

    Side effect: writes '<phase>_8.h5' with datasets '<phase>' and
    '<phase>_labels' in the current directory.
    """
    # Set for O(1) membership tests (the original used a list).
    required_classes = {"ApplyEyeMakeup", "ApplyLipstick", "Archery",
                        "BabyCrawling", "BalanceBeam", "BandMarching",
                        "BaseballPitch", "Basketball", "BasketballDunk"}
    input_f = []
    output_y = []
    for i in tqdm(range(data.shape[0])):
        row = data.iloc[[i]]
        # Only encode rows belonging to the reduced class subset.
        if str(row["class"].iloc[0]) in required_classes:
            features, y = encode_video(row, model, label_index)
            input_f.append(features)
            output_y.append(y)
    # BUG FIX: the original never closed the h5py.File; 'with' guarantees
    # the file is flushed and closed even if create_dataset raises.
    with h5py.File(phase + '_8' + '.h5', 'w') as f:
        f.create_dataset(phase, data=np.array(input_f))
        f.create_dataset(phase + "_labels", data=np.array(output_y))
    # (The original's `del input_f[:]` is unnecessary: the locals are
    # released when the function returns.)
def lstm(num_classes=99):
    """Build and compile the LSTM classifier for the extracted CNN features.

    Parameters
    ----------
    num_classes : softmax output width. Defaults to 99 to match the
        original hard-coded value. NOTE(review): encode_video() one-hots
        against len(label_index) — confirm the two agree for your CSV.

    Returns (compiled model, callback list).
    """
    # SEQ_LEN feature vectors of size 2048 (InceptionV3 'avg_pool' output).
    input_shape = (SEQ_LEN, 2048)

    model = Sequential()
    model.add(LSTM(2048, return_sequences=False,
                   input_shape=input_shape,
                   dropout=0.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # os.path.join for portability (the original 'models\\...' path only
    # worked on Windows).
    checkpoint = ModelCheckpoint(
        filepath=os.path.join('models',
                              'checkpoint-{epoch:02d}-{val_loss:.2f}.hdf5'))
    tb_callback = TensorBoard(
        log_dir="logs",
        histogram_freq=2,
        write_graph=True
    )
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    # BUG FIX: early_stopping was created but never registered, so
    # patience=10 had no effect; include it in the callback list.
    callback_list = [checkpoint, tb_callback, early_stopping]

    optimizer = Adam(lr=1e-5, decay=1e-6)
    metrics = ['accuracy', 'top_k_categorical_accuracy']
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=metrics)
    return model, callback_list
def main():
    """End-to-end pipeline: extract per-frame CNN features, then train the LSTM.

    Side effects: writes train_8.h5 / test_8.h5 feature files, model
    checkpoints under models/, and TensorBoard logs under logs/.
    """
    # InceptionV3 pretrained on ImageNet; the 2048-d 'avg_pool' output is
    # used as a per-frame feature extractor.
    base_model = InceptionV3(
        weights='imagenet',
        include_top=True)
    model = Model(
        inputs=base_model.input,
        outputs=base_model.get_layer('avg_pool').output)

    # os.path.join for portability (the original '.\\data\\...' path only
    # worked on Windows).
    df = get_data(os.path.join('data', 'data_file.csv'))
    # Drop clips outside the usable frame range.
    df_clean = clean_data(df)
    # label<->index maps over every class present in the cleaned CSV.
    label_index, index_label = get_class_dict(df_clean)
    # Split the dataset into train and test partitions.
    train, test = split_train_test(df_clean)

    # Encode both splits to HDF5 feature files in the current directory.
    encode_dataset(train, model, label_index, "train")
    encode_dataset(test, model, label_index, "test")

    # HDF5Matrix reads the features lazily from disk; shuffle='batch'
    # below is required for HDF5-backed inputs.
    x_train = HDF5Matrix('train_8.h5', 'train')
    y_train = HDF5Matrix('train_8.h5', 'train_labels')
    x_test = HDF5Matrix('test_8.h5', 'test')
    y_test = HDF5Matrix('test_8.h5', 'test_labels')

    model, callback_list = lstm()
    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS,
              verbose=2, validation_data=(x_test, y_test),
              shuffle='batch', callbacks=callback_list)
    #model.save("Activity_Recognition.h5")


# Guard so importing this module no longer launches the full pipeline.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.