Last active
April 14, 2020 14:36
-
-
Save nttuan8/56f9f11b1ac833d59d839bdceb063b75 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"C:\\Users\\DELL\\Anaconda3\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", | |
" from ._conv import register_converters as _register_converters\n", | |
"Using TensorFlow backend.\n" | |
] | |
} | |
], | |
"source": [ | |
"# Thêm thư viện\n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"from sklearn.model_selection import GridSearchCV\n", | |
"from sklearn.metrics import classification_report\n", | |
"from imutils import paths\n", | |
"from keras.applications import VGG16\n", | |
"from keras.applications import imagenet_utils\n", | |
"from keras.preprocessing.image import img_to_array\n", | |
"from keras.preprocessing.image import load_img\n", | |
"from sklearn.preprocessing import LabelEncoder\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"import numpy as np\n", | |
"import random\n", | |
"import os" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Collect the path of every image under dataset/.\n", | |
"# Sorting first makes the starting order independent of the order in which the filesystem lists files.\n", | |
"image_path = sorted(paths.list_images('dataset/'))\n", | |
"\n", | |
"# Shuffle the paths with a fixed seed so the sample order is the same on every run,\n", | |
"# in line with the fixed random_state used for the train/test split.\n", | |
"# A dedicated Random instance is used so the global random state is left untouched.\n", | |
"random.Random(42).shuffle(image_path)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Image paths look like dataset/<flower_name>/<image_name>, e.g. dataset/Bluebell/image_0241.jpg,\n", | |
"# so the name of the parent directory is the flower species.\n", | |
"# dirname/basename is used instead of splitting on os.path.sep so that a path mixing '/'\n", | |
"# with the platform separator still yields only the directory name.\n", | |
"labels = [os.path.basename(os.path.dirname(p)) for p in image_path]\n", | |
"\n", | |
"# Encode the species names as integer class ids.\n", | |
"le = LabelEncoder()\n", | |
"labels = le.fit_transform(labels)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Load VGG16 with ImageNet weights; include_top=False drops the fully connected layers at the end.\n", | |
"model = VGG16(include_top=False, weights='imagenet')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Load every image, resized to the (224, 224) input size used for VGG16, and apply\n", | |
"# the ImageNet preprocessing before collecting everything into a single array.\n", | |
"list_image = []\n", | |
"for imagePath in image_path:\n", | |
"    image = img_to_array(load_img(imagePath, target_size=(224, 224)))\n", | |
"\n", | |
"    # Add a leading axis so the per-image arrays can be stacked row-wise below.\n", | |
"    image = np.expand_dims(image, 0)\n", | |
"    image = imagenet_utils.preprocess_input(image)\n", | |
"\n", | |
"    list_image.append(image)\n", | |
"\n", | |
"# Concatenate the single-image arrays along the first axis.\n", | |
"list_image = np.vstack(list_image)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Run the pre-trained model over all images to extract their features.\n", | |
"features = model.predict(list_image)\n", | |
"\n", | |
"# Like the flatten step in a CNN: collapse the feature tensor of each image into one vector.\n", | |
"# -1 lets NumPy infer the flattened length (512*7*7 for the 224x224 inputs used here)\n", | |
"# instead of hard-coding it, so the reshape follows the actual feature-map size.\n", | |
"features = features.reshape((features.shape[0], -1))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Split into a training set and a test set with an 80/20 ratio.\n", | |
"X_train, X_test, y_train, y_test = train_test_split(\n", | |
"    features,\n", | |
"    labels,\n", | |
"    random_state=42,\n", | |
"    test_size=0.2,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Best parameter for the model {'C': 0.1}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Grid search for the best value of C for the logistic regression classifier.\n", | |
"# C = 1/lambda, the inverse of the regularisation coefficient.\n", | |
"# Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html\n", | |
"# The estimator is created with its default settings; an explicit\n", | |
"# LogisticRegression(solver='lbfgs', multi_class='multinomial') is a possible alternative.\n", | |
"# NOTE: this rebinds `model`, which held the VGG16 feature extractor in the earlier cells.\n", | |
"params = {'C': [0.1, 1.0, 10.0, 100.0]}\n", | |
"model = GridSearchCV(estimator=LogisticRegression(), param_grid=params).fit(X_train, y_train)\n", | |
"print('Best parameter for the model {}'.format(model.best_params_))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.89 1.00 0.94 17\n", | |
" 1 0.91 0.77 0.83 13\n", | |
" 2 0.95 0.95 0.95 19\n", | |
" 3 0.62 0.91 0.74 11\n", | |
" 4 0.87 0.93 0.90 14\n", | |
" 5 0.86 0.86 0.86 14\n", | |
" 6 1.00 1.00 1.00 11\n", | |
" 7 0.92 0.85 0.88 13\n", | |
" 8 1.00 0.95 0.97 20\n", | |
" 9 1.00 0.94 0.97 18\n", | |
" 10 0.91 1.00 0.95 10\n", | |
" 11 1.00 0.94 0.97 16\n", | |
" 12 0.84 0.94 0.89 17\n", | |
" 13 1.00 1.00 1.00 19\n", | |
" 14 1.00 0.96 0.98 27\n", | |
" 15 0.75 0.69 0.72 13\n", | |
" 16 1.00 0.85 0.92 20\n", | |
"\n", | |
"avg / total 0.93 0.92 0.92 272\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Evaluate the model on the test set.\n", | |
"preds = model.predict(X_test)\n", | |
"\n", | |
"# Report per-class metrics under the flower names known to the label encoder\n", | |
"# instead of the encoded integers. `labels` is passed explicitly so the rows stay\n", | |
"# aligned with `target_names` even if a class is absent from the test split.\n", | |
"class_ids = np.arange(len(le.classes_))\n", | |
"print(classification_report(y_test, preds, labels=class_ids, target_names=list(le.classes_)))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment