Skip to content

Instantly share code, notes, and snippets.

@nttuan8
Last active April 14, 2020 14:36
Show Gist options
  • Save nttuan8/56f9f11b1ac833d59d839bdceb063b75 to your computer and use it in GitHub Desktop.
Save nttuan8/56f9f11b1ac833d59d839bdceb063b75 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\DELL\\Anaconda3\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n"
]
}
],
"source": [
"# Thêm thư viện\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.metrics import classification_report\n",
"from imutils import paths\n",
"from keras.applications import VGG16\n",
"from keras.applications import imagenet_utils\n",
"from keras.preprocessing.image import img_to_array\n",
"from keras.preprocessing.image import load_img\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.model_selection import train_test_split\n",
"import numpy as np\n",
"import random\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Lấy các đường dẫn đến ảnh.\n",
"image_path = list(paths.list_images('dataset/'))\n",
"\n",
"# Đổi vị trí ngẫu nhiên các đường dẫn ảnh\n",
"random.shuffle(image_path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Đường dẫn ảnh sẽ là dataset/tên_loài_hoa/tên_ảnh ví dụ dataset/Bluebell/image_0241.jpg nên p.split(os.path.sep)[-2] sẽ lấy ra được tên loài hoa\n",
"labels = [p.split(os.path.sep)[-2] for p in image_path]\n",
"\n",
"# Chuyển tên các loài hoa thành số\n",
"le = LabelEncoder()\n",
"labels = le.fit_transform(labels)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Load model VGG 16 của ImageNet dataset, include_top=False để bỏ phần Fully connected layer ở cuối.\n",
"model = VGG16(weights='imagenet', include_top=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Load ảnh và resize về đúng kích thước mà VGG 16 cần là (224,224)\n",
"list_image = []\n",
"for (j, imagePath) in enumerate(image_path):\n",
" image = load_img(imagePath, target_size=(224, 224))\n",
" image = img_to_array(image)\n",
" \n",
" image = np.expand_dims(image, 0)\n",
" image = imagenet_utils.preprocess_input(image)\n",
" \n",
" list_image.append(image)\n",
" \n",
"list_image = np.vstack(list_image)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Dùng pre-trained model để lấy ra các feature của ảnh\n",
"features = model.predict(list_image)\n",
"\n",
"# Giống bước flatten trong CNN, chuyển từ tensor 3 chiều sau ConvNet sang vector 1 chiều\n",
"features = features.reshape((features.shape[0], 512*7*7))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Chia traing set, test set tỉ lệ 80-20\n",
"X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameter for the model {'C': 0.1}\n"
]
}
],
"source": [
"# Grid search để tìm các parameter tốt nhất cho model. C = 1/lamda, hệ số trong regularisation. Solver là kiểu optimize\n",
"# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html\n",
"params = {'C' : [0.1, 1.0, 10.0, 100.0]}\n",
"#model = GridSearchCV(LogisticRegression(solver='lbfgs', multi_class='multinomial'), params)\n",
"model = GridSearchCV(LogisticRegression(), params)\n",
"model.fit(X_train, y_train)\n",
"print('Best parameter for the model {}'.format(model.best_params_))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.89 1.00 0.94 17\n",
" 1 0.91 0.77 0.83 13\n",
" 2 0.95 0.95 0.95 19\n",
" 3 0.62 0.91 0.74 11\n",
" 4 0.87 0.93 0.90 14\n",
" 5 0.86 0.86 0.86 14\n",
" 6 1.00 1.00 1.00 11\n",
" 7 0.92 0.85 0.88 13\n",
" 8 1.00 0.95 0.97 20\n",
" 9 1.00 0.94 0.97 18\n",
" 10 0.91 1.00 0.95 10\n",
" 11 1.00 0.94 0.97 16\n",
" 12 0.84 0.94 0.89 17\n",
" 13 1.00 1.00 1.00 19\n",
" 14 1.00 0.96 0.98 27\n",
" 15 0.75 0.69 0.72 13\n",
" 16 1.00 0.85 0.92 20\n",
"\n",
"avg / total 0.93 0.92 0.92 272\n",
"\n"
]
}
],
"source": [
"# Đánh giá model\n",
"preds = model.predict(X_test)\n",
"print(classification_report(y_test, preds))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment