Skip to content

Instantly share code, notes, and snippets.

View sid321axn's full-sized avatar
🎯
Focusing

Manu Siddhartha sid321axn

🎯
Focusing
View GitHub Profile
@sid321axn
sid321axn / skin.html
Created June 29, 2019 07:05
code to view sample images of dataset
n_samples = 5  # number of samples of each cancer type

# Plotting code: one row of axes per cell type, one column per sampled image.
# The row count is derived from the data instead of being hard-coded, and
# squeeze=False keeps m_axs two-dimensional so that n_axs[0] is valid even
# when n_samples (or the number of cell types) is 1.
n_types = skin_df['cell_type'].nunique()
fig, m_axs = plt.subplots(n_types, n_samples,
                          figsize=(4 * n_samples, 3 * n_types),
                          squeeze=False)
for n_axs, (type_name, type_rows) in zip(
        m_axs, skin_df.sort_values(['cell_type']).groupby('cell_type')):
    # Label each row once, on its left-most axis.
    n_axs[0].set_title(type_name)
    # Fixed random_state so the same sample images are drawn on every run.
    sampled_rows = type_rows.sample(n_samples, random_state=1234)
    for c_ax, (_, c_row) in zip(n_axs, sampled_rows.iterrows()):
        c_ax.imshow(c_row['image'])
        c_ax.axis('off')
@sid321axn
sid321axn / reading_cancer.html
Created June 29, 2019 07:11
Reading and processing data
# Load the HAM10000 metadata table and append three derived columns:
#   path          - value looked up from imageid_path_dict by image_id
#   cell_type     - value looked up from lesion_type_dict by the dx code
#   cell_type_idx - integer category code of cell_type
# assign() evaluates its keyword arguments in order, so cell_type_idx can
# read the cell_type column created just before it.
metadata_csv = os.path.join(base_skin_dir, 'HAM10000_metadata.csv')
skin_df = pd.read_csv(metadata_csv).assign(
    path=lambda df: df['image_id'].map(imageid_path_dict.get),
    cell_type=lambda df: df['dx'].map(lesion_type_dict.get),
    cell_type_idx=lambda df: pd.Categorical(df['cell_type']).codes,
)

# Show the first rows of skin_df to inspect the newly added columns.
skin_df.head()
@sid321axn
sid321axn / libraries.html
Created June 29, 2019 07:13
importing libraries
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image
np.random.seed(123)
from sklearn.preprocessing import label_binarize
@sid321axn
sid321axn / images_dict.html
Created June 29, 2019 07:16
Converting images to a dictionary
# Root directory that holds the dataset folders.
base_skin_dir = os.path.join('..', 'input')

# Collect every .jpg found exactly one folder below base_skin_dir (this covers
# both HAM10000_images_part1 and HAM10000_images_part2) into a single lookup
# keyed by the file name without its extension.
jpg_pattern = os.path.join(base_skin_dir, '*', '*.jpg')
imageid_path_dict = {
    os.path.splitext(os.path.basename(jpg_path))[0]: jpg_path
    for jpg_path in glob(jpg_pattern)
}
# This dictionary is useful for displaying more human-friendly labels later on
lesion_type_dict = {
# Separate the model inputs from the integer class label.
features = skin_df.drop(columns=['cell_type_idx'])
target = skin_df['cell_type_idx']

# Hold out 20% of the rows as the test set (fixed seed for reproducibility).
x_train_o, x_test_o, y_train_o, y_test_o = train_test_split(
    features, target, test_size=0.20, random_state=123)

# Stack the per-row image arrays into one array per split.
x_train = np.asarray(x_train_o['image'].tolist())
x_test = np.asarray(x_test_o['image'].tolist())

# Global (all pixels, all channels) statistics of each split.
x_train_mean = np.mean(x_train)
x_train_std = np.std(x_train)
# The test-set statistics are kept available for inspection only; they are
# deliberately not used for scaling.
x_test_mean = np.mean(x_test)
x_test_std = np.std(x_test)

# Standardise both splits with the TRAINING statistics. Previously x_test was
# left unscaled, so the test set was on a different scale from the train and
# validation sets. Re-using the training mean/std keeps all splits on the same
# scale without letting test-set information influence the preprocessing.
x_train = (x_train - x_train_mean) / x_train_std
x_test = (x_test - x_train_mean) / x_train_std

# Perform one-hot encoding on the labels
y_train = to_categorical(y_train_o, num_classes=7)
y_test = to_categorical(y_test_o, num_classes=7)

# Carve a 10% validation set out of the (already standardised) training data.
x_train, x_validate, y_train, y_validate = train_test_split(
    x_train, y_train, test_size=0.1, random_state=2)

# Reshape images to 3 dimensions (height = 75px, width = 100px, channels = 3)
x_train = x_train.reshape(x_train.shape[0], 75, 100, 3)
x_test = x_test.reshape(x_test.shape[0], 75, 100, 3)
x_validate = x_validate.reshape(x_validate.shape[0], 75, 100, 3)