Manu Siddhartha sid321axn

## skin.html
n_samples = 5  # how many example images to draw for each cancer type

# Build a 7 x n_samples grid of axes; each row of axes is paired with one
# 'cell_type' group of skin_df and filled with images sampled from it.
fig, m_axs = plt.subplots(7, n_samples, figsize=(4 * n_samples, 3 * 7))
rows_by_type = skin_df.sort_values(['cell_type']).groupby('cell_type')
for row_axes, (lesion_name, lesion_rows) in zip(m_axs, rows_by_type):
    # Only the first panel of each row carries the cell-type title.
    row_axes[0].set_title(lesion_name)
    # Fixed random_state so the same images are picked on every run.
    picked = lesion_rows.sample(n_samples, random_state=1234)
    for axis, (_, record) in zip(row_axes, picked.iterrows()):
        axis.imshow(record['image'])
        axis.axis('off')

## reading_cancer.html
# Read the HAM10000 metadata CSV located under base_skin_dir.
metadata_csv = os.path.join(base_skin_dir, 'HAM10000_metadata.csv')
skin_df = pd.read_csv(metadata_csv)

# Derived columns added for readability:
#   path          - looked up from 'image_id' through imageid_path_dict
#   cell_type     - looked up from the 'dx' code through lesion_type_dict
#   cell_type_idx - integer category code of 'cell_type'
skin_df['path'] = skin_df['image_id'].map(imageid_path_dict.get)
skin_df['cell_type'] = skin_df['dx'].map(lesion_type_dict.get)
cell_type_categories = pd.Categorical(skin_df['cell_type'])
skin_df['cell_type_idx'] = cell_type_categories.codes

# Preview the first rows of skin_df, including the newly added columns.
skin_df.head()

## libraries.html
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image
np.random.seed(123)
from sklearn.preprocessing import label_binarize

## images_dict.html
base_skin_dir = os.path.join('..', 'input')

# Gather every .jpg found one directory level below base_skin_dir (the
# HAM10000_images_part1 and HAM10000_images_part2 folders) into a single
# {image id: file path} mapping, where the image id is the file name
# without its extension.
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(jpg_path))[0], jpg_path)
    for jpg_path in glob(os.path.join(base_skin_dir, '*', '*.jpg'))
)

# This dictionary is useful for displaying more human-friendly labels later on

lesion_type_dict = {

## features.html
# Model inputs: every column of skin_df except the integer class label.
# (`columns=` already selects the column axis, so no `axis` argument is needed.)
features = skin_df.drop(columns=['cell_type_idx'])

# Prediction target: the integer-encoded cell type.
target = skin_df['cell_type_idx']

## train_test.html
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# reproducible between runs.
x_train_o, x_test_o, y_train_o, y_test_o = train_test_split(
    features,
    target,
    test_size=0.20,
    random_state=123,
)

## norm.html
# Stack the per-row image arrays of each split into a single NumPy array.
x_train = np.asarray(x_train_o['image'].tolist())
x_test = np.asarray(x_test_o['image'].tolist())

# Global statistics of the training images (computed over the whole array,
# i.e. all samples, pixels and channels at once).
x_train_mean = np.mean(x_train)
x_train_std = np.std(x_train)

# Test-split statistics are still computed so the names remain available,
# but they are deliberately NOT used for scaling (see below).
x_test_mean = np.mean(x_test)
x_test_std = np.std(x_test)

# Standardize BOTH splits with the training statistics. Previously only
# x_train was scaled, so the test images reached the model on a different
# scale than the data it was trained on. Re-using the training mean/std
# (instead of the test split's own) keeps the transformation identical for
# every split and avoids leaking test-set information into preprocessing.
x_test = (x_test - x_train_mean) / x_train_std
x_train = (x_train - x_train_mean) / x_train_std

## labels.html
# One-hot encode the integer labels of both splits over the 7 classes.
num_label_classes = 7
y_train = to_categorical(y_train_o, num_classes=num_label_classes)
y_test = to_categorical(y_test_o, num_classes=num_label_classes)

## validation.html
# Carve 10% of the training data off as a validation set; the fixed seed
# keeps the split reproducible between runs.
x_train, x_validate, y_train, y_validate = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=2,
)

## Reshape.html
# Reshape every split to (samples, height = 75px, width = 100px, channels = 3).
image_shape = (75, 100, 3)
x_train = x_train.reshape((x_train.shape[0],) + image_shape)
x_test = x_test.reshape((x_test.shape[0],) + image_shape)
x_validate = x_validate.reshape((x_validate.shape[0],) + image_shape)
	n_samples = 5  # number of samples of each cancer type

	# Plotting code: a 7 x n_samples grid of axes, where each row of axes is
	# paired with one 'cell_type' group of skin_df.
	# The multiplication signs in figsize and the nesting of the two loops
	# had been lost in this copy of the snippet; both are restored here.
	fig, m_axs = plt.subplots(7, n_samples, figsize=(4 * n_samples, 3 * 7))
	grouped_by_type = skin_df.sort_values(['cell_type']).groupby('cell_type')
	for n_axs, (type_name, type_rows) in zip(m_axs, grouped_by_type):
		# Only the first panel of each row carries the cell-type title.
		n_axs[0].set_title(type_name)
		# Fixed random_state so the same images are picked on every run.
		sampled_rows = type_rows.sample(n_samples, random_state=1234)
		for c_ax, (_, c_row) in zip(n_axs, sampled_rows.iterrows()):
			c_ax.imshow(c_row['image'])
			c_ax.axis('off')
	# Read the HAM10000 metadata CSV located under base_skin_dir.
	metadata_csv = os.path.join(base_skin_dir, 'HAM10000_metadata.csv')
	skin_df = pd.read_csv(metadata_csv)

	# Derived columns added for readability:
	#   path          - looked up from 'image_id' through imageid_path_dict
	#   cell_type     - looked up from the 'dx' code through lesion_type_dict
	#   cell_type_idx - integer category code of 'cell_type'
	skin_df['path'] = skin_df['image_id'].map(imageid_path_dict.get)
	skin_df['cell_type'] = skin_df['dx'].map(lesion_type_dict.get)
	cell_type_categories = pd.Categorical(skin_df['cell_type'])
	skin_df['cell_type_idx'] = cell_type_categories.codes

	# Preview the first rows of skin_df, including the newly added columns.
	skin_df.head()
	%matplotlib inline
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import os
	from glob import glob
	import seaborn as sns
	from PIL import Image
	np.random.seed(123)
	from sklearn.preprocessing import label_binarize
	base_skin_dir = os.path.join('..', 'input')

	# Merge the images from both folders (HAM10000_images_part1 and
	# HAM10000_images_part2) into one {image id: file path} dictionary.
	# The image id is the file name without its extension.
	# The '*' wildcards had been lost from the glob pattern in this copy,
	# leaving a pattern that could only match a file literally named '.jpg';
	# they are restored so every .jpg one level below base_skin_dir is found.
	imageid_path_dict = {
		os.path.splitext(os.path.basename(x))[0]: x
		for x in glob(os.path.join(base_skin_dir, '*', '*.jpg'))
	}

	# This dictionary is useful for displaying more human-friendly labels later on

	lesion_type_dict = {
	# Model inputs: every column of skin_df except the integer class label.
	# (`columns=` already selects the column axis, so no `axis` argument is needed.)
	features = skin_df.drop(columns=['cell_type_idx'])

	# Prediction target: the integer-encoded cell type.
	target = skin_df['cell_type_idx']
	# Stack the per-row image arrays of each split into a single NumPy array.
	x_train = np.asarray(x_train_o['image'].tolist())
	x_test = np.asarray(x_test_o['image'].tolist())

	# Global statistics of the training images (computed over the whole
	# array, i.e. all samples, pixels and channels at once).
	x_train_mean = np.mean(x_train)
	x_train_std = np.std(x_train)

	# Test-split statistics are still computed so the names remain available,
	# but they are deliberately NOT used for scaling (see below).
	x_test_mean = np.mean(x_test)
	x_test_std = np.std(x_test)

	# Standardize BOTH splits with the training statistics. Previously only
	# x_train was scaled, so the test images reached the model on a different
	# scale than the data it was trained on. Re-using the training mean/std
	# keeps the transformation identical for every split and avoids leaking
	# test-set information into preprocessing.
	x_test = (x_test - x_train_mean) / x_train_std
	x_train = (x_train - x_train_mean) / x_train_std
	# One-hot encode the integer labels of both splits over the 7 classes.
	num_label_classes = 7
	y_train = to_categorical(y_train_o, num_classes=num_label_classes)
	y_test = to_categorical(y_test_o, num_classes=num_label_classes)
	# Reshape every split to (samples, height = 75px, width = 100px, channels = 3).
	image_shape = (75, 100, 3)
	x_train = x_train.reshape((x_train.shape[0],) + image_shape)
	x_test = x_test.reshape((x_test.shape[0],) + image_shape)
	x_validate = x_validate.reshape((x_validate.shape[0],) + image_shape)