Skip to content

Instantly share code, notes, and snippets.

View Akash-Rawat's full-sized avatar

Akash Rawat Akash-Rawat

View GitHub Profile
@Akash-Rawat
Akash-Rawat / Vocab
Last active July 2, 2021 10:37
Building Vocabulary
def build_datasets_vocab(root_file, captions_file, transform, split=0.15):
df = pd.read_csv(captions_file)
vocab = {}
def create_vocab(caption):
tokens = [token.lower() for token in word_tokenize(caption)]
for token in tokens:
if token not in vocab:
vocab[token] = len(vocab)
@Akash-Rawat
Akash-Rawat / Encoder
Last active July 2, 2021 10:38
Defining Encoder
class Encoder(nn.Module):
def __init__(self, in_channels, out_channels, image_dim, latent_dim):
super().__init__()
# constants used
iW, iH = image_dim
hW, hH = iW//POOLING_FACTOR, iH//POOLING_FACTOR
@Akash-Rawat
Akash-Rawat / Decoder
Last active July 2, 2021 10:37
Defining Decoder
class Decoder(nn.Module):
def __init__(self, in_channels, out_channels, image_dim):
super().__init__()
iW, iH = image_dim
hW, hH = iW//POOLING_FACTOR, iH//POOLING_FACTOR
self.layer4 = nn.Sequential(
@Akash-Rawat
Akash-Rawat / Caption Generator
Last active July 2, 2021 10:36
Defining Captioner
class CaptionRNN(nn.Module):
CAPTION_LIMIT = MAX_CAPTION_LEN
def __init__(self, input_size, vocab_size, embedding_size, hidden_size, stop_index):
super().__init__()
self.mlp_l1 = nn.Sequential(
nn.Linear(in_features=input_size, out_features=input_size),
@Akash-Rawat
Akash-Rawat / VAECaptioner Model
Created July 2, 2021 10:39
Defining VAECaptioner Model
class VAECaptioner(nn.Module):
def __init__(self, in_channel, code_channels, image_dim, vocab):
super().__init__()
LATENT_DIM = 300
EMBEDDING_SIZE = 600
HIDDEN_SIZE = 512
CODE_FLAT = code_channels*((image_dim[0]*image_dim[1])//(POOLING_FACTOR**2))
@Akash-Rawat
Akash-Rawat / Loss Function
Created July 2, 2021 10:40
Defining loss_function
def calculate_loss(reconstructed, caption_prob, images, captions_transformed, mean, log_std):
size = captions_transformed.shape[0]
reconstruction_error = criterion(reconstructed, images)
likelihoods = torch.stack([
caption_prob[i, np.arange(MAX_CAPTION_LEN), captions_transformed[i]] for i in range(size)])
log_likelihoods = -torch.log(likelihoods).sum()
KL_divergence = - (1 - mean.pow(2) - torch.exp(2 * log_std) + (2 *log_std)).sum()
@Akash-Rawat
Akash-Rawat / Sample_Prediction
Last active July 2, 2021 10:42
Sample_Prediction
# Display the image at index 4 of `images` with its generated caption as the title.
# The tensor is moved to the CPU and its axes are reordered with permute(1, 2, 0)
# before plotting (presumably channels-first -> channels-last; TODO confirm).
plt.imshow(images[4].to("cpu").permute(1, 2, 0))
plt.axis("off")  # hide the axes around the image
# unsqueeze(0) adds a leading dimension before the image is passed to the model
# (presumably the batch axis generate_caption expects; TODO confirm).
# The return value of plt.title is bound to `_` because it is not used afterwards.
_ = plt.title(get_caption(model.generate_caption(images[4].unsqueeze(0))))
def prep_test_data(med, train_dir, test_dir, sample_size=2000):
    """Copy a random sample of one class's training files into the test split.

    Args:
        med: Name of the class sub-folder located under ``train_dir``.
        train_dir: Root directory of the training split.
        test_dir: Root directory of the test split; ``test_dir/med`` is
            created when it does not exist yet.
        sample_size: Maximum number of files to copy. Defaults to 2000, the
            value that was previously hard-coded.

    The sampled file names are printed, as before. Files are copied, not
    moved, so they remain in the training folder as well.
    """
    src_dir = os.path.join(train_dir, med)
    dst_dir = os.path.join(test_dir, med)
    pop = os.listdir(src_dir)
    # random.sample raises ValueError when asked for more items than the
    # population holds, so clamp the request to the folder size.
    test_data = random.sample(pop, min(sample_size, len(pop)))
    print(test_data)
    # shutil.copy does not create missing destination folders.
    os.makedirs(dst_dir, exist_ok=True)
    for f in test_data:
        shutil.copy(os.path.join(src_dir, f), dst_dir)
# Build the test split for every class folder found under train_dir.
# os.listdir also returns plain files (e.g. .DS_Store); those are skipped
# because prep_test_data lists the entry as a directory and would fail on them.
for medi in os.listdir(train_dir):
    if os.path.isdir(os.path.join(train_dir, medi)):
        prep_test_data(medi, train_dir, test_dir)
# Training split: list the entries of train_dir and report how many there are.
target_classes = os.listdir(train_dir)
num_classes = len(target_classes)
print('Number of target classes:', num_classes)
# Show each entry next to its position in the listing.
print(list(enumerate(target_classes)))
# Test split: same count for test_dir.
# NOTE: this rebinds target_classes and num_classes, so from here on they
# describe test_dir rather than train_dir.
target_classes = os.listdir(test_dir)
num_classes = len(target_classes)
print('Number of target classes:', num_classes)
def show_mri(med):
num = len(med)
if num == 0:
return None
rows = int(math.sqrt(num))
cols = (num+1)//rows
f, axs = plt.subplots(rows, cols)
fig = 0
for b in med:
img = image.load_img(b)