Harsh Maheshwari harsh-99

## data_pytorch_lightning.py
# Convert each image to a tensor, then normalize it with mean 0.1307 and
# std 0.3081 (the values commonly quoted for MNIST).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)
# Build the test set exactly once, with the same transform as the training
# data. Re-creating it without `transform` would replace it with a dataset
# that yields raw, un-normalized images.
mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)

# train (55,000 images), val split (5,000 images)
mnist_train, mnist_val = random_split(mnist_train, [55000, 5000])

# The dataloader batches the training subset (64 samples per batch).
train_dataloader = DataLoader(mnist_train, batch_size=64)

## batch_sampler.py
def create_bin(text, bin_size):
    """Set up empty buckets keyed by values spanning the range of ``text``.

    NOTE(review): this excerpt appears to end before the buckets are filled
    or returned; only the set-up that is visible here is documented.

    Args:
        text: non-empty sequence of comparable numbers -- presumably sequence
            lengths, judging by ``max_len``/``min_len`` (TODO confirm).
        bin_size: step between consecutive bucket keys.

    Raises:
        ValueError: if ``text`` is empty (raised by ``max``).
    """
    max_len = max(text)
    min_len = min(text)
    # NOTE: `bin` shadows the builtin of the same name inside this function.
    bin = {}
    # The first bucket key is min_len + bin_size - 1.
    current = min_len+bin_size-1
    # Add one empty bucket every `bin_size` while the key stays below max_len.
    while(current<max_len):
        bin[current] = []
        current = current + bin_size
    # A final bucket keyed by max_len itself is always created.
    bin[max_len] = []
    # Not used in the visible portion of the function.
    current_index = 0

## callate.py
def collate_fn(data):
	'''
	We need a custom collate_fn instead of the default one, because the
	sentences differ in length and the default collate_fn cannot merge
	sequences that require padding.
	Args:
		data: list of tuple (training sequence, label)
	Return:
		padded_seq - Padded Sequence, tensor of shape (batch_size, padded_length)
		length - Original length of each sequence(without padding), tensor of shape(batch_size)

## data_sequence.py
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import gensim

class Dataset_seq(Dataset):
	"""Dataset that keeps a ``word2id`` object and a training-data path.

	Only the constructor is visible in this excerpt.
	"""

	def __init__(self, word2id, train_path):
		"""Store both arguments on the instance, unchanged.

		Args:
			word2id: presumably a word -> id mapping (TODO confirm).
			train_path: presumably the location of the training data
				(TODO confirm).
		"""
		self.word2id, self.train_path = word2id, train_path

## vocab_build.py
import os
import gensim
from collections import Counter
import json

# Paths used for the training and test data.
train_path = "./aclImdb/train"
test_path = "./aclImdb/test"

# Simple function that reads the data from a directory and returns the data and labels.
# You can write your own reader for other datasets.

## train_gan.py
# Train the discriminator.
# Real batch: score the real images and compare against real_labels.
output_d_real = discriminator(real_images)
d_real_loss = criterion(output_d_real, real_labels)

# Fake batch: sample noise of shape (batch_size, random_size) and generate images.
z = torch.randn(batch_size, random_size).to(device)
fake_images = generator(z)
# detach() cuts the graph at the generator output, so back-propagating d_loss
# does not compute gradients for the generator. `fake_images` itself stays
# attached in case it is reused later.
output_d_fake = discriminator(fake_images.detach())
d_fake_loss = criterion(output_d_fake, fake_labels)
# Total discriminator loss: real-batch loss plus fake-batch loss.
d_loss = d_real_loss + d_fake_loss

## train_disc.py
# Train the discriminator.
# Real batch: score the real images and compare against real_labels.
output_d_real = discriminator(real_images)
d_real_loss = criterion(output_d_real, real_labels)

# Fake batch: sample noise of shape (batch_size, random_size) and generate images.
z = torch.randn(batch_size, random_size).to(device)
fake_images = generator(z)
# detach() cuts the graph at the generator output, so back-propagating d_loss
# does not compute gradients for the generator. `fake_images` itself stays
# attached in case it is reused later.
output_d_fake = discriminator(fake_images.detach())
d_fake_loss = criterion(output_d_fake, fake_labels)
# Total discriminator loss: real-batch loss plus fake-batch loss.
d_loss = d_real_loss + d_fake_loss

## train_gen.py
# Train the generator.
# The generator input is noise of shape (batch_size, random_size). It is moved
# to the device of real_labels: the loss below needs the discriminator output
# and real_labels on the same device, so the noise must live there too.
z = torch.randn(batch_size, random_size).to(real_labels.device)
output_image = generator(z)
output_discriminator = discriminator(output_image)

# To train the generator, the discriminator's output for the generated images
# is compared with real_labels, i.e. the generator is rewarded when its
# images are scored as real.
# criterion -> BCE Loss
g_loss = criterion(output_discriminator, real_labels)

## discriminator.py
class Discriminator(nn.Module):
    """Fully connected discriminator layers (constructor only in this excerpt).

    Linear layers are used here; 2-D convolutions are a possible alternative.
    """

    def __init__(self, image_size, hidden_size):
        """Create the layers in the order fc1, relu, fc2, fc3.

        Args:
            image_size: input feature count (784 for MNIST).
            hidden_size: width of the hidden layers; a hyperparameter.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=image_size, out_features=hidden_size)
        # Despite the attribute name this is a LeakyReLU with negative slope 0.2.
        self.relu = nn.LeakyReLU(negative_slope=0.2)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        # A single output feature, used to classify real vs. fake.
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=1)

## generator.py
class Generator(nn.Module):
    """Fully connected generator layers (constructor only in this excerpt)."""

    def __init__(self, random_size, hidden_size, image_size):
        """Create the layers in the order fc1, relu, fc2.

        Args:
            random_size: size of the fixed-length random-noise input (e.g. 64).
            hidden_size: width of the hidden layers.
            image_size: size of the final output (784 for flattened MNIST);
                not used in the portion of the constructor visible here.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=random_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        # The final output has a shape equal to image_size
        # (784 for flattened MNIST).
	# Convert each image to a tensor, then normalize it with mean 0.1307 and
	# std 0.3081 (the values commonly quoted for MNIST).
	transform = transforms.Compose([
		transforms.ToTensor(),
		transforms.Normalize((0.1307,), (0.3081,)),
	])
	mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)
	# Build the test set exactly once, with the same transform as the training
	# data. Re-creating it without `transform` would replace it with a dataset
	# that yields raw, un-normalized images.
	mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)

	# train (55,000 images), val split (5,000 images)
	mnist_train, mnist_val = random_split(mnist_train, [55000, 5000])

	# The dataloader batches the training subset (64 samples per batch).
	train_dataloader = DataLoader(mnist_train, batch_size=64)
	def create_bin(text, bin_size):
		"""Set up empty buckets keyed by values spanning the range of ``text``.

		NOTE(review): this excerpt appears to end before the buckets are
		filled or returned; only the visible set-up is implemented here.

		Args:
			text: non-empty sequence of comparable numbers -- presumably
				sequence lengths (TODO confirm).
			bin_size: step between consecutive bucket keys.

		Raises:
			ValueError: if ``text`` is empty (raised by ``max``).
		"""
		max_len = max(text)
		min_len = min(text)
		# NOTE: `bin` shadows the builtin of the same name inside this function.
		bin = {}
		# The first bucket key is min_len + bin_size - 1.
		current = min_len+bin_size-1
		# Add one empty bucket every `bin_size` while the key stays below max_len.
		while(current<max_len):
			bin[current] = []
			current = current + bin_size
		# A final bucket keyed by max_len itself is always created.
		bin[max_len] = []
		# Not used in the visible portion of the function.
		current_index = 0
	def collate_fn(data):
	'''
	We need a custom collate_fn instead of the default one, because the
	sentences differ in length and the default collate_fn cannot merge
	sequences that require padding.
	Args:
	data: list of tuple (training sequence, label)
	Return:
	padded_seq - Padded Sequence, tensor of shape (batch_size, padded_length)
	length - Original length of each sequence(without padding), tensor of shape(batch_size)
	import torch
	from torch.utils.data import Dataset, DataLoader
	import numpy as np
	import os
	import gensim

	class Dataset_seq(Dataset):
		"""Dataset that keeps a ``word2id`` object and a training-data path.

		Only the constructor is visible in this excerpt.
		"""

		def __init__(self, word2id, train_path):
			"""Store both arguments on the instance, unchanged.

			Args:
				word2id: presumably a word -> id mapping (TODO confirm).
				train_path: presumably the location of the training data
					(TODO confirm).
			"""
			self.word2id = word2id
			self.train_path = train_path
	import os
	import gensim
	from collections import Counter
	import json

	# Paths used for the training and test data.
	train_path = "./aclImdb/train"
	test_path = "./aclImdb/test"

	# Simple function that reads the data from a directory and returns the data and labels.
	# You can write your own reader for other datasets.
	# Train the discriminator.
	# Real batch: score the real images and compare against real_labels.
	output_d_real = discriminator(real_images)
	d_real_loss = criterion(output_d_real, real_labels)

	# Fake batch: sample noise of shape (batch_size, random_size) and generate images.
	z = torch.randn(batch_size, random_size).to(device)
	fake_images = generator(z)
	# detach() cuts the graph at the generator output, so back-propagating d_loss
	# does not compute gradients for the generator. `fake_images` itself stays
	# attached in case it is reused later.
	output_d_fake = discriminator(fake_images.detach())
	d_fake_loss = criterion(output_d_fake, fake_labels)
	# Total discriminator loss: real-batch loss plus fake-batch loss.
	d_loss = d_real_loss + d_fake_loss
	# Train the generator.
	# The generator input is noise of shape (batch_size, random_size). It is moved
	# to the device of real_labels: the loss below needs the discriminator output
	# and real_labels on the same device, so the noise must live there too.
	z = torch.randn(batch_size, random_size).to(real_labels.device)
	output_image = generator(z)
	output_discriminator = discriminator(output_image)

	# To train the generator, the discriminator's output for the generated images
	# is compared with real_labels, i.e. the generator is rewarded when its
	# images are scored as real.
	# criterion -> BCE Loss
	g_loss = criterion(output_discriminator, real_labels)
	class Discriminator(nn.Module):
		"""Fully connected discriminator layers (constructor only in this excerpt).

		Linear layers are used here; 2-D convolutions are a possible alternative.
		"""

		def __init__(self, image_size, hidden_size):
			"""Create the layers in the order fc1, relu, fc2, fc3.

			Args:
				image_size: input feature count (784 for MNIST).
				hidden_size: width of the hidden layers; a hyperparameter.
			"""
			super().__init__()
			self.fc1 = nn.Linear(image_size, hidden_size)
			# Despite the attribute name this is a LeakyReLU; 0.2 is the negative slope.
			self.relu = nn.LeakyReLU(0.2)
			self.fc2 = nn.Linear(hidden_size, hidden_size)
			# A single output feature, used to classify real vs. fake.
			self.fc3 = nn.Linear(hidden_size, 1)
	class Generator(nn.Module):
		"""Fully connected generator layers (constructor only in this excerpt)."""

		def __init__(self, random_size, hidden_size, image_size):
			"""Create the layers in the order fc1, relu, fc2.

			Args:
				random_size: size of the fixed-length random-noise input (e.g. 64).
				hidden_size: width of the hidden layers.
				image_size: size of the final output (784 for flattened MNIST);
					not used in the portion of the constructor visible here.
			"""
			super().__init__()
			self.fc1 = nn.Linear(random_size, hidden_size)
			self.relu = nn.ReLU()
			self.fc2 = nn.Linear(hidden_size, hidden_size)
			# The final output has a shape equal to image_size
			# (784 for flattened MNIST).