Megha Agarwal megha444

## model.py
class BERT_Arch(nn.Module):
    """Module that wraps a BERT model.

    Only the beginning of the constructor is shown in this snippet.
    """

    def __init__(self, bert_model):
        """Store the supplied BERT model and create a ReLU activation.

        Args:
            bert_model: the model object kept on ``self.bert``.
        """
        super().__init__()

        # BERT model handed in by the caller
        self.bert = bert_model

        # ReLU activation layer
        self.relu = nn.ReLU()

## freeze.py
# Disable gradient tracking on every parameter of the BERT model.
for weight in bert_model.parameters():
    weight.requires_grad_(False)

## dataloader.py
from torch.utils.data import DataLoader, TensorDataset, SequentialSampler, RandomSampler

# Bundle the validation tensors into a single dataset.
valdata = TensorDataset(valseq, valmask, valy)

# Number of examples per batch.
b_size = 32

# The validation set is read sequentially, in its stored order.
valsampler = SequentialSampler(valdata)

# DataLoader that serves the validation set batch by batch.
valDataLoader = DataLoader(
    valdata,
    sampler=valsampler,
    batch_size=b_size,
)

## padInteger.py
# Tokenize and encode the validation-set sequences. Every sequence is padded
# or truncated to exactly 25 tokens.
# `padding='max_length'` replaces the deprecated `pad_to_max_length=True`
# argument and produces the same fixed-length output.
tokensval = tokenizer.batch_encode_plus(
    valtext.tolist(),
    max_length=25,
    padding='max_length',
    truncation=True,
)

# tokenizing and encoding the sequences in the training set
tokenstrain = tokenizer.batch_encode_plus (

## selectPadbits.py
# Number of whitespace-separated words in each message of the train set
sequence_len = [len(message.split()) for message in traintext]

# Histogram (30 bins) of the message lengths
pd.Series(sequence_len).hist(bins=30)

## modelToken.py
# Load the pretrained 'bert-base-uncased' model
bert_model = AutoModel.from_pretrained('bert-base-uncased')
# Load the matching fast BERT tokenizer
bert_tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Tokenizer

# Two short sentences used as sample input
sample_text = [
    "this is a bert tutorial",
    "we will fine tune the bert model",
]

## split.py
# First split: keep 70% of the rows for training and hold out 30% as a
# temporary set, stratified by label. The temporary set is divided again
# into validation and test sets further down.
traintext, temptext, trainlabels, templabels = train_test_split(
    df['text'],
    df['label'],
    random_state=2018,
    test_size=0.3,
    stratify=df['label'],
)


valtext, testtext, vallabels, testlabels = train_test_split(temptext, templabels,
                                                                random_state=2018,
                                                                test_size=0.5,

## importLoad.py
import torch
import pandas as pd
import torch.nn as nn
import transformers
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import AutoModel, BertTokenizerFast
from sklearn.metrics import classification_report

# specify your GPU

## prediction.py
# Rows before index 987 are the training portion; the rest is validation.
train_data = new_dataset[:987]
# Take an explicit copy before adding a column: assigning to a bare slice of
# `new_dataset` raises pandas' SettingWithCopyWarning and the result is not
# guaranteed. NOTE(review): assumes new_dataset is a pandas DataFrame — confirm.
valid_data = new_dataset[987:].copy()
valid_data['Predictions'] = predicted_closing_price
# Plot the training closes, then the validation closes next to the predictions.
plt.plot(train_data["Close"])
plt.plot(valid_data[['Close', "Predictions"]])

## model_name.py
# Save lstm_model to the file "saved_model.h5".
# NOTE(review): presumably a Keras model written in HDF5 format — confirm.
lstm_model.save("saved_model.h5")
	class BERT_Arch(nn.Module):

	def __init__(self, bert_model):

	super(BERT_Arch, self).__init__()

	self.bert = bert_model

	# relu activation function
	self.relu = nn.ReLU()
	# freeze all the parameters
	for params in bert_model.parameters():
	params.requires_grad = False
	from torch.utils.data import DataLoader, TensorDataset, SequentialSampler, RandomSampler

	# wrap tensors
	valdata = TensorDataset(valseq, valmask, valy)
	b_size = 32
	# sampler for reading the validation data sequentially
	valsampler = SequentialSampler(valdata)

	# dataLoader for validation set
	valDataLoader = DataLoader(valdata, sampler = valsampler, batch_size = b_size)
	# tokenizing and encoding the sequences in the validation set
	tokensval = tokenizer.batch_encode_plus(
	valtext.tolist(),
	max_length = 25,
	pad_to_max_length=True,
	truncation=True
	)

	# tokenizing and encoding the sequences in the training set
	tokenstrain = tokenizer.batch_encode_plus (
	# get length of all the messages in the train set
	sequence_len = [len(i.split()) for i in traintext]

	pd.Series(sequence_len).hist(bins = 30)
	# importing BERT-base pretrained model
	bert_model= AutoModel.from_pretrained('bert-base-uncased')
	# Loading the BERT tokenizer
	bert_tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

	#Tokenizer

	# sample data
	sample_text = ["this is a bert tutorial", "we will fine tune the bert model"]
	# split train dataset into train, validation and test sets
	traintext, temptext, trainlabels, templabels = train_test_split(df['text'], df['label'],
	random_state=2018,
	test_size=0.3,
	stratify=df['label'])


	valtext, testtext, vallabels, testlabels = train_test_split(temptext, templabels,
	random_state=2018,
	test_size=0.5,
	import torch
	import pandas as pd
	import torch.nn as nn
	import transformers
	import numpy as np
	from sklearn.model_selection import train_test_split
	from transformers import AutoModel, BertTokenizerFast
	from sklearn.metrics import classification_report

	# specify your GPU
	train_data=new_dataset[:987]
	valid_data=new_dataset[987:]
	valid_data['Predictions']=predicted_closing_price
	plt.plot(train_data["Close"])
	plt.plot(valid_data[['Close',"Predictions"]])