Snow Storm DanielDaCosta

## text_preprocessing.py
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
def clean_text(text):
    """Lowercase *text* and strip '@' mentions from it.

    Args:
        text: Raw message string.

    Returns:
        The lowercased message with every ``@handle`` removed. Whitespace
        surrounding a removed mention is left untouched.
    """
    text = text.lower()

    # '@' mention. Even though a mention adds some information to the
    # message, that information doesn't add value when building the
    # classification model, so it is dropped.
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)

    # Hand the cleaned text back; without this the function returned None
    # and the cleaning above was discarded.
    return text

## Keras_tokenizer.py
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Text vectorisation: turns the 'message' column of df into fixed-length
# integer sequences (X) and selects the target columns (y).
# `df` and `output_columns_all` are defined outside this snippet.
vocabulary_size = 20000 # Choosing size of vocabulary
tokenizer = Tokenizer(num_words=vocabulary_size)
# Build the tokenizer's word index from the messages
tokenizer.fit_on_texts(df['message'])
# Encode every message as a sequence of token ids
sequences = tokenizer.texts_to_sequences(df['message'])
# Pads sequences to the same length: MAXLEN
MAXLEN = 50
X = pad_sequences(sequences, maxlen=MAXLEN)
# Targets: the df columns listed in output_columns_all
y = df[output_columns_all]

## Model.py
# Shared trunk of the network, built by chaining layer calls on a tensor.
# Input: one int32 sequence of length MAXLEN per sample.
main_input = Input(shape=(MAXLEN,), dtype='int32', name='main_input')
# Embed each token id (vocabulary of vocabulary_size) into 50 dimensions.
x = Embedding(input_dim=vocabulary_size, output_dim=50, input_length=MAXLEN)(main_input)
x = Dropout(0.3)(x)
# Conv1D with 64 filters and kernel size 5, followed by pooling with window 4.
x = Conv1D(64, 5, activation='relu')(x)
x = MaxPooling1D(pool_size=4)(x)
# LSTM with 100 units over the pooled features.
x = LSTM(100)(x)
# After this dropout, `x` is the tensor the Dense output layers are attached to.
x = Dropout(0.3)(x)

## output_binary.py
# One layer name per binary target column: binary_output_0, binary_output_1, ...
binary_names = [f'binary_output_{i}' for i, _ in enumerate(output_columns_binary)]

# A single-unit sigmoid Dense layer is created for each binary output, all
# attached to the shared tensor x.
output_array = [
    Dense(1, activation='sigmoid', name=head_name)(x)
    for head_name in binary_names
]

# Every binary head uses the same metric and loss, keyed by its layer name.
metrics_array = dict.fromkeys(binary_names, 'binary_accuracy')
loss_array = dict.fromkeys(binary_names, 'binary_crossentropy')

## output_categorical.py
# Three-unit softmax head on the shared tensor x; its metric and loss are
# registered under the layer name alongside the existing outputs.
categorical_output = Dense(3, activation='softmax', name='categorical_output')(x)
output_array += [categorical_output]
metrics_array.update({'categorical_output': 'sparse_categorical_accuracy'})
loss_array.update({'categorical_output': 'sparse_categorical_crossentropy'})

## class_weight.py
# Per-class loss weights, keyed by class label.
weight_binary = {0: 0.5, 1: 7} #values obtained through calculations
weight_categorical = {0: 1.4, 1: 0.43, 2: 7}

# Maps each output layer name to the class-weight dict used for that output.
classes_weights = {}
# There are 35 outputs in output_columns_binary, having classes (0 or 1)
for i, dense_layer in enumerate(output_columns_binary):
    name = f'binary_output_{i}'
    classes_weights[name] = weight_binary
# There is only 1 output in output_columns_categorical, having classes (0, 1 or 2)
for i, dense_layer in enumerate(output_columns_categorical):
    name = 'categorical_output'
    # Store the weights for the categorical head. Previously only the name
    # was computed, so weight_categorical was never used.
    classes_weights[name] = weight_categorical

## model_instance.py
# Assemble the multi-output model from the shared input and all output heads.
model = Model(outputs=output_array, inputs=main_input)

# Per-output losses and metrics are passed as dicts keyed by layer name.
model.compile(
    loss=loss_array,
    metrics=metrics_array,
    optimizer='adadelta',
)

# Train without progress output, applying the per-output class weights.
model.fit(
    X_train,
    y_train_output,
    batch_size=512,
    epochs=40,
    verbose=0,
    class_weight=classes_weights,
)

## variables.tf
# Deployment environment; falls back to "dev" when the caller sets nothing.
# The explicit string type matches the declaration style of the other
# variable in this file and rejects non-string values early.
variable "environment" {
    description = "Env"
    type        = string
    default     = "dev"
}

# Application name. No default is declared here, so a value has to be
# supplied by the caller.
variable "name" {
    description = "Application Name"
    type        = string
}

## sqs.tf
# SQS queue named "apigateway-queue".
resource "aws_sqs_queue" "queue" {
  name                      = "apigateway-queue"
  delay_seconds             = 0      # no delivery delay
  max_message_size          = 262144 # 256 * 1024
  message_retention_seconds = 86400  # 24 hours
  receive_wait_time_seconds = 10     # receive calls wait up to 10 s

  # local.app_name is defined outside this snippet.
  tags = {
    Product = local.app_name
  }
}

## api-gateway-permission.json
{
    "Version": "2012-10-17",
    "Statement": [
      {
        "Effect": "Allow",
        "Action": [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:DescribeLogGroups",
          "logs:DescribeLogStreams",
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem.wordnet import WordNetLemmatizer
	def clean_text(text):
	    """Lowercase *text* and strip '@' mentions from it.

	    Args:
	        text: Raw message string.

	    Returns:
	        The lowercased message with every ``@handle`` removed. Whitespace
	        surrounding a removed mention is left untouched.
	    """
	    text = text.lower()

	    # '@' mention. Even though a mention adds some information to the
	    # message, that information doesn't add value when building the
	    # classification model, so it is dropped.
	    text = re.sub(r'@[A-Za-z0-9_]+', '', text)

	    # Hand the cleaned text back; without this the function returned None
	    # and the cleaning above was discarded.
	    return text
	from keras.preprocessing.text import Tokenizer
	from keras.preprocessing.sequence import pad_sequences
	# Text vectorisation: turns the 'message' column of df into fixed-length
	# integer sequences (X) and selects the target columns (y).
	# `df` and `output_columns_all` are defined outside this snippet.
	vocabulary_size = 20000 # Choosing size of vocabulary
	tokenizer = Tokenizer(num_words=vocabulary_size)
	# Build the tokenizer's word index from the messages
	tokenizer.fit_on_texts(df['message'])
	# Encode every message as a sequence of token ids
	sequences = tokenizer.texts_to_sequences(df['message'])
	# Pads sequences to the same length: MAXLEN
	MAXLEN = 50
	X = pad_sequences(sequences, maxlen=MAXLEN)
	# Targets: the df columns listed in output_columns_all
	y = df[output_columns_all]
	# Shared trunk of the network, built by chaining layer calls on a tensor.
	# Input: one int32 sequence of length MAXLEN per sample.
	main_input = Input(shape=(MAXLEN,), dtype='int32', name='main_input')
	# Embed each token id (vocabulary of vocabulary_size) into 50 dimensions.
	x = Embedding(input_dim=vocabulary_size, output_dim=50, input_length=MAXLEN)(main_input)
	x = Dropout(0.3)(x)
	# Conv1D with 64 filters and kernel size 5, followed by pooling with window 4.
	x = Conv1D(64, 5, activation='relu')(x)
	x = MaxPooling1D(pool_size=4)(x)
	# LSTM with 100 units over the pooled features.
	x = LSTM(100)(x)
	# After this dropout, `x` is the tensor the Dense output layers are attached to.
	x = Dropout(0.3)(x)
	# Model outputs plus the per-output metric and loss, keyed by layer name.
	output_array = []
	metrics_array = {}
	loss_array = {}
	# The loop body is indented again so it actually belongs to the loop;
	# the flattened version was not valid Python.
	for i, dense_layer in enumerate(output_columns_binary):
	    name = f'binary_output_{i}'
	    # A Dense Layer is created for each output
	    binary_output = Dense(1, activation='sigmoid', name=name)(x)
	    output_array.append(binary_output)
	    metrics_array[name] = 'binary_accuracy'
	    loss_array[name] = 'binary_crossentropy'
	# Three-unit softmax head on the shared tensor x; its metric and loss are
	# registered under the layer name alongside the existing outputs.
	categorical_output = Dense(3, activation='softmax', name='categorical_output')(x)
	output_array += [categorical_output]
	metrics_array.update({'categorical_output': 'sparse_categorical_accuracy'})
	loss_array.update({'categorical_output': 'sparse_categorical_crossentropy'})
	# Per-class loss weights, keyed by class label.
	weight_binary = {0: 0.5, 1: 7} #values obtained through calculations
	weight_categorical = {0: 1.4, 1: 0.43, 2: 7}

	# Maps each output layer name to the class-weight dict used for that output.
	classes_weights = {}
	# There are 35 outputs in output_columns_binary, having classes (0 or 1)
	for i, dense_layer in enumerate(output_columns_binary):
	    name = f'binary_output_{i}'
	    classes_weights[name] = weight_binary
	# There is only 1 output in output_columns_categorical, having classes (0, 1 or 2)
	for i, dense_layer in enumerate(output_columns_categorical):
	    name = 'categorical_output'
	    # Store the weights for the categorical head. Previously only the name
	    # was computed, so weight_categorical was never used.
	    classes_weights[name] = weight_categorical
	# Assemble the multi-output model from the shared input and all output heads.
	model = Model(outputs=output_array, inputs=main_input)

	# Per-output losses and metrics are passed as dicts keyed by layer name.
	model.compile(
	    loss=loss_array,
	    metrics=metrics_array,
	    optimizer='adadelta',
	)

	# Train without progress output, applying the per-output class weights.
	model.fit(
	    X_train,
	    y_train_output,
	    batch_size=512,
	    epochs=40,
	    verbose=0,
	    class_weight=classes_weights,
	)
	# Deployment environment; falls back to "dev" when the caller sets nothing.
	# The explicit string type matches the declaration style of the other
	# variable in this file and rejects non-string values early.
	variable "environment" {
	    description = "Env"
	    type        = string
	    default     = "dev"
	}

	# Application name. No default is declared here, so a value has to be
	# supplied by the caller.
	variable "name" {
	description = "Application Name"
	type = string
	}
	# SQS queue named "apigateway-queue".
	resource "aws_sqs_queue" "queue" {
	  name                      = "apigateway-queue"
	  delay_seconds             = 0      # no delivery delay
	  max_message_size          = 262144 # 256 * 1024
	  message_retention_seconds = 86400  # 24 hours
	  receive_wait_time_seconds = 10     # receive calls wait up to 10 s

	  # local.app_name is defined outside this snippet.
	  tags = {
	    Product = local.app_name
	  }
	}
	{
	"Version": "2012-10-17",
	"Statement": [
	{
	"Effect": "Allow",
	"Action": [
	"logs:CreateLogGroup",
	"logs:CreateLogStream",
	"logs:DescribeLogGroups",
	"logs:DescribeLogStreams",