
library.csv
Library           Description         Usage in This Project
Librosa           audio analysis
scikit-learn      machine learning    data split / Naive Bayes
imbalanced-learn  ...
nlpaug            data augmentation
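Taken together, the table describes one pipeline: librosa loads audio and computes MFCC features, nlpaug augments the raw waveform, imbalanced-learn (SMOTE) rebalances the feature set, and scikit-learn handles the data split and the classifiers. A minimal skeleton of that flow; the helper name and the feature handling are illustrative assumptions, not code from the gists:

# pipeline skeleton for the library table (names and feature handling are assumptions)
import librosa
import nlpaug.augmenter.audio as naa
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn import naive_bayes

def wav_to_mfcc(path):
    # librosa: audio analysis (load the wav and compute MFCC features)
    wav_data, sr = librosa.load(path)
    return librosa.feature.mfcc(y=wav_data, sr=sr).flatten()

# nlpaug: waveform-level data augmentation
loudness_aug = naa.LoudnessAug()

# imbalanced-learn: oversample the minority class
smote = SMOTE(random_state=100)

# scikit-learn: data split and classification, e.g.
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
classifier = naive_bayes.GaussianNB()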
f-rumblefish / AudioModel.py
Last active Apr 5, 2020
Audio Model Selection
AudioModel.py
# algorithm 1 ------------------------------------------------------------------
print(" Naive Bayes ... ")
from sklearn import naive_bayes
from sklearn.metrics import accuracy_score
classifier = naive_bayes.GaussianNB()
nb_model = classifier.fit(X, Y)                 # X/Y: training features and labels
prediction = nb_model.predict(X_test)           # X_test/Y_test: held-out split
print(" accuracy = ", accuracy_score(Y_test, prediction))
AudioBalancing.py
# import library
from imblearn.over_sampling import SMOTE
# define the resampling parameters
seed = 100
k = 1            # k_neighbors=1 works even when the minority class is very small
# apply SMOTE to create the new, balanced dataset
sm = SMOTE(sampling_strategy='auto', k_neighbors=k, random_state=seed)
X_res, y_res = sm.fit_resample(pd_mfcc, pd_label)
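A quick way to confirm the resampling did what was intended is to compare the label counts before and after; a small check, assuming the pd_label and y_res variables from the snippet above:

# compare the class distribution before and after SMOTE
from collections import Counter
print("before resampling:", Counter(pd_label))
print("after resampling: ", Counter(y_res))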
AudioAugmentation.py
# import library
import nlpaug
import nlpaug.augmenter.audio as naa
# loudness augmenter (file_data is the waveform array returned by librosa.load)
aug = naa.LoudnessAug(factor=(2, 5))
augmented_data = aug.augment(file_data)
# MFCC feature extraction for the new data ...
f-rumblefish / mfcc_for_cat_dog.py
Created Apr 4, 2020
MFCC for Audio Cats and Dogs
mfcc_for_cat_dog.py
# import library
import librosa
import numpy as np
# define the file name
wav_name = 'cat_1.wav'
# define the length of features
max_len = 20000
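The preview stops at the constants. A minimal sketch of how the MFCC features might then be computed and fixed to max_len values with the librosa/numpy imports above; the flatten and zero-pad scheme is an assumption, since the rest of the gist is not shown:

# load the wav and compute the MFCC matrix (n_mfcc x frames)
wav_data, sampling_rate = librosa.load(wav_name)
mfcc = librosa.feature.mfcc(y=wav_data, sr=sampling_rate)
# flatten and force a fixed feature length (assumed scheme)
features = mfcc.flatten()
if features.shape[0] < max_len:
    features = np.pad(features, (0, max_len - features.shape[0]))  # zero-pad short clips
else:
    features = features[:max_len]                                  # truncate long clips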
f-rumblefish / performance.csv
Last active Apr 5, 2020
Audio Dog/Cat Classification
performance.csv
Model              No SMOTE & No nlpaug   SMOTE & No nlpaug   SMOTE & nlpaug
Naive Bayes        73.1%                  81.1%               81.9%
Random Forest      73.1%                  87.8%               95.9%
Gradient Boosting  79.0%                  89.5%               97.3%
XGBoost            88.3%                  94.1%               97.3%
template.csv
topic                              data                                           model  software  reference
classification
  1  binary classification
  2  multi-class classification    MNIST / Fashion-MNIST / CIFAR-10 / CIFAR-100
  3  multi-label classification
autoencoder.py
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
input_img = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
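The preview is cut off inside the encoder. A sketch of how the decoder and training setup could complete the convolutional autoencoder, assuming the bottleneck is the 7x7x32 map produced above and the decoder simply mirrors the two pooling steps (the standard Keras autoencoder pattern, not necessarily the original gist):

encoded = x                                                        # 7x7x32 bottleneck (assumption)

x = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)                                        # 7x7   -> 14x14
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)                                        # 14x14 -> 28x28
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')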
f-rumblefish / dataset.csv
Last active Jan 1, 2020
Dataset for Image Outlier Detection
dataset.csv
                    MNIST   Fashion-MNIST   Comment
Training Dataset    54000   0               data for training the autoencoder
Validation Dataset  6000    0               data for validating the autoencoder and defining the threshold
Testing Dataset     500     500             data for testing the solution
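Only the counts come from the table; a sketch of how such a split could be assembled with the Keras dataset loaders (the selection order and preprocessing are assumptions):

import numpy as np
from keras.datasets import mnist, fashion_mnist

(mnist_train, _), (mnist_test, _) = mnist.load_data()
(_, _), (fashion_test, _) = fashion_mnist.load_data()

x_train = mnist_train[:54000]                      # 54000 MNIST images for training the autoencoder
x_val   = mnist_train[54000:]                      # 6000 MNIST images for validation / threshold
x_test  = np.concatenate([mnist_test[:500],        # 500 MNIST inliers
                          fashion_test[:500]])     # 500 Fashion-MNIST outliers

# normalize and add the channel axis expected by the Conv2D autoencoder
x_train = x_train.astype('float32')[..., np.newaxis] / 255.0
x_val   = x_val.astype('float32')[..., np.newaxis] / 255.0
x_test  = x_test.astype('float32')[..., np.newaxis] / 255.0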
f-rumblefish / Performance Summary.csv
Last active Jul 6, 2019
Multi-Input/Multi-Channel Performance
Performance Summary.csv
Approach       Core Network              Tail Network           File  Accuracy
Multi-Input    3 Conv2D/MaxPooling CNN   Dense(1024/512/256)    101   65%
Multi-Input    MobileNet                 ...                    107
Multi-Channel  3 Conv2D/MaxPooling CNN   ...                    201   22%
Multi-Channel  MobileNet                 GAP(1024)/Dense(256)   307   100%
Multi-Channel  MobileNetV2               GAP(1024)/Dense(256)   308   2-->96% / 10-->22%
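For reference, a sketch of what the multi-channel MobileNet rows describe: one MobileNet core whose 1024-channel feature map goes through global average pooling and a Dense(256) tail. The input resolution, class count, and channel stacking are assumptions:

# multi-channel MobileNet + GAP/Dense(256) head (sketch; sizes are assumptions)
from keras.applications import MobileNet
from keras.layers import Input, GlobalAveragePooling2D, Dense
from keras.models import Model

num_classes = 10                                        # assumption
multi_channel_input = Input(shape=(128, 128, 3))        # sources stacked as channels (assumption)

core = MobileNet(input_tensor=multi_channel_input, include_top=False, weights=None)
x = GlobalAveragePooling2D()(core.output)               # "GAP(1024)": MobileNet ends in 1024 channels
x = Dense(256, activation='relu')(x)                    # tail network: Dense(256)
output = Dense(num_classes, activation='softmax')(x)

model = Model(multi_channel_input, output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])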