This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pre-process the data to remove special characters, hashtags, URLs, etc.
import re | |
def cleanResume(resumeText):
    """Return *resumeText* with URLs, tags, punctuation and non-ASCII removed.

    The substitutions run in a fixed order because later steps would
    otherwise destroy the markers earlier steps look for (for example,
    stripping punctuation first would remove the ``#`` and ``@`` prefixes).

    Args:
        resumeText: Raw resume text as a ``str``.

    Returns:
        The cleaned text, with every run of whitespace collapsed to a
        single space. Leading/trailing single spaces are not stripped.
    """
    # Remove URLs together with any whitespace that follows them.
    resumeText = re.sub(r'http\S+\s*', ' ', resumeText)
    # Remove the stand-alone tokens "RT" and "cc"; the word boundaries keep
    # words that merely contain them (e.g. "success", "access") intact.
    resumeText = re.sub(r'\b(?:RT|cc)\b', ' ', resumeText)
    # Remove hashtags (replaced with nothing, unlike the other steps).
    resumeText = re.sub(r'#\S+', '', resumeText)
    # Remove @mentions.
    resumeText = re.sub(r'@\S+', ' ', resumeText)
    # Replace every ASCII punctuation character with a space.
    resumeText = re.sub(
        '[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""),
        ' ',
        resumeText,
    )
    # Replace every non-ASCII character with a space.
    resumeText = re.sub(r'[^\x00-\x7f]', ' ', resumeText)
    # Collapse runs of whitespace into a single space.
    resumeText = re.sub(r'\s+', ' ', resumeText)
    return resumeText
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tokenize features and labels
import tensorflow as tf | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
# Tokenize feature data
# Vocabulary size; also passed as the input size of the Embedding layer.
vocab_size = 6000
# NOTE(review): presumably the out-of-vocabulary placeholder token handed to
# the feature Tokenizer -- the call site is not visible here; confirm.
oov_tok = '<>'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tokenize label data
label_tokenizer = Tokenizer(lower=True)
label_tokenizer.fit_on_texts(labels)
label_index = label_tokenizer.word_index
# Show the label -> integer index mapping learned by the tokenizer.
print(dict(label_index))
# Encode the training labels as sequences of integer indices.
train_label_sequences = label_tokenizer.texts_to_sequences(train_labels)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a sequential model
# Define the neural network
embedding_dim = 64 | |
model = tf.keras.Sequential([ | |
# Add an Embedding layer expecting input vocab of size 6000, and output embedding dimension of size 64 we set at the top | |
tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=1), | |
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)), | |
#tf.keras.layers.Dense(embedding_dim, activation='relu'), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build an array from three test examples (indices 3, 8 and 17) and use the
# model to get predictions for them.
to_predict = [test_feature_padded[3], test_feature_padded[8], test_feature_padded[17]]
# Sequential.predict_classes() was removed in TensorFlow 2.6; the documented
# replacement for a multi-class model is the argmax over the class axis.
# NOTE(review): assumes the model's final layer is a softmax over classes --
# the output layer is not visible here; confirm.
prediction = np.argmax(model.predict(np.array(to_predict)), axis=-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the labels of test examples 3, 8 and 17.
for idx in (3, 8, 17):
    print(test_labels[idx])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the names of all entries found directly inside `directory`.
# os.listdir already returns a list, so no manual accumulation is needed.
Name = os.listdir(directory)
print(Name)
print(len(Name))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

# Preview up to ten images from one breed folder on a 2 x 5 grid.
Breed = 'dog breed/Akita dog'
sub_class = os.listdir(Breed)
fig = plt.figure(figsize=(10, 5))
for e, image_name in enumerate(sub_class[:10]):
    plt.subplot(2, 5, e + 1)  # subplot positions are 1-based
    img = plt.imread(os.path.join(Breed, image_name))
    plt.imshow(img, cmap=plt.get_cmap('gray'))
    plt.axis('off')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dataset=[] | |
testset=[] | |
count=0 | |
for file in os.listdir(directory): | |
path=os.path.join(directory,file) | |
t=0 | |
for im in os.listdir(path): | |
image=load_img(os.path.join(path,im), grayscale=False, color_mode='rgb', target_size=(180,180)) | |
image=img_to_array(image) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert the labels with to_categorical, then turn the labels and both
# sample collections into NumPy arrays.
# NOTE(review): to_categorical is presumably the Keras one-hot encoding
# helper -- its import is not visible here; confirm.
labels1 = to_categorical(labels0)
labels = np.array(labels1)
data = np.array(data)
test = np.array(test)
OlderNewer