View led.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype html> | |
<!-- | |
Copyright 2016 Google Inc. All Rights Reserved. | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 |
View bbc_classify_01.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import tensorflow as tf | |
import numpy as np | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Dense, Flatten, LSTM, Dropout, Activation, Embedding, Bidirectional |
View bbc_classify_02.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Notebook shell command: download the BBC text CSV to /tmp/bbc-text.csv.
# NOTE(review): --no-check-certificate disables TLS certificate verification;
# it is kept here to preserve behaviour, but confirm it is really needed.
!wget --no-check-certificate \
    https://storage.googleapis.com/dataset-uploader/bbc/bbc-text.csv \
    -O /tmp/bbc-text.csv
View bbc_classify_03.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk

# Fetch the NLTK stop-word corpus so the lookup below can succeed.
nltk.download('stopwords')

from nltk.corpus import stopwords

# English stop words, held as a set (duplicates collapsed).
STOPWORDS = set(stopwords.words('english'))
View bbc_classify_05.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
articles = [] | |
labels = [] | |
with open("/tmp/bbc-text.csv", 'r') as csvfile: | |
reader = csv.reader(csvfile, delimiter=',') | |
next(reader) | |
for row in reader: | |
labels.append(row[0]) | |
article = row[1] | |
for word in STOPWORDS: |
View gist:f164186a622de189f6f47027e2b74496
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the corpus by position: the leading `training_portion` fraction is
# used for training and everything after it for validation.
# `articles`, `labels` and `training_portion` must already be defined.
# int() truncates, so any fractional remainder goes to the validation side.
train_size = int(len(articles) * training_portion)

train_articles = articles[:train_size]
train_labels = labels[:train_size]

validation_articles = articles[train_size:]
validation_labels = labels[train_size:]
View bbc_classify_04.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hyperparameters for the text-classification pipeline.
vocab_size = 5000        # size of the vocabulary (most common words) to keep
embedding_dim = 64       # embedding dimension
max_length = 200         # sequence length used when padding / truncating
trunc_type = 'post'      # truncation side for over-long sequences
padding_type = 'post'    # padding side for short sequences
oov_tok = '<OOV>'        # OOV = Out of Vocabulary
training_portion = .8    # fraction of the data used for training
View bbc_classify_07.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the tokenizer with a vocabulary cap of `vocab_size` and `oov_tok`
# as the out-of-vocabulary token.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)

# The vocabulary is fitted on the training articles only.
tokenizer.fit_on_texts(train_articles)

# Keep a handle on the tokenizer's word index for later use.
word_index = tokenizer.word_index
View bbc_classify_08.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert each training article to a sequence using the fitted tokenizer.
train_sequences = tokenizer.texts_to_sequences(train_articles)
View bbc_classify_09.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pad / truncate every training sequence to `max_length`; `padding_type`
# and `trunc_type` select which end of the sequence is affected.
train_padded = pad_sequences(
    train_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
OlderNewer