Ferry Djaja ferrygun

## led.html

<!doctype html>
<!--
Copyright 2016 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

## bbc_classify_01.py
import csv
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LSTM, Dropout, Activation, Embedding, Bidirectional

## bbc_classify_02.py
# Notebook shell command: download the BBC text CSV to /tmp/bbc-text.csv.
# NOTE: --no-check-certificate tells wget to skip TLS certificate validation.
!wget --no-check-certificate \
    https://storage.googleapis.com/dataset-uploader/bbc/bbc-text.csv \
    -O /tmp/bbc-text.csv

## bbc_classify_03.py
# Fetch the NLTK 'stopwords' corpus and load its English word list.
import nltk
nltk.download('stopwords')  # downloads the corpus that stopwords.words() reads
from nltk.corpus import stopwords
# English stop words, stored as a set.
STOPWORDS = set(stopwords.words('english'))

## bbc_classify_05.py
articles = []
labels = []

with open("/tmp/bbc-text.csv", 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    next(reader)
    for row in reader:
        labels.append(row[0])
        article = row[1]
        for word in STOPWORDS:

## gist:f164186a622de189f6f47027e2b74496
# Index at which the data is cut: the first `training_portion` fraction of
# the articles (rounded down by int()) goes to training.
train_size = int(len(articles) * training_portion)

# Items before the cut form the training set; articles and labels are
# sliced at the same index so they stay aligned.
train_articles, train_labels = articles[:train_size], labels[:train_size]

# Everything from the cut onwards is held out for validation.
validation_articles, validation_labels = articles[train_size:], labels[train_size:]

## bbc_classify_04.py
# --- Tokenizer settings ---
# Passed to Tokenizer as num_words: keep only the most common words.
vocab_size = 5000
# Placeholder token for out-of-vocabulary (OOV) words.
oov_tok = '<OOV>'

# --- Sequence padding settings ---
# Passed to pad_sequences as maxlen.
max_length = 200
# Both truncation and padding are applied at the end ('post') of a sequence.
trunc_type = padding_type = 'post'

# --- Model / data split settings ---
# Presumably the embedding vector width; not used in the visible code.
embedding_dim = 64
# Fraction of the articles used for training; the rest is for validation.
training_portion = 0.8

## bbc_classify_07.py
# Create a tokenizer limited to `vocab_size` words, with `oov_tok` as the
# out-of-vocabulary token.
tokenizer = Tokenizer(oov_token=oov_tok, num_words=vocab_size)

# Fit the vocabulary on the training articles only.
tokenizer.fit_on_texts(train_articles)

# Keep a handle on the fitted tokenizer's word index for later lookups.
word_index = tokenizer.word_index

## bbc_classify_08.py
# Encode every training article with the fitted tokenizer; the result is
# what pad_sequences receives in the next step.
train_sequences = tokenizer.texts_to_sequences(train_articles)

## bbc_classify_09.py
# Bring every training sequence to exactly `max_length` entries, padding or
# truncating according to `padding_type` / `trunc_type`.
train_padded = pad_sequences(
    train_sequences,
    maxlen=max_length,
    truncating=trunc_type,
    padding=padding_type,
)

	<!doctype html>
	<!--
	Copyright 2016 Google Inc. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0
	import csv
	import tensorflow as tf
	import numpy as np
	from tensorflow.keras.preprocessing.text import Tokenizer
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.layers import Dense, Flatten, LSTM, Dropout, Activation, Embedding, Bidirectional
	!wget --no-check-certificate \
	https://storage.googleapis.com/dataset-uploader/bbc/bbc-text.csv \
	-O /tmp/bbc-text.csv
	import nltk
	nltk.download('stopwords')
	from nltk.corpus import stopwords
	STOPWORDS = set(stopwords.words('english'))
	articles = []
	labels = []

	with open("/tmp/bbc-text.csv", 'r') as csvfile:
	reader = csv.reader(csvfile, delimiter=',')
	next(reader)
	for row in reader:
	labels.append(row[0])
	article = row[1]
	for word in STOPWORDS:
	train_size = int(len(articles) * training_portion)

	train_articles = articles[0: train_size]
	train_labels = labels[0: train_size]

	validation_articles = articles[train_size:]
	validation_labels = labels[train_size:]
	vocab_size = 5000 # make the top list of words (common words)
	embedding_dim = 64
	max_length = 200
	trunc_type = 'post'
	padding_type = 'post'
	oov_tok = '<OOV>' # OOV = Out of Vocabulary
	training_portion = .8
	tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
	tokenizer.fit_on_texts(train_articles)
	word_index = tokenizer.word_index