Skip to content

Instantly share code, notes, and snippets.

View bbc_classify_10.py
# Fit the article tokenizer on the training articles only; the validation
# articles are encoded further down with this same fitted tokenizer.
# (Tokenizer / pad_sequences are presumably the Keras preprocessing
# utilities -- their import is not visible in this snippet.)
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_articles)
word_index = tokenizer.word_index

# Convert both splits from raw text to sequences.
train_sequences = tokenizer.texts_to_sequences(train_articles)
validation_sequences = tokenizer.texts_to_sequences(validation_articles)

# Pad / truncate both splits with identical settings.
train_padded = pad_sequences(
    train_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
validation_padded = pad_sequences(
    validation_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
View bbc_classify_11.py
# A separate tokenizer, fitted on the full `labels` collection, is used to
# encode the label strings of both splits.
# NOTE(review): if this is the Keras Tokenizer, its indices start at 1
# (0 is reserved), so the encoded labels would not be zero-based -- confirm
# the model's output size accounts for that.
label_tokenizer = Tokenizer()
label_tokenizer.fit_on_texts(labels)

training_label_seq = np.array(
    label_tokenizer.texts_to_sequences(train_labels)
)
validation_label_seq = np.array(
    label_tokenizer.texts_to_sequences(validation_labels)
)
View bbc_classify_12.py
# Adam optimizer for training. `learning_rate` is the supported keyword; the
# older `lr` alias is deprecated and rejected by newer Keras releases.
# NOTE(review): `decay` is only accepted by the legacy optimizer
# implementation in recent TensorFlow releases -- confirm against the
# installed version before upgrading.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

# Compile `model` (defined outside this snippet) with the sparse categorical
# cross-entropy loss and track accuracy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
View bbc_classify_13.py
# Number of epochs passed to model.fit below.
num_epochs = 10

# Fit on the padded training data, evaluating on the validation pair;
# the returned object is kept in `history`.
history = model.fit(
    train_padded,
    training_label_seq,
    epochs=num_epochs,
    validation_data=(validation_padded, validation_label_seq),
    verbose=2,
)
View bbc_classify_14.py
txt = ["blair prepares to name poll date tony blair is likely to name 5 may as election day when parliament returns from its easter break the bbc s political editor has learned. andrew marr says mr blair will ask the queen on 4 or 5 april to dissolve parliament at the end of that week. mr blair has so far resisted calls for him to name the day but all parties have stepped up campaigning recently. downing street would not be drawn on the claim saying election timing was a matter for the prime minister. a number 10 spokeswoman would only say: he will announce an election when he wants to announce an election. the move will signal a frantic week at westminster as the government is likely to try to get key legislation through parliament. the government needs its finance bill covering the budget plans to be passed before the commons closes for business at the end of the session on 7 april. but it will also seek to push through its serious and organised crime bill and id cards bill. mr marr said on wednesd
View bbc_classify_15.py
txt = ["call to save manufacturing jobs the trades union congress (tuc) is calling on the government to stem job losses in manufacturing firms by reviewing the help it gives companies. the tuc said in its submission before the budget that action is needed because of 105 000 jobs lost from the sector over the last year. it calls for better pensions child care provision and decent wages. the 36-page submission also urges the government to examine support other european countries provide to industry. tuc general secretary brendan barber called for a commitment to policies that will make a real difference to the lives of working people. greater investment in childcare strategies and the people delivering that childcare will increases the options available to working parents he said. a commitment to our public services and manufacturing sector ensures that we can continue to compete on a global level and deliver the frontline services that this country needs. he also called for practical measures to he
View ner_01.py
class SentenceGetter(object):
    """Wrap a dataset and keep simple per-instance state.

    NOTE(review): only the beginning of this class is visible in this
    excerpt; the code that consumes ``agg_func`` is not shown here.
    """

    def __init__(self, data):
        """Store ``data`` and set the initial attribute values.

        Args:
            data: Object kept on ``self.data``. Presumably a pandas
                DataFrame with "Word", "POS" and "Tag" columns, given how
                ``agg_func`` indexes its argument -- TODO confirm.
        """
        self.n_sent = 1
        self.data = data
        self.empty = False
        # Maps one group `s` to a list of (word, pos, tag) tuples built from
        # its "Word", "POS" and "Tag" columns.
        agg_func = lambda s: [(w, p, t) for w, p, t in zip(s["Word"].values.tolist(),
                                                           s["POS"].values.tolist(),
                                                           s["Tag"].values.tolist())]
View ner_02.py
# Forward lookups: item -> position in `words` / `tags`.
word2idx = {word: index for index, word in enumerate(words)}
tag2idx = {tag: index for index, tag in enumerate(tags)}

# Reverse lookups, obtained by inverting the forward dictionaries.
idx2word = {index: word for word, index in word2idx.items()}
idx2tag = {index: tag for tag, index in tag2idx.items()}
View ner_03.py
# Replace every item of every sentence by the index of its first element
# (looked up in word2idx).
X = [
    [word2idx[token[0]] for token in sentence]
    for sentence in sentences
]
View ner_04.py
# Target length handed to pad_sequences.
max_len = 50

# Pad at the end ("post") with the value n_words - 1, which per the original
# note is the 'ENDPAD' entry (index 35178 in the author's run).
X = pad_sequences(
    sequences=X,
    maxlen=max_len,
    padding="post",
    value=n_words - 1,
)