Skip to content

Instantly share code, notes, and snippets.

View table_prediction_df.py
# Build one comma-separated bounding-box string per entry of `output`.
interesting_areas = []
output = [[x1, y1, x2, y2]]
for x in output:
    # NOTE(review): bboxes_pdf presumably maps the box into PDF space - confirm.
    x1, y1, x2, y2 = bboxes_pdf(img, pdf_page, x)
    # "x1,y1,x2,y2" where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    # in PDF coordinate space.
    bbox_camelot = ",".join(str(coord) for coord in (x1, y1, x2, y2))
    interesting_areas.append(bbox_camelot)
View table_prediction.py
import numpy as np
import cv2
import matplotlib.pyplot as plt
# Load the image named by `imgfname` and keep an independent copy of it.
image_path = imgfname
# The loaded array is converted from BGR to RGB channel order.
image = cv2.cvtColor(read_image_bgr(image_path), cv2.COLOR_BGR2RGB)
# Separate copy, so changes made to `output` do not touch `image`.
output = image.copy()
View ner_11.py
# Print the predicted tag next to every token of one test sequence.
i = 1586
sample = np.array([X_test[i]])  # batch holding only sequence i
# Index of the highest-scoring entry along the last axis of the prediction.
p = np.argmax(model.predict(sample), axis=-1)
print("{:15} {:5}".format("Word", "Pred"))
for w, pred in zip(X_test[i], p[0]):
    print("{:15}: {}".format(words[w], tags[pred]))
View ner_10.py
import matplotlib.pyplot as plt
def plot_graphs(history, string):
    """Plot a recorded metric together with its ``val_`` counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            sequences of values.
        string: Name of the metric to plot; ``'val_' + string`` must also
            be a key of ``history.history``.
    """
    series_names = [string, 'val_'+string]
    for series in series_names:
        plt.plot(history.history[series])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend(series_names)
    plt.show()
View ner_09.py
# Fit the model: 10 epochs, batches of 32, validation_split of 0.2.
train_labels = np.array(y_train)
history = model.fit(
    X_train,
    train_labels,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    verbose=1,
)
View ner_08.py
# Sequence tagger: embedding -> dropout -> bidirectional LSTM ->
# per-timestep softmax over n_tags classes.
# NOTE(review): `input` shadows the builtin of the same name; the name is
# kept because code outside this snippet may refer to it.
input = Input(shape=(max_len,))
hidden = Embedding(input_dim=n_words, output_dim=50, input_length=max_len)(input)
hidden = Dropout(0.5)(hidden)
hidden = Bidirectional(
    LSTM(units=100, return_sequences=True, recurrent_dropout=0.1)
)(hidden)
out = TimeDistributed(Dense(n_tags, activation="softmax"))(hidden)  # softmax output layer
model = Model(input, out)
# `learning_rate` is the supported spelling; the old `lr` alias is deprecated
# and newer Keras optimizers either ignore it with a warning or reject it.
# NOTE(review): `decay` is also a legacy argument in recent Keras releases -
# confirm it is accepted by the installed version.
opt = tf.keras.optimizers.Adam(learning_rate=0.01, decay=1e-6)
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])
View ner_07.py
from sklearn.model_selection import train_test_split
# Split X and y into train and test parts, with test_size=0.1.
# NOTE(review): no random_state is passed, so the split is presumably not
# reproducible between runs - confirm whether that is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
View ner_06.py
# One-hot encode each element of y with n_tags classes.
# n_tags = 17
y = [to_categorical(tag_ids, num_classes=n_tags) for tag_ids in y]
View ner_05.py
# For every sentence, look up the index of each token's tag; the tag is the
# element at position 2 of a token entry.
y = [
    [tag2idx[token[2]] for token in sentence]
    for sentence in sentences
]
View ner_04.py
max_len = 50
# The fill value is n_words - 1, which is 'ENDPAD' (index 35178);
# padding="post" puts the fill after the real tokens.
X = pad_sequences(
    sequences=X,
    maxlen=max_len,
    padding="post",
    value=n_words - 1,
)