View table_prediction_df.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the list of table areas as "x1,y1,x2,y2" strings, one per detected box.
interesting_areas = []
output = [[x1, y1, x2, y2]]
for x in output:
    # NOTE(review): bboxes_pdf presumably maps the box from image coordinates
    # to PDF coordinates -- confirm against its definition.
    x1, y1, x2, y2 = bboxes_pdf(img, pdf_page, x)
    # x1,y1,x2,y2 where (x1, y1) -> left-top and (x2, y2) -> right-bottom
    # in PDF coordinate space.
    bbox_camelot = ",".join(map(str, (x1, y1, x2, y2)))
    interesting_areas.append(bbox_camelot)
View table_prediction.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import cv2 | |
import matplotlib.pyplot as plt | |
# Load the page image and convert it from BGR to RGB channel order.
image_path = imgfname
image = cv2.cvtColor(read_image_bgr(image_path), cv2.COLOR_BGR2RGB)
# Keep a separate copy so `image` itself is left untouched by later edits to `output`.
output = image.copy()
View ner_11.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict the tags of a single test sentence and print them next to its words.
i = 1586
# The sentence is wrapped in a list so the model receives a batch of one;
# argmax over the last axis picks the highest-scoring tag index per token.
p = np.argmax(model.predict(np.array([X_test[i]])), axis=-1)
print("{:15} {:5}".format("Word", "Pred"))
for w, pred in zip(X_test[i], p[0]):
    line = "{:15}: {}".format(words[w], tags[pred])
    print(line)
View ner_10.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
def plot_graphs(history, string):
    """Plot one metric and its validation counterpart against epochs.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences.
        string: Metric name to plot; the series ``'val_' + string`` is
            plotted alongside it.

    Raises:
        KeyError: If either series is missing from ``history.history``.
    """
    val_key = 'val_' + string
    for key in (string, val_key):
        plt.plot(history.history[key])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_key])
    plt.show()
View ner_09.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train the model; 20% of the training data is held out for validation.
history = model.fit(
    X_train,
    np.array(y_train),
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    verbose=1,
)
View ner_08.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sequence tagger: token ids -> embedding -> dropout -> bidirectional LSTM
# -> per-timestep softmax over the tag set.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept in case code outside this snippet refers to it.
input = Input(shape=(max_len,))
model = Embedding(input_dim=n_words, output_dim=50, input_length=max_len)(input)
model = Dropout(0.5)(model)
model = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
out = TimeDistributed(Dense(n_tags, activation="softmax"))(model)  # softmax output layer
model = Model(input, out)

# The `lr=` and `decay=` keyword arguments are deprecated and rejected by
# current Keras optimizers. `learning_rate=` with an InverseTimeDecay schedule
# (decay_steps=1) reproduces the legacy behaviour:
#     lr_t = 0.01 / (1 + 1e-6 * step)
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=1,
    decay_rate=1e-6,
)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])
View ner_07.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split | |
# Hold out 10% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
)
View ner_06.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode every sentence's tag-index sequence over n_tags classes.
# (The original note gives n_tags = 17.)
y = [to_categorical(tag_ids, num_classes=n_tags) for tag_ids in y]
View ner_05.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert each sentence into a list of tag indices: element 2 of every token
# entry is looked up in tag2idx.
y = [
    [tag2idx[token[2]] for token in sentence]
    for sentence in sentences
]
View ner_04.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pad (or cut) every sequence to a fixed length of max_len, padding at the end.
max_len = 50
# The pad value is n_words - 1, which per the original note is the 'ENDPAD'
# entry (index 35178).
X = pad_sequences(
    sequences=X,
    maxlen=max_len,
    padding="post",
    value=n_words - 1,
)
Newer Older