This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from dotenv import load_dotenv | |
import requests | |
# Load your credentials from the .env file into the process environment.
load_dotenv()
def create_headers(): | |
api_key = os.getenv('api_key') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy | |
import re | |
# Load the trained pipeline once at import time.
# A forward slash is used instead of '\': '\m' is an invalid escape sequence
# in a normal string literal, and the backslash form only resolves on Windows.
ner = spacy.load('trf_ner/model-best')
def test_model(): | |
''' | |
Check if the model is loaded properly | |
''' | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastapi import FastAPI | |
from pydantic import BaseModel | |
from utils.nlp import extract_ents | |
from utils.twitter_api import get_response | |
# Application object that the route decorators attach to.
app = FastAPI()
class Query(BaseModel):
    """Pydantic model holding a search keyword and a result limit.

    NOTE(review): presumably the request body of the Twitter search
    endpoint -- confirm against the route handlers.
    """

    # Text to search for.
    keyword: str
    # Upper bound on the number of results requested.
    max_results: int
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Quick smoke run: print the entities the model finds in two sample sentences.
samples = [
    "Facebook has a price target of $ 20 for this quarter",
    "$ AAPL is gaining a new momentum",
]

# ner.pipe processes the texts as a stream and yields one Doc per sample.
for doc in ner.pipe(samples):
    for ent in doc.ents:
        print(ent.label_, ent.text)
    # Blank line to separate the output of consecutive samples.
    print()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert raw sentences into lists of (token, '') tuples.
def sents2tuples(sents):
    """Tokenize each sentence and pair every token with an empty label.

    Args:
        sents: Iterable of raw sentence strings.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(token, '')`` tuples in token order. An empty input yields ``[]``.
    """
    return [
        [(token, '') for token in word_tokenize(sent)]
        for sent in sents
    ]
#with sents2tuples, preprocessing will work just fine with new text
def preprocess( texts): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Evaluation of the trained model.
# Score only the labels other than 'O' (presumably the IOB "outside" tag).
# Filtering, rather than list.remove, avoids a ValueError when the model
# never saw an 'O' label during training.
labels = [label for label in crf.classes_ if label != 'O']
print("trained labels :", labels)

# Predict on the held-out set and report the weighted F1 over those labels.
y_pred = crf.predict(X_test)
print(
    metrics.flat_f1_score(
        y_test,
        y_pred,
        average='weighted',
        labels=labels,
        # 1 is the documented spelling of the value previously passed as True.
        zero_division=1,
    )
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configure the CRF tagger and fit it on the training split.
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,  # L1 regularization coefficient
    c2=0.1,  # L2 regularization coefficient
    max_iterations=100,
    all_possible_transitions=True,
)
crf.fit(X_train, y_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Utility functions to extract features
def word2features(sent, i): | |
word = sent[i][0] | |
#postag = sent[i][1] | |
features = { | |
'bias': 1.0, | |
'word.lower()': word.lower(), | |
'word[-3:]': word[-3:], | |
'word[-2:]': word[-2:], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def import_documents_set_iob(train_file_path): | |
with open(train_file_path, encoding="utf8") as f: | |
tokens_in_file = f.readlines() | |
# construct list of list train set format | |
new_train_set = [] | |
for index_token,token in enumerate(tokens_in_file): | |
# detect new document | |
is_new_document = False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.colab import drive

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')
NewerOlder