Skip to content

Instantly share code, notes, and snippets.

@khaledadrani
Last active January 17, 2022 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save khaledadrani/f03e857da2e6726425ac1ce2d15dda17 to your computer and use it in GitHub Desktop.
Save khaledadrani/f03e857da2e6726425ac1ce2d15dda17 to your computer and use it in GitHub Desktop.
#convert raw sentences into list of tuples (token and empty)
def sents2tuples(sents):
res = []
for sent in sents:
tokens = word_tokenize(sent)
res.append([(token,'') for token in tokens])
return res
# With sents2tuples, preprocessing also works on new, unlabeled text.
def preprocess(texts):
    """Turn raw sentences into one feature sequence per sentence.

    Args:
        texts: Iterable of raw sentence strings.

    Returns:
        A list holding the result of ``sent2features`` for each sentence,
        in input order.
    """
    # sents2tuples already builds a new list, so it is iterated directly
    # instead of being copied element by element first.
    return [sent2features(sent) for sent in sents2tuples(texts)]
samples = [
    "Facebook has a price target of $ 20 for this quarter",
    "$ AAPL is gaining a new momentum",
]
# Featurize the raw sentences, then ask `crf` for one label sequence per
# sentence.
processed = preprocess(samples)
pred = crf.predict(processed)
for sentence, labels in zip(samples, pred):
    # Tokenize with word_tokenize, the same tokenizer preprocessing uses, so
    # every printed token lines up with its label. Splitting on whitespace
    # can yield a different token count (e.g. attached punctuation) and
    # would misalign the labels or index past their end.
    for token, label in zip(word_tokenize(sentence), labels):
        print(token, '-->', label)
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment