Skip to content

Instantly share code, notes, and snippets.

@NesrineSF
NesrineSF / lemmatization.py
Last active March 31, 2021 20:42
Same word belonging to different lemmas
# Two sentences in which the same surface form "living" resolves to
# different lemmas: the verb "live" vs. the noun "living".
living_from_live = "She's living her best life"
living_from_living = "What do you do for a living?"

language = 'en'

# Ask the expert.ai NL API for the disambiguation analysis of the
# first sentence. NOTE(review): `client` is created in another snippet.
output = client.specific_resource_analysis(
    body={"document": {"text": living_from_live}},
    params={'language': language, 'resource': 'disambiguation'},
)
@NesrineSF
NesrineSF / concept_ID.py
Created March 31, 2021 16:33
Concept ID for the word "object" within the knowledge graph
# --- Concept ID for object_noun -------------------------------------------
# For each token of the analysed sentence, print the token text, its part
# of speech and the syncon ID. "Syncon" is expert.ai's name for a concept
# in the knowledge graph, referenced by an integer ID.
# NOTE(review): `output` and `object_noun` come from the request snippet.
print(f'\t \033[1mConcept_ID for object when NOUN \033[0m \n')
# Bold column header: TOKEN / POS / ID.
print(f'\033[1m{"TOKEN":{20}} {"POS":{15}} {"ID":{6}}\033[0m')
# Syncon stands for "Concept" that we refer to with an ID.
# (Indentation of the loop bodies restored — it was lost when the code
# was pasted.)
for token in output.tokens:
    print(f'{object_noun[token.start:token.end]:{20}} {token.pos:{15}} {token.syncon:{6}} ')

# --- POS of the first sentence --------------------------------------------
print(f'\t \033[1mOutput of the first sentence : \033[0m \n')
# Bold column header: TOKEN / POS.
print(f'\033[1m{"TOKEN":{20}} {"POS":{6}}\033[0m')
# Iterate over the tokens and show the POS tag assigned to each one.
for token in output.tokens:
    print(f'{object_noun[token.start:token.end]:{20}} {token.pos:{6}}')
# POS of the second sentence
@NesrineSF
NesrineSF / request.py
Created March 31, 2021 15:21
Request to the API for both sentences
# First sentence: "object" is used as a noun.
object_noun = "The object of this exercise is to raise money for the charity."
# Second sentence: "object" is used as a verb.
object_verb = "A lot of people will object to the book."

language = 'en'

# Run the disambiguation analysis on the first sentence.
# NOTE(review): `client` is created in another snippet.
output = client.specific_resource_analysis(
    body={"document": {"text": object_noun}},
    params={'language': language, 'resource': 'disambiguation'},
)
@NesrineSF
NesrineSF / API_request_1.py
Created March 30, 2021 21:16
API Request for the 1st sentence
# First sentence: "object" is used as a noun.
object_noun = "The object of this exercise is to raise money for the charity."

language = 'en'

# Disambiguation request to the expert.ai NL API for this sentence.
# NOTE(review): `client` is created in another snippet.
output = client.specific_resource_analysis(
    body={"document": {"text": object_noun}},
    params={'language': language, 'resource': 'disambiguation'},
)
@NesrineSF
NesrineSF / sentences_for_POS_Tagging.py
Created March 30, 2021 21:12
Declare the variables related to each sentence
# Example pair for POS tagging: the same surface form "object" acts as a
# noun in the first sentence and as a verb in the second.
object_noun = "The object of this exercise is to raise money for the charity."
object_verb = "A lot of people will object to the book."
@NesrineSF
NesrineSF / atom_subdivision.py
Created March 30, 2021 21:09
tokenization to get the atom level of the sentence
# Walk the tokens of the analysed text; for compound words, also descend
# to the atom level (the smallest sub-token units the API reports).
# NOTE(review): `output` and `text` come from the request snippet.
# (Indentation restored — it was lost when the code was pasted, which
# also left the atom loop detached from the token loop.)
for token in output.tokens:
    # Token surface form, left-aligned in a 20-character column.
    print(f'{text[token.start:token.end]:{20}}')
    # We iterate on the token's atoms to subdivide compound words
    # down to the atom level.
    for atom in token.atoms:
        print(f'\t atom:{text[atom.start:atom.end]:{20}}')
@NesrineSF
NesrineSF / token_subdivison.py
Created March 30, 2021 21:08
tokenization with expert.ai NL API
# Sentence to tokenise with the expert.ai NL API.
text = "CNBC has commented on the robot's lifelike skin and her ability to emulate more than 60 facial expressions."
language = 'en'

# The disambiguation resource includes tokenisation in its response.
# NOTE(review): `client` is created in another snippet.
output = client.specific_resource_analysis(
    body={"document": {"text": text}},
    params={'language': language, 'resource': 'disambiguation'},
)

# Header for the token listing that follows.
print(f'{"TOKEN":{20}} ')
@NesrineSF
NesrineSF / split_sentence.py
Created March 30, 2021 21:05
split() function to tokenize the sentence
# Naive whitespace tokenisation with str.split(); note that punctuation
# stays attached to words (e.g. the final token is "expressions.").
text = "CNBC has commented on the robot's lifelike skin and her ability to emulate more than 60 facial expressions."
tokens = text.split()
print(f'These are the tokens of the sentence {tokens}')
@NesrineSF
NesrineSF / phrase_array_size.py
Created March 30, 2021 21:03
Output arrays size
#Output arrays size
# Print how many phrase elements the analysis detected (length of the
# `phrases` array on the API response).
# NOTE(review): `output` is assumed to come from an earlier
# client.specific_resource_analysis call in another snippet — verify.
print("phrases array size: ", len(output.phrases))