This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For an example of use, see https://gist.github.com/pafonta/21f3db4d9c31f6a1c2f7ede8cbf3406b#gistcomment-3970844. | |
"""Entity Linking - Link mentions from texts to terms in ontologies. | |
Use character-based embedding to handle plurals, misspellings, partial matches, ... | |
""" | |
import pickle | |
import faiss |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For an example of use, see https://gist.github.com/pafonta/37762f56e8c1879569bca64901d0a000#gistcomment-3968062. | |
"""Collect statistics on PubMed articles.""" | |
from __future__ import annotations | |
from pathlib import Path | |
from defusedxml import ElementTree | |
from tqdm import tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For an example of use, see https://gist.github.com/pafonta/d33a0d5d849932f8ceab8b711d995497#gistcomment-3965575. | |
"""Find MeSH terms in the MeSH tree simply (i.e. without using a graph).""" | |
from __future__ import annotations | |
import json | |
from collections.abc import Iterator | |
from xml.etree.ElementTree import Element # nosec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For an example of use, see https://gist.github.com/pafonta/162c1b9ec0380e95a017297a707a4d66#gistcomment-3935739. | |
"""Find & Rank MeSH terms associated with an author.""" | |
from __future__ import annotations | |
import json | |
from collections import Counter | |
from collections.abc import Iterator | |
from xml.etree.ElementTree import Element # nosec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Corpus | |
filename = 'sentences_cord19_v47.parquet' | |
sentences = pd.read_parquet(filename) | |
# This contains all the sentences as a list. | |
corpus = sentences.text.tolist() | |
# Count |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
from pyspark.ml import Pipeline | |
from pyspark.ml.feature import HashingTF, IDF, Tokenizer | |
from pyspark.sql import SparkSession | |
def p(text: str) -> None: | |
now = datetime.datetime.now() | |
time = now.strftime("%Y-%m-%d %H:%M:%S") |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nlp2 = load_model(MODEL) | |
nlp2.add_pipe(nlp2.create_pipe("merge_entities")) | |
nlp2.add_pipe(nlp2.create_pipe("merge_noun_chunks")) | |
def extract_relations_umls(text, umls): | |
ents = {x[0].lower(): x[2] for x in umls} | |
rels = [] | |
for x in nlp2(text): | |
if x.dep_ != "ROOT": | |
if x.ent_type_ == "ENTITY": |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
NewerOlder