Pierre-Alexandre Fonta pafonta

## entity_linking.py
# For an example of use, see https://gist.github.com/pafonta/21f3db4d9c31f6a1c2f7ede8cbf3406b#gistcomment-3970844.

"""Entity Linking - Link mentions from texts to terms in ontologies.

Use character-based embedding to handle plurals, misspellings, partial matches, ...
"""

import pickle

import faiss

## pubmed_statistics.py
# For an example of use, see https://gist.github.com/pafonta/37762f56e8c1879569bca64901d0a000#gistcomment-3968062.

"""Collect statistics on PubMed articles."""

from __future__ import annotations

from pathlib import Path

from defusedxml import ElementTree
from tqdm import tqdm

## nlm_mesh_tree.py
# For an example of use, see https://gist.github.com/pafonta/d33a0d5d849932f8ceab8b711d995497#gistcomment-3965575.

"""Find MeSH terms in the MeSH tree simply (i.e. without using a graph)."""

from __future__ import annotations

import json
from collections.abc import Iterator
from xml.etree.ElementTree import Element  # nosec

## nlm_mesh.py
# For an example of use, see https://gist.github.com/pafonta/162c1b9ec0380e95a017297a707a4d66#gistcomment-3935739.

"""Find & Rank MeSH terms associated with an author."""

from __future__ import annotations

import json
from collections import Counter
from collections.abc import Iterator
from xml.etree.ElementTree import Element  # nosec

## cv_tf-idf.py
import pandas as pd

# Corpus

filename = 'sentences_cord19_v47.parquet'
sentences = pd.read_parquet(filename)
# This contains all the sentences as a list.
corpus = sentences.text.tolist()

# Count

## tf_idf_2.py
import datetime

from pyspark.ml import Pipeline
from pyspark.ml.feature import HashingTF, IDF, Tokenizer
from pyspark.sql import SparkSession


def p(text: str) -> None:
    now = datetime.datetime.now()
    time = now.strftime("%Y-%m-%d %H:%M:%S")

## Knowledge Graph Embeddings PoC.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                pafonta
                / Knowledge Graph Embeddings PoC.ipynb
            
            
              Created
              July 10, 2020 15:57
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Retrieve leaf nodes.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                pafonta
                / Retrieve leaf nodes.ipynb
            
            
              Created
              July 10, 2020 10:00
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## extract_relations_umls
nlp2 = load_model(MODEL)
nlp2.add_pipe(nlp2.create_pipe("merge_entities"))
nlp2.add_pipe(nlp2.create_pipe("merge_noun_chunks"))

def extract_relations_umls(text, umls):
    ents = {x[0].lower(): x[2] for x in umls}
    rels = []
    for x in nlp2(text):
        if x.dep_ != "ROOT":
            if x.ent_type_ == "ENTITY":

## Blue Brain Knowledge Graph.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                pafonta
                / Blue Brain Knowledge Graph.ipynb
            
            
              Last active
              October 2, 2024 14:39
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	# For an example of use, see https://gist.github.com/pafonta/21f3db4d9c31f6a1c2f7ede8cbf3406b#gistcomment-3970844.

	"""Entity Linking - Link mentions from texts to terms in ontologies.

	Use character-based embedding to handle plurals, misspellings, partial matches, ...
	"""

	import pickle

	import faiss
	# For an example of use, see https://gist.github.com/pafonta/37762f56e8c1879569bca64901d0a000#gistcomment-3968062.

	"""Collect statistics on PubMed articles."""

	from __future__ import annotations

	from pathlib import Path

	from defusedxml import ElementTree
	from tqdm import tqdm
	# For an example of use, see https://gist.github.com/pafonta/d33a0d5d849932f8ceab8b711d995497#gistcomment-3965575.

	"""Find MeSH terms in the MeSH tree simply (i.e. without using a graph)."""

	from __future__ import annotations

	import json
	from collections.abc import Iterator
	from xml.etree.ElementTree import Element # nosec
	# For an example of use, see https://gist.github.com/pafonta/162c1b9ec0380e95a017297a707a4d66#gistcomment-3935739.

	"""Find & Rank MeSH terms associated with an author."""

	from __future__ import annotations

	import json
	from collections import Counter
	from collections.abc import Iterator
	from xml.etree.ElementTree import Element # nosec
	import pandas as pd

	# Corpus

	filename = 'sentences_cord19_v47.parquet'
	sentences = pd.read_parquet(filename)
	# This contains all the sentences as a list.
	corpus = sentences.text.tolist()

	# Count
	import datetime

	from pyspark.ml import Pipeline
	from pyspark.ml.feature import HashingTF, IDF, Tokenizer
	from pyspark.sql import SparkSession


	def p(text: str) -> None:
	now = datetime.datetime.now()
	time = now.strftime("%Y-%m-%d %H:%M:%S")
	nlp2 = load_model(MODEL)
	nlp2.add_pipe(nlp2.create_pipe("merge_entities"))
	nlp2.add_pipe(nlp2.create_pipe("merge_noun_chunks"))

	def extract_relations_umls(text, umls):
	ents = {x[0].lower(): x[2] for x in umls}
	rels = []
	for x in nlp2(text):
	if x.dep_ != "ROOT":
	if x.ent_type_ == "ENTITY":