Skip to content

Instantly share code, notes, and snippets.

@zcourtois
Created February 4, 2022 09:39
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save zcourtois/5a4a1c6f1a09096757b0d20b8e8e8f7d to your computer and use it in GitHub Desktop.
Agenda de Frédérique Vidal
import csv
import pprint
import re
import stanza
# French stanza pipeline, built once at module level; the processing loop
# further down calls it on the description of every matching CSV row.
nlp = stanza.Pipeline("fr")
# Declarations
# Canonical name under which every mention of the president is counted.
em = "Emmanuel Macron"
# Title-cased form of the president's title, compared against validated names.
pr = "Président De La République"
# Progress counter: printed and incremented by the processing loop.
gunther = 0

# Containers
# Maps a person's name to the number of meetings counted for them.
visits = {}
# Not used in the visible part of the script.
titles = {}
# Honorifics to strip from the start of a name. The entries are written as
# regular-expression fragments (escaped dots, optional "?"), so they are raw
# strings: a plain "\." is an invalid escape sequence and triggers a warning
# on recent Python versions. The runtime values are unchanged.
prefixes = [
    r"M\.",
    r"M",
    r"Mme",
    r"Madame",
    r"Monsieur",
    r"le Professeur",
    r"le Pr\.",
    r"S\.E\.M\.?",
    r"le Dr\.?",
    r"Dr\.",
]
# Strip the first matching honorific from the start of a name.
def removeprefix(text, prefixes):
    """Return *text* without its leading honorific, if it has one.

    Args:
        text: The name to clean, e.g. "Mme Marie Curie".
        prefixes: Iterable of regular-expression fragments, tried in order.
            Each one is anchored at the start of *text* and must be followed
            by whitespace, so a short prefix such as "M" cannot eat the first
            letter of a name like "Marie".

    Returns:
        *text* with the first matching prefix and the whitespace after it
        removed, or *text* unchanged when no prefix matches.
    """
    for prefix in prefixes:
        # re.match anchors at the start; the mandatory \s+ both enforces a
        # word boundary after the honorific and consumes the separator.
        match = re.match(rf"(?:{prefix})\s+", text)
        if match:
            return text[match.end():]
    return text
# Accept an entity only if it is tagged as a person, then drop its honorific.
def validate(entity):
    """Return a normalised person name for *entity*, or None to reject it.

    An entity is rejected when its type is not "PER" or when its text
    contains no space. Otherwise the text is passed through removeprefix()
    with the module-level prefixes and returned title-cased.
    """
    is_person = entity.type == "PER"
    if not is_person or " " not in entity.text:
        return None
    return removeprefix(entity.text, prefixes).title()
# Only face-to-face meetings are kept: rows whose description starts with
# "Entretien".
# newline="" is what the csv module asks for when opening files it reads.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches the encoding of clean.csv.
with open("clean.csv", "r", newline="") as csvfile:
    reader = csv.DictReader(csvfile)
    entretien = re.compile("Entretien.*")
    for row in reader:
        description = row["Description"]
        if entretien.match(description) is None:
            continue
        print(gunther)
        doc = nlp(description)
        # Reset once per meeting, NOT once per entity: the flag must survive
        # across the entities of one description so that a meeting naming
        # both "Emmanuel Macron" and his title is counted a single time.
        macron_present = False
        for entity in doc.entities:
            # validate() returns None for anything that is not a usable
            # person name (including non-"PER" entities).
            person = validate(entity)
            if person is None:
                continue
            if person == em or person == pr:
                if macron_present:
                    continue
                # Count the title under the president's own name.
                person = em
                macron_present = True
            # Update the visit counter for this person.
            visits[person] = visits.get(person, 0) + 1
        gunther += 1

# Print (name, count) pairs ordered from least to most visits.
pprint.pprint(sorted(visits.items(), key=lambda item: item[1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment