Skip to content

Instantly share code, notes, and snippets.

@ettorerizza
Last active June 24, 2016 14:01
Show Gist options
  • Save ettorerizza/ac91ab4e23e5bcf884862b9a9d8547eb to your computer and use it in GitHub Desktop.
Save ettorerizza/ac91ab4e23e5bcf884862b9a9d8547eb to your computer and use it in GitHub Desktop.
# Ce script récupère une liste de noms et vérifie d'abord s'ils existent dans Wikipedia.fr, puis dans Wikipedia.nl
# -*- coding: utf-8 -*-
######################################################
#
# Ce script récupère une liste de noms et vérifie
# d'abord s'ils existent dans Wikipedia.fr, puis
# dans Wikipedia.nl
#
######################################################
import codecs
import wikipedia
# voir https://pypi.python.org/pypi/wikipedia/
def read_names(path):
    """Return the lines of the UTF-8 text file at *path*, one name per line.

    Blank lines are kept as-is so that every input line yields exactly one
    result line, keeping input and output aligned.
    """
    with codecs.open(path, 'r', encoding='utf-8') as source:
        return source.read().splitlines()


def lookup(name, languages=("fr", "nl")):
    """Return ``"<url> ||| <summary>"`` for *name*, or ``"notFind <name>"``.

    The Wikipedia editions listed in *languages* are tried in order; the
    first edition for which both page requests succeed wins. Any failure
    (missing page, disambiguation, network error, ...) moves on to the next
    edition, and the "notFind" marker is returned when all of them fail.
    """
    for lang in languages:
        try:
            wikipedia.set_lang(lang)
            # NOTE(review): the URL is taken from the auto-suggested page
            # while the summary is taken from the exact-title page, so the
            # two may describe different articles. Kept as in the original
            # script -- confirm which behaviour is intended.
            url = wikipedia.page(name, auto_suggest=True).url
            summary = wikipedia.page(name, auto_suggest=False).summary
        except Exception:
            # Best-effort lookup: fall through to the next language.
            # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
            continue
        return url + " ||| " + summary
    return "notFind " + name


def write_results(results, path):
    """Write each entry of *results* on its own line to *path* (UTF-8)."""
    with codecs.open(path, "w", encoding='utf-8') as sink:
        for entry in results:
            sink.write(entry + "\n")


def main():
    """Look up every name from ``fichier.txt`` and save to ``resultats.txt``.

    Each result is also printed as soon as it is available, so progress is
    visible while the (slow) network lookups are running.
    """
    results = []
    for name in read_names('fichier.txt'):
        entry = lookup(name)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(entry)
        # Accumulate every result; the output file is written once at the
        # end with one result per line.
        results.append(entry)
    write_results(results, "resultats.txt")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment