Skip to content

Instantly share code, notes, and snippets.

@julienvaslet
Created May 25, 2018 12:47
Show Gist options
  • Save julienvaslet/ccfa2327bf2531eadc51cdab17300e03 to your computer and use it in GitHub Desktop.
Save julienvaslet/ccfa2327bf2531eadc51cdab17300e03 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import requests
import re
# Parameter names to look for; a result row is printed when the text of its
# first cell contains one of these.
requested_data = [ "Calcium" ]

# Form fields posted to the result page. "posPLV" is rewritten on every
# iteration of the search loop below.
post_data = {
    "methode": "rechercher",
    "idRegion": "73",
    "usd": "AEP",
    "posPLV": "0",
    "departement": "031",
    "communeDepartement": "555",
    "reseau": "000006_031"
}

# Upper bound on the number of result pages requested, so the search loop
# terminates even when the requested value never shows up.
max_tries = 100

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script forever.
request_timeout = 30


def _is_values_table( table ):
    """Return True when a bold ("gras") cell of *table* mentions "Valeur"."""
    # Compare against the whole cell text: some cells wrap their text in a
    # <div>, so looking only at the first child would miss them.
    return any(
        "Valeur" in td.get_text()
        for td in table.find_all( "td", { "class": "gras" } )
    )


def _print_requested_rows( table, wanted ):
    """Print every row of *table* whose first <td> mentions one of *wanted*."""
    for tr in table.find_all( "tr" ):
        first_cell = tr.find( "td" )

        if first_cell is None:
            # Rows without any <td> (e.g. header rows) cannot match.
            continue

        label = first_cell.get_text()

        if any( value in label for value in wanted ):
            print( " ".join( tr.stripped_strings ) )


http_session = requests.session()

# The menu page is loaded first through the same session that is later used
# to post the search form.
first_url = "https://orobnat.sante.gouv.fr/orobnat/afficherPage.do?methode=menu&idRegion={region}&dpt={dpt}&usd={usd}&comDpt={comdpt}".format(
    region=post_data["idRegion"],
    dpt=post_data["departement"],
    usd=post_data["usd"],
    comdpt=post_data["communeDepartement"]
)

print( "Loading:", first_url )
result = http_session.get( first_url, timeout=request_timeout )

if result.status_code == 200:
    failure = False
    pos_plv = 0
    source_url = "https://orobnat.sante.gouv.fr/orobnat/rechercherResultatQualite.do"

    # Only the first requested name decides when to stop paging. It is
    # escaped so that it is matched literally, like the row filter does.
    pattern = re.compile( re.escape( requested_data[0] ) )

    # Post the form with an increasing "posPLV" until the returned page
    # mentions the requested name.
    while pattern.search( result.text ) is None:
        if pos_plv >= max_tries:
            print( "Giving up after {0} tries".format( pos_plv ) )
            failure = True
            break

        print( "Try #{0}".format( pos_plv ) )
        print( "Loading:", source_url )
        result = http_session.post( source_url, data=post_data, timeout=request_timeout )

        if result.status_code != 200:
            print( "Unable to get page:", result.status_code, result.reason )
            failure = True
            break

        pos_plv += 1
        post_data["posPLV"] = str(pos_plv)

    if not failure:
        print( "Data found." )

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup( result.text, "html.parser" )

        # All page tables have the same "id"...
        for table in soup.find_all( id="tableau" ):
            if _is_values_table( table ):
                # Good table !
                _print_requested_rows( table, requested_data )

    # NOTE(review): the pasted source lost its indentation; this "else" is
    # assumed to pair with "if not failure" - confirm against the original.
    else:
        print( "Unable to find criteria" )

else:
    print( "Can not get main page:", result.status_code, result.reason )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment