Skip to content

Instantly share code, notes, and snippets.

@cquest
Created July 24, 2021 17:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cquest/1e75e409e4a8b92fcbaef0b733ad16f9 to your computer and use it in GitHub Desktop.
Save cquest/1e75e409e4a8b92fcbaef0b733ad16f9 to your computer and use it in GitHub Desktop.
Script de contrôle des attributions sur les cartes OpenStreetMap
#! /usr/bin/python3
import csv
import fileinput
import re
import socket
import sys
import time
from urllib.parse import urlsplit

import psycopg2
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
#from selenium.webdriver.chrome.options import Options
from selenium.webdriver.firefox.options import Options as FirefoxOptions
# The DSN is empty, so no connection setting is given here.
# NOTE(review): presumably libpq then falls back to its defaults / PG*
# environment variables -- confirm for the target deployment.
conn = psycopg2.connect("")
db = conn.cursor()

# Table receiving the check results; only created when it does not exist yet.
db.execute(
    "CREATE TABLE IF NOT EXISTS attributor ("
    "ref text, status text, tiles text, site text, refip text, last timestamp"
    ")"
)
# Headless Firefox driver used to render the pages.
options = FirefoxOptions()
# Pass the command-line flag instead of setting `options.headless`: the
# property is deprecated/removed in recent Selenium 4 releases, while the
# flag is accepted by old and new versions alike.
options.add_argument("-headless")
browser = webdriver.Firefox(options=options)
# Give up on a page that has not finished loading after 5 seconds.
browser.set_page_load_timeout(5)
# Texts searched for (as partial link text) to recognise an attribution link.
_ATTRIBUTION_LINK_TEXTS = ("OpenStreetMap", "OPENSTREETMAP", "Open Street Map", "OSM")

# Every result row is written through this statement; values are always passed
# as query parameters, never interpolated into the SQL text.
_INSERT_ROW = "INSERT INTO attributor VALUES (%s,%s,%s,%s,%s,now())"


def _classify_page(driver):
    """Classify the page currently loaded in *driver*.

    Returns a ``(status, tile)`` tuple. *tile* is the ``src`` of the first
    ``<img>`` whose ``src`` contains "openstreetmap" (``''`` when there is
    none) and *status* is one of:

    * ``'no_map'`` -- the page has no ``<img>`` element at all;
    * ``'no_tile'`` -- it has images, but none with "openstreetmap" in ``src``;
    * ``'attribution_link'`` -- a link whose text contains one of
      ``_ATTRIBUTION_LINK_TEXTS`` was found;
    * ``'attribution_text'`` -- no such link, but the body text contains
      "openstreetmap" or "open street map" (compared in lower case);
    * ``'no_attribution'`` -- neither a link nor a textual mention.

    Raises whatever the WebDriver raises (e.g. when the page has no body).
    """
    images = driver.find_elements(By.TAG_NAME, 'img')
    if not images:
        return 'no_map', ''

    # First image that looks like it is served from an OpenStreetMap host.
    sources = (image.get_attribute('src') for image in images)
    tile = next((src for src in sources if src and "openstreetmap" in src), '')
    if not tile:
        return 'no_tile', ''

    # find_elements() returns an empty list instead of raising when nothing
    # matches, so no exception handling is needed for the lookups.
    if any(driver.find_elements(By.PARTIAL_LINK_TEXT, text)
           for text in _ATTRIBUTION_LINK_TEXTS):
        return 'attribution_link', tile

    body_text = driver.find_element(By.TAG_NAME, "body").text.lower()
    if "openstreetmap" in body_text or "open street map" in body_text:
        return 'attribution_text', tile
    return 'no_attribution', tile


# Main loop: read one URL per input line (files named on the command line, or
# stdin), check each page and store one row per URL in `attributor`.
print()  # blank line for the first progress update to overwrite
try:
    for ref in fileinput.input():
        # Progress display: cursor up one line, clear it, print the current
        # input line (which still carries its newline), return to column 0.
        sys.stdout.write('\033[A\033[K' + ref + '\r')
        # Close the transaction left open by the previous line.
        conn.commit()

        # Strip only the line terminator, so a last line without a trailing
        # newline keeps its final character.  The quote substitution is no
        # longer needed for SQL safety; it is kept so that refs still match
        # the rows written by earlier runs.
        url = ref.rstrip('\r\n').replace("'", "%22")
        if not url.startswith('http'):
            continue  # not a URL: nothing to check

        site = ''  # reset per URL so an error row never reuses a stale host
        try:
            # Skip URLs that already have a row, before spending a DNS
            # lookup and a page load on them.
            db.execute("SELECT 1 FROM attributor WHERE ref=%s", (url,))
            if db.fetchone():
                continue

            site = urlsplit(url).hostname or ''
            refip = socket.gethostbyname(site)

            browser.get(url)
            # NOTE(review): presumably gives page scripts time to insert the
            # map tiles before the DOM is inspected -- confirm.
            time.sleep(1)

            status, tile = _classify_page(browser)
            if status == 'no_attribution':
                # Report offending pages on stdout.
                print(url)
            db.execute(_INSERT_ROW, (url, status, tile, site, refip))
        except Exception:
            # Any per-URL failure (DNS, page-load timeout, WebDriver or SQL
            # error) is recorded as an 'error' row.  Roll back first: after a
            # failed statement the transaction is aborted and would reject
            # the INSERT below.
            conn.rollback()
            db.execute(_INSERT_ROW, (url, 'error', '', site, ''))
    conn.commit()
finally:
    # Always shut the headless browser down, even on Ctrl-C or a fatal error.
    browser.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment