Skip to content

Instantly share code, notes, and snippets.

@nas-

nas-/main.py Secret

Created November 19, 2020 23:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nas-/6d50ec98baf8925a9f96605213d4bfd4 to your computer and use it in GitHub Desktop.
Save nas-/6d50ec98baf8925a9f96605213d4bfd4 to your computer and use it in GitHub Desktop.
Scrape MarineTraffic
from seleniumwire import webdriver
import json
import time
# Build the Chrome option set used for the scraping session. The switches are
# applied in the order listed; note that the user-agent string advertises
# Firefox even though the browser being driven is Chrome.
chrome_options = webdriver.ChromeOptions()
for _chrome_switch in (
    '--no-sandbox',
    '--headless',
    'disable-blink-features=AutomationControlled',
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
):
    chrome_options.add_argument(_chrome_switch)

# `webdriver` comes from seleniumwire, so the resulting driver also records
# the network requests issued by each page (read later via `driver.requests`).
driver = webdriver.Chrome('chromedriver', options=chrome_options)
driver.maximize_window()
# Label -> MarineTraffic map URL. Each URL encodes the viewport to open:
# its center (centerx / centery) and its zoom level.
CONFIG = dict((
    ('BOSS', 'https://www.marinetraffic.com/en/ais/home/centerx:-32.7/centery:-10.1/zoom:10'),
    ('LinkedOut', 'https://www.marinetraffic.com/en/ais/home/centerx:-31.4/centery:-9.7/zoom:10'),
    ('APIVIA', 'https://www.marinetraffic.com/en/ais/home/centerx:-31.4/centery:-8.5/zoom:13'),
    ('CAM', 'https://www.marinetraffic.com/en/ais/home/centerx:-30.5/centery:-6.5/zoom:11'),
))
def unpack_boats(boats_response):
    """Return the 'Pleasure Craft' rows found in a batch of response bodies.

    Each body is expected to be JSON shaped like
    ``{"data": {"rows": [{"TYPE_NAME": ..., ...}, ...]}}``. Bodies that do not
    match that shape are skipped instead of raising, so a single unexpected
    response cannot abort the whole scrape.

    Args:
        boats_response: iterable of JSON documents (``str`` or ``bytes``).

    Returns:
        A list of the row dicts whose ``TYPE_NAME`` is ``'Pleasure Craft'``,
        in the order they were encountered.
    """
    pleasures = []
    for body in boats_response:
        try:
            payload = json.loads(body)
        except ValueError:
            # Empty or non-JSON body (JSONDecodeError is a ValueError).
            continue
        # Valid JSON is not necessarily an object: numbers, lists, strings and
        # null all parse fine but carry no 'data' section.
        if not isinstance(payload, dict):
            continue
        data = payload.get('data')
        if not isinstance(data, dict):
            continue
        rows = data.get('rows')
        if not isinstance(rows, list):
            continue
        pleasures.extend(
            row for row in rows
            if isinstance(row, dict) and row.get('TYPE_NAME') == 'Pleasure Craft'
        )
    return pleasures
# Only responses from this endpoint are inspected for boat rows.
_GET_DATA_PREFIX = 'https://www.marinetraffic.com/getData'


def _fetch_boats(url):
    """Load *url* in the browser and return the pleasure-craft rows it produced."""
    # Start from an empty capture so responses recorded for a previously
    # visited page can never be attributed to this one.
    del driver.requests
    driver.get(url)
    # Fixed pause; presumably gives the map time to issue its getData requests.
    time.sleep(5)
    bodies = [
        request.response.body
        for request in driver.requests
        # A captured request may have no response; reading `.body` on it
        # would raise AttributeError.
        if request.url.startswith(_GET_DATA_PREFIX) and request.response is not None
    ]
    return unpack_boats(bodies)


def _describe_boat(boat):
    """Format one boat row as the position/speed summary printed for each label."""
    # SPEED is divided by 10 before printing (a raw '149' is shown as 14.9).
    return (
        f"LAT:{boat.get('LAT')} LON :{boat.get('LON')} "
        f"SPEED :{int(boat.get('SPEED')) / 10}, HEADING :{boat.get('HEADING')} "
        f"ELAPSED :{boat.get('ELAPSED')}"
    )


try:
    for item, start_url in CONFIG.items():
        boats = _fetch_boats(start_url)
        if not boats:
            print(f'{item} --> Not found :(')
            continue
        if len(boats) > 1:
            # Several candidates in view: retry once, zoomed in by two levels,
            # to try to isolate a single boat.
            print(f'{item} --> More than 1 boat found!')
            # Split on the last ':' rather than slicing two characters, so a
            # zoom level of any width is parsed correctly.
            base_url, zoom_text = start_url.rsplit(':', 1)
            zoom = int(zoom_text) + 2
            print(f'requesting new url with zoom {zoom}')
            boats = _fetch_boats(f"{base_url}:{zoom}")
            if len(boats) != 1:
                print(f'{item} -->Boat not found :(')
                continue
        print(f"{item} --> {_describe_boat(boats[0])}")
finally:
    # Always shut the browser down, even if a page load or parse step failed,
    # so no headless Chrome process is left running.
    driver.quit()
"""
[{'LAT': '-1.276983', 'LON': '-28.90611', 'SPEED': '149', 'COURSE': '188',
'HEADING': '188', 'ELAPSED': '396', 'SHIPNAME': '[SAT-AIS]', 'SHIPTYPE': '9',
'SHIP_ID': 'T1RjME56TXhPVGMwTnpNeE9UYzBOdz09LTdiR2Q3SVRBWE9JQytIbEtJYWtxTkE9PQ==',
'TYPE_IMG': '9', 'TYPE_NAME': 'Pleasure Craft', 'STATUS_NAME': 'Unknown'}]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment