-
-
Save nas-/6d50ec98baf8925a9f96605213d4bfd4 to your computer and use it in GitHub Desktop.
scrape marinetraffic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from seleniumwire import webdriver | |
import json | |
import time | |
# Build the Chrome options from one list of command-line switches.
# NOTE(review): the user-agent below is a Firefox string even though the
# browser being driven is Chrome — presumably intentional; confirm.
chrome_options = webdriver.ChromeOptions()
for _switch in (
    '--no-sandbox',
    '--headless',
    'disable-blink-features=AutomationControlled',
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
):
    chrome_options.add_argument(_switch)

# Start the browser through the 'chromedriver' executable and maximize it.
driver = webdriver.Chrome('chromedriver', options=chrome_options)
driver.maximize_window()
# Template shared by every map-view URL below; the three slots are the
# centerx, centery and zoom path components.
_MAP_VIEW_URL = 'https://www.marinetraffic.com/en/ais/home/centerx:{}/centery:{}/zoom:{}'

# Label -> MarineTraffic map URL to open for that label.
CONFIG = {
    label: _MAP_VIEW_URL.format(center_x, center_y, zoom_level)
    for label, center_x, center_y, zoom_level in (
        ('BOSS', '-32.7', '-10.1', '10'),
        ('LinkedOut', '-31.4', '-9.7', '10'),
        ('APIVIA', '-31.4', '-8.5', '13'),
        ('CAM', '-30.5', '-6.5', '11'),
    )
}
def unpack_boats(boats_response):
    """Return the 'Pleasure Craft' rows found in captured response bodies.

    Args:
        boats_response: Iterable of JSON documents (``str`` or ``bytes``),
            one per captured response body.

    Returns:
        A list of the row dicts whose ``TYPE_NAME`` equals
        ``'Pleasure Craft'``, in the order they were encountered. Bodies
        that do not have the ``{"data": {"rows": [...]}}`` shape contribute
        nothing.

    Raises:
        ValueError: If a body is not valid JSON (``json.JSONDecodeError``
            is a subclass of ``ValueError``).
    """
    pleasures = []
    for body in boats_response:
        payload = json.loads(body)
        # A body may decode to a bare number, list or null; only objects
        # can carry the 'data' section we are after.
        if not isinstance(payload, dict):
            continue
        data = payload.get('data')
        if not isinstance(data, dict):
            continue
        rows = data.get('rows')
        # 'rows' may be absent; iterating None would raise.
        if not isinstance(rows, list):
            continue
        pleasures.extend(
            row for row in rows
            if isinstance(row, dict) and row.get('TYPE_NAME') == 'Pleasure Craft'
        )
    return pleasures
def _collect_boats(browser):
    """Return the pleasure-craft rows from the getData responses captured so far."""
    bodies = [
        request.response.body
        for request in browser.requests
        # A request that has not completed has no response object yet.
        if request.response is not None
        and request.url.startswith('https://www.marinetraffic.com/getData')
    ]
    return unpack_boats(bodies)


def _zoomed_in(url, step=2):
    """Return ``(new_zoom, new_url)`` with the trailing ``zoom:N`` raised by ``step``."""
    # Split on the marker instead of slicing two characters so that
    # single-digit zoom levels are handled as well.
    prefix, marker, zoom = url.rpartition('zoom:')
    new_zoom = int(zoom) + step
    return new_zoom, f"{prefix}{marker}{new_zoom}"


def _report(name, boat):
    """Print one boat row in the script's one-line report format."""
    raw_speed = boat.get('SPEED')
    # The reported value is SPEED divided by 10 (presumably the feed uses
    # tenths of a unit — confirm); a missing SPEED prints as None.
    speed = int(raw_speed) / 10 if raw_speed is not None else None
    print(
        f"{name} --> LAT:{boat.get('LAT')} LON :{boat.get('LON')} SPEED :{speed}, HEADING :{boat.get('HEADING')} ELAPSED :{boat.get('ELAPSED')}")


try:
    for name, start_url in CONFIG.items():
        driver.get(start_url)
        time.sleep(5)  # fixed wait for the page to issue its getData calls
        boats = _collect_boats(driver)
        # Always drop the captured requests so they cannot be counted
        # again for the next page load.
        del driver.requests

        if not boats:
            print(f'{name} --> Not found :(')
            continue

        if len(boats) > 1:
            # Ambiguous view: retry once with a closer zoom.
            print(f'{name} --> More than 1 boat found!')
            new_zoom, closer_url = _zoomed_in(start_url)
            print(f'requesting new url with zoom {new_zoom}')
            driver.get(closer_url)
            time.sleep(5)
            boats = _collect_boats(driver)
            del driver.requests
            if len(boats) != 1:
                print(f'{name} -->Boat not found :(')
                continue

        _report(name, boats[0])
finally:
    # Shut the browser down even if a page load or parse fails.
    driver.quit()
"""
Sample output, e.g. the list returned by unpack_boats for one matching row:

[{'LAT': '-1.276983', 'LON': '-28.90611', 'SPEED': '149', 'COURSE': '188',
'HEADING': '188', 'ELAPSED': '396', 'SHIPNAME': '[SAT-AIS]', 'SHIPTYPE': '9',
'SHIP_ID': 'T1RjME56TXhPVGMwTnpNeE9UYzBOdz09LTdiR2Q3SVRBWE9JQytIbEtJYWtxTkE9PQ==',
'TYPE_IMG': '9', 'TYPE_NAME': 'Pleasure Craft', 'STATUS_NAME': 'Unknown'}]
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment