# vesselfinder.com: scrape a single vessel's AIS position and log it to a CSV file every 15 minutes
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time
import csv
from selenium import webdriver
import schedule
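# Third-party dependencies used below: requests, beautifulsoup4 (with the lxml
# parser), selenium and schedule, plus a chromedriver binary for the optional
# screenshot step.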
def retrieve_website():
    """ (1) First we are going to get the data from the website """
    # Create headers, otherwise vesselfinder will block you
    headers = {'user-agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17'}
    # URL of the ship you want to track; execute the request and parse the response into 'soup'
    url = 'https://www.vesselfinder.com/vessels/MOTIVATION-D-IMO-9301108-MMSI-636092241'
    reqs = requests.get(url, headers=headers)
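    # Optional hardening (added suggestion): raise immediately if vesselfinder
    # blocked the request, instead of parsing an error page. raise_for_status()
    # is standard requests API.
    reqs.raise_for_status()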
    soup = BeautifulSoup(reqs.text, 'lxml')
    # Save the page to local disk
    with open("output1.html", "w", encoding='utf-8') as file:
        file.write(str(soup))
""" (2) Next part is to find some info we can put into a csv file """
# open file to local disk
with open("output1.html", "r", encoding='utf-8') as file:
soup = BeautifulSoup(file, 'lxml')
# All td tags are read into a list
data = soup.find_all('td')
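    # NOTE (added): the numeric indices below (19, 21, 25) match vesselfinder's
    # markup at the time of writing and will break silently if the page layout
    # changes. See the hedged extract_field() sketch after this function for a
    # lookup keyed on the 'data-title' attribute instead.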
    # Extract the coordinates
    coordinates = data[21].get_text()
    # Extract the date; the timestamp sits in the td's 'data-title' attribute,
    # so read it directly instead of re-parsing the tag
    dtg = data[25]['data-title']
    # Extract the heading / speed
    head_spd = data[19].get_text()
    heading = head_spd.split(' / ')[0]
    speed = head_spd.split(' / ')[1]
"""" (3) Final part, write the data to a csv file """
# Divide the coordinate pair into northing and south degrees
coordinates = str(coordinates)
north = coordinates.split('/')[0]
east = coordinates.split('/')[1]
# Transform dtg-element into date and time elements
dtg = dtg.replace(',', '').strip(' UTC')
dtg = datetime.strptime(dtg, '%b %d %Y %H:%M')
date = dtg.strftime('%Y-%m-%d')
current_time = dtg.strftime('%H:%M')
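    # Worked example (assumed page format): "Mar 27, 2021 12:11 UTC"
    #   -> dtg  = datetime(2021, 3, 27, 12, 11)
    #   -> date = "2021-03-27", current_time = "12:11"
    # Caveat: .strip(' UTC') strips the characters ' ', 'U', 'T', 'C' from the
    # ends rather than the literal substring; it happens to work for this format.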
    # The counter is a global variable
    global ctr
    # Append the data to a csv file with comma as separator
    with open('AIS_Track.csv', 'a', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow([ctr, north, east, date, current_time, heading, speed])
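    # Note (added): AIS_Track.csv is appended without a header row; to add one,
    # write it once before the first run, e.g. (column names chosen here only
    # for illustration):
    #   with open('AIS_Track.csv', 'w', newline='') as f:
    #       csv.writer(f).writerow(['nr', 'north', 'east', 'date', 'time', 'heading', 'speed'])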
""" (4) - OPTIONAL - If we want to create a screenshot of the website """
# Prepare prefix filename
dtg = datetime.now()
screenshot = dtg.strftime("screenshots/%Y%m%d_%H%M_screenshot.png")
# Find the image URL
img_url = soup.find_all('a', href=True)
img_url = img_url[22]['href']
# # Retrieve website
DRIVER = 'chromedriver'
driver = webdriver.Chrome(DRIVER)
driver.get('https://www.vesselfinder.com' + img_url)
time.sleep(5)
driver.save_screenshot(screenshot)
driver.quit()
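    # Suggestion (added): for unattended runs Chrome can be started headless;
    # this uses Selenium 3-style arguments to match the call above:
    #   options = webdriver.ChromeOptions()
    #   options.add_argument('--headless')
    #   driver = webdriver.Chrome(DRIVER, options=options)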
    # Print status message
    print(ctr, 'Last AIS data was sent at:', current_time, 'UTC')
    # Increment the counter
    ctr += 1
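
# Hedged sketch (added, not called by the script): the fixed td indices in
# retrieve_website() depend on vesselfinder's current markup. This helper looks
# a field up by its 'data-title' attribute instead, the same attribute the
# timestamp cell above already carries. The example attribute value below is an
# assumption about the page, not a verified name.
def extract_field(soup, title):
    """Return the text of the td whose data-title equals `title`, or None."""
    cell = soup.find('td', attrs={'data-title': title})
    return cell.get_text(strip=True) if cell else None
# Example usage (assumed data-title value):
#   coordinates = extract_field(soup, 'Coordinates')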
""" Start the program """
# Create counter
ctr = 0
# Start the funtion the first time when the program starts
retrieve_website()
# Re-run every 15 minutes the function
schedule.every(900).seconds.do(retrieve_website)
while True:
schedule.run_pending()
time.sleep(1)
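# Note (added): schedule only triggers jobs while run_pending() is called in
# this loop; stop the tracker with Ctrl-C.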