Last active
March 27, 2021 12:11
-
-
Save bearhunt11/9b02f4977436be2b1cf15c9b10a5feb0 to your computer and use it in GitHub Desktop.
vesselfinder.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import urllib.request | |
from bs4 import BeautifulSoup | |
from datetime import datetime | |
import time | |
import csv | |
from selenium import webdriver | |
import schedule | |
def retrieve_website():
    """Scrape one ship's page on vesselfinder.com and log its AIS data.

    Appends a row (counter, north, east, date, time, heading, speed) to
    ``AIS_Track.csv``, saves the raw HTML to ``output1.html``, and takes a
    timestamped screenshot of the ship's map page via Selenium.

    Uses the module-level counter ``ctr`` as a running row number and
    increments it after every run.  Intended to be called periodically
    (e.g. by the ``schedule`` loop at the bottom of this file).
    """
    global ctr

    # (1) Download the vessel page.  A browser-like User-Agent is required,
    # otherwise vesselfinder.com blocks the request.
    headers = {'user-agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17'}
    url = 'https://www.vesselfinder.com/vessels/MOTIVATION-D-IMO-9301108-MMSI-636092241'
    reqs = requests.get(url, headers=headers)
    soup = BeautifulSoup(reqs.text, 'lxml')

    # Keep a copy of the raw page on disk for later inspection.
    # (The previous version re-read this file into a second BeautifulSoup
    # object; parsing the response we already have is equivalent.)
    with open("output1.html", "w", encoding='utf-8') as file:
        file.write(str(soup))

    # (2) Extract the fields of interest.
    # NOTE(review): the hard-coded <td> indices below depend on the current
    # vesselfinder.com page layout and will silently pick the wrong cells
    # if the site changes -- verify after any scraper breakage.
    data = soup.find_all('td')
    coordinates = data[21].get_text()
    # The timestamp is stored in the 'data-title' attribute of its <td>.
    dtg = data[25]['data-title']
    # Heading and speed share one cell, formatted like "123.4 / 10.5".
    head_spd = data[19].get_text()
    heading = head_spd.split(' / ')[0]
    speed = head_spd.split(' / ')[1]

    # (3) Write the data to the CSV file.
    # Split the coordinate pair "lat / lon" into its two halves.
    coordinates = str(coordinates)
    north = coordinates.split('/')[0]
    east = coordinates.split('/')[1]
    # Normalise the timestamp, e.g. "Mar 27, 2021 12:11 UTC" ->
    # "Mar 27 2021 12:11".  BUGFIX: the old ``.strip(' UTC')`` stripped the
    # *character set* {' ', 'U', 'T', 'C'} from both ends rather than
    # removing the literal " UTC" suffix.
    dtg = dtg.replace(',', '')
    if dtg.endswith(' UTC'):
        dtg = dtg[:-len(' UTC')]
    dtg = datetime.strptime(dtg, '%b %d %Y %H:%M')
    date = dtg.strftime('%Y-%m-%d')
    current_time = dtg.strftime('%H:%M')
    # Append one row, comma-separated.
    with open('AIS_Track.csv', 'a', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow([ctr, north, east, date, current_time, heading, speed])

    # (4) - OPTIONAL - take a screenshot of the ship's map page.
    # Prefix the filename with the wall-clock time of this run.
    now = datetime.now()
    screenshot = now.strftime("screenshots/%Y%m%d_%H%M_screenshot.png")
    # NOTE(review): like the <td> indices above, link index 22 is tied to
    # the current page layout -- confirm it still points at the map link.
    img_url = soup.find_all('a', href=True)[22]['href']
    # NOTE(review): passing the driver path positionally is deprecated in
    # Selenium 4 (use a Service object) -- works on Selenium 3.
    DRIVER = 'chromedriver'
    driver = webdriver.Chrome(DRIVER)
    try:
        driver.get('https://www.vesselfinder.com' + img_url)
        time.sleep(5)  # give the map page time to render
        driver.save_screenshot(screenshot)
    finally:
        # Always release the browser, even if the page load fails.
        driver.quit()

    # Print status message.
    print(ctr, 'Last AIS data was sent at:', current_time, 'UTC')
    # BUGFIX: the original ``ctr =+ 1`` assigned +1 every call, so the
    # counter never advanced past 1.
    ctr += 1
""" Start the program """ | |
# Create counter | |
ctr = 0 | |
# Start the funtion the first time when the program starts | |
retrieve_website() | |
# Re-run every 15 minutes the function | |
schedule.every(900).seconds.do(retrieve_website) | |
while True: | |
schedule.run_pending() | |
time.sleep(1) |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.