# vesselfinder.com: scrape a single vessel's AIS position and log it to a CSV file every 15 minutes
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time
import csv
from selenium import webdriver
import schedule
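# Third-party dependencies used below: requests, beautifulsoup4 (with the lxml
# parser), selenium and schedule, plus a chromedriver binary for the optional
# screenshot step.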
def retrieve_website():
    """ (1) First we are going to get the data from the website """
    # Create headers, otherwise vesselfinder will block you
    headers = {'user-agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17'}
    # URL of the ship you want to track; execute the request and parse the response into 'soup'
    url = 'https://www.vesselfinder.com/vessels/MOTIVATION-D-IMO-9301108-MMSI-636092241'
    reqs = requests.get(url, headers=headers)
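    # Optional hardening (added suggestion): raise immediately if vesselfinder
    # blocked the request, instead of parsing an error page. raise_for_status()
    # is standard requests API.
    reqs.raise_for_status()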
    soup = BeautifulSoup(reqs.text, 'lxml')
    # Save the page to local disk
    with open("output1.html", "w", encoding='utf-8') as file:
        file.write(str(soup))
""" (2) Next part is to find some info we can put into a csv file """
# open file to local disk
with open("output1.html", "r", encoding='utf-8') as file:
soup = BeautifulSoup(file, 'lxml')
# All td tags are read into a list
data = soup.find_all('td')
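    # NOTE (added): the numeric indices below (19, 21, 25) match vesselfinder's
    # markup at the time of writing and will break silently if the page layout
    # changes. See the hedged extract_field() sketch after this function for a
    # lookup keyed on the 'data-title' attribute instead.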
    # Extract the coordinates
    coordinates = data[21].get_text()
    # Extract the date; the timestamp sits in the td's 'data-title' attribute,
    # so read it directly instead of re-parsing the tag
    dtg = data[25]['data-title']
    # Extract the heading / speed
    head_spd = data[19].get_text()
    heading = head_spd.split(' / ')[0]
    speed = head_spd.split(' / ')[1]
"""" (3) Final part, write the data to a csv file """
# Divide the coordinate pair into northing and south degrees
coordinates = str(coordinates)
north = coordinates.split('/')[0]
east = coordinates.split('/')[1]
# Transform dtg-element into date and time elements
dtg = dtg.replace(',', '').strip(' UTC')
dtg = datetime.strptime(dtg, '%b %d %Y %H:%M')
date = dtg.strftime('%Y-%m-%d')
current_time = dtg.strftime('%H:%M')
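    # Worked example (assumed page format): "Mar 27, 2021 12:11 UTC"
    #   -> dtg  = datetime(2021, 3, 27, 12, 11)
    #   -> date = "2021-03-27", current_time = "12:11"
    # Caveat: .strip(' UTC') strips the characters ' ', 'U', 'T', 'C' from the
    # ends rather than the literal substring; it happens to work for this format.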
    # The counter is a global variable
    global ctr
    # Append the data to a csv file with comma as separator
    with open('AIS_Track.csv', 'a', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow([ctr, north, east, date, current_time, heading, speed])
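    # Note (added): AIS_Track.csv is appended without a header row; to add one,
    # write it once before the first run, e.g. (column names chosen here only
    # for illustration):
    #   with open('AIS_Track.csv', 'w', newline='') as f:
    #       csv.writer(f).writerow(['nr', 'north', 'east', 'date', 'time', 'heading', 'speed'])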
""" (4) - OPTIONAL - If we want to create a screenshot of the website """
# Prepare prefix filename
dtg = datetime.now()
screenshot = dtg.strftime("screenshots/%Y%m%d_%H%M_screenshot.png")
# Find the image URL
img_url = soup.find_all('a', href=True)
img_url = img_url[22]['href']
# # Retrieve website
DRIVER = 'chromedriver'
driver = webdriver.Chrome(DRIVER)
driver.get('https://www.vesselfinder.com' + img_url)
time.sleep(5)
driver.save_screenshot(screenshot)
driver.quit()
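    # Suggestion (added): for unattended runs Chrome can be started headless;
    # this uses Selenium 3-style arguments to match the call above:
    #   options = webdriver.ChromeOptions()
    #   options.add_argument('--headless')
    #   driver = webdriver.Chrome(DRIVER, options=options)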
    # Print status message
    print(ctr, 'Last AIS data was sent at:', current_time, 'UTC')
    # Increment the counter
    ctr += 1
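
# Hedged sketch (added, not called by the script): the fixed td indices in
# retrieve_website() depend on vesselfinder's current markup. This helper looks
# a field up by its 'data-title' attribute instead, the same attribute the
# timestamp cell above already carries. The example attribute value below is an
# assumption about the page, not a verified name.
def extract_field(soup, title):
    """Return the text of the td whose data-title equals `title`, or None."""
    cell = soup.find('td', attrs={'data-title': title})
    return cell.get_text(strip=True) if cell else None
# Example usage (assumed data-title value):
#   coordinates = extract_field(soup, 'Coordinates')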
""" Start the program """
# Create counter
ctr = 0
# Start the funtion the first time when the program starts
retrieve_website()
# Re-run every 15 minutes the function
schedule.every(900).seconds.do(retrieve_website)
while True:
schedule.run_pending()
time.sleep(1)
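# Note (added): schedule only triggers jobs while run_pending() is called in
# this loop; stop the tracker with Ctrl-C.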