Skip to content

Instantly share code, notes, and snippets.

@FrankRuns
Created October 22, 2021 21:08
Show Gist options
  • Save FrankRuns/d180a028bbab13429dfa67168e69af70 to your computer and use it in GitHub Desktop.
Save FrankRuns/d180a028bbab13429dfa67168e69af70 to your computer and use it in GitHub Desktop.
from tabula.io import read_pdf
import pandas as pd
import matplotlib.pyplot as plt
# Location of the source report, relative to the working directory.
pdf_path = "Container-Vessels-In-Port.pdf"
# Pull the tables from pages 2 through 10 in stream mode. read_pdf returns a
# Python list of pandas DataFrames (per the original note, one per page).
dfs = read_pdf(pdf_path, pages="2-10", stream=True)
# Sanity check: look at the first five rows of the eighth extracted table.
dfs[7].head(n=5)
# Stack every extracted table into a single frame. Passing the whole list to
# concat (instead of spelling out dfs[0] .. dfs[7]) picks up however many
# tables were extracted, so none is silently left out and a shorter list does
# not raise IndexError.
new_data = pd.concat(dfs)
# Rows containing NA are mangled header rows, so drop them.
new_data = new_data.dropna()
# Only the first two columns are needed: the date and the vessels-at-anchor
# count.
new_data = new_data[new_data.columns[0:2]]
# Give the two columns friendly names.
new_data = new_data.rename(
    columns={"Unnamed: 0": "date", "POLA Vessels at": "at_anchor_count"}
)
# Convert types: month/day/year strings to datetimes, counts to integers.
new_data["date"] = pd.to_datetime(new_data["date"], format="%m/%d/%Y")
new_data["at_anchor_count"] = new_data["at_anchor_count"].astype("int")
# Visualize the at-anchor count over time on a single 12x6 inch figure.
fig, ax = plt.subplots(figsize=(12, 6))
# Set the tick label font size on both axes. tick_params records the size on
# the axis itself, so it does not depend on which tick labels already exist
# before the data is plotted (the previous per-label loop did, and its body
# had also lost its indentation).
ax.tick_params(axis="both", labelsize=12)
ax.plot(new_data["date"], new_data["at_anchor_count"])
# Title, axis labels and a light dashed grid; ax is the current axes, so the
# pyplot-level calls below apply to it.
plt.title('Vessels at Anchor Awaiting Berth at Los Angeles', fontsize=16)
plt.xlabel('Date', fontsize=16)
plt.ylabel('Count of Vessels', fontsize=16)
plt.grid(color='lightgray', linestyle='--', linewidth=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment