# kenielf/anomalies2.py

## anomalies2.py
from pathlib import Path
from re import sub
from sys import exit, stderr

import pandas as pd
import matplotlib.pyplot as plt
from colorama import Fore
from requests import get

# Directory that contains this script; data and plots are kept beside it.
CWD = Path(__file__).parent
DATA_FILE = CWD / "data.csv"
PLOT_DIR = CWD / "plots"

# Location the raw index table is downloaded from.
URL = r"https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices"

# Line style and colour for every data column that gets plotted.
DATA_DETAILS = {
    'ANOM': dict(linestyle="solid", color="red"),
    'ANOM.1': dict(linestyle="dashed", color="green"),
    'ANOM.2': dict(linestyle="dotted", color="blue"),
    'ANOM.3': dict(linestyle="dashdot", color="orange"),
}

# Y-axis tick positions: 2.5 down to -3.0 in steps of 0.5.
DATA_RANGE = [half_steps / 2 for half_steps in range(5, -7, -1)]

# Make sure the output directory exists before any figure is saved.
PLOT_DIR.mkdir(parents=True, exist_ok=True)


def info(message: str) -> None:
    """Print ``message`` to stdout behind a cyan ``Info:`` tag."""
    tag = f"{Fore.CYAN}Info:{Fore.RESET}"
    print(f"{tag} {message}")


def render_plot(year: int, raw_data: pd.DataFrame) -> None:
    """Plot one year of monthly anomalies and save the figure as a PNG.

    One line is drawn per column named in ``DATA_DETAILS``, styled with that
    column's ``linestyle`` and ``color`` entries, and every point is annotated
    with its value. The figure is written to
    ``PLOT_DIR / "anomaly-<year>.png"``.

    Args:
        year: Year shown in the title and used in the output file name.
        raw_data: Rows to plot; must contain a ``MON`` column and every
            column named in ``DATA_DETAILS``.
    """
    info("Rendering plot...")
    # Clear previous plots
    plt.cla()
    plt.grid()

    # Isolate data
    data = raw_data[['MON', *DATA_DETAILS]]
    print(data)
    months = data['MON']

    # Render data
    for region, style in DATA_DETAILS.items():
        values = data[region]
        # Lines
        plt.plot(
            months,
            values,
            lw=1,
            linestyle=style['linestyle'],
            color=style['color'],
            marker='.',
            markersize=5,
            label=region,
        )
        # Annotations: pair the values positionally instead of by index label.
        for month, value in zip(months, values):
            plt.annotate(str(value), (month, value))

    # Build the legend once, after every labelled line exists.
    plt.legend()

    # Set advanced plot attributes
    plt.title(f"{year} Anomalies")
    plt.xticks(months)
    plt.yticks(DATA_RANGE)

    # Lay out last so the title, tick labels and legend are accounted for.
    plt.tight_layout()

    # Save figure
    plt.savefig(PLOT_DIR / f"anomaly-{year}.png")


def _table_to_csv(text: str) -> str:
    """Convert a whitespace-aligned text table into CSV text.

    Every non-blank line becomes one row: its whitespace-separated fields are
    joined with commas and the row is terminated with a newline. Leading or
    trailing whitespace on a line never produces an empty field.

    Args:
        text: The raw table, one record per line.

    Returns:
        The same records as comma-separated lines ("" when there are none).
    """
    rows = (",".join(line.split()) for line in text.splitlines())
    return "".join(f"{row}\n" for row in rows if row)


if __name__ == "__main__":
    # Inclusive span of years to export and plot. It drives both the table
    # filter and the plotting loop so the two cannot drift apart.
    year_begin = 2015
    year_end = 2022
    # Resolved next to the script so it is written and read in one place,
    # whatever directory the script is launched from.
    range_file = CWD / "range.csv"

    # If the data file does not exist, download it
    if not DATA_FILE.exists():
        print(f"{Fore.CYAN}Downloading data from source!{Fore.RESET}")
        # A timeout keeps a stalled connection from hanging the script.
        response = get(URL, timeout=30)
        # Safety checks
        if response.status_code != 200:
            print(f"{Fore.RED}Error:{Fore.RESET} Failed to download page!", file=stderr)
            exit(1)

        # Convert the decoded body (not the repr of the raw bytes) to CSV.
        DATA_FILE.write_text(_table_to_csv(response.text), encoding="utf-8")

    # Read the data
    print(f"{Fore.CYAN}Parsing data!{Fore.RESET}")
    df = pd.read_csv(DATA_FILE)

    # Print pandas table from range
    _range = df[(df["YR"] >= year_begin) & (df["YR"] <= year_end)]
    print(_range.to_string())

    # Export the selection, then continue from the exported file.
    _range.to_csv(range_file, encoding='utf-8', index=False)
    df = pd.read_csv(range_file)

    # Configure Matplotlib
    print(f"{Fore.CYAN}Plotting data...{Fore.RESET}")
    plt.rcParams["figure.figsize"] = (7.50, 3.50)
    plt.rcParams["figure.autolayout"] = True

    # Plot anomalies for each individual year
    info("Plotting year anomalies...")
    for year in range(year_begin, year_end + 1):
        print(f"{Fore.BLUE}Year: {Fore.RESET}{year}")
        data: pd.DataFrame = df.loc[df['YR'] == year]
        print(data)
        render_plot(year, data)
	from pathlib import Path
	from re import sub
	from sys import exit, stderr

	import pandas as pd
	import matplotlib.pyplot as plt
	from colorama import Fore
	from requests import get

	# Directory that contains this script; data and plots are kept beside it.
	CWD = Path(__file__).parent
	DATA_FILE = CWD / "data.csv"
	PLOT_DIR = CWD / "plots"

	# Location the raw index table is downloaded from.
	URL = r"https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices"

	# Line style and colour for every data column that gets plotted.
	DATA_DETAILS = {
	    'ANOM': dict(linestyle="solid", color="red"),
	    'ANOM.1': dict(linestyle="dashed", color="green"),
	    'ANOM.2': dict(linestyle="dotted", color="blue"),
	    'ANOM.3': dict(linestyle="dashdot", color="orange"),
	}

	# Y-axis tick positions: 2.5 down to -3.0 in steps of 0.5.
	DATA_RANGE = [half_steps / 2 for half_steps in range(5, -7, -1)]

	# Make sure the output directory exists before any figure is saved.
	PLOT_DIR.mkdir(parents=True, exist_ok=True)


	def info(message: str) -> None:
	    """Print ``message`` to stdout behind a cyan ``Info:`` tag."""
	    print(f"{Fore.CYAN}Info:{Fore.RESET} {message}")


	def render_plot(year: int, raw_data: pd.DataFrame) -> None:
	    """Plot one year of monthly anomalies and save the figure as a PNG.

	    One line is drawn per column named in ``DATA_DETAILS``, styled with that
	    column's ``linestyle`` and ``color`` entries, and every point is
	    annotated with its value. The figure is written to
	    ``PLOT_DIR / "anomaly-<year>.png"``.

	    Args:
	        year: Year shown in the title and used in the output file name.
	        raw_data: Rows to plot; must contain a ``MON`` column and every
	            column named in ``DATA_DETAILS``.
	    """
	    info("Rendering plot...")
	    # Clear previous plots
	    plt.cla()
	    plt.grid()

	    # Isolate data
	    data = raw_data[['MON', *DATA_DETAILS]]
	    print(data)
	    months = data['MON']

	    # Render data
	    for region, style in DATA_DETAILS.items():
	        values = data[region]
	        # Lines
	        plt.plot(
	            months,
	            values,
	            lw=1,
	            linestyle=style['linestyle'],
	            color=style['color'],
	            marker='.',
	            markersize=5,
	            label=region,
	        )
	        # Annotations: pair the values positionally instead of by index label.
	        for month, value in zip(months, values):
	            plt.annotate(str(value), (month, value))

	    # Build the legend once, after every labelled line exists.
	    plt.legend()

	    # Set advanced plot attributes
	    plt.title(f"{year} Anomalies")
	    plt.xticks(months)
	    plt.yticks(DATA_RANGE)

	    # Lay out last so the title, tick labels and legend are accounted for.
	    plt.tight_layout()

	    # Save figure
	    plt.savefig(PLOT_DIR / f"anomaly-{year}.png")


	def _table_to_csv(text: str) -> str:
	    """Convert a whitespace-aligned text table into CSV text.

	    Every non-blank line becomes one row: its whitespace-separated fields
	    are joined with commas and the row is terminated with a newline.
	    Leading or trailing whitespace on a line never produces an empty field.

	    Args:
	        text: The raw table, one record per line.

	    Returns:
	        The same records as comma-separated lines ("" when there are none).
	    """
	    rows = (",".join(line.split()) for line in text.splitlines())
	    return "".join(f"{row}\n" for row in rows if row)


	if __name__ == "__main__":
	    # Inclusive span of years to export and plot. It drives both the table
	    # filter and the plotting loop so the two cannot drift apart.
	    year_begin = 2015
	    year_end = 2022
	    # Resolved next to the script so it is written and read in one place,
	    # whatever directory the script is launched from.
	    range_file = CWD / "range.csv"

	    # If the data file does not exist, download it
	    if not DATA_FILE.exists():
	        print(f"{Fore.CYAN}Downloading data from source!{Fore.RESET}")
	        # A timeout keeps a stalled connection from hanging the script.
	        response = get(URL, timeout=30)
	        # Safety checks
	        if response.status_code != 200:
	            print(f"{Fore.RED}Error:{Fore.RESET} Failed to download page!", file=stderr)
	            exit(1)

	        # Convert the decoded body (not the repr of the raw bytes) to CSV.
	        DATA_FILE.write_text(_table_to_csv(response.text), encoding="utf-8")

	    # Read the data
	    print(f"{Fore.CYAN}Parsing data!{Fore.RESET}")
	    df = pd.read_csv(DATA_FILE)

	    # Print pandas table from range
	    _range = df[(df["YR"] >= year_begin) & (df["YR"] <= year_end)]
	    print(_range.to_string())

	    # Export the selection, then continue from the exported file.
	    _range.to_csv(range_file, encoding='utf-8', index=False)
	    df = pd.read_csv(range_file)

	    # Configure Matplotlib
	    print(f"{Fore.CYAN}Plotting data...{Fore.RESET}")
	    plt.rcParams["figure.figsize"] = (7.50, 3.50)
	    plt.rcParams["figure.autolayout"] = True

	    # Plot anomalies for each individual year
	    info("Plotting year anomalies...")
	    for year in range(year_begin, year_end + 1):
	        print(f"{Fore.BLUE}Year: {Fore.RESET}{year}")
	        data: pd.DataFrame = df.loc[df['YR'] == year]
	        print(data)
	        render_plot(year, data)