# kenielf/anomalies.py

## anomalies.py
from pathlib import Path
from re import sub
from sys import exit, stderr

import pandas as pd
import matplotlib.pyplot as plt
from colorama import Fore
from requests import get

URL = r"https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices"
DATA_FILE = Path(__file__).parent / "data.csv"


# Anomaly columns as named by pandas: the source table repeats the "ANOM"
# header four times, and read_csv de-duplicates them with numeric suffixes.
ANOMALY_COLUMNS = ("ANOM", "ANOM.1", "ANOM.2", "ANOM.3")


def table_to_csv(raw_text: str) -> str:
    """Convert a whitespace-aligned text table into CSV text.

    Each non-blank line is split on runs of whitespace and re-joined with
    commas, so leading/trailing padding never produces empty fields.

    Args:
        raw_text: The table as decoded text, one record per line.

    Returns:
        The CSV text, newline-terminated, or "" when there are no records.
    """
    rows = [",".join(line.split()) for line in raw_text.splitlines() if line.strip()]
    return "\n".join(rows) + "\n" if rows else ""


def download_table(url: str, destination: Path, timeout: float = 30) -> None:
    """Download the table at *url* and store it as CSV in *destination*.

    Prints an error to stderr and exits with status 1 when the request
    fails or the server does not answer with HTTP 200.
    """
    # Local import: only the download path needs the exception type.
    from requests import RequestException

    try:
        # A timeout keeps the script from hanging forever on a dead server.
        response = get(url, timeout=timeout)
    except RequestException as err:
        print(
            f"{Fore.RED}Error:{Fore.RESET} Failed to download page! ({err})",
            file=stderr,
        )
        exit(1)

    if response.status_code != 200:
        print(f"{Fore.RED}Error:{Fore.RESET} Failed to download page!", file=stderr)
        exit(1)

    # Use the decoded text instead of str(bytes), which needed the b'...'
    # wrapper and literal "\n" escapes to be stripped by hand.
    destination.write_text(table_to_csv(response.text), encoding="utf-8")


def select_years(df: pd.DataFrame, first: int = 2015, last: int = 2022) -> pd.DataFrame:
    """Return the rows of *df* whose "YR" lies in [first, last], inclusive."""
    return df[df["YR"].between(first, last)]


def yearly_means(df: pd.DataFrame, columns=ANOMALY_COLUMNS) -> pd.DataFrame:
    """Return the per-year mean of *columns*, indexed by "YR"."""
    return df.groupby("YR")[list(columns)].mean()


def plot_yearly_means(means: pd.DataFrame) -> None:
    """Plot one line per column of *means* against the year index and show it."""
    plt.rcParams["figure.figsize"] = (7.50, 3.50)
    plt.rcParams["figure.autolayout"] = True

    # The x-axis is the year (the group key). The averaged "MON" column is
    # the same value for every complete year and would stack all points on
    # a single x position.
    for column in means.columns:
        plt.plot(means.index, means[column], lw=1, marker=".", markersize=10)

    plt.title("Average of each value")
    plt.grid()
    plt.tight_layout()
    plt.xticks(means.index)
    plt.yticks([2.5, 2.0, 1.5, 1.0, 0.5, 0.0, -0.5, -1.0, -1.5, -2.0, -2.5, -3.0])
    plt.show()


def main() -> None:
    """Fetch the table if needed, print the 2015-2022 rows, plot yearly means."""
    # If the data file does not exist, download it
    if not DATA_FILE.exists():
        print(f"{Fore.CYAN}Downloading data from source!{Fore.RESET}")
        download_table(URL, DATA_FILE)

    # Read the data
    print(f"{Fore.CYAN}Parsing data!{Fore.RESET}")
    df = pd.read_csv(DATA_FILE)

    # Print pandas table from range
    selected = select_years(df)
    print(selected.to_string())

    # Write and re-read the range file through the same absolute path, so
    # the script works regardless of the current working directory.
    range_file = DATA_FILE.parent / "range.csv"
    selected.to_csv(range_file, encoding="utf-8", index=False)
    df = pd.read_csv(range_file)

    # Plot the average of each anomaly column per year
    print(f"{Fore.CYAN}Plotting data...{Fore.RESET}")
    plot_yearly_means(yearly_means(df))


if __name__ == "__main__":
    main()