Last active: November 2, 2023 00:00
-
-
Save banditkings/26f640c2e56d91f24c2d4c65087bab5a to your computer and use it in GitHub Desktop.
Python Time Series Datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ------- STATSMODELS ---------
# Statsmodels has a few built-in datasets as well as a utility
# (`get_rdataset`) for R datasets.
# Each snippet is self-contained and rebinds `df`; copy only the one you need.

# Yearly Nile River flows at Aswan, 1871-1970
import pandas as pd
import statsmodels.api as sm
# load_pandas() is used (as in the CO2 snippet) so `.data` is a DataFrame
# and the column assignment below works.
df = sm.datasets.nile.load_pandas().data
# 'YS' (year start) is the non-deprecated spelling of the old 'AS' alias.
df['ds'] = pd.date_range(start='1871', end='1970', freq='YS')

# Mauna Loa Weekly Atmospheric CO2 Data
import statsmodels.api as sm
df = sm.datasets.co2.load_pandas().data

# Air Passengers
## Monthly International Air Passengers (in thousands)
## Classic Box & Jenkins dataset, of ARIMA fame
import pandas as pd
import statsmodels.api as sm
df = sm.datasets.get_rdataset('AirPassengers').data
# Month-start timestamps from 1949-01 through 1960-12.
df['ds'] = pd.date_range(start='1949', end='1960-12-31', freq='MS')

# Annual Lynx trappings
import pandas as pd
import statsmodels.api as sm
df = sm.datasets.get_rdataset('lynx').data
# Year-start timestamps from 1821 through 1934.
df['ds'] = pd.date_range(start='1821', end='1934', freq='YS')
# ------- ORBIT ---------
# !pip install orbit-ml
# Each snippet is self-contained and rebinds `df`; copy only the one you need.
# The trailing shape comments are the (rows, columns) noted by the author.

## Air Passengers
from orbit.utils.dataset import load_air_passengers
df = load_air_passengers()

## Unemployment Claims (mimics the data from the original BSTS paper)
from orbit.utils.dataset import load_iclaims
df = load_iclaims()  # (443, 7)

## M3 Competition
from orbit.utils.dataset import load_m3monthly
df = load_m3monthly()  # (165503, 3)

## M4 Weekly data
from orbit.utils.dataset import load_m4weekly
df = load_m4weekly()  # (6106, 4)

## M5 Aggregated daily dataset
from orbit.utils.dataset import load_m5daily
df = load_m5daily()  # (1913, 42)
# --------- UTILSFORECAST (NIXTLA) ---------------
## Nixtla has the `utilsforecast` library to help simulate time series
## Generating Data with utilsforecast
## https://nixtla.github.io/utilsforecast/data.html
## !pip install utilsforecast
from utilsforecast.data import generate_series
# Generate a long dataframe with daily data for 3 unique series
df = generate_series(n_series=3, with_trend=True, static_as_categorical=False)

## M4 Hourly data with 414 unique series, each with ~900 obs
# pandas must be imported here: this snippet calls pd.read_parquet directly.
import pandas as pd
df = pd.read_parquet('https://datasets-nixtla.s3.amazonaws.com/m4-hourly.parquet')
df.shape  # (373372, 3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.
A list of built-in time series datasets commonly used for teaching forecasting and testing various time series forecasting methods in Python.