Skip to content

Instantly share code, notes, and snippets.

@bennyistanto
Last active June 12, 2024 13:46
Show Gist options
  • Save bennyistanto/6b9e1cb0c9e854601d19909df0178137 to your computer and use it in GitHub Desktop.
Save bennyistanto/6b9e1cb0c9e854601d19909df0178137 to your computer and use it in GitHub Desktop.
Simulate how Pettitt test detects inhomogeneous or homogeneous on precipitation data

xkcd-style Pettitt's illustration on Homogeneity testing

The script below try to simulate how Pettitt test detects inhomogeneous or homogeneous on precipitation data. Here's the updated approach:

  1. Daily Precipitation Data:

    • We have daily precipitation.
  2. Generate Monthly Maximum Precipitation Values:

    • Create synthetic monthly maxima with distinct differences to ensure inhomogeneity.
  3. Perform Outlier Detection and Homogeneity Testing:

    • Detect and handle outliers.
    • Perform the Pettitt test for homogeneity.
  4. Visualize the Results:

    • Plot the adjusted and original data to show the effect of the corrections.

You can paste the below code into an online Python compiler like https://python-fiddle.com/ and grab the result instantly.

Homogeneous data

pettitt_homogeneous

Inhomogeneous data

pettitt_inhomogeneous

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import iqr
# Set the figure DPI to 300
plt.rcParams['figure.dpi'] = 300
# Seed for reproducibility
np.random.seed(42)
# Define the provided data with significant differences between months
monthly_max = [
20, 27, 30, 25, 20, 5, 5, 12, 30, 15, 25, 16, # Current data list is inhomogeneous data
35, 30, 45, 50, 55, 60, 55, 50, 10, 20, 25, 50 # <== Change 50 to 10 to get homogeneous
]
# Detect and handle outliers using IQR
def handle_outliers(data):
q1, q3 = np.percentile(data, [25, 75])
iqr_value = q3 - q1
lower_bound = q1 - 3 * iqr_value
upper_bound = q3 + 3 * iqr_value
return np.clip(data, lower_bound, upper_bound)
adjusted_max = handle_outliers(monthly_max)
# Homogeneity testing using Pettitt test
def pettitt_test(data):
n = len(data)
k = np.zeros(n)
for t in range(1, n):
for i in range(t):
for j in range(t, n):
k[t] += np.sign(data[j] - data[i])
U = np.max(np.abs(k))
p_value = 2 * np.exp((-6 * U**2) / (n**3 + n**2))
return p_value > 0.05 # True if data is homogeneous
is_homogeneous = pettitt_test(adjusted_max)
# Adjust inhomogeneities if data is not homogeneous
if not is_homogeneous:
mean_value = np.mean(adjusted_max)
adjusted_homogeneous_max = [mean_value if x > mean_value else x for x in adjusted_max]
else:
adjusted_homogeneous_max = adjusted_max
# Function to generate precipitation data with more zero values and realistic gaps
def generate_precipitation(max_value, days):
precip = np.zeros(days)
day = 0
zero_day_gaps = [3, 4, 5, 8, 2, 4, 7, 1, 4, 6]
while day < days:
gap_length = np.random.choice(zero_day_gaps)
if day + gap_length >= days:
gap_length = days - day
day += gap_length
if day >= days:
break
non_zero_length = np.random.randint(1, 5)
if day + non_zero_length >= days:
non_zero_length = days - day
precip[day:day + non_zero_length] = np.random.gamma(2, max_value / 10, size=non_zero_length)
day += non_zero_length
return precip
# Generate daily precipitation data based on monthly maxima
days_in_month = [30] * len(monthly_max)
data = [generate_precipitation(max_val, days) for max_val, days in zip(monthly_max, days_in_month)]
# Concatenate daily data for plotting
daily_precip = np.concatenate(data)
days = np.arange(len(daily_precip))
# Plot the results
x = np.arange(len(monthly_max))
# Plotting in XKCD style
plt.xkcd()
plt.figure(figsize=(15, 10))
# Panel 1: Original Daily Precipitation Data
plt.subplot(231)
plt.plot(days, daily_precip, color='skyblue')
plt.title('Original Daily Precipitation Data')
plt.xlabel('Day')
plt.ylabel('Precipitation (mm)')
# Panel 2: Monthly Maxima
plt.subplot(232)
bars = plt.bar(x, monthly_max, color='blue')
plt.title('Monthly Maxima')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm)')
for bar, value in zip(bars, monthly_max):
height = bar.get_height()
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black')
# Panel 3: Homogeneity Test Result
plt.subplot(233)
bars = plt.bar(x, adjusted_max, color='green')
plt.title('Homogeneity Test')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm)')
plt.axhline(np.mean(adjusted_max), color='red', linestyle='--', label='Mean')
if not is_homogeneous:
plt.title('Inhomogeneous Data Detected')
else:
plt.title('Data is Homogeneous')
plt.legend()
for bar, value in zip(bars, adjusted_max):
height = bar.get_height()
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black')
# Add text to indicate 'After Outlier Handling'
plt.text(0.05, 0.75, 'After\nOutlier Handling', transform=plt.gca().transAxes, fontsize=14, ha='left', va='bottom')
# Panel 4: Adjusted for Homogeneity
plt.subplot(234)
bars = plt.bar(x, adjusted_homogeneous_max, color='orange')
plt.title('After Homogeneity Adjustment')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm)')
plt.axhline(np.mean(adjusted_homogeneous_max), color='red', linestyle='--', label='Mean')
plt.legend()
for bar, value in zip(bars, adjusted_homogeneous_max):
height = bar.get_height()
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black')
# Panel 5: Histogram and Normality Plot
plt.subplot(235)
plt.hist(adjusted_homogeneous_max, bins=10, color='purple', alpha=0.7, label='Histogram')
plt.title('Histogram and Normality Plot')
plt.xlabel('Precipitation (mm)')
plt.ylabel('Frequency')
plt.legend()
# Panel 6: Final Adjusted Data
plt.subplot(236)
bars = plt.bar(x, adjusted_homogeneous_max, color='red')
plt.title('Final Adjusted Data')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm)')
for bar, value in zip(bars, adjusted_homogeneous_max):
height = bar.get_height()
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black')
plt.tight_layout()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment