Skip to content

Instantly share code, notes, and snippets.

@Gro-Tsen
Last active March 23, 2023 10:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gro-Tsen/92774bc42899872acc87c9de197becd0 to your computer and use it in GitHub Desktop.
Save Gro-Tsen/92774bc42899872acc87c9de197becd0 to your computer and use it in GitHub Desktop.
#### Some statistical analysis of daily mean temperatures in France
#### See: <URL: https://twitter.com/gro_tsen/status/1620488744867598337 >
## Input data: /tmp/temps.csv is expected to contain a list of daily
## average temperatures in France (first column = date (unused),
## second column = temperature in Celsius), over a whole number of
## calendar years. It can be produced from the data source described
## in <URL: https://twitter.com/gro_tsen/status/1620435663844970497 >
## by the following command line:
# perl -ne 'next if /^\#/; die unless /^([0-9]{8})\s+(\-?[0-9]+\.[0-9]+(?:[Ee](?:[+-]?[0-9]+))?)\s*$/; printf "%s,%.4f\n", $1,($2+0)' /data/meteo/climexp/iera5_t2m_daily_eu_France_metropolitan_su.dat > /tmp/temps.csv
### Read data
import csv
data = list(csv.reader(open("/tmp/temps.csv","r")))
data = list(map(lambda x: [x[0],float(x[1])],data))
# Truncate to an integer number of years:
datayears = floor(len(data)/365.25)
data = data[:floor(datayears*365.25+0.25)]
### Sample basic plots
line([(i/365.25+1950, data[i][1]) for i in range(len(data))], thickness=0.1, title="Température quotidienne moyenne en France", axes_labels=(None, "T (°C)")).show(dpi=192)
line([(i/365.25+1950, data[i][1]) for i in range(len(data))[-365:]], thickness=0.5, title="Température quotidienne moyenne en France", axes_labels=(None, "T (°C)")).show(dpi=192)
### Compute expected temperature per day of year and anomalies
smoothing = 6 # Smaller means more smoothing
## Old version (perform Fourier by hand):
# twoIpiN = N(2*I*pi)
# fourier = [sum([data[i][1]*(N(exp(-twoIpiN*i*k/365.25))) for i in range(len(data))])/len(data) for k in range(61)]
# fouriersm = [fourier[k]*N(exp(-(k/smoothing)^2)) for k in range(len(fourier))]
# stdyear = [sum([(2 if k>0 else 1)*real(fouriersm[k]*(N(exp(twoIpiN*i*k/365.25)))) for k in range(len(fourier))]) for i in range(366)]
# g = line([(i+1,stdyear[i]) for i in range(366)], title="Température moyenne lissée en France dans l'année (1950–2022)", axes_labels=(None, "T (°C)"))
# g.show(dpi=192)
# # stdyearderiv = [sum([2*real(twoIpiN*k*fouriersm[k]*(N(exp(twoIpiN*i*k/365.25)))) for k in range(len(fourier))]) for i in range(366)]
# (list_plot([(frac(i/365.2425)*365.2425+1, data[i][1]) for i in range(len(data))], size=1) + line([(i+1,stdyear[i]) for i in range(366)],color="red", title="Température quotidienne moyenne en France dans l'année (1950–2022)", axes_labels=(None, "T (°C)"))).show(dpi=192)
# anomalies = [(data[i][0], data[i][1] - sum([(2 if k>0 else 1)*real(fouriersm[k]*(N(exp(twoIpiN*i*k/365.25)))) for k in range(len(fourier))])) for i in range(len(data))]
## Better version (with FastFourierTransform):
fft = FastFourierTransform(len(data))
for i in range(len(data)):
fft[i] = (data[i][1], 0)
fft.forward_transform()
for i in range(len(data)):
ii = min(i,len(data)-i)
if ii%datayears != 0:
fft[i]=(0,0)
fft.inverse_transform()
avgyear = [fft[i][0] for i in range(366)]
fftsm = FastFourierTransform(len(data))
for i in range(len(data)):
fftsm[i] = (data[i][1], 0)
fftsm.forward_transform()
for i in range(len(data)):
ii = min(i,len(data)-i)
if ii%datayears != 0:
fftsm[i]=(0,0)
else:
k = ii/datayears
muf = N(exp(-(k/smoothing)^2))
fftsm[i] = (fftsm[i][0]*muf, fftsm[i][1]*muf)
fftsm.inverse_transform()
stdyear = [fftsm[i][0] for i in range(366)]
gavg = line([(i+1,avgyear[i]) for i in range(366)], title=("Température moyenne en France dans l'année (1950–%d)"%(1950+datayears-1)), axes_labels=(None, "T (°C)"), color="gray", thickness=0.7)
g = line([(i+1,stdyear[i]) for i in range(366)], title=("Température moyenne lissée en France dans l'année (1950–%d)"%(1950+datayears-1)), axes_labels=(None, "T (°C)"))
(gavg+g).show(dpi=192)
fullstd = [(data[i][0], fftsm[i][0]) for i in range(len(data))]
anomalies = [(data[i][0], data[i][1] - fftsm[i][0]) for i in range(len(data))]
# fullstdsorted = fullstd.copy()
# fullstdsorted.sort(key=lambda v: v[1])
### Perform long-term regression on anomalies
# ansorted = anomalies.copy()
# ansorted.sort(key=lambda v: v[1])
import scipy.stats
regr = scipy.stats.linregress([(i/365.25,anomalies[i][1]) for i in range(len(anomalies))])
ganom = list_plot([(i/365.25+1950, anomalies[i][1]) for i in range(len(data))], size=1, color="gray")
(ganom+plot(regr.slope*(x-1950)+regr.intercept, (x,1950,2023), color="red", title="Anomalie brute de température")).show(dpi=192)
## Different polynomial regressions:
import numpy as np
regrpol1 = np.polyfit([i/365.25 for i in range(len(anomalies))], [v[1] for v in anomalies], 1)
regrpol2 = np.polyfit([i/365.25 for i in range(len(anomalies))], [v[1] for v in anomalies], 2)
gregrpol1 = plot(regrpol1[0]*(x-1950)+regrpol1[1], (x,1950,2023), color="red")
gregrpol2 = plot(regrpol2[0]*(x-1950)^2+regrpol2[1]*(x-1960)+regrpol2[2], (x,1950,2023), color="magenta")
### Corrected anomalies:
bnomalies = [(anomalies[i][0], anomalies[i][1]-(regr.slope*(i/365.25)+regr.intercept)) for i in range(len(anomalies))]
bnsorted = bnomalies.copy()
bnsorted.sort(key=lambda v: v[1])
## Standard deviation, skewness and excess kurtosis:
sigma = scipy.stats.tstd([v[1] for v in bnomalies])
skew = scipy.stats.skew([v[1] for v in bnomalies])
exckurt = scipy.stats.kurtosis([v[1] for v in bnomalies], fisher=True)
## Quantiles
import numpy
numpy.percentile([v[1] for v in bnomalies], 25)
numpy.percentile([v[1] for v in bnomalies], 50)
numpy.percentile([v[1] for v in bnomalies], 75)
## Average position of 2-sigma events (check that this is close to 0.5):
N(sum([i for i in range(len(bnomalies)) if bnomalies[i][1]>sigma*2])/(len([i for i in range(len(bnomalies)) if bnomalies[i][1]>sigma*2])*(len(bnomalies)-1)))
N(sum([i for i in range(len(bnomalies)) if bnomalies[i][1]<-sigma*2])/(len([i for i in range(len(bnomalies)) if bnomalies[i][1]<-sigma*2])*(len(bnomalies)-1)))
## Standard deviation on 25-year intervals:
scipy.stats.tstd([v[1] for v in bnomalies[:9131]]
scipy.stats.tstd([v[1] for v in bnomalies[8766:17897]]
scipy.stats.tstd([v[1] for v in bnomalies[17532:]]
## (I guess 9131 etc should be computed as floor(...*365.25+0.25))
## Plot corrected anomalies:
gbnom = list_plot([(i/365.25+1950, bnomalies[i][1]) for i in range(len(data))], size=1, color="gray") + list_plot([(i/365.25+1950, bnomalies[i][1]) for i in range(len(data)) if bnomalies[i][1]>sigma*2], size=5, color="red") + list_plot([(i/365.25+1950, bnomalies[i][1]) for i in range(len(data)) if bnomalies[i][1]<-sigma*2], size=5, color="blue", title="Anomalie corrigée de température (évts à 2σ en gras)")
gbnom.show(dpi=192)
## Plot distribution of corrected anomalies (linear scale):
gbsrt = line([(sigma,0),(sigma,1)],color="black",linestyle="dashed")+line([(-sigma,0),(-sigma,1)],color="black",linestyle="dashed")+line([(bnsorted[i][1], (i+1)/(len(bnsorted)+1)) for i in range(len(bnomalies))], title="Distribution de l'anomalie corrigée", axes_labels=("T (°C)", None))
gbsrt.show(dpi=192)
## Plot distribution of corrected anomalies (log scale):
gbsrtlogscale = plot((1+erf(x/(sqrt(2)*sigma)))/2, (x,-4*sigma,4*sigma), scale="semilogy", color="gray", linestyle="dashed") + plot((1-erf(x/(sqrt(2)*sigma)))/2, (x,-4*sigma,4*sigma), scale="semilogy", color="gray", linestyle="dashed") + line([(bnsorted[i][1], (i+1)/(len(bnsorted)+1)) for i in range(len(bnomalies))], title="Distribution de l'anomalie corrigée", axes_labels=("T (°C)", None), scale="semilogy") + line([(bnsorted[i][1], 1-(i+1)/(len(bnsorted)+1)) for i in range(len(bnomalies))], title="Distribution de l'anomalie corrigée", axes_labels=("T (°C)", None), color="red", scale="semilogy")
gbsrtlogscale.show(dpi=192)
### Autocorrelation analysis:
autocorrels = [1]+[scipy.stats.pearsonr([v[1] for v in bnomalies[:-k]], [v[1] for v in bnomalies[k:]])[0] for k in range(1,15)]
gautocorr = line([(i,autocorrels[i]) for i in range(len(autocorrels))], scale="semilogy", title="Autocorrélation à n jours des anomalies corrigées", axes_labels=("n", "r"))
gautocorr.show(dpi=192)
scipy.stats.linregress([(i,log(autocorrels[i])) for i in range(len(autocorrels))])
@Gro-Tsen
Copy link
Author

Gro-Tsen commented Feb 7, 2023

See Twitter thread starting at https://twitter.com/gro_tsen/status/1620488744867598337 and ending at https://twitter.com/gro_tsen/status/1622918364086534145 for context and explanations about this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment