Created
September 18, 2017 04:10
-
-
Save jose-solorzano/2361a1479acc34128401d59f4e8aaf0f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Functions for analysis of century-long periodicity of KIC 8462852. | |
library(lomb) | |
readDaschData = function(fileName = "./dasch/KIC8462852_K8462852_0000.txt", | |
removeFlagged = F, maxError = NULL, magField = "magcal_local", maxOutlierSdFactor = NULL) { | |
con = file(fileName); | |
lines = readLines(con); | |
close(con); | |
lines = lines[c(-1, -3)]; | |
rawFrame = read.delim(text = lines, header = T); | |
if(removeFlagged) { | |
rawFrame = rawFrame[rawFrame$rejectFlag == 0,]; | |
} | |
if(!is.null(maxError)) { | |
rawFrame = rawFrame[rawFrame$magcal_local_error <= maxError,]; | |
} | |
result = data.frame(Year = bjdDayToYear(rawFrame$Date), Magnitude = rawFrame[[magField]]); | |
if(!is.null(maxOutlierSdFactor)) { | |
result = removeOutliers(result, maxSdFactorPositive = maxOutlierSdFactor); | |
} | |
return(result); | |
} | |
readSonnebergData = function(fileName = "./sonneberg/new/stata_output_pg.csv", magField = "pg", maxOutlierSdFactor = NULL) { | |
rawFrame = read.csv(fileName, header = T); | |
cleanFrame = rawFrame[!is.na(rawFrame[[magField]]),]; | |
result = data.frame(Year = cleanFrame$date, Magnitude = cleanFrame[[magField]]); | |
if(!is.null(maxOutlierSdFactor)) { | |
result = removeOutliers(result, maxSdFactorPositive = maxOutlierSdFactor); | |
} | |
return(result); | |
} | |
readSternbergData = function(fileName = "./sonneberg/new/sternberg_all.csv", maxOutlierSdFactor = NULL) { | |
rawFrame = read.csv(fileName, header = F); | |
result = data.frame(Year = rawFrame[[1]], Magnitude = rawFrame[[2]]); | |
if(!is.null(maxOutlierSdFactor)) { | |
result = removeOutliers(result, maxSdFactorPositive = maxOutlierSdFactor); | |
} | |
return(result); | |
} | |
loadAnalysisData = function(minYear = NULL, maxYear = NULL) { | |
dataFrame = readDaschData(removeFlagged = F, maxOutlierSdFactor = 5.0); | |
dataFrame = appendLightCurveData(dataFrame, | |
readSonnebergData(maxOutlierSdFactor = 2.0)); | |
if(!is.null(minYear)) { | |
dataFrame = dataFrame[dataFrame$Year >= minYear,]; | |
} | |
if(!is.null(maxYear)) { | |
dataFrame = dataFrame[dataFrame$Year <= maxYear,]; | |
} | |
dataFrame = dataFrame[order(dataFrame$Year),]; | |
return(dataFrame); | |
} | |
appendLightCurveData = function(baseFrame, newFrame) { | |
baseRef = mean(baseFrame$Magnitude); | |
newRef = mean(newFrame$Magnitude); | |
newFrameAdjusted = data.frame(Year = newFrame$Year, | |
Magnitude = newFrame$Magnitude + baseRef - newRef); | |
return(rbind(baseFrame, newFrameAdjusted)); | |
} | |
bjdDayToYear = function(bjd) { | |
bjd1800 = bjd - 2378496.5; | |
seconds = bjd1800 * 24 * 60 * 60; | |
date = as.POSIXct(seconds, tz = "UCT", origin = "1800-01-01 UCT"); | |
year = as.numeric(format(date, "%Y")); | |
day = as.numeric(format(date, "%j")); | |
numDays = ifelse(is.leapyear(year), 366, 365); | |
return(year + (day - 1) / numDays); | |
} | |
is.leapyear=function(year){ | |
return(((year %% 4 == 0) & (year %% 100 != 0)) | (year %% 400 == 0)); | |
} | |
# fitSinusoid | |
# This function fits a series given by (x,y) using a pair of sine/cosine signals, | |
# asuming the given period. | |
fitSinusoid = function(x, y, period) { | |
angles = x * 2 * pi / period; | |
sinParams = sin(angles); | |
cosParams = cos(angles); | |
model = lm(y ~ cbind(sinParams, cosParams)); | |
a = model$coefficients[[2]]; | |
b = model$coefficients[[3]]; | |
amp = sqrt(a^2 + b^2); | |
phaseAngle = atan2(b, a); | |
while(phaseAngle < 0) { | |
phaseAngle = phaseAngle + 2 * pi; | |
} | |
phase = phaseAngle * period / (2 * pi); | |
return(list( | |
a = a, | |
b = b, | |
amp = amp, | |
phase = phase | |
)); | |
} | |
crossValidatedEvaluation = function(dataFrame, periods, numFolds = 10) { | |
numRows = nrow(dataFrame); | |
rowOrdinals = seq(numRows); | |
rdata = dataFrame[sample(numRows),]; | |
resMags = numeric(0); | |
resModMags = numeric(0); | |
for(f in 0:(numFolds - 1)) { | |
trainData = rdata[rowOrdinals %% numFolds != f,]; | |
testData = rdata[rowOrdinals %% numFolds == f,]; | |
ms = modelSignal(trainData$Year, trainData$Magnitude, periods, testData$Year); | |
resMags = c(resMags, testData$Magnitude); | |
resModMags = c(resModMags, ms$yout); | |
} | |
return(list( | |
correlation = unbiasedCorrelation(resModMags, resMags) | |
)); | |
} | |
unbiasedCorrelation = function(x, y) { | |
m = mean(x); | |
lenx = length(x); | |
var1 = sum((x - m) ^ 2); | |
var2 = sum((y - m) ^ 2); | |
return(sum((x - m) * (y - m)) / sqrt(var1 * var2)); | |
} | |
multiCrossValidationEvaluation = function(dataFrame, periods, numFolds = 10, numTrials = 50) { | |
correlations = numeric(0); | |
for(t in 1:numTrials) { | |
eval = crossValidatedEvaluation(dataFrame, periods, numFolds); | |
correlations = c(correlations, eval$correlation); | |
} | |
model = modelSignal(dataFrame$Year, dataFrame$Magnitude, periods, dataFrame$Year); | |
return(list( | |
meanCorrelation = mean(correlations), | |
correlations = correlations, | |
model = model, | |
signalFunction = getSignalFunction(model$model, periods) | |
)); | |
} | |
getSignalFunction = function(model, periods) { | |
return(function(year) { | |
varMatrix = getSinusoidalVarMatrix(year, periods); | |
return(predict(model, varMatrix)); | |
}); | |
} | |
modelSignal = function(x, y, periods, xout) { | |
varMatrix = getSinusoidalVarMatrix(x, periods); | |
model = lm(y ~ ., data = varMatrix); | |
outVarMatrix = getSinusoidalVarMatrix(xout, periods); | |
return(list( | |
yout = predict(model, outVarMatrix), | |
model = model | |
)); | |
} | |
getSinusoidalVarMatrix = function(x, periods) { | |
varMatrix = NULL; | |
np = length(periods); | |
for(i in 1:np) { | |
period = periods[i]; | |
angles = x * 2 * pi / period; | |
sinX = sin(angles); | |
cosX = cos(angles); | |
varMatrix = cbind(varMatrix, sinX, cosX); | |
} | |
return(data.frame(varMatrix)); | |
} | |
periodSearch = function(x, y, minPeriod, maxPeriod, convergeDiff = NULL) { | |
if(is.null(convergeDiff)) { | |
convergeDiff = abs(0.001 * (maxPeriod - minPeriod)); | |
} | |
p1 = minPeriod; | |
p2 = maxPeriod; | |
repeat { | |
r = p2 - p1; | |
a = p1 + r * 1 / 3; | |
b = p1 + r * 2 / 3; | |
sf1 = fitSinusoid(x, y, a); | |
sf2 = fitSinusoid(x, y, b); | |
if(sf1$amp > sf2$amp) { | |
p2 = b; | |
} | |
else { | |
p1 = a; | |
} | |
if(abs(p2 - p1) < convergeDiff) { | |
break; | |
} | |
} | |
return((p1 + p2) / 2); | |
} | |
nelderMeadPeriodSearch = function(x, y, initPeriods) { | |
costFunction = function(periods) { | |
m = modelSignal(x, y, periods, x); | |
return(sum((y - m$yout) ^ 2)); | |
}; | |
return(optim(initPeriods, costFunction, method = "Nelder-Mead")); | |
} | |
produceSimSeries = function(period1, period2, phase1, phase2, baselineMag = 12.5, sd = 0.18, amp = 0.02) { | |
time = 1890 + runif(2000) * (1994 - 1890); | |
nobs = length(time); | |
mag = baselineMag + rnorm(n = nobs, sd = sd) + | |
amp * sin(phase1 + time * 2 * pi / period1) + | |
amp * sin(phase2 + time * 2 * pi / period2); | |
dataFrame = data.frame(Year = time, Magnitude = mag); | |
return(dataFrame[order(dataFrame$Year),]); | |
} | |
removeOutliers = function(dataFrame, maxSdFactorPositive, maxSdFactorNegative = NULL) { | |
if(is.null(maxSdFactorNegative)) { | |
maxSdFactorNegative = maxSdFactorPositive; | |
} | |
mean = mean(dataFrame$Magnitude); | |
sd = sd(dataFrame$Magnitude); | |
min = mean - sd * maxSdFactorNegative; | |
max = mean + sd * maxSdFactorPositive; | |
return(dataFrame[dataFrame$Magnitude < max & dataFrame$Magnitude > min,]); | |
} | |
evaluateDasch = function(periods, | |
fileName = "./dasch/KIC8462852_K8462852_0000.txt", | |
magField = "magcal_local", | |
removeFlagged = F, | |
maxOutlierSdFactorPositive = NULL, | |
maxOutlierSdFactorNegative = NULL, | |
minYear = NULL, | |
maxYear = NULL, | |
maxError = NULL) { | |
dataFrame = readDaschData(fileName = fileName, magField = magField, | |
removeFlagged = removeFlagged, maxError = maxError); | |
if(!is.null(minYear)) { | |
dataFrame = dataFrame[dataFrame$Year >= minYear,]; | |
} | |
if(!is.null(maxYear)) { | |
dataFrame = dataFrame[dataFrame$Year <= maxYear,]; | |
} | |
if(!is.null(maxOutlierSdFactorPositive)) { | |
if(is.null(maxOutlierSdFactorNegative)) { | |
maxOutlierSdFactorNegative = maxOutlierSdFactorPositive; | |
} | |
dataFrame = removeOutliers(dataFrame, maxOutlierSdFactorPositive, maxOutlierSdFactorNegative); | |
} | |
print(paste0("Number of rows: ", nrow(dataFrame))); | |
eval = multiCrossValidationEvaluation(dataFrame, periods); | |
return(eval$meanCorrelation); | |
} | |
evaluateSonneberg = function(periods, | |
fileName = "./sonneberg/new/stata_output_pg.csv", | |
magField = "pg", | |
minYear = NULL, | |
maxYear = NULL, | |
maxOutlierSdFactorPositive = NULL, | |
maxOutlierSdFactorNegative = NULL) { | |
dataFrame = readSonnebergData(fileName = fileName, magField = magField); | |
if(!is.null(minYear)) { | |
dataFrame = dataFrame[dataFrame$Year >= minYear,]; | |
} | |
if(!is.null(maxYear)) { | |
dataFrame = dataFrame[dataFrame$Year <= maxYear,]; | |
} | |
if(!is.null(maxOutlierSdFactorPositive)) { | |
if(is.null(maxOutlierSdFactorNegative)) { | |
maxOutlierSdFactorNegative = maxOutlierSdFactorPositive; | |
} | |
dataFrame = removeOutliers(dataFrame, maxOutlierSdFactorPositive, maxOutlierSdFactorNegative); | |
} | |
print(paste0("Number of rows: ", nrow(dataFrame))); | |
eval = multiCrossValidationEvaluation(dataFrame, periods); | |
return(eval$meanCorrelation); | |
} | |
evaluateAnalysisData = function(periods, | |
minYear = NULL, | |
maxYear = NULL) { | |
dataFrame = loadAnalysisData(); | |
if(!is.null(minYear)) { | |
dataFrame = dataFrame[dataFrame$Year >= minYear,]; | |
} | |
if(!is.null(maxYear)) { | |
dataFrame = dataFrame[dataFrame$Year <= maxYear,]; | |
} | |
print(paste0("Number of rows: ", nrow(dataFrame))); | |
eval = multiCrossValidationEvaluation(dataFrame, periods); | |
return(eval$meanCorrelation); | |
} | |
testNoisePeriodograms = function(numTrials = 500) { | |
maxPowerArray = numeric(numTrials); | |
for(t in 1:numTrials) { | |
print(paste0("Trial: ", t)); | |
ss = produceSimSeries(5, 7, 0, 0, amp = 0); | |
ssp = lsp(x = ss$Magnitude, times = ss$Year, from = 3, to = 10, | |
type = "period", ofac = 8, plot = F); | |
maxPowerArray[t] = max(ssp$power); | |
} | |
return(maxPowerArray); | |
} | |
plotPeriodogram = function(ss, title = "LS Periodogram", fileName = NULL, ylim = NULL) { | |
ssp = lsp(x = ss$Magnitude, times = ss$Year, from = 3, to = 12, | |
type = "period", ofac = 8, plot = F); | |
if(!is.null(fileName)) { | |
png(fileName, width = 800, height = 600); | |
} | |
plot(ssp$scanned, ssp$power, | |
pch=19, cex = .5, | |
ylim = ylim, | |
xlab = "Period (Years)", ylab = "Normalized Power"); | |
lines(ssp$scanned, ssp$power); | |
title(main = title); | |
if(!is.null(fileName)) { | |
dev.off(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment