Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@jose-solorzano
Created September 18, 2017 04:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jose-solorzano/2361a1479acc34128401d59f4e8aaf0f to your computer and use it in GitHub Desktop.
Save jose-solorzano/2361a1479acc34128401d59f4e8aaf0f to your computer and use it in GitHub Desktop.
# Functions for analysis of century-long periodicity of KIC 8462852.
library(lomb)
readDaschData = function(fileName = "./dasch/KIC8462852_K8462852_0000.txt",
removeFlagged = F, maxError = NULL, magField = "magcal_local", maxOutlierSdFactor = NULL) {
con = file(fileName);
lines = readLines(con);
close(con);
lines = lines[c(-1, -3)];
rawFrame = read.delim(text = lines, header = T);
if(removeFlagged) {
rawFrame = rawFrame[rawFrame$rejectFlag == 0,];
}
if(!is.null(maxError)) {
rawFrame = rawFrame[rawFrame$magcal_local_error <= maxError,];
}
result = data.frame(Year = bjdDayToYear(rawFrame$Date), Magnitude = rawFrame[[magField]]);
if(!is.null(maxOutlierSdFactor)) {
result = removeOutliers(result, maxSdFactorPositive = maxOutlierSdFactor);
}
return(result);
}
readSonnebergData = function(fileName = "./sonneberg/new/stata_output_pg.csv", magField = "pg", maxOutlierSdFactor = NULL) {
rawFrame = read.csv(fileName, header = T);
cleanFrame = rawFrame[!is.na(rawFrame[[magField]]),];
result = data.frame(Year = cleanFrame$date, Magnitude = cleanFrame[[magField]]);
if(!is.null(maxOutlierSdFactor)) {
result = removeOutliers(result, maxSdFactorPositive = maxOutlierSdFactor);
}
return(result);
}
readSternbergData = function(fileName = "./sonneberg/new/sternberg_all.csv", maxOutlierSdFactor = NULL) {
rawFrame = read.csv(fileName, header = F);
result = data.frame(Year = rawFrame[[1]], Magnitude = rawFrame[[2]]);
if(!is.null(maxOutlierSdFactor)) {
result = removeOutliers(result, maxSdFactorPositive = maxOutlierSdFactor);
}
return(result);
}
loadAnalysisData = function(minYear = NULL, maxYear = NULL) {
dataFrame = readDaschData(removeFlagged = F, maxOutlierSdFactor = 5.0);
dataFrame = appendLightCurveData(dataFrame,
readSonnebergData(maxOutlierSdFactor = 2.0));
if(!is.null(minYear)) {
dataFrame = dataFrame[dataFrame$Year >= minYear,];
}
if(!is.null(maxYear)) {
dataFrame = dataFrame[dataFrame$Year <= maxYear,];
}
dataFrame = dataFrame[order(dataFrame$Year),];
return(dataFrame);
}
appendLightCurveData = function(baseFrame, newFrame) {
baseRef = mean(baseFrame$Magnitude);
newRef = mean(newFrame$Magnitude);
newFrameAdjusted = data.frame(Year = newFrame$Year,
Magnitude = newFrame$Magnitude + baseRef - newRef);
return(rbind(baseFrame, newFrameAdjusted));
}
bjdDayToYear = function(bjd) {
bjd1800 = bjd - 2378496.5;
seconds = bjd1800 * 24 * 60 * 60;
date = as.POSIXct(seconds, tz = "UCT", origin = "1800-01-01 UCT");
year = as.numeric(format(date, "%Y"));
day = as.numeric(format(date, "%j"));
numDays = ifelse(is.leapyear(year), 366, 365);
return(year + (day - 1) / numDays);
}
is.leapyear=function(year){
return(((year %% 4 == 0) & (year %% 100 != 0)) | (year %% 400 == 0));
}
# fitSinusoid
# This function fits a series given by (x,y) using a pair of sine/cosine signals,
# asuming the given period.
fitSinusoid = function(x, y, period) {
angles = x * 2 * pi / period;
sinParams = sin(angles);
cosParams = cos(angles);
model = lm(y ~ cbind(sinParams, cosParams));
a = model$coefficients[[2]];
b = model$coefficients[[3]];
amp = sqrt(a^2 + b^2);
phaseAngle = atan2(b, a);
while(phaseAngle < 0) {
phaseAngle = phaseAngle + 2 * pi;
}
phase = phaseAngle * period / (2 * pi);
return(list(
a = a,
b = b,
amp = amp,
phase = phase
));
}
crossValidatedEvaluation = function(dataFrame, periods, numFolds = 10) {
numRows = nrow(dataFrame);
rowOrdinals = seq(numRows);
rdata = dataFrame[sample(numRows),];
resMags = numeric(0);
resModMags = numeric(0);
for(f in 0:(numFolds - 1)) {
trainData = rdata[rowOrdinals %% numFolds != f,];
testData = rdata[rowOrdinals %% numFolds == f,];
ms = modelSignal(trainData$Year, trainData$Magnitude, periods, testData$Year);
resMags = c(resMags, testData$Magnitude);
resModMags = c(resModMags, ms$yout);
}
return(list(
correlation = unbiasedCorrelation(resModMags, resMags)
));
}
unbiasedCorrelation = function(x, y) {
m = mean(x);
lenx = length(x);
var1 = sum((x - m) ^ 2);
var2 = sum((y - m) ^ 2);
return(sum((x - m) * (y - m)) / sqrt(var1 * var2));
}
multiCrossValidationEvaluation = function(dataFrame, periods, numFolds = 10, numTrials = 50) {
correlations = numeric(0);
for(t in 1:numTrials) {
eval = crossValidatedEvaluation(dataFrame, periods, numFolds);
correlations = c(correlations, eval$correlation);
}
model = modelSignal(dataFrame$Year, dataFrame$Magnitude, periods, dataFrame$Year);
return(list(
meanCorrelation = mean(correlations),
correlations = correlations,
model = model,
signalFunction = getSignalFunction(model$model, periods)
));
}
getSignalFunction = function(model, periods) {
return(function(year) {
varMatrix = getSinusoidalVarMatrix(year, periods);
return(predict(model, varMatrix));
});
}
modelSignal = function(x, y, periods, xout) {
varMatrix = getSinusoidalVarMatrix(x, periods);
model = lm(y ~ ., data = varMatrix);
outVarMatrix = getSinusoidalVarMatrix(xout, periods);
return(list(
yout = predict(model, outVarMatrix),
model = model
));
}
getSinusoidalVarMatrix = function(x, periods) {
varMatrix = NULL;
np = length(periods);
for(i in 1:np) {
period = periods[i];
angles = x * 2 * pi / period;
sinX = sin(angles);
cosX = cos(angles);
varMatrix = cbind(varMatrix, sinX, cosX);
}
return(data.frame(varMatrix));
}
periodSearch = function(x, y, minPeriod, maxPeriod, convergeDiff = NULL) {
if(is.null(convergeDiff)) {
convergeDiff = abs(0.001 * (maxPeriod - minPeriod));
}
p1 = minPeriod;
p2 = maxPeriod;
repeat {
r = p2 - p1;
a = p1 + r * 1 / 3;
b = p1 + r * 2 / 3;
sf1 = fitSinusoid(x, y, a);
sf2 = fitSinusoid(x, y, b);
if(sf1$amp > sf2$amp) {
p2 = b;
}
else {
p1 = a;
}
if(abs(p2 - p1) < convergeDiff) {
break;
}
}
return((p1 + p2) / 2);
}
nelderMeadPeriodSearch = function(x, y, initPeriods) {
costFunction = function(periods) {
m = modelSignal(x, y, periods, x);
return(sum((y - m$yout) ^ 2));
};
return(optim(initPeriods, costFunction, method = "Nelder-Mead"));
}
produceSimSeries = function(period1, period2, phase1, phase2, baselineMag = 12.5, sd = 0.18, amp = 0.02) {
time = 1890 + runif(2000) * (1994 - 1890);
nobs = length(time);
mag = baselineMag + rnorm(n = nobs, sd = sd) +
amp * sin(phase1 + time * 2 * pi / period1) +
amp * sin(phase2 + time * 2 * pi / period2);
dataFrame = data.frame(Year = time, Magnitude = mag);
return(dataFrame[order(dataFrame$Year),]);
}
removeOutliers = function(dataFrame, maxSdFactorPositive, maxSdFactorNegative = NULL) {
if(is.null(maxSdFactorNegative)) {
maxSdFactorNegative = maxSdFactorPositive;
}
mean = mean(dataFrame$Magnitude);
sd = sd(dataFrame$Magnitude);
min = mean - sd * maxSdFactorNegative;
max = mean + sd * maxSdFactorPositive;
return(dataFrame[dataFrame$Magnitude < max & dataFrame$Magnitude > min,]);
}
evaluateDasch = function(periods,
fileName = "./dasch/KIC8462852_K8462852_0000.txt",
magField = "magcal_local",
removeFlagged = F,
maxOutlierSdFactorPositive = NULL,
maxOutlierSdFactorNegative = NULL,
minYear = NULL,
maxYear = NULL,
maxError = NULL) {
dataFrame = readDaschData(fileName = fileName, magField = magField,
removeFlagged = removeFlagged, maxError = maxError);
if(!is.null(minYear)) {
dataFrame = dataFrame[dataFrame$Year >= minYear,];
}
if(!is.null(maxYear)) {
dataFrame = dataFrame[dataFrame$Year <= maxYear,];
}
if(!is.null(maxOutlierSdFactorPositive)) {
if(is.null(maxOutlierSdFactorNegative)) {
maxOutlierSdFactorNegative = maxOutlierSdFactorPositive;
}
dataFrame = removeOutliers(dataFrame, maxOutlierSdFactorPositive, maxOutlierSdFactorNegative);
}
print(paste0("Number of rows: ", nrow(dataFrame)));
eval = multiCrossValidationEvaluation(dataFrame, periods);
return(eval$meanCorrelation);
}
evaluateSonneberg = function(periods,
fileName = "./sonneberg/new/stata_output_pg.csv",
magField = "pg",
minYear = NULL,
maxYear = NULL,
maxOutlierSdFactorPositive = NULL,
maxOutlierSdFactorNegative = NULL) {
dataFrame = readSonnebergData(fileName = fileName, magField = magField);
if(!is.null(minYear)) {
dataFrame = dataFrame[dataFrame$Year >= minYear,];
}
if(!is.null(maxYear)) {
dataFrame = dataFrame[dataFrame$Year <= maxYear,];
}
if(!is.null(maxOutlierSdFactorPositive)) {
if(is.null(maxOutlierSdFactorNegative)) {
maxOutlierSdFactorNegative = maxOutlierSdFactorPositive;
}
dataFrame = removeOutliers(dataFrame, maxOutlierSdFactorPositive, maxOutlierSdFactorNegative);
}
print(paste0("Number of rows: ", nrow(dataFrame)));
eval = multiCrossValidationEvaluation(dataFrame, periods);
return(eval$meanCorrelation);
}
evaluateAnalysisData = function(periods,
minYear = NULL,
maxYear = NULL) {
dataFrame = loadAnalysisData();
if(!is.null(minYear)) {
dataFrame = dataFrame[dataFrame$Year >= minYear,];
}
if(!is.null(maxYear)) {
dataFrame = dataFrame[dataFrame$Year <= maxYear,];
}
print(paste0("Number of rows: ", nrow(dataFrame)));
eval = multiCrossValidationEvaluation(dataFrame, periods);
return(eval$meanCorrelation);
}
testNoisePeriodograms = function(numTrials = 500) {
maxPowerArray = numeric(numTrials);
for(t in 1:numTrials) {
print(paste0("Trial: ", t));
ss = produceSimSeries(5, 7, 0, 0, amp = 0);
ssp = lsp(x = ss$Magnitude, times = ss$Year, from = 3, to = 10,
type = "period", ofac = 8, plot = F);
maxPowerArray[t] = max(ssp$power);
}
return(maxPowerArray);
}
plotPeriodogram = function(ss, title = "LS Periodogram", fileName = NULL, ylim = NULL) {
ssp = lsp(x = ss$Magnitude, times = ss$Year, from = 3, to = 12,
type = "period", ofac = 8, plot = F);
if(!is.null(fileName)) {
png(fileName, width = 800, height = 600);
}
plot(ssp$scanned, ssp$power,
pch=19, cex = .5,
ylim = ylim,
xlab = "Period (Years)", ylab = "Normalized Power");
lines(ssp$scanned, ssp$power);
title(main = title);
if(!is.null(fileName)) {
dev.off();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment