Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#We define some function f(x,y) which is the baseline truth. Neither agents nor the regulator have access to this.
Build_True_fx = function(degrees){
#we need a polynomial function where the coefficients are random.
num_terms = ((degrees+2)^2-degrees-2) / 2 #I need this many; sum(1..[degrees+1]). See below.
# deg=0: 1 - c, deg=1: 3 - x, y + terms from deg=0, etc.
coeffs = rnorm(num_terms)
print(coeffs)
#print(sum(coeffs))
#The exponents of each term are:
#n=3: x^3+x^2*y
True_Fx = function(x,y){
output = 0
counter = 1
for(n in 0:degrees) {
for (k in 0:n){ #For each set, the exponents are (n,0), (n-1,1), ... (0,n)
#print(paste0("n=", n, " k=", k, " coefficient =", coeffs[counter]))
output = output + coeffs[counter]*x^(n-k)*y^k
counter=counter+1
}
}
return(output)
}
return(True_Fx)
}
set.seed(0)
fx=Build_True_fx(3)
fx(1,1)
gx=Build_True_fx(2)
#Regulator goal is maximize f(x,y)
#Agent goal is to maximize g(x,y)
#With this seed, they won't get along well...
#Let's look at the true surface.
grid_nums = seq.int(-50,50,1)/2.0
cols=palette(gray(5))
plot(x=grid_nums,fx(grid_nums,-20),col=cols[1])
points(x=grid_nums,fx(grid_nums,-10),col=cols[2])
points(x=grid_nums,fx(grid_nums,0),col=cols[3])
points(x=grid_nums,fx(grid_nums,10), col=cols[4])
points(x=grid_nums,fx(grid_nums,20), col=cols[5])
x_grid=matrix(rep(grid_nums,each=101),101,101)
y_grid=t(matrix(rep(grid_nums,each=101),101,101))
library(plotly)
f_x_grid_values= fx(x_grid,y_grid)
p <- plot_ly(z = ~f_x_grid_values) %>% add_surface()
p
g_x_grid_values= gx(x_grid,y_grid)
p <- plot_ly(z = ~g_x_grid_values) %>% add_surface()
p
#Now we need a regulator metric and an agent metric.
#Obvious choice is to have regulator see x, agent sees y also.
#Each builds a model approximating their true goal based on seeing data, and picks a region they think they will like by restricting their input.
#Let's start off with everyone getting access to 50 data points.
datapoints=50
#These are all near the origin ;)
x_inputs=rnorm(datapoints)
y_inputs=rnorm(datapoints)
#We start by assuming everyone thinks the true model is linear, and that the data they get on the inputs is noisy...
Observed_x_inputs=rnorm(datapoints,x_inputs,0.1)
Observed_y_inputs=rnorm(datapoints,y_inputs,0.1)
f_outputs = fx(x_inputs,y_inputs)
Perfect_data=data.frame(x=x_inputs,y=y_inputs,z=f_outputs)
Regulator_data=data.frame(x=Observed_x_inputs,y=Observed_y_inputs,z=f_outputs)
g_outputs = fx(x_inputs,y_inputs)
unfair_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Perfect_data)
#That is an essentially perfect fit. Good to know.
summary.lm(unfair_model)
best_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Regulator_data)
summary.lm(best_model)
regulator_linear = lm(z ~ polym(x, y, degree=1,raw=TRUE), data = Regulator_data)
summary.lm(regulator_linear)
regulator_quadratic = lm(z ~ polym(x, degree=2,raw=TRUE), data = Regulator_data)
summary.lm(regulator_quadratic)
####################################
# VERY Simple Goodhart Simulations #
####################################
# Missing Cause Case
#In continuous cases, this is more difficult to describe. For example, if we consider the trunc_valated distribution for the Missing Cause case, we need to look at $p(Goal|Metric>c)$. This gives:
# \begin{equation}
#\int_{-\infty}^{\infty}{
# \int_{c}^{\infty}{ p(Metric|X,Goal) \cdot p(X) \cdot p(Goal)
# \: \partial Metric}\: \partial X}
#\end{equation}
hist(rcauchy(10000,0,1/10))
x <- rnorm(10000)
x_fat <- rcauchy(10000,0,1)/10
Goal <- rnorm(10000)
Metric <- rnorm(10000,x+Goal)
Metric_NoX <- rnorm(10000,Goal)
Metric_X_fat <- rnorm(10000,x_fat+Goal)
plot(x_fat,Metric_X_fat)
plot(Goal,Metric_X_fat)
hist(Metric, ylab="Density", main="Untrunc_valated Metric")
hist(x_fat)
#trunc_valate on metric for c=3
#hist(Goal[which(Metric>3)])
trunc_val = 3
#require(tikzDevice)
#tikz( 'myPlot.tex' )
plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue")
#dev.off()
plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue")
cor(Goal,Metric_NoX)
cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])
trunc_val = 3
cor(Goal,Metric_X_fat)
cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)])
plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X")
points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue")
trunc_val=4
cor(Goal,Metric_X_fat)
cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)])
plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X")
points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue")
mean(Goal[which(Metric_X_fat>0)])
mean(Goal[which(Metric_X_fat>3)])
mean(Goal[which(Metric_X_fat>4)])
mean(Goal[which(Metric_X_fat>5)])
mean(Goal[which(Metric_X_fat>6)])
mean(Goal[which(Metric_X_fat>7)])
mean(Goal[which(Metric_X_fat>15)])
mean(Goal[which(Metric_X_fat>25)])
plot(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],col=(Metric>trunc_val))
cor(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)])
cor(Goal,Metric)
# Now, we allow the agent to apply selection pressure
x <- rnorm(10000)
Goal <- rnorm(10000)
Metric <- rnorm(10000,x+Goal)
A_Goal <-rnorm(10000)
A_Metric <-rnorm(10000,x*A_Goal)
cor(Goal,Metric)
cor(A_Goal,A_Metric)
cor(Goal[which((Metric>2)&(A_Metric>2))],Metric[which((Metric>2)&(A_Metric>2))])
cor(Goal[which((Metric>2))],Metric[which((Metric>2))])
mean(Goal[which(Metric>2)])
mean(Goal[which((Metric>2)&(A_Metric>2))])
mean(A_Goal[which((Metric>2)&(A_Metric>2))])
mean(A_Goal)
mean(A_Goal[which(Metric>2)])
mean(Goal[which(Metric>2)])
mean(Goal[which((Metric>2)&(A_Metric>1))])
mean(A_Goal[which((Metric>2)&(A_Metric>1))])
mean(A_Goal)
mean(A_Goal[which(Metric>2)])
#Alternatively;
A_Metric <-rnorm(10000,Metric*A_Goal)
A_Metric <-rnorm(10000,Metric*(A_Goal-1))
cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])
#trunc_valate on metric for c=3
#hist(Goal[which(Metric>3)])
trunc_val = 2
#require(tikzDevice)
#tikz( 'myPlot.tex' )
plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue")
#dev.off()
plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue")
cor(Goal,Metric_NoX)
cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.