davidmanheim/Goodhart Agent-Regulator Simulations.R

## Goodhart Agent-Regulator Simulations.R

#We define some function f(x,y) which is the baseline truth. Neither agents nor the regulator have access to this.

Build_True_fx = function(degrees){
  #we need a polynomial function where the coefficients are random.

  num_terms = ((degrees+2)^2-degrees-2) / 2 #I need this many; sum(1..[degrees+1]). See below.
  # deg=0: 1 - c, deg=1: 3 - x, y  + terms from deg=0, etc.
  coeffs = rnorm(num_terms)
  print(coeffs)
  #print(sum(coeffs))
  #The exponents of each term are:
  #n=3: x^3+x^2*y
  True_Fx = function(x,y){
    output = 0
    counter = 1
      for(n in 0:degrees) {
        for (k in 0:n){ #For each set, the exponents are (n,0), (n-1,1), ... (0,n)
          #print(paste0("n=", n, " k=", k, " coefficient =", coeffs[counter]))
          output = output + coeffs[counter]*x^(n-k)*y^k
          counter=counter+1
        }
      }
    return(output)
  }

  return(True_Fx)
}

set.seed(0)
fx=Build_True_fx(3)
fx(1,1)
gx=Build_True_fx(2)

#Regulator goal is maximize f(x,y)

#Agent goal is to maximize g(x,y)


#With this seed, they won't get along well...

#Let's look at the true surface.
grid_nums = seq.int(-50,50,1)/2.0
cols=palette(gray(5))
plot(x=grid_nums,fx(grid_nums,-20),col=cols[1])
points(x=grid_nums,fx(grid_nums,-10),col=cols[2])
points(x=grid_nums,fx(grid_nums,0),col=cols[3])
points(x=grid_nums,fx(grid_nums,10), col=cols[4])
points(x=grid_nums,fx(grid_nums,20), col=cols[5])

x_grid=matrix(rep(grid_nums,each=101),101,101)
y_grid=t(matrix(rep(grid_nums,each=101),101,101))
library(plotly)

f_x_grid_values= fx(x_grid,y_grid)
p <- plot_ly(z = ~f_x_grid_values) %>% add_surface()
p
g_x_grid_values= gx(x_grid,y_grid)
p <- plot_ly(z = ~g_x_grid_values) %>% add_surface()
p


#Now we need a regulator metric and an agent metric.
#Obvious choice is to have regulator see x, agent sees y also.
#Each builds a model approximating their true goal based on seeing data, and picks a region they think they will like by restricting their input.


#Let's start off with everyone getting access to 50 data points.
datapoints=50
#These are all near the origin ;)

x_inputs=rnorm(datapoints)
y_inputs=rnorm(datapoints)

#We start by assuming everyone thinks the true model is linear, and that the data they get on the inputs is noisy...
Observed_x_inputs=rnorm(datapoints,x_inputs,0.1)
Observed_y_inputs=rnorm(datapoints,y_inputs,0.1)

f_outputs = fx(x_inputs,y_inputs)
Perfect_data=data.frame(x=x_inputs,y=y_inputs,z=f_outputs)
Regulator_data=data.frame(x=Observed_x_inputs,y=Observed_y_inputs,z=f_outputs)

g_outputs = fx(x_inputs,y_inputs)


unfair_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Perfect_data)
#That is an essentially perfect fit. Good to know.
summary.lm(unfair_model)

best_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Regulator_data)
summary.lm(best_model)


regulator_linear = lm(z ~ polym(x, y, degree=1,raw=TRUE), data = Regulator_data)
summary.lm(regulator_linear)

regulator_quadratic = lm(z ~ polym(x, degree=2,raw=TRUE), data = Regulator_data)
summary.lm(regulator_quadratic)

####################################
# VERY Simple Goodhart Simulations #
####################################

# Missing Cause Case

#In continuous cases, this is more difficult to describe. For example, if we consider the trunc_valated distribution for the Missing Cause case, we need to look at $p(Goal|Metric>c)$. This gives:
#  \begin{equation}
#\int_{-\infty}^{\infty}{
#  \int_{c}^{\infty}{ p(Metric|X,Goal) \cdot p(X) \cdot p(Goal)
#    \: \partial Metric}\: \partial X}
#\end{equation}

hist(rcauchy(10000,0,1/10))

x <- rnorm(10000)
x_fat <- rcauchy(10000,0,1)/10
Goal <- rnorm(10000)
Metric <- rnorm(10000,x+Goal)
Metric_NoX <- rnorm(10000,Goal)
Metric_X_fat <- rnorm(10000,x_fat+Goal)
plot(x_fat,Metric_X_fat)
plot(Goal,Metric_X_fat)
hist(Metric, ylab="Density", main="Untrunc_valated Metric")
hist(x_fat)
#trunc_valate on metric for c=3
#hist(Goal[which(Metric>3)])
trunc_val = 3
#require(tikzDevice)
#tikz( 'myPlot.tex' )
plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue")
#dev.off()

plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue")
cor(Goal,Metric_NoX)
cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])

trunc_val = 3
cor(Goal,Metric_X_fat)
cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)])
plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X")
points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue")

trunc_val=4
cor(Goal,Metric_X_fat)
cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)])
plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X")
points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue")

mean(Goal[which(Metric_X_fat>0)])
mean(Goal[which(Metric_X_fat>3)])
mean(Goal[which(Metric_X_fat>4)])
mean(Goal[which(Metric_X_fat>5)])
mean(Goal[which(Metric_X_fat>6)])
mean(Goal[which(Metric_X_fat>7)])
mean(Goal[which(Metric_X_fat>15)])
mean(Goal[which(Metric_X_fat>25)])

plot(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],col=(Metric>trunc_val))
cor(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)])
cor(Goal,Metric)


# Now, we allow the agent to apply selection pressure

x <- rnorm(10000)
Goal <- rnorm(10000)
Metric <- rnorm(10000,x+Goal)
A_Goal <-rnorm(10000)
A_Metric <-rnorm(10000,x*A_Goal)

cor(Goal,Metric)
cor(A_Goal,A_Metric)

cor(Goal[which((Metric>2)&(A_Metric>2))],Metric[which((Metric>2)&(A_Metric>2))])
cor(Goal[which((Metric>2))],Metric[which((Metric>2))])
mean(Goal[which(Metric>2)])
mean(Goal[which((Metric>2)&(A_Metric>2))])
mean(A_Goal[which((Metric>2)&(A_Metric>2))])
mean(A_Goal)
mean(A_Goal[which(Metric>2)])


mean(Goal[which(Metric>2)])
mean(Goal[which((Metric>2)&(A_Metric>1))])
mean(A_Goal[which((Metric>2)&(A_Metric>1))])
mean(A_Goal)
mean(A_Goal[which(Metric>2)])


#Alternatively;
A_Metric <-rnorm(10000,Metric*A_Goal)

A_Metric <-rnorm(10000,Metric*(A_Goal-1))


cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])


#trunc_valate on metric for c=3
#hist(Goal[which(Metric>3)])
trunc_val = 2
#require(tikzDevice)
#tikz( 'myPlot.tex' )
plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue")
#dev.off()

plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue")
cor(Goal,Metric_NoX)
cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])

	#We define some function f(x,y) which is the baseline truth. Neither agents nor the regulator have access to this.

	Build_True_fx = function(degrees){
	#we need a polynomial function where the coefficients are random.

	num_terms = ((degrees+2)^2-degrees-2) / 2 #I need this many; sum(1..[degrees+1]). See below.
	# deg=0: 1 - c, deg=1: 3 - x, y + terms from deg=0, etc.
	coeffs = rnorm(num_terms)
	print(coeffs)
	#print(sum(coeffs))
	#The exponents of each term are:
	#n=3: x^3+x^2*y
	True_Fx = function(x,y){
	output = 0
	counter = 1
	for(n in 0:degrees) {
	for (k in 0:n){ #For each set, the exponents are (n,0), (n-1,1), ... (0,n)
	#print(paste0("n=", n, " k=", k, " coefficient =", coeffs[counter]))
	output = output + coeffs[counter]x^(n-k)y^k
	counter=counter+1
	}
	}
	return(output)
	}

	return(True_Fx)
	}

	set.seed(0)
	fx=Build_True_fx(3)
	fx(1,1)
	gx=Build_True_fx(2)

	#Regulator goal is maximize f(x,y)

	#Agent goal is to maximize g(x,y)



	#With this seed, they won't get along well...

	#Let's look at the true surface.
	grid_nums = seq.int(-50,50,1)/2.0
	cols=palette(gray(5))
	plot(x=grid_nums,fx(grid_nums,-20),col=cols[1])
	points(x=grid_nums,fx(grid_nums,-10),col=cols[2])
	points(x=grid_nums,fx(grid_nums,0),col=cols[3])
	points(x=grid_nums,fx(grid_nums,10), col=cols[4])
	points(x=grid_nums,fx(grid_nums,20), col=cols[5])

	x_grid=matrix(rep(grid_nums,each=101),101,101)
	y_grid=t(matrix(rep(grid_nums,each=101),101,101))
	library(plotly)

	f_x_grid_values= fx(x_grid,y_grid)
	p <- plot_ly(z = ~f_x_grid_values) %>% add_surface()
	p
	g_x_grid_values= gx(x_grid,y_grid)
	p <- plot_ly(z = ~g_x_grid_values) %>% add_surface()
	p


	#Now we need a regulator metric and an agent metric.
	#Obvious choice is to have regulator see x, agent sees y also.
	#Each builds a model approximating their true goal based on seeing data, and picks a region they think they will like by restricting their input.


	#Let's start off with everyone getting access to 50 data points.
	datapoints=50
	#These are all near the origin ;)

	x_inputs=rnorm(datapoints)
	y_inputs=rnorm(datapoints)

	#We start by assuming everyone thinks the true model is linear, and that the data they get on the inputs is noisy...
	Observed_x_inputs=rnorm(datapoints,x_inputs,0.1)
	Observed_y_inputs=rnorm(datapoints,y_inputs,0.1)

	f_outputs = fx(x_inputs,y_inputs)
	Perfect_data=data.frame(x=x_inputs,y=y_inputs,z=f_outputs)
	Regulator_data=data.frame(x=Observed_x_inputs,y=Observed_y_inputs,z=f_outputs)

	g_outputs = fx(x_inputs,y_inputs)


	unfair_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Perfect_data)
	#That is an essentially perfect fit. Good to know.
	summary.lm(unfair_model)

	best_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Regulator_data)
	summary.lm(best_model)


	regulator_linear = lm(z ~ polym(x, y, degree=1,raw=TRUE), data = Regulator_data)
	summary.lm(regulator_linear)

	regulator_quadratic = lm(z ~ polym(x, degree=2,raw=TRUE), data = Regulator_data)
	summary.lm(regulator_quadratic)

	####################################
	# VERY Simple Goodhart Simulations #
	####################################

	# Missing Cause Case

	#In continuous cases, this is more difficult to describe. For example, if we consider the trunc_valated distribution for the Missing Cause case, we need to look at $p(Goal\|Metric>c)$. This gives:
	# \begin{equation}
	#\int_{-\infty}^{\infty}{
	# \int_{c}^{\infty}{ p(Metric\|X,Goal) \cdot p(X) \cdot p(Goal)
	# \: \partial Metric}\: \partial X}
	#\end{equation}

	hist(rcauchy(10000,0,1/10))

	x <- rnorm(10000)
	x_fat <- rcauchy(10000,0,1)/10
	Goal <- rnorm(10000)
	Metric <- rnorm(10000,x+Goal)
	Metric_NoX <- rnorm(10000,Goal)
	Metric_X_fat <- rnorm(10000,x_fat+Goal)
	plot(x_fat,Metric_X_fat)
	plot(Goal,Metric_X_fat)
	hist(Metric, ylab="Density", main="Untrunc_valated Metric")
	hist(x_fat)
	#trunc_valate on metric for c=3
	#hist(Goal[which(Metric>3)])
	trunc_val = 3
	#require(tikzDevice)
	#tikz( 'myPlot.tex' )
	plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
	points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue")
	#dev.off()

	plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
	points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue")
	cor(Goal,Metric_NoX)
	cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])

	trunc_val = 3
	cor(Goal,Metric_X_fat)
	cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)])
	plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X")
	points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue")

	trunc_val=4
	cor(Goal,Metric_X_fat)
	cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)])
	plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X")
	points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue")

	mean(Goal[which(Metric_X_fat>0)])
	mean(Goal[which(Metric_X_fat>3)])
	mean(Goal[which(Metric_X_fat>4)])
	mean(Goal[which(Metric_X_fat>5)])
	mean(Goal[which(Metric_X_fat>6)])
	mean(Goal[which(Metric_X_fat>7)])
	mean(Goal[which(Metric_X_fat>15)])
	mean(Goal[which(Metric_X_fat>25)])

	plot(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],col=(Metric>trunc_val))
	cor(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)])
	cor(Goal,Metric)


	# Now, we allow the agent to apply selection pressure

	x <- rnorm(10000)
	Goal <- rnorm(10000)
	Metric <- rnorm(10000,x+Goal)
	A_Goal <-rnorm(10000)
	A_Metric <-rnorm(10000,x*A_Goal)

	cor(Goal,Metric)
	cor(A_Goal,A_Metric)

	cor(Goal[which((Metric>2)&(A_Metric>2))],Metric[which((Metric>2)&(A_Metric>2))])
	cor(Goal[which((Metric>2))],Metric[which((Metric>2))])
	mean(Goal[which(Metric>2)])
	mean(Goal[which((Metric>2)&(A_Metric>2))])
	mean(A_Goal[which((Metric>2)&(A_Metric>2))])
	mean(A_Goal)
	mean(A_Goal[which(Metric>2)])


	mean(Goal[which(Metric>2)])
	mean(Goal[which((Metric>2)&(A_Metric>1))])
	mean(A_Goal[which((Metric>2)&(A_Metric>1))])
	mean(A_Goal)
	mean(A_Goal[which(Metric>2)])


	#Alternatively;
	A_Metric <-rnorm(10000,Metric*A_Goal)

	A_Metric <-rnorm(10000,Metric*(A_Goal-1))


	cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])



	#trunc_valate on metric for c=3
	#hist(Goal[which(Metric>3)])
	trunc_val = 2
	#require(tikzDevice)
	#tikz( 'myPlot.tex' )
	plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
	points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue")
	#dev.off()

	plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X")
	points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue")
	cor(Goal,Metric_NoX)
	cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])