# davidmanheim/Goodhart Agent-Regulator Simulations.R Created Jun 18, 2019
 #We define some function f(x,y) which is the baseline truth. Neither agents nor the regulator have access to this. Build_True_fx = function(degrees){ #we need a polynomial function where the coefficients are random. num_terms = ((degrees+2)^2-degrees-2) / 2 #I need this many; sum(1..[degrees+1]). See below. # deg=0: 1 - c, deg=1: 3 - x, y + terms from deg=0, etc. coeffs = rnorm(num_terms) print(coeffs) #print(sum(coeffs)) #The exponents of each term are: #n=3: x^3+x^2*y True_Fx = function(x,y){ output = 0 counter = 1 for(n in 0:degrees) { for (k in 0:n){ #For each set, the exponents are (n,0), (n-1,1), ... (0,n) #print(paste0("n=", n, " k=", k, " coefficient =", coeffs[counter])) output = output + coeffs[counter]*x^(n-k)*y^k counter=counter+1 } } return(output) } return(True_Fx) } set.seed(0) fx=Build_True_fx(3) fx(1,1) gx=Build_True_fx(2) #Regulator goal is maximize f(x,y) #Agent goal is to maximize g(x,y) #With this seed, they won't get along well... #Let's look at the true surface. grid_nums = seq.int(-50,50,1)/2.0 cols=palette(gray(5)) plot(x=grid_nums,fx(grid_nums,-20),col=cols) points(x=grid_nums,fx(grid_nums,-10),col=cols) points(x=grid_nums,fx(grid_nums,0),col=cols) points(x=grid_nums,fx(grid_nums,10), col=cols) points(x=grid_nums,fx(grid_nums,20), col=cols) x_grid=matrix(rep(grid_nums,each=101),101,101) y_grid=t(matrix(rep(grid_nums,each=101),101,101)) library(plotly) f_x_grid_values= fx(x_grid,y_grid) p <- plot_ly(z = ~f_x_grid_values) %>% add_surface() p g_x_grid_values= gx(x_grid,y_grid) p <- plot_ly(z = ~g_x_grid_values) %>% add_surface() p #Now we need a regulator metric and an agent metric. #Obvious choice is to have regulator see x, agent sees y also. #Each builds a model approximating their true goal based on seeing data, and picks a region they think they will like by restricting their input. #Let's start off with everyone getting access to 50 data points. datapoints=50 #These are all near the origin ;) x_inputs=rnorm(datapoints) y_inputs=rnorm(datapoints) #We start by assuming everyone thinks the true model is linear, and that the data they get on the inputs is noisy... Observed_x_inputs=rnorm(datapoints,x_inputs,0.1) Observed_y_inputs=rnorm(datapoints,y_inputs,0.1) f_outputs = fx(x_inputs,y_inputs) Perfect_data=data.frame(x=x_inputs,y=y_inputs,z=f_outputs) Regulator_data=data.frame(x=Observed_x_inputs,y=Observed_y_inputs,z=f_outputs) g_outputs = fx(x_inputs,y_inputs) unfair_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Perfect_data) #That is an essentially perfect fit. Good to know. summary.lm(unfair_model) best_model = lm(z ~ polym(x, y, degree=2,raw=TRUE), data = Regulator_data) summary.lm(best_model) regulator_linear = lm(z ~ polym(x, y, degree=1,raw=TRUE), data = Regulator_data) summary.lm(regulator_linear) regulator_quadratic = lm(z ~ polym(x, degree=2,raw=TRUE), data = Regulator_data) summary.lm(regulator_quadratic) #################################### # VERY Simple Goodhart Simulations # #################################### # Missing Cause Case #In continuous cases, this is more difficult to describe. For example, if we consider the trunc_valated distribution for the Missing Cause case, we need to look at $p(Goal|Metric>c)$. This gives: # \begin{equation} #\int_{-\infty}^{\infty}{ # \int_{c}^{\infty}{ p(Metric|X,Goal) \cdot p(X) \cdot p(Goal) # \: \partial Metric}\: \partial X} #\end{equation} hist(rcauchy(10000,0,1/10)) x <- rnorm(10000) x_fat <- rcauchy(10000,0,1)/10 Goal <- rnorm(10000) Metric <- rnorm(10000,x+Goal) Metric_NoX <- rnorm(10000,Goal) Metric_X_fat <- rnorm(10000,x_fat+Goal) plot(x_fat,Metric_X_fat) plot(Goal,Metric_X_fat) hist(Metric, ylab="Density", main="Untrunc_valated Metric") hist(x_fat) #trunc_valate on metric for c=3 #hist(Goal[which(Metric>3)]) trunc_val = 3 #require(tikzDevice) #tikz( 'myPlot.tex' ) plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X") points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue") #dev.off() plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X") points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue") cor(Goal,Metric_NoX) cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)]) trunc_val = 3 cor(Goal,Metric_X_fat) cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)]) plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X") points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue") trunc_val=4 cor(Goal,Metric_X_fat) cor(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)]) plot(Goal[which(Metric_X_fat<=trunc_val)],Metric_X_fat[which(Metric_X_fat<=trunc_val)],ylim=c(-6,12),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with Bimodal X") points(Goal[which(Metric_X_fat>trunc_val)],Metric_X_fat[which(Metric_X_fat>trunc_val)],cex=0.5,col="blue") mean(Goal[which(Metric_X_fat>0)]) mean(Goal[which(Metric_X_fat>3)]) mean(Goal[which(Metric_X_fat>4)]) mean(Goal[which(Metric_X_fat>5)]) mean(Goal[which(Metric_X_fat>6)]) mean(Goal[which(Metric_X_fat>7)]) mean(Goal[which(Metric_X_fat>15)]) mean(Goal[which(Metric_X_fat>25)]) plot(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],col=(Metric>trunc_val)) cor(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)]) cor(Goal,Metric) # Now, we allow the agent to apply selection pressure x <- rnorm(10000) Goal <- rnorm(10000) Metric <- rnorm(10000,x+Goal) A_Goal <-rnorm(10000) A_Metric <-rnorm(10000,x*A_Goal) cor(Goal,Metric) cor(A_Goal,A_Metric) cor(Goal[which((Metric>2)&(A_Metric>2))],Metric[which((Metric>2)&(A_Metric>2))]) cor(Goal[which((Metric>2))],Metric[which((Metric>2))]) mean(Goal[which(Metric>2)]) mean(Goal[which((Metric>2)&(A_Metric>2))]) mean(A_Goal[which((Metric>2)&(A_Metric>2))]) mean(A_Goal) mean(A_Goal[which(Metric>2)]) mean(Goal[which(Metric>2)]) mean(Goal[which((Metric>2)&(A_Metric>1))]) mean(A_Goal[which((Metric>2)&(A_Metric>1))]) mean(A_Goal) mean(A_Goal[which(Metric>2)]) #Alternatively; A_Metric <-rnorm(10000,Metric*A_Goal) A_Metric <-rnorm(10000,Metric*(A_Goal-1)) cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)]) #trunc_valate on metric for c=3 #hist(Goal[which(Metric>3)]) trunc_val = 2 #require(tikzDevice) #tikz( 'myPlot.tex' ) plot(Goal[which(Metric<=trunc_val)],Metric[which(Metric<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X") points(Goal[which(Metric>trunc_val)],Metric[which(Metric>trunc_val)],cex=0.5,col="blue") #dev.off() plot(Goal[which(Metric_NoX<=trunc_val)],Metric_NoX[which(Metric_NoX<=trunc_val)],ylim=c(-6,6),col="green",ylab="Goal Value",xlab="Metric Value",main="trunc_valated Metric with X") points(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)],cex=0.5,col="blue") cor(Goal,Metric_NoX) cor(Goal[which(Metric_NoX>trunc_val)],Metric_NoX[which(Metric_NoX>trunc_val)])