Last active
July 21, 2020 21:07
-
-
Save bridgesign/12e9a0349f45b84564592cbad42a0e95 to your computer and use it in GitHub Desktop.
Gist for Simple RTB Game
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class bidmodel(nn.Module):
    """Neural model used by a bidder.

    Two input vectors of length ``size`` are each passed through their own
    pair of linear layers, concatenated, and fed through a shared layer.
    Two output heads then return two vectors of length ``size``: the first
    is the vector of bids and the second the click-probability vector over
    all the ads the bidder owns.

    Args:
        size: Length of each input vector and of each output vector.
        cuda: When true, move the module's parameters to the GPU.
    """

    def __init__(self, size, cuda=True):
        super(bidmodel, self).__init__()
        half = size // 2

        # One two-layer encoder per input vector, each ending in 100 features.
        self.fc1 = torch.nn.Linear(in_features=size, out_features=half)
        self.fc2 = torch.nn.Linear(in_features=size, out_features=half)
        self.fc11 = torch.nn.Linear(in_features=half, out_features=100)
        self.fc21 = torch.nn.Linear(in_features=half, out_features=100)

        # Shared layer over the 100 + 100 concatenated features. An earlier
        # 200 -> 200 layer was also assigned to ``fc3`` and then overwritten
        # without ever being used; that dead assignment has been dropped.
        self.fc3 = torch.nn.Linear(in_features=200, out_features=half)

        # Output heads: fc4 produces bids, fc5 produces click probabilities.
        self.fc4 = torch.nn.Linear(in_features=half, out_features=size)
        self.fc5 = torch.nn.Linear(in_features=half, out_features=size)

        if cuda:
            self.cuda()

    def forward(self, vvec, cvec):
        """Compute bids and click probabilities.

        Args:
            vvec: Float tensor whose last dimension is ``size``.
            cvec: Float tensor with the same shape as ``vvec``.

        Returns:
            A ``(bid, click)`` tuple of tensors whose last dimension is
            ``size``. ``bid`` is at least 0.001 everywhere; ``click`` lies
            in (0, 1).
        """
        v_feat = F.relu(self.fc11(self.fc1(vvec)))
        c_feat = F.relu(self.fc21(self.fc2(cvec)))

        # Concatenate along the feature axis so that both a single 1-D
        # sample and a batch of samples are handled.
        hidden = torch.tanh(self.fc3(torch.cat([v_feat, c_feat], dim=-1)))

        # 2 * sigmoid - 1 lies in (-1, 1); relu clips the negative half to 0
        # and the 0.001 offset keeps every bid strictly positive.
        bid = F.relu(torch.sigmoid(self.fc4(hidden)) * 2.0 - 1.0) + 0.001
        click = torch.sigmoid(self.fc5(hidden))
        return bid, click
class bidder:
    """A bidder that owns a set of ads and learns how to bid on them.

    The bidder stores the ads that it owns and builds a ``bidmodel`` of the
    matching size. It keeps a running reward which it tries to maximize.
    For learning it uses its own predicted bids together with whether it won
    the auction and whether the shown ad was clicked. Data is not shared
    between bidders: each one only knows its own bids and outcomes.

    Args:
        advec: Array used to index the global input vectors
            (``vvec[advec]``); its ``size`` sets the model size.
            Presumably a NumPy index array of the owned ads -- confirm
            against the caller.
        cuda: When true, the model and its inputs/targets are placed on the
            GPU.
    """

    def __init__(self, advec, cuda=True):
        self.advec = advec
        self.model = bidmodel(advec.size, cuda=cuda)
        self.cuda = cuda
        self.loss = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.003)
        self.reward = 1

    def predict(self, vvec, cvec):
        """Run the model on this bidder's slice of the two input arrays.

        The results are stored on ``self.bid`` and ``self.click`` (with
        their autograd graph) for a later call to ``backprop``.

        Args:
            vvec: NumPy array indexed with ``self.advec``.
            cvec: NumPy array indexed with ``self.advec``.
        """
        in_v = torch.from_numpy(vvec[self.advec]).float()
        in_c = torch.from_numpy(cvec[self.advec]).float()
        if self.cuda:
            in_v = in_v.to('cuda')
            in_c = in_c.to('cuda')
        self.bid, self.click = self.model(in_v, in_c)

    def backprop(self, view, click):
        """Build heuristic targets from the outcome and take one Adam step.

        Must be called after ``predict``; it reads ``self.bid`` and
        ``self.click``.

        NOTE(review): the source this was recovered from had lost all
        indentation. The nesting of the reward/bid adjustments below is a
        reconstruction -- confirm against the original before relying on it.

        Args:
            view: Index (within this bidder's ads) of the ad that was shown,
                or ``-1`` when none of this bidder's ads was shown.
            click: ``1`` if the shown ad was clicked, otherwise ``0``.
        """
        # clone() first so the NumPy arrays do not share memory with the
        # tensors that are still part of the autograd graph.
        bidv = self.bid.clone().cpu().detach().numpy()
        clickv = self.click.clone().cpu().detach().numpy()

        # Confidence of the click prediction: distance-from-0.5 folded so
        # that both confident "click" and confident "no click" score high.
        ccl = np.where(clickv > 0.5, clickv, 1 - clickv)
        bidv = ccl * bidv + 0.01

        if click == 1:
            self.reward += 1
            # Replaces the whole target vector by the confidence vector
            # scaled with the (adjusted) bid of the shown ad.
            bidv = ccl * bidv[view]

        if view > -1:
            # The bidder pays its (adjusted) bid for the shown ad.
            self.reward -= bidv[view]
            clickv[view] = click
            if self.reward < 0:
                bidv = 0.5 * bidv
            # NOTE(review): assumed to apply whenever an ad was shown, not
            # only when the reward is negative -- confirm.
            bidv[view] = 0.89 * (bidv[view] - 0.01) * click

        gain = max(0, self.reward)
        bidv = (gain + 2.1) / (gain + 1) * bidv

        if self.reward < -5 and view == -1:
            bidv = bidv * 0.1

        # Occasional random overrides of the whole target vector. The draws
        # are sequential: a later branch only draws if the earlier ones did
        # not fire.
        if np.random.binomial(1, 0.01) == 1 and self.reward > 0:
            bidv = np.ones(self.advec.size) * 0.1
        elif np.random.binomial(1, 0.01) == 1:
            bidv = np.ones(self.advec.size) * 0.01
        elif np.random.binomial(1, 0.01) == 1:
            bidv = np.ones(self.advec.size)

        self.optimizer.zero_grad()

        # Cast to float32: the override targets above are float64, which
        # would otherwise not match the float32 model outputs in MSELoss.
        bid_target = torch.from_numpy(bidv).float()
        click_target = torch.from_numpy(clickv).float()
        if self.cuda:
            bid_target = bid_target.to('cuda')
            click_target = click_target.to('cuda')

        # MSELoss takes (input, target): predictions first, targets second.
        loss = self.loss(self.bid, bid_target)
        loss += self.loss(self.click, click_target)
        loss.backward()
        self.optimizer.step()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment