Skip to content

Instantly share code, notes, and snippets.

@bridgesign
Last active July 21, 2020 21:07
Show Gist options
  • Save bridgesign/12e9a0349f45b84564592cbad42a0e95 to your computer and use it in GitHub Desktop.
Save bridgesign/12e9a0349f45b84564592cbad42a0e95 to your computer and use it in GitHub Desktop.
Gist for Simple RTB Game
class bidmodel(nn.Module):
    """
    Neural model used by a bidder.

    The model takes two input vectors (``vvec`` and ``cvec``), each with
    ``size`` features, runs each through its own two-layer branch, merges
    the branches, and produces two output vectors of length ``size``:
    the bids and the click probabilities for the ads the bidder owns.
    """

    def __init__(self, size, cuda=True):
        """
        Build the layers.

        size -- number of input features per vector and number of outputs
                per head.
        cuda -- when true, move the module to the GPU after construction.
        """
        super(bidmodel, self).__init__()
        half = size // 2
        # One branch per input vector: size -> size//2 -> 100.
        self.fc1 = torch.nn.Linear(in_features=size, out_features=half)
        self.fc2 = torch.nn.Linear(in_features=size, out_features=half)
        self.fc11 = torch.nn.Linear(in_features=half, out_features=100)
        self.fc21 = torch.nn.Linear(in_features=half, out_features=100)
        # Merge layer over the two concatenated 100-wide branch outputs.
        # (A 200 -> 200 layer used to be assigned to fc3 first and was
        # overwritten immediately; it never took part in the model.)
        self.fc3 = torch.nn.Linear(in_features=200, out_features=half)
        # Output heads: fc4 produces bids, fc5 produces click probabilities.
        self.fc4 = torch.nn.Linear(in_features=half, out_features=size)
        self.fc5 = torch.nn.Linear(in_features=half, out_features=size)
        if cuda:
            self.cuda()

    def forward(self, vvec, cvec):
        """
        Return ``(bid, click)`` for the given input vectors.

        vvec, cvec -- tensors whose last dimension has ``size`` features.
                      Leading batch dimensions are allowed.

        ``bid`` lies in [0.001, 1.001): the sigmoid is rescaled to (-1, 1),
        negatives are clipped by the ReLU and a 0.001 floor is added.
        ``click`` is a sigmoid output in (0, 1).
        """
        vvec = F.relu(self.fc11(self.fc1(vvec)))
        cvec = F.relu(self.fc21(self.fc2(cvec)))
        # Concatenate along the feature axis so that both 1-D inputs and
        # batched inputs work (for 1-D tensors this equals the default dim=0).
        vec = torch.cat([vvec, cvec], dim=-1)
        vec = torch.tanh(self.fc3(vec))
        bid = F.relu(torch.sigmoid(self.fc4(vec)) * 2 - 1) + 0.001
        click = torch.sigmoid(self.fc5(vec))
        return bid, click
class bidder:
    """
    A bidder that owns a set of ads and learns how much to bid on them.

    The bidder stores the ads that it owns and builds a bidmodel of the
    matching size. It keeps a running reward which it tries to maximize.
    For training it uses only its own predicted bids, whether it won
    (which of its ads was shown) and whether that ad was clicked; no data
    is shared between bidders.
    """

    def __init__(self, advec, cuda=True):
        """
        advec -- NumPy index array selecting this bidder's ads; its ``size``
                 determines the model size.
        cuda  -- when true, the model and its inputs/targets live on the GPU.
        """
        self.advec = advec
        self.model = bidmodel(advec.size, cuda=cuda)
        self.cuda = cuda
        self.loss = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.003)
        self.reward = 1

    def predict(self, vvec, cvec):
        """
        Run the model on this bidder's slice of ``vvec`` and ``cvec``.

        Both arguments are NumPy arrays indexed by ``self.advec``. The
        resulting tensors are stored in ``self.bid`` and ``self.click`` for
        a later call to ``backprop``; nothing is returned.
        """
        in_v = torch.from_numpy(vvec[self.advec]).float()
        in_c = torch.from_numpy(cvec[self.advec]).float()
        if self.cuda:
            in_v = in_v.to('cuda')
            in_c = in_c.to('cuda')
        self.bid, self.click = self.model(in_v, in_c)

    def backprop(self, view, click):
        """
        Update the reward and take one optimizer step on the last prediction.

        view  -- index (within this bidder's ads) of the ad that was shown;
                 the code treats -1 as "none shown" (presumably the bidder
                 lost the auction -- confirm against the caller).
        click -- 1 when the shown ad was clicked; it is also used as a 0/1
                 multiplier and as the click target for the shown ad.

        ``predict`` must have been called first so that ``self.bid`` and
        ``self.click`` hold the tensors to train on.
        """
        # NOTE(review): the nesting of the heuristic rules below was
        # reconstructed from a listing without indentation -- confirm it
        # matches the intended game rules.
        bidv = self.bid.clone().cpu().detach().numpy()
        clickv = self.click.clone().cpu().detach().numpy()
        # Confidence of the click prediction: distance from 0.5, folded to
        # the range [0.5, 1].
        ccl = np.where(clickv > 0.5, clickv, (1 - clickv))
        bidv = ccl * bidv + 0.01
        if click == 1:
            self.reward += 1
            bidv = ccl * bidv[view]
        if view > -1:
            # The shown ad costs its (adjusted) bid.
            self.reward -= bidv[view]
            clickv[view] = click
            if self.reward < 0:
                bidv = 0.5 * bidv
            bidv[view] = 0.89 * (bidv[view] - 0.01) * click
        # Scale factor falls from 2.1 (reward <= 0) towards 1 as reward grows.
        bidv = (max(0, self.reward) + 2.1) / (max(0, self.reward) + 1) * bidv
        if self.reward < -5 and view == -1:
            bidv = bidv * 0.1
        # Occasional random reset of the whole bid target (about 1% each).
        if np.random.binomial(1, 0.01) == 1 and self.reward > 0:
            bidv = np.ones(self.advec.size) * 0.1
        elif np.random.binomial(1, 0.01) == 1:
            bidv = np.ones(self.advec.size) * 0.01
        elif np.random.binomial(1, 0.01) == 1:
            bidv = np.ones(self.advec.size)
        self.optimizer.zero_grad()
        # The targets can end up float64 (np.ones above, or NumPy scalar
        # promotion), while the model outputs are float32; cast so that the
        # loss never mixes dtypes.
        bidv = torch.from_numpy(bidv).float()
        clickv = torch.from_numpy(clickv).float()
        if self.cuda:
            bidv = bidv.to('cuda')
            clickv = clickv.to('cuda')
        # MSELoss takes (input, target): prediction first, target second.
        loss = self.loss(self.bid, bidv) + self.loss(self.click, clickv)
        loss.backward()
        self.optimizer.step()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment