Skip to content

Instantly share code, notes, and snippets.

@yang-zhang
Last active December 21, 2022 07:14
Show Gist options
  • Star 19 You must be signed in to star a gist
  • Fork 8 You must be signed in to fork a gist
  • Save yang-zhang/c94304dbc7f1f9be3333742b7e8249a7 to your computer and use it in GitHub Desktop.
Save yang-zhang/c94304dbc7f1f9be3333742b7e8249a7 to your computer and use it in GitHub Desktop.
git/yang-zhang.github.io/ds_code/pytorch-losses-in-plain-python.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0.4.1'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"\n",
"torch.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## L1Loss"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"x = torch.randn(2, 3)\n",
"y = torch.randn(2, 3)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-1.4763, -0.0492, 0.7067],\n",
" [-0.3756, -1.8713, 1.5535]])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-0.3171, -1.6037, -0.3038],\n",
" [ 0.3671, 0.6510, -2.2076]])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1.7917)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.L1Loss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1.1592, 1.5544, 1.0105],\n",
" [ 0.7427, 2.5223, 3.7611]])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-element losses (replaces the deprecated reduce=False)\n",
"nn.L1Loss(reduction='none')(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[1.1591892, 1.5544204, 1.0105054],\n",
" [0.7426802, 2.522295 , 3.7610588]], dtype=float32)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"abs(x.numpy() - y.numpy())"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.7916914"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"abs(x.numpy() - y.numpy()).mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MSELoss"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 2.6290, 1.8988, -1.6431],\n",
" [ 0.3855, 0.4029, -0.4366]])"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 3)\n",
"y = torch.randn(2, 3)\n",
"\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.5482, 0.2927, -0.4202],\n",
" [ 0.6128, -0.4606, -0.8558]])"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 4.3301, 2.5797, 1.4955],\n",
" [ 0.0517, 0.7457, 0.1757]])"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-element squared errors (replaces the deprecated reduce=False)\n",
"nn.MSELoss(reduction='none')(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1.5630)"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.MSELoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[4.330082 , 2.5796666 , 1.4955105 ],\n",
" [0.05166636, 0.7456514 , 0.17567822]], dtype=float32)"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(x.numpy() - y.numpy())**2"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"1.5630425"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"((x.numpy() - y.numpy())**2).mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CrossEntropyLoss"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.6293, 0.7490, 0.3947, -0.5312],\n",
" [-0.7038, -1.1594, 0.6817, -0.8557]])"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 4)\n",
"y = torch.LongTensor(2).random_(4)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 1, 0])"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([ 1.0532, 1.8702]), tensor(1.4617))"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-sample losses (reduction='none' replaces the deprecated reduce=False), then the default mean.\n",
"nn.CrossEntropyLoss(reduction='none')(x, y), nn.CrossEntropyLoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([1.0531884, 1.8702432], 1.4617158)"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = x.numpy()\n",
"y = y.numpy()\n",
"\n",
"# Per-sample loss: negative log of the softmax probability of the target class.\n",
"lst = [\n",
"    -np.log(np.exp(logits[target]) / np.exp(logits).sum())\n",
"    for logits, target in zip(x, y)\n",
"]\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NLLLoss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"LogSoftmax"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-1.4217, 0.4832, -0.0611, 0.6160],\n",
" [ 0.4007, -0.3147, 1.9577, -1.0041]])"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 4)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-2.9596, -1.0547, -1.5990, -0.9219],\n",
" [-1.8686, -2.5839, -0.3116, -3.2733]])"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = nn.LogSoftmax(dim=1)(x)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[array([-2.9596107 , -1.0546701 , -1.5989888 , -0.92192453], dtype=float32),\n",
" array([-1.8685771 , -2.583946 , -0.31156364, -3.2733235 ], dtype=float32)]"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = x.numpy()\n",
"# Row-wise log-softmax: log of each exponentiated entry divided by the row's total.\n",
"lst = [np.log(np.exp(row) / np.exp(row).sum()) for row in x]\n",
"lst"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"NLLLoss"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[ 1.2387, 0.4941, 0.3404, 0.4291],\n",
" [ 1.1745, -0.1141, 0.8808, 1.8481],\n",
" [-0.6773, -0.9456, 0.4741, 0.8378]]),\n",
" tensor([[-0.8447, -1.5893, -1.7430, -1.6543],\n",
" [-1.3819, -2.6705, -1.6756, -0.7083],\n",
" [-2.2489, -2.5172, -1.0975, -0.7338]]))"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x0 = torch.randn(3, 4)\n",
"x = nn.LogSoftmax(dim=1)(x0)\n",
"x0, x"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 2, 1, 3])"
]
},
"execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.LongTensor(3).random_(4)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1.7157)"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.NLLLoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 1.7430, 2.6705, 0.7338])"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-sample losses (replaces the deprecated reduce=False)\n",
"nn.NLLLoss(reduction='none')(x, y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([tensor(1.7430), tensor(2.6705), tensor(0.7338)], 1.7157394)"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NLLLoss picks out, and negates, the log-probability stored at each sample's target index.\n",
"lst = [-log_probs[target] for log_probs, target in zip(x, y)]\n",
"\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PoissonNLLLoss"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.6711, 1.5167, 1.4041, 0.2249],\n",
" [-0.5825, -1.2273, -1.5340, 0.8917]])"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 4)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-0.4753, 0.1411, 0.3186, 0.1708],\n",
" [ 0.2095, -0.6558, -0.7816, 0.5834]])"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.randn(2, 4)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1.5702)"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.PoissonNLLLoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 2.2753, 4.3434, 3.6244, 1.2137],\n",
" [ 0.6806, -0.5119, -0.9833, 1.9191]])"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-element losses (replaces the deprecated reduce=False)\n",
"nn.PoissonNLLLoss(reduction='none')(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [],
"source": [
"# Stirling approximation of log(target!):\n",
"#   target*log(target) - target + 0.5*log(2*pi*target)\n",
"# The factor 2 inside the last log was previously missing.\n",
"def sterling_approx(y):\n",
"    return y*np.log(y) - y + 0.5*np.log(2*np.pi*y)"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [],
"source": [
"# Mirrors nn.PoissonNLLLoss() defaults: log_input=True and full=False.\n",
"# Set full = True to reproduce nn.PoissonNLLLoss(full=True), which adds the Stirling term.\n",
"full = False\n",
"\n",
"lst = []\n",
"for k in range(len(x)):\n",
"    lsti = []\n",
"    for i in range(len(x[k])):\n",
"        # log_input=True: loss = exp(input) - target * input\n",
"        # The Stirling term is added only when full=True, and only where target > 1.\n",
"        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if (full and y[k,i]>1) else 0)\n",
"        lsti.append(lss)\n",
"    lst.append(lsti)"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 2.27534461, 4.34339952, 3.62439346, 1.21373343],\n",
" [ 0.68055761, -0.51185942, -0.9832679 , 1.91914582]])"
]
},
"execution_count": 146,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array(lst)"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.570180892944336"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## KLDivLoss"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.2269, 0.9956, 0.9354],\n",
" [0.1313, 0.7004, 0.9105]])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.rand(2, 3)\n",
"y = torch.rand(2, 3)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-1.4832, -0.0044, -0.0668],\n",
" [-2.0302, -0.3561, -0.0938]])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xlog = torch.log(x)\n",
"xlog"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.5502, 0.6448, 0.7401],\n",
" [0.7998, 0.2121, 0.2097]])"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.1529)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.KLDivLoss()(xlog, y)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.4873, -0.2801, -0.1733],\n",
" [ 1.4451, -0.2534, -0.3079]])"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-element terms (replaces the deprecated reduce=False)\n",
"nn.KLDivLoss(reduction='none')(xlog, y)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"xlog = np.log(x)\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.48733026, -0.2801091 , -0.17330758],\n",
" [ 1.4450648 , -0.2533762 , -0.3079228 ]], dtype=float32)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# xlog already holds log(x); each term is target * (log(target) - log-input).\n",
"lst = [\n",
"    [t * (np.log(t) - log_p) for log_p, t in zip(xlog_row, y_row)]\n",
"    for xlog_row, y_row in zip(xlog, y)\n",
"]\n",
"np.array(lst)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.15294655"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## BCELoss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sigmoid"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.3220, -0.8002, -0.5503, -2.0414],\n",
" [ 1.0465, -0.2785, -1.6423, 1.1158]])"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 4)\n",
"y = nn.Sigmoid()(x)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 163,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.5798, 0.3100, 0.3658, 0.1149],\n",
" [ 0.7401, 0.4308, 0.1621, 0.7532]])"
]
},
"execution_count": 163,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.5798062 , 0.30998793, 0.36578804, 0.11492275],\n",
" [0.7401055 , 0.43081176, 0.1621461 , 0.7531997 ]], dtype=float32)"
]
},
"execution_count": 165,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"1 / (1 + np.exp(-x))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### single label"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 0.5213, 0.5932, 0.5333])"
]
},
"execution_count": 174,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x0 = torch.randn(3)\n",
"x = nn.Sigmoid()(x0)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 1., 1., 1.])"
]
},
"execution_count": 175,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.FloatTensor(3).random_(2)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.6008)"
]
},
"execution_count": 176,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.BCELoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 0.6514, 0.5222, 0.6287])"
]
},
"execution_count": 177,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-element losses (replaces the deprecated reduce=False)\n",
"nn.BCELoss(reduction='none')(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1.8024)"
]
},
"execution_count": 178,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='sum' adds up the per-element losses (replaces the deprecated size_average=False)\n",
"loss = nn.BCELoss(reduction='sum')\n",
"lss = loss(x, y)\n",
"lss"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0.65144944, 0.52221346, 0.6287041], 0.600789)"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# -log(p) when the label is 1, otherwise -log(1 - p).\n",
"lst = [-np.log(p) if t == 1 else -np.log(1 - p) for p, t in zip(x, y)]\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Equivalently"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0.65144944190979, 0.5222134590148926, 0.6287040710449219],\n",
" 0.6007889906565348)"
]
},
"execution_count": 184,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same loss written as one expression: the label weights the two log terms.\n",
"lst = [-np.log(p)*t + -np.log(1 - p)*(1 - t) for p, t in zip(x, y)]\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### multilabel"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.2400, 0.4380],\n",
" [ 0.2651, 0.4915],\n",
" [ 0.3721, 0.5370]])"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x0 = torch.randn(3, 2)\n",
"x = nn.Sigmoid()(x0)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1., 1.],\n",
" [ 1., 1.],\n",
" [ 1., 0.]])"
]
},
"execution_count": 190,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.FloatTensor(3, 2).random_(2)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1.0082)"
]
},
"execution_count": 191,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.BCELoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1.4272, 0.8255],\n",
" [ 1.3278, 0.7102],\n",
" [ 0.9886, 0.7700]])"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-element losses (replaces the deprecated reduce=False)\n",
"nn.BCELoss(reduction='none')(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[1.42715609, 0.82551563],\n",
" [1.32778549, 0.71021408],\n",
" [0.9886421 , 0.76996785]]), 1.0082135393626286)"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Element-wise over every (sample, label) pair: -log(p) if the label is 1, else -log(1 - p).\n",
"lst = [\n",
"    [-np.log(p) if t == 1 else -np.log(1 - p) for p, t in zip(x_row, y_row)]\n",
"    for x_row, y_row in zip(x, y)\n",
"]\n",
"np.array(lst), np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Equivalently"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(array([[1.4271561 , 0.8255156 ],\n",
" [1.3277855 , 0.7102141 ],\n",
" [0.9886421 , 0.76996785]], dtype=float32), 1.0082136)"
]
},
"execution_count": 198,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same loss, one row at a time, with the labels weighting the two log terms.\n",
"lst = [\n",
"    -np.log(p_row)*t_row + -np.log(1 - p_row)*(1 - t_row)\n",
"    for p_row, t_row in zip(x, y)\n",
"]\n",
"np.array(lst), np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## BCEWithLogitsLoss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"BCEWithLogitsLoss is equivalent to applying a sigmoid to the input and then computing BCELoss as above."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### single label"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([-0.1104, 0.2577, -0.5487])"
]
},
"execution_count": 206,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(3)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 0.4724, 0.5641, 0.3662])"
]
},
"execution_count": 207,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xs = nn.Sigmoid()(x)\n",
"xs"
]
},
{
"cell_type": "code",
"execution_count": 208,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 0., 0., 0.])"
]
},
"execution_count": 208,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.FloatTensor(3).random_(2)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.6419)"
]
},
"execution_count": 209,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.BCELoss()(xs, y)"
]
},
{
"cell_type": "code",
"execution_count": 210,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.6419)"
]
},
"execution_count": 210,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.BCEWithLogitsLoss()(x, y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### multilabel"
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-1.4298, 0.1712],\n",
" [ 0.7382, -1.8834],\n",
" [-1.7065, 1.1530]])"
]
},
"execution_count": 211,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(3, 2)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.1931, 0.5427],\n",
" [ 0.6766, 0.1320],\n",
" [ 0.1536, 0.7601]])"
]
},
"execution_count": 212,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xs = nn.Sigmoid()(x)\n",
"xs"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1., 1.],\n",
" [ 1., 0.],\n",
" [ 1., 1.]])"
]
},
"execution_count": 213,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = torch.FloatTensor(3, 2).random_(2)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 214,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.8226)"
]
},
"execution_count": 214,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.BCELoss()(xs, y)"
]
},
{
"cell_type": "code",
"execution_count": 216,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.8226)"
]
},
"execution_count": 216,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.BCEWithLogitsLoss()(x, y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MarginRankingLoss"
]
},
{
"cell_type": "code",
"execution_count": 221,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([-0.5451, 0.1447, -0.3011]),\n",
" tensor([ 0.1900, 0.6117, 1.5479]),\n",
" tensor([ 1., 1., -1.]))"
]
},
"execution_count": 221,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x1 = torch.randn(3)\n",
"x2 = torch.randn(3)\n",
"y = torch.FloatTensor(np.random.choice([1, -1], 3))\n",
"\n",
"x1, x2, y"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.4674)"
]
},
"execution_count": 222,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.MarginRankingLoss(margin=0.1)(x1, x2, y)"
]
},
{
"cell_type": "code",
"execution_count": 223,
"metadata": {},
"outputs": [],
"source": [
"x1 = x1.numpy()\n",
"x2 = x2.numpy()\n",
"y = y.numpy()\n",
"margin=0.1"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0.835101580619812, 0.5670205116271972, 0], 0.4673740307490031)"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hinge on the signed difference: max(0, -y * (x1 - x2) + margin).\n",
"lst = [max(0, -t * (a - b) + margin) for a, b, t in zip(x1, x2, y)]\n",
"\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## HingeEmbeddingLoss"
]
},
{
"cell_type": "code",
"execution_count": 235,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.4457, -1.5535, -0.1648],\n",
" [ 0.7037, 0.2432, 0.3004]])"
]
},
"execution_count": 235,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 3)\n",
"y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))\n",
"\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 236,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-1., -1., 1.],\n",
" [ 1., 1., 1.]])"
]
},
"execution_count": 236,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 237,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.6984)"
]
},
"execution_count": 237,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.HingeEmbeddingLoss(margin=1)(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 238,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()\n",
"margin=1"
]
},
{
"cell_type": "code",
"execution_count": 239,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.55430901, 2.55346417, -0.16479899],\n",
" [ 0.70371646, 0.24319194, 0.30040452]])"
]
},
"execution_count": 239,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Label 1 keeps the input as the loss; any other label uses max(0, margin - input).\n",
"lst = [\n",
"    [v if t == 1 else max(0, margin - v) for v, t in zip(x_row, y_row)]\n",
"    for x_row, y_row in zip(x, y)\n",
"]\n",
"np.array(lst)"
]
},
{
"cell_type": "code",
"execution_count": 240,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.6983811855316162"
]
},
"execution_count": 240,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MultiLabelMarginLoss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### one-sample example"
]
},
{
"cell_type": "code",
"execution_count": 244,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.3257, -1.2182, 1.4421, 0.2452]])"
]
},
"execution_count": 244,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(1, 4)\n",
"y = torch.LongTensor(1, 4).random_(-1, 4)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 245,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 3, -1, -1, -1]])"
]
},
"execution_count": 245,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 246,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.8194)"
]
},
"execution_count": 246,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.MultiLabelMarginLoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 248,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 249,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 3\n",
"1 3\n",
"2 3\n"
]
}
],
"source": [
"lst = []\n",
"for scores, labels in zip(x, y):\n",
"    # Target class indices are the entries of y before the first negative value.\n",
"    js = []\n",
"    for label in labels:\n",
"        if label < 0:\n",
"            break\n",
"        js.append(label)\n",
"    sm = 0\n",
"    for i in range(len(scores)):\n",
"        # Only non-target classes are compared against the targets.\n",
"        if i in js:\n",
"            continue\n",
"        for j in js:\n",
"            print(i, j)\n",
"            sm += max(0, 1 - (scores[j] - scores[i]))\n",
"    lst.append(sm / len(scores))"
]
},
{
"cell_type": "code",
"execution_count": 250,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0.8193658106029034], 0.8193658106029034)"
]
},
"execution_count": 250,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### multi-sample example"
]
},
{
"cell_type": "code",
"execution_count": 252,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-1.3972, -0.5922, -0.7198, 0.8985],\n",
" [-0.6777, 0.3352, -0.1973, -0.0305],\n",
" [ 0.4067, 0.2513, -1.0973, -0.1837]])"
]
},
"execution_count": 252,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(3, 4)\n",
"y = torch.LongTensor(3, 4).random_(-1, 4)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 253,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 3, 1, 0, 1],\n",
" [ 2, 0, 0, -1],\n",
" [ 1, -1, 2, 1]])"
]
},
"execution_count": 253,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 254,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(1.2635)"
]
},
"execution_count": 254,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.MultiLabelMarginLoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 255,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"([0.8555163443088531, 2.5048549212515354, 0.4300655126571655],\n",
" 1.263478926072518)"
]
},
"execution_count": 255,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = x.numpy()\n",
"y = y.numpy()\n",
"\n",
"lst = []\n",
"for scores, labels in zip(x, y):\n",
"    # Target class indices are the entries of y before the first negative value.\n",
"    js = []\n",
"    for label in labels:\n",
"        if label < 0:\n",
"            break\n",
"        js.append(label)\n",
"    sm = 0\n",
"    for i in range(len(scores)):\n",
"        # Only non-target classes are compared against the targets.\n",
"        if i in js:\n",
"            continue\n",
"        for j in js:\n",
"            sm += max(0, 1 - (scores[j] - scores[i]))\n",
"    lst.append(sm / len(scores))\n",
"\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SmoothL1Loss"
]
},
{
"cell_type": "code",
"execution_count": 257,
"metadata": {},
"outputs": [],
"source": [
"x = torch.randn(2, 3)\n",
"y = torch.randn(2, 3)"
]
},
{
"cell_type": "code",
"execution_count": 258,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.5490)"
]
},
"execution_count": 258,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.SmoothL1Loss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 259,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.6491, 0.0651, 1.2454],\n",
" [ 0.3355, 0.9598, 0.0390]])"
]
},
"execution_count": 259,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reduction='none' returns the per-element losses (replaces the deprecated reduce=False)\n",
"nn.SmoothL1Loss(reduction='none')(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 260,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy() \n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 261,
"metadata": {},
"outputs": [],
"source": [
"def smoothl1loss(x, y):\n",
"    # Quadratic while |x - y| < 1, linear (shifted down by 1/2) from there on.\n",
"    diff = abs(x - y)\n",
"    return 0.5 * diff**2 if diff < 1 else diff - 0.5"
]
},
{
"cell_type": "code",
"execution_count": 263,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(array([[0.64909291, 0.06508577, 1.24535966],\n",
" [0.33547111, 0.95977783, 0.03898569]]), 0.5489621638637431)"
]
},
"execution_count": 263,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lst = []\n",
"for i in range(len(x)):\n",
" lsti=[]\n",
" for j in range(len(x[i])):\n",
" lsti.append(smoothl1loss(x[i][j], y[i][j]))\n",
" lst.append(lsti)\n",
"np.array(lst), np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SoftMarginLoss"
]
},
{
"cell_type": "code",
"execution_count": 264,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-0.8887, -0.3107, -0.6408, -2.5345],\n",
" [ 0.2605, -0.1133, 0.2433, 0.3387]])"
]
},
"execution_count": 264,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 4)\n",
"y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 265,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1., 1., 1., -1.],\n",
" [ 1., 1., 1., 1.]])"
]
},
"execution_count": 265,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 266,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.7092)"
]
},
"execution_count": 266,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.SoftMarginLoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 267,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 268,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0.8084959688801056, 0.6099205543628277], 0.7092082616214666)"
]
},
"execution_count": 268,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lst = []\n",
"for k in range(len(x)):\n",
" sm = 0\n",
" for i in range(len(x[k])):\n",
" sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))\n",
" lst.append(sm/len(x[k]))\n",
"\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MultiLabelSoftMarginLoss"
]
},
{
"cell_type": "code",
"execution_count": 269,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-1.9676, -1.4290, -0.1576, 0.6041],\n",
" [ 0.0584, -1.0734, -0.4568, 0.4787]])"
]
},
"execution_count": 269,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 4)\n",
"y = torch.FloatTensor(2, 4).random_(2)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 270,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0., 1., 1., 1.],\n",
" [ 1., 0., 1., 0.]])"
]
},
"execution_count": 270,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 271,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.7315)"
]
},
"execution_count": 271,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.MultiLabelSoftMarginLoss()(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 272,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 274,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0.7464252382614533, 0.7166620319227274], 0.7315436350920903)"
]
},
"execution_count": 274,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lst = []\n",
"for k in range(len(x)):\n",
" sm = 0\n",
" for i in range(len(x[k])):\n",
" sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\\\n",
" (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))\n",
" lst.append(sm/len(x[k]))\n",
"\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CosineEmbeddingLoss"
]
},
{
"cell_type": "code",
"execution_count": 276,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.1394, -0.9875, -0.4814],\n",
" [ 0.4167, 0.9489, -0.2292]])"
]
},
"execution_count": 276,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x1 = torch.randn(2, 3)\n",
"x2 = torch.randn(2, 3)\n",
"y = torch.FloatTensor(np.random.choice([1, -1], 2))\n",
"\n",
"x1"
]
},
{
"cell_type": "code",
"execution_count": 277,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 0.3618, 1.1291, -1.3030],\n",
" [ 0.9274, 0.7760, -1.6018]])"
]
},
"execution_count": 277,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x2"
]
},
{
"cell_type": "code",
"execution_count": 278,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([-1., 1.])"
]
},
"execution_count": 278,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 279,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.1503)"
]
},
"execution_count": 279,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)"
]
},
{
"cell_type": "code",
"execution_count": 280,
"metadata": {},
"outputs": [],
"source": [
"x1 = x1.numpy()\n",
"x2 = x2.numpy()\n",
"y = y.numpy()\n",
"margin=0.1"
]
},
{
"cell_type": "code",
"execution_count": 281,
"metadata": {},
"outputs": [],
"source": [
"from scipy.spatial.distance import cosine\n",
"\n",
"def cos(x, y): return 1-cosine(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 282,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0, 0.3005916476249695], 0.15029582381248474)"
]
},
"execution_count": 282,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lst = []\n",
"for k in range(len(x1)):\n",
" if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))\n",
" elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MultiMarginLoss"
]
},
{
"cell_type": "code",
"execution_count": 283,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[-0.0025, -0.6019, 1.9809, -1.2663],\n",
" [ 0.6060, -0.0002, 0.9110, -0.6320]])"
]
},
"execution_count": 283,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.randn(2, 4)\n",
"y = torch.LongTensor(2).random_(4)\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 284,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([ 1, 3])"
]
},
"execution_count": 284,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 285,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(3.4149)"
]
},
"execution_count": 285,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.MultiMarginLoss(margin=0.9, p=2)(x, y)"
]
},
{
"cell_type": "code",
"execution_count": 286,
"metadata": {},
"outputs": [],
"source": [
"x = x.numpy()\n",
"y = y.numpy()\n",
"p=2\n",
"margin=0.9"
]
},
{
"cell_type": "code",
"execution_count": 287,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([3.6083879542856043, 3.2214048583725967], 3.4148964063291007)"
]
},
"execution_count": 287,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lst = []\n",
"for k in range(len(x)):\n",
" sm = 0\n",
" for i in range(len(x[k])):\n",
" if i!= y[k]:\n",
" sm += max(0, (margin - x[k, y[k]] + x[k, i])**p)\n",
" lst.append(sm/len(x[k]))\n",
"\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## TripletMarginLoss"
]
},
{
"cell_type": "code",
"execution_count": 288,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 2.0001, 1.2658, -1.1397],\n",
" [ 0.9793, -0.3433, -0.0746]])"
]
},
"execution_count": 288,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x1 = torch.randn(2, 3)\n",
"x2 = torch.randn(2, 3)\n",
"x3 = torch.randn(2, 3)\n",
"margin = 0.9\n",
"p = 2\n",
"\n",
"x1"
]
},
{
"cell_type": "code",
"execution_count": 289,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(0.4055)"
]
},
"execution_count": 289,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)"
]
},
{
"cell_type": "code",
"execution_count": 290,
"metadata": {},
"outputs": [],
"source": [
"x1 = x1.numpy()\n",
"x2 = x2.numpy()\n",
"x3 = x3.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 291,
"metadata": {},
"outputs": [],
"source": [
"def d(x1, x2, p):\n",
" return sum((x1-x2)**p)**(1/p)"
]
},
{
"cell_type": "code",
"execution_count": 292,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0.0, 0.8110052643651849], 0.40550263218259247)"
]
},
"execution_count": 292,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lst = []\n",
"for k in range(len(x1)):\n",
" sm = 0\n",
" for i in range(len(x1[k])):\n",
" sm += max(d(x1[k], x2[k], p)-d(x1[k], x3[k], p)+margin, 0) \n",
" lst.append(sm/len(x1[k]))\n",
"\n",
"lst, np.mean(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## References"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- https://pytorch.org/docs/0.4.0/nn.html#loss-functions\n",
"- https://blog.csdn.net/zhangxb35/article/details/72464152"
]
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/c94304dbc7f1f9be3333742b7e8249a7"
},
"gist": {
"data": {
"description": "git/yang-zhang.github.io/ds_code/pytorch-losses-in-plain-python.ipynb",
"public": true
},
"id": "c94304dbc7f1f9be3333742b7e8249a7"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "230px"
},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment