yang-zhang/pytorch-losses-in-plain-python.ipynb

## pytorch-losses-in-plain-python.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0.4.1'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "torch.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## L1Loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = torch.randn(2, 3)\n",
    "y = torch.randn(2, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-1.4763, -0.0492,  0.7067],\n",
       "        [-0.3756, -1.8713,  1.5535]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.3171, -1.6037, -0.3038],\n",
       "        [ 0.3671,  0.6510, -2.2076]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.7917)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.L1Loss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 1.1592,  1.5544,  1.0105],\n",
       "        [ 0.7427,  2.5223,  3.7611]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.L1Loss(reduce=False)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.1591892, 1.5544204, 1.0105054],\n",
       "       [0.7426802, 2.522295 , 3.7610588]], dtype=float32)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "abs(x.numpy() - y.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.7916914"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "abs(x.numpy() - y.numpy()).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MSELoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 2.6290,  1.8988, -1.6431],\n",
       "        [ 0.3855,  0.4029, -0.4366]])"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 3)\n",
    "y = torch.randn(2, 3)\n",
    "\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.5482,  0.2927, -0.4202],\n",
       "        [ 0.6128, -0.4606, -0.8558]])"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 4.3301,  2.5797,  1.4955],\n",
       "        [ 0.0517,  0.7457,  0.1757]])"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.MSELoss(reduce=False)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.5630)"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.MSELoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[4.330082  , 2.5796666 , 1.4955105 ],\n",
       "       [0.05166636, 0.7456514 , 0.17567822]], dtype=float32)"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(x.numpy() - y.numpy())**2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.5630425"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "((x.numpy() - y.numpy())**2).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CrossEntropyLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.6293,  0.7490,  0.3947, -0.5312],\n",
       "        [-0.7038, -1.1594,  0.6817, -0.8557]])"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 4)\n",
    "y = torch.LongTensor(2).random_(4)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 1,  0])"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([ 1.0532,  1.8702]), tensor(1.4617))"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.CrossEntropyLoss(reduce=False)(x, y), nn.CrossEntropyLoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([1.0531884, 1.8702432], 1.4617158)"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()\n",
    "\n",
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    lst.append(-np.log(np.exp(x[k][y[k]]) / np.exp(x[k]).sum()))\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NLLLoss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LogSoftmax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-1.4217,  0.4832, -0.0611,  0.6160],\n",
       "        [ 0.4007, -0.3147,  1.9577, -1.0041]])"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 4)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-2.9596, -1.0547, -1.5990, -0.9219],\n",
       "        [-1.8686, -2.5839, -0.3116, -3.2733]])"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = nn.LogSoftmax(dim=1)(x)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([-2.9596107 , -1.0546701 , -1.5989888 , -0.92192453], dtype=float32),\n",
       " array([-1.8685771 , -2.583946  , -0.31156364, -3.2733235 ], dtype=float32)]"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = x.numpy()\n",
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    lst.append(np.log( np.exp(x[k]) / np.exp(x[k]).sum()))\n",
    "lst"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "NLLLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([[ 1.2387,  0.4941,  0.3404,  0.4291],\n",
       "         [ 1.1745, -0.1141,  0.8808,  1.8481],\n",
       "         [-0.6773, -0.9456,  0.4741,  0.8378]]),\n",
       " tensor([[-0.8447, -1.5893, -1.7430, -1.6543],\n",
       "         [-1.3819, -2.6705, -1.6756, -0.7083],\n",
       "         [-2.2489, -2.5172, -1.0975, -0.7338]]))"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x0 = torch.randn(3, 4)\n",
    "x = nn.LogSoftmax(dim=1)(x0)\n",
    "x0, x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 2,  1,  3])"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = torch.LongTensor(3).random_(4)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.7157)"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.NLLLoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 1.7430,  2.6705,  0.7338])"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.NLLLoss(reduce=False)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([tensor(1.7430), tensor(2.6705), tensor(0.7338)], 1.7157394)"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    lst.append(-x[k][y[k]])\n",
    "\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## PoissonNLLLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.6711,  1.5167,  1.4041,  0.2249],\n",
       "        [-0.5825, -1.2273, -1.5340,  0.8917]])"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 4)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.4753,  0.1411,  0.3186,  0.1708],\n",
       "        [ 0.2095, -0.6558, -0.7816,  0.5834]])"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = torch.randn(2, 4)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.5702)"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.PoissonNLLLoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 2.2753,  4.3434,  3.6244,  1.2137],\n",
       "        [ 0.6806, -0.5119, -0.9833,  1.9191]])"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.PoissonNLLLoss(reduce=False)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "# target∗log(target)−target+0.5∗log(2πtarget)\n",
    "def sterling_approx(y):\n",
    "    return y*np.log(y) - y + 0.5*np.log(np.pi*y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    lsti = []\n",
    "    for i in range(len(x[k])):\n",
    "        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if y[k,i]>1 else 0)\n",
    "        lsti.append(lss)\n",
    "    lst.append(lsti)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 2.27534461,  4.34339952,  3.62439346,  1.21373343],\n",
       "       [ 0.68055761, -0.51185942, -0.9832679 ,  1.91914582]])"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array(lst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.570180892944336"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## KLDivLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0.2269, 0.9956, 0.9354],\n",
       "        [0.1313, 0.7004, 0.9105]])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.rand(2, 3)\n",
    "y = torch.rand(2, 3)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-1.4832, -0.0044, -0.0668],\n",
       "        [-2.0302, -0.3561, -0.0938]])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xlog = torch.log(x)\n",
    "xlog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[0.5502, 0.6448, 0.7401],\n",
       "        [0.7998, 0.2121, 0.2097]])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.1529)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.KLDivLoss()(xlog, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.4873, -0.2801, -0.1733],\n",
       "        [ 1.4451, -0.2534, -0.3079]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.KLDivLoss(reduce=False)(xlog, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "xlog = np.log(x)\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.48733026, -0.2801091 , -0.17330758],\n",
       "       [ 1.4450648 , -0.2533762 , -0.3079228 ]], dtype=float32)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for i in range(len(x)):\n",
    "    lsti = []\n",
    "    for j in range(len(x[i])):\n",
    "        # xi is already log \n",
    "        lsti.append(y[i][j] * (np.log(y[i][j]) - xlog[i][j]))\n",
    "    lst.append(lsti)\n",
    "np.array(lst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.15294655"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## BCELoss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sigmoid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.3220, -0.8002, -0.5503, -2.0414],\n",
       "        [ 1.0465, -0.2785, -1.6423,  1.1158]])"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 4)\n",
    "y = nn.Sigmoid()(x)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.5798,  0.3100,  0.3658,  0.1149],\n",
       "        [ 0.7401,  0.4308,  0.1621,  0.7532]])"
      ]
     },
     "execution_count": 163,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.5798062 , 0.30998793, 0.36578804, 0.11492275],\n",
       "       [0.7401055 , 0.43081176, 0.1621461 , 0.7531997 ]], dtype=float32)"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "1 / (1 + np.exp(-x))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### single label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 0.5213,  0.5932,  0.5333])"
      ]
     },
     "execution_count": 174,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x0 = torch.randn(3)\n",
    "x = nn.Sigmoid()(x0)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 1.,  1.,  1.])"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = torch.FloatTensor(3).random_(2)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.6008)"
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCELoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 0.6514,  0.5222,  0.6287])"
      ]
     },
     "execution_count": 177,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCELoss(reduce=False)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.8024)"
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "loss = nn.BCELoss(size_average=False)\n",
    "lss = loss(x, y)\n",
    "lss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.65144944, 0.52221346, 0.6287041], 0.600789)"
      ]
     },
     "execution_count": 180,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for i in range(len(x)):\n",
    "    lst.append(-np.log(x[i]) if y[i]==1 else -np.log(1-x[i]))\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Equivalently"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.65144944190979, 0.5222134590148926, 0.6287040710449219],\n",
       " 0.6007889906565348)"
      ]
     },
     "execution_count": 184,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for i in range(len(x)):\n",
    "    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### multilabel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.2400,  0.4380],\n",
       "        [ 0.2651,  0.4915],\n",
       "        [ 0.3721,  0.5370]])"
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x0 = torch.randn(3, 2)\n",
    "x = nn.Sigmoid()(x0)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 1.,  1.],\n",
       "        [ 1.,  1.],\n",
       "        [ 1.,  0.]])"
      ]
     },
     "execution_count": 190,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = torch.FloatTensor(3, 2).random_(2)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.0082)"
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCELoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 1.4272,  0.8255],\n",
       "        [ 1.3278,  0.7102],\n",
       "        [ 0.9886,  0.7700]])"
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCELoss(reduce=False)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[1.42715609, 0.82551563],\n",
       "        [1.32778549, 0.71021408],\n",
       "        [0.9886421 , 0.76996785]]), 1.0082135393626286)"
      ]
     },
     "execution_count": 196,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for i in range(len(x)):\n",
    "    lsti = []\n",
    "    for j in range(len(x[i])):\n",
    "        lsti.append(-np.log(x[i][j]) if y[i][j]==1 else -np.log(1-x[i][j]))\n",
    "    lst.append(lsti)\n",
    "np.array(lst), np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Equivalently"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[1.4271561 , 0.8255156 ],\n",
       "        [1.3277855 , 0.7102141 ],\n",
       "        [0.9886421 , 0.76996785]], dtype=float32), 1.0082136)"
      ]
     },
     "execution_count": 198,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for i in range(len(x)):\n",
    "    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))\n",
    "np.array(lst), np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## BCEWithLogitsLoss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is just simply adding a sigmoid in front of BCELoss above."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### single label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([-0.1104,  0.2577, -0.5487])"
      ]
     },
     "execution_count": 206,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(3)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 0.4724,  0.5641,  0.3662])"
      ]
     },
     "execution_count": 207,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xs = nn.Sigmoid()(x)\n",
    "xs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 0.,  0.,  0.])"
      ]
     },
     "execution_count": 208,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = torch.FloatTensor(3).random_(2)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.6419)"
      ]
     },
     "execution_count": 209,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCELoss()(xs, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.6419)"
      ]
     },
     "execution_count": 210,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCEWithLogitsLoss()(x, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### multilabel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-1.4298,  0.1712],\n",
       "        [ 0.7382, -1.8834],\n",
       "        [-1.7065,  1.1530]])"
      ]
     },
     "execution_count": 211,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(3, 2)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.1931,  0.5427],\n",
       "        [ 0.6766,  0.1320],\n",
       "        [ 0.1536,  0.7601]])"
      ]
     },
     "execution_count": 212,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xs = nn.Sigmoid()(x)\n",
    "xs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 1.,  1.],\n",
       "        [ 1.,  0.],\n",
       "        [ 1.,  1.]])"
      ]
     },
     "execution_count": 213,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = torch.FloatTensor(3, 2).random_(2)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.8226)"
      ]
     },
     "execution_count": 214,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCELoss()(xs, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.8226)"
      ]
     },
     "execution_count": 216,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.BCEWithLogitsLoss()(x, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MarginRankingLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([-0.5451,  0.1447, -0.3011]),\n",
       " tensor([ 0.1900,  0.6117,  1.5479]),\n",
       " tensor([ 1.,  1., -1.]))"
      ]
     },
     "execution_count": 221,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1 = torch.randn(3)\n",
    "x2 = torch.randn(3)\n",
    "y = torch.FloatTensor(np.random.choice([1, -1], 3))\n",
    "\n",
    "x1, x2, y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.4674)"
      ]
     },
     "execution_count": 222,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.MarginRankingLoss(margin=0.1)(x1, x2, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "metadata": {},
   "outputs": [],
   "source": [
    "x1 = x1.numpy()\n",
    "x2 = x2.numpy()\n",
    "y = y.numpy()\n",
    "margin=0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.835101580619812, 0.5670205116271972, 0], 0.4673740307490031)"
      ]
     },
     "execution_count": 224,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for i in range(len(x1)):\n",
    "    lst.append(max(0, -y[i]*(x1[i]-x2[i]) + margin))\n",
    "\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## HingeEmbeddingLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.4457, -1.5535, -0.1648],\n",
       "        [ 0.7037,  0.2432,  0.3004]])"
      ]
     },
     "execution_count": 235,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 3)\n",
    "y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))\n",
    "\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 236,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-1., -1.,  1.],\n",
       "        [ 1.,  1.,  1.]])"
      ]
     },
     "execution_count": 236,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 237,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.6984)"
      ]
     },
     "execution_count": 237,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.HingeEmbeddingLoss(margin=1)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 238,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()\n",
    "margin=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.55430901,  2.55346417, -0.16479899],\n",
       "       [ 0.70371646,  0.24319194,  0.30040452]])"
      ]
     },
     "execution_count": 239,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst=[]\n",
    "\n",
    "for i in range(len(x)):\n",
    "    lsti = []\n",
    "    for j in range(len(x[i])):\n",
    "        if y[i][j]==1:\n",
    "            lsti.append(x[i][j])\n",
    "        else:\n",
    "            lsti.append(max(0, margin-x[i][j]))\n",
    "    lst.append(lsti)\n",
    "np.array(lst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6983811855316162"
      ]
     },
     "execution_count": 240,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MultiLabelMarginLoss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### one-sample example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 244,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.3257, -1.2182,  1.4421,  0.2452]])"
      ]
     },
     "execution_count": 244,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(1, 4)\n",
    "y = torch.LongTensor(1, 4).random_(-1, 4)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 245,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 3, -1, -1, -1]])"
      ]
     },
     "execution_count": 245,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 246,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.8194)"
      ]
     },
     "execution_count": 246,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.MultiLabelMarginLoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 248,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 249,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 3\n",
      "1 3\n",
      "2 3\n"
     ]
    }
   ],
   "source": [
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    sm = 0\n",
    "    js = []\n",
    "    for j in range(len(y[k])):\n",
    "        if y[k][j]<0: break \n",
    "        js.append(y[k][j])\n",
    "    for i in range(len(x[k])):\n",
    "        for j in js:\n",
    "            if (i not in js) and (i!=j):\n",
    "                print(i, j)\n",
    "                sm += max(0, 1-(x[k][j] - x[k][i]))\n",
    "    lst.append(sm/len(x[k]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 250,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.8193658106029034], 0.8193658106029034)"
      ]
     },
     "execution_count": 250,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### multi-sample example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 252,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-1.3972, -0.5922, -0.7198,  0.8985],\n",
       "        [-0.6777,  0.3352, -0.1973, -0.0305],\n",
       "        [ 0.4067,  0.2513, -1.0973, -0.1837]])"
      ]
     },
     "execution_count": 252,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(3, 4)\n",
    "y = torch.LongTensor(3, 4).random_(-1, 4)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 253,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 3,  1,  0,  1],\n",
       "        [ 2,  0,  0, -1],\n",
       "        [ 1, -1,  2,  1]])"
      ]
     },
     "execution_count": 253,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.2635)"
      ]
     },
     "execution_count": 254,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.MultiLabelMarginLoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 255,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.8555163443088531, 2.5048549212515354, 0.4300655126571655],\n",
       " 1.263478926072518)"
      ]
     },
     "execution_count": 255,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()\n",
    "\n",
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    sm = 0\n",
    "    js = []\n",
    "    for j in range(len(y[k])):\n",
    "        if y[k][j]<0: break \n",
    "        js.append(y[k][j])\n",
    "    for i in range(len(x[k])):\n",
    "        for j in js:\n",
    "            if (i not in js) and (i!=j):\n",
    "                sm += max(0, 1-(x[k][j] - x[k][i]))\n",
    "    lst.append(sm/len(x[k]))\n",
    "\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SmoothL1Loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 257,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = torch.randn(2, 3)\n",
    "y = torch.randn(2, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 258,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.5490)"
      ]
     },
     "execution_count": 258,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.SmoothL1Loss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 259,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.6491,  0.0651,  1.2454],\n",
       "        [ 0.3355,  0.9598,  0.0390]])"
      ]
     },
     "execution_count": 259,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.SmoothL1Loss(reduce=False)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 260,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy() \n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 261,
   "metadata": {},
   "outputs": [],
   "source": [
    "def smoothl1loss(x, y):\n",
    "    if abs(x-y)<1: return 1/2*(x-y)**2\n",
    "    else: return abs(x-y)-1/2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 263,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[0.64909291, 0.06508577, 1.24535966],\n",
       "        [0.33547111, 0.95977783, 0.03898569]]), 0.5489621638637431)"
      ]
     },
     "execution_count": 263,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for i in range(len(x)):\n",
    "    lsti=[]\n",
    "    for j in range(len(x[i])):\n",
    "        lsti.append(smoothl1loss(x[i][j], y[i][j]))\n",
    "    lst.append(lsti)\n",
    "np.array(lst), np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SoftMarginLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 264,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.8887, -0.3107, -0.6408, -2.5345],\n",
       "        [ 0.2605, -0.1133,  0.2433,  0.3387]])"
      ]
     },
     "execution_count": 264,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 4)\n",
    "y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 265,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 1.,  1.,  1., -1.],\n",
       "        [ 1.,  1.,  1.,  1.]])"
      ]
     },
     "execution_count": 265,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 266,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.7092)"
      ]
     },
     "execution_count": 266,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.SoftMarginLoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 267,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 268,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.8084959688801056, 0.6099205543628277], 0.7092082616214666)"
      ]
     },
     "execution_count": 268,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    sm = 0\n",
    "    for i in range(len(x[k])):\n",
    "        sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))\n",
    "    lst.append(sm/len(x[k]))\n",
    "\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MultiLabelSoftMarginLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 269,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-1.9676, -1.4290, -0.1576,  0.6041],\n",
       "        [ 0.0584, -1.0734, -0.4568,  0.4787]])"
      ]
     },
     "execution_count": 269,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 4)\n",
    "y = torch.FloatTensor(2, 4).random_(2)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 270,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.,  1.,  1.,  1.],\n",
       "        [ 1.,  0.,  1.,  0.]])"
      ]
     },
     "execution_count": 270,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 271,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.7315)"
      ]
     },
     "execution_count": 271,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.MultiLabelSoftMarginLoss()(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 272,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 274,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.7464252382614533, 0.7166620319227274], 0.7315436350920903)"
      ]
     },
     "execution_count": 274,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    sm = 0\n",
    "    for i in range(len(x[k])):\n",
    "        sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\\\n",
    "            (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))\n",
    "    lst.append(sm/len(x[k]))\n",
    "\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CosineEmbeddingLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 276,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.1394, -0.9875, -0.4814],\n",
       "        [ 0.4167,  0.9489, -0.2292]])"
      ]
     },
     "execution_count": 276,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1 = torch.randn(2, 3)\n",
    "x2 = torch.randn(2, 3)\n",
    "y = torch.FloatTensor(np.random.choice([1, -1], 2))\n",
    "\n",
    "x1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 277,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.3618,  1.1291, -1.3030],\n",
       "        [ 0.9274,  0.7760, -1.6018]])"
      ]
     },
     "execution_count": 277,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 278,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([-1.,  1.])"
      ]
     },
     "execution_count": 278,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 279,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.1503)"
      ]
     },
     "execution_count": 279,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 280,
   "metadata": {},
   "outputs": [],
   "source": [
    "x1 = x1.numpy()\n",
    "x2 = x2.numpy()\n",
    "y = y.numpy()\n",
    "margin=0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 281,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.spatial.distance import cosine\n",
    "\n",
    "def cos(x, y): return 1-cosine(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0, 0.3005916476249695], 0.15029582381248474)"
      ]
     },
     "execution_count": 282,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for k in range(len(x1)):\n",
    "    if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))\n",
    "    elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MultiMarginLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.0025, -0.6019,  1.9809, -1.2663],\n",
       "        [ 0.6060, -0.0002,  0.9110, -0.6320]])"
      ]
     },
     "execution_count": 283,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = torch.randn(2, 4)\n",
    "y = torch.LongTensor(2).random_(4)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 1,  3])"
      ]
     },
     "execution_count": 284,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(3.4149)"
      ]
     },
     "execution_count": 285,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.MultiMarginLoss(margin=0.9, p=2)(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = x.numpy()\n",
    "y = y.numpy()\n",
    "p=2\n",
    "margin=0.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([3.6083879542856043, 3.2214048583725967], 3.4148964063291007)"
      ]
     },
     "execution_count": 287,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for k in range(len(x)):\n",
    "    sm = 0\n",
    "    for i in range(len(x[k])):\n",
    "        if i!= y[k]:\n",
    "            sm += max(0, (margin - x[k, y[k]] + x[k, i])**p)\n",
    "    lst.append(sm/len(x[k]))\n",
    "\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TripletMarginLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 2.0001,  1.2658, -1.1397],\n",
       "        [ 0.9793, -0.3433, -0.0746]])"
      ]
     },
     "execution_count": 288,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1 = torch.randn(2, 3)\n",
    "x2 = torch.randn(2, 3)\n",
    "x3 = torch.randn(2, 3)\n",
    "margin = 0.9\n",
    "p = 2\n",
    "\n",
    "x1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.4055)"
      ]
     },
     "execution_count": 289,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {},
   "outputs": [],
   "source": [
    "x1 = x1.numpy()\n",
    "x2 = x2.numpy()\n",
    "x3 = x3.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [],
   "source": [
    "def d(x1, x2, p):\n",
    "    return sum((x1-x2)**p)**(1/p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0.0, 0.8110052643651849], 0.40550263218259247)"
      ]
     },
     "execution_count": 292,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lst = []\n",
    "for k in range(len(x1)):\n",
    "    sm = 0\n",
    "    for i in range(len(x1[k])):\n",
    "        sm += max(d(x1[k], x2[k], p)-d(x1[k], x3[k], p)+margin, 0) \n",
    "    lst.append(sm/len(x1[k]))\n",
    "\n",
    "lst, np.mean(lst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## References"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- https://pytorch.org/docs/0.4.0/nn.html#loss-functions\n",
    "- https://blog.csdn.net/zhangxb35/article/details/72464152"
   ]
  }
 ],
 "metadata": {
  "_draft": {
   "nbviewer_url": "https://gist.github.com/c94304dbc7f1f9be3333742b7e8249a7"
  },
  "gist": {
   "data": {
    "description": "git/yang-zhang.github.io/ds_code/pytorch-losses-in-plain-python.ipynb",
    "public": true
   },
   "id": "c94304dbc7f1f9be3333742b7e8249a7"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "230px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}