Skip to content

Instantly share code, notes, and snippets.

@mizar
Last active May 17, 2024 04:42
Show Gist options
  • Save mizar/95d6fbb4610f46b62ee48ff77c15ab62 to your computer and use it in GitHub Desktop.
Save mizar/95d6fbb4610f46b62ee48ff77c15ab62 to your computer and use it in GitHub Desktop.
pmont.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyO6vVNi0tNEx6DFd4O70k+l",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/mizar/95d6fbb4610f46b62ee48ff77c15ab62/pmont.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "uchu2wHTYMc_",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "56c277a6-7842-4f2c-f3fd-7225ebaa1f60"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1 5.161857058999999\n",
"2 11.628609650000001\n",
"3 16.76951633099999\n",
"4 23.545394556000005\n",
"5 28.622354041999998\n",
"6 35.123929577\n",
"7 40.33383867900001\n",
"8 46.52410703600002\n",
"9 51.80913902900001\n",
"10 57.32361777999999\n",
"11 63.705687438\n",
"12 69.013825227\n",
"13 75.34107914600001\n",
"14 80.850022273\n",
"15 87.37955822900001\n",
"16 93.00775329100001\n",
"17 99.83178845100001\n",
"18 105.256818169\n",
"19 112.02802609500002\n",
"20 117.42705780300001\n",
"21 123.78214076100001\n",
"22 129.04878393500002\n",
"23 135.08825574500003\n",
"24 141.15447713400005\n",
"25 146.42697222700002\n",
"26 153.212669872\n",
"27 158.35661702700003\n",
"28 165.054020395\n",
"29 170.278740034\n",
"30 176.97879065300003\n",
"31 182.30537595700002\n",
"32 188.97040871900003\n",
"33 194.40884870500003\n",
"34 200.623873644\n",
"35 206.72833685900002\n",
"36 212.44516519400003\n",
"37 218.37311307700003\n",
"38 225.61842158500002\n",
"39 232.37653412100002\n",
"40 239.498111864\n",
"41 244.73936825600003\n",
"42 249.907167529\n",
"43 256.60927942\n",
"44 262.135761065\n",
"45 268.579657726\n",
"46 273.758853894\n",
"47 280.14614037300004\n",
"48 285.504026397\n",
"49 292.047471723\n",
"50 297.34246475000003\n",
"51 303.34922181800005\n",
"52 308.89459839\n",
"53 314.077020129\n",
"54 320.660634333\n",
"55 326.05785305800003\n",
"56 332.466837667\n",
"57 337.85674462400004\n",
"58 344.51947926900004\n",
"59 350.009190352\n",
"60 356.86582276300004\n",
"61 361.930161352\n",
"62 368.00640245700004\n",
"63 373.78858361700003\n",
"64 379.043521299\n",
"65 385.89736250100003\n",
"66 391.29048701100004\n",
"67 398.02643346100007\n",
"68 403.21317263099996\n",
"69 409.759423239\n",
"70 415.19258906700003\n",
"71 421.77569646399996\n",
"72 427.232362123\n",
"73 433.835711655\n",
"74 439.324457238\n",
"75 445.51072832200003\n",
"76 451.13954852800003\n",
"77 456.74268780799997\n",
"78 463.4131163800001\n",
"79 468.860823391\n",
"80 475.42110059\n",
"81 480.69267021\n",
"82 487.29593049600004\n",
"83 492.84061558\n",
"84 500.730425165\n",
"85 505.97698534700004\n",
"86 511.76976437900004\n",
"87 517.734462842\n",
"88 523.111261808\n",
"89 534.139990258\n",
"90 541.0202975779999\n",
"91 547.783780969\n",
"92 553.476454302\n",
"93 560.507437282\n",
"94 565.905845809\n",
"95 572.488181718\n",
"96 577.7453255749999\n",
"97 584.43238385\n",
"98 589.717450723\n",
"99 596.321844482\n",
"100 601.8223312069999\n"
]
}
],
"source": [
"import time\n",
"import random\n",
"\n",
"# Simple Montgomery Reduction\n",
"def MR0(A: int, B: int, M: int, W: int) -> int:\n",
" assert 0 <= A < M\n",
" assert 0 <= B < M\n",
" assert M % 2 == 1\n",
" N = -((-(M.bit_length())) // W)\n",
" K = (1 << (W * N)) - 1\n",
" MU = pow(M, -1, 1 << (W * N))\n",
" t = A * B\n",
" q = (MU * (t & K)) & K\n",
" D = t >> (W * N)\n",
" E = (q * M) >> (W * N)\n",
" C = D - E\n",
" if C < 0:\n",
" C += M\n",
" assert 0 <= C < M\n",
" return C\n",
"\n",
"# Bos, J.W., Montgomery, P.L., Shumow, D., Zaverucha, G.M. (2014). Montgomery Multiplication Using Vector Instructions. In: Lange, T., Lauter, K., Lisoněk, P. (eds) Selected Areas in Cryptography -- SAC 2013. SAC 2013. Lecture Notes in Computer Science(), vol 8282. Springer, Berlin, Heidelberg. https://doi.org/10.1007/978-3-662-43414-7_24\n",
"# Algorithm 1: https://media.springernature.com/lw685/springer-static/image/chp%3A10.1007%2F978-3-662-43414-7_24/MediaObjects/326290_1_En_24_Figa_HTML.gif?as=webp\n",
"def MR1(A: int, B: int, M: int, W: int) -> int:\n",
" assert 0 <= A < M\n",
" assert 0 <= B < M\n",
" assert M % 2 == 1\n",
" K = (1 << W) - 1\n",
" N = -((-(M.bit_length())) // W)\n",
" MU = pow(-M, -1, 1 << W)\n",
" a = [(A >> (W * i)) & K for i in range(N)]\n",
" C = 0\n",
" for i in range(N):\n",
" t = C + a[i] * B\n",
" q = (MU * (t & K)) & K\n",
" C = (t + (q * M)) >> W\n",
" assert 0 <= C < 2 * M\n",
" if C >= M:\n",
" C -= M\n",
" assert 0 <= C < M\n",
" return C\n",
"\n",
"# Bos, J.W., Montgomery, P.L., Shumow, D., Zaverucha, G.M. (2014). Montgomery Multiplication Using Vector Instructions. In: Lange, T., Lauter, K., Lisoněk, P. (eds) Selected Areas in Cryptography -- SAC 2013. SAC 2013. Lecture Notes in Computer Science(), vol 8282. Springer, Berlin, Heidelberg. https://doi.org/10.1007/978-3-662-43414-7_24\n",
"# Algorithm 1 (modified).\n",
"# base algorithm: https://media.springernature.com/lw685/springer-static/image/chp%3A10.1007%2F978-3-662-43414-7_24/MediaObjects/326290_1_En_24_Figa_HTML.gif?as=webp\n",
"def MR1m(A: int, B: int, M: int, W: int) -> int:\n",
" assert 0 <= A < M\n",
" assert 0 <= B < M\n",
" assert M % 2 == 1\n",
" K = (1 << W) - 1\n",
" N = -((-(M.bit_length())) // W)\n",
" MU = pow(M, -1, 1 << W)\n",
" a = [(A >> (W * i)) & K for i in range(N)]\n",
" C = 0\n",
" for i in range(N):\n",
" t = C + a[i] * B\n",
" q = (MU * (t & K)) & K\n",
" C = (t >> W) - ((q * M) >> W)\n",
" if C < 0:\n",
" C += M\n",
" assert 0 <= C < M\n",
" assert 0 <= C < M\n",
" return C\n",
"\n",
"# Bos, J.W., Montgomery, P.L., Shumow, D., Zaverucha, G.M. (2014). Montgomery Multiplication Using Vector Instructions. In: Lange, T., Lauter, K., Lisoněk, P. (eds) Selected Areas in Cryptography -- SAC 2013. SAC 2013. Lecture Notes in Computer Science(), vol 8282. Springer, Berlin, Heidelberg. https://doi.org/10.1007/978-3-662-43414-7_24\n",
"# Algorithm 2: https://media.springernature.com/lw685/springer-static/image/chp%3A10.1007%2F978-3-662-43414-7_24/MediaObjects/326290_1_En_24_Figb_HTML.gif?as=webp\n",
"def MR2(A: int, B: int, M: int, W: int) -> int:\n",
" assert 0 <= A < M\n",
" assert 0 <= B < M\n",
" assert M % 2 == 1\n",
" K = (1 << W) - 1\n",
" N = -((-(M.bit_length())) // W)\n",
" MU = pow(M, -1, 1 << W)\n",
" a = [(A >> (W * i)) & K for i in range(N)]\n",
" b = [(B >> (W * i)) & K for i in range(N)]\n",
" m = [(M >> (W * i)) & K for i in range(N)]\n",
" d = [0] * N\n",
" e = [0] * N\n",
" mu_b0 = (MU * b[0]) & K\n",
" for j in range(N):\n",
" q = (mu_b0 * a[j] + MU * (d[0] - e[0])) & K\n",
" t0 = (a[j] * b[0] + d[0]) >> W\n",
" t1 = (q * m[0] + e[0]) >> W\n",
" for i in range(1, N):\n",
" p0 = a[j] * b[i] + t0 + d[i]\n",
" p1 = q * m[i] + t1 + e[i]\n",
" t0 = p0 >> W\n",
" t1 = p1 >> W\n",
" d[i-1] = p0 & K\n",
" e[i-1] = p1 & K\n",
" d[N-1] = t0\n",
" e[N-1] = t1\n",
" D = sum(d[i] << (W * i) for i in range(N))\n",
" E = sum(e[i] << (W * i) for i in range(N))\n",
" C = D - E\n",
" if C < 0:\n",
" C += M\n",
" assert 0 <= C < M\n",
" return C\n",
"\n",
"start = time.perf_counter()\n",
"for i in range(100):\n",
" for _ in range(1000):\n",
" W = random.randint(1, 64)\n",
" M = random.randint(1, (1 << (W * random.randint(1, 128))) - 1) | 1\n",
" N = -((-(M.bit_length())) // W)\n",
" A = random.randint(0, M - 1)\n",
" B = random.randint(0, M - 1)\n",
" assert 0 <= A < M\n",
" assert 0 <= B < M\n",
" assert M % 2 == 1\n",
" C0 = MR0(A, B, M, W)\n",
" C1 = MR1m(A, B, M, W)\n",
" C1m = MR1m(A, B, M, W)\n",
" C2 = MR2(A, B, M, W)\n",
" C_estimated = (A * B * pow(2, -W * N, M)) % M\n",
" assert C0 == C_estimated, f\"MR0: A[{A}]*B[{B}]/2^(W * N)[{1 << (W * N)}]%M[{M}]=[{C_estimated}]!=[{C0}]\"\n",
" assert C1 == C_estimated, f\"MR1: A[{A}]*B[{B}]/2^(W * N)[{1 << (W * N)}]%M[{M}]=[{C_estimated}]!=[{C1m}]\"\n",
" assert C1m == C_estimated, f\"MR1: A[{A}]*B[{B}]/2^(W * N)[{1 << (W * N)}]%M[{M}]=[{C_estimated}]!=[{C1m}]\"\n",
" assert C2 == C_estimated, f\"MR2: A[{A}]*B[{B}]/2^(W * N)[{1 << (W * N)}]%M[{M}]=[{C_estimated}]!=[{C2}]\"\n",
" end = time.perf_counter()\n",
" print(i + 1, end - start)\n"
]
}
]
}
@mizar
Copy link
Author

mizar commented May 15, 2024

参考文献

アルゴリズム・実装

MR0: 単純なモンゴメリ乗算 の実装テスト

$\textbf{Algorithm 0.}$

  • $\displaystyle\textbf{Input: }A,B,M,\mu,R\text{ such that }$
    • $A,B,M,\mu,R\in\mathbb{Z},$
      • $A,B,M,\mu,R$ は整数
    • $0\leq A,B\lt M\lt R,$
    • $2\not\mid M,$
      • $M$ は奇数 ($R$ は通常、計算機上で除数 $R$ による除算・剰余算の計算コストを減らす目的で $2$ の累乗数とするため)
        • $2$ の累乗数である除数 $R$ による除算・剰余算は、論理右シフト演算(上位ビット取り出し)・ビット論理積演算(下位ビット取り出し)に置き換える事ができるため、計算コストが低い
      • $R$$2$ の累乗数である場合、 $M$ が奇数でないと $\gcd(R,M)=1$ が満たせず、 $\mu=M^{-1}\ \mathrm{mod}\ R$ が存在しない
    • $\gcd(R,M)=1,$
      • $R,M$ は互いに素
    • $\mu=M^{-1}\ \mathrm{mod}\ R.$
      • $\mu$$\mathrm{mod}\ R$ における $M$ の乗法逆元
      • $R,M$ は互いに素であるため、このような $\mu$ の値は存在する
  • $\displaystyle\textbf{Output: }C\equiv A\cdot B\cdot R^{-1}\ (\mathrm{mod}\ M)\text{ such that }0\leq C\lt M.$
  1. $t\leftarrow A\cdot B$
  2. $q\leftarrow \mu\cdot (t\ \mathrm{mod}\ R)\ \mathrm{mod}\ R$
  3. $C\leftarrow(t-q\cdot M)/R=\lfloor t/R\rfloor-\lfloor q\cdot M/R\rfloor$
  4. $\textbf{if }C\lt 0\textbf{ then return }(C+M)\textbf{ else return }C$
  • $t-q\cdot M\equiv t-t\cdot M^{-1}\cdot M\equiv 0\ (\mathrm{mod}\ R)$ $\quad\longrightarrow\quad t/R-\lfloor t/R\rfloor=(q\cdot M/R)-\lfloor q\cdot M/R\rfloor$ $\quad\longrightarrow\quad C=(t-q\cdot M)/R=\lfloor t/R\rfloor-\lfloor q\cdot M/R\rfloor$
  • $C\cdot R\equiv t-q\cdot M\equiv t\equiv A\cdot B\ (\mathrm{mod}\ M)$ $\quad\longrightarrow\quad C\equiv A\cdot B\cdot R^{-1}\ (\mathrm{mod}\ M)$
  • $0\leq t=A\cdot B\lt MR,\ 0\leq q\cdot M\lt MR$ $\quad\longrightarrow\quad -M\lt C=(t-q\cdot M)/R\lt M$

MR1: 論文の文中 Algorithm 1. の実装テスト

MR1m: Algorithm 1 modified.

($\mu=-M^{-1}\mod R$ ではなく $\mu=M^{-1}\mod R$ を用いたものに変更) の実装テスト

$\textbf{Algorithm 1 modified.}$

  • $\displaystyle\textbf{Input: }A,B,M,\mu,R,n\text{ such that}$
    • $A,B,M,\mu,R,n\in\mathbb{Z},$
    • $\displaystyle A=\sum_{i=0}^{n-1} a_iR^i,$
    • $\displaystyle 0\leq a_i\lt R,$
    • $\displaystyle 0\leq A,B\lt M,$
    • $\displaystyle 2\not\mid M,$
    • $\displaystyle R^{n-1}\leq M\lt R^n,$
    • $\displaystyle \gcd(R,M)=1,$
    • $\displaystyle \mu=M^{-1}\ \mathrm{mod}\ R.$
  • $\displaystyle\textbf{Output: }C\equiv A\cdot B\cdot R^{-n}\ (\mathrm{mod}\ M)\text{ such that }0\leq C\lt M.$
  1. $C\leftarrow 0$
  2. $\textbf{for }i=0\text{ to }n-1\textbf{ do}$
  3. $\quad C\leftarrow C+a_i\cdot B$
  4. $\quad q\leftarrow \mu\cdot C\mod R$
  5. $\quad C\leftarrow(C-q\cdot M)/R=\lfloor C/R\rfloor-\lfloor q\cdot M/R\rfloor$
  6. $\quad \textbf{if }C\lt 0\textbf{ then}$
  7. $\quad\quad C\leftarrow C+M$
  8. $\textbf{return }C$

MR2: 論文の文中 Algorithm 2. の実装テスト

(Computation 1 と Computation 2のループは順次実行ではなく並行実行を企図?)

Algorithm 2. の現在の解釈

$$ \begin{array}{l} \textbf{Algorithm 2. }\text{(current interpretation)} \\ \textbf{Input: } \begin{cases} A,B,M,\mu,R,n,W\text{ such that} \\ \quad A,B,M,\mu,R,n,W\in\mathbb{Z}, \\ \quad W\geq 1, \\ \quad R=2^W, \\ \quad\displaystyle A=\sum_{i=0}^{n-1}a_iR^i,\ B=\sum_{i=0}^{n-1}b_iR^i,\ M=\sum_{i=0}^{n-1}m_iR^i, \\ \quad a_i,b_i,m_i\in\mathbb{Z}\quad(0\leq i\lt n), \\ \quad 0\leq a_i,b_i,m_i\lt R\quad(0\leq i\lt n), \\ \quad 0\leq A,B\lt M, \\ \quad R^{n-1}\leq M\lt R^n, \\ \quad \gcd(R,M)=1, \\ \quad 2\not\mid M, \\ \quad \mu=M^{-1}\ \mathrm{mod}\ R\text{ such that }0\leq\mu\lt R. \end{cases} \\ \textbf{Output: }C\equiv A\cdot B\cdot R^{-n}\ \mathrm{mod}\ M\text{ such that }0\leq C\lt M. \\ \hline \textbf{Computation} \\ \hline \begin{cases} d_i=0\textbf{ for }0\leq i\lt n\\ e_i=0\textbf{ for }0\leq i\lt n\\ \end{cases} \\ \textbf{for }j = 0\text{ to }n - 1\textbf{ do} \\ \quad q \leftarrow ((\mu\cdot b_0)\cdot a_j+\mu\cdot(d_0-e_0))\ \mathrm{mod}\ R \\ \quad\begin{cases} t_0\leftarrow a_j\cdot b_0+d_0 \\ t_1\leftarrow q\cdot m_0+e_0 \\ \end{cases} \\ \quad\begin{cases} t_0\leftarrow \lfloor t_0/R\rfloor \\ t_1\leftarrow \lfloor t_1/R\rfloor \\ \end{cases} \\ \quad\textbf{for }i=1\text{ to }n-1\textbf{ do} \\ \quad\quad\begin{cases} p_0\leftarrow a_j\cdot b_i+t_0+d_i \\ p_1\leftarrow q\cdot m_i+t_1+e_i \\ \end{cases} \\ \quad\quad\begin{cases} t_0\leftarrow\lfloor p_0/R\rfloor \\ t_1\leftarrow\lfloor p_1/R\rfloor \\ \end{cases} \\ \quad\quad\begin{cases} d_{i-1}\leftarrow p_0\ \mathrm{mod}\ R \\ e_{i-1}\leftarrow p_1\ \mathrm{mod}\ R \\ \end{cases} \\ \quad\begin{cases} d_{n-1}\leftarrow t_0 \\ e_{n-1}\leftarrow t_1 \\ \end{cases} \\ \displaystyle C\leftarrow\sum_{i=0}^{n-1}(d_i-e_i)\cdot R^i \\ \textbf{if }C\lt 0\textbf{ do } C\leftarrow C+M \\ \textbf{return }C \\ \end{array} $$

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment