Created
October 19, 2015 07:12
-
-
Save robertcampion/24ebd19353eff797b2b8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
primes_53 = \ | |
b'\x03\x05\x07\x0b\x0d\x11\x13\x17\x1d\x1f\x25\x29\x2b\x2f\x35\x3b' \ | |
b'\x3d\x43\x47\x49\x4f\x53\x59\x61\x65\x67\x6b\x6d\x71\x7f\x83\x89' \ | |
b'\x8b\x95\x97\x9d\xa3\xa7\xad\xb3\xb5\xbf\xc1\xc5\xc7\xd3\xdf\xe3' \ | |
b'\xe5\xe9\xef\xf1\xfb' | |
# for 2**16+1:2:2**17-1, 1 if prime, 0 otherwise | |
prime_bytes = \ | |
b'\x8b\x24\x60\x82\x10\x41\x81\x12\x40\x08\x26\x0d\x03\x00\x01\x41' \ | |
b'\x00\x01\xca\x22\x20\x10\x20\x00\x19\x80\x24\xc0\x10\x00\x40\x94' \ | |
b'\x00\x0a\x12\x68\x00\x10\x00\x51\x82\x00\x40\x80\x00\x80\x00\x44' \ | |
b'\x00\x00\x29\x18\x94\x00\x42\x80\x00\x91\x02\x20\x42\x20\x45\x10' \ | |
b'\x30\x20\x11\x10\x04\x42\x00\x20\x50\x10\x41\xc2\x20\x00\x48\x10' \ | |
b'\x04\x10\x86\x40\x12\x00\x28\x82\x86\x40\x01\x20\x40\x10\x12\x05' \ | |
b'\x09\x80\x00\x11\x06\x05\x41\x06\x60\x83\x80\x40\x18\x10\x20\x00' \ | |
b'\xa0\x04\x10\x80\x08\x43\x16\x48\x00\x14\x25\x08\x22\x21\x88\x30' \ | |
b'\x04\x09\x82\x00\x02\x80\x00\x42\x20\x44\xc8\x02\x49\x90\x20\x04' \ | |
b'\x42\x20\x20\x42\x00\x68\x0a\x94\x20\x12\x00\x08\x10\x24\x08\x08' \ | |
b'\x82\x01\x40\x02\x21\x00\x80\x25\x82\x00\x04\x18\x00\x08\x10\x80' \ | |
b'\x00\x02\x84\x00\x00\xa4\x01\x50\x32\x24\x10\xa2\x00\x08\x04\x04' \ | |
b'\x02\x90\x00\x42\xa0\x20\x18\x10\x49\x80\x12\x08\x40\x06\x00\x80' \ | |
b'\x80\x21\x4b\x84\x00\x02\x02\x00\x11\x22\x0c\x00\x24\x01\x00\x84' \ | |
b'\x08\xc8\x20\x00\x58\x02\x04\x89\xb0\x00\x00\x00\x05\x50\x00\x20' \ | |
b'\x4a\x30\x00\x8a\x00\x29\x11\x36\x00\x00\x02\x04\x00\x10\x00\x08' \ | |
b'\x00\x44\x40\x20\x41\x00\x30\x00\x0b\x02\x00\x41\x16\x08\x03\x04' \ | |
b'\x20\x08\x30\x04\x08\x00\x48\x13\x02\x08\x42\x82\x00\x82\x34\x60' \ | |
b'\x40\x02\x0d\x98\x80\x44\x11\x80\x00\x11\x14\x09\x80\x00\x64\x10' \ | |
b'\x00\x48\x08\x32\x00\x19\x24\x00\x11\x04\x60\x48\x80\x60\x82\x00' \ | |
b'\x29\x80\x00\x44\x10\x00\x01\x81\x10\x00\x08\x30\x05\xc2\x20\x04' \ | |
b'\x19\x10\x4c\x10\x86\x0c\x82\x80\x00\x42\x10\x01\x80\x10\x28\x91' \ | |
b'\x02\x08\x10\x00\x05\xd1\x00\x48\x00\x84\x65\x92\x02\x60\x08\x26' \ | |
b'\x44\x42\x80\x29\x80\x00\x08\xc1\x20\x01\x50\x00\x00\x81\x80\x08' \ | |
b'\x40\xa2\x09\x40\x00\x01\x41\x80\x41\x40\x12\x24\x98\x20\x44\x40' \ | |
b'\x24\x20\x82\x92\x00\x82\xa0\x04\x50\x30\x4c\x00\x20\x00\x59\x00' \ | |
b'\x00\xc2\x06\x60\xc0\x84\x00\x12\x00\x05\x81\xa0\x00\x00\x84\x00' \ | |
b'\x40\x06\x41\x09\x00\x04\x8a\x00\x01\x00\x94\x00\x1a\x84\x09\x41' \ | |
b'\x82\x48\x02\xa0\x00\x40\x00\x09\x00\x20\x0c\x52\xa4\x20\x12\x94' \ | |
b'\x40\x01\x04\x25\xc0\x02\x0c\x08\x02\x00\x49\x04\x00\x40\x04\x00' \ | |
b'\x00\x24\x20\x40\x20\x48\x80\x02\x08\x13\x22\x00\x50\x10\x40\x82' \ | |
b'\x24\x00\x40\x12\x20\x11\x82\x44\x58\x00\x24\x02\x90\x41\x42\x90' \ | |
b'\x00\x48\x20\x08\x00\xa0\x00\x18\x22\x24\x10\x02\x00\x8a\x10\x04' \ | |
b'\x80\x00\x25\x80\x02\x48\x40\x06\x00\x82\x84\x09\x41\x34\x21\x08' \ | |
b'\x00\x01\x88\x14\x48\x02\x00\x25\xd0\x00\x60\x02\x10\x05\x08\x10' \ | |
b'\x60\x90\x02\x08\x49\x20\x04\x83\x00\x40\x00\x80\x20\xd2\x30\x20' \ | |
b'\x18\x22\x40\x00\xa4\x20\x50\x14\x28\x09\x20\x45\x80\x00\x00\x00' \ | |
b'\x16\x44\x01\x00\x28\x00\x04\x40\x40\x90\x41\x10\x30\x29\x08\x04' \ | |
b'\x44\x10\x20\x20\x11\x12\x08\xc1\xb0\x00\x48\x00\x00\x08\x80\x08' \ | |
b'\x49\x06\x05\x92\x00\x20\x43\x10\x24\x80\x12\x68\x10\x82\x00\x48' \ | |
b'\x24\x05\x40\x10\x40\x40\x20\x00\x10\x20\x05\x80\xa0\x4c\x03\x80' \ | |
b'\x08\x00\x80\x21\x0a\x20\x01\xc2\x02\x00\x80\x04\x08\x52\x06\x20' \ | |
b'\x81\x00\x40\x08\x80\x21\x02\x02\x0c\x10\x80\x08\x49\xa0\x08\xd0' \ | |
b'\x00\x01\x02\xa4\x00\x08\x30\x20\x09\x10\x40\x40\x00\x05\x52\x00' \ | |
b'\x01\xc2\x84\x00\x18\x02\x20\x09\x80\x04\x03\x80\x20\x90\x04\x09' \ | |
b'\x40\x10\x44\x00\x00\x05\x81\x02\x08\x41\x00\x2c\xc1\x86\x20\x00' \ | |
b'\x24\x20\x8a\x10\x09\x00\x30\x08\x08\x80\x0c\x03\x14\x09\xc8\x24' \ | |
b'\x05\x0a\x20\x05\x90\x20\x00\x19\x82\x29\x82\x02\x41\x08\x14\x04' \ | |
b'\x80\x10\x28\x00\x24\x00\x50\x80\x00\x91\x80\x08\x81\x10\x21\x80' \ | |
b'\x00\x04\x10\x30\x40\x4a\x82\x21\x00\x14\x01\x02\x24\x40\x4a\x00' \ | |
b'\x24\x80\x00\x00\x01\x20\x0c\x00\x82\x08\xc1\x04\x60\x10\x30\x09' \ | |
b'\x81\x80\x00\x52\x00\x00\x12\x12\x01\x01\x00\x60\x4a\x12\x08\x08' \ | |
b'\x14\x40\x58\x04\x08\x41\x00\x00\x43\x00\x00\x88\x02\x08\x80\x80' \ | |
b'\x08\x40\xa2\x09\x40\x12\x01\xc0\x10\x20\x90\x02\x44\x08\x14\x44' \ | |
b'\x08\x02\x0d\x82\x12\x20\x00\x10\x04\x00\x20\x41\x18\x20\x04\x51' \ | |
b'\xa4\x01\x10\x04\x68\x03\x00\x00\x40\x10\x01\x10\x96\x04\x0b\x24' \ | |
b'\x01\xc1\x02\x00\x0a\x80\x24\xc2\x00\x04\x08\x02\x48\x00\xa0\x00' \ | |
b'\x40\x16\x48\x81\x80\x41\x98\x20\x08\x89\x00\x44\x10\x24\x20\x01' \ | |
b'\x02\x08\x40\x80\x01\x18\x30\x00\x81\x10\x40\x08\x00\x28\x42\x06' \ | |
b'\x20\xc1\xb4\x40\x10\x00\x61\x10\xb0\x04\x12\x82\x01\x81\x00\x00' \ | |
b'\xc1\x24\x05\x10\x02\x40\x10\xb4\x08\x1b\x80\x24\x50\x80\x20\x00' \ | |
b'\x24\x44\xc0\x02\x08\x09\x00\x00\x43\x22\x00\x82\x94\x60\x00\x10' \ | |
b'\x41\x80\x00\x09\x00\x00\x44\x48\xa0\x29\x01\x00\x20\x42\x84\x01' \ | |
b'\x4a\x10\x2c\x80\x10\x04\x51\x00\x08\x10\x80\x48\x40\x20\x60\x80' \ | |
b'\x30\x09\x81\x06\x00\x5a\x84\x0c\x02\x12\x00\x00\x00\x61\x10\x22' \ | |
b'\x48\x81\xb0\x48\x01\x24\x08\x40\x02\x00\xc2\x10\x04\x12\x00\x04' \ | |
b'\x00\x90\x0c\x4a\x22\x00\x03\x84\x29\x80\x10\x25\x10\x20\x01\x00' \ | |
b'\x20\x00\x1b\x00\x2c\x40\x92\x60\x00\x84\x00\x00\x32\x08\x01\x20' \ | |
b'\x00\x02\x04\x24\x81\x00\x08\x8a\x04\x45\x40\x00\x01\x00\x10\x00' \ | |
b'\x0a\xa6\x21\x10\x16\x00\x82\x00\x60\x40\x00\x48\x98\x90\x48\x12' \ | |
b'\x82\x04\x10\x00\x00\x01\x34\x20\x48\x10\x05\x18\x84\x00\x10\x04' \ | |
b'\x0c\x10\x00\x41\x81\x00\x04\x10\x02\x41\x01\x82\x08\x41\x02\x00' \ | |
b'\x81\x02\x20\x01\x80\x64\x02\x10\x00\x90\x92\x00\x02\x86\x08\xc2' \ | |
b'\x84\x08\x08\x00\x20\xd2\x00\x60\x00\x10\x0c\x09\x84\x24\x91\x02' \ | |
b'\x01\x4a\x30\x04\x88\x00\x40\x00\x00\x0c\x50\x02\x01\x52\x90\x20' \ | |
b'\x81\x10\x00\x12\x30\x20\x00\x30\x0c\x00\x00\x09\x00\x02\x09\x83' \ | |
b'\x00\x41\x42\x10\x04\x90\x86\x04\x41\x20\x08\x00\x90\x41\x48\xa4' \ | |
b'\x20\x00\x00\x09\x01\x06\x00\x13\x04\x00\x02\x86\x68\x40\x80\x00' \ | |
b'\x50\x22\x40\x00\x12\x40\x09\x26\x28\x40\x06\x20\x88\x04\x04\x08' \ | |
b'\x00\x04\x01\x30\x4c\x4a\x00\x00\x81\x04\x60\x00\x00\x00\x88\x20' \ | |
b'\x61\x00\x00\x44\x02\x80\x05\x92\x00\x21\x0a\x14\x01\x80\x12\x49' \ | |
b'\x80\x16\x08\x02\x84\x05\x43\x84\x40\x08\x10\x44\x42\x20\x44\x00' \ | |
b'\x26\x00\x41\x22\x08\x00\x06\x00\x00\x04\x05\x88\x10\x08\x81\x82' \ | |
b'\x00\x40\x00\x24\x52\x04\x00\x00\xa0\x21\x88\x10\x04\x01\x06\x40' \ | |
b'\x11\x80\x20\x80\x82\x41\xc0\x00\x60\x02\x20\x0c\x00\x16\x40\x41' \ | |
b'\x02\x25\x10\x80\x00\x08\xa0\x40\x12\x02\x00\x11\x02\x4c\x02\x26' \ | |
b'\x0d\x80\x06\x40\xc8\x30\x01\x88\x22\x05\x18\xa0\x08\x08\x00\x25' \ | |
b'\x02\x10\x40\x40\x94\x00\x48\x20\x08\x18\x14\x08\x0b\x02\x04\x00' \ | |
b'\x84\x40\x00\x04\x01\x00\x10\x01\x00\x82\x40\x01\x06\x00\x41\x00' \ | |
b'\x09\x03\x00\x20\x40\x20\x28\x00\x80\x0c\x40\x02\x21\x00\x02\x00' \ | |
b'\xc0\x80\x01\x00\x22\x2d\x10\x00\x00\x11\x24\x00\x02\x94\x69\x03' \ | |
b'\x20\x44\x12\x02\x09\x08\x10\x08\x10\x00\x25\x51\x00\x21\xc1\xa0' \ | |
b'\x40\x92\x02\x01\x01\x00\x04\x52\xa0\x04\x40\x14\x40\x08\x04\x01' \ | |
b'\x00\x22\x24\x09\x14\x08\x12\x02\x04\x42\x90\x49\x00\xb0\x00\xc8' \ | |
b'\x22\x20\x01\x34\x0c\x11\x00\x00\x00\x90\x08\x0a\x04\x61\x90\x10' \ | |
b'\x20\x00\x32\x44\x02\x02\x28\x90\x04\x29\x8b\x00\x24\x1a\x10\x20' \ | |
b'\x90\x00\x04\x42\xa0\x2c\x10\x82\x08\x88\x10\x01\x10\x00\x00\x09' \ | |
b'\x26\x40\x0b\x00\x0c\x00\x14\x01\x82\xa0\x05\x02\x00\x08\x08\x30' \ | |
b'\x08\x09\x02\x0c\x90\x04\x20\x40\x00\x00\x12\x02\x0c\x91\x80\x00' \ | |
b'\x08\xa4\x04\x02\x02\x20\x41\x04\x21\x10\x00\x00\x18\x30\x00\x19' \ | |
b'\x86\x29\x80\x90\x00\x40\x24\x05\x00\x22\x49\x00\x02\x00\x58\x24' \ | |
b'\x05\x82\x10\x40\x0b\x00\x24\x10\x10\x04\x00\x96\x08\x00\x84\x08' \ | |
b'\x00\x10\x20\x88\x00\x04\x50\x20\x04\x90\x86\x40\x01\x20\x01\x00' \ | |
b'\x04\x01\x8a\x94\x01\x40\x12\x00\x11\x02\x40\x02\xa0\x08\x80\x10' \ | |
b'\x40\x43\x20\x60\x52\x22\x40\x00\x10\x48\x40\x20\x25\xd1\x82\x41' \ | |
b'\x82\x00\x60\x82\x10\x28\x00\x00\x48\x18\x04\x01\x80\x82\x09\xc1' \ | |
b'\x30\x24\x42\x02\x01\x91\x20\x0c\x11\x84\x00\xd1\x02\x40\x02\x90' \ | |
b'\x40\xc8\x00\x20\x11\x00\x00\x48\x02\x24\x91\x10\x60\x82\x00\x21' \ | |
b'\x82\x00\x40\x18\x96\x00\x0b\x20\x28\x11\x00\x20\x09\x80\x20\x02' \ | |
b'\x00\x08\x98\x96\x00\x12\x00\x2d\x40\x00\x09\x40\x04\x00\x00\x02' \ | |
b'\x0c\x08\x20\x00\x03\x20\x00\x03\x02\x20\x80\x80\x45\x18\x02\x45' \ | |
b'\x80\xa2\x40\x18\x00\x01\x91\x80\x00\xc2\x14\x00\x18\x10\x00\x90' \ | |
b'\x20\x04\x40\x04\x01\x02\x06\x29\xc8\x00\x20\xc0\x00\x24\x99\x80' \ | |
b'\x40\x02\x00\x01\xc3\x82\x41\x08\x24\x44\x00\x30\x00\x80\x04\x04' \ | |
b'\x0a\x20\x00\x52\x14\x20\x0a\x94\x01\x10\x10\x28\x00\x04\x0c\x01' \ | |
b'\x00\x29\x41\x02\x00\x88\xa0\x45\x0a\x20\x08\x01\x10\x0c\x42\x82' \ | |
b'\x09\x50\x92\x08\x80\x84\x20\x00\x10\x08\x11\x24\x04\x03\xa4\x28' \ | |
b'\x00\x02\x28\x42\x10\x41\x40\x02\x45\x01\x02\x00\x00\x24\x25\x12' \ | |
b'\x02\x40\xc8\x30\x24\x00\x00\x41\x11\x02\x48\x12\x20\x0c\x02\x06' \ | |
b'\x41\x01\x20\x24\x80\x00\x01\x00\x10\x44\x08\x06\x08\xc2\x10\x00' \ | |
b'\x4a\x10\x05\x00\x32\x01\x01\x10\x00\x01\x82\x20\x52\x84\x48\x01' \ | |
b'\x84\x00\x50\x00\x64\x10\x00\x00\x02\xa4\x00\x90\x00\x20\x43\x24' \ | |
b'\x21\xc2\x00\x2c\x80\x12\x00\x40\xa0\x00\x02\x90\x40\x42\xb4\x40' \ | |
b'\x80\x00\x00\x11\x80\x40\x08\x80\x0c\x13\x02\x40\x80\x10\x20\x00' \ | |
b'\x02\x0c\x80\x00\x00\x58\x26\x04\x80\x04\x41\x0a\x00\x60\x08\x02' \ | |
b'\x60\x11\xb2\x00\x58\x00\x00\x03\x10\x08\x80\x00\x01\x00\x20\x61' \ | |
b'\x00\x04\x04\x1b\x86\x28\x00\x80\x28\x08\x90\x04\x02\x30\x01\x90' \ | |
b'\x10\x00\x01\x04\x20\x81\x00\x00\x02\x80\x44\x02\x20\x41\x00\x30' \ | |
b'\x40\x0b\x20\x00\x11\x06\x28\x42\x20\x24\x92\x20\x20\x08\x84\x00' \ | |
b'\x43\x00\x04\x40\x82\x01\x48\x00\x01\x58\x30\x01\x00\x24\x00\x19' \ | |
b'\x00\x04\x01\x16\x48\xc3\x80\x21\x12\x02\x04\x09\x94\x00\x41\x00' \ | |
b'\x28\x81\x84\x60\x43\x00\x44\x08\x00\x08\x11\x32\x40\x10\x00\x04' \ | |
b'\x00\x06\x28\x81\x14\x01\x88\x00\x40\x80\x10\x00\x1a\x80\x28\x52' \ | |
b'\x00\x28\x08\x20\x40\x82\x10\x60\x19\x04\x00\x09\x20\x01\x91\x94' \ | |
b'\x00\x82\x14\x40\x02\x12\x00\x10\x02\x0c\x02\x02\x20\x11\x04\x00' \ | |
b'\x48\x00\x01\x18\x10\x48\x81\x04\x40\x00\xa0\x08\x12\x10\x40\x08' \ | |
b'\x34\x20\x18\x22\x01\x80\x20\x04\x42\x84\x2c\x10\x86\x00\x80\x00' \ | |
b'\x45\x00\x10\x40\x01\xa4\x00\x40\x24\x00\x01\x82\x21\x48\x80\x04' \ | |
b'\x00\x00\x20\x81\x92\x40\x00\x24\x08\x41\x02\x20\x08\x10\x01\xda' \ | |
b'\x00\x44\x18\x84\x40\x1a\x04\x00\x00\x92\x49\x00\x20\x44\x82\x02' \ | |
b'\x09\x08\x22\x04\x10\x20\x24\xc1\x10\x68\x01\x10\x44\x80\x22\x20' \ | |
b'\x10\x84\x00\x41\x04\x28\x50\x06\x20\x40\x84\x00\xd0\x00\x48\x09' \ | |
b'\x02\x40\x11\x02\x24\x10\x82\x41\x8a\x20\x60\xc8\x00\x2c\x10\x20' \ | |
b'\x04\x01\x84\x2c\x10\x14\x61\x02\x00\x64\x40\x10\x01\x88\x00\x40' \ | |
b'\x18\x00\x05\x42\x00\x01\x0b\x20\x24\x8a\x02\x20\x81\x02\x00\x4a' \ | |
b'\x20\x0d\x40\x00\x00\x00\x30\x01\x50\x20\x40\x10\x20\x48\x01\x80' \ | |
b'\x01\x03\x10\x08\x08\x90\x00\x00\x12\x60\x88\x26\x00\x02\x04\x00' \ | |
b'\x92\x00\x00\x81\x04\x00\x00\x30\x20\x08\x04\x44\x00\xa0\x09\xc1' \ | |
b'\x00\x20\x81\x20\x00\x08\x10\x40\x91\x94\x08\x40\x02\x20\x12\x04' \ | |
b'\x09\x0a\x20\x00\x00\x10\x20\x10\x06\x40\x03\x20\x00\x81\x90\x00' \ | |
b'\x80\xa0\x61\x58\x12\x04\x81\x24\x00\x18\x20\x29\x00\x06\x41\x0a' \ | |
b'\x80\x00\x98\x02\x00\x90\x10\x0c\x0a\x20\x00\xc1\x90\x00\x01\x00' \ | |
b'\x20\x98\x20\x00\x81\x90\x0c\x01\x02\x05\x80\x80\x20\x02\x34\x44' \ | |
b'\x4a\x12\x08\x18\x24\x04\x03\xa0\x00\x03\x84\x20\x80\x04\x04\x92' \ | |
b'\x20\x20\x08\xa0\x48\x00\x80\x00\x10\x02\x00\x09\x80\x04\x80\x20' \ | |
b'\x60\x08\x86\x44\x00\x00\x28\x52\x80\x49\x42\x94\x00\x40\x10\x20' \ | |
b'\x00\xa2\x44\x58\x04\x24\x00\x00\x49\x02\x00\x05\x12\x22\x04\x80' \ | |
b'\x04\x08\x19\x00\x00\x10\x04\x00\x88\x20\x20\x12\x10\x08\x81\x92' \ | |
b'\x40\x40\x82\x01\x00\x00\x08\x48\x30\x01\x00\x02\x64\x80\x34\x00' \ | |
b'\x02\x04\x0d\x92\x82\x21\x40\xa4\x04\x48\x20\x21\x11\x06\x00\x08' \ | |
b'\x04\x00\xd2\x00\x00\x80\x00\x60\x42\x10\x4d\x10\xa4\x00\x09\xa2' \ | |
b'\x09\xc0\x02\x01\x08\x00\x64\x4a\x00\x28\x10\x10\x44\x03\x02\x01' \ | |
b'\x40\x10\x40\xc1\x04\x61\x80\x02\x01\x00\x82\x40\x11\x20\x20\x02' \ | |
b'\x94\x00\x40\x20\x20\x4a\x00\x01\x00\x00\x00\x51\x20\x01\x00\x06' \ | |
b'\x00\x00\x20\x40\x80\x12\x04\x90\x00\x08\x48\x02\x0d\x40\x02\x41' \ | |
b'\x81\x00\x00\xc2\x02\x21\x11\x90\x00\x0a\x84\x00\x42\x80\x68\x08' \ | |
b'\x14\x40\x80\x10\x60\x80\x22\x00\x01\xa2\x21\x92\x10\x08\x88\x80' \ | |
b'\x45\x00\x00\x28\x08\x30\x00\x41\x04\x08\xd0\x00\x00\x0b\x04\x45' \ | |
b'\x50\x20\x20\x00\x00\x48\x12\x00\x04\x00\x06\x08\x82\x10\x01\x98' \ | |
b'\x02\x24\x00\x80\x44\x4b\x20\x24\x81\x86\x29\x80\x10\x04\x40\x10' \ | |
b'\x08\x01\x90\x08\x41\x26\x08\x03\x80\x21\x40\x00\x04\x82\x00\x09' \ | |
b'\x11\x02\x40\x0a\xa2\x04\x80\x10\x61\xc0\x10\x00\xc2\x00\x25\x80' \ | |
b'\xa0\x44\x10\x06\x08\xc0\x10\x00\x00\x14\x40\x08\x00\x40\x81\x20' \ | |
b'\x0c\x1b\x02\x21\x92\x90\x00\x09\x10\x00\x02\x00\x09\x08\xa2\x04' \ | |
b'\x02\x00\x08\x01\x00\x01\x08\x00\x40\x50\x20\x44\x11\x02\x08\x03' \ | |
b'\x02\x08\x00\x00\x00\x82\x14\x20\xc0\x20\x0c\x80\x84\x40\x5a\x24' \ | |
b'\x24\x81\x04\x29\x00\x10\x00\x08\x30\x40\x01\x12\x00\x48\x06\x00' \ | |
b'\x81\x80\x01\x42\xb4\x24\x0a\x10\x41\x00\x00\x0c\x52\x00\x00\x82' \ | |
b'\x84\x09\x48\x00\x00\x0a\x20\x61\x00\x84\x0c\x00\x04\x24\x50\x02' \ | |
b'\x40\x00\x00\x01\x8a\x02\x20\x80\x16\x04\x41\x02\x00\x50\x90\x20' \ | |
b'\x08\x14\x24\x02\x00\x01\x10\x06\x08\x48\x86\x08\x40\x16\x08\xc9' \ | |
b'\x00\x04\x90\x00\x40\x09\x14\x40\x00\x82\x00\x40\x06\x08\x89\x10' \ | |
b'\x21\x10\x02\x04\x88\x84\x40\x59\x04\x08\x93\x00\x28\x43\x30\x00' \ | |
b'\x40\x02\x40\x00\x20\x00\x01\x24\x00\x80\x80\x60\x49\x30\x40\x00' \ | |
b'\x02\x09\x80\x02\x00\x00\x04\x0c\x02\x82\x08\x80\x20\x20\x40\x00' \ | |
b'\x44\x99\x20\x08\x08\x02\x04\xd2\x82\x21\x08\x30\x05\x48\x22\x68' \ | |
b'\x99\x16\x04\x50\x20\x01\x00\x80\x08\x00\x80\x41\xc0\x22\x0c\x00' \ | |
b'\x10\x40\x40\x20\x00\x51\x10\x00\x03\x84\x00\x08\x00\x0c\x80\x10' \ | |
b'\x00\x02\x00\x2c\x02\x84\x01\x0b\x04\x21\x40\x02\x00\x00\xa2\x04' \ | |
b'\x41\x20\x24\x81\x04\x41\x80\x30\x00\x52\x12\x00\x01\x80\x40\x09' \ | |
b'\x00\x2d\x50\x04\x00\x42\x14\x20\x88\x10\x08\x00\x12\x00\x08\x02' \ | |
b'\x00\x02\x92\x48\x00\x00\x00\x12\x20\x01\x19\x14\x00\x13\x86\x08' \ | |
b'\x03\x10\x00\x48\x14\x45\xc0\x10\x20\x81\x20\x00\x12\x04\x01\x40' \ | |
b'\x00\x48\x00\x14\x64\x00\x00\x25\x08\x20\x40\x4a\x80\x21\x80\x00' \ | |
b'\x21\x08\x04\x21\x80\x00\x4c\x98\x92\x00\x03\x00\x08\x40\x04\x00' \ | |
b'\xc2\x00\x20\x10\x02\x20\x11\x02\x44\x43\x80\x04\x90\x80\x40\x00' \ | |
b'\x80\x44\x48\x32\x0d\x81\x82\x48\x50\x06\x2d\xc2\x00\x60\x00\x20' \ | |
b'\x60\x02\x10\x20\x90\x00\x00\x10\x26\x01\x42\x80\x08\xc1\x10\x00' \ | |
b'\x80\x02\x41\x09\x04\x00\x10\x84\x08\x42\x02\x20\x08\xa4\x40\x08' \ | |
b'\x22\x20\x00\x36\x04\x00\x02\x01\x00\x80\x00\x09\x90\x01\x82\x20' \ | |
b'\x24\x00\x00\x4c\x02\x22\x01\x01\x02\x01\x88\xa0\x05\x90\x10\x44' \ | |
b'\x01\x90\x40\x43\x80\x00\x40\x84\x48\x00\xa0\x00\x18\x12\x08\x18' \ | |
b'\x22\x04\x01\x20\x08\x03\x10\x61\x40\x00\x04\x4a\x02\x00\x00\xb0' \ | |
b'\x48\x10\x06\x20\x12\x00\x00\x03\x20\x20\x98\x00\x4c\x90\x02\x48' \ | |
b'\x42\x22\x04\x03\x90\x21\x00\x34\x21\x48\x00\x01\x08\x34\x0c\x00' \ | |
b'\x00\x25\x52\x00\x41\x00\x80\x00\x02\x32\x40\x00\x26\x00\x58\x02' \ | |
b'\x05\x10\x00\x00\x08\x10\x05\x82\x30\x48\x00\x94\x08\x40\x00\x00' \ | |
b'\x81\x04\x20\x48\x20\x60\x10\x00\x44\x91\x02\x44\x00\xa2\x04\x00' \ | |
b'\x82\x01\x82\x20\x40\x10\x20\x2d\x81\x02\x00\x48\x00\x00\x00\x92' \ | |
b'\x08\x41\xa0\x61\x02\x30\x00\x08\xa4\x08\x00\x20\x25\x00\x02\x60' \ | |
b'\x83\x10\x00\x80\x02\x40\x00\x30\x04\x08\x00\x00\x81\x10\x40\xc0' \ | |
b'\x24\x01\x08\x20\x21\x01\x34\x48\x18\x06\x20\x10\x82\x60\x08\x04' \ | |
b'\x05\x8a\x30\x40\x10\x02\x00\x13\x00\x21\xc2\x10\x20\x01\x90\x01' \ | |
b'\xc2\x20\x09\x10\x00\x40\x0a\x04\x09\x81\x10\x08\x00\x24\x24\x80' \ | |
b'\x20\x40\x00\x82\x48\x41\xa0\x00\x00\x10\x00\x88\x00\x20\x48\x12' \ | |
b'\x21\x80\xa0\x00\x09\x84\x04\x03\x14\x01\x43\xa0\x20\x00\x20\x49' \ | |
b'\x81\x82\x48\x10\x22\x0c\xd1\x02\x00\x01\x94\x64\x80\x00\x2c\x10' \ | |
b'\x00\x00\x02\x82\x04\x43\x04\x00\x40\x00\x20\x0a\x22\x40\x91\x84' \ | |
b'\x4c\x18\x02\x20\x40\x80\x49\x40\x80\x44\x02\x22\x20\x09\x32\x08' \ | |
b'\x18\x84\x05\x40\x00\x08\x81\x90\x00\x00\x02\x60\x00\x00\x00\x42' \ | |
b'\x20\x20\x01\x04\x21\xc1\xa0\x00\x50\x20\x64\x19\x06\x04\x01\x02' \ | |
b'\x04\x02\x06\x48\x00\xb0\x21\x08\x20\x28\x09\x20\x00\x0b\x20\x20' \ | |
b'\x00\x96\x01\x00\x20\x05\x48\x10\x4d\x80\x04\x40\x01\x02\x0c\x03' \ | |
b'\x00\x60\x08\x00\x44\x80\x00\x0c\x11\x10\x04\x00\x22\x09\x41\x06' \ | |
b'\x28\x00\x10\x21\x50\x22\x00\x90\x10\x08\x10\x02\x24\x02\x10\x09' \ | |
b'\x08\x00\x01\x08\x12\x41\x08\x20\x04\x10\xa0\x20\x40\x00\x40\x03' \ | |
b'\x94\x20\x92\x10\x01\x08\xa6\x40\x08\xa0\x01\xc0\x10\x01\x82\x04' \ | |
b'\x44\x42\x00\x24\x88\x16\x40\x00\xa0\x20\x50\x14\x40\x08\x80\x00' \ | |
b'\x08\x22\x04\x01\xa4\x44\x01\x04\x00\x00\x02\x08\x01\x30\x24\x12' \ | |
b'\x20\x0c\x09\x04\x08\x18\x00\x04\xd2\x04\x00\xcb\xa0\x00\x00\x00' \ | |
b'\x60\x81\x20\x48\x50\x00\x01\xc1\x00\x09\x01\x10\x00\x10\x02\x40' \ | |
b'\x09\xa4\x40\x01\x02\x09\xd3\x90\x00\x0a\x80\x04\x00\x00\x61\x08' \ | |
b'\x00\x08\x00\xa6\x20\x10\x04\x40\x08\x00\x44\xd2\x20\x44\x00\x80' \ | |
b'\x40\x09\x86\x20\x40\x04\x08\x8a\x00\x21\x10\x20\x28\x88\x80\x04' \ | |
b'\x41\x80\x00\x10\x06\x08\x82\x00\x00\x48\x00\x25\x18\x22\x04\x4a' \ | |
b'\xa0\x04\x92\x10\x00\xc1\x20\x44\x00\x00\x08\x80\x30\x00\x59\x04' \ | |
b'\x28\x00\x00\x61\x8a\x94\x00\x82\x00\x21\x80\x82\x08\x40\x00\x08' \ | |
b'\x82\x16\x01\x81\x00\x00\x4a\x02\x45\x08\x00\x40\x00\x04\x21\x50' \ | |
b'\x10\x69\x02\x04\x40\x0a\x00\x08\x10\x06\x08\x41\x00\x20\x82\x90' \ | |
b'\x40\x8a\x14\x40\x00\x00\x28\x08\x32\x00\x00\x00\x21\x80\x04\x09' \ | |
b'\x40\x00\x00\x58\x20\x00\x18\x80\x00\x42\x02\x24\x00\x04\x08\x03' \ | |
b'\x90\x41\x48\x30\x05\x10\x82\x00\x42\x00\x28\x80\x00\x48\x00\x90' \ | |
b'\x04\x02\x10\x44\x00\xa0\x48\x19\x00\x24\x91\x84\x21\xc8\x84\x24' \ | |
b'\x02\x02\x04\x80\x30\x40\x12\x22\x05\x00\x10\x21\x80\x00\x24\x40' \ | |
b'\x00\x04\x81\x04\x00\x00\x82\x09\x00\x92\x40\x40\x30\x44\xc0\x00' \ | |
b'\x01\x81\x02\x08\x09\xa6\x00\x10\x94\x60\x80\x04\x41\x52\x12\x40' \ | |
b'\x18\x00\x0c\x02\x80\x28\x40\x04\x00\x4b\x24\x04\x40\x00\x60\x90' \ | |
b'\x00\x00\x40\x82\x05\x50\x90\x00\x08\x20\x60\x88\x30\x2c\x11\x82' \ | |
b'\x40\x10\x04\x20\x11\x90\x09\x00\x00\x01\x52\x10\x4c\x00\x80\x40' \ | |
b'\x59\x00\x01\x40\x02\x01\x02\x30\x20\x80\x12\x45\x00\x32\x44\x58' \ | |
b'\x02\x01\x40\x16\x08\xc0\x30\x00\x08\x00\x20\x08\x14\x44\x12\x00' \ | |
b'\x21\x00\x00\x41\x00\x80\x44\xc0\x00\x01\x89\x02\x00\x02\x24\x20' \ | |
b'\x12\x94\x08\x02\x90\x00\x02\x20\x08\x18\x82\x00\x41\x26\x08\x80' \ | |
b'\x02\x00\x49\xa0\x21\x80\x30\x08\x88\x10\x44\x12\x00\x25\x02\x12' \ | |
b'\x00\x42\x00\x60\x98\x32\x01\x90\x24\x00\x10\x00\x00\x81\x02\x28' \ | |
b'\x41\x30\x20\x10\x00\x00\x80\x00\x40\x50\x24\x00\x83\x00\x01\x8a' | |
main: | |
call input | |
# check if input is 1 | |
cmp c, n, 1 | |
sz c, 3 | |
mov o, n | |
call print | |
halt 0 | |
# remove factors of two | |
mov o, 2 | |
div_2_loop: | |
and c, n, 1 | |
snz c, 3 | |
call print | |
shr n, n, 1 | |
jmp div_2_loop | |
# check if n is a power of two | |
cmp c, n, 1 | |
sz c, 1 | |
halt 0 | |
# trial division | |
mov d, data(primes_53) | |
mov i, 53 | |
trial_division_loop: | |
lbu p, d | |
trial_division_check: | |
divu q, r, n, p | |
snz r, 4 | |
mov o, p | |
call print | |
mov n, q | |
jmp trial_division_check | |
mulu s, t, p, p | |
leu c, n, s | |
sz c, 5 | |
cmp c, n, 1 | |
snz c, 2 | |
mov o, n | |
call print | |
halt 0 | |
sub i, i, 1 | |
add d, d, 1 | |
jnz trial_division_loop, i | |
# mov g, 0 # initialize factor list pointers | |
# mov h, 0 # all registers are zero at program start | |
# *g is the next factor to check for primality | |
# *h is the last factor we've found so far | |
# the first factor is always *0 | |
sw g, n | |
prime_test: | |
# by this point we know that n has no prime factors less than 256, | |
# so if n is less than 256**2, then it must be prime | |
lequ c, n, 2**16 | |
jnz is_prime, c | |
geu c, n, 2**17+1 | |
jnz miller_rabin, c | |
sub d, n, 2**16+1 # table starts at 2**16+1 (by above argument) | |
shr d, d, 1 # we know n is odd, so we ignore the last bit | |
and m, d, 2**3-1 # each byte is 3-bit lookup table | |
shr d, d, 3 | |
add d, d, data(prime_bytes) | |
lbu p, d | |
shr p, p, m | |
and p, p, 1 | |
jnz is_prime, p | |
jmp pollard_rho # I should probably do trial division here | |
# since we only have around 20 more primes to check | |
################ | |
# Miller-Rabin primality test; bases from https://miller-rabin.appspot.com/ | |
# Note that this subroutine, as well as most of the ones before the 'utility | |
# functions' section are not proper functions, as they sometimes jump directly | |
# to the next step instead of returning! | |
# | |
# input: n | |
# output: n/a | |
################ | |
miller_rabin: | |
shr d, n, 1 | |
mov b, 1 | |
mr_rd_loop: # n - 1 == d * 2**b, d is odd | |
and c, d, 1 | |
snz c, 3 | |
shr d, d, 1 | |
inc b | |
jmp mr_rd_loop | |
geu c, n, 2**32-1 # test if we can use single-precision arithmetic | |
jnz miller_rabin_montgomery, c | |
sub m, n, 1 | |
mr_test_1: | |
gequ c, n, 0x000000000005361b | |
jnz mr_test_2, c | |
mov a, 0x81b33f22efdceaa9 | |
call miller_rabin_test | |
jmp is_prime | |
mr_test_2: | |
gequ c, n, 0x000000003e9de64d | |
jnz mr_test_3, c | |
mov a, 0x0000004e69b6552d | |
call miller_rabin_test | |
mov a, 0x00223f5bb83fc553 | |
call miller_rabin_test | |
jmp is_prime | |
mr_test_3: | |
mov a, 0x3ab4f88ff0cc7c80 | |
call miller_rabin_test | |
mov a, 0xcbee4cdf120c10aa | |
call miller_rabin_test | |
mov a, 0xe6f1343b0edca8e7 | |
call miller_rabin_test | |
jmp is_prime | |
miller_rabin_montgomery: | |
call montgomery_precompute | |
sub m, n, r # montgomery representation of n-1 | |
mr_test_mt_3: | |
gequ c, n, 0x000000518dafbfd1 | |
jnz mr_test_mt_4, c | |
mov a, 0x3ab4f88ff0cc7c80 | |
call miller_rabin_test_montgomery | |
mov a, 0xcbee4cdf120c10aa | |
call miller_rabin_test_montgomery | |
mov a, 0xe6f1343b0edca8e7 | |
call miller_rabin_test_montgomery | |
jmp is_prime | |
mr_test_mt_4: | |
gequ c, n, 0x0000323ee0e55e6b | |
jnz mr_test_mt_5, c | |
mov a, 0x0000000000000002 | |
call miller_rabin_test_montgomery | |
mov a, 0x0000810c207b08bf | |
call miller_rabin_test_montgomery | |
mov a, 0x10a42595b01d3765 | |
call miller_rabin_test_montgomery | |
mov a, 0x99fd2b545eab5322 | |
call miller_rabin_test_montgomery | |
jmp is_prime | |
mr_test_mt_5: | |
gequ c, n, 0x001c6b470864f683 | |
jnz mr_test_mt_6, c | |
mov a, 0x0000000000000002 | |
call miller_rabin_test_montgomery | |
mov a, 0x000003c1c7396f6d | |
call miller_rabin_test_montgomery | |
mov a, 0x02142e2e3f22de5c | |
call miller_rabin_test_montgomery | |
mov a, 0x0297105b6b7b29dd | |
call miller_rabin_test_montgomery | |
mov a, 0x370eb221a5f176dd | |
call miller_rabin_test_montgomery | |
jmp is_prime | |
mr_test_mt_6: | |
gequ c, n, 0x081f23f390affe89 | |
jnz mr_test_mt_7, c | |
mov a, 0x0000000000000002 | |
call miller_rabin_test_montgomery | |
mov a, 0x000070722e8f5cd0 | |
call miller_rabin_test_montgomery | |
mov a, 0x0020cd6bd5ace2d1 | |
call miller_rabin_test_montgomery | |
mov a, 0x009bbc940c751630 | |
call miller_rabin_test_montgomery | |
mov a, 0x0a90404784bfcb4d | |
call miller_rabin_test_montgomery | |
mov a, 0x1189b3f265c2b0c7 | |
call miller_rabin_test_montgomery | |
jmp is_prime | |
mr_test_mt_7: | |
mov a, 0x0000000000000002 | |
call miller_rabin_test_montgomery | |
mov a, 0x0000000000000145 | |
call miller_rabin_test_montgomery | |
mov a, 0x000000000000249f | |
call miller_rabin_test_montgomery | |
mov a, 0x0000000000006e12 | |
call miller_rabin_test_montgomery | |
mov a, 0x000000000006e0d7 | |
call miller_rabin_test_montgomery | |
mov a, 0x0000000000953d18 | |
call miller_rabin_test_montgomery | |
mov a, 0x000000006b0191fe | |
call miller_rabin_test_montgomery | |
jmp is_prime | |
is_prime: | |
mov o, n | |
call print | |
cmp c, g, h | |
sz c, 1 | |
halt 0 | |
add g, g, 8 | |
lw n, g | |
jmp prime_test | |
found_factor: | |
divu n, b, n, a | |
add h, h, 8 | |
sw h, a | |
jmp prime_test | |
################ | |
# Miller-Rabin primality test subroutine; needs m == n - 1 | |
# | |
# input: a, n, m | |
# output: n/a | |
################ | |
miller_rabin_test: | |
divu q, a, a, n # a = a mod n | |
snz a, 1 | |
ret # a certifies primality if a == 0 mod n | |
call power # a = a**d mod n | |
cmp c, a, 1 # r == 1 | |
sz c, 1 | |
ret # a certifies primality if a**d == 1 mod n | |
cmp c, a, m # m == n-1 | |
sz c, 1 | |
ret # a certifies primality if a**d == -1 mod n | |
mov d, 2 | |
mr_square_loop: | |
dec b | |
jz pollard_rho, b | |
mulu x, y, a, a # a = a**2 mod n | |
divu q, a, x, n # q is unused | |
cmp c, a, 1 # r == 1 | |
jnz pollard_rho, c # a witnesses compositeness if a**(d*2**i) == 1 mod n | |
cmp c, a, m # m == n-1 | |
sz c, 1 | |
ret # a certifies primality if a**(d*2**i) == -1 mod n | |
jmp mr_square_loop | |
################ | |
# Miller-Rabin primality test subroutine using Montgomery arithmetic; needs | |
# the values from the precomputation step as well as m == n - 1 | |
# | |
# input: a, n, m, (u, v, r, s) | |
# output: n/a | |
################ | |
miller_rabin_test_montgomery: | |
divu q, a, a, n # a = a mod n | |
snz a, 1 | |
ret # a certifies primality if a == 0 mod n | |
mulu x, y, a, s | |
call montgomery_reduce # convert a to montgomery form | |
mov a, x | |
call montgomery_power # a = a**d mod n | |
cmp c, a, r # r == 1 (mongtomery form) | |
sz c, 1 | |
ret # a certifies primality if a**d == 1 mod n | |
cmp c, a, m # m == n-1 (montgomery form) | |
sz c, 1 | |
ret # a certifies primality if a**d == -1 mod n | |
mr_mt_square_loop: | |
dec b | |
jz pollard_rho_montgomery, b | |
mulu x, y, a, a # a = a**2 mod n | |
call montgomery_reduce | |
mov a, x | |
cmp c, a, r # r == 1 (mongtomery form) | |
jnz pollard_rho_montgomery, c # a witnesses compositeness if a**(d*2**i) == 1 mod n | |
cmp c, a, m # m == n-1 (montgomery form) | |
sz c, 1 | |
ret # a certifies primality if a**(d*2**i) == -1 mod n | |
jmp mr_mt_square_loop | |
################ | |
# Pollard-Brent rho factorization algorithm | |
# | |
# input: n | |
# output: n/a | |
################ | |
pollard_rho: | |
rho_skip_count = 64 | |
mov o, 42 | |
rho_retry: | |
mov b, o | |
mov j, 1 | |
mov f, 1 | |
mov d, 1 | |
rho_outer_loop: | |
mov a, b | |
mov i, j | |
rho_fast_loop: | |
mulu b, y, b, b | |
add b, b, 1 | |
divu y, b, b, n | |
#mov o, b | |
#call print | |
sub i, i, 1 | |
jnz rho_fast_loop, i | |
mov k, 0 | |
rho_slow_loop: | |
mov e, b | |
sub i, j, k | |
leu c, rho_skip_count, i | |
sz c, 1 | |
mov i, rho_skip_count | |
rho_inner_loop: | |
mulu b, y, b, b | |
add b, b, 1 | |
divu y, b, b, n | |
leu c, a, b | |
sz c, 1 | |
sub x, b, a | |
snz c, 1 | |
sub x, a, b | |
mulu f, y, x, f | |
divu y, f, f, n | |
#add o, f, 1000000000 | |
#call print | |
sub i, i, 1 | |
jnz rho_inner_loop, i | |
mov x, f | |
call gcd | |
mov d, x | |
#mov o, d | |
#call print | |
add k, k, rho_skip_count | |
gequ c, k, j | |
snz c, 2 | |
lequ c, d, 1 | |
jnz rho_slow_loop, c | |
shl j, j, 1 | |
lequ c, d, 1 | |
jnz rho_outer_loop, c | |
cmp c, d, n | |
sz c, 12 | |
rho_final_loop: | |
mulu e, y, e, e | |
add e, e, 1 | |
divu y, e, e, n | |
#add o, e, 2000000000 | |
#call print | |
leu c, a, e | |
sz c, 1 | |
sub x, e, a | |
snz c, 1 | |
sub x, a, e | |
call gcd | |
mov d, x | |
#mov o, d | |
#call print | |
lequ c, d, 1 | |
jnz rho_final_loop, c | |
cmp c, d, n | |
sz c, 2 | |
add o, o, 1 | |
jmp rho_retry | |
mov a, d | |
jmp found_factor | |
################ | |
# Pollard-Brent rho factorization algorithm using Montgomery arithmetic. | |
# Requires m = n - r | |
# | |
# input: n, (u, v, r, s, m) | |
# output: n/a | |
################ | |
pollard_rho_montgomery: | |
rho_mt_skip_count = 64 | |
mov o, 42 | |
rho_mt_retry: | |
mulu x, y, o, s | |
call montgomery_reduce | |
mov b, x | |
mov j, 1 | |
mov f, r | |
mov d, 1 | |
rho_mt_outer_loop: | |
mov a, b | |
mov i, j | |
rho_mt_fast_loop: | |
mulu x, y, b, b | |
call montgomery_reduce | |
mov b, x | |
geu c, b, m | |
sz c, 1 | |
sub b, b, m | |
snz c, 1 | |
add b, b, r | |
#mov o, b | |
#call print | |
sub i, i, 1 | |
jnz rho_mt_fast_loop, i | |
mov k, 0 | |
rho_mt_slow_loop: | |
mov e, b | |
sub i, j, k | |
leu c, rho_mt_skip_count, i | |
sz c, 1 | |
mov i, rho_mt_skip_count | |
rho_mt_inner_loop: | |
mulu x, y, b, b | |
call montgomery_reduce | |
mov b, x | |
geu c, b, m | |
sz c, 1 | |
sub b, b, m | |
snz c, 1 | |
add b, b, r | |
leu c, a, b | |
sz c, 1 | |
sub x, b, a | |
snz c, 1 | |
sub x, a, b | |
mulu x, y, x, f | |
call montgomery_reduce | |
mov f, x | |
sub i, i, 1 | |
jnz rho_mt_inner_loop, i | |
mov x, f | |
#mov o, x | |
#call print | |
call gcd | |
mov d, x | |
#mov o, d | |
#call print | |
add k, k, rho_mt_skip_count | |
gequ c, k, j | |
snz c, 2 | |
lequ c, d, 1 | |
jnz rho_mt_slow_loop, c | |
shl j, j, 1 | |
lequ c, d, 1 | |
jnz rho_mt_outer_loop, c | |
cmp c, d, n | |
sz c, 15 | |
rho_mt_final_loop: | |
mulu x, y, e, e | |
call montgomery_reduce | |
mov e, x | |
geu c, e, m | |
sz c, 1 | |
sub e, e, m | |
snz c, 1 | |
add e, e, r | |
leu c, a, e | |
sz c, 1 | |
sub x, e, a | |
snz c, 1 | |
sub x, a, e | |
call gcd | |
mov d, x | |
lequ c, d, 1 | |
jnz rho_mt_final_loop, c | |
cmp c, d, n | |
sz c, 2 | |
add o, o, 1 | |
jmp rho_mt_retry | |
mov a, d | |
jmp found_factor | |
################ UTILITY FUNCTIONS ################ | |
################ | |
# Loads a decimal number terminated by a newline from stdin to register n. | |
# | |
# inputs: none | |
# outputs: n | |
################ | |
input: | |
lw c, -1 | |
input_add_digit: | |
sub c, c, ord('0') | |
mov o, c | |
mulu n, h, n, 10 | |
add n, n, c | |
lw c, -1 | |
cmp q, c, ord('\n') | |
jz input_add_digit, q | |
ret n | |
################ | |
# Prints the value of register o to stdout in decimal, followed by a newline. | |
# | |
# inputs: o | |
# outputs: none | |
################ | |
print: | |
call print_char | |
sw -1, ord('\n') | |
ret | |
print_char: | |
divu o, x, o, 10 | |
sz o, 1 | |
call print_char | |
add x, x, ord('0') | |
sw -1, x | |
ret | |
################ | |
# Precomputes values used for Montgomery multiplication: | |
# * u = 1/2**64 mod n | |
# * v = -1/n mod 2**64 | |
# * r = 2**64 mod n (montgomery representation of 1) | |
# * s = 2**128 mod n (montgomery representation of 2**64) | |
# | |
# inputs: n | |
# outputs: u, v, s, t | |
################ | |
montgomery_precompute: | |
# first compute the inverses; based on xbinGCD from | |
# http://www.hackersdelight.org/MontgomeryMultiplication.pdf | |
# This step takes something like an average of 7 cycles per bit in the | |
# 'fast' mode, and 8 cycles per bit in 'safe' mode, for a total of around | |
# 500 cycles: but we only have to do it once per modulus | |
mov u, 1 | |
mov v, 0 | |
mov i, 64 | |
# HD defensively computes (u+beta)/2 to avoid overflow, taking two extra | |
# instructions; however, since u < beta overflow is only possible when | |
# beta >= 2**63, in most cases we can use a faster version and save | |
# 2 * 64 = 128 cycles, at the expense of a couple cycles comparison | |
gequ c, n, 2**63 | |
jz mt_pre_gcd_loop, c | |
mt_pre_gcd_loop_safe: | |
dec i | |
and c, u, 1 | |
snz c, 3 | |
shr u, u, 1 | |
shr v, v, 1 | |
sz 0, 6 | |
and c, u, n | |
xor u, u, n | |
shr u, u, 1 | |
add u, u, c | |
shr v, v, 1 | |
add v, v, 2**63 | |
jnz mt_pre_gcd_loop_safe, i | |
jmp mt_pre_gcd_end | |
mt_pre_gcd_loop: | |
dec i | |
and c, u, 1 | |
snz c, 3 | |
shr u, u, 1 | |
shr v, v, 1 | |
sz 0, 4 | |
add u, u, n | |
shr u, u, 1 | |
shr v, v, 1 | |
add v, v, 2**63 | |
jnz mt_pre_gcd_loop, i | |
mt_pre_gcd_end: | |
# compute 2**64 mod n; since 2**64 is too large to fit in a register, we | |
# first calculate r = 2**63 mod n, if it's greater than n/2, r = 2r - n, | |
# otherwise just r = 2r | |
divu q, r, 2**63, n # q is unused | |
shr c, n, 1 | |
geu c, r, c | |
shl r, r, 1 | |
sz c, 1 | |
sub r, r, n | |
# It would be entirely possible to compute 2**128 mod n by starting with | |
# r and using repeated doubling (64 times); this would take something | |
# like 4*64 = 256 cycles. Instead I'll use a doubleword division | |
# algorithm from Hacker's Delight (ch. 9-4, fig. 9-3), which takes | |
# fewer than 100 cycles. | |
mov l, 0 | |
# normalize the divisor (n) | |
and c, n, 2**64 - 2**32 | |
snz c, 2 | |
shl n, n, 32 | |
add l, l, 32 | |
and c, n, 2**64 - 2**48 | |
snz c, 2 | |
shl n, n, 16 | |
add l, l, 16 | |
and c, n, 2**64 - 2**56 | |
snz c, 2 | |
shl n, n, 8 | |
add l, l, 8 | |
and c, n, 2**64 - 2**60 | |
snz c, 2 | |
shl n, n, 4 | |
add l, l, 4 | |
and c, n, 2**64 - 2**62 | |
snz c, 2 | |
shl n, n, 2 | |
add l, l, 2 | |
and c, n, 2**64 - 2**63 | |
snz c, 2 | |
shl n, n, 1 | |
add l, l, 1 | |
# normalize the numerator | |
shl a, r, l | |
# split the divisor into halfwords | |
shr b, n, 32 | |
and m, n, 2**32 - 1 | |
# compute first halfword of remainder | |
divu q, p, a, b | |
mt_pre_div_adjust1: # loop runs at most two times, 035x on average | |
gequ c, q, 2**32 | |
snz c, 4 | |
mulu d, f, q, m # f is unused | |
shl c, p, 32 | |
geu c, d, c | |
sz c, 4 | |
sub q, q, 1 | |
add p, p, b | |
leu c, p, 2**32 | |
jnz mt_pre_div_adjust1, c | |
shl a, a, 32 | |
mulu d, f, q, n | |
sub a, a, d | |
# compute second halfword of remainder | |
divu q, p, a, b | |
mt_pre_div_adjust2: | |
snz c, 4 | |
mulu d, f, q, m | |
shl c, p, 32 | |
geu c, d, c | |
sz c, 4 | |
sub q, q, 1 | |
add p, p, b | |
leu c, p, 2**32 | |
jnz mt_pre_div_adjust2, c | |
shl a, a, 32 | |
mulu d, f, q, n | |
sub a, a, d | |
# de-normalize remainder | |
shr s, a, l | |
ret u, v, r, s | |
################ | |
# Performs the Montgomery reduction step, x = [yx]/2**64 mod n | |
# Requres the u and v calculated in mongomery_precompute. | |
# | |
# inputs: x, y, n, (u, v) | |
# outputs: x | |
################ | |
montgomery_reduce: | |
mulu m, u, x, v # m = ([yx] mod 2**64) / (-n) mod 2**64 | |
mulu z, w, m, n # [wz] = mN | |
add x, x, z # [yx] += [wz] | |
leu c, x, z # this is where I really miss an ADC instruction | |
sz c, 1 | |
inc w | |
add x, y, w # x = [yx] / 2**64 = y (combined with y+w from above) | |
leu c, x, y # x -= n if overflow | |
sz c, 1 | |
sub x, x, n | |
gequ c, x, n # if x >= n | |
sz c, 1 | |
sub x, x, n # x -= n | |
ret x | |
################ | |
# Computes a = a**d mod n by repeated squaring. | |
# | |
# inputs: a, d, n | |
# outputs: a | |
################ | |
power: | |
mov b, a | |
mov a, 1 | |
mt_pow_loop: | |
and c, d, 1 | |
sz c, 2 | |
mulu x, y, a, b # a = a*b mod n | |
divu q, a, x, n # q is unused | |
mulu x, y, b, b # b = b*b mod n | |
divu q, b, x, n | |
shr d, d, 1 | |
jnz mt_pow_loop, d | |
ret a | |
################ | |
# Uses Montgomery multiplication to compute a = a**d mod n. Treats d as an | |
# ordinary unsigned integer; treats d as an integer in Montgomery form. | |
# Requres the u, v, and r calculated in mongomery_precompute. | |
# | |
# inputs: a, d, n, (u, v, r) | |
# outputs: a | |
################ | |
montgomery_power: | |
mov b, a | |
mov a, r | |
pow_loop: | |
and c, d, 1 | |
sz c, 3 | |
mulu x, y, a, b | |
call montgomery_reduce | |
mov a, x | |
mulu x, y, b, b | |
call montgomery_reduce | |
mov b, x | |
shr d, d, 1 | |
jnz pow_loop, d | |
ret a | |
################ | |
# Computes x = GCD(x,n) using the binary GCD algorithm. Assumes that x < n | |
# and that n is odd. Note that we don't need a separate Montgomery GCD, | |
# since, surprisingly, GCD(x*(2**64) mod n, n) == GCD(x, n). | |
# | |
# inputs: x, n | |
# outputs: x | |
################ | |
gcd: | |
snz x, 2 # GCD(0, n) == n | |
mov x, n | |
ret x | |
gcd_loop: | |
and c, x, 1 # n is odd, so the GCD has no factors of two | |
snz c, 3 | |
gcd_div_2_loop: | |
shr x, x, 1 | |
and c, x, 1 | |
jz gcd_div_2_loop, c | |
geu c, n, x | |
sz c, 5 | |
sub x, n, x | |
sub n, n, x | |
jnz gcd_loop, x | |
mov x, n | |
ret x | |
sub x, x, n | |
jnz gcd_loop, x | |
mov x, n | |
ret x | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment