Skip to content

Instantly share code, notes, and snippets.

@robertcampion
Created October 19, 2015 07:12
Show Gist options
  • Save robertcampion/24ebd19353eff797b2b8 to your computer and use it in GitHub Desktop.
Save robertcampion/24ebd19353eff797b2b8 to your computer and use it in GitHub Desktop.
primes_53 = \
b'\x03\x05\x07\x0b\x0d\x11\x13\x17\x1d\x1f\x25\x29\x2b\x2f\x35\x3b' \
b'\x3d\x43\x47\x49\x4f\x53\x59\x61\x65\x67\x6b\x6d\x71\x7f\x83\x89' \
b'\x8b\x95\x97\x9d\xa3\xa7\xad\xb3\xb5\xbf\xc1\xc5\xc7\xd3\xdf\xe3' \
b'\xe5\xe9\xef\xf1\xfb'
# for 2**16+1:2:2**17-1, 1 if prime, 0 otherwise
prime_bytes = \
b'\x8b\x24\x60\x82\x10\x41\x81\x12\x40\x08\x26\x0d\x03\x00\x01\x41' \
b'\x00\x01\xca\x22\x20\x10\x20\x00\x19\x80\x24\xc0\x10\x00\x40\x94' \
b'\x00\x0a\x12\x68\x00\x10\x00\x51\x82\x00\x40\x80\x00\x80\x00\x44' \
b'\x00\x00\x29\x18\x94\x00\x42\x80\x00\x91\x02\x20\x42\x20\x45\x10' \
b'\x30\x20\x11\x10\x04\x42\x00\x20\x50\x10\x41\xc2\x20\x00\x48\x10' \
b'\x04\x10\x86\x40\x12\x00\x28\x82\x86\x40\x01\x20\x40\x10\x12\x05' \
b'\x09\x80\x00\x11\x06\x05\x41\x06\x60\x83\x80\x40\x18\x10\x20\x00' \
b'\xa0\x04\x10\x80\x08\x43\x16\x48\x00\x14\x25\x08\x22\x21\x88\x30' \
b'\x04\x09\x82\x00\x02\x80\x00\x42\x20\x44\xc8\x02\x49\x90\x20\x04' \
b'\x42\x20\x20\x42\x00\x68\x0a\x94\x20\x12\x00\x08\x10\x24\x08\x08' \
b'\x82\x01\x40\x02\x21\x00\x80\x25\x82\x00\x04\x18\x00\x08\x10\x80' \
b'\x00\x02\x84\x00\x00\xa4\x01\x50\x32\x24\x10\xa2\x00\x08\x04\x04' \
b'\x02\x90\x00\x42\xa0\x20\x18\x10\x49\x80\x12\x08\x40\x06\x00\x80' \
b'\x80\x21\x4b\x84\x00\x02\x02\x00\x11\x22\x0c\x00\x24\x01\x00\x84' \
b'\x08\xc8\x20\x00\x58\x02\x04\x89\xb0\x00\x00\x00\x05\x50\x00\x20' \
b'\x4a\x30\x00\x8a\x00\x29\x11\x36\x00\x00\x02\x04\x00\x10\x00\x08' \
b'\x00\x44\x40\x20\x41\x00\x30\x00\x0b\x02\x00\x41\x16\x08\x03\x04' \
b'\x20\x08\x30\x04\x08\x00\x48\x13\x02\x08\x42\x82\x00\x82\x34\x60' \
b'\x40\x02\x0d\x98\x80\x44\x11\x80\x00\x11\x14\x09\x80\x00\x64\x10' \
b'\x00\x48\x08\x32\x00\x19\x24\x00\x11\x04\x60\x48\x80\x60\x82\x00' \
b'\x29\x80\x00\x44\x10\x00\x01\x81\x10\x00\x08\x30\x05\xc2\x20\x04' \
b'\x19\x10\x4c\x10\x86\x0c\x82\x80\x00\x42\x10\x01\x80\x10\x28\x91' \
b'\x02\x08\x10\x00\x05\xd1\x00\x48\x00\x84\x65\x92\x02\x60\x08\x26' \
b'\x44\x42\x80\x29\x80\x00\x08\xc1\x20\x01\x50\x00\x00\x81\x80\x08' \
b'\x40\xa2\x09\x40\x00\x01\x41\x80\x41\x40\x12\x24\x98\x20\x44\x40' \
b'\x24\x20\x82\x92\x00\x82\xa0\x04\x50\x30\x4c\x00\x20\x00\x59\x00' \
b'\x00\xc2\x06\x60\xc0\x84\x00\x12\x00\x05\x81\xa0\x00\x00\x84\x00' \
b'\x40\x06\x41\x09\x00\x04\x8a\x00\x01\x00\x94\x00\x1a\x84\x09\x41' \
b'\x82\x48\x02\xa0\x00\x40\x00\x09\x00\x20\x0c\x52\xa4\x20\x12\x94' \
b'\x40\x01\x04\x25\xc0\x02\x0c\x08\x02\x00\x49\x04\x00\x40\x04\x00' \
b'\x00\x24\x20\x40\x20\x48\x80\x02\x08\x13\x22\x00\x50\x10\x40\x82' \
b'\x24\x00\x40\x12\x20\x11\x82\x44\x58\x00\x24\x02\x90\x41\x42\x90' \
b'\x00\x48\x20\x08\x00\xa0\x00\x18\x22\x24\x10\x02\x00\x8a\x10\x04' \
b'\x80\x00\x25\x80\x02\x48\x40\x06\x00\x82\x84\x09\x41\x34\x21\x08' \
b'\x00\x01\x88\x14\x48\x02\x00\x25\xd0\x00\x60\x02\x10\x05\x08\x10' \
b'\x60\x90\x02\x08\x49\x20\x04\x83\x00\x40\x00\x80\x20\xd2\x30\x20' \
b'\x18\x22\x40\x00\xa4\x20\x50\x14\x28\x09\x20\x45\x80\x00\x00\x00' \
b'\x16\x44\x01\x00\x28\x00\x04\x40\x40\x90\x41\x10\x30\x29\x08\x04' \
b'\x44\x10\x20\x20\x11\x12\x08\xc1\xb0\x00\x48\x00\x00\x08\x80\x08' \
b'\x49\x06\x05\x92\x00\x20\x43\x10\x24\x80\x12\x68\x10\x82\x00\x48' \
b'\x24\x05\x40\x10\x40\x40\x20\x00\x10\x20\x05\x80\xa0\x4c\x03\x80' \
b'\x08\x00\x80\x21\x0a\x20\x01\xc2\x02\x00\x80\x04\x08\x52\x06\x20' \
b'\x81\x00\x40\x08\x80\x21\x02\x02\x0c\x10\x80\x08\x49\xa0\x08\xd0' \
b'\x00\x01\x02\xa4\x00\x08\x30\x20\x09\x10\x40\x40\x00\x05\x52\x00' \
b'\x01\xc2\x84\x00\x18\x02\x20\x09\x80\x04\x03\x80\x20\x90\x04\x09' \
b'\x40\x10\x44\x00\x00\x05\x81\x02\x08\x41\x00\x2c\xc1\x86\x20\x00' \
b'\x24\x20\x8a\x10\x09\x00\x30\x08\x08\x80\x0c\x03\x14\x09\xc8\x24' \
b'\x05\x0a\x20\x05\x90\x20\x00\x19\x82\x29\x82\x02\x41\x08\x14\x04' \
b'\x80\x10\x28\x00\x24\x00\x50\x80\x00\x91\x80\x08\x81\x10\x21\x80' \
b'\x00\x04\x10\x30\x40\x4a\x82\x21\x00\x14\x01\x02\x24\x40\x4a\x00' \
b'\x24\x80\x00\x00\x01\x20\x0c\x00\x82\x08\xc1\x04\x60\x10\x30\x09' \
b'\x81\x80\x00\x52\x00\x00\x12\x12\x01\x01\x00\x60\x4a\x12\x08\x08' \
b'\x14\x40\x58\x04\x08\x41\x00\x00\x43\x00\x00\x88\x02\x08\x80\x80' \
b'\x08\x40\xa2\x09\x40\x12\x01\xc0\x10\x20\x90\x02\x44\x08\x14\x44' \
b'\x08\x02\x0d\x82\x12\x20\x00\x10\x04\x00\x20\x41\x18\x20\x04\x51' \
b'\xa4\x01\x10\x04\x68\x03\x00\x00\x40\x10\x01\x10\x96\x04\x0b\x24' \
b'\x01\xc1\x02\x00\x0a\x80\x24\xc2\x00\x04\x08\x02\x48\x00\xa0\x00' \
b'\x40\x16\x48\x81\x80\x41\x98\x20\x08\x89\x00\x44\x10\x24\x20\x01' \
b'\x02\x08\x40\x80\x01\x18\x30\x00\x81\x10\x40\x08\x00\x28\x42\x06' \
b'\x20\xc1\xb4\x40\x10\x00\x61\x10\xb0\x04\x12\x82\x01\x81\x00\x00' \
b'\xc1\x24\x05\x10\x02\x40\x10\xb4\x08\x1b\x80\x24\x50\x80\x20\x00' \
b'\x24\x44\xc0\x02\x08\x09\x00\x00\x43\x22\x00\x82\x94\x60\x00\x10' \
b'\x41\x80\x00\x09\x00\x00\x44\x48\xa0\x29\x01\x00\x20\x42\x84\x01' \
b'\x4a\x10\x2c\x80\x10\x04\x51\x00\x08\x10\x80\x48\x40\x20\x60\x80' \
b'\x30\x09\x81\x06\x00\x5a\x84\x0c\x02\x12\x00\x00\x00\x61\x10\x22' \
b'\x48\x81\xb0\x48\x01\x24\x08\x40\x02\x00\xc2\x10\x04\x12\x00\x04' \
b'\x00\x90\x0c\x4a\x22\x00\x03\x84\x29\x80\x10\x25\x10\x20\x01\x00' \
b'\x20\x00\x1b\x00\x2c\x40\x92\x60\x00\x84\x00\x00\x32\x08\x01\x20' \
b'\x00\x02\x04\x24\x81\x00\x08\x8a\x04\x45\x40\x00\x01\x00\x10\x00' \
b'\x0a\xa6\x21\x10\x16\x00\x82\x00\x60\x40\x00\x48\x98\x90\x48\x12' \
b'\x82\x04\x10\x00\x00\x01\x34\x20\x48\x10\x05\x18\x84\x00\x10\x04' \
b'\x0c\x10\x00\x41\x81\x00\x04\x10\x02\x41\x01\x82\x08\x41\x02\x00' \
b'\x81\x02\x20\x01\x80\x64\x02\x10\x00\x90\x92\x00\x02\x86\x08\xc2' \
b'\x84\x08\x08\x00\x20\xd2\x00\x60\x00\x10\x0c\x09\x84\x24\x91\x02' \
b'\x01\x4a\x30\x04\x88\x00\x40\x00\x00\x0c\x50\x02\x01\x52\x90\x20' \
b'\x81\x10\x00\x12\x30\x20\x00\x30\x0c\x00\x00\x09\x00\x02\x09\x83' \
b'\x00\x41\x42\x10\x04\x90\x86\x04\x41\x20\x08\x00\x90\x41\x48\xa4' \
b'\x20\x00\x00\x09\x01\x06\x00\x13\x04\x00\x02\x86\x68\x40\x80\x00' \
b'\x50\x22\x40\x00\x12\x40\x09\x26\x28\x40\x06\x20\x88\x04\x04\x08' \
b'\x00\x04\x01\x30\x4c\x4a\x00\x00\x81\x04\x60\x00\x00\x00\x88\x20' \
b'\x61\x00\x00\x44\x02\x80\x05\x92\x00\x21\x0a\x14\x01\x80\x12\x49' \
b'\x80\x16\x08\x02\x84\x05\x43\x84\x40\x08\x10\x44\x42\x20\x44\x00' \
b'\x26\x00\x41\x22\x08\x00\x06\x00\x00\x04\x05\x88\x10\x08\x81\x82' \
b'\x00\x40\x00\x24\x52\x04\x00\x00\xa0\x21\x88\x10\x04\x01\x06\x40' \
b'\x11\x80\x20\x80\x82\x41\xc0\x00\x60\x02\x20\x0c\x00\x16\x40\x41' \
b'\x02\x25\x10\x80\x00\x08\xa0\x40\x12\x02\x00\x11\x02\x4c\x02\x26' \
b'\x0d\x80\x06\x40\xc8\x30\x01\x88\x22\x05\x18\xa0\x08\x08\x00\x25' \
b'\x02\x10\x40\x40\x94\x00\x48\x20\x08\x18\x14\x08\x0b\x02\x04\x00' \
b'\x84\x40\x00\x04\x01\x00\x10\x01\x00\x82\x40\x01\x06\x00\x41\x00' \
b'\x09\x03\x00\x20\x40\x20\x28\x00\x80\x0c\x40\x02\x21\x00\x02\x00' \
b'\xc0\x80\x01\x00\x22\x2d\x10\x00\x00\x11\x24\x00\x02\x94\x69\x03' \
b'\x20\x44\x12\x02\x09\x08\x10\x08\x10\x00\x25\x51\x00\x21\xc1\xa0' \
b'\x40\x92\x02\x01\x01\x00\x04\x52\xa0\x04\x40\x14\x40\x08\x04\x01' \
b'\x00\x22\x24\x09\x14\x08\x12\x02\x04\x42\x90\x49\x00\xb0\x00\xc8' \
b'\x22\x20\x01\x34\x0c\x11\x00\x00\x00\x90\x08\x0a\x04\x61\x90\x10' \
b'\x20\x00\x32\x44\x02\x02\x28\x90\x04\x29\x8b\x00\x24\x1a\x10\x20' \
b'\x90\x00\x04\x42\xa0\x2c\x10\x82\x08\x88\x10\x01\x10\x00\x00\x09' \
b'\x26\x40\x0b\x00\x0c\x00\x14\x01\x82\xa0\x05\x02\x00\x08\x08\x30' \
b'\x08\x09\x02\x0c\x90\x04\x20\x40\x00\x00\x12\x02\x0c\x91\x80\x00' \
b'\x08\xa4\x04\x02\x02\x20\x41\x04\x21\x10\x00\x00\x18\x30\x00\x19' \
b'\x86\x29\x80\x90\x00\x40\x24\x05\x00\x22\x49\x00\x02\x00\x58\x24' \
b'\x05\x82\x10\x40\x0b\x00\x24\x10\x10\x04\x00\x96\x08\x00\x84\x08' \
b'\x00\x10\x20\x88\x00\x04\x50\x20\x04\x90\x86\x40\x01\x20\x01\x00' \
b'\x04\x01\x8a\x94\x01\x40\x12\x00\x11\x02\x40\x02\xa0\x08\x80\x10' \
b'\x40\x43\x20\x60\x52\x22\x40\x00\x10\x48\x40\x20\x25\xd1\x82\x41' \
b'\x82\x00\x60\x82\x10\x28\x00\x00\x48\x18\x04\x01\x80\x82\x09\xc1' \
b'\x30\x24\x42\x02\x01\x91\x20\x0c\x11\x84\x00\xd1\x02\x40\x02\x90' \
b'\x40\xc8\x00\x20\x11\x00\x00\x48\x02\x24\x91\x10\x60\x82\x00\x21' \
b'\x82\x00\x40\x18\x96\x00\x0b\x20\x28\x11\x00\x20\x09\x80\x20\x02' \
b'\x00\x08\x98\x96\x00\x12\x00\x2d\x40\x00\x09\x40\x04\x00\x00\x02' \
b'\x0c\x08\x20\x00\x03\x20\x00\x03\x02\x20\x80\x80\x45\x18\x02\x45' \
b'\x80\xa2\x40\x18\x00\x01\x91\x80\x00\xc2\x14\x00\x18\x10\x00\x90' \
b'\x20\x04\x40\x04\x01\x02\x06\x29\xc8\x00\x20\xc0\x00\x24\x99\x80' \
b'\x40\x02\x00\x01\xc3\x82\x41\x08\x24\x44\x00\x30\x00\x80\x04\x04' \
b'\x0a\x20\x00\x52\x14\x20\x0a\x94\x01\x10\x10\x28\x00\x04\x0c\x01' \
b'\x00\x29\x41\x02\x00\x88\xa0\x45\x0a\x20\x08\x01\x10\x0c\x42\x82' \
b'\x09\x50\x92\x08\x80\x84\x20\x00\x10\x08\x11\x24\x04\x03\xa4\x28' \
b'\x00\x02\x28\x42\x10\x41\x40\x02\x45\x01\x02\x00\x00\x24\x25\x12' \
b'\x02\x40\xc8\x30\x24\x00\x00\x41\x11\x02\x48\x12\x20\x0c\x02\x06' \
b'\x41\x01\x20\x24\x80\x00\x01\x00\x10\x44\x08\x06\x08\xc2\x10\x00' \
b'\x4a\x10\x05\x00\x32\x01\x01\x10\x00\x01\x82\x20\x52\x84\x48\x01' \
b'\x84\x00\x50\x00\x64\x10\x00\x00\x02\xa4\x00\x90\x00\x20\x43\x24' \
b'\x21\xc2\x00\x2c\x80\x12\x00\x40\xa0\x00\x02\x90\x40\x42\xb4\x40' \
b'\x80\x00\x00\x11\x80\x40\x08\x80\x0c\x13\x02\x40\x80\x10\x20\x00' \
b'\x02\x0c\x80\x00\x00\x58\x26\x04\x80\x04\x41\x0a\x00\x60\x08\x02' \
b'\x60\x11\xb2\x00\x58\x00\x00\x03\x10\x08\x80\x00\x01\x00\x20\x61' \
b'\x00\x04\x04\x1b\x86\x28\x00\x80\x28\x08\x90\x04\x02\x30\x01\x90' \
b'\x10\x00\x01\x04\x20\x81\x00\x00\x02\x80\x44\x02\x20\x41\x00\x30' \
b'\x40\x0b\x20\x00\x11\x06\x28\x42\x20\x24\x92\x20\x20\x08\x84\x00' \
b'\x43\x00\x04\x40\x82\x01\x48\x00\x01\x58\x30\x01\x00\x24\x00\x19' \
b'\x00\x04\x01\x16\x48\xc3\x80\x21\x12\x02\x04\x09\x94\x00\x41\x00' \
b'\x28\x81\x84\x60\x43\x00\x44\x08\x00\x08\x11\x32\x40\x10\x00\x04' \
b'\x00\x06\x28\x81\x14\x01\x88\x00\x40\x80\x10\x00\x1a\x80\x28\x52' \
b'\x00\x28\x08\x20\x40\x82\x10\x60\x19\x04\x00\x09\x20\x01\x91\x94' \
b'\x00\x82\x14\x40\x02\x12\x00\x10\x02\x0c\x02\x02\x20\x11\x04\x00' \
b'\x48\x00\x01\x18\x10\x48\x81\x04\x40\x00\xa0\x08\x12\x10\x40\x08' \
b'\x34\x20\x18\x22\x01\x80\x20\x04\x42\x84\x2c\x10\x86\x00\x80\x00' \
b'\x45\x00\x10\x40\x01\xa4\x00\x40\x24\x00\x01\x82\x21\x48\x80\x04' \
b'\x00\x00\x20\x81\x92\x40\x00\x24\x08\x41\x02\x20\x08\x10\x01\xda' \
b'\x00\x44\x18\x84\x40\x1a\x04\x00\x00\x92\x49\x00\x20\x44\x82\x02' \
b'\x09\x08\x22\x04\x10\x20\x24\xc1\x10\x68\x01\x10\x44\x80\x22\x20' \
b'\x10\x84\x00\x41\x04\x28\x50\x06\x20\x40\x84\x00\xd0\x00\x48\x09' \
b'\x02\x40\x11\x02\x24\x10\x82\x41\x8a\x20\x60\xc8\x00\x2c\x10\x20' \
b'\x04\x01\x84\x2c\x10\x14\x61\x02\x00\x64\x40\x10\x01\x88\x00\x40' \
b'\x18\x00\x05\x42\x00\x01\x0b\x20\x24\x8a\x02\x20\x81\x02\x00\x4a' \
b'\x20\x0d\x40\x00\x00\x00\x30\x01\x50\x20\x40\x10\x20\x48\x01\x80' \
b'\x01\x03\x10\x08\x08\x90\x00\x00\x12\x60\x88\x26\x00\x02\x04\x00' \
b'\x92\x00\x00\x81\x04\x00\x00\x30\x20\x08\x04\x44\x00\xa0\x09\xc1' \
b'\x00\x20\x81\x20\x00\x08\x10\x40\x91\x94\x08\x40\x02\x20\x12\x04' \
b'\x09\x0a\x20\x00\x00\x10\x20\x10\x06\x40\x03\x20\x00\x81\x90\x00' \
b'\x80\xa0\x61\x58\x12\x04\x81\x24\x00\x18\x20\x29\x00\x06\x41\x0a' \
b'\x80\x00\x98\x02\x00\x90\x10\x0c\x0a\x20\x00\xc1\x90\x00\x01\x00' \
b'\x20\x98\x20\x00\x81\x90\x0c\x01\x02\x05\x80\x80\x20\x02\x34\x44' \
b'\x4a\x12\x08\x18\x24\x04\x03\xa0\x00\x03\x84\x20\x80\x04\x04\x92' \
b'\x20\x20\x08\xa0\x48\x00\x80\x00\x10\x02\x00\x09\x80\x04\x80\x20' \
b'\x60\x08\x86\x44\x00\x00\x28\x52\x80\x49\x42\x94\x00\x40\x10\x20' \
b'\x00\xa2\x44\x58\x04\x24\x00\x00\x49\x02\x00\x05\x12\x22\x04\x80' \
b'\x04\x08\x19\x00\x00\x10\x04\x00\x88\x20\x20\x12\x10\x08\x81\x92' \
b'\x40\x40\x82\x01\x00\x00\x08\x48\x30\x01\x00\x02\x64\x80\x34\x00' \
b'\x02\x04\x0d\x92\x82\x21\x40\xa4\x04\x48\x20\x21\x11\x06\x00\x08' \
b'\x04\x00\xd2\x00\x00\x80\x00\x60\x42\x10\x4d\x10\xa4\x00\x09\xa2' \
b'\x09\xc0\x02\x01\x08\x00\x64\x4a\x00\x28\x10\x10\x44\x03\x02\x01' \
b'\x40\x10\x40\xc1\x04\x61\x80\x02\x01\x00\x82\x40\x11\x20\x20\x02' \
b'\x94\x00\x40\x20\x20\x4a\x00\x01\x00\x00\x00\x51\x20\x01\x00\x06' \
b'\x00\x00\x20\x40\x80\x12\x04\x90\x00\x08\x48\x02\x0d\x40\x02\x41' \
b'\x81\x00\x00\xc2\x02\x21\x11\x90\x00\x0a\x84\x00\x42\x80\x68\x08' \
b'\x14\x40\x80\x10\x60\x80\x22\x00\x01\xa2\x21\x92\x10\x08\x88\x80' \
b'\x45\x00\x00\x28\x08\x30\x00\x41\x04\x08\xd0\x00\x00\x0b\x04\x45' \
b'\x50\x20\x20\x00\x00\x48\x12\x00\x04\x00\x06\x08\x82\x10\x01\x98' \
b'\x02\x24\x00\x80\x44\x4b\x20\x24\x81\x86\x29\x80\x10\x04\x40\x10' \
b'\x08\x01\x90\x08\x41\x26\x08\x03\x80\x21\x40\x00\x04\x82\x00\x09' \
b'\x11\x02\x40\x0a\xa2\x04\x80\x10\x61\xc0\x10\x00\xc2\x00\x25\x80' \
b'\xa0\x44\x10\x06\x08\xc0\x10\x00\x00\x14\x40\x08\x00\x40\x81\x20' \
b'\x0c\x1b\x02\x21\x92\x90\x00\x09\x10\x00\x02\x00\x09\x08\xa2\x04' \
b'\x02\x00\x08\x01\x00\x01\x08\x00\x40\x50\x20\x44\x11\x02\x08\x03' \
b'\x02\x08\x00\x00\x00\x82\x14\x20\xc0\x20\x0c\x80\x84\x40\x5a\x24' \
b'\x24\x81\x04\x29\x00\x10\x00\x08\x30\x40\x01\x12\x00\x48\x06\x00' \
b'\x81\x80\x01\x42\xb4\x24\x0a\x10\x41\x00\x00\x0c\x52\x00\x00\x82' \
b'\x84\x09\x48\x00\x00\x0a\x20\x61\x00\x84\x0c\x00\x04\x24\x50\x02' \
b'\x40\x00\x00\x01\x8a\x02\x20\x80\x16\x04\x41\x02\x00\x50\x90\x20' \
b'\x08\x14\x24\x02\x00\x01\x10\x06\x08\x48\x86\x08\x40\x16\x08\xc9' \
b'\x00\x04\x90\x00\x40\x09\x14\x40\x00\x82\x00\x40\x06\x08\x89\x10' \
b'\x21\x10\x02\x04\x88\x84\x40\x59\x04\x08\x93\x00\x28\x43\x30\x00' \
b'\x40\x02\x40\x00\x20\x00\x01\x24\x00\x80\x80\x60\x49\x30\x40\x00' \
b'\x02\x09\x80\x02\x00\x00\x04\x0c\x02\x82\x08\x80\x20\x20\x40\x00' \
b'\x44\x99\x20\x08\x08\x02\x04\xd2\x82\x21\x08\x30\x05\x48\x22\x68' \
b'\x99\x16\x04\x50\x20\x01\x00\x80\x08\x00\x80\x41\xc0\x22\x0c\x00' \
b'\x10\x40\x40\x20\x00\x51\x10\x00\x03\x84\x00\x08\x00\x0c\x80\x10' \
b'\x00\x02\x00\x2c\x02\x84\x01\x0b\x04\x21\x40\x02\x00\x00\xa2\x04' \
b'\x41\x20\x24\x81\x04\x41\x80\x30\x00\x52\x12\x00\x01\x80\x40\x09' \
b'\x00\x2d\x50\x04\x00\x42\x14\x20\x88\x10\x08\x00\x12\x00\x08\x02' \
b'\x00\x02\x92\x48\x00\x00\x00\x12\x20\x01\x19\x14\x00\x13\x86\x08' \
b'\x03\x10\x00\x48\x14\x45\xc0\x10\x20\x81\x20\x00\x12\x04\x01\x40' \
b'\x00\x48\x00\x14\x64\x00\x00\x25\x08\x20\x40\x4a\x80\x21\x80\x00' \
b'\x21\x08\x04\x21\x80\x00\x4c\x98\x92\x00\x03\x00\x08\x40\x04\x00' \
b'\xc2\x00\x20\x10\x02\x20\x11\x02\x44\x43\x80\x04\x90\x80\x40\x00' \
b'\x80\x44\x48\x32\x0d\x81\x82\x48\x50\x06\x2d\xc2\x00\x60\x00\x20' \
b'\x60\x02\x10\x20\x90\x00\x00\x10\x26\x01\x42\x80\x08\xc1\x10\x00' \
b'\x80\x02\x41\x09\x04\x00\x10\x84\x08\x42\x02\x20\x08\xa4\x40\x08' \
b'\x22\x20\x00\x36\x04\x00\x02\x01\x00\x80\x00\x09\x90\x01\x82\x20' \
b'\x24\x00\x00\x4c\x02\x22\x01\x01\x02\x01\x88\xa0\x05\x90\x10\x44' \
b'\x01\x90\x40\x43\x80\x00\x40\x84\x48\x00\xa0\x00\x18\x12\x08\x18' \
b'\x22\x04\x01\x20\x08\x03\x10\x61\x40\x00\x04\x4a\x02\x00\x00\xb0' \
b'\x48\x10\x06\x20\x12\x00\x00\x03\x20\x20\x98\x00\x4c\x90\x02\x48' \
b'\x42\x22\x04\x03\x90\x21\x00\x34\x21\x48\x00\x01\x08\x34\x0c\x00' \
b'\x00\x25\x52\x00\x41\x00\x80\x00\x02\x32\x40\x00\x26\x00\x58\x02' \
b'\x05\x10\x00\x00\x08\x10\x05\x82\x30\x48\x00\x94\x08\x40\x00\x00' \
b'\x81\x04\x20\x48\x20\x60\x10\x00\x44\x91\x02\x44\x00\xa2\x04\x00' \
b'\x82\x01\x82\x20\x40\x10\x20\x2d\x81\x02\x00\x48\x00\x00\x00\x92' \
b'\x08\x41\xa0\x61\x02\x30\x00\x08\xa4\x08\x00\x20\x25\x00\x02\x60' \
b'\x83\x10\x00\x80\x02\x40\x00\x30\x04\x08\x00\x00\x81\x10\x40\xc0' \
b'\x24\x01\x08\x20\x21\x01\x34\x48\x18\x06\x20\x10\x82\x60\x08\x04' \
b'\x05\x8a\x30\x40\x10\x02\x00\x13\x00\x21\xc2\x10\x20\x01\x90\x01' \
b'\xc2\x20\x09\x10\x00\x40\x0a\x04\x09\x81\x10\x08\x00\x24\x24\x80' \
b'\x20\x40\x00\x82\x48\x41\xa0\x00\x00\x10\x00\x88\x00\x20\x48\x12' \
b'\x21\x80\xa0\x00\x09\x84\x04\x03\x14\x01\x43\xa0\x20\x00\x20\x49' \
b'\x81\x82\x48\x10\x22\x0c\xd1\x02\x00\x01\x94\x64\x80\x00\x2c\x10' \
b'\x00\x00\x02\x82\x04\x43\x04\x00\x40\x00\x20\x0a\x22\x40\x91\x84' \
b'\x4c\x18\x02\x20\x40\x80\x49\x40\x80\x44\x02\x22\x20\x09\x32\x08' \
b'\x18\x84\x05\x40\x00\x08\x81\x90\x00\x00\x02\x60\x00\x00\x00\x42' \
b'\x20\x20\x01\x04\x21\xc1\xa0\x00\x50\x20\x64\x19\x06\x04\x01\x02' \
b'\x04\x02\x06\x48\x00\xb0\x21\x08\x20\x28\x09\x20\x00\x0b\x20\x20' \
b'\x00\x96\x01\x00\x20\x05\x48\x10\x4d\x80\x04\x40\x01\x02\x0c\x03' \
b'\x00\x60\x08\x00\x44\x80\x00\x0c\x11\x10\x04\x00\x22\x09\x41\x06' \
b'\x28\x00\x10\x21\x50\x22\x00\x90\x10\x08\x10\x02\x24\x02\x10\x09' \
b'\x08\x00\x01\x08\x12\x41\x08\x20\x04\x10\xa0\x20\x40\x00\x40\x03' \
b'\x94\x20\x92\x10\x01\x08\xa6\x40\x08\xa0\x01\xc0\x10\x01\x82\x04' \
b'\x44\x42\x00\x24\x88\x16\x40\x00\xa0\x20\x50\x14\x40\x08\x80\x00' \
b'\x08\x22\x04\x01\xa4\x44\x01\x04\x00\x00\x02\x08\x01\x30\x24\x12' \
b'\x20\x0c\x09\x04\x08\x18\x00\x04\xd2\x04\x00\xcb\xa0\x00\x00\x00' \
b'\x60\x81\x20\x48\x50\x00\x01\xc1\x00\x09\x01\x10\x00\x10\x02\x40' \
b'\x09\xa4\x40\x01\x02\x09\xd3\x90\x00\x0a\x80\x04\x00\x00\x61\x08' \
b'\x00\x08\x00\xa6\x20\x10\x04\x40\x08\x00\x44\xd2\x20\x44\x00\x80' \
b'\x40\x09\x86\x20\x40\x04\x08\x8a\x00\x21\x10\x20\x28\x88\x80\x04' \
b'\x41\x80\x00\x10\x06\x08\x82\x00\x00\x48\x00\x25\x18\x22\x04\x4a' \
b'\xa0\x04\x92\x10\x00\xc1\x20\x44\x00\x00\x08\x80\x30\x00\x59\x04' \
b'\x28\x00\x00\x61\x8a\x94\x00\x82\x00\x21\x80\x82\x08\x40\x00\x08' \
b'\x82\x16\x01\x81\x00\x00\x4a\x02\x45\x08\x00\x40\x00\x04\x21\x50' \
b'\x10\x69\x02\x04\x40\x0a\x00\x08\x10\x06\x08\x41\x00\x20\x82\x90' \
b'\x40\x8a\x14\x40\x00\x00\x28\x08\x32\x00\x00\x00\x21\x80\x04\x09' \
b'\x40\x00\x00\x58\x20\x00\x18\x80\x00\x42\x02\x24\x00\x04\x08\x03' \
b'\x90\x41\x48\x30\x05\x10\x82\x00\x42\x00\x28\x80\x00\x48\x00\x90' \
b'\x04\x02\x10\x44\x00\xa0\x48\x19\x00\x24\x91\x84\x21\xc8\x84\x24' \
b'\x02\x02\x04\x80\x30\x40\x12\x22\x05\x00\x10\x21\x80\x00\x24\x40' \
b'\x00\x04\x81\x04\x00\x00\x82\x09\x00\x92\x40\x40\x30\x44\xc0\x00' \
b'\x01\x81\x02\x08\x09\xa6\x00\x10\x94\x60\x80\x04\x41\x52\x12\x40' \
b'\x18\x00\x0c\x02\x80\x28\x40\x04\x00\x4b\x24\x04\x40\x00\x60\x90' \
b'\x00\x00\x40\x82\x05\x50\x90\x00\x08\x20\x60\x88\x30\x2c\x11\x82' \
b'\x40\x10\x04\x20\x11\x90\x09\x00\x00\x01\x52\x10\x4c\x00\x80\x40' \
b'\x59\x00\x01\x40\x02\x01\x02\x30\x20\x80\x12\x45\x00\x32\x44\x58' \
b'\x02\x01\x40\x16\x08\xc0\x30\x00\x08\x00\x20\x08\x14\x44\x12\x00' \
b'\x21\x00\x00\x41\x00\x80\x44\xc0\x00\x01\x89\x02\x00\x02\x24\x20' \
b'\x12\x94\x08\x02\x90\x00\x02\x20\x08\x18\x82\x00\x41\x26\x08\x80' \
b'\x02\x00\x49\xa0\x21\x80\x30\x08\x88\x10\x44\x12\x00\x25\x02\x12' \
b'\x00\x42\x00\x60\x98\x32\x01\x90\x24\x00\x10\x00\x00\x81\x02\x28' \
b'\x41\x30\x20\x10\x00\x00\x80\x00\x40\x50\x24\x00\x83\x00\x01\x8a'
main:
call input
# check if input is 1
cmp c, n, 1
sz c, 3
mov o, n
call print
halt 0
# remove factors of two
mov o, 2
div_2_loop:
and c, n, 1
snz c, 3
call print
shr n, n, 1
jmp div_2_loop
# check if n is a power of two
cmp c, n, 1
sz c, 1
halt 0
# trial division
mov d, data(primes_53)
mov i, 53
trial_division_loop:
lbu p, d
trial_division_check:
divu q, r, n, p
snz r, 4
mov o, p
call print
mov n, q
jmp trial_division_check
mulu s, t, p, p
leu c, n, s
sz c, 5
cmp c, n, 1
snz c, 2
mov o, n
call print
halt 0
sub i, i, 1
add d, d, 1
jnz trial_division_loop, i
# mov g, 0 # initialize factor list pointers
# mov h, 0 # all registers are zero at program start
# *g is the next factor to check for primality
# *h is the last factor we've found so far
# the first factor is always *0
sw g, n
prime_test:
# by this point we know that n has no prime factors less than 256,
# so if n is less than 256**2, then it must be prime
lequ c, n, 2**16
jnz is_prime, c
geu c, n, 2**17+1
jnz miller_rabin, c
sub d, n, 2**16+1 # table starts at 2**16+1 (by above argument)
shr d, d, 1 # we know n is odd, so we ignore the last bit
and m, d, 2**3-1 # each byte is 3-bit lookup table
shr d, d, 3
add d, d, data(prime_bytes)
lbu p, d
shr p, p, m
and p, p, 1
jnz is_prime, p
jmp pollard_rho # I should probably do trial division here
# since we only have around 20 more primes to check
################
# Miller-Rabin primality test; bases from https://miller-rabin.appspot.com/
# Note that this subroutine, as well as most of the ones before the 'utility
# functions' section are not proper functions, as they sometimes jump directly
# to the next step instead of returning!
#
# input: n
# output: n/a
################
miller_rabin:
shr d, n, 1
mov b, 1
mr_rd_loop: # n - 1 == d * 2**b, d is odd
and c, d, 1
snz c, 3
shr d, d, 1
inc b
jmp mr_rd_loop
geu c, n, 2**32-1 # test if we can use single-precision arithmetic
jnz miller_rabin_montgomery, c
sub m, n, 1
mr_test_1:
gequ c, n, 0x000000000005361b
jnz mr_test_2, c
mov a, 0x81b33f22efdceaa9
call miller_rabin_test
jmp is_prime
mr_test_2:
gequ c, n, 0x000000003e9de64d
jnz mr_test_3, c
mov a, 0x0000004e69b6552d
call miller_rabin_test
mov a, 0x00223f5bb83fc553
call miller_rabin_test
jmp is_prime
mr_test_3:
mov a, 0x3ab4f88ff0cc7c80
call miller_rabin_test
mov a, 0xcbee4cdf120c10aa
call miller_rabin_test
mov a, 0xe6f1343b0edca8e7
call miller_rabin_test
jmp is_prime
miller_rabin_montgomery:
call montgomery_precompute
sub m, n, r # montgomery representation of n-1
mr_test_mt_3:
gequ c, n, 0x000000518dafbfd1
jnz mr_test_mt_4, c
mov a, 0x3ab4f88ff0cc7c80
call miller_rabin_test_montgomery
mov a, 0xcbee4cdf120c10aa
call miller_rabin_test_montgomery
mov a, 0xe6f1343b0edca8e7
call miller_rabin_test_montgomery
jmp is_prime
mr_test_mt_4:
gequ c, n, 0x0000323ee0e55e6b
jnz mr_test_mt_5, c
mov a, 0x0000000000000002
call miller_rabin_test_montgomery
mov a, 0x0000810c207b08bf
call miller_rabin_test_montgomery
mov a, 0x10a42595b01d3765
call miller_rabin_test_montgomery
mov a, 0x99fd2b545eab5322
call miller_rabin_test_montgomery
jmp is_prime
mr_test_mt_5:
gequ c, n, 0x001c6b470864f683
jnz mr_test_mt_6, c
mov a, 0x0000000000000002
call miller_rabin_test_montgomery
mov a, 0x000003c1c7396f6d
call miller_rabin_test_montgomery
mov a, 0x02142e2e3f22de5c
call miller_rabin_test_montgomery
mov a, 0x0297105b6b7b29dd
call miller_rabin_test_montgomery
mov a, 0x370eb221a5f176dd
call miller_rabin_test_montgomery
jmp is_prime
mr_test_mt_6:
gequ c, n, 0x081f23f390affe89
jnz mr_test_mt_7, c
mov a, 0x0000000000000002
call miller_rabin_test_montgomery
mov a, 0x000070722e8f5cd0
call miller_rabin_test_montgomery
mov a, 0x0020cd6bd5ace2d1
call miller_rabin_test_montgomery
mov a, 0x009bbc940c751630
call miller_rabin_test_montgomery
mov a, 0x0a90404784bfcb4d
call miller_rabin_test_montgomery
mov a, 0x1189b3f265c2b0c7
call miller_rabin_test_montgomery
jmp is_prime
mr_test_mt_7:
mov a, 0x0000000000000002
call miller_rabin_test_montgomery
mov a, 0x0000000000000145
call miller_rabin_test_montgomery
mov a, 0x000000000000249f
call miller_rabin_test_montgomery
mov a, 0x0000000000006e12
call miller_rabin_test_montgomery
mov a, 0x000000000006e0d7
call miller_rabin_test_montgomery
mov a, 0x0000000000953d18
call miller_rabin_test_montgomery
mov a, 0x000000006b0191fe
call miller_rabin_test_montgomery
jmp is_prime
is_prime:
mov o, n
call print
cmp c, g, h
sz c, 1
halt 0
add g, g, 8
lw n, g
jmp prime_test
found_factor:
divu n, b, n, a
add h, h, 8
sw h, a
jmp prime_test
################
# Miller-Rabin primality test subroutine; needs m == n - 1
#
# input: a, n, m
# output: n/a
################
miller_rabin_test:
divu q, a, a, n # a = a mod n
snz a, 1
ret # a certifies primality if a == 0 mod n
call power # a = a**d mod n
cmp c, a, 1 # r == 1
sz c, 1
ret # a certifies primality if a**d == 1 mod n
cmp c, a, m # m == n-1
sz c, 1
ret # a certifies primality if a**d == -1 mod n
mov d, 2
mr_square_loop:
dec b
jz pollard_rho, b
mulu x, y, a, a # a = a**2 mod n
divu q, a, x, n # q is unused
cmp c, a, 1 # r == 1
jnz pollard_rho, c # a witnesses compositeness if a**(d*2**i) == 1 mod n
cmp c, a, m # m == n-1
sz c, 1
ret # a certifies primality if a**(d*2**i) == -1 mod n
jmp mr_square_loop
################
# Miller-Rabin primality test subroutine using Montgomery arithmetic; needs
# the values from the precomputation step as well as m == n - 1
#
# input: a, n, m, (u, v, r, s)
# output: n/a
################
miller_rabin_test_montgomery:
divu q, a, a, n # a = a mod n
snz a, 1
ret # a certifies primality if a == 0 mod n
mulu x, y, a, s
call montgomery_reduce # convert a to montgomery form
mov a, x
call montgomery_power # a = a**d mod n
cmp c, a, r # r == 1 (mongtomery form)
sz c, 1
ret # a certifies primality if a**d == 1 mod n
cmp c, a, m # m == n-1 (montgomery form)
sz c, 1
ret # a certifies primality if a**d == -1 mod n
mr_mt_square_loop:
dec b
jz pollard_rho_montgomery, b
mulu x, y, a, a # a = a**2 mod n
call montgomery_reduce
mov a, x
cmp c, a, r # r == 1 (mongtomery form)
jnz pollard_rho_montgomery, c # a witnesses compositeness if a**(d*2**i) == 1 mod n
cmp c, a, m # m == n-1 (montgomery form)
sz c, 1
ret # a certifies primality if a**(d*2**i) == -1 mod n
jmp mr_mt_square_loop
################
# Pollard-Brent rho factorization algorithm
#
# input: n
# output: n/a
################
pollard_rho:
rho_skip_count = 64
mov o, 42
rho_retry:
mov b, o
mov j, 1
mov f, 1
mov d, 1
rho_outer_loop:
mov a, b
mov i, j
rho_fast_loop:
mulu b, y, b, b
add b, b, 1
divu y, b, b, n
#mov o, b
#call print
sub i, i, 1
jnz rho_fast_loop, i
mov k, 0
rho_slow_loop:
mov e, b
sub i, j, k
leu c, rho_skip_count, i
sz c, 1
mov i, rho_skip_count
rho_inner_loop:
mulu b, y, b, b
add b, b, 1
divu y, b, b, n
leu c, a, b
sz c, 1
sub x, b, a
snz c, 1
sub x, a, b
mulu f, y, x, f
divu y, f, f, n
#add o, f, 1000000000
#call print
sub i, i, 1
jnz rho_inner_loop, i
mov x, f
call gcd
mov d, x
#mov o, d
#call print
add k, k, rho_skip_count
gequ c, k, j
snz c, 2
lequ c, d, 1
jnz rho_slow_loop, c
shl j, j, 1
lequ c, d, 1
jnz rho_outer_loop, c
cmp c, d, n
sz c, 12
rho_final_loop:
mulu e, y, e, e
add e, e, 1
divu y, e, e, n
#add o, e, 2000000000
#call print
leu c, a, e
sz c, 1
sub x, e, a
snz c, 1
sub x, a, e
call gcd
mov d, x
#mov o, d
#call print
lequ c, d, 1
jnz rho_final_loop, c
cmp c, d, n
sz c, 2
add o, o, 1
jmp rho_retry
mov a, d
jmp found_factor
################
# Pollard-Brent rho factorization algorithm using Montgomery arithmetic.
# Requires m = n - r
#
# input: n, (u, v, r, s, m)
# output: n/a
################
pollard_rho_montgomery:
rho_mt_skip_count = 64
mov o, 42
rho_mt_retry:
mulu x, y, o, s
call montgomery_reduce
mov b, x
mov j, 1
mov f, r
mov d, 1
rho_mt_outer_loop:
mov a, b
mov i, j
rho_mt_fast_loop:
mulu x, y, b, b
call montgomery_reduce
mov b, x
geu c, b, m
sz c, 1
sub b, b, m
snz c, 1
add b, b, r
#mov o, b
#call print
sub i, i, 1
jnz rho_mt_fast_loop, i
mov k, 0
rho_mt_slow_loop:
mov e, b
sub i, j, k
leu c, rho_mt_skip_count, i
sz c, 1
mov i, rho_mt_skip_count
rho_mt_inner_loop:
mulu x, y, b, b
call montgomery_reduce
mov b, x
geu c, b, m
sz c, 1
sub b, b, m
snz c, 1
add b, b, r
leu c, a, b
sz c, 1
sub x, b, a
snz c, 1
sub x, a, b
mulu x, y, x, f
call montgomery_reduce
mov f, x
sub i, i, 1
jnz rho_mt_inner_loop, i
mov x, f
#mov o, x
#call print
call gcd
mov d, x
#mov o, d
#call print
add k, k, rho_mt_skip_count
gequ c, k, j
snz c, 2
lequ c, d, 1
jnz rho_mt_slow_loop, c
shl j, j, 1
lequ c, d, 1
jnz rho_mt_outer_loop, c
cmp c, d, n
sz c, 15
rho_mt_final_loop:
mulu x, y, e, e
call montgomery_reduce
mov e, x
geu c, e, m
sz c, 1
sub e, e, m
snz c, 1
add e, e, r
leu c, a, e
sz c, 1
sub x, e, a
snz c, 1
sub x, a, e
call gcd
mov d, x
lequ c, d, 1
jnz rho_mt_final_loop, c
cmp c, d, n
sz c, 2
add o, o, 1
jmp rho_mt_retry
mov a, d
jmp found_factor
################ UTILITY FUNCTIONS ################
################
# Loads a decimal number terminated by a newline from stdin to register n.
#
# inputs: none
# outputs: n
################
input:
lw c, -1
input_add_digit:
sub c, c, ord('0')
mov o, c
mulu n, h, n, 10
add n, n, c
lw c, -1
cmp q, c, ord('\n')
jz input_add_digit, q
ret n
################
# Prints the value of register o to stdout in decimal, followed by a newline.
#
# inputs: o
# outputs: none
################
print:
call print_char
sw -1, ord('\n')
ret
print_char:
divu o, x, o, 10
sz o, 1
call print_char
add x, x, ord('0')
sw -1, x
ret
################
# Precomputes values used for Montgomery multiplication:
# * u = 1/2**64 mod n
# * v = -1/n mod 2**64
# * r = 2**64 mod n (montgomery representation of 1)
# * s = 2**128 mod n (montgomery representation of 2**64)
#
# inputs: n
# outputs: u, v, s, t
################
montgomery_precompute:
# first compute the inverses; based on xbinGCD from
# http://www.hackersdelight.org/MontgomeryMultiplication.pdf
# This step takes something like an average of 7 cycles per bit in the
# 'fast' mode, and 8 cycles per bit in 'safe' mode, for a total of around
# 500 cycles: but we only have to do it once per modulus
mov u, 1
mov v, 0
mov i, 64
# HD defensively computes (u+beta)/2 to avoid overflow, taking two extra
# instructions; however, since u < beta overflow is only possible when
# beta >= 2**63, in most cases we can use a faster version and save
# 2 * 64 = 128 cycles, at the expense of a couple cycles comparison
gequ c, n, 2**63
jz mt_pre_gcd_loop, c
mt_pre_gcd_loop_safe:
dec i
and c, u, 1
snz c, 3
shr u, u, 1
shr v, v, 1
sz 0, 6
and c, u, n
xor u, u, n
shr u, u, 1
add u, u, c
shr v, v, 1
add v, v, 2**63
jnz mt_pre_gcd_loop_safe, i
jmp mt_pre_gcd_end
mt_pre_gcd_loop:
dec i
and c, u, 1
snz c, 3
shr u, u, 1
shr v, v, 1
sz 0, 4
add u, u, n
shr u, u, 1
shr v, v, 1
add v, v, 2**63
jnz mt_pre_gcd_loop, i
mt_pre_gcd_end:
# compute 2**64 mod n; since 2**64 is too large to fit in a register, we
# first calculate r = 2**63 mod n, if it's greater than n/2, r = 2r - n,
# otherwise just r = 2r
divu q, r, 2**63, n # q is unused
shr c, n, 1
geu c, r, c
shl r, r, 1
sz c, 1
sub r, r, n
# It would be entirely possible to compute 2**128 mod n by starting with
# r and using repeated doubling (64 times); this would take something
# like 4*64 = 256 cycles. Instead I'll use a doubleword division
# algorithm from Hacker's Delight (ch. 9-4, fig. 9-3), which takes
# fewer than 100 cycles.
mov l, 0
# normalize the divisor (n)
and c, n, 2**64 - 2**32
snz c, 2
shl n, n, 32
add l, l, 32
and c, n, 2**64 - 2**48
snz c, 2
shl n, n, 16
add l, l, 16
and c, n, 2**64 - 2**56
snz c, 2
shl n, n, 8
add l, l, 8
and c, n, 2**64 - 2**60
snz c, 2
shl n, n, 4
add l, l, 4
and c, n, 2**64 - 2**62
snz c, 2
shl n, n, 2
add l, l, 2
and c, n, 2**64 - 2**63
snz c, 2
shl n, n, 1
add l, l, 1
# normalize the numerator
shl a, r, l
# split the divisor into halfwords
shr b, n, 32
and m, n, 2**32 - 1
# compute first halfword of remainder
divu q, p, a, b
mt_pre_div_adjust1: # loop runs at most two times, 035x on average
gequ c, q, 2**32
snz c, 4
mulu d, f, q, m # f is unused
shl c, p, 32
geu c, d, c
sz c, 4
sub q, q, 1
add p, p, b
leu c, p, 2**32
jnz mt_pre_div_adjust1, c
shl a, a, 32
mulu d, f, q, n
sub a, a, d
# compute second halfword of remainder
divu q, p, a, b
mt_pre_div_adjust2:
snz c, 4
mulu d, f, q, m
shl c, p, 32
geu c, d, c
sz c, 4
sub q, q, 1
add p, p, b
leu c, p, 2**32
jnz mt_pre_div_adjust2, c
shl a, a, 32
mulu d, f, q, n
sub a, a, d
# de-normalize remainder
shr s, a, l
ret u, v, r, s
################
# Performs the Montgomery reduction step, x = [yx]/2**64 mod n
# Requres the u and v calculated in mongomery_precompute.
#
# inputs: x, y, n, (u, v)
# outputs: x
################
montgomery_reduce:
mulu m, u, x, v # m = ([yx] mod 2**64) / (-n) mod 2**64
mulu z, w, m, n # [wz] = mN
add x, x, z # [yx] += [wz]
leu c, x, z # this is where I really miss an ADC instruction
sz c, 1
inc w
add x, y, w # x = [yx] / 2**64 = y (combined with y+w from above)
leu c, x, y # x -= n if overflow
sz c, 1
sub x, x, n
gequ c, x, n # if x >= n
sz c, 1
sub x, x, n # x -= n
ret x
################
# Computes a = a**d mod n by repeated squaring.
#
# inputs: a, d, n
# outputs: a
################
power:
mov b, a
mov a, 1
mt_pow_loop:
and c, d, 1
sz c, 2
mulu x, y, a, b # a = a*b mod n
divu q, a, x, n # q is unused
mulu x, y, b, b # b = b*b mod n
divu q, b, x, n
shr d, d, 1
jnz mt_pow_loop, d
ret a
################
# Uses Montgomery multiplication to compute a = a**d mod n. Treats d as an
# ordinary unsigned integer; treats d as an integer in Montgomery form.
# Requres the u, v, and r calculated in mongomery_precompute.
#
# inputs: a, d, n, (u, v, r)
# outputs: a
################
montgomery_power:
mov b, a
mov a, r
pow_loop:
and c, d, 1
sz c, 3
mulu x, y, a, b
call montgomery_reduce
mov a, x
mulu x, y, b, b
call montgomery_reduce
mov b, x
shr d, d, 1
jnz pow_loop, d
ret a
################
# Computes x = GCD(x,n) using the binary GCD algorithm. Assumes that x < n
# and that n is odd. Note that we don't need a separate Montgomery GCD,
# since, surprisingly, GCD(x*(2**64) mod n, n) == GCD(x, n).
#
# inputs: x, n
# outputs: x
################
gcd:
snz x, 2 # GCD(0, n) == n
mov x, n
ret x
gcd_loop:
and c, x, 1 # n is odd, so the GCD has no factors of two
snz c, 3
gcd_div_2_loop:
shr x, x, 1
and c, x, 1
jz gcd_div_2_loop, c
geu c, n, x
sz c, 5
sub x, n, x
sub n, n, x
jnz gcd_loop, x
mov x, n
ret x
sub x, x, n
jnz gcd_loop, x
mov x, n
ret x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment