Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Last active May 24, 2024 08:18
Show Gist options
  • Save UserUnknownFactor/8f70ddc3dc33b07e158d19aaa335c887 to your computer and use it in GitHub Desktop.
Save UserUnknownFactor/8f70ddc3dc33b07e158d19aaa335c887 to your computer and use it in GitHub Desktop.
Pure Python implementation of MD5 algorithm for experiments and custom mods
#!/usr/bin/python3
# RSA Data Security, Inc., MD5 message-digest algorithm
# Copyright (C) 1991-1992, RSA Data Security, Inc.
"""
## pymd5 module
### The MD5 hash implementation in pure Python
The module exposes the same methods as hashlib.md5, plus a couple of
low-level methods to help with crypto experiments.
"""
import struct
# Left-rotation amounts used by the compression function.
# Sij is the j-th rotation amount of round i; each round cycles through its four.
S11, S12, S13, S14 = 7, 12, 17, 22
S21, S22, S23, S24 = 5, 9, 14, 20
S31, S32, S33, S34 = 4, 11, 16, 23
S41, S42, S43, S44 = 6, 10, 15, 21

# Padding source: a single 0x80 byte followed by zero bytes (64 bytes total).
PADDING = b"\x80" + bytes(63)

# The last two words of the standard initial state; the first two are
# their 32-bit bitwise complements.
MD5_A = 0x98BADCFE
MD5_B = 0x10325476
MD5_STANDARD_INIT = (MD5_A ^ 0xFFFFFFFF, MD5_B ^ 0xFFFFFFFF, MD5_A, MD5_B)
# F, G, H and I: basic MD5 functions.
def F(x, y, z):
    """Bitwise select: where a bit of x is set take y's bit, otherwise z's bit."""
    from_y = x & y
    from_z = ~x & z
    return from_y | from_z
def G(x, y, z):
    """Bitwise select keyed on z: where a bit of z is set take x's bit, otherwise y's bit."""
    from_x = x & z
    from_y = y & ~z
    return from_x | from_y
def H(x, y, z):
    """Bitwise parity: XOR of the three inputs."""
    result = x ^ y
    result ^= z
    return result
def I(x, y, z):
    """Return ``y XOR (x OR NOT z)``.

    Because Python's ``~`` yields a negative integer, the result is
    negative; callers mask it to 32 bits when they need an unsigned word.
    """
    x_or_not_z = x | ~z
    return y ^ x_or_not_z
def ROTATE_LEFT(x, n):
    """Rotate the low 32 bits of ``x`` left by ``n`` bit positions.

    ``x`` is first reduced to an unsigned 32-bit word, so negative or
    oversized integers are accepted.  ``n`` is taken modulo 32, so any
    integer shift count works (a negative count rotates right).

    Returns an integer in the range 0 .. 2**32 - 1.
    """
    x &= 0xffffffff  # make shift unsigned
    # Rotation is periodic in the word size; reducing n also keeps the
    # right-shift count (32 - n) from going negative, which would raise.
    n %= 32
    return ((x << n) | (x >> (32 - n))) & 0xffffffff
# FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
# Rotation is separate from addition to prevent recomputation.
def FF(a, b, c, d, x, s, ac):
    """Round-1 step: ``b + ((a + F(b, c, d) + x + ac) <<< s)`` modulo 2**32.

    a, b, c, d -- current state words (``a`` is the word being replaced)
    x  -- message word used by this step
    s  -- left-rotation amount
    ac -- additive constant for this step

    Nothing is modified in place: the caller must assign the returned
    value back to the state word it passed as ``a``.
    """
    # Rotation is kept separate from the addition so the sum is built once.
    total = a + F(b, c, d) + x + ac
    # Reduce the final sum to a 32-bit word so the returned state word never
    # leaves the range 0 .. 2**32 - 1.
    return (ROTATE_LEFT(total, s) + b) & 0xffffffff
def GG(a, b, c, d, x, s, ac):
    """Round-2 step: ``b + ((a + G(b, c, d) + x + ac) <<< s)`` modulo 2**32.

    a, b, c, d -- current state words (``a`` is the word being replaced)
    x  -- message word used by this step
    s  -- left-rotation amount
    ac -- additive constant for this step

    Nothing is modified in place: the caller must assign the returned
    value back to the state word it passed as ``a``.
    """
    # Rotation is kept separate from the addition so the sum is built once.
    total = a + G(b, c, d) + x + ac
    # Reduce the final sum to a 32-bit word so the returned state word never
    # leaves the range 0 .. 2**32 - 1.
    return (ROTATE_LEFT(total, s) + b) & 0xffffffff
def HH(a, b, c, d, x, s, ac):
    """Round-3 step: ``b + ((a + H(b, c, d) + x + ac) <<< s)`` modulo 2**32.

    a, b, c, d -- current state words (``a`` is the word being replaced)
    x  -- message word used by this step
    s  -- left-rotation amount
    ac -- additive constant for this step

    Nothing is modified in place: the caller must assign the returned
    value back to the state word it passed as ``a``.
    """
    # Rotation is kept separate from the addition so the sum is built once.
    total = a + H(b, c, d) + x + ac
    # Reduce the final sum to a 32-bit word so the returned state word never
    # leaves the range 0 .. 2**32 - 1.
    return (ROTATE_LEFT(total, s) + b) & 0xffffffff
def II(a, b, c, d, x, s, ac):
    """Round-4 step: ``b + ((a + I(b, c, d) + x + ac) <<< s)`` modulo 2**32.

    a, b, c, d -- current state words (``a`` is the word being replaced)
    x  -- message word used by this step
    s  -- left-rotation amount
    ac -- additive constant for this step

    Nothing is modified in place: the caller must assign the returned
    value back to the state word it passed as ``a``.
    """
    # I() returns a negative integer; ROTATE_LEFT masks the sum to 32 bits
    # before rotating, so the sign does not matter here.
    total = a + I(b, c, d) + x + ac
    # Reduce the final sum to a 32-bit word so the returned state word never
    # leaves the range 0 .. 2**32 - 1.
    return (ROTATE_LEFT(total, s) + b) & 0xffffffff
class md5(object):
    """Pure-Python MD5 hash object.

    Most of the interface matches Python's standard hashlib.  Optional
    advanced constructor parameters additionally allow resuming an earlier
    computation by setting the internal state of the function, the counter
    of message bits processed so far and the bytes still buffered.

    ### Attributes:
    `digest_size`: The size of the resulting hash in bytes (16).
    `block_size`: The internal block size of the hash algorithm in bytes (64).
    `state`: Tuple of the four 32-bit chaining words.
    `count`: Number of message bits fed in so far.
    `buffer`: Bytes of the current, not yet compressed block.

    ### Examples:
    To obtain the digest of a byte string:
    >>> import pymd5
    >>> m = pymd5.md5()
    >>> m.update(b"Nobody inspects")
    >>> m.update(b" the spammish repetition")
    >>> m.hexdigest()
    'bb649c83dd1ea5c9d9dec9a18df0ffe9'

    More condensed:
    >>> pymd5.md5(b"Nobody inspects the spammish repetition").hexdigest()
    'bb649c83dd1ea5c9d9dec9a18df0ffe9'
    """

    digest_size = 16  # size of the resulting hash in bytes
    block_size = 64  # hash algorithm's internal block size

    def __init__(self, data: bytes = b'', state: tuple = None, count: int = 0, buffer=b''):
        """Create a new md5 hash object.

        data   -- initial bytes to hash (optional)
        state  -- four 32-bit words to use instead of the standard initial
                  state (optional)
        count  -- number of message bits already processed; ``None`` is
                  treated as 0
        buffer -- bytes of the partially filled block that belong to
                  ``count``; needs to be restored together with ``count``
                  whenever ``count`` is not a multiple of 512
        """
        self.count = 0 if count is None else count
        self.buffer = buffer
        if state is None:
            # initial state defined by standard:
            # (0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476)
            self.state = MD5_STANDARD_INIT
        else:
            # Normalise to a tuple so getstate() can concatenate to it.
            self.state = tuple(state)
        if data:
            self.update(data)

    def update(self, data: bytes):
        """Feed ``data`` (a bytes-like object) into the hash.

        Repeated calls are equivalent to a single call with the
        concatenation of all the arguments.

        Raises TypeError if ``data`` is a ``str``.
        """
        if isinstance(data, str):
            # Reject early: failing later would leave self.count already
            # advanced and the object in an inconsistent state.
            raise TypeError("Strings must be encoded before hashing")

        input_len = len(data)
        index = (self.count >> 3) & 0x3F  # bytes already sitting in the buffer
        self.count += input_len << 3  # update number of bits
        part_len = md5.block_size - index  # bytes needed to fill the buffer

        # apply compression function to as many blocks as we have
        if input_len >= part_len:
            self.buffer = self.buffer[:index] + data[:part_len]
            self.state = self._compress(self.buffer)
            i = part_len
            while i + 63 < input_len:
                self.state = self._compress(data[i:i + md5.block_size])
                i += md5.block_size
            index = 0
        else:
            i = 0

        # buffer remaining input
        self.buffer = self.buffer[:index] + data[i:input_len]

    def digest(self, keep_state=False) -> bytes:
        """Return the MD5 hash of the data passed to update() so far.

        The result is a bytes object of ``digest_size`` bytes.  The buffer
        and bit counter are always restored afterwards, so more data can be
        fed in later.  The chaining state is restored as well unless
        ``keep_state`` is true, in which case ``self.state`` keeps the
        finalized (post-padding) value.
        """
        saved_buffer, saved_count, saved_state = self.buffer, self.count, self.state
        self.update(self.padding())
        result = self.state
        self.buffer, self.count = saved_buffer, saved_count
        if not keep_state:
            self.state = saved_state
        return md5._encode(result, md5.digest_size)

    def hexdigest(self) -> str:
        """Like digest() except the hash value is returned
        as a string of hexadecimal digits.
        """
        return self.digest().hex()

    def getstate(self) -> tuple:
        """Return the current state as a five-item tuple.

        The first four items are the chaining words and the fifth is the
        number of processed bits.  The buffered bytes are not included.
        """
        return tuple(self.state) + (self.count,)

    def setstate(self, state: tuple):
        """Set the current state from a tuple as returned by getstate().

        The first four items are the chaining words and the fifth is the
        number of processed bits.  The buffer is left untouched.
        """
        self.state = tuple(state[:4])
        self.count = state[4]  # fifth item; index 5 would be out of range

    def padding(self, message_bits: int = None, offset=0):
        """Generate the padding that should be appended to the end of a
        message of the given size to reach a multiple of the block size.

        message_bits -- message length in bits; defaults to ``self.count``
        offset       -- extra bit count added to the encoded length field
                        only (it does not affect the pad length)

        Returns the pad bytes followed by the 8-byte little-endian length.
        """
        if message_bits is None:
            message_bits = self.count
        index = (message_bits >> 3) & 0x3F
        pad_len = 56 - index if index < 56 else 120 - index
        # The last 8 bytes store the number of bits in the message; only the
        # low-order 64 bits are used, as RFC 1321 specifies.
        bit_length = (offset + message_bits) & 0xFFFFFFFFFFFFFFFF
        return PADDING[:pad_len] + struct.pack('<Q', bit_length)

    def _compress(self, block: bytes):
        """The MD5 compression function.

        Combines the current four-word ``self.state`` with one 512-bit
        (64-byte) message block and returns the new state as a tuple of
        four 32-bit words.  ``self.state`` itself is not modified here;
        the caller assigns the result.
        """
        a, b, c, d = self.state
        x = md5._decode(block, md5.block_size)

        # Round 1
        a = FF(a, b, c, d, x[0], S11, 0xd76aa478)  # 1
        d = FF(d, a, b, c, x[1], S12, 0xe8c7b756)  # 2
        c = FF(c, d, a, b, x[2], S13, 0x242070db)  # 3
        b = FF(b, c, d, a, x[3], S14, 0xc1bdceee)  # 4
        a = FF(a, b, c, d, x[4], S11, 0xf57c0faf)  # 5
        d = FF(d, a, b, c, x[5], S12, 0x4787c62a)  # 6
        c = FF(c, d, a, b, x[6], S13, 0xa8304613)  # 7
        b = FF(b, c, d, a, x[7], S14, 0xfd469501)  # 8
        a = FF(a, b, c, d, x[8], S11, 0x698098d8)  # 9
        d = FF(d, a, b, c, x[9], S12, 0x8b44f7af)  # 10
        c = FF(c, d, a, b, x[10], S13, 0xffff5bb1)  # 11
        b = FF(b, c, d, a, x[11], S14, 0x895cd7be)  # 12
        a = FF(a, b, c, d, x[12], S11, 0x6b901122)  # 13
        d = FF(d, a, b, c, x[13], S12, 0xfd987193)  # 14
        c = FF(c, d, a, b, x[14], S13, 0xa679438e)  # 15
        b = FF(b, c, d, a, x[15], S14, 0x49b40821)  # 16

        # Round 2
        a = GG(a, b, c, d, x[1], S21, 0xf61e2562)  # 17
        d = GG(d, a, b, c, x[6], S22, 0xc040b340)  # 18
        c = GG(c, d, a, b, x[11], S23, 0x265e5a51)  # 19
        b = GG(b, c, d, a, x[0], S24, 0xe9b6c7aa)  # 20
        a = GG(a, b, c, d, x[5], S21, 0xd62f105d)  # 21
        d = GG(d, a, b, c, x[10], S22, 0x02441453)  # 22
        c = GG(c, d, a, b, x[15], S23, 0xd8a1e681)  # 23
        b = GG(b, c, d, a, x[4], S24, 0xe7d3fbc8)  # 24
        a = GG(a, b, c, d, x[9], S21, 0x21e1cde6)  # 25
        d = GG(d, a, b, c, x[14], S22, 0xc33707d6)  # 26
        c = GG(c, d, a, b, x[3], S23, 0xf4d50d87)  # 27
        b = GG(b, c, d, a, x[8], S24, 0x455a14ed)  # 28
        a = GG(a, b, c, d, x[13], S21, 0xa9e3e905)  # 29
        d = GG(d, a, b, c, x[2], S22, 0xfcefa3f8)  # 30
        c = GG(c, d, a, b, x[7], S23, 0x676f02d9)  # 31
        b = GG(b, c, d, a, x[12], S24, 0x8d2a4c8a)  # 32

        # Round 3
        a = HH(a, b, c, d, x[5], S31, 0xfffa3942)  # 33
        d = HH(d, a, b, c, x[8], S32, 0x8771f681)  # 34
        c = HH(c, d, a, b, x[11], S33, 0x6d9d6122)  # 35
        b = HH(b, c, d, a, x[14], S34, 0xfde5380c)  # 36
        a = HH(a, b, c, d, x[1], S31, 0xa4beea44)  # 37
        d = HH(d, a, b, c, x[4], S32, 0x4bdecfa9)  # 38
        c = HH(c, d, a, b, x[7], S33, 0xf6bb4b60)  # 39
        b = HH(b, c, d, a, x[10], S34, 0xbebfbc70)  # 40
        a = HH(a, b, c, d, x[13], S31, 0x289b7ec6)  # 41
        d = HH(d, a, b, c, x[0], S32, 0xeaa127fa)  # 42
        c = HH(c, d, a, b, x[3], S33, 0xd4ef3085)  # 43
        b = HH(b, c, d, a, x[6], S34, 0x04881d05)  # 44
        a = HH(a, b, c, d, x[9], S31, 0xd9d4d039)  # 45
        d = HH(d, a, b, c, x[12], S32, 0xe6db99e5)  # 46
        c = HH(c, d, a, b, x[15], S33, 0x1fa27cf8)  # 47
        b = HH(b, c, d, a, x[2], S34, 0xc4ac5665)  # 48

        # Round 4
        a = II(a, b, c, d, x[0], S41, 0xf4292244)  # 49
        d = II(d, a, b, c, x[7], S42, 0x432aff97)  # 50
        c = II(c, d, a, b, x[14], S43, 0xab9423a7)  # 51
        b = II(b, c, d, a, x[5], S44, 0xfc93a039)  # 52
        a = II(a, b, c, d, x[12], S41, 0x655b59c3)  # 53
        d = II(d, a, b, c, x[3], S42, 0x8f0ccc92)  # 54
        c = II(c, d, a, b, x[10], S43, 0xffeff47d)  # 55
        b = II(b, c, d, a, x[1], S44, 0x85845dd1)  # 56
        a = II(a, b, c, d, x[8], S41, 0x6fa87e4f)  # 57
        d = II(d, a, b, c, x[15], S42, 0xfe2ce6e0)  # 58
        c = II(c, d, a, b, x[6], S43, 0xa3014314)  # 59
        b = II(b, c, d, a, x[13], S44, 0x4e0811a1)  # 60
        a = II(a, b, c, d, x[4], S41, 0xf7537e82)  # 61
        d = II(d, a, b, c, x[11], S42, 0xbd3af235)  # 62
        c = II(c, d, a, b, x[2], S43, 0x2ad7d2bb)  # 63
        b = II(b, c, d, a, x[9], S44, 0xeb86d391)  # 64

        # Add the block's result to the previous state, word by word,
        # keeping each word within 32 bits.
        return (0xffffffff & (self.state[0] + a),
                0xffffffff & (self.state[1] + b),
                0xffffffff & (self.state[2] + c),
                0xffffffff & (self.state[3] + d),)

    @staticmethod
    def _encode(buffer: tuple, length: int):
        """Pack the first ``length // 4`` words of ``buffer`` into bytes.

        Words are written as little-endian unsigned 32-bit integers, the
        byte order MD5 requires, independent of the host's byte order.
        """
        k = length // 4
        return struct.pack(f"<{k}I", *buffer[:k])

    @staticmethod
    def _decode(buffer: bytes, length: int):
        """Unpack the first ``length`` bytes of ``buffer`` into a list of
        ``length // 4`` little-endian unsigned 32-bit integers.
        """
        k = length // 4
        return list(struct.unpack(f"<{k}I", buffer[:k * 4]))
def test(buffer=b"jk8ssl"):
    """Print the hex digest of ``buffer`` twice for visual comparison:
    first from this module's md5, then from the standard hashlib md5.
    """
    ours = md5(buffer).hexdigest()
    print(ours)
    # Imported here, after the first line is printed, as in the original flow.
    import hashlib
    reference = hashlib.md5(buffer).hexdigest()
    print(reference)
# Run the side-by-side comparison when executed as a script.
if __name__ == "__main__":
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment