Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
base58 encoding in Python
""" base58 encoding / decoding functions """
import unittest
alphabet = '123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
base_count = len(alphabet)
def encode(num):
""" Returns num in a base58-encoded string """
encode = ''
if (num < 0):
return ''
while (num >= base_count):
mod = num % base_count
encode = alphabet[mod] + encode
num = num / base_count
if (num):
encode = alphabet[num] + encode
return encode
def decode(s):
""" Decodes the base58-encoded string s into an integer """
decoded = 0
multi = 1
s = s[::-1]
for char in s:
decoded += multi * alphabet.index(char)
multi = multi * base_count
return decoded
class Base58Tests(unittest.TestCase):
def test_alphabet_length(self):
self.assertEqual(58, len(alphabet))
def test_encode_10002343_returns_Tgmc(self):
result = encode(10002343)
self.assertEqual('Tgmc', result)
def test_decode_Tgmc_returns_10002343(self):
decoded = decode('Tgmc')
self.assertEqual(10002343, decoded)
def test_encode_1000_returns_if(self):
result = encode(1000)
self.assertEqual('if', result)
def test_decode_if_returns_1000(self):
decoded = decode('if')
self.assertEqual(1000, decoded)
def test_encode_zero_returns_empty_string(self):
self.assertEqual('', encode(0))
def test_encode_negative_number_returns_empty_string(self):
self.assertEqual('', encode(-100))
if __name__ == '__main__':
unittest.main()
@rscottco

This comment has been minimized.

Show comment
Hide comment
@rscottco

rscottco Mar 8, 2013

Thanks for the starting point, gave me something to think about as I worked through my problem at a bleary-eyed 0500h.

Here are the business-bitz of my re-factored version. Submitted for your entertainment as you might find something of interest i.e. the encode while loop.

Interesting tidbit: For a very long/large integer, the actual difference in length of the encoded value between base-58 and base-94 deviated only two or three characters. Hardly worth the hassles of enclosing punctuation as found in the 'toy' base-94 as supplied.

from collections import deque
base94 = ''.join(map(chr, range(33,127)))

def flex_encode(val, chrset=base94):
    """\
    Returns a value encoded using 'chrset' regardless of length and 
    composition... well, needs 2 printable asccii chars minimum...

    :param val: base-10 integer value to encode as base*
    :param chrset: the characters to use for encoding

    Note: While this could encrypt some value, it is an insecure toy. 

    """
    basect = len(chrset)
    assert basect > 1
    encode = deque()

    while val > 0:
        val, mod = divmod(val, basect)
        encode.appendleft(chrset[mod])

    return ''.join(encode)

def flex_decode(enc, chrset=base94):
    """\
    Returns the 'chrset'-decoded value of 'enc'. Of course this needs to use 
    the exact same charset as when to encoding the value.

    :param enc: base-* encoded value to decode
    :param chrset: the character-set used for original encoding of 'enc' value

    Note: Did you read the 'encode' note above? Splendid, now have 
             some fun... somewhere...

    """
    basect = len(chrset)
    decoded = 0

    for e, c in enumerate(enc[::-1]):
        decoded += ((basect**e) * chrset.index(c))

    return decoded

def flex_decode_alt(enc, chrset=base58):
    """\
    Alternate version of previous decoder

    """
    basect = len(chrset)
    assert basect > 1
    enc = [chrset.index(c) for c in deque(enc)]
    enc.reverse()

    dec = 0
    for e, i in enumerate(enc):
        dec += ((basect**e) * i)

    return dec

def snippet_abuser(iters=10):
    """\
    A silly bit of code to test the encoding/decoding in one pass. Silly because
    it does too much to be worthwhile in the long-run... insert logging, and modify
    if you wish to see the random encoding strings it stuffs the pipe with.

    """
    from random import randint, sample
    import uuid

    chrset = chrset or base94

    for i in range(iters):
        tval = tval or uuid.uuid4().int
        base_set = ''.join(sample(base94, randint(2, len(chrset))))
        if verbose: print "Base %d: %r" % (len(base_set), base_set)

        assert tval == flex_decode(flex_encode(tval, base_set), base_set)

I added the alternate decoder version in case it helps someone else that stumbles upon this (sort of like you reminded me of the [::-1] construct).

Here are a couple of simple cross-pollinating tests that fit with the original base58 code.

    def test_static_encode_flex_decode(self):
        tv = 13766667885870927L
        self.assertEqual(tv, flex_decode(encode(tv), alphabet))

    def test_flex_encode_static_decode(self):
        tv = 13766667885870927L
        self.assertEqual(tv, decode(flex_encode(tv, alphabet)))

At some point I might push this to ActiveState's cookbook. With your permission I will add your name in credit if/when that happens. Regardless, I have no interest in licensing terms for such simple scripts... I've already spent more time typing this paragraph than this portion of the subject is worth! So to whomever it is useful, cheers!

rscottco commented Mar 8, 2013

Thanks for the starting point, gave me something to think about as I worked through my problem at a bleary-eyed 0500h.

Here are the business-bitz of my re-factored version. Submitted for your entertainment as you might find something of interest i.e. the encode while loop.

Interesting tidbit: For a very long/large integer, the actual difference in length of the encoded value between base-58 and base-94 deviated only two or three characters. Hardly worth the hassles of enclosing punctuation as found in the 'toy' base-94 as supplied.

from collections import deque
base94 = ''.join(map(chr, range(33,127)))

def flex_encode(val, chrset=base94):
    """\
    Returns a value encoded using 'chrset' regardless of length and 
    composition... well, needs 2 printable asccii chars minimum...

    :param val: base-10 integer value to encode as base*
    :param chrset: the characters to use for encoding

    Note: While this could encrypt some value, it is an insecure toy. 

    """
    basect = len(chrset)
    assert basect > 1
    encode = deque()

    while val > 0:
        val, mod = divmod(val, basect)
        encode.appendleft(chrset[mod])

    return ''.join(encode)

def flex_decode(enc, chrset=base94):
    """\
    Returns the 'chrset'-decoded value of 'enc'. Of course this needs to use 
    the exact same charset as when to encoding the value.

    :param enc: base-* encoded value to decode
    :param chrset: the character-set used for original encoding of 'enc' value

    Note: Did you read the 'encode' note above? Splendid, now have 
             some fun... somewhere...

    """
    basect = len(chrset)
    decoded = 0

    for e, c in enumerate(enc[::-1]):
        decoded += ((basect**e) * chrset.index(c))

    return decoded

def flex_decode_alt(enc, chrset=base58):
    """\
    Alternate version of previous decoder

    """
    basect = len(chrset)
    assert basect > 1
    enc = [chrset.index(c) for c in deque(enc)]
    enc.reverse()

    dec = 0
    for e, i in enumerate(enc):
        dec += ((basect**e) * i)

    return dec

def snippet_abuser(iters=10):
    """\
    A silly bit of code to test the encoding/decoding in one pass. Silly because
    it does too much to be worthwhile in the long-run... insert logging, and modify
    if you wish to see the random encoding strings it stuffs the pipe with.

    """
    from random import randint, sample
    import uuid

    chrset = chrset or base94

    for i in range(iters):
        tval = tval or uuid.uuid4().int
        base_set = ''.join(sample(base94, randint(2, len(chrset))))
        if verbose: print "Base %d: %r" % (len(base_set), base_set)

        assert tval == flex_decode(flex_encode(tval, base_set), base_set)

I added the alternate decoder version in case it helps someone else that stumbles upon this (sort of like you reminded me of the [::-1] construct).

Here are a couple of simple cross-pollinating tests that fit with the original base58 code.

    def test_static_encode_flex_decode(self):
        tv = 13766667885870927L
        self.assertEqual(tv, flex_decode(encode(tv), alphabet))

    def test_flex_encode_static_decode(self):
        tv = 13766667885870927L
        self.assertEqual(tv, decode(flex_encode(tv, alphabet)))

At some point I might push this to ActiveState's cookbook. With your permission I will add your name in credit if/when that happens. Regardless, I have no interest in licensing terms for such simple scripts... I've already spent more time typing this paragraph than this portion of the subject is worth! So to whomever it is useful, cheers!

@prerakmody

This comment has been minimized.

Show comment
Hide comment
@prerakmody

prerakmody Jan 10, 2017

You may remove this content if you find it not in context of your gist. But if a user simply wants to convert a string into a base-58 encoded function, he could use the following:

def encode(num):
        alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
	####convert each digit into unicode
	buffer = [ord(each[0]) for each in str(num)]
	print 'Buffer:', buffer
	digits = [0]; i= 0; j = 0;
	while (i < len(buffer)):   #loop for as many digits are present
		j = 0
		while (j < len(digits)):
			digits[j] <<= 8
			j += 1
		digits[0] += buffer[i]
		carry = 0; j = 0;
		while(j < len(digits)):
			digits[j] += carry
			carry = (digits[j]/58) | 0
			digits[j] %= 58;
			j += 1
		while(carry):
			digits.append(carry%58)
			carry = (carry/58) | 0
		i += 1
	print 'Digits:', digits
	i = 0;
	while (buffer[i] == 0 and i < len(buffer) - 1):
		digits.push(0);
		i += 1;
	print digits
	print 'Result:', [alphabet[each] for each in digits][::-1]

Inspirations from this URL: https://www.browserling.com/tools/base58-encode

prerakmody commented Jan 10, 2017

You may remove this content if you find it not in context of your gist. But if a user simply wants to convert a string into a base-58 encoded function, he could use the following:

def encode(num):
        alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
	####convert each digit into unicode
	buffer = [ord(each[0]) for each in str(num)]
	print 'Buffer:', buffer
	digits = [0]; i= 0; j = 0;
	while (i < len(buffer)):   #loop for as many digits are present
		j = 0
		while (j < len(digits)):
			digits[j] <<= 8
			j += 1
		digits[0] += buffer[i]
		carry = 0; j = 0;
		while(j < len(digits)):
			digits[j] += carry
			carry = (digits[j]/58) | 0
			digits[j] %= 58;
			j += 1
		while(carry):
			digits.append(carry%58)
			carry = (carry/58) | 0
		i += 1
	print 'Digits:', digits
	i = 0;
	while (buffer[i] == 0 and i < len(buffer) - 1):
		digits.push(0);
		i += 1;
	print digits
	print 'Result:', [alphabet[each] for each in digits][::-1]

Inspirations from this URL: https://www.browserling.com/tools/base58-encode

@BenGimli

This comment has been minimized.

Show comment
Hide comment
@BenGimli

BenGimli May 21, 2017

Just learning python
When running this program, I get this error

encode = alphabet[num] + encode
TypeError: string indices must be integers

I'm using version 3.6
What am I missing?

BenGimli commented May 21, 2017

Just learning python
When running this program, I get this error

encode = alphabet[num] + encode
TypeError: string indices must be integers

I'm using version 3.6
What am I missing?

@acarmisc

This comment has been minimized.

Show comment
Hide comment
@acarmisc

acarmisc Jul 13, 2017

@BenGimli num must be a number

acarmisc commented Jul 13, 2017

@BenGimli num must be a number

@rootux

This comment has been minimized.

Show comment
Hide comment
@rootux

rootux Jan 1, 2018

Here is a library that does that https://github.com/keis/base58

rootux commented Jan 1, 2018

Here is a library that does that https://github.com/keis/base58

@john-shine

This comment has been minimized.

Show comment
Hide comment
@john-shine

john-shine Mar 21, 2018

alphabet is "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" not "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ" if in Bitcoin

john-shine commented Mar 21, 2018

alphabet is "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" not "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ" if in Bitcoin

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment