Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
#!/usr/bin/env python
#
# Converts any integer into a base [BASE] number. I have chosen 62
# as it is meant to represent the integers using all the alphanumeric
# characters, [no special characters] = {0..9}, {A..Z}, {a..z}
#
# I plan on using this to shorten the representation of possibly long ids,
# a la url shortenters
#
# saturate() takes the base 62 key, as a string, and turns it back into an integer
# dehydrate() takes an integer and turns it into the base 62 string
#
import math
import sys
BASE = 62
UPPERCASE_OFFSET = 55
LOWERCASE_OFFSET = 61
DIGIT_OFFSET = 48
def true_ord(char):
"""
Turns a digit [char] in character representation
from the number system with base [BASE] into an integer.
"""
if char.isdigit():
return ord(char) - DIGIT_OFFSET
elif 'A' <= char <= 'Z':
return ord(char) - UPPERCASE_OFFSET
elif 'a' <= char <= 'z':
return ord(char) - LOWERCASE_OFFSET
else:
raise ValueError("%s is not a valid character" % char)
def true_chr(integer):
"""
Turns an integer [integer] into digit in base [BASE]
as a character representation.
"""
if integer < 10:
return chr(integer + DIGIT_OFFSET)
elif 10 <= integer <= 35:
return chr(integer + UPPERCASE_OFFSET)
elif 36 <= integer < 62:
return chr(integer + LOWERCASE_OFFSET)
else:
raise ValueError("%d is not a valid integer in the range of base %d" % (integer, BASE))
def saturate(key):
"""
Turn the base [BASE] number [key] into an integer
"""
int_sum = 0
reversed_key = key[::-1]
for idx, char in enumerate(reversed_key):
int_sum += true_ord(char) * int(math.pow(BASE, idx))
return int_sum
def dehydrate(integer):
"""
Turn an integer [integer] into a base [BASE] number
in string representation
"""
# we won't step into the while if integer is 0
# so we just solve for that case here
if integer == 0:
return '0'
string = ""
while integer > 0:
remainder = integer % BASE
string = true_chr(remainder) + string
integer /= BASE
return string
if __name__ == '__main__':
# not really unit tests just a rough check to see if anything is way off
if sys.argv[1] == '-tests':
passed_tests = True
for i in xrange(0, 1000):
passed_tests &= (i == saturate(dehydrate(i)))
print passed_tests
else:
user_input = sys.argv[2]
try:
if sys.argv[1] == '-s':
print saturate(user_input)
elif sys.argv[1] == '-d':
print dehydrate(int(user_input))
else:
print "I don't understand option %s" % sys.argv[1]
except ValueError as e:
print e

mitnk commented Jul 27, 2012

Great gist! Thanks!

ftonello commented Jan 8, 2014

I wrote a C version of this similar algorithm. https://gist.github.com/ftonello/8322831

iziang commented Mar 9, 2014

Thanks

mengzhuo commented Jul 2, 2014

This snippet has a bug that can't handle the string startswith "0" .

$ python base_62_converter.py -s "0123456789"
$ 225557475374453
$ python base_62_converter.py -d 225557475374453
$ 123456789

Here is the patch
https://gist.github.com/mengzhuo/23d67ec8ef9e0e18b446/36753801e447328f03857fe4ccbabc5478cc3199

Owner

bhelx commented Dec 2, 2014

thanks @mengzhuo

It seems there is a bug in the saturate(). The loop should be something like this:

for char in key:
int_sum = int_sum * BASE + true_ord(char)

Not trying to resurrect this, but I'm just gonna throw this out there in case anyone finds this. I made a library called BaseN that can convert between any digit sets (aka it's not just limited to converting between base 10 and base 62). Check it out here!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment