Created
January 13, 2011 20:35
-
-
Save bhelx/778542 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Converts any integer into a base [BASE] number. I have chosen 62 | |
# as it is meant to represent the integers using all the alphanumeric | |
# characters, [no special characters] = {0..9}, {A..Z}, {a..z} | |
# | |
# I plan on using this to shorten the representation of possibly long ids, | |
# a la url shortenters | |
# | |
# saturate() takes the base 62 key, as a string, and turns it back into an integer | |
# dehydrate() takes an integer and turns it into the base 62 string | |
# | |
import math | |
import sys | |
BASE = 62 | |
UPPERCASE_OFFSET = 55 | |
LOWERCASE_OFFSET = 61 | |
DIGIT_OFFSET = 48 | |
def true_ord(char): | |
""" | |
Turns a digit [char] in character representation | |
from the number system with base [BASE] into an integer. | |
""" | |
if char.isdigit(): | |
return ord(char) - DIGIT_OFFSET | |
elif 'A' <= char <= 'Z': | |
return ord(char) - UPPERCASE_OFFSET | |
elif 'a' <= char <= 'z': | |
return ord(char) - LOWERCASE_OFFSET | |
else: | |
raise ValueError("%s is not a valid character" % char) | |
def true_chr(integer): | |
""" | |
Turns an integer [integer] into digit in base [BASE] | |
as a character representation. | |
""" | |
if integer < 10: | |
return chr(integer + DIGIT_OFFSET) | |
elif 10 <= integer <= 35: | |
return chr(integer + UPPERCASE_OFFSET) | |
elif 36 <= integer < 62: | |
return chr(integer + LOWERCASE_OFFSET) | |
else: | |
raise ValueError("%d is not a valid integer in the range of base %d" % (integer, BASE)) | |
def saturate(key): | |
""" | |
Turn the base [BASE] number [key] into an integer | |
""" | |
int_sum = 0 | |
reversed_key = key[::-1] | |
for idx, char in enumerate(reversed_key): | |
int_sum += true_ord(char) * int(math.pow(BASE, idx)) | |
return int_sum | |
def dehydrate(integer): | |
""" | |
Turn an integer [integer] into a base [BASE] number | |
in string representation | |
""" | |
# we won't step into the while if integer is 0 | |
# so we just solve for that case here | |
if integer == 0: | |
return '0' | |
string = "" | |
while integer > 0: | |
remainder = integer % BASE | |
string = true_chr(remainder) + string | |
integer /= BASE | |
return string | |
if __name__ == '__main__': | |
# not really unit tests just a rough check to see if anything is way off | |
if sys.argv[1] == '-tests': | |
passed_tests = True | |
for i in xrange(0, 1000): | |
passed_tests &= (i == saturate(dehydrate(i))) | |
print passed_tests | |
else: | |
user_input = sys.argv[2] | |
try: | |
if sys.argv[1] == '-s': | |
print saturate(user_input) | |
elif sys.argv[1] == '-d': | |
print dehydrate(int(user_input)) | |
else: | |
print "I don't understand option %s" % sys.argv[1] | |
except ValueError as e: | |
print e | |
I wrote a C version of this similar algorithm. https://gist.github.com/ftonello/8322831
Thanks
This snippet has a bug that can't handle the string startswith "0" .
$ python base_62_converter.py -s "0123456789"
$ 225557475374453
$ python base_62_converter.py -d 225557475374453
$ 123456789
Here is the patch
https://gist.github.com/mengzhuo/23d67ec8ef9e0e18b446/36753801e447328f03857fe4ccbabc5478cc3199
thanks @mengzhuo
It seems there is a bug in the saturate(). The loop should be something like this:
for char in key:
int_sum = int_sum * BASE + true_ord(char)
Not trying to resurrect this, but I'm just gonna throw this out there in case anyone finds this. I made a library called BaseN that can convert between any digit sets (aka it's not just limited to converting between base 10 and base 62). Check it out here!
Thanks so much!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great gist! Thanks!