Created March 30, 2015 13:35
-
-
Save cosimo/0240963a9643e9d05750 to your computer and use it in GitHub Desktop.
Simple class used to generate "shortened" URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
class Shortener:
    """
    Simple class to shorten URLs, or rather, numerical IDs.

    Each URL must be saved to a database, with the data about source,
    category, country, publisher, etc... The database will give back
    a numerical ID. We're using that ID to convert it to a base 62
    number and using a symbol table, converting it to a sequence
    of alphanumeric chars.

    Our base sequence of 62 symbols will be arranged like this:

        'A' .. 'Z'     0 .. 25
        'a' .. 'z'    26 .. 51
        '0' .. '9'    52 .. 61

    For example, the number 62 in base 62 is 10, so the resulting
    string will be "BA", where "B" -> 1, "A" -> 0.
    """

    def shorturl_char(self, i):
        """Return the alphabet symbol for the base-62 digit *i*.

        Raises ValueError when *i* is outside 0 .. 61 (negative values
        included, which would otherwise map outside the alphabet).
        """
        if 0 <= i < 26:
            return chr(65 + i)      # 'A' .. 'Z'
        elif 26 <= i < 52:
            return chr(71 + i)      # 'a' .. 'z' (97 - 26 == 71)
        elif 52 <= i < 62:
            return chr(i - 4)       # '0' .. '9' (48 - 52 == -4)
        else:
            raise ValueError("Can't convert int %d to char" % i)

    def char_to_number(self, char):
        """Return the base-62 digit value (0 .. 61) of a single symbol.

        Raises ValueError when *char* is not in 'A'..'Z', 'a'..'z', '0'..'9'.
        """
        n = ord(char)
        if 65 <= n <= 90:
            return n - 65
        elif 97 <= n <= 122:
            return n - 71
        elif 48 <= n < 58:
            return n + 4
        else:
            raise ValueError("Can't convert char %s to int" % char)

    def token_to_number(self, token):
        """Decode a base-62 *token* back into its integer ID.

        An empty token decodes to 0. Raises ValueError (from
        char_to_number) if the token contains a symbol outside the alphabet.
        """
        n = 0
        # Horner's scheme, most significant symbol first. Pure integer
        # arithmetic keeps long tokens exact; a float power such as
        # math.pow(62, pos) loses precision above 2**53.
        for c in token:
            n = n * 62 + self.char_to_number(c)
        return n

    def convert_number(self, n):
        """Encode the non-negative integer *n* as a base-62 token.

        Zero encodes to 'A'. Raises ValueError when *n* is negative, since
        a negative ID has no representation in this alphabet.
        """
        if n < 0:
            raise ValueError("Can't convert negative number %d to token" % n)
        if n == 0:
            return 'A'
        symbols = []
        while n > 0:
            # divmod performs floor division, so n stays an int on both
            # Python 2 and Python 3 (plain "/" yields a float on Python 3).
            n, rmd = divmod(n, 62)
            symbols.append(self.shorturl_char(rmd))
        # Digits were produced least significant first.
        return ''.join(reversed(symbols))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.