Skip to content

Instantly share code, notes, and snippets.

@dudelson
Last active January 29, 2020 04:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dudelson/badad3bd55dcf8a874763b641abdbaee to your computer and use it in GitHub Desktop.
Save dudelson/badad3bd55dcf8a874763b641abdbaee to your computer and use it in GitHub Desktop.
Slides for my presentation at CyberArk

URL Shortener

Logistics

  1. high-level project overview
  2. short demo of main features
  3. code walkthrough

Logistics

About Me

  • Software Engineer with 2-3 years of experience across the entire product pipeline
  • Technical Interests: FOSS, security, emacs
  • Non-Technical Interests: cross-country skiing, martial arts, personal productivity
  • https://www.davidudelson.com/about/

Project Overview

  • Shortcake: Deliciously short URLs!
  • Flask 1.1 on top of Python 3.8.1
  • Designed for easy deployment to Heroku

Short Demo

API

API Specification

/api/v1/shorten:
  post:
    data:
      - {
        name: "url"
        type: "string"
      }
    responses:
      201:
        data:
          - {
            name: "short_url"
            type: "string"
          }
      400: client error

API Specification

/api/v1/lengthen/<short_key>:
  get:
    responses:
      200:
        data:
          - {
            name: "url"
            type: "string"
          }
      400: client error

API Routes

@bp.route('/shorten', methods=['POST'])
def shorten():
    """Create a short URL for the URL supplied in the JSON request body.

    The body must be a JSON object with a string-valued ``url`` member.

    Returns:
        A ``(response, 201)`` pair whose JSON body is
        ``{"short_url": "http://<DOMAIN_NAME>/<key>"}``.

    Aborts with 400 when the body is missing, is not a JSON object, lacks
    a string ``url`` member, or when ``core.shorten_url`` raises
    ``InvalidURLError`` or ``OutOfShortKeysError``.
    """
    payload = request.json
    # Lists and strings also support ``in``, so a body such as ["url"] would
    # pass a bare membership test and then blow up on indexing. Require an
    # actual JSON object before looking anything up.
    if not isinstance(payload, dict) or 'url' not in payload:
        abort(400)
    url = payload['url']
    # The API contract declares ``url`` as a string; reject anything else
    # here instead of letting it fail inside the core logic.
    if not isinstance(url, str):
        abort(400)
    try:
        key = core.shorten_url(url)
    except (core.InvalidURLError, core.OutOfShortKeysError):
        abort(400)
    short_url = 'http://{}/{}'.format(
        current_app.config['DOMAIN_NAME'],
        key)
    return jsonify({'short_url': short_url}), 201

API Routes

@bp.route('/lengthen/<string:key>', methods=['GET'])
def lengthen(key):
    """Resolve a short key back to the URL stored for it.

    Responds with the JSON body ``{"url": ...}`` on success and aborts
    with 400 when ``core.lengthen_url`` raises ``InvalidShortKeyError``.
    """
    try:
        original_url = core.lengthen_url(key)
    except core.InvalidShortKeyError:
        abort(400)
    else:
        return jsonify({'url': original_url})

Webapp

Webapp Routes

@bp.route('/', methods=['GET', 'POST'])
def index():
    """Render the landing page and handle the URL-shortening form.

    On POST, the ``url`` form field is shortened and the outcome is
    flashed: a clickable link under the 'success' category, or an error
    message under the 'error' category. The index template is rendered
    for both GET and POST.
    """
    if request.method == 'POST':
        try:
            short_key = core.shorten_url(request.form['url'])
        except core.InvalidURLError:
            flash('Invalid URL. Please try again.', 'error')
        except core.OutOfShortKeysError:
            flash('Unable to shorten URL, sorry!', 'error')
        else:
            short_url = 'http://{}/{}'.format(
                current_app.config['DOMAIN_NAME'], short_key)
            # Format on the Markup object (not on a plain str) so the
            # interpolated value is HTML-escaped before it is marked safe.
            short_url_markup = Markup('<a href="{0}">{0}</a>').format(
                short_url)
            flash(short_url_markup, 'success')
    return render_template('index.html')

Webapp Routes

@bp.route('/<string:key>', methods=['GET'])
def short_url_redirect(key):
    """Redirect a short key to its stored URL, or show the 404 page.

    The 404 template is rendered with status 404 both when
    ``core.lengthen_url`` raises ``InvalidShortKeyError`` and when it
    returns a falsy value.
    """
    try:
        target = core.lengthen_url(key)
    except core.InvalidShortKeyError:
        target = None
    if target:
        return redirect(target)
    return render_template('404.html'), 404

Core Logic

Key Generation Challenges

  1. Shortness vs space constraints
  2. Uniqueness

Deriving Keys From Hashes

def shorten_url(url: str) -> str:
    # Slide excerpt: only the first steps of the function are shown here.
    # InvalidURLError is propagated to caller
    url = _validate_url(url)
    # Hash the validated URL with SHA-1 and take the hex digest ...
    hashstr = hashlib.sha1(url.encode()).hexdigest()
    # ... then convert that digest into a string over the short-key charset.
    long_keystr = _key_from_hex(hashstr)

Deriving Keys From Hashes

# Number of characters in a short key; can be increased up to 10 if need be.
SHORTKEY_LENGTH = 7

# Alphabet for short keys: digits, then uppercase, then lowercase ASCII.
SHORTKEY_CHARSET = ''.join((
    string.digits,
    string.ascii_uppercase,
    string.ascii_lowercase,
))

Deriving Keys From Hashes

def _key_from_hex(hexs: str) -> str:
    """Convert a hexadecimal digest string into a SHORTKEY_CHARSET string.

    The digest bytes are rendered as one bit string, which is consumed in
    groups of six bits (the final group is padded with '0'). Each group's
    integer value indexes SHORTKEY_CHARSET. Values that fall outside the
    charset (>= N_SHORTKEY_CHARS) are all replaced by one fallback index,
    computed as the whole bit string modulo N_SHORTKEY_CHARS.

    Args:
        hexs: Hexadecimal string, as accepted by ``bytes.fromhex``.

    Returns:
        One charset character per 6-bit group; '' for an empty digest.
    """
    hash_bytes = bytes.fromhex(hexs)
    # NOTE(review): bin() drops leading zero bits, so bytes are not rendered
    # at a fixed 8-bit width. Left unchanged on purpose: padding here would
    # alter every key this function produces.
    hash_binstr = ''.join(bin(b)[2:] for b in hash_bytes)
    fill_index = None
    key_chars = []
    # assumes grouper is the itertools-recipe style chunker — TODO confirm
    for group in grouper(hash_binstr, 6, fillvalue='0'):
        i = int(''.join(group), 2)
        if i >= N_SHORTKEY_CHARS:
            # Compute the fallback lazily and only once. Compare against
            # None, not truthiness: a legitimately computed index of 0 must
            # not trigger the big-integer conversion again.
            if fill_index is None:
                fill_index = int(hash_binstr, 2) % N_SHORTKEY_CHARS
            i = fill_index
        key_chars.append(SHORTKEY_CHARSET[i])
    return ''.join(key_chars)

Ensuring Unique Keys

def shorten_url(url: str) -> str:
    # ...
    long_keystr = _key_from_hex(hashstr)
    # Slide a SHORTKEY_LENGTH-wide window across the long key, left to
    # right, and return the first window that _try_insert accepts.
    for i in range(len(long_keystr) - SHORTKEY_LENGTH + 1):
        candidate_key = long_keystr[i:i+SHORTKEY_LENGTH]
        if _try_insert(candidate_key, url):
            return candidate_key

Ensuring Unique Keys

def shorten_url(url: str) -> str:
    # ...
    # Fallback: walk the key space starting from the successor of the last
    # candidate, wrapping around, until a key is accepted or the walk
    # arrives back at the starting key.
    initial_key = candidate_key
    first_key = '0' * SHORTKEY_LENGTH
    # A falsy result from _next_key is treated as "ran past the last key".
    # Apply the wrap-around to every call, including this first one, so a
    # falsy value is never handed to _try_insert.
    cur = _next_key(initial_key) or first_key
    while cur != initial_key:
        if _try_insert(cur, url):
            return cur
        cur = _next_key(cur) or first_key  # wrap around
    # there are no more available shortkeys
    raise OutOfShortKeysError

URL Validation Challenges

Anything is a valid URL.

URL Validation

def _validate_url(url: str) -> str:
    """Check that *url* looks like an http(s) URL and return it fixed up.

    The URL is parsed with 'http' as the default scheme. It is accepted
    when the scheme is http or https and either the network location
    starts with a ``word.word`` pattern, or the network location is empty
    and the path starts with that pattern.

    Raises:
        InvalidURLError: if the scheme or the host-like part is rejected.

    Returns:
        The result of ``url_fix(url)``.
    """
    parsed = url_parse(url, scheme='http')
    if parsed.scheme not in ('http', 'https'):
        raise InvalidURLError

    host_like = re.compile(r'\w+\.\w+')
    netloc_ok = host_like.match(parsed.netloc)
    # With an empty netloc, the host-like text is looked for in the path.
    bare_host_ok = not parsed.netloc and host_like.match(parsed.path)
    if not (netloc_ok or bare_host_ok):
        raise InvalidURLError
    return url_fix(url)

End

Thanks for listening!

Questions?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment