Skip to content

Instantly share code, notes, and snippets.

@zed
Last active September 3, 2022 04:42
Show Gist options
  • Save zed/c2168b9c52b032b5fb7d to your computer and use it in GitHub Desktop.
Save zed/c2168b9c52b032b5fb7d to your computer and use it in GitHub Desktop.
Extract filename from a URL
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import posixpath
try:
from urlparse import urlsplit
from urllib import unquote
except ImportError: # Python 3
from urllib.parse import urlsplit, unquote
def url2filename(url):
    """Return the basename (final path component) that *url* refers to.

    The path is taken from ``urlsplit(url).path``, so the query string and
    fragment are ignored; percent-escapes are decoded with ``unquote``.
    A url whose path is empty or ends with '/' yields an empty string.

    Raises ValueError (with a message naming the url) if the decoded name
    would contain a path separator: an escaped '/' ('%2f') in the final
    path segment, or anything ``os.path.basename`` would split on the
    current platform (e.g. 'dir%5Cbasename.ext' on Windows).

    >>> print(url2filename('http://example.com/path/to/file%C3%80?opt=1'))
    fileÀ
    >>> print(url2filename('http://example.com/slash%2fname')) # '/' in name
    Traceback (most recent call last):
        ...
    ValueError: no safe filename in url: 'http://example.com/slash%2fname'
    """
    urlpath = urlsplit(url).path
    raw_segment = posixpath.basename(urlpath)
    basename = posixpath.basename(unquote(urlpath))

    # Decoding the whole path and decoding only its last raw segment must
    # give the same name; they differ when that segment hides a '%2f'.
    has_escaped_slash = unquote(raw_segment) != basename
    # os.path.basename additionally splits on the platform's own separator
    # (e.g. a decoded backslash on Windows).
    has_native_separator = os.path.basename(basename) != basename

    if has_native_separator or has_escaped_slash:
        raise ValueError("no safe filename in url: %r" % (url,))
    return basename
if __name__ == "__main__":
    # When run as a script, execute the docstring examples as a self-test.
    import doctest

    doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment