Created
October 28, 2015 16:05
-
-
Save vitzli/ec30fb4570a7f7faa83b to your computer and use it in GitHub Desktop.
nohome2.py: Reads from user3.txt because I can neither parse sys.argv nor use the argparse module >.<
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3.4 | |
# -*- coding: UTF-8 -*- | |
import urllib.parse | |
# import os.path | |
import pathlib | |
import re | |
import sys | |
# Finds references to home.online.no user pages inside free text.
# Layout of a match:
#   [scheme://] [subdomain.] home.online.no / [~ or %7E] name rest-of-path
# - raw strings keep the regex escapes away from Python's own string escapes;
# - the dots of the host name are escaped so they match a literal '.' only;
# - the tilde marker is an alternation ('~', '%7E' or '%7e'), not a character
#   class, so stray '|' or '%' characters are no longer accepted in its place;
# - the two trailing groups each need at least one character, so at least two
#   characters must follow the (optional) tilde marker.
NOHOME_REGEX = re.compile(
    r'(?:\w+:?//)?(?:[\w\-.]+\.)?home\.online\.no/(?:~|%7[Ee])?'
    r'(?:[\w\-.]+)(?:[\w/\-.]+)'
)
# Matches text that already starts with an explicit "scheme://" prefix.
PROPER_URL_START = re.compile(r'^\w+://')
if __name__ == '__main__': | |
fp = open('user3.txt', mode='rt') | |
for nl in fp: | |
line = nl.strip('\n') | |
x = NOHOME_REGEX.findall(line) | |
if len(x) > 0: | |
for url in x: | |
# x = pathlib.PurePath(out.path).parts | |
userdir = None | |
if not PROPER_URL_START.match(url): | |
url = 'http://' + str(url) | |
out = urllib.parse.urlparse(url) | |
ps = out.netloc | |
pp = urllib.parse.unquote(out.path) | |
dirpath = pathlib.PurePath(pp).parts | |
# print(dirpath) | |
if ps.startswith('www.'): | |
ps = ps[4:] | |
# print(ps) | |
if ps.endswith('.home.online.no'): | |
userdir = '~' + str(ps.split('.', maxsplit=1)[0]) | |
# print(userdir) | |
if userdir is None: | |
if not dirpath[1].startswith('~'): | |
print('error for', dirpath, file=sys.stderr) | |
else: | |
userdir = dirpath[1] | |
print('http://home.online.no/{0}/'.format(userdir)) | |
# print(out, '->', userdir) | |
fp.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment