-
-
Save chpwssn/e9b158df103de29923a0 to your computer and use it in GitHub Desktop.
nohome2.py: Reads from user3.txt because I can neither parse sys.argv nor use the argparse module >.<
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3.4
# -*- coding: UTF-8 -*-
# import os.path
import pathlib
import re
import sys
import urllib.parse
from optparse import OptionParser
# Command-line interface: the input file to scan is passed with -f/--file.
parser = OptionParser()
parser.add_option(
    "-f", "--file", dest="file", help="File to parse", metavar="FILE"
)
(options, args) = parser.parse_args()
if not options.file:
    # There is no default input, so a missing -f is a usage error.
    # The message goes to stderr because stdout carries the generated URL
    # list and must stay clean when redirected to a file.
    print("I need a file: "+__file__+" -f <filename>", file=sys.stderr)
    # sys.exit() instead of quit(): quit() is injected by the ``site``
    # module for interactive sessions and is not guaranteed to exist
    # (e.g. under ``python -S``).
    sys.exit(1)
# Finds home.online.no user URLs embedded in arbitrary text.  Both layouts
# are recognised: ``home.online.no/~user/...`` (tilde optionally written as
# %7E) and ``<user>.home.online.no/...``.
#
# Raw strings are used so that regex escapes such as ``\w`` are not treated
# as (invalid) string escapes, and the dots of the host name are escaped so
# that only the literal host ``home.online.no`` matches.
NOHOME_REGEX = re.compile(
    r'(?:\w+:?//)?'        # optional scheme; the colon may be missing
    r'(?:[\w\-.]+\.)?'     # optional sub-domain (``www.`` or ``<user>.``)
    r'home\.online\.no/'   # literal host followed by the path separator
    r'(?:~|%7[Ee])?'       # optional tilde, plain or percent-encoded
    r'[\w\-.]+'            # first path component (the user name)
    r'[\w/\-.]+'           # rest of the path; at least one more character
)
# True when a URL already begins with an explicit ``scheme://`` prefix.
PROPER_URL_START = re.compile(r'^\w+://')
if __name__ == '__main__':
    # Scan the input file line by line and, for every home.online.no URL
    # found, print the canonical ``http://home.online.no/~user/`` address of
    # the user's home directory.  URLs whose user cannot be determined are
    # reported on stderr and skipped.
    #
    # The ``with`` block guarantees the file is closed even if a line fails
    # to decode or processing raises.
    with open(options.file, mode='rt') as fp:
        for line in fp:
            # None of the regex's character classes match line terminators,
            # so the line does not need to be stripped first.
            for url in NOHOME_REGEX.findall(line):
                # urlparse() only fills in ``netloc`` when a scheme is
                # present, so supply a default one for bare matches.
                if not PROPER_URL_START.match(url):
                    url = 'http://' + url
                parsed = urllib.parse.urlparse(url)

                host = parsed.netloc
                if host.startswith('www.'):
                    host = host[4:]

                if host.endswith('.home.online.no'):
                    # <user>.home.online.no: the user name is the first
                    # label of the host name.
                    userdir = '~' + host.split('.', maxsplit=1)[0]
                else:
                    # home.online.no/~user/...: the user directory is the
                    # first path component after the root.  URL paths are
                    # always '/'-separated, hence PurePosixPath regardless
                    # of the platform the script runs on.
                    dirpath = pathlib.PurePosixPath(
                        urllib.parse.unquote(parsed.path)
                    ).parts
                    # Paths such as '/./' collapse to just the root, so
                    # check the length before indexing.
                    if len(dirpath) < 2 or not dirpath[1].startswith('~'):
                        print('error for', dirpath, file=sys.stderr)
                        # Skip instead of emitting a bogus '.../None/' URL.
                        continue
                    userdir = dirpath[1]

                print('http://home.online.no/{0}/'.format(userdir))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment