|
#!/usr/bin/env python |
|
|
|
from __future__ import print_function |
|
import re, sys, os |
|
from popen2 import popen2 |
|
import argparse |
|
import urlparse |
|
from subprocess import Popen, PIPE |
|
import logging as log |
|
|
|
def system(cmd):
    """Run *cmd* through the shell and return its stripped standard output.

    Whatever the child writes to its standard error is forwarded to this
    process's ``sys.stderr`` once the child has finished.

    Both pipes are drained together with ``communicate()``: reading stderr
    to EOF before touching stdout can deadlock as soon as the child fills
    the stdout pipe buffer while we are still blocked on stderr.

    :param cmd: shell command line to execute.
    :returns: the child's stdout with surrounding whitespace stripped.
    """
    p = Popen(cmd, shell=True, bufsize=4096,
              stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True)
    # communicate() also closes the child's stdin and waits for it to exit.
    out, err = p.communicate()
    # Pipes yield bytes on Python 3 and str on Python 2; normalise to the
    # native str type so callers can always treat the result as text.
    if not isinstance(out, str):
        out = out.decode("utf-8", "replace")
    if not isinstance(err, str):
        err = err.decode("utf-8", "replace")
    sys.stderr.write(err)
    return out.strip()
|
|
|
def pprint(input_str):
    """Return *input_str* with page chrome and redundant lines stripped.

    The substitutions below are applied strictly in order; each one works
    on the output of the previous one.
    """
    substitutions = (
        # drop the "(edit | delete)" controls together with their newline
        (r'\n\s?\(edit \| delete\)', '', 0),
        # drop the "- Comment" / "- Like" / "- Share" / " (Un-like)" actions
        (r'(- (Comment|Like|Share)| \(Un\-like\))\n', '', re.MULTILINE),
        # tab-indent lines that begin with "- " (comment text / username)
        (r'\n\s?- ', r'\n\t- ', 0),
        # drop 'from <word>' lines such as "from Android"
        (r'\nfrom \w+\n', r'', 0),
        # collapse a non-empty line that is immediately repeated
        (r'\n(.+)\n\1\n', r'\n\1\n', 0),
        # collapse runs of newlines into a single newline
        (r'\n+', r'\n', 0),
    )
    text = input_str
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text
|
|
|
def download(url, out, args):
    """Fetch *url* via slimerjs and write the cleaned-up result to *out*.

    The URL is handed to the child process through the ``URL`` environment
    variable (presumably read by ``show.js`` -- confirm against that
    script).  ``show.js`` is looked up in the directory of the running
    script, and the whole command is wrapped in ``timeout 10``.

    Written to *out*, in order: the URL, the space-joined
    ``args.rest_of_args``, and the command's stdout after ``pprint``.

    :param url: page address to fetch.
    :param out: writable file-like object receiving the result.
    :param args: parsed options; ``profile`` and ``rest_of_args`` are used.
    """
    script_path = os.path.join(os.path.dirname(sys.argv[0]), "show.js")
    cmd = "timeout 10 slimerjs --profile '%s' --disk-cache=yes %s" % (
        args.profile, script_path)
    os.environ["URL"] = url

    # Messages are formatted eagerly with %: the logging format configured
    # in this file prints %(msg)s, which would not interpolate lazy args.
    log.debug("Downloading %s" % url)
    log.debug(cmd)

    # Header lines go out before the (slow) page fetch is started.
    print(url, file=out)
    print(" ".join(args.rest_of_args), file=out)
    page_text = system(cmd)
    print(pprint(page_text), file=out)
|
|
|
def main():
    """Parse the command line and download every FriendFeed URL given.

    URLs are read one per line from the ``-i`` file (stdin by default).
    Each line is stripped and normalised ("/iphone/" removed, https turned
    into http, leading "www." dropped); anything that does not then start
    with ``http://friendfeed.com/`` is skipped.

    With ``-o`` every page is written to stdout.  Otherwise each page goes
    to a file named after the URL path (slashes replaced by underscores,
    truncated to 50 characters, ``.txt`` appended); URLs whose file already
    exists are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", dest="input", help="input file. stdin by default", default="-")
    parser.add_argument("-o", dest="stdout", help="output to stdout", action="store_true", default=False)
    parser.add_argument("-p", dest="profile", required=True, help="Firefox profile path, see http://goo.gl/gjc4Fq")
    parser.add_argument("-f", dest="firefox", help="Firefox application path", default="/Applications/Firefox.app/Contents/MacOS/firefox")
    parser.add_argument("-d", dest="debug", help="debug", action="store_true", default=False)
    parser.add_argument(dest="rest_of_args", nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # Export the Firefox path so the slimerjs child process inherits it.
    os.environ["SLIMERJSLAUNCHER"] = args.firefox

    loglevel = log.DEBUG if args.debug else log.ERROR
    log.basicConfig(format='%(asctime)s %(msg)s', level=loglevel)

    # Read the whole URL list up front; a real input file is closed as soon
    # as it has been read instead of being left open for the whole run.
    if args.input == "-":
        lines = sys.stdin.readlines()
    else:
        with open(args.input) as source:
            lines = source.readlines()

    for url in lines:
        url = url.strip().replace("/iphone/", "/").replace("https://", "http://").replace("http://www.", "http://")
        if not url.startswith("http://friendfeed.com/"):
            log.debug("Not a friendfeed page: %s" % url)
            continue

        if args.stdout:
            download(url, sys.stdout, args)
            continue

        out_fn = urlparse.urlparse(url).path.strip("/").replace("/", "_")[:50] + ".txt"
        if os.path.exists(out_fn):
            log.debug("File exists: %s" % out_fn)
            continue

        # The with-block flushes and closes each output file, even when
        # the download raises.
        with open(out_fn, "w") as out:
            download(url, out, args)
|
|
|
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()