Skip to content

Instantly share code, notes, and snippets.

@haron

haron/README.md Secret

Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save haron/5d4c70b15081cd92f83c to your computer and use it in GitHub Desktop.
Save haron/5d4c70b15081cd92f83c to your computer and use it in GitHub Desktop.

Штука, чтобы сохранить страницу поста (даже friends-only) в текстовый файл

Нужен Firefox (в нём надо залогиниться во френдфид), slimerjs (brew install slimerjs) и надо найти директорию с профилем файрфокса (инструкция: http://goo.gl/gjc4Fq).

Как пользоваться штукой

git clone git@gist.github.com:/5d4c70b15081cd92f83c.git
echo http://friendfeed.com/urbansheep/e5e8b60a | ./frf-page-download.py -d -p "$HOME/Library/Application Support/Firefox/Profiles/wkahns0x.default-2109136123"
#!/usr/bin/env python
from __future__ import print_function
import re, sys, os
from popen2 import popen2
import argparse
import urlparse
from subprocess import Popen, PIPE
import logging as log
def system(cmd):
    """Run *cmd* through the shell and return its stripped standard output.

    Whatever the command writes to its standard error is forwarded to this
    process's ``sys.stderr`` after the command has finished.

    Args:
        cmd: Shell command line; it is executed with ``shell=True``.

    Returns:
        The command's standard output with leading and trailing whitespace
        removed.
    """
    p = Popen(cmd, shell=True, bufsize=4096,
              stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True)
    # communicate() closes the child's stdin, drains stdout and stderr
    # concurrently and waits for the child. Reading stderr to EOF before
    # stdout can deadlock as soon as the child fills the stdout pipe.
    out, err = p.communicate()
    # On Python 3 the pipes yield bytes; on Python 2 they are already str,
    # so the values are left untouched there.
    if not isinstance(out, str):
        out = out.decode("utf-8", "replace")
    if not isinstance(err, str):
        err = err.decode("utf-8", "replace")
    sys.stderr.write(err)
    return out.strip()
def pprint(input_str):
    """Tidy the raw text dump of a page by applying a fixed list of regex rewrites.

    The rewrites run in the order listed below; each one operates on the
    result of the previous one.

    Args:
        input_str: Page text as a single string with newline separators.

    Returns:
        The text after all rewrites have been applied.
    """
    rewrites = (
        # drop the "(edit | delete)" controls together with the preceding newline
        (r'\n\s?\(edit \| delete\)', '', 0),
        # drop "- Comment" / "- Like" / "- Share" / " (Un-like)" action labels
        # and the newline that follows them
        (r'(- (Comment|Like|Share)| \(Un\-like\))\n', '', re.MULTILINE),
        # indent lines starting with "- " by one tab
        (r'\n\s?- ', r'\n\t- ', 0),
        # drop standalone "from <word>" lines (e.g. "from Android")
        (r'\nfrom \w+\n', r'', 0),
        # collapse a non-empty line that is immediately repeated
        (r'\n(.+)\n\1\n', r'\n\1\n', 0),
        # collapse runs of newlines into a single newline
        (r'\n+', r'\n', 0),
    )
    text = input_str
    for pattern, replacement, flags in rewrites:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text
def download(url, out, args):
    """Render *url* with SlimerJS and write the cleaned-up text to *out*.

    Three things are written to *out*, one ``print`` each: the URL, the extra
    command-line arguments joined by spaces, and the command's output after
    it has been passed through ``pprint``.

    Args:
        url: Page address. It is exported as the ``URL`` environment variable
            (presumably read by ``show.js`` -- that script is not visible
            here).
        out: Writable file-like object receiving the result.
        args: Parsed command-line namespace; ``profile`` and ``rest_of_args``
            are read.
    """
    # Function-scope import: shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    os.environ["URL"] = url
    script = os.path.join(os.path.dirname(sys.argv[0]), "show.js")
    # Quote both paths so that spaces or quote characters in the profile
    # directory or in the script location cannot break (or inject into) the
    # shell command line.
    cmd = "timeout 10 slimerjs --profile %s --disk-cache=yes %s" % (
        quote(args.profile), quote(script))
    # Messages are formatted eagerly on purpose: main() configures logging
    # with "%(msg)s", which prints the raw message without applying lazy args.
    log.debug("Downloading %s" % url)
    log.debug(cmd)
    print(url, file=out)
    print(" ".join(args.rest_of_args), file=out)
    print(pprint(system(cmd)), file=out)
def main():
    """Command-line entry point: read URLs and save each page as text.

    URLs are read one per line from the ``-i`` file (stdin by default).
    Each URL is normalised (``/iphone/`` path segment removed, ``https``
    and ``www.`` prefixes rewritten to plain ``http://``); anything that is
    not under ``http://friendfeed.com/`` is skipped. With ``-o`` the result
    goes to stdout; otherwise it goes to a ``.txt`` file in the current
    directory named after the URL path (at most 50 characters before the
    extension), and URLs whose file already exists are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", dest="input", help="input file. stdin by default", default="-")
    parser.add_argument("-o", dest="stdout", help="output to stdout", action="store_true", default=False)
    parser.add_argument("-p", dest="profile", required=True, help="Firefox profile path, see http://goo.gl/gjc4Fq")
    parser.add_argument("-f", dest="firefox", help="Firefox application path", default="/Applications/Firefox.app/Contents/MacOS/firefox")
    parser.add_argument("-d", dest="debug", help="debug", action="store_true", default=False)
    parser.add_argument(dest="rest_of_args", nargs=argparse.REMAINDER)
    args = parser.parse_args()

    os.environ["SLIMERJSLAUNCHER"] = args.firefox

    loglevel = log.DEBUG if args.debug else log.ERROR
    log.basicConfig(format='%(asctime)s %(msg)s', level=loglevel)

    # "source" rather than "input" so the builtin is not shadowed.
    read_stdin = args.input == "-"
    source = sys.stdin if read_stdin else open(args.input)
    try:
        for url in source.readlines():
            url = url.strip().replace("/iphone/", "/").replace("https://", "http://").replace("http://www.", "http://")
            if not url.startswith("http://friendfeed.com/"):
                log.debug("Not a friendfeed page: %s" % url)
                continue

            if args.stdout:
                download(url, sys.stdout, args)
                continue

            out_fn = urlparse.urlparse(url).path.strip("/").replace("/", "_")[:50] + ".txt"
            if os.path.exists(out_fn):
                log.debug("File exists: %s" % out_fn)
                continue
            # Close (and thereby flush) every output file as soon as its page
            # has been written instead of leaking the handle.
            with open(out_fn, "w") as out:
                download(url, out, args)
    finally:
        # Only close what this function opened; stdin is left alone.
        if not read_stdin:
            source.close()
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment