nucular/chanrip.py

## chanrip.py
#!/bin/env python3
# python3 chanrip.py --help

"""
Chanrip
=======

Rips a thread from 4chan or 8chan, downloading all posted files and saving all
replies to an HTML file. Optionally monitors the thread for changes, downloading
new replies automatically, and can also download thumbnails.

TODO:
- De-spaghettify and cleanup (it's a real mess currently)
- Make the chans/imageboards separate classes inheriting from an abstract class
- Themes for the output html?
- Support more imageboards

LICENSE:
The MIT License (MIT)

Copyright (c) 2014/2015 nucular

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import os
import sys
import time, datetime
import re

import threading
import urllib.request
import urllib.error
import argparse
import json


# Registry of supported imageboards, keyed by a short site name.
# Every entry provides:
#   "url"       - compiled regex searched against the thread URL; its capture
#                 groups hold the board name and the thread number
#   "board"     - index of the capture group holding the board name
#   "thread"    - index of the capture group holding the thread number
#   "api"       - template of the thread's JSON endpoint ({board}, {thread})
#   "file"      - template of the remote URL of a posted file
#   "thumb"     - template of the remote URL of a thumbnail
#   "filename"  - template of the local name a posted file is saved under
#   "thumbname" - template of the local name a thumbnail is saved under
#   "gifthumbs" - True if thumbnails of ".gif" files use the ".gif" extension
#                 themselves; otherwise every thumbnail is treated as ".jpg"
# The "file"/"thumb"/"filename"/"thumbname" templates are filled from the
# fields of a post (e.g. {tim}, {filename}, {ext}) plus {board} and {thext}.
CHANS = {
    "4chan": {
        "url": re.compile(r"boards\.4chan\.org\/(\w+)\/thread\/(\d+)"),
        "board": 0,
        "thread": 1,
        "api": "https://a.4cdn.org/{board}/thread/{thread}.json",
        "file": "https://i.4cdn.org/{board}/{tim}{ext}",
        "thumb": "https://0.t.4cdn.org/{board}/{tim}s.jpg",
        "filename": "{tim}-{filename}{ext}",
        "thumbname": "{tim}-{filename}{thext}",
        "gifthumbs": False
    },
    "8chan": {
        "url": re.compile(r"8chan\.co\/(\w+)\/res\/(\d+)\.html"),
        "board": 0,
        "thread": 1,
        "api": "https://8chan.co/{board}/res/{thread}.json",
        "file": "https://media.8chan.co/{board}/src/{tim}{ext}",
        "thumb": "https://media.8chan.co/{board}/thumb/{tim}{thext}",
        "filename": "{tim}-{filename}{ext}",
        "thumbname": "{tim}-{filename}{thext}",
        "gifthumbs": True
    }
}

# Default User-Agent header sent with requests; can be overridden with the
# -u/--useragent command line option.
USER_AGENT = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"

# Stylesheet embedded verbatim into the <style> element of the generated
# index.html (dark "Tomorrow" colour scheme).
# Fixed relative to the previous revision: the `hr` rule ended its `clear`
# declaration with ":" instead of ";" (so browsers discarded it) and the
# `.post` rule carried a `background:` declaration without any value.
CSS_TOMORROW = """
body {
    background: #1d1f21 none;
    color: #C5C8C6;
    font-family: arial,helvetica,sans-serif;
    font-size: 10pt;
}

a:link, a:visited {
    color: #81a2be;
    text-decoration: underline;
}

a:link:hover {
    color: #5F89AC;
}

span.deadlink {
    color: #f22;
    text-decoration: line-through;
}

header {
    margin: 1em 0;
    text-align: center;
}

header h1 {
    font-family: tahoma;
    letter-spacing: -2px;
    font-size: 20pt;
    margin: 0;
}

header div.subtitle {
    font-size: 8pt;
}

header div.subtitle a {
    color: #C5C8C6;
    text-decoration: none;
}

header div.subtitle a:hover {
    text-decoration: underline;
}

hr {
    border: 0;
    border-top: 1px solid #282a2e;
    height: 1px;
    clear: left;
}

p.fileinfo {
    display: block;
    margin: 0 0 0 20px;
}

.post {
    max-width: 80%;
}

.file {
    float: left;
    margin-right: 2px;
    width: 210px;
}

.file:not(.multifile) {
    float: none;
}

.unimportant, .unimportant * {
    font-size: 10px;
}

a .post-image {
    float: left;
    padding: 5px;
    margin: 0 20px 0 0;
    max-width: 98%;
    width: auto;
    height: auto;
    max-height: 200px;
    max-width: 200px;
}

a .full-image {
    position: absolute;
    left: 5px;

    padding: 5px;
    margin: 0 20px 0 0;
    max-width: 98%; max-width: calc(100% - 20px);
}

div.post.op {
    margin-right: 20px;
    margin-bottom: 5px;
}

div.post {
    padding-left: 20px;
    clear: both;
}

div.post-hover {
    position: absolute;
    margin: 0 !important;
    box-shadow: 0px 3px 10px rgba(0,0,0,0.5);
}

p.intro {
    clear: none;
    margin: 0.5em 0;
    padding: 0;
    padding-bottom: 0.2em;
}

p.intro span.subject {
    color: #b294bb;
    font-weight: bold;
}

p.intro span.name {
    color: #C5C8C6;
    font-weight: bold;
}

p.intro span.post_no a {
    color: #C5C8C6;
    margin: 0;
}

p.intro a {
    text-decoration: none;
}

div.post div.body {
    clear: both;
    word-wrap: break-word;
    white-space: pre-wrap;

}

div.post.reply {
    display: inline-block;
    background-color: #282a2e;
    border: 1px solid #282a2e;
    margin-bottom: 2px;
    margin-left: 16px;
    margin-top: 2px;
    max-width: 94%; max-width: calc(100% - 16px);
    padding: 0.2em 0.3em 0.5em 0.6em;
}

div.post.reply p {
    margin: 0.3em 0 0 0;
}

div.post.reply div.body {
    margin-left: 1.8em;
    margin-top: 0.8em;
    padding-right: 3em;
    padding-bottom: 0.3em;
}

span.quote {
    color: #adbd68;
}
"""

# Script embedded verbatim into the generated index.html (after jQuery).
# It toggles inline expansion of a file when its thumbnail link is clicked and
# shows a floating copy of a post while one of its reply/quote links is hovered.
# Fixed relative to the previous revision:
#   - the hover handler tested `if (p)`, which is always true for a jQuery
#     object; it now tests `p.length` so links to missing posts do nothing
#   - <object> elements load their resource from `data`, not `src`
#   - the unused `thumb` variable was dropped
JS_MAIN = """
String.prototype.endsWith = function(suffix) {
    return this.indexOf(suffix, this.length - suffix.length) !== -1;
};

$(".post-image-link").bind("click", function(e) {
    if (e.which == 2)
        return;
    e.preventDefault();
    if ($(this).hasClass("expanded")) {
        $(this).find(".full-image").remove();
        $(this).removeClass("expanded");
        $(window).scrollTop($(this).attr("data-scrolltop"));
    } else {
        var href = $(this).attr("href");
        if (href.endsWith(".mp4") || href.endsWith(".webm"))
            $("<video src=\\"" + $(this).attr("href") + "\\" class=\\"full-image\\" autoplay controls></video>").appendTo(this);
        else if (href.endsWith(".swf"))
            $("<object data=\\"" + $(this).attr("href") + "\\" class=\\"full-image\\"></object>").appendTo(this);
        else
            $("<img src=\\"" + $(this).attr("href") + "\\" class=\\"full-image\\">").appendTo(this);
        $(this).addClass("expanded");
        $(this).attr("data-scrolltop", $(window).scrollTop());
    }
});

$(".post-reply,.quotelink").bind("mouseenter", function(e) {
    var p = $($(this).attr("href"));
    if (p.length) {
        p.clone().appendTo("body")
            .addClass("post-hover")
            .css($(this).position())
            .bind("mouseleave", function(e) {
                $(this).remove();
            });
    }
});
"""

# Page skeleton of the generated index.html, filled with str.format().
# Placeholders: {url} (original thread URL, used in the leading comment and
# the subtitle link), {css} (stylesheet text), {board} (board name shown in
# the heading), {thread} (rendered posts) and {js} (inline script text).
HTML_MAIN = """<!-- Thread ripped from {url} using ChanRip -->
<html>
<head>
<style>{css}</style>
</head>
<body>
<header>
<h1>/{board}/</h1>
<div class="subtitle"><a href="{url}">{url}</a></div>
</header>
<hr/>
<div class="thread">
{thread}
</div>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
<script>
{js}
</script>
</body>
</html>
"""

# Markup for a single attached file, filled with str.format().
# Placeholders: {path} (link target of the file), {name} (link text),
# {size}, {w}, {h} and {origname} (shown in the info line) and {thumbpath}
# (source of the preview image).
HTML_IMAGE = """
<p class="fileinfo">File <a href="{path}">{name}</a>
<span class="unimportant">({size}, {w}x{h}, {origname})</span>
</p>
<a href="{path}" target="_blank" class="post-image-link">
<img class="post-image" src="{thumbpath}">
</a>
"""

# Markup for the opening post of a thread, filled with str.format().
# Placeholders: {no} (post number, also used as the element id "p{no}"),
# {subject}, {name}, {uid}, {date}, {replies} (links to replying posts) and
# {body} (post text, inserted as-is).
HTML_OP = """
<div class="post op" id="p{no}">
<p class="intro">
<span class="subject">{subject}</span>
<span class="name">{name}&nbsp;{uid}</span>
<time datetime="{date}">{date}</time>
<span class="post_no">No.<a href="#p{no}">{no}</a></span>
<span class="post_replies">{replies}</span>
</p>
<div class="body">{body}</div>
</div>
"""

# Markup for a reply post, filled with str.format().
# Placeholders: {no} (post number, also used as the element id "p{no}"),
# {name}, {uid}, {date}, {replies} (links to replying posts), {files}
# (rendered attachments, may be empty) and {body} (post text, inserted as-is).
HTML_REPLY = """
<div class="post reply" id="p{no}">
<p class="intro">
<span class="name">{name}&nbsp;{uid}</span>
<time datetime="{date}">{date}</time>
<span class="post_no">No.<a href="#p{no}">{no}</a></span>
<span class="post_replies">{replies}</span>
</p>
<div class="files">
{files}
</div>
<div class="body">{body}</div>
</div>
"""

def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human readable string with binary prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest prefix ("Yi") is reached, then printed with one decimal place,
    e.g. ``sizeof_fmt(1536) == "1.5KiB"``.

    :param num: the quantity to format
    :param suffix: unit appended after the prefix, "B" by default
    :returns: the formatted string
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    step = 0
    # Scale down one prefix at a time until the value fits.
    while step < len(prefixes) and not abs(value) < 1024.0:
        value = value / 1024.0
        step += 1

    if step == len(prefixes):
        # Ran out of prefixes: everything beyond "Zi" is reported as "Yi".
        return "%.1f%s%s" % (value, 'Yi', suffix)
    return "%3.1f%s%s" % (value, prefixes[step], suffix)

def chunker(res, chunksize=4096, hook=None, store=False):
    """Read a response in chunks, reporting progress after every chunk.

    :param res: response-like object providing ``info()`` (a mapping of
        headers) and ``read(size)``
    :param chunksize: number of bytes requested per ``read`` call
    :param hook: optional callable invoked after every non-empty chunk as
        ``hook(chunk, bytesread, totalbytes, percent)``; ``totalbytes`` is 0
        and ``percent`` is the string ``"?"`` when the size is unknown
    :param store: when true, collect the chunks and return them as ``bytes``
    :returns: ``True`` if reading was aborted by a ``KeyboardInterrupt``
        (callers rely on this signal), otherwise the collected ``bytes`` when
        ``store`` is set, otherwise ``None``
    """
    try:
        totalbytes = int((res.info().get("Content-Length") or "0").strip())
    except ValueError:
        # A malformed Content-Length only affects the progress display.
        totalbytes = 0
    bytesread = 0
    # Chunks are gathered in a list and joined once; repeatedly extending an
    # immutable bytes object would copy the whole buffer on every chunk.
    chunks = [] if store else None

    try:
        while True:
            chunk = res.read(chunksize)
            if not chunk:
                break
            bytesread += len(chunk)
            if store:
                chunks.append(chunk)

            if hook:
                if totalbytes == 0:
                    percent = "?"
                else:
                    percent = int(bytesread / totalbytes * 100)
                hook(chunk, bytesread, totalbytes, percent)
    except KeyboardInterrupt:
        return True

    if store:
        return b"".join(chunks)
    return None

# FancyURLopener has been deprecated since Python 3.3 and is no longer
# available in recent Python releases; fall back to a plain object base so
# that merely defining the class cannot break importing the script.
_URL_OPENER_BASE = getattr(urllib.request, "FancyURLopener", object)


class UserAgentOpener(_URL_OPENER_BASE):
    """URL opener that identifies itself with a custom User-Agent string."""

    def __init__(self, useragent, *args, **kwargs):
        """Create the opener.

        :param useragent: value stored as ``version``, which the base opener
            uses for its User-Agent header
        :param args: positional arguments forwarded to the base opener
        :param kwargs: keyword arguments forwarded to the base opener
        """
        # Must be set before the base initialiser runs, because that is where
        # the default headers are built from ``version``.
        self.version = useragent
        if _URL_OPENER_BASE is not object:
            super().__init__(*args, **kwargs)


class Ripper(object):
    """Rips one imageboard thread into a local directory.

    A ripper resolves the thread URL against ``CHANS``, downloads the thread's
    JSON, saves every posted file (and optionally its thumbnail) and renders
    all posts into ``index.html`` inside the target directory.
    """

    def __init__(self, url, directory, verbose=False, useragent=USER_AGENT, thumbs=True):
        """Prepare a ripper for one thread.

        :param url: link to the thread; must match one of the ``CHANS`` patterns
        :param directory: folder to rip into, created if missing
        :param verbose: stored on the instance (currently not consulted)
        :param useragent: User-Agent header sent with every request
        :param thumbs: whether thumbnails are downloaded into ``_thumbs``
        :raises NotImplementedError: if no entry of ``CHANS`` matches ``url``
        """
        self.verbose = verbose
        self.useragent = useragent
        self.thumbs = thumbs

        # Decoded thread JSON; stays None until the first successful fetch().
        # The thread's number is kept separately in ``threadid``.
        self.thread = None
        self.ripped = []
        # Number of posts that have already been processed by rip().
        self.postcount = 0

        # Resolve the imageboard first so that an unsupported URL does not
        # leave freshly created, empty directories behind.
        self.chan = None
        m = None
        for chan in CHANS.values():
            m = chan["url"].search(url)
            if m:
                self.chan = chan
                break
        if not self.chan:
            raise NotImplementedError("{} is not supported (yet)".format(url))

        self.url = url
        self.board = m.groups()[self.chan["board"]]
        self.threadid = m.groups()[self.chan["thread"]]
        self.apiurl = self.chan["api"].format(board=self.board, thread=self.threadid)

        os.makedirs(directory, exist_ok=True)
        if thumbs:
            os.makedirs(os.path.join(directory, "_thumbs"), exist_ok=True)
        self.directory = directory

        print("API: " + self.apiurl)

    def fetch(self):
        """Download and decode the thread JSON into ``self.thread``.

        :returns: ``True`` if the thread could not be fetched or parsed (the
            previous ``self.thread`` is kept), otherwise ``None``
        :raises KeyboardInterrupt: if the user aborted the transfer
        """
        def hook(chunk, read, total, percent):
            sys.stdout.write("\rFetching thread... {}/{} {}%".format(read, total, percent))

        req = urllib.request.Request(self.apiurl)
        req.add_header("User-Agent", self.useragent)
        try:
            res = urllib.request.urlopen(req)
        except urllib.error.URLError as e:
            print(e)
            return True
        with res:
            data = chunker(res, chunksize=128, hook=hook, store=True)
        print("")

        if data is True:
            # chunker() swallows the interrupt and signals it with True; there
            # is nothing to decode, so hand the interrupt back to the caller.
            raise KeyboardInterrupt

        try:
            thread = json.loads(data.decode("utf-8"))
        except ValueError as e:
            # Covers both undecodable bytes and malformed JSON.
            print("Could not parse thread: {}".format(e))
            return True
        self.thread = thread

    def downloadFile(self, name, path, url, no=1, of=1):
        """Download ``url`` to ``path`` while printing a progress line.

        :param name: display name used in the progress line
        :param path: local destination path
        :param url: remote location of the file
        :param no: 1-based position of this download within its batch
        :param of: size of the batch
        :returns: ``True`` if the user interrupted the download, otherwise
            ``None`` (also when the request itself failed)
        """
        req = urllib.request.Request(url)
        req.add_header("User-Agent", self.useragent)
        try:
            res = urllib.request.urlopen(req)
        except urllib.error.URLError as e:
            print(e)
            return

        # Assume failure until chunker() reports a clean finish, so that any
        # error while writing also cleans up the partial file. A leftover
        # partial file would be mistaken for a finished download next time.
        incomplete = True
        try:
            with res, open(path, "wb") as s:
                def hook(chunk, read, total, percent):
                    sys.stdout.write("\r{}/{} {}... {}/{} {}%".format(no, of, name, read, total, percent))
                    s.write(chunk)
                incomplete = chunker(res, hook=hook, chunksize=16384)
        finally:
            if incomplete and os.path.exists(path):
                os.remove(path)

        if incomplete:
            print("\nInterrupted by user")
            return True
        print("")

    def _fields(self, entry):
        """Return the template fields for one file entry (a post or an extra file).

        Works on a copy so the fetched thread data is never modified.
        """
        fields = dict(entry)
        fields["board"] = self.board
        fields["thread"] = self.threadid
        if self.chan["gifthumbs"] and entry["ext"] == ".gif":
            fields["thext"] = ".gif"
        else:
            fields["thext"] = ".jpg"
        return fields

    def _localName(self, key, fields):
        """Fill the local name template ``key`` and neutralise path separators.

        The original file name comes from the remote site and must not be able
        to point outside of the target directory.
        """
        name = self.chan[key].format(**fields)
        return name.replace("/", "_").replace("\\", "_")

    def _enqueue(self, queue, namekey, urlkey, folder, fields):
        """Append a ``[name, path, url]`` download job unless the file exists."""
        name = self._localName(namekey, fields)
        path = os.path.join(folder, name)
        if not os.path.exists(path):
            queue.append([name, path, self.chan[urlkey].format(**fields)])

    def _downloadAll(self, queue):
        """Download every job of ``queue``, stopping at a user interrupt."""
        for n, job in enumerate(queue, 1):
            if self.downloadFile(*job, no=n, of=len(queue)):
                break

    def _filesHtml(self, post):
        """Render all files attached to ``post``; empty string if it has none."""
        if "tim" not in post:
            return ""
        from urllib.parse import quote

        cssclass = "file multifile" if "extra_files" in post else "file"
        parts = ["<div class=\"files\">"]
        for entry in [post] + list(post.get("extra_files", [])):
            fields = self._fields(entry)
            # Links are percent-encoded so that names containing characters
            # such as "#", "?" or "&" still resolve to the file on disk.
            path = quote(self._localName("filename", fields))
            if self.thumbs:
                thumbpath = "_thumbs/" + quote(self._localName("thumbname", fields))
            else:
                thumbpath = path
            parts.append("\n<div class=\"{}\">".format(cssclass))
            parts.append(HTML_IMAGE.format(
                path=path,
                thumbpath=thumbpath,
                name=str(entry["tim"]) + entry["ext"],
                size=sizeof_fmt(entry.get("fsize", 0)),
                w=entry.get("w", "?"), h=entry.get("h", "?"),
                origname=entry["filename"] + entry["ext"]
            ))
            parts.append("</div>")
        parts.append("</div>")
        return "".join(parts)

    def _postFields(self, post):
        """Return the template fields shared by ``HTML_OP`` and ``HTML_REPLY``."""
        replies = post.get("replies")
        if not isinstance(replies, list):
            # The API may use "replies" for something else (e.g. a count).
            replies = []
        return {
            "name": post.get("name", ""),
            "uid": "(ID: {})".format(post["id"]) if "id" in post else "",
            "date": datetime.datetime.fromtimestamp(post["time"]).strftime("%m/%d/%y (%a) %H:%M:%S"),
            "no": post["no"],
            "body": post.get("com") or "",
            "replies": " ".join(
                "<a class=\"post-reply\" href=\"#p{no}\">&gt;&gt;{no}</a>".format(no=no)
                for no in replies
            ),
        }

    def writeThread(self):
        """Render the fetched thread into ``index.html`` in the target directory."""
        print("Writing thread to index.html...")

        posts = self.thread["posts"]
        op = posts[0]
        parts = [
            self._filesHtml(op),
            HTML_OP.format(subject=op.get("sub") or "", **self._postFields(op)),
        ]
        for post in posts[1:]:
            parts.append(HTML_REPLY.format(files=self._filesHtml(post), **self._postFields(post)))
            parts.append("<br/>")

        # Written as pure ASCII with numeric character references: the page
        # declares no charset, so this renders correctly in every browser and
        # cannot fail on systems whose default encoding is not Unicode.
        target = os.path.join(self.directory, "index.html")
        with open(target, "w", encoding="ascii", errors="xmlcharrefreplace") as s:
            s.write(HTML_MAIN.format(
                url=self.url,
                css=CSS_TOMORROW,
                js=JS_MAIN,
                board=self.board,
                thread="".join(parts)
            ))

    def _mapReplies(self, posts):
        """Fill every post's ``replies`` list with the numbers of posts quoting it.

        Only quotes of the post itself or of earlier posts are recorded.
        """
        position = {}
        for pos, post in enumerate(posts):
            if not isinstance(post.get("replies"), list):
                post["replies"] = []
            position.setdefault(str(post["no"]), pos)

        for pos, post in enumerate(posts):
            # Matching the complete number keeps ">>123" from also counting as
            # a reply to post 12.
            for quoted in re.findall(r"&gt;&gt;(\d+)", post.get("com") or ""):
                target = position.get(quoted)
                if target is None or target > pos:
                    continue
                replies = posts[target]["replies"]
                if post["no"] not in replies:
                    replies.append(post["no"])

    def rip(self):
        """Fetch the thread, download files of new posts and rewrite index.html.

        Returns early if the thread could not be fetched or has no new posts.
        """
        if self.fetch():
            return

        posts = self.thread["posts"]
        postcount = len(posts)
        if postcount <= self.postcount:
            print("No new posts")
            return

        self._mapReplies(posts)

        # Turn absolute links to posts of this thread into in-page anchors.
        for post in posts:
            if post.get("com"):
                post["com"] = re.sub(
                    r'href="/\w+/res/\d+\.html#(\d+)"',
                    r'href="#p\1" class="quotelink"',
                    post["com"]
                )

        files = []
        thumbs = []
        thumbdir = os.path.join(self.directory, "_thumbs")
        # Posts before self.postcount were handled by an earlier rip().
        for post in posts[self.postcount:]:
            if "tim" not in post:
                continue
            for entry in [post] + list(post.get("extra_files", [])):
                fields = self._fields(entry)
                self._enqueue(files, "filename", "file", self.directory, fields)
                if self.thumbs:
                    self._enqueue(thumbs, "thumbname", "thumb", thumbdir, fields)

        print("{} not indexed post(s), downloading {} file(s)".format(postcount - self.postcount, len(files)))
        self._downloadAll(files)

        if self.thumbs and thumbs:
            print("Downloading {} thumbnail(s)".format(len(thumbs)))
            self._downloadAll(thumbs)

        self.postcount = postcount
        self.writeThread()

    def monitor(self, delay):
        """Re-rip the thread forever, waiting ``delay`` seconds between checks.

        Runs until an exception (typically ``KeyboardInterrupt``) ends it.

        :param delay: seconds between two checks; values below 1 are raised
            to 1 so the remote site is never polled without a pause
        """
        delay = max(1, delay)
        timer = delay
        cleartimer = " " * (len(str(timer)) * 2 + 15)

        while True:
            while timer > 0:
                # Blank the line first so a shorter countdown leaves no residue.
                sys.stdout.write("\r" + cleartimer)
                sys.stdout.write("\rMonitoring... {}/{}".format(timer, delay))
                time.sleep(1)
                timer -= 1
            timer = delay
            print("")
            self.rip()

def main():
    """Parse the command line, rip the thread once and optionally monitor it."""
    parser = argparse.ArgumentParser(
        description="Thread ripper for image boards implementing the 4chan/8chan "
                    "APIs with optional monitoring."
    )
    parser.add_argument("URL", help="a link to the board thread")
    parser.add_argument("DIR", help="a folder to rip to")
    parser.add_argument("-m", "--monitor", action="store_true", help="keep running and regularly check thread for changes")
    parser.add_argument("-d", "--delay", type=int, metavar="SEC", default=20, help="delay between monitoring checks, defaults to 20")
    parser.add_argument("-v", "--verbose", action="store_true", help="show some debug logging")
    parser.add_argument("-u", "--useragent", default=USER_AGENT, help="the user agent to use on requests")
    parser.add_argument("--nothumbs", action="store_true", default=False, help="don't download thumbs with the files")
    args = parser.parse_args()

    # A non-positive delay would make monitor mode poll without any pause.
    if args.monitor and args.delay < 1:
        parser.error("--delay must be at least 1 second")

    try:
        ripper = Ripper(args.URL, args.DIR, verbose=args.verbose, useragent=args.useragent, thumbs=not args.nothumbs)
    except NotImplementedError as e:
        # Unsupported URL: report it as a usage error instead of a traceback.
        parser.error(str(e))

    try:
        ripper.rip()
        if args.monitor:
            ripper.monitor(args.delay)
    except KeyboardInterrupt:
        # Ctrl-C is the regular way to stop, both while ripping and monitoring.
        pass
    print("\nDone!")


if __name__ == "__main__":
    main()
	#!/bin/env python3
	# python3 chanrip.py --help

	"""
	Chanrip
	=======

	Rips a thread from 4chan or 8chan, downloading all posted files and saving all
	replies to a HTML file. Optionally supports monitoring changes while downloading
	new replies automatically and thumbnail downloading.

	TODO:
	- De-spaghettify and cleanup (it's a real mess currently)
	- Make the chans/imageboards separate classes inheriting from an abstract class
	- Themes for the output html?
	- Support more imageboards

	LICENSE:
	The MIT License (MIT)

	Copyright (c) 2014/2015 nucular

	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.
	"""

	import os
	import sys
	import time, datetime
	import re

	import threading
	import urllib.request
	import urllib.error
	import argparse
	import json


	CHANS = {
	"4chan": {
	"url": re.compile(r"boards\.4chan\.org\/(\w+)\/thread\/(\d+)"),
	"board": 0,
	"thread": 1,
	"api": "https://a.4cdn.org/{board}/thread/{thread}.json",
	"file": "https://i.4cdn.org/{board}/{tim}{ext}",
	"thumb": "https://0.t.4cdn.org/{board}/{tim}s.jpg",
	"filename": "{tim}-{filename}{ext}",
	"thumbname": "{tim}-{filename}{thext}",
	"gifthumbs": False
	},
	"8chan": {
	"url": re.compile(r"8chan\.co\/(\w+)\/res\/(\d+)\.html"),
	"board": 0,
	"thread": 1,
	"api": "https://8chan.co/{board}/res/{thread}.json",
	"file": "https://media.8chan.co/{board}/src/{tim}{ext}",
	"thumb": "https://media.8chan.co/{board}/thumb/{tim}{thext}",
	"filename": "{tim}-{filename}{ext}",
	"thumbname": "{tim}-{filename}{thext}",
	"gifthumbs": True
	}
	}

	USER_AGENT = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"

	CSS_TOMORROW = """
	body {
	background: #1d1f21 none;
	color: #C5C8C6;
	font-family: arial,helvetica,sans-serif;
	font-size: 10pt;
	}

	a:link, a:visited {
	color: #81a2be;
	text-decoration: underline;
	}

	a:link:hover {
	color: #5F89AC;
	}

	span.deadlink {
	color: #f22;
	text-decoration: line-through;
	}

	header {
	margin: 1em 0;
	text-align: center;
	}

	header h1 {
	font-family: tahoma;
	letter-spacing: -2px;
	font-size: 20pt;
	margin: 0;
	}

	header div.subtitle {
	font-size: 8pt;
	}

	header div.subtitle a {
	color: #C5C8C6;
	text-decoration: none;
	}

	header div.subtitle a:hover {
	text-decoration: underline;
	}

	hr {
	border: 0;
	border-top: 1px solid #282a2e;
	height: 1px;
	clear: left:
	}

	p.fileinfo {
	display: block;
	margin: 0 0 0 20px;
	}

	.post {
	max-width: 80%;
	background:
	}

	.file {
	float: left;
	margin-right: 2px;
	width: 210px;
	}

	.file:not(.multifile) {
	float: none;
	}

	.unimportant, .unimportant * {
	font-size: 10px;
	}

	a .post-image {
	float: left;
	padding: 5px;
	margin: 0 20px 0 0;
	max-width: 98%;
	width: auto;
	height: auto;
	max-height: 200px;
	max-width: 200px;
	}

	a .full-image {
	position: absolute;
	left: 5px;

	padding: 5px;
	margin: 0 20px 0 0;
	max-width: 98%; max-width: calc(100% - 20px);
	}

	div.post.op {
	margin-right: 20px;
	margin-bottom: 5px;
	}

	div.post {
	padding-left: 20px;
	clear: both;
	}

	div.post-hover {
	position: absolute;
	margin: 0 !important;
	box-shadow: 0px 3px 10px rgba(0,0,0,0.5);
	}

	p.intro {
	clear: none;
	margin: 0.5em 0;
	padding: 0;
	padding-bottom: 0.2em;
	}

	p.intro span.subject {
	color: #b294bb;
	font-weight: bold;
	}

	p.intro span.name {
	color: #C5C8C6;
	font-weight: bold;
	}

	p.intro span.post_no a {
	color: #C5C8C6;
	margin: 0;
	}

	p.intro a {
	text-decoration: none;
	}

	div.post div.body {
	clear: both;
	word-wrap: break-word;
	white-space: pre-wrap;

	}

	div.post.reply {
	display: inline-block;
	background-color: #282a2e;
	border: 1px solid #282a2e;
	margin-bottom: 2px;
	margin-left: 16px;
	margin-top: 2px;
	max-width: 94%; max-width: calc(100% - 16px);
	padding: 0.2em 0.3em 0.5em 0.6em;
	}

	div.post.reply p {
	margin: 0.3em 0 0 0;
	}

	div.post.reply div.body {
	margin-left: 1.8em;
	margin-top: 0.8em;
	padding-right: 3em;
	padding-bottom: 0.3em;
	}

	span.quote {
	color: #adbd68;
	}
	"""

	JS_MAIN = """
	String.prototype.endsWith = function(suffix) {
	return this.indexOf(suffix, this.length - suffix.length) !== -1;
	};

	$(".post-image-link").bind("click", function(e) {
	if (e.which == 2)
	return;
	e.preventDefault();
	var thumb = $(this).find(".post-image");
	if ($(this).hasClass("expanded")) {
	$(this).find(".full-image").remove();
	$(this).removeClass("expanded");
	$(window).scrollTop($(this).attr("data-scrolltop"));
	} else {
	var href = $(this).attr("href");
	if (href.endsWith(".mp4") \|\| href.endsWith(".webm"))
	$("<video src=\\"" + $(this).attr("href") + "\\" class=\\"full-image\\" autoplay controls></video>").appendTo(this);
	else if (href.endsWith(".swf"))
	$("<object src=\\"" + $(this).attr("href") + "\\" class=\\"full-image\\"></object>").appendTo(this);
	else
	$("<img src=\\"" + $(this).attr("href") + "\\" class=\\"full-image\\">").appendTo(this);
	$(this).addClass("expanded");
	$(this).attr("data-scrolltop", $(window).scrollTop());
	}
	});

	$(".post-reply,.quotelink").bind("mouseenter", function(e) {
	var p = $($(this).attr("href"));
	if (p) {
	p.clone().appendTo("body")
	.addClass("post-hover")
	.css($(this).position())
	.bind("mouseleave", function(e) {
	$(this).remove();
	});
	}
	});
	"""

	HTML_MAIN = """<!-- Thread ripped from {url} using ChanRip -->
	<html>
	<head>
	<style>{css}</style>
	</head>
	<body>
	<header>
	<h1>/{board}/</h1>
	<div class="subtitle"><a href="{url}">{url}</a></div>
	</header>
	<hr/>
	<div class="thread">
	{thread}
	</div>
	<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
	<script>
	{js}
	</script>
	</body>
	</html>
	"""

	HTML_IMAGE = """
	<p class="fileinfo">File <a href="{path}">{name}</a>
	<span class="unimportant">({size}, {w}x{h}, {origname})</span>
	</p>
	<a href="{path}" target="_blank" class="post-image-link">
	<img class="post-image" src="{thumbpath}">
	</a>
	"""

	HTML_OP = """
	<div class="post op" id="p{no}">
	<p class="intro">
	<span class="subject">{subject}</span>
	<span class="name">{name} {uid}</span>
	<time datetime="{date}">{date}</time>
	<span class="post_no">No.<a href="#p{no}">{no}</a></span>
	<span class="post_replies">{replies}</span>
	</p>
	<div class="body">{body}</div>
	</div>
	"""

	HTML_REPLY = """
	<div class="post reply" id="p{no}">
	<p class="intro">
	<span class="name">{name} {uid}</span>
	<time datetime="{date}">{date}</time>
	<span class="post_no">No.<a href="#p{no}">{no}</a></span>
	<span class="post_replies">{replies}</span>
	</p>
	<div class="files">
	{files}
	</div>
	<div class="body">{body}</div>
	</div>
	"""

	def sizeof_fmt(num, suffix='B'):
	    """Format a byte count as a human readable string with binary prefixes.

	    The value is divided by 1024 until its magnitude drops below 1024 or
	    the largest prefix ("Yi") is reached, then printed with one decimal
	    place, e.g. ``sizeof_fmt(1536) == "1.5KiB"``.

	    :param num: the quantity to format
	    :param suffix: unit appended after the prefix, "B" by default
	    :returns: the formatted string
	    """
	    # NOTE: the block structure of this copy had been flattened; the
	    # nesting below restores a loop with an early return.
	    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
	        if abs(num) < 1024.0:
	            return "%3.1f%s%s" % (num, unit, suffix)
	        num /= 1024.0
	    return "%.1f%s%s" % (num, 'Yi', suffix)

	def chunker(res, chunksize=4096, hook=None, store=False):
	    """Read a response in chunks, reporting progress after every chunk.

	    :param res: response-like object providing ``info()`` (a mapping of
	        headers) and ``read(size)``
	    :param chunksize: number of bytes requested per ``read`` call
	    :param hook: optional callable invoked after every non-empty chunk as
	        ``hook(chunk, bytesread, totalbytes, percent)``; ``totalbytes`` is
	        0 and ``percent`` is the string ``"?"`` when the size is unknown
	    :param store: when true, collect the chunks and return them as bytes
	    :returns: ``True`` if reading was aborted by a ``KeyboardInterrupt``,
	        otherwise the collected ``bytes`` when ``store`` is set, otherwise
	        ``None``
	    """
	    try:
	        totalbytes = int((res.info().get("Content-Length") or "0").strip())
	    except ValueError:
	        # A malformed Content-Length only affects the progress display.
	        totalbytes = 0
	    bytesread = 0
	    # Joined once at the end instead of re-copying a growing bytes object.
	    chunks = [] if store else None

	    try:
	        while True:
	            chunk = res.read(chunksize)
	            if not chunk:
	                break
	            bytesread += len(chunk)
	            if store:
	                chunks.append(chunk)

	            if hook:
	                if totalbytes == 0:
	                    percent = "?"
	                else:
	                    percent = int(bytesread / totalbytes * 100)
	                hook(chunk, bytesread, totalbytes, percent)
	    except KeyboardInterrupt:
	        return True

	    if store:
	        return b"".join(chunks)
	    return None

	# FancyURLopener has been deprecated since Python 3.3 and is no longer
	# available in recent Python releases; fall back to a plain object base
	# so that merely defining the class cannot fail.
	_URL_OPENER_BASE = getattr(urllib.request, "FancyURLopener", object)


	class UserAgentOpener(_URL_OPENER_BASE):
	    """URL opener that identifies itself with a custom User-Agent string."""

	    def __init__(self, useragent, *args, **kwargs):
	        """Create the opener.

	        :param useragent: value stored as ``version``, which the base
	            opener uses for its User-Agent header
	        :param args: positional arguments forwarded to the base opener
	        :param kwargs: keyword arguments forwarded to the base opener
	        """
	        # Set before the base initialiser runs, which builds the default
	        # headers from ``version``.
	        self.version = useragent
	        if _URL_OPENER_BASE is not object:
	            super().__init__(*args, **kwargs)


	class Ripper(object):
	def __init__(self, url, directory, verbose=False, useragent=USER_AGENT, thumbs=True):
	self.verbose = verbose
	self.useragent = useragent
	self.thumbs = thumbs

	self.thread = None
	self.ripped = []
	self.postcount = 0

	if not os.path.isdir(directory):
	os.makedirs(directory)
	if thumbs and not os.path.isdir(os.path.join(directory, "_thumbs")):
	os.makedirs(os.path.join(directory, "_thumbs"))
	self.directory = directory

	self.chan = None
	m = None
	for i in CHANS.keys():
	m = CHANS[i]["url"].search(url)
	if m:
	self.chan = CHANS[i]
	break
	if not self.chan:
	raise NotImplementedError("{} is not supported (yet)".format(url))

	self.url = url
	self.board = m.groups()[self.chan["board"]]
	self.thread = m.groups()[self.chan["thread"]]

	self.apiurl = self.chan["api"]
	self.apiurl = self.apiurl.format(board=self.board, thread=self.thread)
	print("API: " + self.apiurl)

	def fetch(self):
	def hook(chunk, read, total, percent):
	sys.stdout.write("\rFetching thread... {}/{} {}%".format(read, total, percent))

	req = urllib.request.Request(self.apiurl)
	req.add_header("User-Agent", self.useragent)
	try:
	res = urllib.request.urlopen(req)
	except urllib.error.URLError as e:
	print(e)
	return True
	data = chunker(res, chunksize=128, hook=hook, store=True)
	print("")
	self.thread = json.loads(data.decode("utf-8"))

	def downloadFile(self, name, path, url, no=1, of=1):
	req = urllib.request.Request(url)
	req.add_header("User-Agent", self.useragent)
	try:
	res = urllib.request.urlopen(req)
	except urllib.error.URLError as e:
	print(e)
	return
	with open(path, "wb") as s:
	def hook(chunk, read, total, percent):
	sys.stdout.write("\r{}/{} {}... {}/{} {}%".format(no, of, name, read, total, percent))
	s.write(chunk)
	interrupted = chunker(res, hook=hook, chunksize=16384)

	if interrupted:
	print("\nInterrupted by user")
	os.remove(path)
	return True
	print("")

	def writeThread(self):
		"""Render the fetched thread as HTML and save it as ``index.html``.

		Reads the parsed API response from ``self.thread`` and expects every
		post to carry the ``replies`` list that ``rip`` attaches before calling
		this method. The page is assembled from the module's ``HTML_*``
		templates and written into ``self.directory``.

		Posts without a file (no ``tim`` key) are rendered without a file box;
		this now also applies to the opening post.
		"""
		print("Writing thread to index.html...")

		def figureThumbPath(v):
			# Path of the preview image relative to index.html. Falls back to
			# the full-size file when thumbnails are disabled. Stores the
			# thumbnail extension on the entry as "thext" for the template.
			if self.thumbs:
				v["thext"] = ".gif" if (self.chan["gifthumbs"] and v["ext"] == ".gif") else ".jpg"
				return os.path.join("_thumbs", self.chan["thumbname"].format(**v))
			return self.chan["filename"].format(**v)

		def renderFile(v, multi):
			# Markup for one attached file; `multi` selects the CSS class used
			# for posts that carry more than one file.
			return (
				"\n<div class=\"{}\">".format("file multifile" if multi else "file")
				+ HTML_IMAGE.format(
					path=self.chan["filename"].format(**v),
					thumbpath=figureThumbPath(v),
					name=str(v["tim"]) + v["ext"],
					size=sizeof_fmt(v["fsize"]),
					w=v["w"], h=v["h"],
					origname=v["filename"] + v["ext"]
				)
				+ "</div>"
			)

		def renderFiles(post):
			# Markup for all files of a post, or "" for a text-only post.
			if "tim" not in post:
				return ""
			multi = "extra_files" in post
			parts = ["<div class=\"files\">", renderFile(post, multi)]
			if multi:
				parts.extend(renderFile(extra, True) for extra in post["extra_files"])
			parts.append("</div>")
			return "".join(parts)

		def renderUid(post):
			return ("(ID: " + post["id"] + ")") if "id" in post else ""

		def renderDate(post):
			return datetime.datetime.fromtimestamp(post["time"]).strftime("%m/%d/%y (%a) %H:%M:%S")

		def renderReplies(post):
			# Backlinks to every post that quotes this one
			return " ".join(["<a class=\"post-reply\" href=\"#p{no}\">>>{no}</a>".format(no=i) for i in post["replies"]])

		posts = self.thread["posts"]
		op = posts[0]

		# The OP's files precede the OP template; replies receive theirs
		# through the template's `files` field.
		parts = [
			renderFiles(op),
			HTML_OP.format(
				subject=op.get("sub") or "",
				name=op["name"],
				uid=renderUid(op),
				date=renderDate(op),
				no=op["no"],
				body=op.get("com") or "",
				replies=renderReplies(op)
			)
		]

		for p in posts[1:]:
			parts.append(HTML_REPLY.format(
				files=renderFiles(p),
				name=p["name"],
				uid=renderUid(p),
				date=renderDate(p),
				no=p["no"],
				body=p.get("com") or "",
				replies=renderReplies(p)
			))
			parts.append("<br/>")

		thread = "".join(parts)

		# The API response is decoded as UTF-8, so post text may contain any
		# Unicode character; the locale's default encoding could fail on it.
		with open(os.path.join(self.directory, "index.html"), "wt", encoding="utf-8") as s:
			s.write(HTML_MAIN.format(
				url=self.url,
				css=CSS_TOMORROW,
				js=JS_MAIN,
				board=self.board,
				thread=thread
			))

	def rip(self):
	firstrip = not self.thread
	if self.fetch():
	return

	postcount = len(self.thread["posts"])
	if postcount <= self.postcount:
	print("No new posts")
	return

	files = []
	thumbs = []
	for i,v in enumerate(self.thread["posts"]):
	# Map all replies
	if (not "replies" in v) or type(v["replies"]) != list:
	v["replies"] = []
	for i2,v2 in enumerate(self.thread["posts"][i:]):
	if ("com" in v2) and (">>{}".format(v["no"]) in v2["com"]):
	if not v2["no"] in v["replies"]:
	v["replies"].append(v2["no"])

	# Fix some reply links
	if "com" in v:
	v["com"] = re.sub("href=\"\\/\\w+\\/res\\/\\d+\\.html\\#(\\d+)\"", "href=\"#p\\1\" class=\"quotelink\"", v["com"])

	if i < self.postcount:
	continue

	v.update({"board": self.board, "thread": self.thread})
	if "tim" in v:
	name = self.chan["filename"].format(**v)
	path = os.path.join(self.directory, name)
	url = self.chan["file"].format(**v)
	if not os.path.exists(path):
	files.append([name, path, url])

	if self.thumbs:
	v["thext"] = (self.chan["gifthumbs"] and v["ext"] == ".gif") and ".gif" or ".jpg"
	name = self.chan["thumbname"].format(**v)
	path = os.path.join(self.directory, "_thumbs", name)
	url = self.chan["thumb"].format(**v)
	if not os.path.exists(path):
	thumbs.append([name, path, url])

	if "extra_files" in v:
	for v2 in v["extra_files"]:
	v2.update({"board": self.board, "thread": self.thread})
	name = self.chan["filename"].format(**v2)
	path = os.path.join(self.directory, name)
	url = self.chan["file"].format(**v2)
	if not os.path.exists(path):
	files.append([name, path, url])

	if self.thumbs:
	v2["thext"] = (self.chan["gifthumbs"] and v["ext"] == ".gif") and ".gif" or ".jpg"
	name = self.chan["thumbname"].format(**v2)
	path = os.path.join(self.directory, "_thumbs", name)
	url = self.chan["thumb"].format(**v2)
	if not os.path.exists(path):
	thumbs.append([name, path, url])

	print("{} not indexed post(s), downloading {} file(s)".format(postcount - self.postcount, len(files)))

	for i,v in enumerate(files):
	interrupted = self.downloadFile(*v, no=i+1, of=len(files))
	if interrupted:
	break

	if self.thumbs and len(thumbs) > 0:
	print("Downloading {} thumbnail(s)".format(len(thumbs)))

	for i,v in enumerate(thumbs):
	interrupted = self.downloadFile(*v, no=i+1, of=len(thumbs))
	if interrupted:
	break

	self.postcount = postcount
	self.writeThread()

	def monitor(self, delay):
	timer = delay
	cleartimer = " " * (len(str(timer)) * 2 + 15)

	while True:
	while timer > 0:
	sys.stdout.write("\r" + cleartimer)
	sys.stdout.write("\rMonitoring... {}/{}".format(timer, delay))
	time.sleep(1)
	timer -= 1
	timer = delay
	print("")
	self.rip()

	if __name__ == "__main__":
		# Command line entry point: parse the arguments, rip the thread once
		# and, with --monitor, keep re-ripping it until interrupted.
		parser = argparse.ArgumentParser(
			# Adjacent literals instead of a backslash continuation, which
			# would embed the source indentation in the description text.
			description="Thread ripper for image boards implementing the "
			"4chan/8chan APIs with optional monitoring."
		)
		parser.add_argument("URL", help="a link to the board thread")
		parser.add_argument("DIR", help="a folder to rip to")
		parser.add_argument("-m", "--monitor", action="store_true", help="keep running and regularly check thread for changes")
		parser.add_argument("-d", "--delay", type=int, metavar="SEC", default=20, help="delay between monitoring checks, defaults to 20")
		parser.add_argument("-v", "--verbose", action="store_true", help="show some debug logging")
		parser.add_argument("-u", "--useragent", default=USER_AGENT, help="the user agent to use on requests")
		parser.add_argument("--nothumbs", action="store_true", default=False, help="don't download thumbs with the files")
		args = parser.parse_args()

		ripper = Ripper(args.URL, args.DIR, verbose=args.verbose, useragent=args.useragent, thumbs=not args.nothumbs)

		# Ctrl+C ends the initial rip as quietly as it ends the monitoring
		# loop instead of printing a traceback.
		try:
			ripper.rip()
			if args.monitor:
				ripper.monitor(args.delay)
		except KeyboardInterrupt:
			pass
		print("\nDone!")