Last active
August 4, 2019 16:06
-
-
Save m3philis/7649961 to your computer and use it in GitHub Desktop.
Little downloader for konachan.com. You can download every image on that site, or set tags to download only the pics matching those tags. I appreciate ideas and bug reports :)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
from __future__ import print_function | |
import re | |
import os | |
import os.path | |
import sys | |
import http.client | |
import urllib.request | |
import time | |
# regexes
# FIX: use raw strings — "\." in a plain string literal is an invalid
# escape sequence (DeprecationWarning, later SyntaxWarning). Patterns
# themselves are unchanged.
url_regex = re.compile(r"konachan.com/image/.+?/.+?\.(?:png|jpg)")
name_regex = re.compile(r"image/.*?/(.*)")
# variables
# counter1/counter2 label the current "Pics X - Y" bucket directory;
# a new bucket is started every 15000 downloads (see directory_size()).
counter1, counter2 = 0, 15000
tag_filter = None  # currently unused; reserved for future filtering
# shared HTTP connection for the result-page requests (connecting is lazy,
# so constructing this does not touch the network yet)
domain = http.client.HTTPConnection("konachan.com")
# little function to calculate the last page of search results | |
def page_count(): | |
# open connection to konachan.com | |
domain = http.client.HTTPConnection("konachan.com") | |
domain.request("GET", "/post?page=1&tags=" + tags.replace(" ", "+")) | |
while True: | |
try: | |
first_page = domain.getresponse() | |
break | |
except http.client.BadStatusLine: | |
time.sleep(1) | |
domain.close() | |
domain = http.client.HTTPConnection("konachan.com") | |
domain.request("GET", "/post?page=1&tags=" + tags.replace(" ", "+")) | |
# we got our response, now it's time to find that number | |
first_page_source = str(first_page.read()) | |
page_list = first_page_source.split("Next Page") | |
number = 0 | |
for line in page_list: | |
if re.search("(?<=\/post\?page\=)\d+", line): | |
number = re.search("(?<=\/post\?page\=)\d+", line).group(0) | |
else: | |
number = 2 | |
return int(number) | |
# we don't want to save every picture in one directory. | |
# so we create a new directory when we donwloaded 15k pics | |
# we don't want to save every picture in one directory.
# so we create a new directory when we downloaded 15k pics
def directory_size(directory_intern):
    """Roll over to a fresh "Pics X - Y" directory once the current one
    holds 15000 files.

    Args:
        directory_intern (str): path of the pics directory to check.

    BUG FIX: the original mutated `counter1`/`counter2` and rebound
    `directory` without a `global` statement — the `+=` raised
    UnboundLocalError the first time a directory filled up, and the
    module-level `directory` used by the download loop was never updated.
    """
    global counter1, counter2, directory
    if len(os.listdir(directory_intern)) >= 15000:
        print("Directory " + directory_intern + " full")
        counter1 += 15000
        counter2 += 15000
        directory = "Pics " + str(counter1) + " - " + str(counter2)
        if os.path.isdir(directory):
            print("Directory already exists; skip creation")
        else:
            os.makedirs(directory, 0o755, False)
            # NOTE(review): this chdir("..") leaves the tags directory
            # whenever a new pics directory is created — looks unintended,
            # but kept to preserve the original control flow. TODO confirm.
            os.chdir("..")
# now we start
# user has to set path for pictures
print("Please set download location (full path required): ")
path = sys.stdin.readline()
# set tags, if user want to download specific pictures
print("Set Tags (seperate multiple tags with a whitespace;" +
      " connect tags with more than one word with an underscore): ")
tags = sys.stdin.readline().strip("\n")
# chdir in $path and create directory if it not exists
if not os.path.isdir(path.rstrip()):
    os.makedirs(path.rstrip(), 0o755, True)
os.chdir(path.rstrip())
if not os.path.isdir("Tags: " + tags):
    os.makedirs("Tags: " + tags, 0o755, True)
os.chdir("Tags: " + tags)
# creating directory for pics
directory = "Pics " + str(counter1) + " - " + str(counter2)
if not os.path.isdir(directory):
    os.makedirs(directory, 0o755, True)
# let's start with downloading
# BUG FIX: range(1, page_count()) skipped the last result page; the +1
# makes the range inclusive of the final page number.
for page_number in range(1, page_count() + 1):
    print("Starting download in page " + str(page_number))
    domain.request("GET", "/post?page=" + str(page_number) +
                   "&tags=" + tags.replace(" ", "+"))
    # retry until the server answers with a parseable status line,
    # rebuilding the shared connection on each failure
    while True:
        try:
            index_page = domain.getresponse()
            break
        except http.client.BadStatusLine:
            domain.close()
            domain = http.client.HTTPConnection("konachan.com")
            domain.request("GET", "/post?page=" + str(page_number) +
                           "&tags=" + tags.replace(" ", "+"))
            time.sleep(1)
    # after we got the response from konachan we need the source code
    index_page_source = str(index_page.read())
    # and now we need save every link on this page in a list
    pics_list = index_page_source.split("Post.register")
    directory_size(directory)
    # now we can search every line for the pic link
    for pic in pics_list:
        # the scraped source contains doubled backslashes; strip them
        # before matching the image URL
        pic_url = url_regex.search(re.sub("\\\\\\\\", "", pic))
        # if we found the url we download the pic
        # but with whitespaces instead of "%20"
        if pic_url:
            name = name_regex.search(pic_url.group(0)).group(1)
            print(" Downloading pic: " + name.replace("%20", " ") +
                  " in directory: " + directory)
            # a little check if pic already exists in any pics directory
            existance = False
            for subdir in os.listdir():  # renamed: `dir` shadowed a builtin
                # ROBUSTNESS FIX: skip plain files — the original
                # os.chdir() crashed on any non-directory entry in cwd
                if not os.path.isdir(subdir):
                    continue
                os.chdir(subdir)
                if os.path.isfile(name.replace("%20", " ")):
                    print(" Pic is already on your pc! Skip!")
                    existance = True
                os.chdir("..")
            if not existance:
                os.chdir(directory)
                # FIX: urllib.request.URLopener has been deprecated since
                # Python 3.3 (and later removed); urlretrieve performs the
                # same download-to-file operation
                urllib.request.urlretrieve(
                    "http://" + pic_url.group(0),
                    urllib.request.url2pathname(name))
                print(" Download finished")
                os.chdir("..")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment