# Main file

## Main file
#!/usr/bin/env python2
import sys
from PyQt4 import QtGui, QtCore
from scrape_ui import Ui_PieAndCake
import requests
from time import gmtime, strftime
import os
import platform
import getpass

# Login name of the current user; the download functions build their
# "/home/<username>/Documents/..." target folders from it.
username = getpass.getuser()
# Operating-system name as reported by platform.system().
# NOTE(review): not referenced anywhere in the visible source.
usr_os = platform.system()

def _hs_latest_page_id(front_page):
    """Cut the id of the newest Homestuck page out of the MSPA front page.

    The markup is narrowed in the same four steps and with the same fixed
    offsets the scraper has always used.  NOTE(review): the offsets depend
    on the layout of the live front page and could not be re-checked here,
    so they are deliberately left untouched.

    Returns the characters at offsets 13-18 of the narrowed markup; the
    caller converts them to a number.
    """
    html = front_page

    start = html.find('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">')
    end = html.find("<!-----------------------LATEST PAGES----------------------------->")
    html = html[:start] + html[end:]

    start = html.find("<!-----------------------END LATEST PAGES------------------------->")
    end = html.find("</html>")
    html = html[:start] + html[end:]

    start = html.find("<!-----------------------LATEST PAGES----------------------------->")
    end = html.find('?s=6&p=')
    html = html[:start] + html[end:]

    start = html.find('">"')
    end = html.find('</html>')
    html = html[:start] + html[end:]

    return html[13:][:6]


def _hs_localise_page(html, endval, next_endval, rootdata, rootimg, imgval, is_flash):
    """Trim one comic page and point its links at the local copies.

    html        -- page markup as text
    endval      -- six-digit id of this page
    next_endval -- six-digit id of the following page
    rootdata    -- folder holding the .html files and the spacer images
    rootimg     -- folder holding the downloaded panels
    imgval      -- five-digit number of this page's panel
    is_flash    -- True when the panel is a .swf file

    Returns the rewritten markup encoded as ASCII; characters outside
    ASCII are dropped.
    """
    start = html.find("<!------------------------end comic content----------------------------------->")
    end = html.find("</html>")
    html = html[:start] + html[end:]

    start = html.find("<!------------------------begin nav----------------------------------->")
    end = html.find("<!------------------------end nav----------------------------------->")
    html = html[:start] + html[end:]
    html = html.replace(endval, "")

    # Link to the next page's local file.
    html = html.replace("?s=6&p=" + next_endval, rootdata + next_endval + ".html")
    html = html.replace("images/", rootdata)
    html = html.replace("favicon.ico", rootdata + "favicon.ico")
    # Panels live in rootimg.  This substitution has to come after the
    # "images/" one above because rootimg itself ends in "images/".
    html = html.replace("http://www.mspaintadventures.com/storyfiles/hs2/", rootimg)

    if is_flash:
        # Swap the embedded player for a plain link to the saved .swf.
        print("Repairing flash code...")
        start = html.find('<script language="javascript">AC_FL_RunContent = 0;</script>')
        end = html.find("     </object>\n                </noscript>")
        html = html[:start] + html[end + 1:]
        swflink = ('<a href="' + rootimg + imgval + '.swf"'
                   + 'target="_self" name="Flash Content Link">Click here for flash</a>')
        html = html.replace("</object>", swflink)

    return html.encode('ascii', 'ignore')


def Homestuck():
    """Download every Homestuck page and its panel for offline reading.

    Pages are written as <id>.html to /home/<username>/Documents/Homestuck/
    and panels (.gif, numbered multi-.gif sets or .swf) to its images/
    sub-folder.  Anything already on disk is not fetched again.

    Sets the module globals ``content`` (pages still to do), ``h_content``
    (total pages) and ``imgval`` (current panel number), and calls
    ``myapp.homeBar()`` after each page so the progress bar can follow.
    """
    global content, imgval, h_content

    print('Initiating download of the complete Homestuck archive.')
    # HTML pages and spacer images
    rootdata = "/home/" + username + "/Documents/Homestuck/"
    print(rootdata)
    # comic panels
    rootimg = rootdata + "images/"
    print("Program started @ " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

    # endval is the page's identification number on the site
    endval = "001901"

    # root page and image urls
    page = "http://www.mspaintadventures.com/?s=6&p="
    imgroot = "http://www.mspaintadventures.com/storyfiles/hs2/"

    imgval = 0

    # create the folders for the data if they don't exist
    if not os.path.exists(rootdata):
        os.makedirs(rootdata)
    if not os.path.exists(rootimg):
        os.makedirs(rootimg)
        # download the alignment images used on every page
        fnames = ("v2_blankstrip.gif",
                  "v2_blanksquare.gif",
                  "spacer.gif",
                  "v2_blanksquare2.gif",
                  "v2_blanksquare3.gif",
                  "favicon.ico")
        for i, name in enumerate(fnames):
            print("Fetching spacers... (%s/5)" % i)
            spacer = requests.get("http://www.mspaintadventures.com/images/" + name)
            with open(rootdata + name, 'wb') as out:
                out.write(spacer.content)

    # work out how many pages exist
    print("Identifying amount of content to download...")
    front = requests.get("http://www.mspaintadventures.com/")
    try:
        currentcomicval = int(_hs_latest_page_id(front.text)) - 1901
    except ValueError:
        print("Could not read the latest page id from the front page.")
        return

    print("Downloading " + str(currentcomicval) + " pages of comics.")

    content = currentcomicval
    h_content = content

    # main loop
    while content > 0:
        multigifid = 1
        imgval = str(int(imgval) + 1).rjust(5, '0')

        urlgif = imgroot + imgval + ".gif"
        urlmultigif = imgroot + imgval + "_" + str(multigifid) + ".gif"
        flaurl = imgroot + imgval + "/" + imgval + ".swf"

        # Look where the panels are actually saved.
        gifpath = rootimg + imgval + ".gif"
        multigifpath = rootimg + imgval + "_1.gif"
        flashpath = rootimg + imgval + ".swf"

        have_gif = os.path.exists(gifpath)
        have_multigif = os.path.exists(multigifpath)
        have_flash = os.path.exists(flashpath)

        if not (have_gif or have_multigif or have_flash):
            # Probe the three possible panel types; the responses are kept
            # so the panel is not downloaded a second time.
            gif_resp = requests.get(urlgif)
            multi_resp = requests.get(urlmultigif)
            swf_resp = requests.get(flaurl)
            gif = gif_resp.status_code != 404
            multigif = multi_resp.status_code != 404
            flash = swf_resp.status_code != 404

            if not multigif and not flash:
                # regular, single .gif
                if not gif:
                    print("Something went wrong while downloading the .gif.")
                    print(urlgif)
                    break
                with open(gifpath, 'wb') as out:
                    out.write(gif_resp.content)

            elif not gif and not flash:
                # more than one .gif on a page: _1, _2, ... until a 404
                resp = multi_resp
                while True:
                    print(urlmultigif)
                    if resp.status_code == 404:
                        break
                    imgpath = rootimg + imgval + "_" + str(multigifid) + ".gif"
                    with open(imgpath, 'wb') as out:
                        out.write(resp.content)
                    multigifid += 1
                    urlmultigif = imgroot + imgval + "_" + str(multigifid) + ".gif"
                    resp = requests.get(urlmultigif)

            elif not gif and not multigif:
                # Flash content
                print(flaurl)
                with open(flashpath, 'wb') as out:
                    out.write(swf_resp.content)

            else:
                print("Something went horribly wrong!")
        else:
            print("Image number " + imgval + " skipped.")
            # Remember what kind of panel is on disk so a regenerated page
            # still gets its flash link repaired.
            gif = have_gif
            multigif = have_multigif

        # Now the html
        root = rootdata + endval + ".html"
        next_endval = str(int(endval) + 1).rjust(6, '0')
        if not os.path.exists(root):
            response = requests.get(page + endval)
            local_html = _hs_localise_page(response.text, endval, next_endval,
                                           rootdata, rootimg, imgval,
                                           not gif and not multigif)
            # Open the file only once there is something to write, so a
            # failed request does not leave an empty page that later runs
            # would skip.
            with open(root, 'wb') as out:
                out.write(local_html)
        else:
            print("html page " + endval + " skipped.")
        endval = next_endval

        content -= 1
        myapp.homeBar()

    print("Finsihed downloading @: " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

# End Homestuck download code

def QC():
    """Download the Questionable Content archive for offline reading.

    Pages are written as <n>.html to
    /home/<username>/Documents/Questionable Content/ and strips as <n>.png
    to its comics/ sub-folder; the site stylesheet and logo are fetched
    once.  Files already on disk are not fetched again.

    Sets the module globals ``qontent`` (comics still to do),
    ``qc_content`` (total comics) and ``qmgval`` (current comic number),
    and calls ``myapp.qc_bar()`` after each comic.
    """
    global qontent, qc_content, qmgval

    print('Initiating download of the complete Qestionable Content archive.')

    # HTML, stylesheet and logo
    rootdir = "/home/" + username + "/Documents/Questionable Content/"
    localcss = rootdir + 'newstyles.css'
    locallogo = rootdir + "logo.png"
    # images
    rootdata = rootdir + "comics/"

    print("Program started @ " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

    # remote locations
    imgroot = "http://www.questionablecontent.net/comics/"
    css = "http://questionablecontent.net/newstyles.css"
    logourl = "http://questionablecontent.net/testing/logo.png"
    urlroot = 'http://questionablecontent.net/view.php?comic='

    for folder in (rootdir, rootdata):
        if not os.path.exists(folder):
            os.makedirs(folder)

    # Stylesheet and logo are stored as the raw bytes the server sent.
    for remote, local in ((css, localcss), (logourl, locallogo)):
        if not os.path.exists(local):
            payload = requests.get(remote).content
            with open(local, 'wb') as out:
                out.write(payload)

    # get current comic id.
    print("Identifying amount of content to download...")
    html = requests.get("http://www.questionablecontent.net").text
    start = html.find('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">')
    end = html.find('<img id="strip" src="http://www.questionablecontent.net/comics/')
    html = html[:start] + html[end:]

    start = html.find('.png">')
    end = html.find('</html>')
    html = html[:start] + html[end:]

    # Convert right away: the loop below counts down, which needs a number
    # and not the four-character string cut from the page.
    try:
        qontent = int(html[64:][:4])
    except ValueError:
        print("Could not read the latest comic number from the front page.")
        return
    qc_content = qontent
    qmgval = 1
    print(qontent)

    # main download loop
    while qontent > 0:
        number = str(qmgval)
        print(urlroot + number + '.html')
        localpage = rootdir + number + '.html'
        localimage = rootdata + number + '.png'
        imgurl = imgroot + number + '.png'

        # get the webpage
        if not os.path.exists(localpage):
            # NOTE(review): the page is requested WITHOUT a comic number,
            # as before.  The substitutions below ('view.php?comic=1', the
            # '<b>Warning</b>' cut) look written against that response, so
            # confirm before switching to the numbered URL.
            html = requests.get(urlroot).text
            # fix file path associations
            html = html.replace('./comics/', './comics/' + number + '.png')
            html = html.replace('../testing/logo.png', './logo.png')
            html = html.replace('view.php?comic=1', rootdir + str(qmgval + 1) + '.html')
            start = html.find('<b>Warning</b>')
            end = html.find('<b>74</b><br />')
            html = html[:start] + html[end + 5:]
            # Encode explicitly, like the other scrapers in this file, so a
            # non-ASCII character cannot abort the write.
            with open(localpage, 'wb') as out:
                out.write(html.encode('ascii', 'ignore'))

        # get the image
        print(imgurl)
        if not os.path.exists(localimage):
            image = requests.get(imgurl).content
            with open(localimage, 'wb') as out:
                out.write(image)

        qmgval += 1
        qontent -= 1
        myapp.qc_bar()


# Unfinished: WordPress-based sites need more work.
def _bug_next_url(html):
    """Narrow the first comic's markup down around its "Next" link.

    Applies the same fixed cuts the scraper has always used and returns
    what is left; presumably that is the address behind the "Next" link.
    NOTE(review): confirm against the live page.
    """
    start = html.find('class="navi navi-next" title="Next">Next</a>')
    end = html.find('<!-- Compression = gzip -->')
    html = html[:start] + html[end + 25:]

    start = html.find('<!DOCTYPE html>')
    end = html.find('<td class="comic_navi_right">')
    html = html[:start] + html[end + 1:]

    start = html.find('td class=')
    end = html.find('<a href="')
    html = html[:start] + html[end:]

    start = html.find('<a hr')
    end = html.find('ef="')
    html = html[:start] + html[end + 4:]

    return html[:-4]


def bug():
    """Start a local copy of the Bug archive (unfinished).

    Creates /home/<username>/Documents/Bug/ and its comics/ sub-folder,
    fetches the theme stylesheet once, and saves a trimmed copy of the
    first comic page as 0.html.  Sets the module global ``bontent`` to 0.
    The next comic's address is worked out but nothing follows it yet.
    """
    global bontent

    print('Initiating download of the complete Bug archive.')
    # HTML and stylesheet
    rootdir = "/home/" + username + "/Documents/Bug/"
    localcss = rootdir + 'style.css'
    # images
    rootdata = rootdir + "comics/"

    print("Program started @ " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

    css = "http://www.bugcomic.com/wp-content/themes/comicpress-sandy/style.css"

    for folder in (rootdir, rootdata):
        if not os.path.exists(folder):
            os.makedirs(folder)
    if not os.path.exists(localcss):
        # Stored as the raw bytes the server sent.
        stylesheet = requests.get(css).content
        with open(localcss, 'wb') as out:
            out.write(stylesheet)

    # get the first comic. There is no known way to count comics on
    # WordPress-based sites.
    firsturl = 'http://www.bugcomic.com/comics/letter/'
    bontent = 0
    htmlbackup = requests.get(firsturl).text.encode('ascii', 'ignore')

    html = htmlbackup
    start = html.find('<!DOCTYPE html>')
    end = html.find('<div id="comic-1" class="comicpane"><a href="')
    html = html[:start + 44] + html[end + 1:]

    start = html.find('<div id="subcontent-wrapper">')
    end = html.find('<!-- Compression = gzip -->')
    html = html[:start] + html[end + 25:]

    html = html.replace('http://www.bugcomic.com/', rootdir)
    # "with" guarantees the file is closed and flushed.
    with open(rootdir + str(bontent) + '.html', 'wb') as out:
        out.write(html)

    # Not used yet: following the chain of comics is still to be written.
    nexturl = _bug_next_url(htmlbackup)

class MyApp(QtGui.QMainWindow):
    """Main window: check boxes choose the comics, a button starts the downloads."""

    def __init__(self):
        QtGui.QMainWindow.__init__(self)
        self.ui = Ui_PieAndCake()
        self.ui.setupUi(self)
        self.ui.the_button.clicked.connect(self.display_results)

    def display_results(self):
        """Run the download for every comic whose box is ticked."""
        if self.ui.cake_check.isChecked():
            QC()
        if self.ui.pie_check.isChecked():
            Homestuck()
        if self.ui.bugCheck.isChecked():
            bug()

    def homeBar(self):
        """Show Homestuck progress from the globals imgval and h_content."""
        # h_content = total pages | imgval = number of the current page
        percent = (int(imgval) * 100) // h_content
        self.ui.hBar.setValue(min(percent, 100))

    def qc_bar(self):
        """Show Questionable Content progress from qmgval and qc_content."""
        percent = (int(qmgval) * 100) // int(qc_content)
        # qmgval is already one past the last comic when the final update
        # arrives; QProgressBar ignores values above its maximum, so cap at
        # 100 to let the bar finish.
        self.ui.qcBar.setValue(min(percent, 100))

    def bugBar(self):
        """Placeholder for Bug progress; only prints a marker."""
        print("bug comic")

if __name__ == '__main__':
    # The download functions reach the window through the module-level
    # name "myapp", so it has to be bound here at module scope.
    app = QtGui.QApplication(sys.argv)
    myapp = MyApp()
    myapp.show()

    # Hand control to Qt and leave with the event loop's exit status.
    exit_status = app.exec_()
    sys.exit(exit_status)

## Qt file
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'ComicScrape.ui'
#
# Created: Thu Jul 19 19:13:41 2012
#      by: PyQt4 UI code generator 4.9.1
#
# WARNING! All changes made in this file will be lost!

from PyQt4 import QtCore, QtGui

# Use QString.fromUtf8 when the binding provides QString; if the attribute
# is missing, fall back to passing strings through unchanged.
try:
    _fromUtf8 = QtCore.QString.fromUtf8
except AttributeError:
    _fromUtf8 = lambda s: s

class Ui_PieAndCake(object):
    """Widget layout of the "Webcomic Downloader" window.

    setupUi() creates one check box and one progress bar per comic plus the
    start button, all as children of a central widget, and stores each on
    the instance under its object name.
    """

    def setupUi(self, PieAndCake):
        """Build the widgets inside the main window *PieAndCake*."""
        PieAndCake.setObjectName(_fromUtf8("PieAndCake"))
        PieAndCake.resize(368, 140)

        central = QtGui.QWidget(PieAndCake)
        central.setObjectName(_fromUtf8("centralwidget"))
        self.centralwidget = central

        # (attribute / object name, widget type, (x, y, width, height)),
        # listed in creation order.
        layout = (
            ("pie_check", QtGui.QCheckBox, (10, 10, 121, 19)),
            ("cake_check", QtGui.QCheckBox, (10, 40, 181, 19)),
            ("the_button", QtGui.QPushButton, (90, 110, 171, 24)),
            ("hBar", QtGui.QProgressBar, (230, 10, 131, 23)),
            ("qcBar", QtGui.QProgressBar, (230, 40, 131, 23)),
            ("bugCheck", QtGui.QCheckBox, (10, 70, 141, 22)),
            ("bugBar", QtGui.QProgressBar, (230, 70, 131, 23)),
        )
        for name, widget_type, rect in layout:
            widget = widget_type(central)
            widget.setGeometry(QtCore.QRect(*rect))
            if widget_type is QtGui.QProgressBar:
                # progress bars start out empty
                widget.setProperty("value", 0)
            widget.setObjectName(_fromUtf8(name))
            setattr(self, name, widget)

        PieAndCake.setCentralWidget(central)

        self.retranslateUi(PieAndCake)
        QtCore.QMetaObject.connectSlotsByName(PieAndCake)

    def retranslateUi(self, PieAndCake):
        """Set the window title and the captions of the check boxes and button."""
        translate = QtGui.QApplication.translate
        utf8 = QtGui.QApplication.UnicodeUTF8

        PieAndCake.setWindowTitle(translate("PieAndCake", "Webcomic Downloader", None, utf8))
        captions = (
            (self.pie_check, "Homestuck"),
            (self.cake_check, "Questionable Content"),
            (self.the_button, "Initiate Download"),
            (self.bugCheck, "Bug"),
        )
        for widget, caption in captions:
            widget.setText(translate("PieAndCake", caption, None, utf8))
	#!/usr/bin/env python2
	import sys
	from PyQt4 import QtGui, QtCore
	from scrape_ui import Ui_PieAndCake
	import requests
	from time import gmtime, strftime
	import os
	import platform
	import getpass

	# Login name of the current user; the download functions build their
	# "/home/<username>/Documents/..." target folders from it.
	username = getpass.getuser()
	# Operating-system name as reported by platform.system().
	# NOTE(review): not referenced anywhere in the visible source.
	usr_os = platform.system()

	def _hs_latest_page_id(front_page):
		"""Cut the id of the newest Homestuck page out of the MSPA front page.

		The markup is narrowed in the same four steps and with the same fixed
		offsets the scraper has always used.  NOTE(review): the offsets depend
		on the layout of the live front page and could not be re-checked here,
		so they are deliberately left untouched.

		Returns the characters at offsets 13-18 of the narrowed markup; the
		caller converts them to a number.
		"""
		html = front_page

		start = html.find('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">')
		end = html.find("<!-----------------------LATEST PAGES----------------------------->")
		html = html[:start] + html[end:]

		start = html.find("<!-----------------------END LATEST PAGES------------------------->")
		end = html.find("</html>")
		html = html[:start] + html[end:]

		start = html.find("<!-----------------------LATEST PAGES----------------------------->")
		end = html.find('?s=6&p=')
		html = html[:start] + html[end:]

		start = html.find('">"')
		end = html.find('</html>')
		html = html[:start] + html[end:]

		return html[13:][:6]


	def _hs_localise_page(html, endval, next_endval, rootdata, rootimg, imgval, is_flash):
		"""Trim one comic page and point its links at the local copies.

		html        -- page markup as text
		endval      -- six-digit id of this page
		next_endval -- six-digit id of the following page
		rootdata    -- folder holding the .html files and the spacer images
		rootimg     -- folder holding the downloaded panels
		imgval      -- five-digit number of this page's panel
		is_flash    -- True when the panel is a .swf file

		Returns the rewritten markup encoded as ASCII; characters outside
		ASCII are dropped.
		"""
		start = html.find("<!------------------------end comic content----------------------------------->")
		end = html.find("</html>")
		html = html[:start] + html[end:]

		start = html.find("<!------------------------begin nav----------------------------------->")
		end = html.find("<!------------------------end nav----------------------------------->")
		html = html[:start] + html[end:]
		html = html.replace(endval, "")

		# Link to the next page's local file.
		html = html.replace("?s=6&p=" + next_endval, rootdata + next_endval + ".html")
		html = html.replace("images/", rootdata)
		html = html.replace("favicon.ico", rootdata + "favicon.ico")
		# Panels live in rootimg.  This substitution has to come after the
		# "images/" one above because rootimg itself ends in "images/".
		html = html.replace("http://www.mspaintadventures.com/storyfiles/hs2/", rootimg)

		if is_flash:
			# Swap the embedded player for a plain link to the saved .swf.
			print("Repairing flash code...")
			start = html.find('<script language="javascript">AC_FL_RunContent = 0;</script>')
			end = html.find("     </object>\n                </noscript>")
			html = html[:start] + html[end + 1:]
			swflink = ('<a href="' + rootimg + imgval + '.swf"'
				+ 'target="_self" name="Flash Content Link">Click here for flash</a>')
			html = html.replace("</object>", swflink)

		return html.encode('ascii', 'ignore')


	def Homestuck():
		"""Download every Homestuck page and its panel for offline reading.

		Pages are written as <id>.html to /home/<username>/Documents/Homestuck/
		and panels (.gif, numbered multi-.gif sets or .swf) to its images/
		sub-folder.  Anything already on disk is not fetched again.

		Sets the module globals ``content`` (pages still to do), ``h_content``
		(total pages) and ``imgval`` (current panel number), and calls
		``myapp.homeBar()`` after each page so the progress bar can follow.
		"""
		global content, imgval, h_content

		print('Initiating download of the complete Homestuck archive.')
		# HTML pages and spacer images
		rootdata = "/home/" + username + "/Documents/Homestuck/"
		print(rootdata)
		# comic panels
		rootimg = rootdata + "images/"
		print("Program started @ " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

		# endval is the page's identification number on the site
		endval = "001901"

		# root page and image urls
		page = "http://www.mspaintadventures.com/?s=6&p="
		imgroot = "http://www.mspaintadventures.com/storyfiles/hs2/"

		imgval = 0

		# create the folders for the data if they don't exist
		if not os.path.exists(rootdata):
			os.makedirs(rootdata)
		if not os.path.exists(rootimg):
			os.makedirs(rootimg)
			# download the alignment images used on every page
			fnames = ("v2_blankstrip.gif",
				"v2_blanksquare.gif",
				"spacer.gif",
				"v2_blanksquare2.gif",
				"v2_blanksquare3.gif",
				"favicon.ico")
			for i, name in enumerate(fnames):
				print("Fetching spacers... (%s/5)" % i)
				spacer = requests.get("http://www.mspaintadventures.com/images/" + name)
				with open(rootdata + name, 'wb') as out:
					out.write(spacer.content)

		# work out how many pages exist
		print("Identifying amount of content to download...")
		front = requests.get("http://www.mspaintadventures.com/")
		try:
			currentcomicval = int(_hs_latest_page_id(front.text)) - 1901
		except ValueError:
			print("Could not read the latest page id from the front page.")
			return

		print("Downloading " + str(currentcomicval) + " pages of comics.")

		content = currentcomicval
		h_content = content

		# main loop
		while content > 0:
			multigifid = 1
			imgval = str(int(imgval) + 1).rjust(5, '0')

			urlgif = imgroot + imgval + ".gif"
			urlmultigif = imgroot + imgval + "_" + str(multigifid) + ".gif"
			flaurl = imgroot + imgval + "/" + imgval + ".swf"

			# Look where the panels are actually saved.
			gifpath = rootimg + imgval + ".gif"
			multigifpath = rootimg + imgval + "_1.gif"
			flashpath = rootimg + imgval + ".swf"

			have_gif = os.path.exists(gifpath)
			have_multigif = os.path.exists(multigifpath)
			have_flash = os.path.exists(flashpath)

			if not (have_gif or have_multigif or have_flash):
				# Probe the three possible panel types; the responses are kept
				# so the panel is not downloaded a second time.
				gif_resp = requests.get(urlgif)
				multi_resp = requests.get(urlmultigif)
				swf_resp = requests.get(flaurl)
				gif = gif_resp.status_code != 404
				multigif = multi_resp.status_code != 404
				flash = swf_resp.status_code != 404

				if not multigif and not flash:
					# regular, single .gif
					if not gif:
						print("Something went wrong while downloading the .gif.")
						print(urlgif)
						break
					with open(gifpath, 'wb') as out:
						out.write(gif_resp.content)

				elif not gif and not flash:
					# more than one .gif on a page: _1, _2, ... until a 404
					resp = multi_resp
					while True:
						print(urlmultigif)
						if resp.status_code == 404:
							break
						imgpath = rootimg + imgval + "_" + str(multigifid) + ".gif"
						with open(imgpath, 'wb') as out:
							out.write(resp.content)
						multigifid += 1
						urlmultigif = imgroot + imgval + "_" + str(multigifid) + ".gif"
						resp = requests.get(urlmultigif)

				elif not gif and not multigif:
					# Flash content
					print(flaurl)
					with open(flashpath, 'wb') as out:
						out.write(swf_resp.content)

				else:
					print("Something went horribly wrong!")
			else:
				print("Image number " + imgval + " skipped.")
				# Remember what kind of panel is on disk so a regenerated page
				# still gets its flash link repaired.
				gif = have_gif
				multigif = have_multigif

			# Now the html
			root = rootdata + endval + ".html"
			next_endval = str(int(endval) + 1).rjust(6, '0')
			if not os.path.exists(root):
				response = requests.get(page + endval)
				local_html = _hs_localise_page(response.text, endval, next_endval,
					rootdata, rootimg, imgval,
					not gif and not multigif)
				# Open the file only once there is something to write, so a
				# failed request does not leave an empty page that later runs
				# would skip.
				with open(root, 'wb') as out:
					out.write(local_html)
			else:
				print("html page " + endval + " skipped.")
			endval = next_endval

			content -= 1
			myapp.homeBar()

		print("Finsihed downloading @: " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

	# End Homestuck download code

	def QC():
		"""Download the Questionable Content archive for offline reading.

		Pages are written as <n>.html to
		/home/<username>/Documents/Questionable Content/ and strips as <n>.png
		to its comics/ sub-folder; the site stylesheet and logo are fetched
		once.  Files already on disk are not fetched again.

		Sets the module globals ``qontent`` (comics still to do),
		``qc_content`` (total comics) and ``qmgval`` (current comic number),
		and calls ``myapp.qc_bar()`` after each comic.
		"""
		global qontent, qc_content, qmgval

		print('Initiating download of the complete Qestionable Content archive.')

		# HTML, stylesheet and logo
		rootdir = "/home/" + username + "/Documents/Questionable Content/"
		localcss = rootdir + 'newstyles.css'
		locallogo = rootdir + "logo.png"
		# images
		rootdata = rootdir + "comics/"

		print("Program started @ " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

		# remote locations
		imgroot = "http://www.questionablecontent.net/comics/"
		css = "http://questionablecontent.net/newstyles.css"
		logourl = "http://questionablecontent.net/testing/logo.png"
		urlroot = 'http://questionablecontent.net/view.php?comic='

		for folder in (rootdir, rootdata):
			if not os.path.exists(folder):
				os.makedirs(folder)

		# Stylesheet and logo are stored as the raw bytes the server sent.
		for remote, local in ((css, localcss), (logourl, locallogo)):
			if not os.path.exists(local):
				payload = requests.get(remote).content
				with open(local, 'wb') as out:
					out.write(payload)

		# get current comic id.
		print("Identifying amount of content to download...")
		html = requests.get("http://www.questionablecontent.net").text
		start = html.find('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">')
		end = html.find('<img id="strip" src="http://www.questionablecontent.net/comics/')
		html = html[:start] + html[end:]

		start = html.find('.png">')
		end = html.find('</html>')
		html = html[:start] + html[end:]

		# Convert right away: the loop below counts down, which needs a number
		# and not the four-character string cut from the page.
		try:
			qontent = int(html[64:][:4])
		except ValueError:
			print("Could not read the latest comic number from the front page.")
			return
		qc_content = qontent
		qmgval = 1
		print(qontent)

		# main download loop
		while qontent > 0:
			number = str(qmgval)
			print(urlroot + number + '.html')
			localpage = rootdir + number + '.html'
			localimage = rootdata + number + '.png'
			imgurl = imgroot + number + '.png'

			# get the webpage
			if not os.path.exists(localpage):
				# NOTE(review): the page is requested WITHOUT a comic number,
				# as before.  The substitutions below ('view.php?comic=1', the
				# '<b>Warning</b>' cut) look written against that response, so
				# confirm before switching to the numbered URL.
				html = requests.get(urlroot).text
				# fix file path associations
				html = html.replace('./comics/', './comics/' + number + '.png')
				html = html.replace('../testing/logo.png', './logo.png')
				html = html.replace('view.php?comic=1', rootdir + str(qmgval + 1) + '.html')
				start = html.find('<b>Warning</b>')
				end = html.find('<b>74</b><br />')
				html = html[:start] + html[end + 5:]
				# Encode explicitly, like the other scrapers in this file, so a
				# non-ASCII character cannot abort the write.
				with open(localpage, 'wb') as out:
					out.write(html.encode('ascii', 'ignore'))

			# get the image
			print(imgurl)
			if not os.path.exists(localimage):
				image = requests.get(imgurl).content
				with open(localimage, 'wb') as out:
					out.write(image)

			qmgval += 1
			qontent -= 1
			myapp.qc_bar()


	#unfinished, WordPress blogs needs more work
	def _bug_next_url(html):
		"""Narrow the first comic's markup down around its "Next" link.

		Applies the same fixed cuts the scraper has always used and returns
		what is left; presumably that is the address behind the "Next" link.
		NOTE(review): confirm against the live page.
		"""
		start = html.find('class="navi navi-next" title="Next">Next</a>')
		end = html.find('<!-- Compression = gzip -->')
		html = html[:start] + html[end + 25:]

		start = html.find('<!DOCTYPE html>')
		end = html.find('<td class="comic_navi_right">')
		html = html[:start] + html[end + 1:]

		start = html.find('td class=')
		end = html.find('<a href="')
		html = html[:start] + html[end:]

		start = html.find('<a hr')
		end = html.find('ef="')
		html = html[:start] + html[end + 4:]

		return html[:-4]


	def bug():
		"""Start a local copy of the Bug archive (unfinished).

		Creates /home/<username>/Documents/Bug/ and its comics/ sub-folder,
		fetches the theme stylesheet once, and saves a trimmed copy of the
		first comic page as 0.html.  Sets the module global ``bontent`` to 0.
		The next comic's address is worked out but nothing follows it yet.
		"""
		global bontent

		print('Initiating download of the complete Bug archive.')
		# HTML and stylesheet
		rootdir = "/home/" + username + "/Documents/Bug/"
		localcss = rootdir + 'style.css'
		# images
		rootdata = rootdir + "comics/"

		print("Program started @ " + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

		css = "http://www.bugcomic.com/wp-content/themes/comicpress-sandy/style.css"

		for folder in (rootdir, rootdata):
			if not os.path.exists(folder):
				os.makedirs(folder)
		if not os.path.exists(localcss):
			# Stored as the raw bytes the server sent.
			stylesheet = requests.get(css).content
			with open(localcss, 'wb') as out:
				out.write(stylesheet)

		# get the first comic. There is no known way to count comics on
		# WordPress-based sites.
		firsturl = 'http://www.bugcomic.com/comics/letter/'
		bontent = 0
		htmlbackup = requests.get(firsturl).text.encode('ascii', 'ignore')

		html = htmlbackup
		start = html.find('<!DOCTYPE html>')
		end = html.find('<div id="comic-1" class="comicpane"><a href="')
		html = html[:start + 44] + html[end + 1:]

		start = html.find('<div id="subcontent-wrapper">')
		end = html.find('<!-- Compression = gzip -->')
		html = html[:start] + html[end + 25:]

		html = html.replace('http://www.bugcomic.com/', rootdir)
		# "with" guarantees the file is closed and flushed.
		with open(rootdir + str(bontent) + '.html', 'wb') as out:
			out.write(html)

		# Not used yet: following the chain of comics is still to be written.
		nexturl = _bug_next_url(htmlbackup)

	class MyApp(QtGui.QMainWindow):
	    """Main window that connects the Ui_PieAndCake form to the downloaders."""

	    def __init__(self):
	        """Build the form and hook the button up to display_results."""
	        super(MyApp, self).__init__()
	        form = Ui_PieAndCake()
	        self.ui = form
	        form.setupUi(self)
	        form.the_button.clicked.connect(self.display_results)

	    def display_results(self):
	        """Run the downloader of every comic whose check box is ticked."""
	        form = self.ui
	        # Order matters: QC first, then Homestuck, then Bug.  Each check
	        # box is queried only after the previous download has returned.
	        jobs = (
	            (form.cake_check, QC),
	            (form.pie_check, Homestuck),
	            (form.bugCheck, bug),
	        )
	        for checkbox, download in jobs:
	            if checkbox.isChecked():
	                download()

	    def homeBar(self):
	        """Set hBar to imgval as a percentage of h_content.

	        Both names are read from module scope, not from the instance.
	        """
	        percent = int(imgval) * 100 / h_content
	        self.ui.hBar.setValue(percent)

	    def qc_bar(self):
	        """Set qcBar to qmgval as a percentage of qc_content.

	        Both names are read from module scope, not from the instance.
	        """
	        percent = int(qmgval) * 100 / int(qc_content)
	        self.ui.qcBar.setValue(percent)

	    def bugBar(self):
	        """Placeholder: only prints a message, no progress bar is updated."""
	        print("bug comic")

	if __name__ == '__main__':
	    # Script entry point: create the Qt application, show the main window
	    # and hand the event loop's result to the interpreter as exit status.
	    app = QtGui.QApplication(sys.argv)
	    # myapp has to stay a module-level name: the QC downloader calls
	    # myapp.qc_bar() to update the progress bar.
	    myapp = MyApp()
	    myapp.show()
	    exit_status = app.exec_()
	    sys.exit(exit_status)
	# -*- coding: utf-8 -*-

	# Form implementation generated from reading ui file 'ComicScrape.ui'
	#
	# Created: Thu Jul 19 19:13:41 2012
	# by: PyQt4 UI code generator 4.9.1
	#
	# WARNING! All changes made in this file will be lost!

	from PyQt4 import QtCore, QtGui

	# Use QString.fromUtf8 when QtCore provides it; otherwise fall back to
	# a function that returns its argument unchanged.
	try:
	    _fromUtf8 = QtCore.QString.fromUtf8
	except AttributeError:
	    def _fromUtf8(s):
	        return s

	class Ui_PieAndCake(object):
	    """Form class for the main window (generated from 'ComicScrape.ui')."""

	    def setupUi(self, PieAndCake):
	        """Create the central widget and its children on *PieAndCake*.

	        Every child widget is stored on ``self`` under the same name that
	        is used as its Qt object name.  Progress bars start at value 0.
	        """
	        PieAndCake.setObjectName(_fromUtf8("PieAndCake"))
	        PieAndCake.resize(368, 140)

	        central = QtGui.QWidget(PieAndCake)
	        self.centralwidget = central
	        central.setObjectName(_fromUtf8("centralwidget"))

	        # (attribute / object name, widget class, (x, y, width, height)),
	        # listed in creation order.
	        children = (
	            ("pie_check", QtGui.QCheckBox, (10, 10, 121, 19)),
	            ("cake_check", QtGui.QCheckBox, (10, 40, 181, 19)),
	            ("the_button", QtGui.QPushButton, (90, 110, 171, 24)),
	            ("hBar", QtGui.QProgressBar, (230, 10, 131, 23)),
	            ("qcBar", QtGui.QProgressBar, (230, 40, 131, 23)),
	            ("bugCheck", QtGui.QCheckBox, (10, 70, 141, 22)),
	            ("bugBar", QtGui.QProgressBar, (230, 70, 131, 23)),
	        )
	        for name, widget_class, rect in children:
	            widget = widget_class(central)
	            setattr(self, name, widget)
	            widget.setGeometry(QtCore.QRect(*rect))
	            if widget_class is QtGui.QProgressBar:
	                widget.setProperty("value", 0)
	            widget.setObjectName(_fromUtf8(name))

	        PieAndCake.setCentralWidget(central)

	        self.retranslateUi(PieAndCake)
	        QtCore.QMetaObject.connectSlotsByName(PieAndCake)

	    def retranslateUi(self, PieAndCake):
	        """Set the window title and the captions of the text-bearing widgets."""
	        translate = QtGui.QApplication.translate
	        encoding = QtGui.QApplication.UnicodeUTF8

	        PieAndCake.setWindowTitle(
	            translate("PieAndCake", "Webcomic Downloader", None, encoding))

	        captions = (
	            (self.pie_check, "Homestuck"),
	            (self.cake_check, "Questionable Content"),
	            (self.the_button, "Initiate Download"),
	            (self.bugCheck, "Bug"),
	        )
	        for widget, caption in captions:
	            widget.setText(translate("PieAndCake", caption, None, encoding))