# RealTrisT/walker.py

## walker.py
from urllib.parse import quote
from os import system, listdir
import sys
import requests
import time
import re
import colorama

#-----------------------------------------------------------------------things you might wanna change

#r is the requests object, ind is the current index, return 0 to stop, -1 to skip, and 1 to keep processing
def handleRequestData(r, ind):
    """Tell the crawl loop what to do with the response fetched for index ``ind``.

    Returns 0 (stop) when the HTTP status is not 200, -1 (skip this index)
    when the body contains the site's "does not exist" banner, and 1 (keep
    processing) otherwise. A short message is printed for the 0 and -1 cases.
    """
    missing_banner = "looking for something that does not, has not, will not, might not or must not exist"

    status = r.status_code
    if status != 200:
        print("fuck: ", str(status))
        return 0

    if missing_banner in r.text:
        print(str(ind) + " Not found")
        return -1

    return 1

#ind is the current index, and arry is the array that contains the current expression search results
def consolePrintResults(ind, arry):
    """Print one colourised result row for index ``ind`` to the console.

    ``arry`` must hold at least four strings. They are printed in order as
    an unlabelled first column, then "uploader", "SE" and "LE", left-justified
    to 64, 20, 5 and 5 characters respectively.
    """
    fore = colorama.Fore
    row = [str(ind), ": ", fore.LIGHTBLUE_EX, arry[0].ljust(64), fore.RESET]
    labelled_columns = (
        (" uploader: ", fore.MAGENTA, 1, 20),
        (" SE: ", fore.GREEN, 2, 5),
        (" LE: ", fore.GREEN, 3, 5),
    )
    for label, colour, slot, width in labelled_columns:
        row += [label, colour, arry[slot].ljust(width), fore.RESET]
    print(*row)

#elfile is the file object, ind is the current index, and arry is the array that contains the current expression search results
def filePrintResults(elfile, ind, arry):
    """Append one result line for index ``ind`` to the open text file ``elfile``.

    NOTE: the list is stringified and then UTF-8 encoded before printing, so
    what lands in the file is the repr of a bytes object (``b"[...]"``), not
    the bare list text.
    """
    payload = str(arry).encode('utf-8', 'replace')
    line = " ".join((str(ind), ":", str(payload)))
    print(line, file=elfile)

#placeholder string that main() appends to a page's result list when one of the configured regexes finds no match on that page
regexfail_repl = "NotFound"

#-----------------------------------------------------------------------things you might wanna change/>

def openOutputFile(filen):
    """Open the path ``filen`` for writing (truncating it) and return the file object.

    Returns False, after printing a message, when the file cannot be opened.
    """
    try:
        handle = open(filen, 'w')
    except IOError:
        print('could not open file')
        return False
    else:
        return handle

def closeOutputFile(filen):
    """Close ``filen``, which despite its name is an open file object, not a path."""
    filen.close()
    return None

def getFileFormattedTime():
    """Return the current local time as ``YYYY-MM-DD_HH-MM-SS`` for use in file names.

    Every field, the month included, is zero-padded to a fixed width so the
    resulting names have a constant length and sort chronologically.
    """
    return time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())

def getPropertyList(text):
    """Parse the ``name='value'`` properties of one config tag line into a dict.

    A property starts after a space and its value is wrapped in apostrophes.
    An apostrophe preceded by a backslash does not end the value; the
    backslash itself is kept in the value.

    Returns the dict of properties (empty when the line has none), or False
    when a value is missing its closing apostrophe.
    """
    returnal = {}
    propertyLocation = text.find(' ')                       #find first property
    while propertyLocation != -1:                           #while there are spaces outside property values
        equalsIndex = text.find('=\'', propertyLocation)
        if equalsIndex == -1:
            #stray space with nothing after it (before "/>" or the newline).
            #this must be tested BEFORE adding the 2-char offset: find()+2 is never -1,
            #and scanning on from index 1 re-finds the same space forever
            break
        beginindex = equalsIndex + 2                        #first character of the value
        tempend = text.find('\'', beginindex)               #potential end of the value
        while tempend != -1 and text[tempend - 1] == '\\':  #escaped apostrophe, keep looking
            tempend = text.find('\'', tempend + 1)
        if tempend == -1:                                   #value never closed: malformed line
            return False
        #strip() so that several spaces between properties do not leak into the key
        returnal[text[propertyLocation + 1:equalsIndex].strip()] = text[beginindex:tempend]
        propertyLocation = text.find(' ', tempend)          #find next property
    return returnal

def getSettings( configFileDirectory ):
    """Read the config file and group its tag lines by tag name.

    Every line of the form ``<tag prop='value' ...`` contributes one property
    dict (built by getPropertyList) to the list stored under ``tag``. Lines
    without a ``<``, or without a space after the tag name, are ignored. A
    line whose properties cannot be parsed is reported and skipped.

    Returns a dict mapping tag name -> list of property dicts, or False
    (after printing a message) when the file cannot be opened.
    """
    try:
        #read-only: the config is never written, so do not demand write permission
        configFile = open(configFileDirectory, 'r')
    except IOError:
        print('could not open file')
        return False

    returnObject = {}
    with configFile:                            #closed even if parsing raises
        for line in configFile:
            tagstart = line.find('<')
            if tagstart == -1:                  #test before adding 1, otherwise the check can never fire
                continue
            tagbegin = tagstart + 1
            tagend = line.find(' ', tagbegin)
            if tagend == -1:                    #just a newline, or a tag without properties
                continue
            properties = getPropertyList(line)
            if properties is False:             #never store False where callers expect a dict
                print('could not parse config line: ' + line.strip())
                continue
            #one list per tag name, holding however many instances of that tag there are
            returnObject.setdefault(line[tagbegin:tagend], []).append(properties)
    return returnObject

def main():
    """Crawl consecutive page ids and record what the configured regexes capture.

    The config file named by the first command-line argument must provide:
      * an ``output`` tag with ``path`` (``%datetime`` is replaced by a timestamp),
      * a ``source`` tag with ``url`` (``%leid`` is replaced by the current id),
      * a ``firstid`` tag with ``id``,
      * one or more ``regex`` tags with ``it``.
    Ids are fetched one after another until handleRequestData returns 0.

    Returns 0 when start-up fails; returns None once the crawl loop ends.
    """
    colorama.init()                         #not important

    if len(sys.argv) < 2:
        print('No Startup Args With Cfg File')
        return 0
    settings = getSettings(sys.argv[1].replace('"', ''))
    if settings is False:
        print("fail")
        return 0

    requiredTags = (
        ('output', "Provide output path in cfg file ples."),
        #the placeholder really is %leid (see the substitution in the loop below)
        ('source', "Provide a source tag with the url, where '%leid' will be replaced by the increment."),
        ('firstid', "Provide a first id. Can be 0."),
        ('regex', "Provide at least 1 regex, otherwise what's the point."),
    )
    for tag, complaint in requiredTags:
        if tag not in settings:
            print(complaint)
            return 0

    #validate everything before the output file is created, and compile each regex once
    try:
        outputPath = settings['output'][0]['path']
        urlTemplate = settings['source'][0]['url']
        CurrIndex = int(settings['firstid'][0]['id'])
        patterns = [re.compile(regex["it"]) for regex in settings['regex']]
    except (KeyError, TypeError, ValueError, re.error) as err:
        print("Bad cfg file:", repr(err))
        return 0

    outf = openOutputFile(outputPath.replace('%datetime', getFileFormattedTime()).replace('"', ''))
    if outf is False:
        return 0

    try:
        while True:
            r = requests.get(urlTemplate.replace('%leid', str(CurrIndex)))
            h = handleRequestData(r, CurrIndex)
            if h == 0:
                break
            if h != -1:
                CurrentPage = []
                for pattern in patterns:
                    found = pattern.search(r.text)
                    if found is None:
                        CurrentPage.append(regexfail_repl)
                        continue
                    #first capture group; the placeholder if that group took no part in
                    #the match; the whole match if the regex has no groups at all
                    groups = found.groups(regexfail_repl)
                    CurrentPage.append(groups[0] if groups else found.group(0))

                consolePrintResults(CurrIndex, CurrentPage)
                filePrintResults(outf, CurrIndex, CurrentPage)

            CurrIndex += 1
    finally:
        #also runs on Ctrl-C or a network error, so buffered results are flushed
        closeOutputFile(outf)


if __name__ == "__main__":
    #run the crawler, then exit with status 0 whatever main() returned
    main()
    raise SystemExit(0)
	from urllib.parse import quote
	from os import system, listdir
	import sys
	import requests
	import time
	import re
	import colorama

	#-----------------------------------------------------------------------things you might wanna change

	#r is the requests object, ind is the current index, return 0 to stop, -1 to skip, and 1 to keep processing
	def handleRequestData(r, ind):
		"""Tell the crawl loop what to do with the response fetched for index ``ind``.

		Returns 0 (stop) when the HTTP status is not 200, -1 (skip this index)
		when the body contains the site's "does not exist" banner, and 1 (keep
		processing) otherwise. A short message is printed for the 0 and -1 cases.
		"""
		missing_banner = "looking for something that does not, has not, will not, might not or must not exist"

		if r.status_code != 200:
			print("fuck: ", str(r.status_code))
			return 0
		if missing_banner in r.text:
			print(str(ind) + " Not found")
			return -1
		return 1

	#ind is the current index, and arry is the array that contains the current expression search results
	def consolePrintResults(ind, arry):
		"""Print one colourised result row for index ``ind`` to the console.

		``arry`` must hold at least four strings. They are printed in order as
		an unlabelled first column, then "uploader", "SE" and "LE", left-justified
		to 64, 20, 5 and 5 characters respectively.
		"""
		print(str(ind), ": ",
			colorama.Fore.LIGHTBLUE_EX, arry[0].ljust(64), colorama.Fore.RESET,
			" uploader: ", colorama.Fore.MAGENTA, arry[1].ljust(20), colorama.Fore.RESET,
			" SE: ", colorama.Fore.GREEN, arry[2].ljust(5), colorama.Fore.RESET,
			" LE: ", colorama.Fore.GREEN, arry[3].ljust(5), colorama.Fore.RESET)

	#elfile is the file object, ind is the current index, and arry is the array that contains the current expression search results
	def filePrintResults(elfile, ind, arry):
		"""Append one result line for index ``ind`` to the open text file ``elfile``.

		NOTE: the list is stringified and then UTF-8 encoded before printing, so
		what lands in the file is the repr of a bytes object (``b"[...]"``), not
		the bare list text.
		"""
		print(str(ind), ":",
			str(arry).encode('utf-8', 'replace'),
			file=elfile)

	#placeholder string that main() appends to a page's result list when one of the configured regexes finds no match on that page
	regexfail_repl = "NotFound"

	#-----------------------------------------------------------------------things you might wanna change/>

	def openOutputFile(filen):
		"""Open the path ``filen`` for writing (truncating it) and return the file object.

		Returns False, after printing a message, when the file cannot be opened.
		"""
		try:
			outputFile = open(filen, 'w')
		except IOError:
			print('could not open file')
			return False
		return outputFile

	def closeOutputFile(filen):
		"""Close ``filen``, which despite its name is an open file object, not a path."""
		filen.close()

	def getFileFormattedTime():
		"""Return the current local time as ``YYYY-MM-DD_HH-MM-SS`` for use in file names.

		Every field, the month included, is zero-padded to a fixed width so the
		resulting names have a constant length and sort chronologically.
		"""
		return time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())

	def getPropertyList(text):
		"""Parse the ``name='value'`` properties of one config tag line into a dict.

		A property starts after a space and its value is wrapped in apostrophes.
		An apostrophe preceded by a backslash does not end the value; the
		backslash itself is kept in the value.

		Returns the dict of properties (empty when the line has none), or False
		when a value is missing its closing apostrophe.
		"""
		returnal = {}
		propertyLocation = text.find(' ') #find first property
		while propertyLocation != -1: #while there are spaces outside property values
			equalsIndex = text.find('=\'', propertyLocation)
			if equalsIndex == -1:
				#stray space with nothing after it; test before adding the 2-char offset,
				#because find()+2 is never -1 and the scan would otherwise never end
				break
			beginindex = equalsIndex + 2 #first character of the value
			tempend = text.find('\'', beginindex) #potential end of the value
			while tempend != -1 and text[tempend - 1] == '\\': #escaped apostrophe, keep looking
				tempend = text.find('\'', tempend + 1)
			if tempend == -1: #value never closed: malformed line
				return False
			#strip() so that several spaces between properties do not leak into the key
			returnal[text[propertyLocation + 1:equalsIndex].strip()] = text[beginindex:tempend]
			propertyLocation = text.find(' ', tempend) #find next property
		return returnal

	def getSettings( configFileDirectory ):
		"""Read the config file and group its tag lines by tag name.

		Every line of the form ``<tag prop='value' ...`` contributes one property
		dict (built by getPropertyList) to the list stored under ``tag``. Lines
		without a ``<``, or without a space after the tag name, are ignored. A
		line whose properties cannot be parsed is reported and skipped.

		Returns a dict mapping tag name -> list of property dicts, or False
		(after printing a message) when the file cannot be opened.
		"""
		try:
			#read-only: the config is never written, so do not demand write permission
			configFile = open(configFileDirectory, 'r')
		except IOError:
			print('could not open file')
			return False

		returnObject = {}
		with configFile: #closed even if parsing raises
			for line in configFile:
				tagstart = line.find('<')
				if tagstart == -1: #test before adding 1, otherwise the check can never fire
					continue
				tagbegin = tagstart + 1
				tagend = line.find(' ', tagbegin)
				if tagend == -1: #just a newline, or a tag without properties
					continue
				properties = getPropertyList(line)
				if properties is False: #never store False where callers expect a dict
					print('could not parse config line: ' + line.strip())
					continue
				returnObject.setdefault(line[tagbegin:tagend], []).append(properties)
		return returnObject

	def main():
		"""Crawl consecutive page ids and record what the configured regexes capture.

		The config file named by the first command-line argument must provide:
		  * an ``output`` tag with ``path`` (``%datetime`` is replaced by a timestamp),
		  * a ``source`` tag with ``url`` (``%leid`` is replaced by the current id),
		  * a ``firstid`` tag with ``id``,
		  * one or more ``regex`` tags with ``it``.
		Ids are fetched one after another until handleRequestData returns 0.

		Returns 0 when start-up fails; returns None once the crawl loop ends.
		"""
		colorama.init() #not important

		if len(sys.argv) < 2:
			print('No Startup Args With Cfg File')
			return 0
		settings = getSettings(sys.argv[1].replace('"', ''))
		if settings is False:
			print("fail")
			return 0

		if 'output' not in settings:
			print("Provide output path in cfg file ples.")
			return 0
		if 'source' not in settings:
			#the placeholder really is %leid (see the substitution in the loop below)
			print("Provide a source tag with the url, where '%leid' will be replaced by the increment.")
			return 0
		if 'firstid' not in settings:
			print("Provide a first id. Can be 0.")
			return 0
		if 'regex' not in settings:
			print("Provide at least 1 regex, otherwise what's the point.")
			return 0

		#validate everything before the output file is created, and compile each regex once
		try:
			outputPath = settings['output'][0]['path']
			urlTemplate = settings['source'][0]['url']
			CurrIndex = int(settings['firstid'][0]['id'])
			patterns = [re.compile(regex["it"]) for regex in settings['regex']]
		except (KeyError, TypeError, ValueError, re.error) as err:
			print("Bad cfg file:", repr(err))
			return 0

		outf = openOutputFile(outputPath.replace('%datetime', getFileFormattedTime()).replace('"', ''))
		if outf is False:
			return 0

		try:
			while True:
				r = requests.get(urlTemplate.replace('%leid', str(CurrIndex)))
				h = handleRequestData(r, CurrIndex)
				if h == 0:
					break
				if h != -1:
					CurrentPage = []
					for pattern in patterns:
						found = pattern.search(r.text)
						if found is None:
							CurrentPage.append(regexfail_repl)
							continue
						#first capture group; the placeholder if that group took no part in
						#the match; the whole match if the regex has no groups at all
						groups = found.groups(regexfail_repl)
						CurrentPage.append(groups[0] if groups else found.group(0))

					consolePrintResults(CurrIndex, CurrentPage)
					filePrintResults(outf, CurrIndex, CurrentPage)

				CurrIndex += 1
		finally:
			#also runs on Ctrl-C or a network error, so buffered results are flushed
			closeOutputFile(outf)


	if __name__ == "__main__":
		#run the crawler, then exit with status 0 whatever main() returned
		main()
		sys.exit(0)