Skip to content

Instantly share code, notes, and snippets.

@RealTrisT
Last active April 11, 2018 03:06
Show Gist options
  • Save RealTrisT/d2598728c0be0c7fc5dfb6574859c9d6 to your computer and use it in GitHub Desktop.
Save RealTrisT/d2598728c0be0c7fc5dfb6574859c9d6 to your computer and use it in GitHub Desktop.
from urllib.parse import quote
from os import system, listdir
import sys
import requests
import time
import re
import colorama
#-----------------------------------------------------------------------things you might wanna change
def handleRequestData(r, ind):
    """Decide what to do with the response fetched for index *ind*.

    r is the response object (its ``status_code`` and ``text`` are read),
    ind is the current index (only used in the console message).

    Returns 0 to stop the run, -1 to skip this index, 1 to keep processing.
    """
    # Text the site serves on its "page does not exist" page.
    missing_marker = "looking for something that does not, has not, will not, might not or must not exist"

    # Any non-200 answer ends the whole run.
    if r.status_code != 200:
        print("fuck: ", str(r.status_code))
        return 0

    # The page loaded but the entry is missing: skip just this index.
    if missing_marker in r.text:
        print(str(ind) + " Not found")
        return -1

    return 1
def consolePrintResults(ind, arry):
    """Print one coloured console line for index *ind*.

    ind is the current index, arry is the list holding the current
    expression search results; its first four entries are printed,
    left-justified to 64, 20, 5 and 5 characters respectively.
    """
    # Pad the four cells up front, in the same order they are displayed.
    column_widths = (64, 20, 5, 5)
    cells = [arry[pos].ljust(width) for pos, width in enumerate(column_widths)]

    fore = colorama.Fore
    segments = [
        str(ind), ": ",
        fore.LIGHTBLUE_EX, cells[0], fore.RESET,
        " uploader: ", fore.MAGENTA, cells[1], fore.RESET,
        " SE: ", fore.GREEN, cells[2], fore.RESET,
        " LE: ", fore.GREEN, cells[3], fore.RESET,
    ]
    # print() joins the segments with single spaces, exactly like separate arguments.
    print(*segments)
def filePrintResults(elfile, ind, arry):
    """Write one result line for index *ind* to the output file.

    elfile is the file object, ind is the current index, arry is the list
    holding the current expression search results.

    The list is stringified and UTF-8 encoded before printing, so the line
    contains the ``b'...'`` representation of those bytes.
    """
    encoded_results = str(arry).encode('utf-8', 'replace')
    print(str(ind), ":", encoded_results, file=elfile)
#placeholder string stored in a page's result list in place of a value whenever one of the configured regexes finds no match on that page
regexfail_repl = "NotFound"
#-----------------------------------------------------------------------things you might wanna change/>
def openOutputFile(filen):
    """Open *filen* for writing, truncating any existing content.

    Returns the open file object, or False (after printing a message)
    when the file cannot be opened.
    """
    try:
        return open(filen, 'w')
    except IOError:
        print('could not open file')
    return False
def closeOutputFile(filen):
    """Close the output file object *filen* (despite the name, not a path)."""
    filen.close()
def getFileFormattedTime():
    """Return the current local time as ``YYYY-MM-DD_HH-MM-SS``.

    Every field is zero-padded (including the month, which the previous
    hand-built string left unpadded), so the stamps have a fixed width and
    file names that embed them sort chronologically.
    """
    return time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
def getPropertyList(text):
    """Parse the ``name='value'`` properties of one config tag line.

    text is a single line such as ``<regex it='abc' other='x'>``.  A
    property starts after a space, its name runs up to ``='`` and its value
    up to the next apostrophe that is not preceded by a backslash (the
    backslash itself is kept in the value).

    Returns a dict mapping property names to values (empty when the line
    has no properties), or False when a value is never terminated.
    """
    properties = {}
    space_at = text.find(' ')  # a space introduces the first property
    while space_at != -1:
        marker_at = text.find("='", space_at)
        # Test the raw find() result: adding the offset first would hide the
        # -1 and make a trailing space (e.g. "... />") loop forever.
        if marker_at == -1:
            break
        value_start = marker_at + 2

        # Skip apostrophes escaped with a backslash; they belong to the value.
        value_end = text.find("'", value_start)
        while value_end != -1 and text[value_end - 1:value_end] == '\\':
            value_end = text.find("'", value_end + 1)
        if value_end == -1:
            # Opening apostrophe without a closing one: malformed line.
            return False

        # strip() tolerates several spaces between properties.
        properties[text[space_at + 1:marker_at].strip()] = text[value_start:value_end]
        space_at = text.find(' ', value_end)  # look for the next property
    return properties
def getSettings( configFileDirectory ):
    """Read the config file and group its tags by name.

    configFileDirectory is the path of the config file.  Every line that
    contains ``<`` followed by a tag name and a space is treated as a tag;
    the rest of the line is handed to getPropertyList().

    Returns a dict mapping each tag name to a list with one entry per
    occurrence of that tag (each entry is whatever getPropertyList()
    returned for the line), or False when the file cannot be opened.
    """
    try:
        # Read-only: the file is never written, so do not demand write access.
        configFile = open(configFileDirectory, 'r')
    except IOError:
        print('could not open file')
        return False

    settings = {}
    with configFile:  # closed even if parsing raises
        for line in configFile:
            open_at = line.find('<')
            if open_at == -1:  # blank line or plain text: not a tag
                continue
            name_start = open_at + 1
            name_end = line.find(' ', name_start)
            if name_end == -1:  # tag without any property
                continue
            tag = line[name_start:name_end]
            # Parse from the '<' onwards so indentation before the tag is
            # not mistaken for the start of the first property.
            settings.setdefault(tag, []).append(getPropertyList(line[open_at:]))
    return settings
def main():
    """Fetch consecutive pages and record the regex captures of each one.

    The config file path is taken from ``sys.argv[1]``.  The config must
    provide the tags ``output`` (property ``path``, where ``%datetime`` is
    replaced by the current time stamp), ``source`` (property ``url``, where
    ``%leid`` or ``%torrentid`` is replaced by the current index),
    ``firstid`` (property ``id``) and at least one ``regex`` (property
    ``it``).

    Starting at the first id, each page is fetched and passed to
    handleRequestData(); its result stops the run (0), skips the page (-1)
    or lets the page be processed.  For a processed page the first capture
    of every regex is printed to the console and to the output file.

    Returns 0 when startup fails, None once the loop has been stopped.
    """
    colorama.init() #not important
    if len(sys.argv) < 2:
        print('No Startup Args With Cfg File')
        return 0
    settings = getSettings(sys.argv[1].replace('"', ''))
    if settings is False:
        print("fail")
        return 0

    # Required tags, checked in this order, with the complaint for each.
    required_tags = (
        ('output', "Provide output path in cfg file ples."),
        ('source', "Provide a source tag with the url, where '%torrentid' will be replaced by the increment."),
        ('firstid', "Provide a first id. Can be 0."),
        ('regex', "Provide at least 1 regex, otherwise what's the point."),
    )
    for tag, complaint in required_tags:
        if tag not in settings:
            print(complaint)
            return 0

    output_path = settings['output'][0]['path'].replace('%datetime', getFileFormattedTime())
    outf = openOutputFile(output_path.replace('"', ''))
    if outf is False:
        return 0

    # From here on the file is open: always close it, even when a request
    # fails, a regex is invalid or the user interrupts the endless loop.
    try:
        url_template = settings['source'][0]['url']
        # Compile every expression once instead of twice per page.
        patterns = [re.compile(entry["it"]) for entry in settings['regex']]
        CurrIndex = int(settings['firstid'][0]['id'])
        while True:
            # '%leid' is the historical placeholder; '%torrentid' is the one
            # the help message advertises, so honour both.
            url = url_template.replace('%leid', str(CurrIndex)).replace('%torrentid', str(CurrIndex))
            r = requests.get(url)
            h = handleRequestData(r, CurrIndex)
            if h == 0:
                break
            if h != -1:
                CurrentPage = []
                for pattern in patterns:
                    found = pattern.search(r.text)
                    if found is None:
                        CurrentPage.append(regexfail_repl)
                        continue
                    # First capture group, or the whole match when the
                    # expression defines no group at all.
                    value = found.group(1) if pattern.groups else found.group(0)
                    # A group that did not take part in the match yields None.
                    CurrentPage.append(regexfail_repl if value is None else value)
                consolePrintResults(CurrIndex, CurrentPage)
                filePrintResults(outf, CurrIndex, CurrentPage)
            CurrIndex += 1
    finally:
        closeOutputFile(outf)
# Script entry point: run the scraper, then always exit with status 0.
if __name__ == "__main__":
    main()
    raise SystemExit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment