Last active
April 11, 2018 03:06
-
-
Save RealTrisT/d2598728c0be0c7fc5dfb6574859c9d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.parse import quote | |
from os import system, listdir | |
import sys | |
import requests | |
import time | |
import re | |
import colorama | |
#-----------------------------------------------------------------------things you might wanna change | |
# r is the requests response object and ind is the current index; return 0 to stop, -1 to skip this index, and 1 to keep processing
def handleRequestData(r, ind):
    """Decide what the scrape loop should do with the response for index `ind`.

    Returns 0 to stop (the status code is not 200), -1 to skip this index
    (the body contains the "not found" marker text), or 1 to keep processing.
    """
    missing_marker = "looking for something that does not, has not, will not, might not or must not exist"

    status = r.status_code
    if status != 200:
        print("fuck: ", str(status))
        return 0

    if missing_marker in r.text:
        print(str(ind) + " Not found")
        return -1

    return 1
# ind is the current index, and arry is the list that contains the regex search results for the current page
def consolePrintResults(ind, arry):
    """Print one colourised result row for index `ind` to the console.

    Uses the first four entries of `arry`, each left-justified to a fixed
    width; raises IndexError if `arry` has fewer than four entries.
    """
    fore = colorama.Fore

    # Column widths: 64 for the first capture, 20 for the second, 5 for the rest.
    first = arry[0].ljust(64)
    second = arry[1].ljust(20)
    third = arry[2].ljust(5)
    fourth = arry[3].ljust(5)

    print(
        str(ind), ": ",
        fore.LIGHTBLUE_EX, first, fore.RESET,
        " uploader: ", fore.MAGENTA, second, fore.RESET,
        " SE: ", fore.GREEN, third, fore.RESET,
        " LE: ", fore.GREEN, fourth, fore.RESET,
    )
# elfile is the output file object, ind is the current index, and arry is the list that contains the regex search results for the current page
def filePrintResults(elfile, ind, arry):
    """Write one result line for index `ind` to the file object `elfile`.

    The list is stringified and UTF-8 encoded (unencodable characters
    replaced), so the line contains the bytes repr, e.g. ``1 : b"['a']"``.
    """
    encoded = str(arry).encode('utf-8', 'replace')
    print(str(ind), ":", encoded, file=elfile)
# Placeholder stored in the results list in place of a capture when a regex
# finds no match on the current page.
regexfail_repl = "NotFound"
#-----------------------------------------------------------------------things you might wanna change/> | |
def openOutputFile(filen):
    """Open the path `filen` for writing and return the file object.

    Prints a message and returns False if the file cannot be opened.
    """
    try:
        return open(filen, 'w')
    except IOError:
        print('could not open file')
        return False
def closeOutputFile(filen):
    """Close `filen`, which is the open file object (not a path) despite its name."""
    handle = filen
    handle.close()
def getFileFormattedTime():
    """Return the current local time as 'YYYY-MM-DD_HH-MM-SS' for use in file names.

    Every field is zero-padded, including the month, so the generated names
    have a fixed width and sort chronologically.
    """
    return time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
def getPropertyList(text):
    """Parse the name='value' properties of one config tag line into a dict.

    A property starts after a space, its value is wrapped in apostrophes,
    and an apostrophe preceded by a backslash does not end the value (the
    backslash is kept in the returned value).

    Returns a dict mapping property names to raw values (empty when the line
    has no properties), or False when a value is never closed.
    """
    properties = {}
    propertyLocation = text.find(' ')  # the first property follows the first space
    while propertyLocation != -1:
        # Test the find() result before adding the offset; otherwise "not
        # found" (-1) turns into index 1 and a trailing space makes the
        # parser loop forever on garbage.
        assignment = text.find("='", propertyLocation)
        if assignment == -1:  # just a stray space, no more properties
            break
        beginindex = assignment + 2  # first character of the value

        # Skip over apostrophes that are escaped with a backslash.
        tempend = text.find("'", beginindex)
        while tempend != -1 and text[tempend - 1] == '\\':
            tempend = text.find("'", tempend + 1)
        if tempend == -1:  # the value's closing apostrophe is missing
            return False

        # strip() tolerates several spaces between two properties.
        name = text[propertyLocation + 1:assignment].strip()
        properties[name] = text[beginindex:tempend]
        propertyLocation = text.find(' ', tempend)  # spaces inside the value are skipped
    return properties
def getSettings(configFileDirectory):
    """Read the config file and group the properties of each tag by tag name.

    `configFileDirectory` is the path of the config file. Each line of the
    form ``<tag name='value' ...`` contributes one property dict to the list
    stored under ``tag``.

    Returns a dict mapping tag names to lists of property dicts, or False
    when the file cannot be opened or a line's properties are malformed.
    """
    try:
        # Read-only is enough; 'r+' would also demand write permission.
        configFile = open(configFileDirectory, 'r')
    except IOError:
        print('could not open file')
        return False

    returnObject = {}
    with configFile:  # closes the file on every exit path
        for line in configFile:
            # Test find() before adding 1, otherwise a line without '<'
            # is never detected and is parsed as if it were a tag.
            tagstart = line.find('<')
            if tagstart == -1:
                continue
            tagbegin = tagstart + 1
            tagend = line.find(' ', tagbegin)
            if tagend == -1:  # a tag without any properties
                continue

            properties = getPropertyList(line)
            if properties is False:
                # Fail here instead of storing False and crashing later
                # when the caller indexes into it.
                print('malformed line in config file: ' + line.strip())
                return False

            tag = line[tagbegin:tagend]
            returnObject.setdefault(tag, []).append(properties)
    return returnObject
def main():
    """Fetch consecutively numbered pages and record regex captures from each.

    The config file path is taken from the first command-line argument. The
    config must provide an ``output`` tag (``path``, where ``%datetime`` is
    replaced by the current time), a ``source`` tag (``url``, where ``%leid``
    is replaced by the current index), a ``firstid`` tag (``id``) and at
    least one ``regex`` tag (``it``).

    Returns 0 when start-up fails; otherwise runs until handleRequestData
    returns 0 and then returns None.
    """
    colorama.init()  # set up colorama before any coloured output
    if len(sys.argv) < 2:
        print('No Startup Args With Cfg File')
        return 0

    settings = getSettings(sys.argv[1].replace('"', ''))
    if settings is False:
        print("fail")
        return 0

    # The 'source' message names %leid because that is the placeholder
    # substituted into the url below.
    required = (
        ('output', "Provide output path in cfg file ples."),
        ('source', "Provide a source tag with the url, where '%leid' will be replaced by the increment."),
        ('firstid', "Provide a first id. Can be 0."),
        ('regex', "Provide at least 1 regex, otherwise what's the point."),
    )
    for tag, complaint in required:
        if tag not in settings:
            print(complaint)
            return 0

    outputPath = settings['output'][0]['path'].replace('%datetime', getFileFormattedTime())
    outf = openOutputFile(outputPath.replace('"', ''))
    if outf is False:
        return 0

    # try/finally closes the output file even if a request, a regex or the
    # id conversion raises.
    try:
        urlTemplate = settings['source'][0]['url']
        # Compile each pattern once instead of twice per page.
        patterns = [re.compile(regex["it"]) for regex in settings['regex']]
        CurrIndex = int(settings['firstid'][0]['id'])

        while True:
            r = requests.get(urlTemplate.replace('%leid', str(CurrIndex)))
            h = handleRequestData(r, CurrIndex)
            if h == 0:
                break
            if h != -1:  # -1 means skip this index
                CurrentPage = []
                for pattern in patterns:
                    found = pattern.search(r.text)
                    if found is None:
                        CurrentPage.append(regexfail_repl)
                        continue
                    # First capture group; a group that did not take part
                    # in the match yields the placeholder, and a pattern
                    # without groups yields the whole match.
                    captured = found.groups(regexfail_repl)
                    CurrentPage.append(captured[0] if captured else found.group(0))
                consolePrintResults(CurrIndex, CurrentPage)
                filePrintResults(outf, CurrIndex, CurrentPage)
            CurrIndex += 1
    finally:
        closeOutputFile(outf)
# Run the scraper only when this file is executed as a script, then exit
# with status 0.
if __name__ == "__main__":
    main()
    sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment