# ohhdemgirls/imgur-uniq.py

## imgur-uniq.py
import random
import subprocess
import urllib.request
import os
import sys
import requests
import threading
import timeit
from multiprocessing.pool import ThreadPool

# Remote host the candidate images are fetched from, and the two local
# working directories (download area and final destination).
imgUrl = "http://i.imgur.com/"
dlPath = "./temp/"
finalPath = "./output/"
# SHA-256 digest compared against every download by check_sha256sum();
# a file with this digest is deleted as a placeholder.
shaSum = "9b5936f4006146e4e1e9025b474c02863c0b5614132ad40db4b925a10e8bfbb9"

# Run-time counters reported by showInfo().
nLibReqErrors = nUrlsTried = nImgSearch = nUniqueImg = nNoRErrors = 0

# Reference point for the elapsed-time statistics.
startTime = timeit.default_timer()

def randomnes():
	"""Build a random candidate image address.

	Five characters are drawn independently from the ASCII letters and
	digits, ".jpg" is appended, and the result is prefixed with the
	module-level ``imgUrl``.

	Returns a ``(full_url, file_name)`` tuple.
	"""
	alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
	file_name = ''.join(random.choice(alphabet) for _ in range(5)) + ".jpg"
	return (imgUrl + file_name, file_name)

def downloadImages(rTFull, dlPath, filename):
	"""Fetch ``rTFull`` and store the response body as ``dlPath + filename``.

	The target file is always created (and left empty when nothing could be
	downloaded), so the returned path exists after the call.

	Module-level counters updated here: ``nLibReqErrors`` is incremented when
	the request raises, ``nNoRErrors`` when the request succeeds but the body
	is empty.

	Returns the path of the local file.
	"""
	global nLibReqErrors,nNoRErrors
	local_file_name = dlPath + filename

	# Bound before the request so a failed urlopen cannot leave it undefined.
	payload = b""
	try:
		# The context manager releases the connection once the body is read.
		with urllib.request.urlopen(rTFull) as response:
			payload = response.read()
	except Exception:
		# Best effort: skip this URL, but let KeyboardInterrupt/SystemExit through.
		nLibReqErrors+=1
	else:
		if not payload:
			nNoRErrors+=1

	with open(local_file_name, 'wb') as f:
		f.write(payload)

	return local_file_name

def check_sha256sum(f, shaSum):
	"""Compare the SHA-256 digest of file ``f`` with ``shaSum``.

	``shaSum`` is compared against the lowercase hexadecimal digest.

	Returns 0 after deleting the file when the digests match (the file is
	treated as the placeholder image), otherwise returns 1 and leaves the
	file in place.

	Raises ``OSError`` if the file cannot be read.
	"""
	# Local import keeps the block self-contained; hashing in-process avoids
	# depending on an external ``sha256sum`` executable.
	import hashlib

	digest = hashlib.sha256()
	with open(f, 'rb') as handle:
		# Hash in chunks so a large download is not read into memory at once.
		for chunk in iter(lambda: handle.read(65536), b''):
			digest.update(chunk)

	if digest.hexdigest() == shaSum:
		os.remove(f)
		return 0
	return 1

def check_output_dir(f):
	"""Ensure that the directory part of path ``f`` exists.

	``f`` is split with ``os.path.dirname``; for a path ending in a separator
	(such as "./temp/") that is the directory itself. Nothing is created when
	``f`` has no directory component.
	"""
	outputDir = os.path.dirname(f)
	print ("Checking if output folders exist... If they don't they will be created now.")
	# An empty dirname has nothing to create; os.makedirs("") would raise.
	if outputDir and not os.path.exists(outputDir):
		# exist_ok covers the directory appearing between the test and the call.
		os.makedirs(outputDir, exist_ok=True)

# Make sure the download and output directories exist before any work starts.
for _work_dir in (dlPath, finalPath):
	check_output_dir(_work_dir)

def rndImgUrl():
	"""Try one random image URL.

	Increments the module-level ``nUrlsTried`` counter, downloads the
	candidate into ``dlPath`` and checks it against ``shaSum``.

	Returns the file name when ``check_sha256sum`` reports a value above
	zero, otherwise 0.
	"""
	global nUrlsTried
	nUrlsTried+=1
	full_url, file_name = randomnes()
	saved_path = downloadImages(full_url, dlPath, file_name)
	if check_sha256sum(saved_path, shaSum) > 0:
		return file_name
	return 0

def googleSearch(threadID, url, filename):
	"""Reverse-search ``url`` on Google and file the image accordingly.

	The downloaded copy ``dlPath + filename`` is deleted when the result page
	contains "Pages that include matching images"; otherwise it is moved to
	``finalPath`` and counted as unique.

	``threadID`` is accepted but not used. The module-level counters
	``nImgSearch`` and ``nUniqueImg`` are updated.
	"""
	global nImgSearch,nUniqueImg
	headers = {
		'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17",
	}
	search_url = 'http://www.google.com/searchbyimage?image_url=' + url
	r = requests.get(search_url , headers=headers)

	nImgSearch+=1

	if "Pages that include matching images" in r.text:
		os.remove(dlPath + filename)
	else:
		# Use the ``filename`` argument rather than a module-level ``fileName``,
		# so the function moves the file it was actually asked about.
		os.rename(dlPath+filename, finalPath+filename)
		nUniqueImg+=1

def showInfo():
	"""Clear the terminal and print the current run statistics.

	Ratios and the searches-per-second rate are printed only once at least
	one URL has been tried and one image has been reverse searched.
	"""
	def _print_queue_and_errors():
		# Lines shared by both report layouts.
		print ("")
		print ("Number of images in queue: ", len(urlsInQueue))
		print ("Number of librequest errors: ", nLibReqErrors)
		print ("Number of noR errors: ", nNoRErrors)
		print ("")

	os.system('clear')

	if (nUrlsTried<=0 or nImgSearch<=0):
		# Nothing to divide by yet: plain counters only.
		print ("Number of URLs tried: ", nUrlsTried)
		print ("Number of images reverse searched: ", nImgSearch)
		print ("Number of unique images found: ", nUniqueImg)
		_print_queue_and_errors()
		print ("Runing for : ", timeit.default_timer()-startTime, "seconds")
		return

	timeElapsed=timeit.default_timer()-startTime
	print ("Number of URLs tried: ", nUrlsTried, " | Valid: ", nImgSearch/nUrlsTried*100, "%")
	print ("Number of images reverse searched: ", nImgSearch)
	print ("Number of unique images found: ", nUniqueImg, " | Percentage: ", nUniqueImg/nImgSearch*100)
	_print_queue_and_errors()
	print ("Runing for : ",timeElapsed, "seconds")
	print ("Image Searches per second : ", nImgSearch/timeElapsed)

# Pending reverse-image searches, kept as parallel lists: both are appended
# to and popped from together, so matching positions belong to one image.
urlsInQueue=[]
fileNames=[]

pool = ThreadPool(processes=1)
# Placeholder thread that is never started, so the first liveness check is False.
t = threading.Thread(target=googleSearch, args=(0, "", "",))

while 1:
	# Start the next search only once the previous search thread has finished.
	# Thread.isAlive() was removed in Python 3.9; is_alive() is the supported name.
	if (len(urlsInQueue)>=1 and not t.is_alive()):
		fileName=fileNames.pop()
		t = threading.Thread(target=googleSearch, args=(0, urlsInQueue.pop(), fileName,))
		t.start()
		showInfo()

	# Keep up to ten candidates queued; .get() blocks until the attempt is done.
	if (len(urlsInQueue)<10):
		async_result = pool.apply_async(rndImgUrl)
		return_val = async_result.get()
		if (return_val!=0):
			urlsInQueue.append(imgUrl + return_val)
			fileNames.append(return_val)

		showInfo()
	import random
	import subprocess
	import urllib.request
	import os
	import sys
	import requests
	import threading
	import timeit
	from multiprocessing.pool import ThreadPool

	# Remote host the candidate images are fetched from, and the two local
	# working directories (download area and final destination).
	imgUrl = "http://i.imgur.com/"
	dlPath = "./temp/"
	finalPath = "./output/"
	# SHA-256 digest compared against every download; a file with this
	# digest is deleted as a placeholder.
	shaSum = "9b5936f4006146e4e1e9025b474c02863c0b5614132ad40db4b925a10e8bfbb9"

	# Run-time counters reported by showInfo().
	nLibReqErrors = nUrlsTried = nImgSearch = nUniqueImg = nNoRErrors = 0

	# Reference point for the elapsed-time statistics.
	startTime = timeit.default_timer()

	def randomnes():
		"""Build a random candidate image address.

		Five characters are drawn independently from the ASCII letters and
		digits, ".jpg" is appended, and the result is prefixed with the
		module-level ``imgUrl``.

		Returns a ``(full_url, file_name)`` tuple.
		"""
		alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
		file_name = ''.join(random.choice(alphabet) for _ in range(5)) + ".jpg"
		return (imgUrl + file_name, file_name)

	def downloadImages(rTFull, dlPath, filename):
		"""Fetch ``rTFull`` and store the response body as ``dlPath + filename``.

		The target file is always created (and left empty when nothing could
		be downloaded), so the returned path exists after the call.

		Module-level counters updated here: ``nLibReqErrors`` is incremented
		when the request raises, ``nNoRErrors`` when the request succeeds but
		the body is empty.

		Returns the path of the local file.
		"""
		global nLibReqErrors,nNoRErrors
		local_file_name = dlPath + filename

		# Bound before the request so a failed urlopen cannot leave it undefined.
		payload = b""
		try:
			# The context manager releases the connection once the body is read.
			with urllib.request.urlopen(rTFull) as response:
				payload = response.read()
		except Exception:
			# Best effort: skip this URL, but let KeyboardInterrupt/SystemExit through.
			nLibReqErrors+=1
		else:
			if not payload:
				nNoRErrors+=1

		with open(local_file_name, 'wb') as f:
			f.write(payload)

		return local_file_name

	def check_sha256sum(f, shaSum):
		"""Compare the SHA-256 digest of file ``f`` with ``shaSum``.

		``shaSum`` is compared against the lowercase hexadecimal digest.

		Returns 0 after deleting the file when the digests match (the file is
		treated as the placeholder image), otherwise returns 1 and leaves the
		file in place.

		Raises ``OSError`` if the file cannot be read.
		"""
		# Local import keeps the block self-contained; hashing in-process
		# avoids depending on an external ``sha256sum`` executable.
		import hashlib

		digest = hashlib.sha256()
		with open(f, 'rb') as handle:
			# Hash in chunks so a large download is not read into memory at once.
			for chunk in iter(lambda: handle.read(65536), b''):
				digest.update(chunk)

		if digest.hexdigest() == shaSum:
			os.remove(f)
			return 0
		return 1

	def check_output_dir(f):
		"""Ensure that the directory part of path ``f`` exists.

		``f`` is split with ``os.path.dirname``; for a path ending in a
		separator (such as "./temp/") that is the directory itself. Nothing
		is created when ``f`` has no directory component.
		"""
		outputDir = os.path.dirname(f)
		print ("Checking if output folders exist... If they don't they will be created now.")
		# An empty dirname has nothing to create; os.makedirs("") would raise.
		if outputDir and not os.path.exists(outputDir):
			# exist_ok covers the directory appearing between the test and the call.
			os.makedirs(outputDir, exist_ok=True)

	# Make sure the download and output directories exist before any work starts.
	for _work_dir in (dlPath, finalPath):
		check_output_dir(_work_dir)

	def rndImgUrl():
		"""Try one random image URL.

		Increments the module-level ``nUrlsTried`` counter, downloads the
		candidate into ``dlPath`` and checks it against ``shaSum``.

		Returns the file name when ``check_sha256sum`` reports a value above
		zero, otherwise 0.
		"""
		global nUrlsTried
		nUrlsTried+=1
		full_url, file_name = randomnes()
		saved_path = downloadImages(full_url, dlPath, file_name)
		if check_sha256sum(saved_path, shaSum) > 0:
			return file_name
		return 0

	def googleSearch(threadID, url, filename):
		"""Reverse-search ``url`` on Google and file the image accordingly.

		The downloaded copy ``dlPath + filename`` is deleted when the result
		page contains "Pages that include matching images"; otherwise it is
		moved to ``finalPath`` and counted as unique.

		``threadID`` is accepted but not used. The module-level counters
		``nImgSearch`` and ``nUniqueImg`` are updated.
		"""
		global nImgSearch,nUniqueImg
		headers = {
			'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17",
		}
		search_url = 'http://www.google.com/searchbyimage?image_url=' + url
		r = requests.get(search_url , headers=headers)

		nImgSearch+=1

		if "Pages that include matching images" in r.text:
			os.remove(dlPath + filename)
		else:
			# Use the ``filename`` argument rather than a module-level
			# ``fileName``, so the function moves the file it was asked about.
			os.rename(dlPath+filename, finalPath+filename)
			nUniqueImg+=1

	def showInfo():
		"""Clear the terminal and print the current run statistics.

		Ratios and the searches-per-second rate are printed only once at
		least one URL has been tried and one image has been reverse searched.
		"""
		os.system('clear')
		if (nUrlsTried>0 and nImgSearch>0):
			timeElapsed=timeit.default_timer()-startTime
			# Plain "|" separators: a backslash before the pipe would be printed literally.
			print ("Number of URLs tried: ", nUrlsTried, " | Valid: ", nImgSearch/nUrlsTried*100, "%")
			print ("Number of images reverse searched: ", nImgSearch)
			print ("Number of unique images found: ", nUniqueImg, " | Percentage: ", nUniqueImg/nImgSearch*100)
			print ("")
			print ("Number of images in queue: ", len(urlsInQueue))
			print ("Number of librequest errors: ", nLibReqErrors)
			print ("Number of noR errors: ", nNoRErrors)
			print ("")
			print ("Runing for : ",timeElapsed, "seconds")
			print ("Image Searches per second : ", nImgSearch/timeElapsed)
		else:
			# Nothing to divide by yet: plain counters only.
			print ("Number of URLs tried: ", nUrlsTried)
			print ("Number of images reverse searched: ", nImgSearch)
			print ("Number of unique images found: ", nUniqueImg)
			print ("")
			print ("Number of images in queue: ", len(urlsInQueue))
			print ("Number of librequest errors: ", nLibReqErrors)
			print ("Number of noR errors: ", nNoRErrors)
			print ("")
			print ("Runing for : ", timeit.default_timer()-startTime, "seconds")

	# Pending reverse-image searches, kept as parallel lists: both are appended
	# to and popped from together, so matching positions belong to one image.
	urlsInQueue=[]
	fileNames=[]

	pool = ThreadPool(processes=1)
	# Placeholder thread that is never started, so the first liveness check is False.
	t = threading.Thread(target=googleSearch, args=(0, "", "",))

	while 1:
		# Start the next search only once the previous search thread has finished.
		# Thread.isAlive() was removed in Python 3.9; is_alive() is the supported name.
		if (len(urlsInQueue)>=1 and not t.is_alive()):
			fileName=fileNames.pop()
			t = threading.Thread(target=googleSearch, args=(0, urlsInQueue.pop(), fileName,))
			t.start()
			showInfo()

		# Keep up to ten candidates queued; .get() blocks until the attempt is done.
		if (len(urlsInQueue)<10):
			async_result = pool.apply_async(rndImgUrl)
			return_val = async_result.get()
			if (return_val!=0):
				urlsInQueue.append(imgUrl + return_val)
				fileNames.append(return_val)

			showInfo()