Skip to content

Instantly share code, notes, and snippets.

@ohhdemgirls
Created January 31, 2014 08:13
Show Gist options
  • Save ohhdemgirls/8728258 to your computer and use it in GitHub Desktop.
Save ohhdemgirls/8728258 to your computer and use it in GitHub Desktop.
import hashlib
import os
import random
import subprocess
import sys
import threading
import timeit
import urllib.request
from multiprocessing.pool import ThreadPool

import requests
# URL prefix that random image names are appended to.
imgUrl = "http://i.imgur.com/"
# Directory candidate images are downloaded into.
dlPath = "./temp/"
# Directory images are moved to when the reverse search finds no match.
finalPath = "./output/"
# SHA-256 of the file that is treated as a placeholder and deleted.
shaSum = "9b5936f4006146e4e1e9025b474c02863c0b5614132ad40db4b925a10e8bfbb9"

# Run statistics; all start at zero and are bumped by the functions below.
nLibReqErrors = nUrlsTried = nImgSearch = nUniqueImg = nNoRErrors = 0

# Reference point for the elapsed-time figures printed by showInfo().
startTime = timeit.default_timer()
def randomnes():
    """Build a random five-character ``.jpg`` file name and its full URL.

    Returns:
        A ``(url, name)`` tuple where ``name`` is five random ASCII
        letters/digits followed by ``.jpg`` and ``url`` is ``imgUrl + name``.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    # One random.choice() call per character, five characters in total.
    stem = ''.join(random.choice(alphabet) for _ in range(5))
    name = stem + ".jpg"
    return (imgUrl + name, name)
def downloadImages(rTFull, dlPath, filename):
    """Download ``rTFull`` and store the payload as ``dlPath + filename``.

    The target file is always created, even when the download fails or the
    response is empty, because rndImgUrl() hashes the returned path right
    after this call.

    Args:
        rTFull: Full URL to fetch.
        dlPath: Directory prefix (including trailing separator) to write into.
        filename: Name of the file to create inside ``dlPath``.

    Returns:
        The path of the local file, ``dlPath + filename``.

    Side effects:
        Increments the module counter ``nLibReqErrors`` when the request
        raises, and ``nNoRErrors`` when the request succeeds with an empty
        body.
    """
    global nLibReqErrors, nNoRErrors
    local_file_name = dlPath + filename
    # Start with an empty payload so a failed request can never leave the
    # variable unbound (the original crashed with UnboundLocalError here).
    payload = b""
    try:
        with urllib.request.urlopen(rTFull) as response:
            payload = response.read()
    except Exception:
        # Deliberate best effort: a bad URL must not stop the crawl.  Unlike
        # a bare ``except:``, this still lets KeyboardInterrupt through.
        nLibReqErrors += 1
    else:
        if not payload:
            nNoRErrors += 1
    with open(local_file_name, 'wb') as f:
        f.write(payload)
    return local_file_name
def check_sha256sum(f, shaSum):
    """Compare the SHA-256 of file ``f`` with ``shaSum``; delete it on a match.

    The digest is computed with ``hashlib`` rather than by invoking the
    external ``sha256sum`` program, so no external binary is required.

    Args:
        f: Path of the file to hash.
        shaSum: Lower-case hexadecimal SHA-256 digest to compare against.

    Returns:
        0 if the digests match (the file is removed), 1 otherwise (the file
        is left in place).

    Raises:
        OSError: If the file cannot be opened, read or removed.
    """
    digest = hashlib.sha256()
    with open(f, 'rb') as fh:
        # Hash in chunks so large files are not loaded into memory at once.
        for chunk in iter(lambda: fh.read(65536), b""):
            digest.update(chunk)
    if digest.hexdigest() == shaSum:
        os.remove(f)
        return 0
    return 1
def check_output_dir(f):
    """Create the directory part of path ``f`` if it does not exist yet.

    Args:
        f: A path; only its ``os.path.dirname`` component is created.  A
            path without a directory component is a no-op.
    """
    outputDir = os.path.dirname(f)
    print ("Checking if output folders exist... If they don't they will be created now.")
    # An empty dirname (e.g. f == "name.txt") would make makedirs("") raise.
    # exist_ok covers the directory appearing between the check and the call.
    if outputDir and not os.path.exists(outputDir):
        os.makedirs(outputDir, exist_ok=True)
# Make sure the download and output directories exist before any work starts.
for _requiredDir in (dlPath, finalPath):
    check_output_dir(_requiredDir)
def rndImgUrl():
    """Try one random image URL and report whether a non-placeholder came back.

    Increments ``nUrlsTried``, downloads a random URL from randomnes() into
    ``dlPath`` and checks the downloaded file against ``shaSum``.

    Returns:
        The short file name when check_sha256sum() reports a positive
        result, otherwise the integer 0.
    """
    global nUrlsTried
    nUrlsTried += 1
    fullUrl, shortName = randomnes()
    savedPath = downloadImages(fullUrl, dlPath, shortName)
    if check_sha256sum(savedPath, shaSum) > 0:
        return shortName
    return 0
def googleSearch(threadID, url, filename):
    """Reverse-search ``url`` on Google and keep the file only if unmatched.

    If the response text contains "Pages that include matching images" the
    downloaded file is deleted from ``dlPath``; otherwise it is moved from
    ``dlPath`` to ``finalPath`` and counted as unique.

    Args:
        threadID: Unused by this function.
        url: Image URL appended to the search-by-image query.
        filename: Name of the downloaded file inside ``dlPath``.

    Side effects:
        Increments ``nImgSearch`` after each request and ``nUniqueImg`` when
        the file is kept.
    """
    global nImgSearch, nUniqueImg
    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17",
    }
    url = 'http://www.google.com/searchbyimage?image_url=' + url
    r = requests.get(url, headers=headers)
    nImgSearch += 1
    if "Pages that include matching images" in r.text:
        os.remove(dlPath + filename)
    else:
        # Use the ``filename`` parameter; the original read the unrelated
        # module-level name ``fileName`` here and only worked by accident.
        os.rename(dlPath + filename, finalPath + filename)
        nUniqueImg += 1
def showInfo():
    """Clear the terminal and print the current run statistics.

    When at least one URL has been tried and one image searched, the report
    also includes percentages and a searches-per-second rate; otherwise only
    the raw counters are printed.
    """
    os.system('clear')
    detailed = nUrlsTried > 0 and nImgSearch > 0

    # Header section: with or without the derived percentages.
    if detailed:
        timeElapsed = timeit.default_timer() - startTime
        print ("Number of URLs tried: ", nUrlsTried, " | Valid: ", nImgSearch/nUrlsTried*100, "%")
        print ("Number of images reverse searched: ", nImgSearch)
        print ("Number of unique images found: ", nUniqueImg, " | Percentage: ", nUniqueImg/nImgSearch*100)
    else:
        print ("Number of URLs tried: ", nUrlsTried)
        print ("Number of images reverse searched: ", nImgSearch)
        print ("Number of unique images found: ", nUniqueImg)

    # Section common to both report variants.
    print ("")
    print ("Number of images in queue: ", len(urlsInQueue))
    print ("Number of librequest errors: ", nLibReqErrors)
    print ("Number of noR errors: ", nNoRErrors)
    print ("")

    # Timing section.
    if detailed:
        print ("Runing for : ",timeElapsed, "seconds")
        print ("Image Searches per second : ", nImgSearch/timeElapsed)
    else:
        print ("Runing for : ", timeit.default_timer()-startTime, "seconds")
# Pending reverse-search work: urlsInQueue[i] is the URL for fileNames[i].
urlsInQueue = []
fileNames = []
pool = ThreadPool(processes=1)
# Placeholder thread that is never started; it only gives the loop below an
# object whose is_alive() returns False on the first pass.
t = threading.Thread(target=googleSearch, args=(0, "", "",))
# NOTE(review): the source's indentation was lost; the placement of the two
# showInfo() calls below is a reconstruction -- confirm against the original.
while 1:
    # Start the next reverse search once the previous one has finished.
    # Thread.isAlive() was removed in Python 3.9; is_alive() is the
    # supported spelling.
    if (len(urlsInQueue) >= 1 and not t.is_alive()):
        fileName = fileNames.pop()
        t = threading.Thread(target=googleSearch, args=(0, urlsInQueue.pop(), fileName,))
        t.start()
        showInfo()
    # Keep probing random URLs while fewer than 10 images are queued.
    if (len(urlsInQueue) < 10):
        async_result = pool.apply_async(rndImgUrl)
        return_val = async_result.get()
        if (return_val != 0):
            urlsInQueue.append(imgUrl + return_val)
            fileNames.append(return_val)
            showInfo()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment