Skip to content

Instantly share code, notes, and snippets.

@rmukh
Last active January 3, 2019 20:43
Show Gist options
  • Save rmukh/24d1c8cde8f9a4a009845f0b6e2f2110 to your computer and use it in GitHub Desktop.
Save rmukh/24d1c8cde8f9a4a009845f0b6e2f2110 to your computer and use it in GitHub Desktop.
Simple random scraper/crawler for LightShot (prnt.sc) screenshots. Usage: python prntscScraper.py n_of_threads. Requires Python 3. Made for research purposes only.
from os import remove, path, makedirs
from random import choices, randint
from sys import argv, exit
from urllib.request import urlretrieve
from string import digits, ascii_uppercase, ascii_lowercase
from threading import Thread
# Module-level settings shared by the worker function and the start-up code.

# Folder that downloaded files are written to.
store_fldr = 'images'

# Thread numbering starts here; the start-up code counts up from this value.
temp = 1

# File sizes in bytes that mark a download as invalid: a file whose size is
# in this list is reported as invalid and deleted again.
# NOTE(review): presumably the sizes of the host's placeholder images - confirm.
incrt_sizes = [0, 503, 4939, 4940, 4941, 12003, 5556]
def _random_pic_name():
    """Return a random candidate file name: a 5- or 6-character ID plus ".jpg"."""
    mixed_case = ascii_uppercase + digits + ascii_lowercase
    if randint(5, 6) == 6:
        # 6-character IDs: three mixed-case/digit characters followed by
        # three lowercase/digit characters.
        head = ''.join(choices(mixed_case, k=3))
        tail = ''.join(choices(digits + ascii_lowercase, k=3))
        return head + tail + ".jpg"
    # 5-character IDs draw every character from the mixed-case/digit set.
    return ''.join(choices(mixed_case, k=5)) + ".jpg"


def _discard(name):
    """Delete the file *name*, ignoring the case where it cannot be removed."""
    try:
        remove(name)
    except OSError:
        # Covers "file does not exist" as well as other removal failures;
        # a leftover file must never stop the worker.
        pass


def _fetch(pic):
    """Download the candidate *pic* into store_fldr and keep it only if valid.

    A download counts as invalid when the size of the saved file is listed
    in incrt_sizes; such files are deleted again.
    """
    name = path.join(store_fldr, pic)
    retr_url = "http://i.imgur.com/" + pic
    try:
        urlretrieve(retr_url, name)
        size = path.getsize(name)
    except Exception as err:
        # Deliberately broad: any failure of a single download must not end
        # the worker loop. Unlike a bare "except", this does not swallow
        # SystemExit or KeyboardInterrupt, and the failure is reported
        # instead of being silently dropped.
        print("[!] Failed: " + pic + " (" + str(err) + ")")
        # A failed transfer may leave a partial file behind; do not keep it.
        _discard(name)
        return
    if size in incrt_sizes:
        print("[-] Invalid: " + pic)
        _discard(name)
    else:
        print("[+] Valid: " + retr_url)


def getPic():
    """Worker loop: keep trying random image names until the process ends.

    Each iteration builds a random 5- or 6-character ".jpg" name, downloads
    it from i.imgur.com into store_fldr, and prints whether the result was
    valid, invalid (size listed in incrt_sizes; file deleted) or failed.
    Never returns.
    """
    while True:
        _fetch(_random_pic_name())
if __name__ == '__main__':
    # Script entry point: read the thread count from the command line, make
    # sure the output folder exists, then start that many getPic workers.
    usage = "Usage: python " + argv[0] + " (number of threads)"
    if len(argv) < 2:
        exit(usage)
    try:
        n_thread = int(argv[1])
    except ValueError:
        # A non-numeric argument gets the usage text rather than a traceback.
        exit(usage)
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the folder in between.
    makedirs(store_fldr, exist_ok=True)
    # A bounded loop guarantees termination: previously a failing start()
    # skipped the counter increment and the loop retried forever.
    for thread_no in range(temp, n_thread + 1):
        print("Starting thread #" + str(thread_no))
        try:
            Thread(target=getPic).start()
        except RuntimeError:
            # Thread.start() raises RuntimeError when the thread cannot be
            # started; stop launching more and let the running workers go on.
            print("Error initializing thread...")
            break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment