# buzztiaan/

import urllib
import random
import gzip
import time
import os
import re

# Pool of browser User-Agent strings; URLOpener picks one of these at random.
user_agents = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20100101 Ubuntu/raring Firefox/24.0',
]
class URLOpener(urllib.FancyURLopener):
    """FancyURLopener subclass that overrides the opener's ``version`` string.

    ``version`` is what urllib's opener classes send as the User-Agent.
    """
    # Evaluated once, when the class body runs, so every request made in
    # this process uses the same randomly chosen entry of ``user_agents``.
    version = random.choice(user_agents)
# Module-wide fetch function: the bound ``open`` of a single URLOpener instance.
urlopen = URLOpener().open

#if os.path.exists('randomseed'):
#    randomlist = [int(x) for x in open('randomseed','r').read().split(' ')]
#else:
#    random.seed()
#    randomlist = range(1,100000)
#    print('Shuffling...')
#    random.shuffle(randomlist)
#    print('Shuffling Complete.')
#    rfile = open('randomseed','w')
#    rfile.write(' '.join([str(x) for x in randomlist]))
#    rfile.close()

def _fetch_with_retry(url):
    """Return the body of ``url``, retrying until a non-empty body arrives.

    An ``IOError`` raised while opening or reading is printed and followed
    by a random pause of 2-4 seconds; an empty body is retried immediately.
    """
    data = ''
    while data == '':
        try:
            data = urlopen(url).read()
        except IOError as e:
            print (e.strerror)
            time.sleep(random.randrange(2, 5))
    return data


def grabber(maxcount=0):
    """Download thing pages and zip archives for consecutive thing ids.

    Walks ids upward from a starting id, skipping every id that already has
    a ``thing_<id>.html`` or ``thing_<id>.zip`` in the current directory.
    For each remaining id the ``/zip`` URL is fetched first; when that
    response is not an HTML page, the thing page is fetched as well and both
    are written to disk.  The walk ends once the miss counter reaches its
    limit; the counter is reset to 0 after every successful download.

    maxcount -- id of the newest known thing, or 0 when it is unknown.
        * 0: start at id 111000 with a miss limit of 30.
        * non-zero: start at the id stored in the ``lastmaxcount`` file
          (111000 when that file is missing or empty), store ``maxcount``
          in that file for the next run, and use a miss limit of 1.

    Raises ValueError if ``lastmaxcount`` holds something that is not an
    integer.
    """
    default_start = 111000
    maxbadcount = 30
    oldmax = default_start

    # Value comparison, not ``is``: identity of int/str literals is an
    # interpreter caching detail and newer interpreters warn about it.
    if maxcount != 0:
        try:
            with open('lastmaxcount', 'r') as mc:
                lm = mc.read().strip()
        except IOError:
            # No checkpoint file yet (first run): use the default start id.
            lm = ''
        if lm != '':
            oldmax = int(lm)
        # Rewrite the file from scratch so a shorter number can never leave
        # stale trailing digits from the previous value behind.
        with open('lastmaxcount', 'w') as mc:
            mc.write(str(maxcount))
        maxbadcount = 1

    base_url = 'http://www.thingiverse.com/thing:'
    end = '/zip'
    # A bad zip only counts as a miss for ids beyond this threshold.
    bad_zip_threshold = maxcount if maxcount != 0 else default_start

    badcount = 0
    num = oldmax - 1

    while badcount < maxbadcount:
        num = num + 1
        html_path = 'thing_' + str(num) + '.html'
        zip_path = 'thing_' + str(num) + '.zip'
        if os.path.exists(html_path) or os.path.exists(zip_path):
            print ('We have thing:' + str(num))
            continue

        thing_url = base_url + str(num)
        print ('Opening ' + thing_url + end + ' ...')
        zipdata = _fetch_with_retry(thing_url + end)

        if '<!DOCTYPE html>' in zipdata:
            # The zip URL answered with a web page instead of an archive.
            print ('Bad zip. Sorry!')
            if num > bad_zip_threshold:
                badcount = badcount + 1
                print ('Bad counter now at ' + str(badcount))
        else:
            time.sleep(random.randrange(1, 4))
            print ('Opening ' + thing_url + ' ...')
            pagedata = _fetch_with_retry(thing_url)
            if 'YOU HAVE REACHED THE END OF THE THINGIVERSE' in pagedata:
                print ('Thing not found.')
                if num > 100000:
                    badcount = badcount + 1
                    print ('Bad counter now at ' + str(badcount))
            else:
                print ('Page OK.')
                badcount = 0
                print ('Writing data.')
                # The archive is binary, so it must be written in 'wb' mode;
                # text mode can alter line-ending bytes on some platforms.
                with open(html_path, 'w') as ofile, open(zip_path, 'wb') as zfile:
                    ofile.write(pagedata)
                    zfile.write(zipdata)

        # Short random pause (0 or 1 s) between ids that needed a request.
        time.sleep(random.randrange(0, 2))

def newestfinder():
    """Return the first thing id found on the Thingiverse "newest" page.

    Fetches ``http://www.thingiverse.com/newest`` and searches the body for
    the first ``thing-name-<digits>`` occurrence.  The matched id is printed
    and returned as an int.  When nothing matches, the fetched page is
    printed instead and 0 is returned.
    """
    pdata = urlopen('http://www.thingiverse.com/newest').read()
    # Raw string: ``\d`` is a regex escape, not a string escape.
    match = re.search(r'thing-name-(\d+)', pdata)
    if match is None:
        print (pdata)
        return 0
    newest_id = match.group(1)
    print (newest_id)
    return int(newest_id)

# Runs whenever this module is executed or imported: look up the newest
# thing id, then start a download pass with it.
grabber(newestfinder())
	import urllib
	import random
	import gzip
	import time
	import os
	import re

	# Pool of browser User-Agent strings; URLOpener picks one of these at random.
	user_agents = [
	'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
	'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
	'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
	'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20100101 Ubuntu/raring Firefox/24.0',
	]
	class URLOpener(urllib.FancyURLopener):
		"""FancyURLopener subclass that overrides the opener's ``version`` string.

		``version`` is what urllib's opener classes send as the User-Agent.
		"""
		# Evaluated once, when the class body runs, so every request made in
		# this process uses the same randomly chosen entry of ``user_agents``.
		version = random.choice(user_agents)
	# Module-wide fetch function: the bound ``open`` of a single URLOpener instance.
	urlopen = URLOpener().open

	#if os.path.exists('randomseed'):
	# randomlist = [int(x) for x in open('randomseed','r').read().split(' ')]
	#else:
	# random.seed()
	# randomlist = range(1,100000)
	# print('Shuffling...')
	# random.shuffle(randomlist)
	# print('Shuffling Complete.')
	# rfile = open('randomseed','w')
	# rfile.write(' '.join([str(x) for x in randomlist]))
	# rfile.close()

	def _fetch_with_retry(url):
		"""Return the body of ``url``, retrying until a non-empty body arrives.

		An ``IOError`` raised while opening or reading is printed and followed
		by a random pause of 2-4 seconds; an empty body is retried immediately.
		"""
		data = ''
		while data == '':
			try:
				data = urlopen(url).read()
			except IOError as e:
				print (e.strerror)
				time.sleep(random.randrange(2, 5))
		return data


	def grabber(maxcount=0):
		"""Download thing pages and zip archives for consecutive thing ids.

		Walks ids upward from a starting id, skipping every id that already has
		a ``thing_<id>.html`` or ``thing_<id>.zip`` in the current directory.
		For each remaining id the ``/zip`` URL is fetched first; when that
		response is not an HTML page, the thing page is fetched as well and both
		are written to disk.  The walk ends once the miss counter reaches its
		limit; the counter is reset to 0 after every successful download.

		maxcount -- id of the newest known thing, or 0 when it is unknown.
			* 0: start at id 111000 with a miss limit of 30.
			* non-zero: start at the id stored in the ``lastmaxcount`` file
			  (111000 when that file is missing or empty), store ``maxcount``
			  in that file for the next run, and use a miss limit of 1.

		Raises ValueError if ``lastmaxcount`` holds something that is not an
		integer.
		"""
		default_start = 111000
		maxbadcount = 30
		oldmax = default_start

		# Value comparison, not ``is``: identity of int/str literals is an
		# interpreter caching detail and newer interpreters warn about it.
		if maxcount != 0:
			try:
				with open('lastmaxcount', 'r') as mc:
					lm = mc.read().strip()
			except IOError:
				# No checkpoint file yet (first run): use the default start id.
				lm = ''
			if lm != '':
				oldmax = int(lm)
			# Rewrite the file from scratch so a shorter number can never leave
			# stale trailing digits from the previous value behind.
			with open('lastmaxcount', 'w') as mc:
				mc.write(str(maxcount))
			maxbadcount = 1

		base_url = 'http://www.thingiverse.com/thing:'
		end = '/zip'
		# A bad zip only counts as a miss for ids beyond this threshold.
		bad_zip_threshold = maxcount if maxcount != 0 else default_start

		badcount = 0
		num = oldmax - 1

		while badcount < maxbadcount:
			num = num + 1
			html_path = 'thing_' + str(num) + '.html'
			zip_path = 'thing_' + str(num) + '.zip'
			if os.path.exists(html_path) or os.path.exists(zip_path):
				print ('We have thing:' + str(num))
				continue

			thing_url = base_url + str(num)
			print ('Opening ' + thing_url + end + ' ...')
			zipdata = _fetch_with_retry(thing_url + end)

			if '<!DOCTYPE html>' in zipdata:
				# The zip URL answered with a web page instead of an archive.
				print ('Bad zip. Sorry!')
				if num > bad_zip_threshold:
					badcount = badcount + 1
					print ('Bad counter now at ' + str(badcount))
			else:
				time.sleep(random.randrange(1, 4))
				print ('Opening ' + thing_url + ' ...')
				pagedata = _fetch_with_retry(thing_url)
				if 'YOU HAVE REACHED THE END OF THE THINGIVERSE' in pagedata:
					print ('Thing not found.')
					if num > 100000:
						badcount = badcount + 1
						print ('Bad counter now at ' + str(badcount))
				else:
					print ('Page OK.')
					badcount = 0
					print ('Writing data.')
					# The archive is binary, so it must be written in 'wb' mode;
					# text mode can alter line-ending bytes on some platforms.
					with open(html_path, 'w') as ofile, open(zip_path, 'wb') as zfile:
						ofile.write(pagedata)
						zfile.write(zipdata)

			# Short random pause (0 or 1 s) between ids that needed a request.
			time.sleep(random.randrange(0, 2))

	def newestfinder():
		"""Return the first thing id found on the Thingiverse "newest" page.

		Fetches ``http://www.thingiverse.com/newest`` and searches the body for
		the first ``thing-name-<digits>`` occurrence.  The matched id is printed
		and returned as an int.  When nothing matches, the fetched page is
		printed instead and 0 is returned.
		"""
		pdata = urlopen('http://www.thingiverse.com/newest').read()
		# Raw string: ``\d`` is a regex escape, not a string escape.
		match = re.search(r'thing-name-(\d+)', pdata)
		if match is None:
			print (pdata)
			return 0
		newest_id = match.group(1)
		print (newest_id)
		return int(newest_id)

	# Runs whenever this code is executed: look up the newest thing id, then
	# start a download pass with it.
	grabber(newestfinder())