Created
March 23, 2015 23:54
-
-
Save spiiin/ce2c43808d2d09fda361 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import urllib | |
import urllib2 | |
import os.path | |
import md5 | |
import threading | |
import time | |
import vk_param | |
max_threads = 10 #threads count | |
md = md5.new() | |
md.update(vk_param.password) | |
_hash = md.hexdigest() | |
_mail = vk_param.email.replace("@","%40") | |
_id = vk_param.your_id | |
cookie_string ="remixpass="+_hash+";\ | |
remixemail="+_mail+";\ | |
remixmid="+str(_id); | |
if len(sys.argv)<3: | |
print "please enter correct arguments" | |
print "Format \"vk_get_pictures ALBUM_ID SAVE_PATH [START_INDEX]\"" | |
sys.exit() | |
save_folder = sys.argv[2]; | |
album_id = sys.argv[1] | |
if len(sys.argv)>=4: | |
start_ind = int(sys.argv[3]) | |
else: | |
start_ind = 0 | |
saved_start_ind = start_ind | |
re_real_url = re.compile(r"http://cs[0-9]+\.vkontakte\.ru/u[0-9]+/[0-9]+/[a-z]_.+\.jpg") | |
pic_templ = re.compile(r"photo[-]?[0-9]+_[0-9]+") | |
total_found = 0 | |
found_image=999 | |
#show size of html files | |
total_text= 0 | |
def get_pic_addr(url):
    """Fetch a photo page and return the direct URL of its picture.

    The page at ``url`` is requested with the module's authentication
    cookie.  The size of the downloaded text is added to the global
    ``total_text`` counter.  The text is then scanned line by line and the
    first match of ``re_real_url`` on a line that also contains the
    substring ``photo`` is returned.

    Raises PicNotFoundError when no line qualifies.  Errors raised by
    urllib2 while opening or reading the page propagate to the caller.
    """
    global total_text
    req = urllib2.Request(url)
    req.add_header("Cookie", cookie_string)
    f = urllib2.urlopen(req)
    try:
        text = f.read()
    finally:
        # Release the connection even when reading fails part-way.
        f.close()
    # NOTE(review): this counter is updated from worker threads without a
    # lock, so the reported total may be slightly off -- confirm that it is
    # only used for the informational summary.
    total_text += len(text)
    for s in text.splitlines():
        match = re_real_url.search(s)
        if match and 'photo' in s:
            return match.group(0)
    raise PicNotFoundError("Page not contain pictures")
class PicNotFoundError(Exception):
    """Raised when a downloaded photo page contains no picture URL."""
class GetOnePic(threading.Thread): | |
def __init__(self,url,no): | |
self.url=url | |
self.no=no | |
threading.Thread.__init__(self) | |
def run(self): | |
print str.rjust(str(self.no),3),'*'*5," search begin" | |
try: | |
#get real picture url from page with it | |
pic_url = get_pic_addr("http://vkontakte.ru/"+pic_page) | |
except PicNotFoundError: | |
print "This page without picture" | |
except: | |
print "Connection error" | |
else: | |
#if pictures found | |
file_name = str(self.no)+pic_url[pic_url.find("_"):] | |
full_file_name = os.path.join(save_folder,file_name) | |
if not os.path.exists(full_file_name): | |
#if file not exists yet | |
try: | |
#try to get it from servers | |
urllib.urlretrieve(pic_url,full_file_name) | |
except IOError: | |
print "Error while try saving file ", pic_url," to", full_file_name | |
else: | |
#all OK | |
print str.rjust(str(self.no),3),'*'*5,pic_url | |
else: | |
#if file already downloaded | |
print str.rjust(str(self.no),3),'*'*5,pic_url, "(exists)" | |
################################################################ | |
print "Connect..." | |
while found_image>0: | |
params = urllib.urlencode({"id" : album_id, "st" : start_ind}) | |
req = urllib2.Request("http://vkontakte.ru/photos.php?act=album&",params) | |
req.add_header("Cookie", cookie_string ) | |
try: | |
f = urllib2.urlopen(req) | |
except urllib2.URLError: | |
sys.exit("Connection error") | |
alb_page_text = f.read() | |
print "Album main page reading..." | |
print "read", len(alb_page_text), "bytes" | |
total_text = total_text + len(alb_page_text) | |
found_image=0 | |
for cur_line in alb_page_text.splitlines(): | |
match_obj = pic_templ.search(cur_line) | |
if (match_obj): | |
pic_page = cur_line[match_obj.start():match_obj.end()] | |
pic_in_album = saved_start_ind+total_found | |
#pic_url = get_pic_addr("http://vkontakte.ru/"+pic_page) | |
#file_name = str(pic_in_album)+pic_url[pic_url.find("_"):] | |
#urllib.urlretrieve(pic_url,os.path.join(save_folder,file_name)) | |
#print str.rjust(str(pic_in_album),3),'*'*5,pic_url | |
#multiloading | |
while threading.activeCount()>max_threads: | |
time.sleep(2.5) | |
GetOnePic(pic_page,pic_in_album).start() | |
found_image = found_image+1 | |
total_found = total_found+1 | |
start_ind+=20 | |
#wait,while all threads stop | |
while threading.activeCount()>1: | |
pass | |
print "Founded:",total_found,"pictures" | |
print "Download:",total_text,"bytes of text" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment