Created
October 17, 2018 09:21
-
-
Save myvyang/65d7c6c9f1960d35aff8348d43d0bad1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re

import requests
# Crawl a Weibo photo album page by page and append every image link found
# to a text file.  Each response embeds the cursor (``since_id``) of the next
# page; the crawl follows that cursor until none is returned or one repeats.

# Album endpoint; only the ``since_id`` cursor changes between requests.
URL_TEMPLATE = (
    "https://weibo.com/p/aj/album/loading?ajwvr=6&type=photo"
    "&owner_uid=1935400180&viewer_uid=1935400180&since_id={since_id}"
    "&page_id=1005051935400180&page=2&ajax_call=1&__rnd=1539765676886"
)
# Cursor of the first page to request.
INITIAL_SINCE_ID = "4266739550726685_-1_20180831_-1"
# File that collected links are appended to, one per line.
OUTPUT_PATH = "/tmp/links.txt"
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30

# Session cookie sent with every request.  It was never defined in the
# original script (NameError on the first request); it is now read from the
# WEIBO_COOKIE environment variable so no secret lives in the source.
cookie = os.environ.get("WEIBO_COOKIE", "")

# Percent-encoded image source, captured up to the encoded "?" (%3F).
RE_IMG = re.compile(r"curclear_picSrc=(.*?)%3F")
# Cursor pointing at the next page of the album.
RE_SINCE = re.compile(r"since_id=([0-9_-]+)")


def build_url(since_id):
    """Return the album request URL for the given ``since_id`` cursor."""
    return URL_TEMPLATE.format(since_id=since_id)


def extract_links(text):
    """Return every image link in *text* with ``%2F`` decoded to ``/``.

    Only ``%2F`` is decoded, exactly as the original script did; any other
    percent-escapes in the captured value are left untouched.
    """
    return [match.replace("%2F", "/") for match in RE_IMG.findall(text)]


def extract_since_id(text):
    """Return the first ``since_id`` cursor found in *text*, or ``None``."""
    match = RE_SINCE.search(text)
    return match.group(1) if match else None


def fetch_page(url):
    """Download *url* with the session cookie and return the response body.

    Raises:
        requests.RequestException: on timeout, connection failure, or a
            non-2xx HTTP status.
    """
    resp = requests.get(url, headers={"cookie": cookie}, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error page instead of silently parsing it.
    resp.raise_for_status()
    return resp.text


def crawl(fetch=fetch_page, output_path=OUTPUT_PATH):
    """Follow the album's ``since_id`` cursors and append links to a file.

    Args:
        fetch: callable taking a URL and returning the response text.
        output_path: file the links are appended to, one per line.

    Returns:
        The total number of links written.
    """
    total = 0
    visited = set()
    since_id = INITIAL_SINCE_ID
    # Stop when the response carries no cursor, or when a cursor repeats:
    # the original loop re-fetched the same page forever in both cases.
    while since_id is not None and since_id not in visited:
        visited.add(since_id)
        url = build_url(since_id)
        print(url)
        text = fetch(url)

        links = extract_links(text)
        # Append after every page so partial progress survives a crash.
        with open(output_path, "a") as out:
            out.writelines(link + "\n" for link in links)
        total += len(links)

        since_id = extract_since_id(text)
        if since_id is not None:
            print(since_id)
    return total


if __name__ == "__main__":
    crawl()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment