Skip to content

Instantly share code, notes, and snippets.

@ezdiy

ezdiy/vidme.py Secret

Created December 13, 2017 23:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ezdiy/17855d7421bbb416cbb3d8e0e1caf213 to your computer and use it in GitHub Desktop.
Save ezdiy/17855d7421bbb416cbb3d8e0e1caf213 to your computer and use it in GitHub Desktop.
import logging
import traceback
import time
import requests
import json
import sys
# Command line: argv[1] is the prefix used to name the per-range JSON dump
# files, argv[2] is the path of the plain-text url/source listing.
outdb, outsrc = sys.argv[1], sys.argv[2]

# Single requests session shared by every API call made through req().
sess = requests.Session()
def req(par):
    """Query the vid.me video-list endpoint and return the decoded JSON.

    Args:
        par: Already-encoded query string appended after ``?`` in the
            ``/videos/list`` URL (e.g. ``'limit=1'``).

    Returns:
        The decoded JSON document, or ``None`` when the response body is
        empty.

    Any ordinary failure (connection error, timeout, malformed JSON, ...)
    is reported with a traceback plus the offending query string, and the
    request is retried after a sleep that doubles on every failure
    (2s, 4s, 8s, ...) up to ``max_backoff`` seconds.  The function retries
    forever; it only gives up when the process is interrupted.
    """
    max_backoff = 300      # upper bound for the sleep between retries
    request_timeout = 60   # seconds; without it a stalled socket hangs forever
    url = "https://api.vid.me/videos/list?%s" % par
    backoff = 1
    while True:
        try:
            data = sess.get(url, timeout=request_timeout).text
            if not data:
                return None
            return json.loads(data)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the scraper.
            traceback.print_exc()
            print(par)
            backoff = min(backoff * 2, max_backoff)
            time.sleep(backoff)
# Width (in video ids) of the id window requested from the API; the window
# is widened by this much every time a request yields nothing new.
BASE_STEP = 500

# Each JSON dump file covers this many ids; its name carries the id rounded
# down to a multiple of this value.
DUMP_BUCKET = 10000

# How far the upper id bound is lowered when the API answers with an empty
# body.
EMPTY_REPLY_SKIP = 99

# Hard-coded id the crawl starts from, overriding the newest id reported by
# the API.  Set to None to start from the API's newest video instead.
# NOTE(review): the original line sat right after a dead ``else`` branch and
# the file's indentation was lost, so it is read here as an unconditional
# override -- confirm.
RESUME_FROM_VID = 17450034


def _source_line(vid, video):
    """Return the listing line for one video: ``id url [source]``.

    The ``source`` field is only included when it is non-empty and contains
    a ``/``.
    """
    src = video["source"]
    if src and '/' in src:
        return "%d %s %s\n" % (vid, video['url'], src)
    return "%d %s\n" % (vid, video['url'])


def _finish_dump(dbo):
    """Terminate the JSON array of an open dump file and close it.

    Every record is written followed by a comma, so a final ``null`` element
    is appended to keep the file valid JSON.
    """
    dbo.write("null]")
    dbo.close()


def scrape(top_vid, total, db_prefix, src_path, fetch=None):
    """Walk the video ids downwards from *top_vid* and dump every new video.

    Args:
        top_vid: Id to start from; the first request covers the ids from
            ``top_vid - BASE_STEP`` up to ``top_vid + 1``.
        total: Expected number of videos, used only for the progress line.
        db_prefix: Prefix of the dump files, named
            ``<db_prefix>_<bucket, 8 digits>.json``.
        src_path: Path of the text listing (one ``id url [source]`` line per
            video).
        fetch: Callable taking the query string and returning the decoded
            reply (or None for an empty reply).  Defaults to ``req``.

    Returns:
        The number of distinct videos written.

    The loop assumes the last new video of a reply carries the lowest id of
    that reply (i.e. that the API lists videos from high to low id) --
    TODO confirm against the API.
    """
    if fetch is None:
        fetch = req

    known = set()          # ids already written, to skip overlap between windows
    vid = top_vid          # upper bound of the next window (lowest id seen so far)
    step = BASE_STEP
    low = max(vid - step, 0)
    bucket = None          # id bucket of the dump file currently open
    dbo = None             # dump file currently open, if any

    with open(src_path, "w") as srco:
        try:
            while True:
                resp = fetch('minVideoId=%d&maxVideoId=%d&limit=100' % (low, vid + 1))
                if resp is None:
                    # Empty reply: move the upper bound down and try again,
                    # but stop once there are no ids left below it.
                    vid -= EMPTY_REPLY_SKIP
                    if vid < 0:
                        break
                    low = max(vid - step, 0)
                    continue

                fresh = 0
                for v in resp['videos']:
                    nvid = int(v['video_id'])
                    if nvid in known:
                        continue
                    fresh += 1
                    known.add(nvid)
                    vid = nvid
                    srco.write(_source_line(vid, v))
                    # The dump always stores complete_url as null.
                    v['complete_url'] = None

                    rng = vid - vid % DUMP_BUCKET
                    if dbo is None or rng != bucket:
                        # Crossed into another id bucket: rotate dump files.
                        if dbo is not None:
                            _finish_dump(dbo)
                            dbo = None
                        bucket = rng
                        nfn = "%s_%08d.json" % (db_prefix, rng)
                        print("[*] New dump %s" % nfn)
                        dbo = open(nfn, "w")
                        dbo.write("[")
                    dbo.write(json.dumps(v) + ",")

                if not fresh:
                    if low == 0:
                        # The window already reached id 0 and held nothing
                        # new: the crawl is complete.
                        break
                    step += BASE_STEP
                    low = max(vid - step, 0)
                    print("Raising step to %d" % step)
                    continue

                step = BASE_STEP
                got = len(known)
                pct = got * 100.0 / total if total else 0.0
                print("[*] %d/%d done, %.3f%%, top=%d" % (got, total, pct, vid))
                # Keep going even when the window now starts at 0; the
                # "nothing new" branch above ends the crawl once the lowest
                # ids have actually been fetched.
                low = max(vid - step, 0)
        finally:
            # Leave the last dump as valid JSON even if the crawl aborted.
            if dbo is not None:
                _finish_dump(dbo)

    return len(known)


def main():
    """Ask the API for the newest video and the video count, then crawl."""
    info = req('limit=1')
    newest = int(info['videos'][0]['video_id'])
    total = info['page']['total']
    print("[*] Scraping db of %d videos, top vid=%d" % (total, newest))
    start = newest if RESUME_FROM_VID is None else RESUME_FROM_VID
    scrape(start, total, outdb, outsrc)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment