Skip to content

Instantly share code, notes, and snippets.

@feisuzhu
Last active July 27, 2017 11:09
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save feisuzhu/cb7d7ab211b62c44d3b7 to your computer and use it in GitHub Desktop.
Save feisuzhu/cb7d7ab211b62c44d3b7 to your computer and use it in GitHub Desktop.

Parse -> LeanCloud 文件迁移工具

注意:这个工具是用来导入到中国区节点的

依赖: gevent、requests

Ubuntu 可以 # apt-get install python-gevent python-requests

用 pip 可以这样: # pip install gevent requests

用 setuptools 可以这样: # easy_install gevent requests

使用: 首先将Parse中的所有数据导入到LeanCloud,然后就可以在_File表中发现引用了 Parse 的文件的记录。 然后在应用的设置页面拿到应用的 AppId 和 MasterKey,在命令行下执行

# python migrate-parse-files.py AppId MasterKey

耐心地等待完成后就可以了。 如果迁移过程中断,重新运行就可以了。

使用前请备份 _File 表

出现使用问题的话,麻烦发邮件到 bwang [这里是at,嗯] leancloud.rocks

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Parse -> LeanCloud file migration tool (Python 2).
# Usage: python migrate-parse-files.py AppId MasterKey [--concurrency N]
# -- prioritized --
import sys
reload(sys)
# Py2-only hack: switch the implicit str<->unicode codec from ASCII to
# UTF-8 so the mixed CJK/byte-string output below does not raise
# UnicodeDecodeError.  (reload(sys) restores the otherwise-deleted
# setdefaultencoding attribute.)
sys.setdefaultencoding('utf-8')
from gevent import monkey
# Monkey-patch BEFORE importing requests (below) so its sockets/ssl are
# cooperative under gevent — do not move this line later in the file.
monkey.patch_all()
# -- stdlib --
import argparse
import json
import os
# -- third party --
from gevent.pool import Pool
import gevent
import requests
# -- code --
# Command line: two required positionals (AppId / MasterKey from the
# LeanCloud app settings page) and an optional worker count.
parser = argparse.ArgumentParser('lcup')
parser.add_argument('appid')
parser.add_argument('masterkey')
parser.add_argument('--concurrency', type=int, default=5)
options = parser.parse_args()
# One shared session for every LeanCloud REST call; ',master' makes the
# key act as a master key, which is required to delete _File rows.
s = requests.session()
s.headers = {
'X-LC-Id': options.appid,
'X-LC-Key': options.masterkey + ',master',
'Content-Type': 'application/json',
}
# NOTE(review): the +1 presumably leaves a free slot so verify_file can
# re-spawn a migrate_file while all workers are busy — confirm.
worker_pool = Pool(options.concurrency + 1)
def migrate_file(oid, url):
assert 'parsetfss' in url
f = os.tmpfile()
print '[%s] Downloading %s' % (oid, url)
if '.' in url:
ext = url.split('.')[-1]
if len(ext) > 6:
ext = ''
else:
ext = '.' + ext
else:
ext = ''
resp = s.get(url, stream=True)
size = int(resp.headers['Content-Length'])
downloaded = 0
for data in resp.iter_content(16384):
downloaded += len(data)
print '[%s] Downloading' % oid, url, '(%s/%s) %.2f%%' % (downloaded, size, downloaded * 100.0 / size)
f.write(data)
size = f.tell()
f.seek(0)
print '[%s] Removing old entry' % oid
resp = s.delete('https://api.leancloud.cn/1.1/classes/_File/%s' % oid)
assert resp.ok, resp.json()
key = os.urandom(8).encode('hex').strip() + ext
print '[%s] Adding new entry' % oid
# MUST SUCCEED!
for _ in xrange(3):
try:
resp = s.post('https://api.leancloud.cn/1.1/qiniu', data=json.dumps({
"objectId": oid, "key": key, "name": url,
"metaData": {"size": size, "owner": "unknown"},
})).json()
token = resp['token']
break
except Exception:
import traceback
traceback.print_exc()
continue
print '[%s] Uploading to qiniu' % oid
resp = requests.post('https://up.qbox.me', data={'key': key, 'token': token}, files={'file': f})
assert resp.ok, resp.json()
print '[%s] Done' % oid
def verify_file(oid, old_url, new_url, size):
new_info = requests.head(new_url)
if not new_info.ok or int(new_info.headers['Content-Length']) != size:
print '[%s] Verify not ok, redownload...'
worker_pool.spawn(migrate_file, oid, old_url)
else:
print '[%s] Verify OK' % oid
def find_files():
offset = 0
migrate = []
verify = []
while True:
print '[FINDER] Fetching offset %s limit 100' % offset
resp = s.get('https://api.leancloud.cn/1.1/cloudQuery', params={
'cql': 'select * from _File limit %s,100' % offset
})
assert resp.ok, resp.json()
files = resp.json()['results']
if not files:
print '[FINDER] No more files'
return migrate, verify
for i in files:
if 'files.parsetfss.com' in i['url']:
migrate.append((i['objectId'], i['url']))
elif 'files.parsetfss.com' in i['name']:
verify.append((i['objectId'], i['name'], i['url'], i['metaData']['size']))
offset += len(files)
# Kick off the migration: one greenlet per file, then block until every
# greenlet — including any verify_file re-spawns — has finished.
to_migrate, to_verify = find_files()
for oid, url in to_migrate:
    worker_pool.spawn(migrate_file, oid, url)
for oid, old_url, new_url, size in to_verify:
    worker_pool.spawn(verify_file, oid, old_url, new_url, size)
gevent.hub.get_hub().join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment