Skip to content

Instantly share code, notes, and snippets.

@liuchang0812
Last active March 20, 2017 08:18
Show Gist options
  • Save liuchang0812/d35e2ce4644b53bd65a375ef3724a16b to your computer and use it in GitHub Desktop.
Save liuchang0812/d35e2ce4644b53bd65a375ef3724a16b to your computer and use it in GitHub Desktop.
清理脚本
# -*- coding: utf-8 -*-
from qcloud_cos import ListFolderRequest, DelFileRequest, CosClient
def to_unicode(s):
    """Return *s* as a text (unicode) string.

    Byte strings are decoded as UTF-8; any other value is returned unchanged.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this is the same check
    # there, and it keeps working on Python 3 where ``str`` is already text
    # and has no ``decode`` method.
    if isinstance(s, bytes):
        return s.decode('utf-8')
    return s
def to_utf8(s):
    """Return *s* as a UTF-8 encoded byte string.

    Text (unicode) strings are encoded as UTF-8; any other value is returned
    unchanged.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, which
    # avoids referring to the Python 2-only name ``unicode``.
    text_type = type(u'')
    if isinstance(s, text_type):
        return s.encode('utf-8')
    return s
class DeleteBrokenFiles(object):
    """Walk a COS bucket and delete the files whose listing entry looks broken.

    A file is considered broken when the ``filelen`` and ``filesize`` fields
    of its listing entry differ, or when either of them is zero.  With
    ``dry_run`` enabled the candidates are only printed and nothing is
    deleted.

    The surrounding script targets Python 2; ``print(...)`` is always called
    with a single argument so it prints the same text with or without
    ``from __future__ import print_function``.
    """

    # Number of attempts made to delete one file before reporting a failure.
    _DELETE_ATTEMPTS = 5

    def __init__(self, appid, access_id, secret_key, bucket, region, dry_run=True):
        """Store the credentials and create the COS client.

        appid, access_id, secret_key, region: passed to ``CosClient``.
        bucket: name of the bucket to scan.
        dry_run: when true (the default) files are reported but not deleted.
        """
        self._access_id = access_id
        self._appid = appid
        self._secret_key = secret_key
        self._bucket = bucket
        self._region = region
        self._dry_run = dry_run
        # Number of failed list requests tolerated per folder.
        self._max_retry = 20
        self._client = CosClient(self._appid, self._access_id, self._secret_key, region=self._region)

    def dfs(self, path):
        """Recursively scan the folder *path* and delete its broken files.

        path: folder to list, either unicode or a UTF-8 encoded byte string.
        """
        print("DFS: {path}".format(path=to_utf8(path)))
        path = to_unicode(path)
        retries_left = self._max_retry
        context = u''
        finished = False
        while not finished:
            request = ListFolderRequest(bucket_name=self._bucket, cos_path=path, context=context)
            ret = self._client.list_folder(request)
            if ret['code'] != 0:
                # Listing failed: retry the same page until the budget is spent.
                retries_left -= 1
                if retries_left <= 0:
                    print("ERROR: list {path} failed".format(path=to_utf8(path)))
                    break
                continue

            data = ret['data']
            finished = data['listover']
            context = data['context']
            for item in data['infos']:
                # Entries without 'filelen' are treated as sub-folders.
                if 'filelen' not in item:
                    # Paths are joined as unicode; formatting unicode values
                    # into a byte-string template fails for non-ASCII names.
                    self.dfs(path + to_unicode(item['name']))
                    continue
                try:
                    if self.__is_broken(item):
                        self.__delete_file(path + to_unicode(item['name']))
                except Exception as exc:
                    # Best effort: one bad entry must not abort the whole
                    # scan, but it is reported instead of silently dropped.
                    print("ERROR: skip {item!r}: {err!r}".format(item=item, err=exc))

    def __is_broken(self, task):
        """Return True when the listing entry *task* describes a broken file.

        Entries lacking ``filelen`` or ``filesize`` are reported as not
        broken.
        """
        try:
            length = task['filelen']
            size = task['filesize']
        except (KeyError, TypeError):
            return False
        return length != size or length == 0 or size == 0

    def __delete_file(self, task):
        """Delete the file at COS path *task*, or only report it in dry-run mode.

        task: unicode COS path of the file.
        """
        label = task.encode('utf-8')
        if self._dry_run:
            print("DELETE: {task}".format(task=label))
            return
        for _ in range(self._DELETE_ATTEMPTS):
            try:
                ret = self._client.del_file(DelFileRequest(self._bucket, task))
                deleted = ret['code'] == 0
            except Exception:
                # Treat any SDK or response error as a failed attempt.
                deleted = False
            if deleted:
                print("DELETE: {task}".format(task=label))
                return
        print("ERROR: delete {key} failed".format(key=label))

    def run(self):
        """Scan the whole bucket, starting from the root folder."""
        self.dfs(u'/')
if __name__ == "__main__":
    # Template invocation: every ``your...`` value is a placeholder to be
    # replaced with real settings.  ``yourappid`` is a bare name that this
    # script does not define, so it has to be replaced before running.
    # ``dry_run=False`` means broken files are really deleted.
    cleaner = DeleteBrokenFiles(
        appid=yourappid,
        access_id=u'youraccessid',
        secret_key=u'yoursecretkey',
        bucket=u'yourbucket',
        region=u'tj',
        dry_run=False,
    )
    cleaner.run()
# -*- encoding: utf-8 -*-
from __future__ import print_function
from qcloud_cos import CosClient, ListFolderRequest
from argparse import ArgumentParser
from threading import Lock
from logging import getLogger, basicConfig, INFO
import sys
# NOTE: earlier variant that derived the output file name from sys.argv[1]:
# f = open("cos/cos_files_{0}.txt".format(sys.argv[1].replace('/','_')), 'w')
# Log to stderr at INFO level.
basicConfig(stream=sys.stderr, level=INFO)
logger = getLogger(__name__)
# Output file for the dump; opened (and truncated) as soon as the module is
# loaded, and written to by dfs().
f = open("cos_files.txt", 'w')
# Running count of files written to the dump; incremented by dfs().
files = 0
# Held by dfs() while it increments ``files``.
lock = Lock()
def dfs(client, bucket, path, max_retry=20):
    """Recursively list *path* in *bucket* and dump every file to ``f``.

    One tab-separated line (full path, ``filelen``, ``sha``) is written per
    file, and the module-level ``files`` counter is incremented for it.

    client: COS client used for the ``list_folder`` calls.
    bucket: bucket name.
    path: folder to list, either unicode or a UTF-8 encoded byte string.
    max_retry: number of failed list requests tolerated; the remaining budget
        is handed down to sub-folders.
    """
    global files

    # Only decode byte strings: calling ``decode`` on a unicode path with
    # non-ASCII characters fails on Python 2.
    if isinstance(path, bytes):
        path = path.decode('utf-8')

    context = u''
    finished = False
    while not finished:
        request = ListFolderRequest(bucket_name=bucket, cos_path=path, context=context)
        ret = client.list_folder(request)
        if ret['code'] != 0:
            # Listing failed: retry the same page until the budget is spent.
            max_retry -= 1
            if max_retry <= 0:
                logger.warning("giving up listing %r: retries exhausted", path)
                break
            continue

        data = ret['data']
        finished = data['listover']
        context = data['context']
        for item in data['infos']:
            name = item['name']
            if isinstance(name, bytes):
                name = name.decode('utf-8')

            # Entries without 'filelen' are treated as sub-folders.
            if 'filelen' not in item:
                sub_dir = path + name
                logger.info("{num} dumpped, working: {dir}".format(num=files, dir=sub_dir.encode('utf-8')))
                dfs(client, bucket, sub_dir, max_retry)
                continue

            try:
                # Build the line as unicode and encode it once: formatting a
                # unicode name into a byte-string template raises for
                # non-ASCII names, which used to drop those files silently.
                line = u"{prefix}{filename}\t{size}\t{etag}\n".format(
                    prefix=path, filename=name, size=item['filelen'], etag=item['sha'])
                f.write(line.encode('utf-8'))
            except (KeyError, IOError, UnicodeError) as exc:
                # Best effort: skip the entry but leave a trace in the log.
                logger.warning("skipping entry %r: %r", item, exc)
                continue
            with lock:
                files += 1
def _main():
    """Parse the command line, dump the bucket listing and close the output.

    All options (--appid, --accessid, --accesskey, --region, --bucket) are
    required.
    """
    # Same type as ``unicode`` on Python 2, without naming it directly.
    text_type = type(u'')

    parser = ArgumentParser()
    parser.add_argument('--appid', type=int, help='your appid', required=True)
    text_options = (
        ('--accessid', 'your access id'),
        ('--accesskey', 'your access key'),
        ('--region', 'your region'),
        ('--bucket', 'your bucket'),
    )
    for flag, description in text_options:
        parser.add_argument(flag, type=text_type, help=description, required=True)
    opt = parser.parse_args()

    _api = CosClient(opt.appid, opt.accessid, opt.accesskey, region=opt.region)
    logger.info("NOTICE: start to dump files within {bucket}".format(bucket=opt.bucket))
    try:
        dfs(_api, opt.bucket, '/')
    finally:
        # Flush and release the module-level output file even when the walk
        # is interrupted, so what was dumped so far reaches the disk.
        f.close()
    logger.info("NOTICE: finish works. {files} files have been dumped to {filename}".format(files=files, filename='cos_files.txt'))


if __name__ == "__main__":
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment