Last active
March 20, 2017 08:18
-
-
Save liuchang0812/d35e2ce4644b53bd65a375ef3724a16b to your computer and use it in GitHub Desktop.
清理脚本
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from qcloud_cos import ListFolderRequest, DelFileRequest, CosClient | |
def to_unicode(s):
    """Return *s* as text, decoding byte strings as UTF-8.

    Values that are not byte strings (already-decoded text, ``None``,
    numbers, ...) are returned unchanged.
    """
    # `bytes` is an alias of `str` on Python 2, so this is the same check.
    return s.decode('utf-8') if isinstance(s, bytes) else s
def to_utf8(s):
    """Return *s* as a UTF-8 byte string when it is unicode text.

    Anything that is not unicode text (byte strings, ``None``, numbers, ...)
    is returned unchanged.
    """
    text_type = type(u'')  # the `unicode` type on Python 2
    if not isinstance(s, text_type):
        return s
    return s.encode('utf-8')
class DeleteBrokenFiles(object):
    """Walk a COS bucket depth-first and delete files whose listing looks broken.

    A file entry is treated as broken when its listed ``filelen`` differs from
    its ``filesize`` or when it is empty. With ``dry_run=True`` (the default)
    the candidates are only printed and nothing is deleted.
    """

    # Number of delete attempts per file before a failure is reported.
    _DELETE_ATTEMPTS = 5

    def __init__(self, appid, access_id, secret_key, bucket, region, dry_run=True):
        """Store the credentials and build the ``CosClient`` used for all requests.

        :param appid: application id passed to ``CosClient``.
        :param access_id: access id passed to ``CosClient``.
        :param secret_key: secret key passed to ``CosClient``.
        :param bucket: name of the bucket to scan.
        :param region: region passed to ``CosClient``.
        :param dry_run: when true, only print what would be deleted.
        """
        self._access_id = access_id
        self._appid = appid
        self._secret_key = secret_key
        self._bucket = bucket
        self._region = region
        self._dry_run = dry_run
        # Failed list requests tolerated per directory before giving up on it.
        self._max_retry = 20
        self._client = CosClient(self._appid, self._access_id, self._secret_key, region=self._region)

    def dfs(self, path):
        """Recursively scan *path*, deleting every broken file found below it.

        :param path: folder path to list, as a UTF-8 byte string or unicode.
        """
        path = to_unicode(path)
        print("DFS: {path}".format(path=to_utf8(path)))
        context = u''
        retries_left = self._max_retry
        finished = False
        while not finished:
            request = ListFolderRequest(bucket_name=self._bucket, cos_path=path, context=context)
            ret = self._client.list_folder(request)
            if ret['code'] != 0:
                # Retry the same page; stop scanning this folder once the
                # retry budget is used up instead of looping forever.
                retries_left -= 1
                if retries_left <= 0:
                    print("ERROR: list {path} failed".format(path=to_utf8(path)))
                    return
                continue

            data = ret['data']
            finished = data['listover']
            context = data['context']
            for item in data['infos']:
                try:
                    # Build the key as unicode: on Python 2, formatting unicode
                    # values into a byte-string template implicitly encodes
                    # them as ASCII and fails on non-ASCII names.
                    child = path + to_unicode(item['name'])
                except (KeyError, UnicodeError) as err:
                    # Best effort: report the entry and keep scanning.
                    print("ERROR: skip entry under {path}: {err!r}".format(path=to_utf8(path), err=err))
                    continue
                if 'filelen' not in item:
                    # Entries without a length are treated as sub-folders.
                    # NOTE(review): assumes folder names already end with '/',
                    # as the original concatenation did; confirm against the SDK.
                    self.dfs(child)
                elif self.__is_broken(item):
                    self.__delete_file(child)

    @staticmethod
    def __is_broken(task):
        """Return True when the listing entry *task* describes a broken file.

        Broken means ``filelen`` differs from ``filesize`` or the file is
        empty. Entries missing either field are reported as not broken.
        """
        try:
            file_len = task['filelen']
            file_size = task['filesize']
        except (KeyError, TypeError):
            return False
        # When both values are equal, checking one of them for zero is enough.
        return file_len != file_size or file_len == 0

    def __delete_file(self, task):
        """Delete the file at key *task*, or just print it in dry-run mode.

        The delete request is attempted up to ``_DELETE_ATTEMPTS`` times; a
        line starting with ``ERROR:`` is printed when every attempt fails.
        """
        task = to_unicode(task)
        label = to_utf8(task)
        if self._dry_run:
            print("DELETE: {task}".format(task=label))
            return
        for _ in range(self._DELETE_ATTEMPTS):
            try:
                ret = self._client.del_file(DelFileRequest(self._bucket, task))
                deleted = ret['code'] == 0
            except Exception:
                # Any request failure counts as a failed attempt and is retried.
                deleted = False
            if deleted:
                print("DELETE: {task}".format(task=label))
                return
        print("ERROR: delete {key} failed".format(key=label))

    def run(self):
        """Scan the whole bucket, starting at the root folder."""
        self.dfs('/')
if __name__ == "__main__":
    # Template invocation: every credential below is a placeholder, and
    # `yourappid` is a bare name that must be replaced with a real app id
    # before the script can run. dry_run=False makes the run really delete.
    cleaner = DeleteBrokenFiles(
        appid=yourappid,
        access_id=u'youraccessid',
        secret_key=u'yoursecretkey',
        bucket=u'yourbucket',
        region=u'tj',
        dry_run=False,
    )
    cleaner.run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- encoding: utf-8 -*- | |
from __future__ import print_function | |
from qcloud_cos import CosClient, ListFolderRequest | |
from argparse import ArgumentParser | |
from threading import Lock | |
from logging import getLogger, basicConfig, INFO | |
import sys | |
# f = open("cos/cos_files_{0}.txt".format(sys.argv[1].replace('/','_')), 'w')
# Send log records to stderr at INFO level.
basicConfig(stream=sys.stderr, level=INFO)
logger = getLogger(__name__)
# Output file the listing is written to; opened (and truncated) at import time.
f = open("cos_files.txt", 'w')
# Running count of file entries written to `f`.
files = 0
# Guards updates to `files`.
lock = Lock()
def _to_utf8_bytes(value):
    """Return *value* UTF-8 encoded when it is unicode text, else unchanged."""
    if isinstance(value, type(u'')):
        return value.encode('utf-8')
    return value


def dfs(client, bucket, path, max_retry=20):
    """Recursively list *path* and append one line per file to the dump file.

    Each file is written to the module-level file ``f`` as
    ``<full path>\\t<filelen>\\t<sha>``, and the module-level counter ``files``
    is incremented for every line written.

    :param client: object whose ``list_folder(request)`` returns the listing.
    :param bucket: bucket name passed to ``ListFolderRequest``.
    :param path: folder to list, as a UTF-8 byte string or unicode.
    :param max_retry: failed list requests tolerated before giving up on this
        folder; the remaining budget is passed on to sub-folders.
    """
    global files

    # Accept both byte strings and unicode: calling .decode() on unicode
    # would first ASCII-encode it on Python 2 and fail for non-ASCII paths.
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    utf8_path = path.encode('utf-8')

    context = u''
    finished = False
    while not finished:
        request = ListFolderRequest(bucket_name=bucket, cos_path=path, context=context)
        ret = client.list_folder(request)
        if ret['code'] != 0:
            # Retry the same page until the retry budget is used up.
            max_retry -= 1
            if max_retry <= 0:
                logger.warning("giving up listing %s: too many failed requests", utf8_path)
                return
            continue

        data = ret['data']
        finished = data['listover']
        context = data['context']
        for item in data['infos']:
            if 'filelen' in item:
                try:
                    # Encode the name before formatting: mixing a unicode name
                    # into this byte-string template raises for non-ASCII
                    # names, which used to drop such files from the dump.
                    f.write("{prefix}{filename}\t{size}\t{etag}\n".format(
                        prefix=utf8_path,
                        filename=_to_utf8_bytes(item['name']),
                        size=item['filelen'],
                        etag=item['sha']))
                except Exception:
                    # Best effort: keep dumping, but leave a trace of the skip.
                    logger.exception("failed to record an entry under %s", utf8_path)
                    continue
                with lock:
                    files += 1
            else:
                # Entries without a length are treated as sub-folders.
                sub_dir = utf8_path + _to_utf8_bytes(item['name'])
                logger.info("%s dumpped, working: %s", files, sub_dir)
                dfs(client, bucket, sub_dir, max_retry)
def _main():
    """Parse the COS credentials from the command line and dump the bucket listing.

    All of ``--appid``, ``--accessid``, ``--accesskey``, ``--region`` and
    ``--bucket`` are required. The listing is walked from the root folder
    and written through the module-level output file ``f``.
    """
    parser = ArgumentParser()
    parser.add_argument('--appid', type=int, help='your appid', required=True)
    parser.add_argument('--accessid', type=unicode, help='your access id', required=True)
    parser.add_argument('--accesskey', type=unicode, help='your access key', required=True)
    parser.add_argument('--region', type=unicode, help='your region', required=True)
    parser.add_argument('--bucket', type=unicode, help='your bucket', required=True)
    opt = parser.parse_args()

    _api = CosClient(opt.appid, opt.accessid, opt.accesskey, region=opt.region)
    logger.info("NOTICE: start to dump files within {bucket}".format(bucket=opt.bucket))
    try:
        dfs(_api, opt.bucket, '/')
    finally:
        # Close explicitly so buffered lines reach disk before completion is
        # reported, and also when the walk aborts with an exception.
        f.close()
    logger.info("NOTICE: finish works. {files} files have been dumped to {filename}".format(files=files, filename='cos_files.txt'))
# Run the dump only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment