Skip to content

Instantly share code, notes, and snippets.

@x1ah
Created October 25, 2019 09:24
Show Gist options
  • Save x1ah/346ce619bd393df2fe327b11e8401273 to your computer and use it in GitHub Desktop.
Save x1ah/346ce619bd393df2fe327b11e8401273 to your computer and use it in GitHub Desktop.
多进程进度条
# coding: utf-8
import gzip
import os
import re
from collections import Counter
from pathlib import Path
from tqdm import tqdm
from multiprocessing import Pool
import ujson
# Directory that holds the gzip-compressed log files to analyze.
BASE_PATH = Path("/Users/x1ah/Downloads/tmp/qiniu/")

# Matches a line containing "imageView2/<digit>/w/<width>/" optionally followed
# by "h/<height>/".  Group "w" is the width; group "h" is the height, or None
# when the height segment is absent.  Written as a raw string so the regex
# escapes (\/ and \d) are not parsed as invalid string escapes.
MATCH_PATTERN = re.compile(r'.+imageView2\/\d\/w\/(?P<w>\d+)\/(h\/(?P<h>\d+)\/)?.+')
def yield_files():
    """Yield the name of every entry in ``BASE_PATH`` that ends with ``.gz``.

    The values are bare entry names (not full paths), produced in the order
    ``os.listdir`` reports them.
    """
    yield from (
        entry for entry in os.listdir(BASE_PATH) if entry.endswith(".gz")
    )
def backup_res(filename, wh_rate, width, height):
    """Write the three tallies to *filename* as a single JSON object.

    The object stores ``wh_rate`` under ``"wh"``, ``width`` under ``"w"`` and
    ``height`` under ``"h"``.  The file is opened in write mode, so existing
    content is replaced.
    """
    payload = {"wh": wh_rate, "w": width, "h": height}
    # Serialize before opening so a serialization error leaves no empty file.
    serialized = ujson.dumps(payload)
    with open(filename, 'w') as out:
        out.write(serialized)
def analyze(filename):
    """Tally the image sizes requested in one gzip-compressed log file.

    Every line of ``BASE_PATH / filename`` is matched against
    ``MATCH_PATTERN``.  For each match with a non-zero width the width is
    counted; when a non-zero height is present as well, the height and the
    width/height ratio (rounded to one decimal) are counted too.  The three
    tallies are then handed to ``backup_res`` and written to a ``.json`` file
    whose name is derived from ``filename``.

    Args:
        filename: Name of a ``.gz`` file inside ``BASE_PATH``.  It must be one
            of the names produced by ``yield_files``, because its index in
            that listing selects the progress-bar row.

    Raises:
        ValueError: If ``filename`` is not among the names produced by
            ``yield_files``.
    """
    wh_rate = Counter()
    width_counter = Counter()
    height_counter = Counter()

    # The file's index in the directory listing gives each worker process its
    # own progress-bar row.
    filenames = list(yield_files())
    bar_row = filenames.index(filename)
    filepath = BASE_PATH / filename

    # `total` is hard-coded (presumably a rough per-file line count), so the
    # bar's percentage is only approximate.  Both the bar and the gzip handle
    # are context-managed so they are released even if processing fails.
    with tqdm(total=5000000, desc=filename[-27:], position=bar_row) as pb, \
            gzip.open(filepath, 'r') as log_file:
        for raw_line in log_file:
            pb.update(1)
            try:
                line = raw_line.decode()
            except UnicodeDecodeError:
                # Skip lines that are not valid UTF-8.
                continue

            res = MATCH_PATTERN.match(line)
            if not res:
                continue

            # Convert to int before the zero checks so zero-padded values
            # such as "00" are treated as zero instead of reaching the
            # division below.
            w = int(res.group('w'))
            if w == 0:
                continue
            width_counter[w] += 1

            h_text = res.group('h')
            if h_text is None:
                continue
            h = int(h_text)
            if h == 0:
                continue
            height_counter[h] += 1
            wh_rate[round(w / h, 1)] += 1

    # Drop the first 14 characters and the 3-character ".gz" suffix.  Names
    # too short to leave anything fall back to the full name so that several
    # short names do not all overwrite a bare ".json".
    stem = filename[14:-3] or filename
    backup_res(stem + ".json", wh_rate, width_counter, height_counter)
def main():
    """Run ``analyze`` on every file from ``yield_files`` in an 8-process pool.

    Each worker is initialised with the parent's tqdm lock (``tqdm.set_lock``
    with ``tqdm.get_lock()``), which the bars in all processes then share.
    """
    # List the files first so a failure here cannot leave a pool running.
    filenames = list(yield_files())
    # map() blocks until every file is processed; leaving the `with` block
    # then shuts the worker processes down instead of leaking them.
    with Pool(8, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as pool:
        pool.map(analyze, filenames)


# Run the analysis only when executed as a script, not when imported (worker
# processes started with the "spawn" method re-import this module).
if __name__ == "__main__":
    main()
@x1ah
Copy link
Author

x1ah commented Oct 25, 2019

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment