Skip to content

Instantly share code, notes, and snippets.

@yuq-1s
Created November 30, 2021 10:53
Show Gist options
  • Save yuq-1s/3b8cdc4514c6039d487bf6bef875e0cc to your computer and use it in GitHub Desktop.
Save yuq-1s/3b8cdc4514c6039d487bf6bef875e0cc to your computer and use it in GitHub Desktop.
Utility decorator for caching intermediate results of Python functions
import gzip
import logging
import pickle
import time
import yaml
from tqdm import tqdm
def cached(cache_path):
    """Return a decorator that caches a function's result in a pickle file.

    Every call of the decorated function first tries to unpickle
    ``cache_path``. If that file is missing or ends prematurely, the original
    function is executed and its return value is pickled to ``cache_path`` so
    that later calls can load it instead of recomputing.

    The call arguments are NOT part of the cache key: whatever is stored at
    ``cache_path`` is returned regardless of ``*args`` / ``**kwargs``.

    Args:
        cache_path: Path of the cache file as a ``str``. A path ending in
            ``.gz`` is read and written through ``gzip.open``; any other path
            is treated as a plain binary file.

    Returns:
        A decorator that wraps a function with the load-or-generate logic.

    Note:
        The cache is read with ``pickle.load``, which can execute arbitrary
        code while loading. Only point ``cache_path`` at files you trust.
    """
    # Local imports keep this block self-contained with respect to the
    # file-level import list.
    import functools
    import os

    def wrapper(func):
        open_fn = gzip.open if cache_path.endswith('.gz') else open

        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def new_func(*args, **kwargs):
            logging.info(f"Loading {cache_path} ...")
            start = time.time()
            try:
                with open_fn(cache_path, 'rb') as f:
                    ret = pickle.load(f)
            except (FileNotFoundError, EOFError):
                # Log the miss before the (possibly slow) regeneration starts,
                # so the message matches what is actually happening.
                logging.info(f"Loading {cache_path} failed, generating it now ...")
            else:
                end = time.time()
                logging.info(f"Loaded {cache_path} in {end - start:.4f} seconds")
                return ret

            # Generate outside the ``except`` block so that an error raised by
            # ``func`` is not reported as a consequence of the cache miss.
            ret = func(*args, **kwargs)

            # Create the cache directory on first use; without this the write
            # below fails when e.g. "cache/" does not exist yet and the freshly
            # computed result would be lost.
            cache_dir = os.path.dirname(cache_path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            with open_fn(cache_path, 'wb') as f:
                pickle.dump(ret, f)
            return ret

        return new_func

    return wrapper
@cached("cache/flashtext_kp.pkl")
def get_kp():
    """Build a flashtext ``KeywordProcessor`` from the lines of ``zh_list``.

    Each line of the file ``zh_list`` is stripped of surrounding whitespace
    and registered as a keyword. The function is decorated with
    ``cached("cache/flashtext_kp.pkl")``.

    Returns:
        The populated ``KeywordProcessor``.
    """
    logging.info("Generating flashtext.KeywordProcessor ...")
    # Imported lazily, after the log line, so flashtext is only needed when
    # this function body actually runs.
    from flashtext import KeywordProcessor

    # Total handed to tqdm for the progress bar; presumably the number of
    # lines in zh_list -- TODO confirm against the data file.
    expected_lines = 18537072

    processor = KeywordProcessor()
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm that this matches how zh_list is encoded.
    with open('zh_list') as word_file:
        progress = tqdm(word_file, total=expected_lines)
        for raw_line in progress:
            keyword = raw_line.strip()
            processor.add_keyword(keyword)
    return processor
if __name__ == "__main__":
    # Script entry point: obtain the keyword processor via get_kp().
    kp = get_kp()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment