Skip to content

Instantly share code, notes, and snippets.

@tueda
Last active August 21, 2023 10:01
Show Gist options
  • Save tueda/ec0181f0f4c8961d49dc659f79cbfd4a to your computer and use it in GitHub Desktop.
Save tueda/ec0181f0f4c8961d49dc659f79cbfd4a to your computer and use it in GitHub Desktop.
Cache on Google Drive for Google Colaboratory. #python #colab #cache
# Install the colab_cache module directly from the gist.
!pip install git+https://gist.github.com/tueda/ec0181f0f4c8961d49dc659f79cbfd4a

# Mount Google Drive; the default cache root of colab_cache.Cache is
# /content/drive/MyDrive/.cache, which lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import colab_cache

# The cache is keyed on the directory to archive (here the build tree).
cache = colab_cache.Cache('hello-2.12.1')
if cache.exists():
    # An archive for this path and system already exists: unpack it.
    cache.load()
else:
    # No archive yet: download, unpack and build GNU Hello, then archive
    # the resulting directory for later sessions.
    !curl -OL https://ftp.gnu.org/gnu/hello/hello-2.12.1.tar.gz
    !tar xf hello-2.12.1.tar.gz
    !cd hello-2.12.1 && ./configure && make
    cache.save()

# Run the binary from the restored or freshly built directory.
!hello-2.12.1/hello
from __future__ import annotations

import functools
import hashlib
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

import yaml
def get_output(cmd: str) -> list[str]:
    """Run *cmd* through the shell and return its standard output as lines.

    Anything the command writes to standard error is echoed to this
    process's standard error, preceded by a ``has err`` marker on standard
    output.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    captured_err = completed.stderr
    if captured_err:
        print("has err")
        print(captured_err, file=sys.stderr)
    return completed.stdout.splitlines()
def run(cmd: str) -> None:
    """Execute *cmd* through the shell, letting its output pass through.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    subprocess.check_call(cmd, shell=True)
@functools.cache
def get_system_info() -> dict[str, str]:
    """Collect the facts used to tell one runtime environment from another.

    The returned mapping merges, in this order:

    * ``python_version`` and ``python_api_version`` taken from ``sys``;
    * the ``ID`` and ``VERSION_ID`` entries of ``/etc/os-release``, with
      lower-cased keys and surrounding double quotes removed;
    * the lines of ``/proc/cpuinfo`` matching ``vendor_id``, ``cpu family``
      or ``model``, sorted and de-duplicated by the shell pipeline.

    The result is cached, so the shell pipelines run once per process.

    Raises:
        subprocess.CalledProcessError: propagated from ``get_output`` when a
            pipeline exits non-zero.
    """
    python_info = {
        "python_version": f"{sys.version_info[0]}.{sys.version_info[1]}",
        "python_api_version": str(sys.api_version),
    }

    # Raw strings: ``\|`` is grep's alternation operator, not a Python
    # escape, and an unrecognised escape in a normal literal triggers a
    # warning on recent Python versions.
    os_lines = get_output(r"cat /etc/os-release | grep '^ID=\|^VERSION_ID='")
    os_info: dict[str, str] = {}
    for line in os_lines:
        key, _, value = line.partition("=")
        os_info[key.lower().strip()] = value.strip('"').strip()

    cpu_lines = get_output(
        r"cat /proc/cpuinfo | grep 'vendor_id\|cpu family\|model' | sort | uniq"
    )
    cpu_info: dict[str, str] = {}
    for line in cpu_lines:
        # Split on the first colon only, so a value that itself contains a
        # colon is kept whole instead of being truncated.
        key, _, value = line.partition(":")
        cpu_info[key.strip()] = value.strip()

    return python_info | os_info | cpu_info
@functools.cache
def get_system_hash() -> str:
    """Return a SHA-256 hex digest fingerprinting ``get_system_info()``.

    The digest is computed over the ``key:value`` pairs joined with ``;``.
    It is calculated in-process, so values containing quotes or other shell
    metacharacters cannot break or alter a shell command. The result is
    cached for the lifetime of the process.
    """
    info = ";".join(f"{k}:{v}" for k, v in get_system_info().items())
    # The trailing newline reproduces the bytes that ``echo ... | sha256sum``
    # hashed, so digests (and the cache directories named after them) are
    # unchanged for ordinary values.
    return hashlib.sha256(f"{info}\n".encode()).hexdigest()
class Cache:
def __init__(
self, path: str | Path, cache_root: str | Path = "/content/drive/MyDrive/.cache"
):
path = Path(path).absolute()
cache_root = Path(cache_root).absolute()
s = str(path)
while s and s[0] == "/":
s = s[1:]
cache_path = cache_root / s / get_system_hash() / f"{path.name}.tar.gz"
self._path = path
self._cache_root = cache_root
self._cache_path = cache_path
def exists(self):
return self._cache_path.is_file()
def load(self):
run(f"tar xPf {self._cache_path}")
print(f"cache loaded: {self._path} for {get_system_info()}")
def save(self):
tmp = Path(self._cache_path.name)
run(f"tar czPf {tmp} {self._path}")
self._cache_path.parent.mkdir(parents=True, exist_ok=True)
shutil.move(tmp, self._cache_path)
with (self._cache_path.parent / "system_info.yaml").open("w") as f:
yaml.dump(get_system_info(), f)
print(f"cache saved: {self._path} for {get_system_info()}")
from setuptools import setup, find_packages

# Packaging metadata for the single-module ``colab_cache`` distribution.
setup(
    name="colab_cache",
    version="0.1.0",
    py_modules=["colab_cache"],
    # colab_cache uses functools.cache and the dict ``|`` operator, both
    # introduced in Python 3.9.
    python_requires=">=3.9",
    # colab_cache imports ``yaml`` at module level.
    install_requires=["PyYAML"],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment