Skip to content

Instantly share code, notes, and snippets.

@amn41
Created October 17, 2016 07:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amn41/2e7346e1d84afeee12f6d9a3b0822759 to your computer and use it in GitHub Desktop.
Save amn41/2e7346e1d84afeee12f6d9a3b0822759 to your computer and use it in GitHub Desktop.
import rinocloud as rino
import shutil, os
import subprocess
import hashlib
"""
persist = Persistor(config.rino_token,config.rino_dir)
def save_model_new(persist,model_file,score):
temp_file="tmp_{0:06d}.txt".format(random.choice(range(10000)))
data_str = open(model_file).read()
_hash = persist.make_hash(data_str)
with open(temp_file, 'w') as f:
f.write(data_str)
tdata = persist._rino.Object(**{
"hash":_hash,
"domain":'xx',
"filetype":'xx',
"commit":commit,
"equivalence_key":'xx',
"filename_template":"xx_{0:07d}.txt"
})
version = persist.save_versioned_object(tdata,temp_file,config.rino_training_data,_hash)
"""
class Persistor(object):
    """Save and fetch versioned objects through the ``rinocloud`` client.

    Objects sharing an ``equivalence_key`` form one version series. Each
    object saved through :meth:`save_versioned_object` is stamped with a
    ``version`` number and the current git ``commit``; an optional content
    ``hash`` lets identical content be detected so it is not stored twice.
    """

    def __init__(self, token, dir):
        """Configure the module-level rinocloud client.

        Args:
            token: Value assigned to ``rinocloud.api_key``.
            dir: Path handed to ``rinocloud.set_local_path``.
        """
        self._rino = rino
        self._rino.api_key = token
        self._rino.set_local_path(dir)

    def current_commit(self):
        """Return the output of ``git rev-parse HEAD``, or ``"unknown"``.

        Returns:
            The stripped command output as text, or the string ``"unknown"``
            when git exits with an error or cannot be executed.
        """
        try:
            # universal_newlines=True yields text instead of bytes on
            # Python 3, so the result has the same type as the fallback.
            output = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'], universal_newlines=True
            )
        except (subprocess.CalledProcessError, OSError):
            # Non-zero exit status, or the git executable could not be run.
            return "unknown"
        return output.strip()

    def make_hash(self, data_str):
        """Return the hexadecimal MD5 digest of ``data_str``.

        Args:
            data_str: Bytes, or text (which is UTF-8 encoded before hashing).

        Returns:
            The 32-character hexadecimal digest string.
        """
        # hashlib only accepts bytes-like input; ``type(u"")`` is the text
        # type on both Python 2 (unicode) and Python 3 (str).
        if isinstance(data_str, type(u"")):
            data_str = data_str.encode("utf-8")
        digest = hashlib.md5()
        digest.update(data_str)
        return digest.hexdigest()

    def check_new(self, _hash, equivalence_key):
        """Compute the next version number by scanning all stored versions.

        Args:
            _hash: Content hash to look for; a falsy value always counts
                as new.
            equivalence_key: Key identifying the version series.

        Returns:
            A ``(version, is_new)`` tuple. ``version`` is one more than the
            largest stored version (1 when none exist). ``is_new`` is False
            only when ``_hash`` is truthy and equals a stored ``"hash"``.
        """
        prev_versions = self.existing_versions(equivalence_key=equivalence_key)
        # Entries without a "hash" key are skipped rather than failing.
        hashes = [v["hash"] for v in prev_versions if "hash" in v]
        is_new = not (_hash and _hash in hashes)
        version = 1
        if prev_versions:
            version += max(int(v.version) for v in prev_versions)
        return version, is_new

    def existing_versions(self, equivalence_key):
        """Query every stored object whose ``equivalence_key`` matches."""
        query = self._rino.Query().filter(equivalence_key=equivalence_key)
        return query.query(overwrite=True)

    def save_versioned_object(self, obj, temp_file, parent_dir, _hash=None):
        """Store ``temp_file`` as a new version of ``obj`` unless it is a duplicate.

        Args:
            obj: Object to save; must contain ``"equivalence_key"`` and
                provide ``filename_template``, which is formatted with the
                version number to produce the object's name.
            temp_file: Path of the file holding the content. It is moved to
                ``obj.filepath`` when saved and deleted otherwise.
            parent_dir: Value assigned to ``obj._parent``.
            _hash: Optional content hash used to detect duplicates.

        Returns:
            The object's version number: a fresh one when the content was
            saved, or the version of the stored match when it was not.

        Raises:
            AssertionError: If ``obj`` has no ``"equivalence_key"``.
        """
        assert "equivalence_key" in obj, "Cannot save versioned obj without equivalence_key"
        commit = self.current_commit()
        version, is_new = self.version_for_hash(_hash, obj["equivalence_key"])
        if not is_new:
            # Identical content is already stored: discard the temp file.
            os.remove(temp_file)
            return version
        obj._parent = parent_dir
        obj.version = version
        obj.commit = commit
        obj.set_name(obj.filename_template.format(obj.version))
        shutil.move(temp_file, obj.filepath)
        obj.save_local_metadata()
        obj.upload()
        return version

    def fetch_version(self, equivalence_key, version=None):
        """Return a stored version of a series, downloading its file if needed.

        Args:
            equivalence_key: Key identifying the version series.
            version: Integer version to fetch. Any falsy value (``None``,
                ``0``) selects the highest version instead.

        Returns:
            The matching object, or ``None`` when there is no match.

        Raises:
            AssertionError: If a truthy ``version`` is not exactly an ``int``.
        """
        if not version:
            return self.highest_version(equivalence_key)
        assert type(version) is int
        query = self._rino.Query().filter(
            equivalence_key=equivalence_key, version=version
        )
        results = query.query(overwrite=True, limit=1)
        if len(results) > 0:
            return self._ensure_local(results[0])
        return None

    def highest_version(self, equivalence_key):
        """Return the first result when sorting the series by ``"-version"``.

        The object's file is downloaded when it is not already on disk.

        Returns:
            The object, or ``None`` when the series has no stored objects.
        """
        query = self._rino.Query().filter(equivalence_key=equivalence_key)
        results = query.sort("-version").query(overwrite=True, limit=1)
        if len(results) < 1:
            return None
        return self._ensure_local(results[0])

    def _ensure_local(self, obj):
        """Download ``obj`` unless ``obj.filepath`` is an existing file; return ``obj``."""
        if not os.path.isfile(obj.filepath):
            obj.download()
        return obj

    def version_for_hash(self, _hash, equivalence_key):
        """Return the version number to use for ``_hash`` and whether it is new.

        Args:
            _hash: Content hash to look up; a falsy value is always new.
            equivalence_key: Key identifying the version series.

        Returns:
            A ``(version, is_new)`` tuple. When a stored object with the same
            key and hash is found, its version is returned with ``False``;
            otherwise one more than the highest stored version (1 when the
            series is empty) is returned with ``True``.
        """
        version = 1
        # NOTE(review): highest_version() also downloads that object's file
        # when it is missing locally, although only its number is used here.
        v_hi = self.highest_version(equivalence_key)
        if v_hi:
            version += int(v_hi["version"])
        if not _hash:
            return version, True
        query = self._rino.Query().filter(
            equivalence_key=equivalence_key, hash=_hash
        )
        matches = query.query(overwrite=True, limit=1)
        if len(matches) == 1:
            return int(matches[0]["version"]), False
        return version, True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment