Skip to content

Instantly share code, notes, and snippets.

@peterjc
Created April 29, 2015 10:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterjc/5ebbf446d799f3aaa639 to your computer and use it in GitHub Desktop.
Save peterjc/5ebbf446d799f3aaa639 to your computer and use it in GitHub Desktop.
Rough script for populating Galaxy .shed.yml files with metadata already in Tool Shed
# Walks specified folders looking for .shed.yml files,
# with at least owner and name given.
#
# Matches the owner/name with the remote Tool Shed, and
# takes any missing meta-data from the remote Tool Shed.
#
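# Pre-existing local data should otherwise be preserved; the one
# exception is categories, which are replaced when they differ from
# those recorded in the remote Tool Shed.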
#
# Does the yaml dump with some hackery because I couldn't
# work out how to make the library use the layout I wanted.
import yaml
import os
import sys
from bioblend import toolshed
# Seems at time of writing TestToolShed gives categories,
# but main ToolShed does not. Bug?
shed_url = "https://testtoolshed.g2.bx.psu.edu/"
# Module-level setup: the repository and category listings are requested
# once from the Tool Shed here and then reused for every .shed.yml file
# processed below (repos for owner/name matching, cats for translating
# category IDs into category names).
tsi = toolshed.ToolShedInstance(shed_url)
repos = tsi.repositories.get_repositories()
cats = tsi.repositories.get_categories()
# Stop immediately if either listing came back empty.
assert repos
assert cats
# The .shed.yml fields this script knows about. The tuple is used both to
# decide which missing fields to copy from the remote entry and as the
# order in which new fields are written out.
keys = ("name", "owner",
"homepage_url", "remote_repository_url",
"description", "long_description",
"categories", "type")
def shed_repo_config(shed_yaml_path):
    """Load the .shed.yml file at shed_yaml_path.

    Returns an empty dict if the path does not exist; otherwise returns
    whatever the YAML document parses to (which is None for an empty
    file, so callers should check for that).
    """
    if not os.path.exists(shed_yaml_path):
        return {}
    with open(shed_yaml_path, "r") as f:
        # safe_load only builds plain Python types. The bare yaml.load(f)
        # call can construct arbitrary objects on old PyYAML releases and
        # is rejected outright (missing Loader argument) on PyYAML 6+.
        return yaml.safe_load(f)
def save_changes(shed_yaml_path, new_data):
    """Append the fields in new_data to the file at shed_yaml_path.

    Only fields named in the module-level ``keys`` tuple are written, in
    that order. List values are written as a YAML block sequence (an
    empty list as ``[]``); anything else is formatted via yaml.dump and
    written on a ``key: value`` line.

    Quick and dirty: the file is opened in append mode, so the resulting
    key order is not ideal and a key that already exists in the file is
    not removed - the new value is simply added after it.

    Raises AssertionError if a formatted scalar still contains "...";
    nothing is written to the file in that case.
    """
    chunks = []
    for k in keys:
        if k not in new_data:
            continue
        v = new_data[k]
        if isinstance(v, list):
            if v:
                chunks.append("%s:\n- %s\n" % (k, "\n- ".join(v)))
            else:
                # "k:\n- \n" would parse as a list holding one null.
                chunks.append("%s: []\n" % k)
            continue
        v = yaml.dump(v).rstrip()
        if v.endswith("\n..."):
            # yaml.dump ends a bare top-level scalar with the "..."
            # document-end marker; drop it.
            v = v[:-4].rstrip()
        # Strip redundant quote chars...
        # NOTE(review): the quotes are not always redundant - yaml.dump
        # adds them when the plain form would be read back differently
        # (e.g. '123' or 'yes'), so removing them can change the value.
        if v.count("'") == 2 and v[0] == "'" and v[-1] == "'":
            v = v[1:-1]
        assert "..." not in v, "%r --> %r" % (new_data[k], v)
        chunks.append("%s: %s\n" % (k, v))
    if not chunks:
        return

    # If the existing file does not end with a newline, appending directly
    # would glue the first new key onto the end of its last line.
    needs_newline = False
    if os.path.isfile(shed_yaml_path) and os.path.getsize(shed_yaml_path) > 0:
        with open(shed_yaml_path, "rb") as f:
            f.seek(-1, os.SEEK_END)
            needs_newline = f.read(1) != b"\n"

    with open(shed_yaml_path, "a") as f:
        if needs_newline:
            f.write("\n")
        f.write("".join(chunks))
def find_remote(repos, name, owner):
    """Return the first entry of repos whose name and owner both match.

    Each entry is looked up by its "name" and "owner" keys. Raises
    KeyError (with a message naming owner/name) when nothing matches.
    """
    candidates = (
        entry for entry in repos
        if entry["name"] == name and entry["owner"] == owner
    )
    # A matching entry supports key lookup, so it can never be None and
    # None is safe to use as the "nothing found" marker.
    match = next(candidates, None)
    if match is None:
        raise KeyError("Tool Shed missing %s/%s" % (owner, name))
    return match
def sync_changes(local, remote):
    """Work out which fields should be added to a local .shed.yml.

    local is the parsed local configuration and remote the matching Tool
    Shed entry; both must agree on "name" and "owner" (checked with
    assertions). Uses the module-level ``keys`` (fields to consider) and
    ``cats`` (category listing used to map IDs to names).

    Returns a dict of the fields to write:
    - every field in ``keys`` missing locally but present remotely,
      converted with str();
    - "categories", as a sorted list of names, when the remote entry
      lists category IDs and the local categories are missing or differ.
    The dict is empty when there is nothing to change.
    """
    assert local["name"] == remote["name"]
    assert local["owner"] == remote["owner"]
    # Take the identifiers from the arguments so the log messages do not
    # depend on module-level variables happening to be set by the caller.
    name = local["name"]
    owner = local["owner"]

    updated = dict()
    for k in keys:
        if k not in local and k in remote:
            print("Recording %s/%s field %s" % (owner, name, k))
            updated[k] = str(remote[k])

    remote_cat_ids = remote.get("category_ids", [])
    if not remote_cat_ids:
        print("Tool Shed did not tell us the categories for %s/%s" % (owner, name))
        return updated

    remote_categories = sorted(
        str(x["name"]) for x in cats if x["id"] in remote_cat_ids
    )
    # An empty "categories:" key parses as None; treat it like a missing
    # key so the set() comparison below cannot fail.
    local_categories = local.get("categories") or []
    if remote_categories and not local_categories:
        print("Recording %s/%s field categories" % (owner, name))
        updated["categories"] = remote_categories
    elif set(remote_categories) != set(local_categories):
        print("REPLACING %s/%s field categories" % (owner, name))
        updated["categories"] = remote_categories
    return updated
# Driver: walk every folder named on the command line, and for each
# .shed.yml found, append any metadata the Tool Shed has but the file lacks.
for root in sys.argv[1:]:
    print("Walking %s" % root)
    for (dirpath, dirnames, filenames) in os.walk(root):
        if ".shed.yml" not in filenames:
            continue
        # os.walk already yields dirpath prefixed with root; joining root
        # again would duplicate it for relative roots
        # (e.g. "tools/tools/x/.shed.yml").
        shed_yml = os.path.join(dirpath, ".shed.yml")
        assert os.path.isfile(shed_yml), shed_yml
        print(shed_yml)
        local = shed_repo_config(shed_yml)
        assert local is not None, shed_yml
        try:
            # Deliberately kept as module-level names: other code in this
            # file may read them.
            name = local["name"]
            owner = local["owner"]
        except KeyError:
            print("Missing owner and/or name in %s" % shed_yml)
            continue
        try:
            remote = find_remote(repos, name, owner)
        except KeyError as e:
            # Not in remote tool shed?
            print(str(e))
            continue
        updated = sync_changes(local, remote)
        if updated:
            save_changes(shed_yml, updated)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment