Skip to content

Instantly share code, notes, and snippets.

@jdavcs
Last active March 24, 2020 18:07
Show Gist options
  • Save jdavcs/ec83dfc75a3936cef093995dd00985a4 to your computer and use it in GitHub Desktop.
Save jdavcs/ec83dfc75a3936cef093995dd00985a4 to your computer and use it in GitHub Desktop.
Performance test for galaxy's JobWrapper change_state()
from scripts.db_shell import *
def create_jobs_with_datasets(n, datasets_per_job):
    """Create and persist ``n`` jobs, each with ``datasets_per_job`` outputs.

    Every output is a fresh ``HistoryDatasetAssociation`` wrapping a new
    ``Dataset`` in the ``NEW`` state, attached to its job under the output
    name ``'out'``.

    :param n: number of jobs to create.
    :param datasets_per_job: number of output datasets attached to each job.
    :returns: list of the created ``Job`` objects, in creation order.
    """
    jobs = []
    for _ in range(n):
        job = Job()
        jobs.append(job)
        for _ in range(datasets_per_job):
            ds = Dataset(state=Dataset.states.NEW)
            hda = HistoryDatasetAssociation()
            hda.dataset = ds
            job.add_output_dataset('out', hda)
        sa_session.add(job)
    # One flush for the whole fixture rather than one per job.
    sa_session.flush()
    return jobs
# Four UPDATE statements sent as a single script. They share the bind
# parameters :job_id, :state, :info and :update_time.
#   1. dataset rows reached through the job's history output associations
#   2. dataset rows reached through the job's library output associations
#   3. the history_dataset_association rows themselves
#   4. the library_dataset_dataset_association rows themselves
_CHANGE_STATE_SQL = '''
UPDATE dataset
SET
state = :state,
update_time = :update_time
WHERE id IN (
SELECT hda.dataset_id FROM history_dataset_association hda
INNER JOIN job_to_output_dataset jtod
ON jtod.dataset_id = hda.id AND jtod.job_id = :job_id
);
UPDATE dataset
SET
state = :state,
update_time = :update_time
WHERE id IN (
SELECT ldda.dataset_id FROM library_dataset_dataset_association ldda
INNER JOIN job_to_output_library_dataset jtold
ON jtold.ldda_id = ldda.id AND jtold.job_id = :job_id
);
UPDATE history_dataset_association
SET
info = :info,
update_time = :update_time
WHERE id IN (
SELECT jtod.dataset_id
FROM job_to_output_dataset jtod
WHERE jtod.job_id = :job_id
);
UPDATE library_dataset_dataset_association
SET
info = :info,
update_time = :update_time
WHERE id IN (
SELECT jtold.ldda_id
FROM job_to_output_library_dataset jtold
WHERE jtold.job_id = :job_id
);
'''


def make_query():
    """Return the SQL script that updates all outputs of one job.

    The script sets ``state`` and ``update_time`` on the job's output
    datasets and ``info`` and ``update_time`` on the output associations,
    for both history and library outputs.

    :returns: SQL text with named bind parameters ``:job_id``, ``:state``,
        ``:info`` and ``:update_time``.
    """
    return _CHANGE_STATE_SQL
def run_change_state_one_query(job, state, info, update_time):
    """Change a job's state via the ORM and its outputs via raw SQL.

    The job row is updated through the session. The output datasets and
    their associations are then updated by executing the script returned
    by ``make_query()``.

    :param job: the job whose state is being changed.
    :param state: new state, written to the job and its output datasets.
    :param info: new info text, written to the job and its associations.
    :param update_time: timestamp written to every updated output row.
    """
    # Drop cached attribute state so each run starts from the database.
    sa_session.expire_all()
    job.state = state
    job.info = info
    sa_session.add(job)
    sa_session.flush()
    params = {'job_id': job.id, 'state': state, 'info': info, 'update_time': update_time}
    # NOTE(review): passing a plain string to Session.execute() is deprecated
    # in SQLAlchemy 1.4 and rejected in 2.0; wrap it in sqlalchemy.text()
    # when upgrading.
    sa_session.execute(make_query(), params)
    sa_session.flush()
def run_change_state_python(job, state, info):
    """Change a job's state and its outputs' state through the ORM.

    Walks ``job.output_datasets`` one object at a time, updating each
    output and its underlying dataset, then updates the job and flushes.
    Library outputs are not handled (assumed absent).

    :param job: the job whose state is being changed.
    :param state: new state for the job and its output datasets.
    :param info: new info text for the job and its output associations.
    """
    # Drop cached attribute state so each run starts from the database.
    sa_session.expire_all()
    for dataset_assoc in job.output_datasets:  # assume no output_library_datasets
        dataset_instance = dataset_assoc.dataset  # dataset_instance = hda or ldda
        sa_session.refresh(dataset_instance)  # assume job not supplied, so refresh
        # Use the NEW state. The job is only updated after this loop, so
        # reading job.state here would write the job's previous state.
        # Stands in for dataset.raw_set_dataset_state(state).
        dataset_instance.dataset.state = state
        dataset_instance.update()  # assume state has changed
        dataset_instance.info = info
        sa_session.add(dataset_instance)
    job.state = state  # equiv to job.set_state(state); but do not add JobStateHistory
    job.info = info
    sa_session.add(job)
    sa_session.flush()
def run_change_state_trigger(job, state, info):
    """Change only the job row and flush.

    No output dataset is touched here. Presumably a database trigger
    (not shown in this file) propagates the change to the outputs;
    confirm one is installed before comparing against the other variants.

    :param job: the job whose state is being changed.
    :param state: new job state.
    :param info: new job info text.
    """
    # Drop cached attribute state so each run starts from the database.
    sa_session.expire_all()
    job.state = state
    job.info = info
    sa_session.add(job)
    sa_session.flush()
if __name__ == '__main__':
    # Fixture: 10 jobs with 100 output datasets each.
    jobs = create_jobs_with_datasets(10, 100)
    # One timestamp shared by every raw-SQL update in this run.
    update_time = galaxy.model.orm.now.now()
    # Exactly one strategy is exercised per run. To benchmark another,
    # comment out the active call and uncomment the one to measure.
    for j in jobs:
        #run_change_state_python(j, 'ok-python5', 'updated-python5')
        run_change_state_one_query(j, 'ok-query5', 'updated-query5', update_time)
        #run_change_state_trigger(j, 'ok-trigger5', 'updated-trigger5')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment