Last active
February 5, 2021 22:55
-
-
Save clairemcwhite/6ecc9d741b33692d92002accb23c9a1c to your computer and use it in GitHub Desktop.
Notes on setting up and using the Google Cloud Life Sciences pipelines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PROJECT_ID is a placeholder for the billable project ID, of the form "word-word-number".
# Having the -life-sciences suffix on the bucket name may be important.
export BUCKET=gs://PROJECT_ID-life-sciences
# Make a bucket (storage folder) for input and output data.
# For an intro to installing gsutil, see this gist: https://gist.github.com/clairemcwhite/ca33c9b7385ee4e2b64641353399f3ba
gsutil mb "${BUCKET}"
# First run the example script.
# If gcloud does not recognize "pipelines" at first, running "gcloud components update" fixed it.
# The command line is single-quoted on purpose: ${BAM} and ${BAI} must not be
# expanded by the local shell; they are named by --inputs / --outputs below.
gcloud beta lifesciences pipelines run \
    --regions us-east1 \
    --command-line 'samtools index ${BAM} ${BAI}' \
    --docker-image "gcr.io/genomics-tools/samtools" \
    --inputs BAM=gs://genomics-public-data/NA12878.chr20.sample.bam \
    --outputs BAI="${BUCKET}/NA12878.chr20.sample.bam.bai"
# Check status: replace the long number with the operation ID that was just printed.
gcloud beta lifesciences operations wait 012345678987654321
# When it's done, list the output with
gsutil ls "${BUCKET}"
# Now build your own dockerized script, e.g. an image called "my_docker".
# Run from the directory that contains the Dockerfile.
docker build -t my_docker .
docker run my_docker
# Set up to push the docker image to the Google Container Registry
gcloud auth login
gcloud auth configure-docker
# More info on these tags here: https://cloud.google.com/container-registry/docs/pushing-and-pulling
# docker tag localreponame host/projectid/future_reponame_on_google
# The tag and push targets must be spelled identically (PROJECT_ID is a placeholder).
docker tag my_docker:latest us.gcr.io/PROJECT_ID/my_docker_12345
docker push us.gcr.io/PROJECT_ID/my_docker_12345
# This was prompted at some point
gcloud config set project PROJECT_ID
# Add input data to the bucket
gsutil cp testfile.txt "${BUCKET}"
# Run the pipeline with the custom my_docker image.
# The variable names given to --inputs / --outputs (IN, OUT) must match the
# ${IN} / ${OUT} references inside the single-quoted command line.
# The image path must match the one that was pushed: host/PROJECT_ID/repo.
gcloud beta lifesciences pipelines run \
    --regions us-east1 \
    --command-line 'Rscript testscript.R --infile ${IN} --outfile ${OUT}' \
    --docker-image "us.gcr.io/PROJECT_ID/my_docker_12345" \
    --inputs IN="${BUCKET}/testfile.txt" \
    --outputs OUT="${BUCKET}/testoutfile.csv"
# See if it's done; replace the number with the operation ID that was printed.
gcloud beta lifesciences operations wait 13589916798356839505
gsutil ls "${BUCKET}"
# Get the output data back (same object name as given to --outputs)
gsutil cp "${BUCKET}/testoutfile.csv" .
## Example of a Dockerfile for R work
FROM rocker/r-ver:3.6.2
# If using tidyverse functions, use this base image instead:
#FROM rocker/tidyverse:3.6.1

# Image metadata. The author is recorded as a label because the MAINTAINER
# instruction is deprecated.
LABEL maintainer="claire.mcwhite@utexas.edu" \
      org.opencontainers.image.authors="clairemcwhite <claire.mcwhite@utexas.edu>" \
      version="0.1" \
      description="This is custom Docker Image for doing an ldproxy_batch query to get proxy snps."

# Prevents interactive tzdata question. ARG keeps DEBIAN_FRONTEND build-time
# only; TZ is an ENV so it is also set in the running container.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=America/New_York

# These were all prompted during attempts to run this docker image.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install --yes --no-install-recommends \
        libcurl4-openssl-dev \
        libssl-dev \
        python3 \
    && rm -rf /var/lib/apt/lists/*

# Install R packages; --error makes the build fail if a package fails to install
RUN install2.r --error \
        LDlinkR \
        argparse

# Script: COPY is used because this is a plain local file (no archive
# extraction or remote fetch is needed).
COPY batch_ldlinkr.R /
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment