Skip to content

Instantly share code, notes, and snippets.

@brucehoff
Created March 2, 2016 00:56
Show Gist options
  • Save brucehoff/d4808247d0bffa37eda5 to your computer and use it in GitHub Desktop.
Save brucehoff/d4808247d0bffa37eda5 to your computer and use it in GitHub Desktop.
library(digest)
library(synapseClient)
# Looks up the upload settings of a Synapse project and returns their 'locations' element.
#
# projectId the ID of the Synapse project whose upload settings are queried
#
# Returns the 'locations' field of the REST response (presumably the storage location
# ID(s) linked to the project, going by the function name -- confirm against the Synapse API).
getStorageLocationId <- function(projectId) {
  settingsUri <- sprintf("/projectSettings/%s/type/upload", projectId)
  uploadSettings <- synRestGET(settingsUri)
  uploadSettings$locations
}
# Creates a FileEntity in a Synapse project for a file that has already been uploaded
# to an S3 bucket associated with that project.
#
# Two REST calls are made: first an S3 file handle describing the object is POSTed to the
# file service endpoint, then a FileEntity referencing that handle's 'id' is POSTed to "/entity".
#
# s3file the S3 file 'key', i.e. the full path to the file within the S3 bucket, excluding the bucket name
# contentType the MIME type recorded on the file handle (defaults to "application/octet-stream")
# md5 the MD5 digest of the file to be linked
# storageLocationId the ID of the storage location object linking the Synapse project to the S3 bucket. See getStorageLocationId
# parentId the folder or project under which the file should be placed
# AWSbucketName the S3 bucket into which the file was loaded
#
# Returns (invisibly) the response of the "/entity" POST.
linkS3FileToSynapse <- function(s3file, contentType = "application/octet-stream", md5, storageLocationId, parentId, AWSbucketName) {
  # The entity is named after the last path component of the S3 key.
  entityName <- basename(s3file)

  handleRequest <- list(
    concreteType = "org.sagebionetworks.repo.model.file.S3FileHandle",
    bucketName = AWSbucketName,
    key = s3file,
    fileName = entityName,
    contentType = contentType,
    contentMd5 = md5,
    storageLocationId = storageLocationId
  )
  handleResponse <- synRestPOST(
    "/externalFileHandle/s3",
    handleRequest,
    endpoint = synapseFileServiceEndpoint()
  )

  entityRequest <- list(
    concreteType = "org.sagebionetworks.repo.model.FileEntity",
    name = entityName,
    dataFileHandleId = handleResponse$id,
    parentId = parentId
  )
  invisible(synRestPOST("/entity", entityRequest))
}
# Given a folder of files already uploaded to an S3 bucket, this creates the corresponding
# FileEntities in Synapse. Before calling this you must have associated the bucket with
# the target Synapse project.
#
# Only the entries directly inside 'folderPath' are processed (the listing is not recursive).
# Subdirectories are skipped with a message, since digest() cannot hash a directory and
# would otherwise abort the whole run.
#
# folderPath the path to the folder on the local file system where the uploaded files reside
# AWSbucketName the S3 bucket to which the files have been uploaded
# s3FolderPath the path within 'AWSbucketName' to the folder where the files have been uploaded
# projectId the project associated with the S3 bucket
# parentId the project (projectId) or folder (within projectId) under which the file should be placed
#
# Called for its side effects; returns NULL invisibly.
linkUploadedFolderToSynapse <- function(folderPath, AWSbucketName, s3FolderPath, projectId, parentId) {
  storageLocationId <- getStorageLocationId(projectId)

  paths <- list.files(folderPath, full.names = TRUE)

  # A non-recursive listing also returns subdirectories; drop them before hashing.
  # '%in% TRUE' treats an NA 'isdir' (e.g. an unreadable entry) as "not a directory",
  # so such entries still reach digest() and fail loudly as before.
  isDir <- file.info(paths)$isdir %in% TRUE
  if (any(isDir)) {
    message("skipping directories: ", paste(paths[isDir], collapse = ", "))
  }
  paths <- paths[!isDir]

  # Hash every file up front so that an unreadable file aborts the run before any
  # entity has been created. vapply() guarantees a character vector even for an
  # empty folder; the result is named by path.
  md5s <- vapply(paths, function(p) digest(p, algo = "md5", file = TRUE), character(1))

  for (path in paths) {
    md5 <- md5s[[path]]  # '[[' extracts the bare digest string without its name
    message("path: ", path, " md5: ", md5)
    # The S3 key mirrors the local file name under the bucket folder.
    s3file <- file.path(s3FolderPath, basename(path))
    contentType <- synapseClient:::getMimeTypeForFile(path)
    linkS3FileToSynapse(s3file, contentType, md5, storageLocationId, parentId, AWSbucketName)
  }
  invisible(NULL)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment