Skip to content

Instantly share code, notes, and snippets.

@choishingwan
Created December 2, 2021 15:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save choishingwan/874d5a756c64b134362465e7bcd2dbc5 to your computer and use it in GitHub Desktop.
Save choishingwan/874d5a756c64b134362465e7bcd2dbc5 to your computer and use it in GitHub Desktop.
Nextflow meta helper
import java.util.ArrayList;
// Splits one list-like element into a join key and its remaining payload.
//
// The meta map stored at position `index` is taken out of a shallow copy of
// `input` (the caller's object is never modified) and divided into the
// entries named in `column` and whatever entries are left over.
//
// input  : list-like element holding a meta map at `index`
// index  : position of the meta map inside `input`
// column : collection of meta keys that form the join key
//
// Returns
//   [keyMap, false, items]            when the meta map has no keys beyond `column`
//   [keyMap, true, leftover, items]   otherwise
// where `items` is the copy of `input` without the meta map.
def reformat_meta(input, index, column){
    // Every Collection (this includes nextflow.util.ArrayTuple as well as
    // other lists that cannot be cloned) is copied into a mutable ArrayList;
    // anything else falls back to clone() as before.
    def arrayItems = (input instanceof Collection) ? new ArrayList(input) : input.clone()
    // removeAt always removes by position, whereas remove() may be resolved
    // as remove(Object) depending on the runtime type of `index`
    def meta = arrayItems.removeAt(index)
    // subMap already builds a new map holding only the requested keys that exist
    def metaMap = meta.subMap(column)
    // reserve first element as the meta map
    // second element to indicate if we have left over keys
    if(metaMap == meta){
        return [metaMap, false, arrayItems]
    }
    def remain = meta.clone()
    remain.keySet().removeAll(column)
    return [metaMap, true, remain, arrayItems]
}
// Rebuilds a flat element from two reformat_meta results that were joined on
// their first item.
//
// Expected layout of `input`:
//   [key, hasExtraX, (extraX), itemsX, hasExtraY, (extraY), itemsY]
// where each `extra*` map is present only when the flag before it is true.
//
// dupHandle decides what happens to extra meta keys found on both sides
// (valid dupHandle = remove, rename, replace):
//   "remove"  : keep the value from x and drop the one from y. This is the
//               default; any value other than "rename"/"replace" acts the same.
//   "rename"  : keep both; the key from y gets the suffix ".V<n>", with n the
//               first number that gives an unused key.
//   "replace" : the value from y overwrites the value from x.
//
// Returns [mergedMeta, <items of x>..., <items of y>...].
def reconstruct_meta(input, dupHandle="remove"){
    // `input` is only read through a running index, so no copy of it is needed
    def idx = 0
    def metaFromX = input[idx++]
    def hasExtraMeta = input[idx++]
    if(hasExtraMeta){
        // Map + Map builds a new map, the key map inside `input` stays untouched
        metaFromX += input[idx++]
    }
    def remainItemsFromX = input[idx++]
    // now the second items
    hasExtraMeta = input[idx++]
    if(hasExtraMeta){
        // cloned because the rename branch below modifies it
        def metaFromY = input[idx++].clone()
        if(dupHandle == "rename"){
            // subMap returns a fresh map, so its key set is a safe snapshot
            // to iterate over while metaFromY is being changed
            def duplicated = metaFromY.subMap(metaFromX.keySet()).keySet()
            duplicated.each{ key ->
                def value = metaFromY.remove(key)
                def colidx = 1
                // always derive the candidate from the original key, so a
                // second attempt gives "key.V2" and not "key.V1.V2"
                def renamed = key + ".V" + colidx
                // the new name must not collide with a key on either side
                while(metaFromX.containsKey(renamed) || metaFromY.containsKey(renamed)){
                    ++colidx
                    renamed = key + ".V" + colidx
                }
                metaFromY[renamed] = value
            }
        }
        if(dupHandle != "replace"){
            // right operand wins: values from x are kept
            metaFromX = metaFromY + metaFromX
        } else{
            metaFromX += metaFromY
        }
    }
    def returnResult = []
    returnResult << metaFromX
    returnResult.addAll(remainItemsFromX)
    returnResult.addAll(input[idx++])
    // every slot of `input` must have been consumed
    assert input.size() == idx
    return returnResult
}
// Joins the elements of two Nextflow channels on a subset of the keys of the
// meta map that each element carries.
//
// Named arguments (passed as one map):
//   x, y   : the two inputs; `map` and `combine` are called on them
//   by     : position of the meta map in the elements of both inputs
//   by_x   : position of the meta map in the elements of x (defaults to `by`)
//   by_y   : position of the meta map in the elements of y (defaults to `by_x`)
//   column : meta keys to join on; each must be present in the meta map of
//            every element of x and y
//   dup    : handling of other meta keys found on both sides, handed to
//            reconstruct_meta (remove, rename or replace)
//
// Returns the joined result after `unique`, one element per match in the form
// produced by reconstruct_meta.
//
// Throws Exception right away when neither `by` nor `by_x` is given or when
// `column` is missing; the per-element checks throw while elements are mapped.
//
// TODO: One possible way to ensure mapping is to convert all internal values to string
// and back convert to whichever variable types that are reasonable, thus removing
// the type setting burden from the users, however, if for some reason,
// different entry of the channel contains map with different type setting, it will
// be impossible for us to determine what type to use
def combine_map(args){
    // Compare against null explicitly: index 0 is valid but falsy in Groovy,
    // so a truthiness test would treat `by_x: 0` as "not provided".
    // The caller's map is left unmodified.
    def byX = args.by_x != null ? args.by_x : args.by
    def byY = args.by_y != null ? args.by_y : byX
    if(byX == null){
        throw new Exception("Error: Must provide either the by or by_x and by_y arguments")
    }
    if(args.column == null){
        throw new Exception("Error: Must provide the column argument")
    }
    def column = args.column
    def dup = args.dup

    // check the index and that every join column is present in the meta map
    def checkElement = { a, by, label ->
        if(a.size() <= by){
            throw new Exception("Error: Array index out of bound. Index for " + label + " is larger than size of " + label)
        }
        else if(column.size() != a[by].subMap(column).keySet().size()){
            println(column)
            println(a[by])
            throw new Exception("Error: Some columns were either duplicated or not found in the map from " + label + ". Please check that you have provided the correct column parameter")
        }
    }

    // validate each element immediately before it is reformatted, so a bad
    // element always fails with the descriptive message
    def updatedX = args.x.map{ a ->
        checkElement(a, byX, "x")
        reformat_meta(a, byX, column)
    }
    def updatedY = args.y.map{ a ->
        checkElement(a, byY, "y")
        reformat_meta(a, byY, column)
    }
    // position 0 of every reformatted element is the join key
    return updatedX
        .combine(updatedY, by: 0)
        .map{ a -> reconstruct_meta(a, dup) }
        .unique()
}
// Builds a new list from the element `x` in which the map at position `idx`
// is merged with `meta`.
//
// Named arguments (passed as one map):
//   x    : list-like element containing a map at position `idx`
//   meta : map with the entries to add
//   idx  : position of the map inside `x`; 0 is used when absent
//
// Keys already present in the map of `x` keep their value. Maps and Lists at
// other positions are cloned, everything else is passed through as is.
// Throws Exception when `x` or `meta` is missing.
def addMeta(args){
    // default position is written back into the argument map
    if(!args?.idx){
        args.idx = 0
    }
    if(!args.containsKey("x")){
        throw new Exception("Error: You must provide `x` for addMeta")
    }
    if(!args.containsKey("meta")){
        throw new Exception("Error: You must provide a map to `meta` for addMeta")
    }
    return (0..<args.x.size()).collect{ pos ->
        def element = args.x[pos]
        if(pos == args.idx){
            def existing = element.clone()
            // right operand wins, so existing entries are not overwritten
            return args.meta + existing
        }
        if(element instanceof Map || element instanceof List){
            return element.clone()
        }
        return element
    }
}
// Builds a new list from the element `x` in which the given keys are removed
// from the map at position `idx`.
//
// Named arguments (passed as one map):
//   x    : list-like element containing a map at position `idx`
//   keys : collection of keys to drop from that map
//   idx  : position of the map inside `x`; 0 is used when absent
//
// The map is cloned before keys are removed. Maps and Lists at other
// positions are cloned too, everything else is passed through as is.
// Throws Exception when `x` or `keys` is missing.
def removeMeta(args){
    // default position is written back into the argument map
    if(!args?.idx){
        args.idx = 0
    }
    if(!args.containsKey("x")){
        throw new Exception("Error: You must provide `x` for removeMeta")
    }
    if(!args.containsKey("keys")){
        throw new Exception("Error: You must provide a list of key(s) to `keys` for removeMeta")
    }
    return (0..<args.x.size()).collect{ pos ->
        def element = args.x[pos]
        if(pos == args.idx){
            def trimmed = element.clone()
            trimmed.keySet().removeAll(args.keys)
            return trimmed
        }
        if(element instanceof Map || element instanceof List){
            return element.clone()
        }
        return element
    }
}
// Returns the items of `x` whose position is not listed in `idx`.
//
// Named arguments (passed as one map):
//   x   : list-like element to filter
//   idx : list of positions to drop; a single number is accepted as well
//
// When `idx` is empty, `x` itself is returned; otherwise a new list is built.
// Throws Exception when `x` or `idx` is missing.
def removeElements(args){
    if(!args.containsKey("x")){
        throw new Exception("Error: You must provide `x` for removeElements")
    }
    if(!args.containsKey("idx")){
        // the parameter is called `idx`, not `keys`
        throw new Exception("Error: You must provide a list of index(s) to `idx` for removeElements")
    }
    // a bare number is shorthand for a one-element list of positions
    def indices = (args.idx instanceof Number) ? [args.idx] : args.idx
    if(indices.size() == 0){
        return(args.x)
    }
    def result = []
    for(def i = 0; i < args.x.size(); ++i){
        if(!indices.contains(i)){
            result << args.x[i]
        }
    }
    return(result)
}
// Looks up `key` in `meta`.
// Returns `defaultValue` when `meta` is not a Map or does not contain `key`;
// a key that is present with a null value yields null.
def get_meta_value(meta, key, defaultValue){
    if(!(meta instanceof Map)){
        return defaultValue
    }
    if(!meta.containsKey(key)){
        return defaultValue
    }
    return meta.get(key)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment