Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@anthony-cros
Created February 22, 2021 18:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anthony-cros/1416c544438ef39ca36ae723d02c3ce9 to your computer and use it in GitHub Desktop.
Save anthony-cros/1416c544438ef39ca36ae723d02c3ce9 to your computer and use it in GitHub Desktop.
// Pipeline: read a tab-separated subset file, normalize its column names,
// split a few multi-valued fields, filter on allele origin, nest the
// "change_*" fields, and write the result to a .jsonl file.
//
// NOTE(review): the JVM does not expand "~" in paths by itself — confirm the
// library resolves it to the home directory, else use an absolute path.
"~/lovd/eds_subset.tsv"
// Input is configured as a tab-delimited table with "-" as the null marker;
// presumably noArraySeparator disables splitting cells into arrays on read
// (the explicit .split calls further down handle that) — TODO confirm.
.stream { _.table('\t').nullValue("-").noArraySeparator }
// ---------------------------------------------------------------------------
// Explicit renames (old name ~> new name) for columns whose target name
// cannot be derived mechanically from the original header.
.rename(
"DB-ID" ~> _id,
"DNA change (cDNA)" ~> "change_cdna",
"RNA change" ~> "change_rna",
"DNA change (genomic) (hg19)" ~> "change_dna_hg19",
"DNA change (hg38)" ~> "change_dna_hg38",
"Type/DNA" ~> "type",
"Allele" ~> "allele_origin",
"Origin" ~> "variant_origin")
// Blanket normalization: lower-case, then spaces and hyphens become
// underscores. Presumably applied to every key, in which case the names
// assigned above pass through unchanged since they are already in that
// form — TODO confirm.
.rename { _.toLowerCase.replace(" ", "_").replace("-", "_") }
.rename("vip" ~> "VIP") // restore legitimate acronym
.moveAsFirstKey(_id)
// ---------------------------------------------------------------------------
// Multi-valued fields, each split on its own delimiter ("/" vs ", ").
.split("effect") .by("/")
.split("predicted").by(", ")
.split("owner") .by(", ")
// TODO: confirm no other null semantics for this field
// "allele_origin" is the renamed "Allele" column. Presumably this drops the
// field (not the whole row) when its value is "Unknown" — TODO confirm.
.removeIfValueFor("allele_origin").is("Unknown")
// Selects the keys starting with "change" (change_cdna, change_rna,
// change_dna_hg19, change_dna_hg38 from the renames above); presumably nests
// them under a common parent by splitting on the default separator — TODO confirm.
.renestIfKeys {_.startsWith("change") }.usingDefaultSeparator
// Output path is hard-coded; the .jsonl extension suggests one JSON object per line.
.write("/tmp/eds_subset.jsonl")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment