Skip to content

Instantly share code, notes, and snippets.

View corneliusroemer's full-sized avatar

Cornelius Roemer corneliusroemer

View GitHub Profile
@corneliusroemer
corneliusroemer / Newick.g4
Created December 19, 2023 01:59
ANTLR4 grammar for parsing Newick trees
// A Newick grammar based on the specification by Gary Olsen: available at
// https://phylipweb.github.io/phylip/newick_doc.html The grammar does _not_ include the optional
// comments as I have not figured out how to include them without adding a lot of `comment?` rules
// to the grammar, which makes it very slow.
grammar Newick;
tree: descendantList rootLabel? branchLength? SEMI;
descendantList: LPAREN subtree (COMMA subtree)* RPAREN;
subtree: (descendantList internalNodeLabel? | leafLabel) branchLength?;
@corneliusroemer
corneliusroemer / constraint_detector.py
Created December 17, 2023 11:31
Script to detect incompatible splits in two newick trees
import argparse
from pprint import pp
from Bio import Phylo
def get_terminals(clade):
    """Return the names of all terminals below ``clade`` as a frozenset.

    Args:
        clade: Any object exposing ``get_terminals()`` that yields items
            with a ``name`` attribute (presumably a ``Bio.Phylo`` clade,
            given the file's imports -- confirm against the caller).

    Returns:
        A ``frozenset`` of the terminals' ``name`` values. Duplicate names
        collapse to one entry, and the result is hashable.
    """
    return frozenset(terminal.name for terminal in clade.get_terminals())
@corneliusroemer
corneliusroemer / nearest_nodes.ndjson
Created December 1, 2023 14:48
Test data to reproduce polars bug
This file has been truncated, but you can view the full file.
{"seqName":"England/MILK-28D52E2/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"USA/CO-CDPHE-2102374275/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"Wales/PHWC-PGY3IH/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"Turkey/HSGM-F70/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"USA/WI-CDC-QDX31282160/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"Canada/ON-SHL-21-02331/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"France/BRE-IPP48628/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"USA/CA-CDC-LC0396661/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"France/OCC-HCL021227635901/2021","nearestNodes":["B.1.617.2"]}
{"seqName":"env/Ukraine/80004/2023","nearestNodes":["NODE_0000637","XBB.1.9.1"]}
@corneliusroemer
corneliusroemer / auto-faculty-profile.py
Created September 26, 2023 02:24
Automatically generate faculty profiles using pubmed abstracts and GPT-4, filtering papers for relevance based on author position
import argparse
from math import e
from Bio import Entrez
import openai
def search_latest_papers(faculty_name, source="Pubmed", num_papers=5):
papers_info = []
if source == "Pubmed":
Entrez.email = "YOUR_EMAIL@example.com"
@corneliusroemer
corneliusroemer / auto-faculty-profile.py
Created September 25, 2023 23:40
Automatically generate faculty profiles using pubmed abstracts and GPT-4
import argparse
from math import e
from Bio import Entrez
import openai
def search_latest_papers(faculty_name, source="Pubmed", num_papers=5):
abstracts = []
if source == "Pubmed":
This file has been truncated, but you can view the full file.
{"version":"v2","meta":{"title":"SARS-CoV-2 phylogeny","updated":"2023-08-08","build_url":"https://github.com/neherlab/nextclade_data_workflows","maintainers":[{"name":"Cornelius Roemer","url":"https://neherlab.org"},{"name":"Richard Neher","url":"https://neherlab.org"}],"display_defaults":{"color_by":"clade_membership","distance_measure":"div","map_triplicate":false,"branch_label":"clade","transmission_lines":false},"genome_annotations":{"E":{"end":26472,"seqid":"defaults/reference_seq.gb","start":26245,"strand":"+","type":"CDS"},"M":{"end":27191,"seqid":"defaults/reference_seq.gb","start":26523,"strand":"+","type":"CDS"},"N":{"end":29533,"seqid":"defaults/reference_seq.gb","start":28274,"strand":"+","type":"CDS"},"ORF1a":{"end":13468,"seqid":"defaults/reference_seq.gb","start":266,"strand":"+","type":"CDS"},"ORF1b":{"end":21555,"seqid":"defaults/reference_seq.gb","start":13468,"strand":"+","type":"CDS"},"ORF3a":{"end":26220,"seqid":"defaults/reference_seq.gb","start":25393,"strand":"+","type":"CDS"},"ORF6":
This file has been truncated, but you can view the full file.
{ "version": "v2", "meta": { "title": "Subtree with Japan\/PG-462868\/2023|EPI_ISL_17243060|2023-02-28 and 51 other uploaded samples", "description": "Dataset generated by [UShER web interface](https:\/\/genome.ucsc.edu\/cgi-bin\/hgPhyloPlace) using the [usher](https:\/\/github.com\/yatisht\/usher\/) program. If you have metadata you wish to display, you can now drag on a CSV file and it will be added into this view, [see here](https:\/\/docs.nextstrain.org\/projects\/auspice\/en\/latest\/advanced-functionality\/drag-drop-csv-tsv.html) for more info.", "panels": [ "tree", "entropy"] , "display_defaults": { "branch_label": "aa mutations", "color_by": "pango_lineage_usher"} , "colorings": [ { "key": "userOrOld", "title": "Sample type", "type": "categorical", "scale": [ [ "uploaded sample", "#CC0000"] , [ "GISAID and\/or public", "#000000"] ] } , { "key": "gt", "title": "Genotype", "type": "categorical"} , { "key": "pango_lineage", "title": "Pango lineage", "type": "categorical"} , { "key": "Nextstrain_clade",
@corneliusroemer
corneliusroemer / auspice.json
Created March 23, 2023 14:52
Test tree for Auspice PR #1655
This file has been truncated, but you can view the full file.
{"version":"v2","meta":{"title":"SARS-CoV-2 phylogeny","updated":"2023-03-22","build_url":"https://github.com/neherlab/nextclade_data_workflows","maintainers":[{"name":"Cornelius Roemer","url":"https://neherlab.org"},{"name":"Richard Neher","url":"https://neherlab.org"}],"display_defaults":{"color_by":"clade_membership","distance_measure":"div","map_triplicate":false,"branch_label":"clade","transmission_lines":false},"genome_annotations":{"nuc":{"end":29903,"start":1,"strand":"+","seqid":"defaults/reference_seq.gb","type":"source"},"E":{"end":26472,"seqid":"defaults/reference_seq.gb","start":26245,"strand":"+","type":"CDS"},"M":{"end":27191,"seqid":"defaults/reference_seq.gb","start":26523,"strand":"+","type":"CDS"},"N":{"end":29533,"seqid":"defaults/reference_seq.gb","start":28274,"strand":"+","type":"CDS"},"ORF1a":{"end":13468,"seqid":"defaults/reference_seq.gb","start":266,"strand":"+","type":"CDS"},"ORF1b":{"end":21555,"seqid":"defaults/reference_seq.gb","start":13468,"strand":"+","type":"CDS"},"ORF3a":{"
@corneliusroemer
corneliusroemer / virus_properties.json
Created March 21, 2023 00:40
Virus properties using sites 100-29720 for placement
{
"schemaVersion": "1.10.0",
"placementMaskRanges": [{"begin":0,"end":100},{"begin":29720,"end":30000}],
"nucMutLabelMap": {
"174T": [
"20H"
],
"204T": [
"20E"
],
@corneliusroemer
corneliusroemer / virus_properties.json
Created March 21, 2023 00:29
Virus properties for placement of 100-10k only
{
"schemaVersion": "1.10.0",
"placementMaskRanges": [{"begin":0,"end":100},{"begin":10700,"end":30000}],
"nucMutLabelMap": {
"174T": [
"20H"
],
"204T": [
"20E"
],