Tobias Kind tobigithub

## guppy_basecalling.md

      
              1 file
            
          
              2 forks
            
          
              13 comments
            
          
              15 stars
            
          
                sirselim
                / guppy_basecalling.md
            
            
              Last active
              August 30, 2023 03:46
            
              
                My notes on setting up basecalling on Google Colab
              
          
    Nanopore basecalling on Google Colab


NOTE: this whole idea is the brainchild of Jürgen Hench. He got it up and running and posted about it here. I am merely wrapping the idea in a hopefully easy-to-follow set of instructions for people to test themselves.

This notebook describes processing of Nanopore sequencing data (fast5 files) in a Google Colab interactive notebook environment. This is made possible by utilising the GPU-enabled runtime that is available via Colab.

  
## xyz2om2.py
#!/usr/bin/env python2
import numpy as np
import sys

# Lookup table mapping an element symbol to its atomic number.
elements = {
    "H": 1,
    "C": 6,
    "N": 7,
    "O": 8,
    "F": 9,
}

## basecalling_notes.md

      
              1 file
            
          
              5 forks
            
          
              248 comments
            
          
              50 stars
            
          
                sirselim
                / basecalling_notes.md
            
            
              Last active
              August 1, 2023 01:27
            
              
                a collection of my notes while working on nanopore basecalling on the Jetson Xavier
              
          
    Jetson Xavier basecalling notes

initial basecalling runs

'fast' flip-flop calling on the Jetson Xavier

guppy_basecaller --disable_pings --compress_fastq -c dna_r9.4.1_450bps_fast.cfg -i flongle_fast5_pass/ -s flongle_test2 -x 'auto' --recursive 

  
## ramdisk-ubuntu.txt
**************************************************************
* Using a Ramdisk under Ubuntu
* Increases throughput  1:12  (151 Mbyte/sec to 1900 Mbyte/sec)
* Increases latency     1:300 (0.15 Mbyte/sec to 126 Mbyte/sec)
***************************************************************

# create 10 Gbyte ramdisk ubuntu
sudo mkdir -p /media/ramdisk
sudo mount -t tmpfs -o size=10G tmpfs /media/ramdisk/

## mono_crash_maxquant_20181029T091700.log
$ uname -rm
4.18.5-arch1-1-ARCH x86_64

$ mono --version
Mono JIT compiler version 5.21.0 (master/05bc22015c9 Thu Oct 25 13:31:44 CEST 2018)
Copyright (C) 2002-2014 Novell, Inc, Xamarin Inc and Contributors. www.mono-project.com
        TLS:           __thread
        SIGSEGV:       altstack
        Notifications: epoll
        Architecture:  amd64

## MNIST_Keras2DML.py
################################### Keras2DML: Training neural networks in parallel with SystemML #######################################
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Conv1D, Conv2D, MaxPooling2D, Dropout,Flatten
from keras import backend as K
from keras.models import Model
import numpy as np
import matplotlib.pyplot as plt

## xyz2om2.f90
! PUBLIC DOMAIN LICENSE 2017 BY ANDERS S. CHRISTENSEN
!
! I WROTE THIS BECAUSE I WAS BORED - I DON'T RECOMMEND
! WRITING FILE PARSERS IN FORTRAN BECAUSE IT IS NOT
! PRODUCTIVE.

program convert

    implicit none

## irisKaggle.sh
#!/bin/bash

# Change to your project directory. Edit this path to match your setup.
cd /Users/mchirico/Dropbox/kaggle/iris

# Change machine environment
eval $(docker-machine env big2)

# Function to launch browser and container.
kjupyter() {

## Kind-sequence.R
# Kind Sequence
# https://oeis.org/A262602
# https://en.wikipedia.org/wiki/Kind_sequence
# 1, 1, 3, 5, 7, 9, 13, 19, 27, 39, 57, 83, 119, 171, 247, 357, 515, 743, 1073, 1549, 2235, 3225, 4655, 6719, 9697,
# 13995, 20199, 29153, 42075, 60725, 87643, 126493, 182563, 263487, 380283, 548851, 792139, 1143269, 1650045,
# 2381459, 3437085, 4960637, 7159533
# The sequence seems simply odd first, but then in a gentle
# and kind way escapes to higher numbers
# Tobias Kind (2015)

## pubchem_convert_SMILES_to_IUPAC.py
import sys
import requests
from lxml import etree

# Script entry point: query PubChem for the compound given as a SMILES string.
if __name__=="__main__":
    # The SMILES string is taken from the first command-line argument.
    smiles = sys.argv[1]

    # Request the compound record as XML from the PubChem PUG REST endpoint.
    # NOTE(review): `smiles` is concatenated into the URL path unescaped;
    # confirm whether characters such as '#' or '/' need percent-encoding.
    html_doc = requests.get("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/" + smiles + "/record/XML")
    # Parse the response body text into an lxml element (the document root).
    root = etree.XML(html_doc.text)
	#!/usr/bin/env python2
	import numpy as np
	import sys

	elements = dict()
	elements["H"] = 1
	elements["C"] = 6
	elements["N"] = 7
	elements["O"] = 8
	elements["F"] = 9
	**************************************************************
	* Using a Ramdisk under Ubuntu
	* Increases throughput 1:12 (151 Mbyte/sec to 1900 Mbyte/sec)
	* Increases latency 1:300 (0.15 Mbyte/sec to 126 Mbyte/sec)
	***************************************************************

	# create 10 Gbyte ramdisk ubuntu
	sudo mkdir -p /media/ramdisk
	sudo mount -t tmpfs -o size=10G tmpfs /media/ramdisk/
	$ uname -rm
	4.18.5-arch1-1-ARCH x86_64

	$ mono --version
	Mono JIT compiler version 5.21.0 (master/05bc22015c9 Thu Oct 25 13:31:44 CEST 2018)
	Copyright (C) 2002-2014 Novell, Inc, Xamarin Inc and Contributors. www.mono-project.com
	TLS: __thread
	SIGSEGV: altstack
	Notifications: epoll
	Architecture: amd64
	################################### Keras2DML: Parallely training neural network with SystemML#######################################
	import tensorflow as tf
	import keras
	from keras.models import Sequential
	from keras.layers import Input, Dense, Conv1D, Conv2D, MaxPooling2D, Dropout,Flatten
	from keras import backend as K
	from keras.models import Model
	import numpy as np
	import matplotlib.pyplot as plt
	! PUBLIC DOMAIN LICENSE 2017 BY ANDERS S. CHRISTENSEN
	!
	! I WROTE THIS BECAUSE I WAS BORED - I DON'T RECOMMEND
	! WRITING FILE PARSERS IN FORTRAN BECAUSE IT IS NOT
	! PRODUCTIVE.

	program convert

	implicit none
	#!/bin/bash

	# Change to your directory. Change this for directory.
	cd /Users/mchirico/Dropbox/kaggle/iris

	# Change machine environment
	eval $(docker-machine env big2)

	# Function to launch browser and container.
	kjupyter() {
	# Kind Sequence
	# https://oeis.org/A262602
	# https://en.wikipedia.org/wiki/Kind_sequence
	# 1, 1, 3, 5, 7, 9, 13, 19, 27, 39, 57, 83, 119, 171, 247, 357, 515, 743, 1073, 1549, 2235, 3225, 4655, 6719, 9697,
	# 13995, 20199, 29153, 42075, 60725, 87643, 126493, 182563, 263487, 380283, 548851, 792139, 1143269, 1650045,
	# 2381459, 3437085, 4960637, 7159533
	# The sequence seems simply odd first, but then in a gentle
	# and kind way escapes to higher numbers
	# Tobias Kind (2015)
	import sys
	import requests
	from lxml import etree

	if __name__=="__main__":
	smiles = sys.argv[1]

	html_doc = requests.get("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/" + smiles + "/record/XML")
	root = etree.XML(html_doc.text)