Skip to content

Instantly share code, notes, and snippets.

View dvanic's full-sized avatar

Darya Vanichkina dvanic

View GitHub Profile
@dvanic
dvanic / cigarfunction.r
Created July 29, 2016 07:26
R gist to summarise CIGAR string
# Extract the text of every match of `pattern` in `x`, minus each match's
# final character.
#
# The last character is dropped on purpose: with a pattern such as "\\d+M"
# the match "10M" is returned as "10", leaving only the numeric part.
#
# Arguments:
#   pattern  regular expression passed to gregexpr().
#   x        the string to search; only the matches of its first element
#            are used (gregexpr(...)[[1]]).
#
# Returns a character vector with one element per match, or character(0)
# when `pattern` does not occur in `x`.
matcher <- function(pattern, x) {
  hits <- gregexpr(pattern, x)[[1]]
  starts <- as.numeric(hits)
  # gregexpr() signals "no match" with a single start position of -1;
  # return an empty vector instead of slicing with negative indices.
  if (length(starts) == 1L && !is.na(starts) && starts == -1) {
    return(character(0))
  }
  # start + length - 1 is the last matched character; one less drops it.
  ends <- starts + attr(hits, "match.length") - 2
  # substring() is vectorised over the start/stop positions.
  substring(x, starts, ends)
}
doone <- function(c, cigar) {
pat <- paste("\\d+", c , sep="")
sum(as.numeric(matcher(pat, cigar)), na.rm=T)
@dvanic
dvanic / readGTF.py
Last active August 15, 2016 06:26
pythonReadGTFlocal
from collections import defaultdict
import gzip
import pandas as pd
import re
# Eight column names, in order. NOTE(review): presumably the fixed leading
# fields of a GTF record -- confirm against the code that consumes this list.
GTF_HEADER = ['seqname', 'source', 'feature', 'start', 'end', 'score',
'strand', 'frame']
# Matches a semicolon together with any whitespace on either side of it.
R_SEMICOLON = re.compile(r'\s*;\s*')
# Matches a comma together with any whitespace on either side of it.
R_COMMA = re.compile(r'\s*,\s*')
@dvanic
dvanic / useful_pandas_snippets.py
Created August 17, 2016 07:31 — forked from bsweger/useful_pandas_snippets.md
Useful Pandas Snippets
# List unique values in a DataFrame column.
# (Series.unique() replaces pd.unique(Series.ravel()); ravel() on a Series
# is deprecated in recent pandas.)
df['column_name'].unique()

# Convert a column to numeric, turning any non-numeric values into NaN.
# Series.convert_objects() has been removed from pandas; pd.to_numeric with
# errors='coerce' is the supported way to coerce unparseable values to NaN.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Keep only the DataFrame rows whose column value appears in a given list.
valuelist = ['value1', 'value2', 'value3']
df = df[df.column.isin(valuelist)]
@dvanic
dvanic / AwkOneLiners.sh
Created August 20, 2016 08:28
OneLinersFromMyArchiveawk
# http://www.student.northpark.edu/pemente/awk/awk1line.txt
# FILE SPACING:
# double space a file
# Two rules run for every input line: the always-true pattern `1` triggers
# the default action (print the line), then `{print ""}` emits an empty line.
awk '1;{print ""}'
# Same result by setting the output record separator to two newlines before
# any input is read; the pattern `1` then prints each line followed by ORS.
awk 'BEGIN{ORS="\n\n"};1'
# double space a file which already has blank lines in it. Output file
# should contain no more than one blank line between lines of text.
@dvanic
dvanic / AwkOneLiners.sh
Last active August 20, 2016 08:29
OneLinersFromMyArchiveawk
# http://www.student.northpark.edu/pemente/awk/awk1line.txt
# FILE SPACING:
# double space a file
# Two rules run for every input line: the always-true pattern `1` triggers
# the default action (print the line), then `{print ""}` emits an empty line.
awk '1;{print ""}'
# Same result by setting the output record separator to two newlines before
# any input is read; the pattern `1` then prints each line followed by ORS.
awk 'BEGIN{ORS="\n\n"};1'
# double space a file which already has blank lines in it. Output file
# should contain no more than one blank line between lines of text.
@dvanic
dvanic / char2var.R
Last active September 20, 2016 04:12
Character to variable and variable to character conversion #R
# Variable name to character
# substitute() captures the expression `variable` without evaluating it, and
# deparse() turns that captured expression into a character string.
deparse(substitute(variable))
# The literal below is the string the call above yields for `variable`.
"variable"
# Character to variable
# as.name() builds a symbol from the string; eval() then evaluates that
# symbol, returning the value of the object called `variable`.
eval(as.name("variable"))
# Stop with an error at the first name in `characterlistofobjectnames` that
# does not refer to an existing object. exists() takes the object name as a
# character string.
for (object in characterlistofobjectnames) {
  if (!exists(object)) {
    stop(paste0(
      "The object ", object,
      " does not exist! Please edit your input list before continuing."
    ))
  }
}
# This is the file for the challenges
# 01 Raster structure --------
## Challenge 1
# Challenge: What units are our data in?
#
#
## Challenge 2
# Challenge: Use the output from the GDALinfo() function to find out what NoDataValue is used for our DSM_HARV dataset.
#
#
# https://stackoverflow.com/questions/158044/how-to-use-find-to-search-for-files-created-on-a-specific-date
# Regular files under the current directory last modified on 7 June 2007.
# NOTE: -newermt compares each file's modification time (not its creation
# time) with the given date: the file must be newer than 2007-06-07 and
# NOT newer than 2007-06-08.
find . -type f -newermt 2007-06-07 ! -newermt 2007-06-08
@dvanic
dvanic / RtabulateNAsbyColumn
Created October 1, 2019 08:10
Tabulate NAs by column in R using dplyr
# Count the NA values in every column of `transaction1` that contains at
# least one NA, and show the counts with one row per column.
transaction1 %>%
  # keep only the columns that contain at least one NA
  select_if(function(x) any(is.na(x))) %>%
  # one-row summary holding the number of NAs in each remaining column
  # (summarise_all() replaces the deprecated summarise_each())
  summarise_all(~ sum(is.na(.))) %>%
  # transpose so that each column name becomes a row of the result
  t()