Darya Vanichkina dvanic

## char2var.R
# Variable name to character
# substitute() captures the expression `variable` without evaluating it, and
# deparse() converts that expression into its character representation.

deparse(substitute(variable))

# Evaluated at top level exactly as written, the call above yields:
"variable"

# Character to variable
# as.name() turns the string into a symbol; eval() then evaluates that symbol,
# i.e. looks up the object currently bound to the name `variable`.

eval(as.name("variable"))

## cigarfunction.r
# Return every match of `pattern` in `x` with its final character removed.
#
# Arguments:
#   pattern  regular expression handed to gregexpr().
#   x        character string to search; matches are located in the first
#            element only, so a single string is expected.
#
# Returns a character vector with one element per match. When nothing
# matches, gregexpr() reports position -1 and the result is one empty string.
matcher <- function(pattern, x) {
  ind <- gregexpr(pattern, x)[[1]]
  start <- as.numeric(ind)
  # `start + match.length - 1` is the last matched character; subtracting 2
  # stops one character earlier, dropping the trailing character of the match
  # (e.g. the letter in "12M" when the pattern is "\\d+M").
  end <- start + attr(ind, "match.length") - 2
  # substring() is vectorised over start/end, so no row-wise apply() is needed.
  substring(x, start, end)
}
# Sum the numbers that directly precede `c` in `cigar`.
#
# Arguments:
#   c      text appended to "\\d+" to build the search pattern (presumably a
#          single CIGAR operation letter such as "M" -- confirm with callers).
#   cigar  string passed on to matcher().
#
# Returns the numeric sum of the extracted pieces. Pieces that as.numeric()
# cannot parse become NA and are ignored via na.rm = TRUE.
doone <- function(c, cigar) {
  pat <- paste0("\\d+", c)
  sum(as.numeric(matcher(pat, cigar)), na.rm = TRUE)
}

## readGTF.py
from collections import defaultdict
import gzip
import pandas as pd
import re


# Names used for the fixed leading fields of a GTF record
# (presumably in column order -- confirm against the parser that uses them).
GTF_HEADER = [
    'seqname',
    'source',
    'feature',
    'start',
    'end',
    'score',
    'strand',
    'frame',
]

# Separator patterns: a semicolon / a comma together with any amount of
# whitespace on either side.
R_SEMICOLON = re.compile(r'\s*;\s*')
R_COMMA = re.compile(r'\s*,\s*')

## useful_pandas_snippets.py
# List unique values in a DataFrame column
df['column_name'].unique()

# Convert Series datatype to numeric; values that cannot be parsed as numbers
# become NaN. Series.convert_objects() is no longer available in pandas, so
# pd.to_numeric(..., errors='coerce') is used instead.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Grab DataFrame rows where column has certain values
valuelist = ['value1', 'value2', 'value3']
df = df[df.column.isin(valuelist)]

## AwkOneLiners.sh
# http://www.student.northpark.edu/pemente/awk/awk1line.txt

# FILE SPACING:

# double space a file
# Neither command names an input file, so awk reads standard input.
# Variant 1: the pattern `1` is always true, so each record is printed by the
# default action; the `{print ""}` block then emits an empty line after it.
awk '1;{print ""}'
# Variant 2: the BEGIN block sets the output record separator to two newlines
# before any input is read; the always-true pattern `1` prints every record.
awk 'BEGIN{ORS="\n\n"};1'

 # double space a file which already has blank lines in it. Output file
 # should contain no more than one blank line between lines of text.

## AwkOneLiners.sh
# http://www.student.northpark.edu/pemente/awk/awk1line.txt

# FILE SPACING:

# double space a file
# Neither command names an input file, so awk reads standard input.
# Variant 1: the pattern `1` is always true, so each record is printed by the
# default action; the `{print ""}` block then emits an empty line after it.
awk '1;{print ""}'
# Variant 2: the BEGIN block sets the output record separator to two newlines
# before any input is read; the always-true pattern `1` prints every record.
awk 'BEGIN{ORS="\n\n"};1'

 # double space a file which already has blank lines in it. Output file
 # should contain no more than one blank line between lines of text.

## testexistence.R
# Abort with an error as soon as one of the names listed in
# `characterlistofobjectnames` does not refer to an existing object.
for (object in characterlistofobjectnames) {
  if (!exists(object)) {
    stop(paste0("The object ", object, " does not exist! Please edit your input list before continuing."))
  }
}

## challenges.R
# This is the file for the challenges
# 01 Raster structure --------
## Challenge 1
# Challenge: What units are our data in?
#
#
## Challenge 2
# Challenge: Use the output from the GDALinfo() function to find out what NoDataValue is used for our DSM_HARV dataset.
#
#

## FilesByDate.sh
# https://stackoverflow.com/questions/158044/how-to-use-find-to-search-for-files-created-on-a-specific-date
# Regular files under the current directory dated June 7th, 2007.
# NOTE: -newermt compares each file's modification time (not its creation
# time) with the given date: the first test keeps files modified after the
# start of 2007-06-07, the negated second test drops files modified after the
# start of 2007-06-08.
find . -type f -newermt 2007-06-07 ! -newermt 2007-06-08

## RtabulateNAsbyColumn
# Count the missing values in every column of `transaction1` that contains at
# least one NA, then transpose so each such column becomes a row.
transaction1 %>%
  # keep only the columns holding at least one NA
  select_if(function(x) any(is.na(x))) %>%
  # summarise_each() is deprecated in dplyr; summarise_all() applies the same
  # per-column lambda and keeps the original column names.
  summarise_all(~ sum(is.na(.))) %>%
  t()
	# Variable name to character
	# substitute() captures the expression `variable` without evaluating it, and
	# deparse() converts that expression into its character representation.

	deparse(substitute(variable))

	# Evaluated at top level exactly as written, the call above yields:
	"variable"

	# Character to variable
	# as.name() turns the string into a symbol; eval() then evaluates that symbol,
	# i.e. looks up the object currently bound to the name `variable`.

	eval(as.name("variable"))
	# Return every match of `pattern` in `x` with its final character removed.
	#
	# Arguments:
	#   pattern  regular expression handed to gregexpr().
	#   x        character string to search; matches are located in the first
	#            element only, so a single string is expected.
	#
	# Returns a character vector with one element per match. When nothing
	# matches, gregexpr() reports position -1 and the result is one empty string.
	matcher <- function(pattern, x) {
	  ind <- gregexpr(pattern, x)[[1]]
	  start <- as.numeric(ind)
	  # `start + match.length - 1` is the last matched character; subtracting 2
	  # stops one character earlier, dropping the trailing character of the
	  # match (e.g. the letter in "12M" when the pattern is "\\d+M").
	  end <- start + attr(ind, "match.length") - 2
	  # substring() is vectorised over start/end, so no row-wise apply() is needed.
	  substring(x, start, end)
	}
	# Sum the numbers that directly precede `c` in `cigar`.
	#
	# Arguments:
	#   c      text appended to "\\d+" to build the search pattern (presumably a
	#          single CIGAR operation letter such as "M" -- confirm with callers).
	#   cigar  string passed on to matcher().
	#
	# Returns the numeric sum of the extracted pieces. Pieces that as.numeric()
	# cannot parse become NA and are ignored via na.rm = TRUE.
	doone <- function(c, cigar) {
	  pat <- paste0("\\d+", c)
	  sum(as.numeric(matcher(pat, cigar)), na.rm = TRUE)
	}
	from collections import defaultdict
	import gzip
	import pandas as pd
	import re


	# Names used for the fixed leading fields of a GTF record
	# (presumably in column order -- confirm against the parser that uses them).
	GTF_HEADER = ['seqname', 'source', 'feature', 'start', 'end', 'score',
	              'strand', 'frame']
	# Separator patterns. The `*` quantifiers make the surrounding whitespace
	# optional, so "a;b" and "a ; b" are both split; without them exactly one
	# whitespace character would be required on each side of the separator.
	R_SEMICOLON = re.compile(r'\s*;\s*')
	R_COMMA = re.compile(r'\s*,\s*')
	# List unique values in a DataFrame column
	df['column_name'].unique()

	# Convert Series datatype to numeric; values that cannot be parsed as numbers
	# become NaN. Series.convert_objects() is no longer available in pandas, so
	# pd.to_numeric(..., errors='coerce') is used instead.
	df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

	# Grab DataFrame rows where column has certain values
	valuelist = ['value1', 'value2', 'value3']
	df = df[df.column.isin(valuelist)]
	# http://www.student.northpark.edu/pemente/awk/awk1line.txt

	# FILE SPACING:

	# double space a file
	# Neither command names an input file, so awk reads standard input.
	# Variant 1: the pattern `1` is always true, so each record is printed by the
	# default action; the `{print ""}` block then emits an empty line after it.
	awk '1;{print ""}'
	# Variant 2: the BEGIN block sets the output record separator to two newlines
	# before any input is read; the always-true pattern `1` prints every record.
	awk 'BEGIN{ORS="\n\n"};1'

	# double space a file which already has blank lines in it. Output file
	# should contain no more than one blank line between lines of text.
	# Abort with an error as soon as one of the names listed in
	# `characterlistofobjectnames` does not refer to an existing object.
	for (object in characterlistofobjectnames) {
	  if (!exists(object)) {
	    stop(paste0("The object ", object, " does not exist! Please edit your input list before continuing."))
	  }
	}
	# This is the file for the challenges
	# 01 Raster structure --------
	## Challenge 1
	# Challenge: What units are our data in?
	#
	#
	## Challenge 2
	# Challenge: Use the output from the GDALinfo() function to find out what NoDataValue is used for our DSM_HARV dataset.
	#
	#
	# https://stackoverflow.com/questions/158044/how-to-use-find-to-search-for-files-created-on-a-specific-date
	# Regular files under the current directory dated June 7th, 2007.
	# NOTE: -newermt compares each file's modification time (not its creation
	# time) with the given date: the first test keeps files modified after the
	# start of 2007-06-07, the negated second test drops files modified after the
	# start of 2007-06-08.
	find . -type f -newermt 2007-06-07 ! -newermt 2007-06-08
	# Count the missing values in every column of `transaction1` that contains at
	# least one NA, then transpose so each such column becomes a row.
	transaction1 %>%
	  # keep only the columns holding at least one NA
	  select_if(function(x) any(is.na(x))) %>%
	  # summarise_each() is deprecated in dplyr; summarise_all() applies the same
	  # per-column lambda and keeps the original column names.
	  summarise_all(~ sum(is.na(.))) %>%
	  t()