Simon (Xingpeng) Li xingpel

@xingpel
xingpel / data-filter
Created October 21, 2014 19:32
NGS-Data-Filter
filter <- function(threshold, zeros, countData){
  # 1. threshold: the minimum row mean a row must exceed to be kept;
  # 2. countData: the original count data (a data frame of read counts) to be filtered;
  # 3. zeros: the maximum number of zero-read cells a row may contain;
  #    every row containing more than this many zeros is deleted.
  Means <- rowMeans(countData, na.rm = FALSE)
  fullcountdata <- cbind(countData, Means)
  filcountdata <- subset(fullcountdata, Means > threshold)
  filcountdata$Means <- NULL
  # Delete the rows that contain more than "zeros" zero reads
  # (assumed completion of the truncated preview, following the comment above).
  zeroCounts <- rowSums(filcountdata == 0)
  filcountdata <- filcountdata[zeroCounts <= zeros, ]
  return(filcountdata)
}
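A minimal usage sketch, assuming countData is a data frame of raw counts; the input file name below is hypothetical:

counts <- read.csv("counts.csv", row.names = 1)              # hypothetical count table, one row per feature
kept <- filter(threshold = 5, zeros = 2, countData = counts)
dim(kept)                                                    # rows with mean > 5 and at most 2 zero counts remain

Note that defining this function masks stats::filter for the rest of the session.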
@xingpel
xingpel / Assignment1_Part3.R
Last active August 29, 2015 14:05
Take the CSV files whose number of complete rows (rows without any NA) exceeds the threshold and compute the correlation between their measurements
corr <- function(directory, threshold = 0) {
  result <- NULL
  # For each csv file, obtain the number of complete rows (cf. complete() in
  # Assignment1_Part2.R below) and compare it with the threshold.
  for (i in 1:332) {
    # format the file number as three digits, e.g. 1 -> "001"
    num <- sprintf("%03d", i)
    # read the file (directory is expected to end with a path separator)
    path <- paste(directory, num, ".csv", sep = "")
    data <- read.csv(path)
    # Assumed completion of the truncated preview: when the file has more complete
    # rows than `threshold`, record the correlation between its two pollutant
    # columns (assumed here to be sulfate and nitrate).
    ok <- data[complete.cases(data), ]
    if (nrow(ok) > threshold) {
      result <- c(result, cor(ok$sulfate, ok$nitrate))
    }
  }
  result
}
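A usage sketch; the directory name and threshold value are hypothetical:

correlations <- corr("data/", threshold = 150)
summary(correlations)    # one correlation per file with more than 150 complete rows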
@xingpel
xingpel / Assignment1_Part2.R
Last active August 29, 2015 14:05
For a list of CSV files, count how many rows contain no NA values
complete <- function(directory, id = 1:332) {
  # 1. create an empty output object; one row per file is appended below
  output <- NULL
  # 2. for each id, read the file and count the rows with no NA values
  for (i in id){
    num1 <- sprintf("%03d", i)
    path <- paste(directory, num1, ".csv", sep = "")
    data <- read.csv(path)
    # count complete rows (assumed completion of the truncated preview; the original
    # preview initialised nobs <- 0 and counted line by line)
    nobs <- sum(complete.cases(data))
    output <- rbind(output, data.frame(id = i, nobs = nobs))
  }
  output
}
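A usage sketch; the directory name is hypothetical:

counts <- complete("data/", id = 1:10)
head(counts)    # a data frame with columns id and nobs (complete rows per file)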
@xingpel
xingpel / Assignment1_Part1.R
Last active August 29, 2015 14:05
Merge selected CSV files by file number and calculate the mean of a given pollutant
pollutantmean <- function(directory, pollutant, id = 1:332) {
  # 1. read the selected csv files and merge them into one data frame called data
  data <- NULL
  for (i in id) {
    count <- sprintf("%03d", i)
    path <- paste(directory, count, ".csv", sep = "")
    data <- rbind(data, read.csv(path))
  }
  # 2. return the mean of the specified pollutant column, rounded to 4 digits
  #    (assumed completion of the truncated preview)
  round(mean(data[[pollutant]], na.rm = TRUE), 4)
}
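A usage sketch, assuming each file has a column named after the pollutant; the directory and column names here are hypothetical:

pollutantmean("data/", "nitrate", id = 70:72)    # mean nitrate across files 070-072, rounded to 4 digits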
@xingpel
xingpel / Mergeheadings
Last active August 29, 2015 14:04
Combine dictionary entries with fasta headings
data_file = open("sampleData.txt")
result_file = open("result.txt", "w")
for line in data_file:
    if "abc" in line:
        # lines containing "abc" are treated as headers of the form ">KEY ...": pull out KEY
        key = line.split(' ')[0]
        key = key.split('>')[1]
        # look the key up in the dictionary file and write the matching heading
        for line2 in open("dictionary.txt"):
            if key in line2:
                result_file.write(">" + line2)
    else:
        # assumed completion of the truncated preview: copy other lines through unchanged
        result_file.write(line)
data_file.close()
result_file.close()
@xingpel
xingpel / gist:4e69ca25338f957955a9
Created July 31, 2014 15:58
Add annotation for fasta from a dictionary
with open("anotation.txt", "r") as annotation:
    anotation_dict = {}
    for line in annotation:
        line = line.split()
        if line:  # skip empty lines
            # first column is the sequence id, the remaining columns are its annotation
            anotation_dict[line[0]] = line[1:]
        else:
            continue
# really should not parse the fasta file by myself; there are
# existing parsers (e.g. Biopython's Bio.SeqIO) for that.