Created
September 26, 2016 00:50
-
-
Save nk773/9eea2a0dd6e67a16fca37a917fcc9730 to your computer and use it in GitHub Desktop.
Compare two datasets and return result summary along with heatmap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
compareData <- function(dataset1, dataset2) {
  # Compares two datasets cell by cell. If the dimensions of the datasets
  # differ, only the overlapping top-left region (minimum number of rows and
  # minimum number of columns) is compared.
  #
  # Args:
  #   dataset1: first two-dimensional dataset (data frame or matrix).
  #   dataset2: second two-dimensional dataset (data frame or matrix).
  #
  # Returns:
  #   An unnamed list of two data frames:
  #     [[1]] the logical match matrix (TRUE where the cells are equal),
  #           converted to a data frame;
  #     [[2]] the output of summary() on that match matrix, converted to a
  #           data frame.
  #
  # Side effects:
  #   Draws a red/green heatmap of the match matrix (only when the compared
  #   region has at least 2 rows and 2 columns, which heatmap() requires) and
  #   a barplot of the number of matching cells per column. The plots are
  #   drawn on the active graphics device; they are not part of the return
  #   value.
  #
  # Note: cells are compared with `==`, so a comparison involving NA yields
  # NA rather than FALSE, and the per-column match count of such a column
  # is NA.
  dims1 <- dim(dataset1)
  dims2 <- dim(dataset2)
  if (length(dims1) != 2L || length(dims2) != 2L) {
    stop("dataset1 and dataset2 must both be two-dimensional", call. = FALSE)
  }

  # Scalar comparison inside `if`, so use the short-circuiting `||`.
  if (dims1[1] != dims2[1] || dims1[2] != dims2[2]) {
    print("**** ERROR: dataset sizes don't match ****")
    # paste() alone does not display anything inside a function; the result
    # has to be printed explicitly for the size labels to show up.
    print(paste("dataset1 size:", paste(dims1, collapse = " x ")))
    print(paste("dataset2 size:", paste(dims2, collapse = " x ")))
    print("using minimum size for comparison")
  }

  row_min <- min(dims1[1], dims2[1])
  col_min <- min(dims1[2], dims2[2])

  # seq_len() stays empty for a zero-sized dimension (1:0 would give c(1, 0)),
  # and drop = FALSE keeps single-row/single-column matrices two-dimensional.
  rows <- seq_len(row_min)
  cols <- seq_len(col_min)
  matches <- dataset1[rows, cols, drop = FALSE] ==
    dataset2[rows, cols, drop = FALSE]

  match_summary <- summary(matches)
  matches_per_column <- colSums(matches)

  # heatmap() stops with an error for fewer than 2 rows or 2 columns.
  if (row_min >= 2 && col_min >= 2) {
    heatmap(
      1 * matches,
      scale = "none", Rowv = NA, Colv = NA,
      col = c("red", "green")
    )
  }

  # One colour per compared column. No legend is drawn: the bars are already
  # labelled with the column names along the x axis.
  barplot(
    matches_per_column,
    col = heat.colors(col_min),
    beside = TRUE,
    xlab = "Column Names",
    ylab = "Number of datapoints that match"
  )

  list(data.frame(matches), data.frame(match_summary))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment