# justmarkham/dplyr-minrank.R (secret gist)

## dplyr-minrank.R
# Reproduction script: compares row_number() and min_rank() when they are used
# inside mutate() on a data set grouped by ID.
# All verbs used below (tibble, arrange, group_by, mutate, filter, desc,
# row_number, min_rank, %>%) come from dplyr.
library(dplyr)

# generate some test data
# NOTE(review): R 3.6.0 changed the default algorithm behind sample(), so the
# exact values recorded in the comments below may not reproduce on newer R.
set.seed(100)
test <- tibble(
  ID = sample(1:100, 100000, replace = TRUE),
  X = sample(1:1000, 100000, replace = TRUE)
)
test <- arrange(test, ID)
people <- group_by(test, ID)

# row_number() within ID == 1: tied X values receive consecutive ranks.
people %>%
  mutate(new = row_number(desc(X))) %>%
  filter(ID == 1) %>%
  arrange(new)
# result looks good:
#ID   X new
#1   1 999   1
#2   1 997   2
#3   1 996   3
#4   1 995   4
#5   1 995   5
#6   1 995   6
#7   1 994   7
#8   1 993   8
#9   1 993   9
#10  1 993  10

# min_rank() within ID == 1: tied X values share the smallest rank.
people %>%
  mutate(new = min_rank(desc(X))) %>%
  filter(ID == 1) %>%
  arrange(new)
# result looks good:
#ID   X new
#1   1 999   1
#2   1 997   2
#3   1 996   3
#4   1 995   4
#5   1 995   4
#6   1 995   4
#7   1 994   7
#8   1 993   8
#9   1 993   8
#10  1 993   8

# row_number() within ID == 2.
people %>%
  mutate(new = row_number(desc(X))) %>%
  filter(ID == 2) %>%
  arrange(new)
# result looks good:
#ID    X new
#1   2 1000   1
#2   2  997   2
#3   2  997   3
#4   2  997   4
#5   2  996   5
#6   2  996   6
#7   2  996   7
#8   2  994   8
#9   2  993   9
#10  2  993  10

# min_rank() within ID == 2: this is the call that misbehaved.
# Given the row_number() output for ID == 2 above (X = 1000, 997, 997, 997,
# 996, 996, 996, 994, 993, 993), the first ten ranks should have been
# new = 1, 2, 2, 2, 5, 5, 5, 8, 9, 9.
# NOTE(review): the output below is what the author recorded, presumably on an
# early dplyr release -- confirm the version before treating it as current.
people %>%
  mutate(new = min_rank(desc(X))) %>%
  filter(ID == 2) %>%
  arrange(new)
# strange result:
#ID   X new
#1   2 204   1
#2   2 451   2
#3   2 413   3
#4   2 688   4
#5   2 676   4
#6   2 988   6
#7   2 867   7
#8   2 154   7
#9   2 773   9
#10  2 541  10

### For every ID I have tried with this pair of statements (other than ID==1),
### row_number() seems to work and min_rank() doesn't.
	# Reproduction script: compares row_number() and min_rank() when they are
	# used inside mutate() on a data set grouped by ID.
	# NOTE(review): this tab-indented section repeats the same reproduction
	# steps as the section above it -- likely a paste artifact; consider
	# keeping only one copy.
	# All verbs used below (tibble, arrange, group_by, mutate, filter, desc,
	# row_number, min_rank, %>%) come from dplyr.
	library(dplyr)

	# generate some test data
	# NOTE(review): R 3.6.0 changed the default algorithm behind sample(), so
	# the exact values recorded in the comments below may not reproduce on
	# newer R.
	set.seed(100)
	test <- tibble(
	  ID = sample(1:100, 100000, replace = TRUE),
	  X = sample(1:1000, 100000, replace = TRUE)
	)
	test <- arrange(test, ID)
	people <- group_by(test, ID)

	# row_number() within ID == 1: tied X values receive consecutive ranks.
	people %>%
	  mutate(new = row_number(desc(X))) %>%
	  filter(ID == 1) %>%
	  arrange(new)
	# result looks good:
	#ID X new
	#1 1 999 1
	#2 1 997 2
	#3 1 996 3
	#4 1 995 4
	#5 1 995 5
	#6 1 995 6
	#7 1 994 7
	#8 1 993 8
	#9 1 993 9
	#10 1 993 10

	# min_rank() within ID == 1: tied X values share the smallest rank.
	people %>%
	  mutate(new = min_rank(desc(X))) %>%
	  filter(ID == 1) %>%
	  arrange(new)
	# result looks good:
	#ID X new
	#1 1 999 1
	#2 1 997 2
	#3 1 996 3
	#4 1 995 4
	#5 1 995 4
	#6 1 995 4
	#7 1 994 7
	#8 1 993 8
	#9 1 993 8
	#10 1 993 8

	# row_number() within ID == 2.
	people %>%
	  mutate(new = row_number(desc(X))) %>%
	  filter(ID == 2) %>%
	  arrange(new)
	# result looks good:
	#ID X new
	#1 2 1000 1
	#2 2 997 2
	#3 2 997 3
	#4 2 997 4
	#5 2 996 5
	#6 2 996 6
	#7 2 996 7
	#8 2 994 8
	#9 2 993 9
	#10 2 993 10

	# min_rank() within ID == 2: this is the call that misbehaved.
	# Given the row_number() output for ID == 2 above (X = 1000, 997, 997,
	# 997, 996, 996, 996, 994, 993, 993), the first ten ranks should have been
	# new = 1, 2, 2, 2, 5, 5, 5, 8, 9, 9.
	# NOTE(review): the output below is what the author recorded, presumably
	# on an early dplyr release -- confirm the version before treating it as
	# current.
	people %>%
	  mutate(new = min_rank(desc(X))) %>%
	  filter(ID == 2) %>%
	  arrange(new)
	# strange result:
	#ID X new
	#1 2 204 1
	#2 2 451 2
	#3 2 413 3
	#4 2 688 4
	#5 2 676 4
	#6 2 988 6
	#7 2 867 7
	#8 2 154 7
	#9 2 773 9
	#10 2 541 10

	### For every ID I have tried with this pair of statements (other than ID==1),
	### row_number() seems to work and min_rank() doesn't.