{{ message }}

Instantly share code, notes, and snippets.

tmcw/floyd.js

Created Aug 23, 2012
Floyd's Algorithm for Random Subsets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 function sample(list, m) { var n = list.length; if (m > n) return void console && console.log('list length must be > sample'); var sampleList = []; for (var i = n - m; i < n; i++) { var item = list[~~(Math.random() * i)]; if (sampleList.indexOf(item) !== -1) sampleList.push(list[i]); else sampleList.push(item); } return sampleList; }

foo123 commented Aug 26, 2015

When the arraylist is already given there is a better algorithm of strict O(n) complexity (see below) The Floyd algorithm has O(n^2) worst-case complexity (due to the use of searches/hashmaps etc..)
Actually one can safely assume the arraylist as given always, since the selection eventually must be applied to some existing arraylist, so it is no major assumption to make nor drawback

// http://stackoverflow.com/a/32035986/3591273
function random_pick( a, k, non_destructive )
{
var picked, backup, i, selected, value, n = a.length;
k = Math.min( k, n );
picked = new Array( k );
backup = new Array( k );

non_destructive = false !== non_destructive;
// partially shuffle the array, and generate unbiased selection simultaneously
// this is a variation on fisher-yates-knuth shuffle
for (i=0; i<k; i++) // O(k) times
{
selected = Math.round( (--n)*Math.random() ); // unbiased sampling n * n-1 * n-2 * .. * n-k+1
value = a[ selected ];
a[ selected ] = a[ n ];
a[ n ] = value;
backup[ i ] = selected;
picked[ i ] = value;
}
if ( non_destructive )
{
// restore partially shuffled input array from backup
for (i=k-1; i>=0; i--) // O(k) times
{
selected = backup[ i ];
value = a[ n ];
a[ n ] = a[ selected ];
a[ selected ] = value;
n++;
}
}
return picked;
}