Skip to content

Instantly share code, notes, and snippets.

@mppf
Created August 21, 2018 21:13
Show Gist options
  • Save mppf/99336f1dc89cafcabfb7c963a7eea1da to your computer and use it in GitHub Desktop.
Save mppf/99336f1dc89cafcabfb7c963a7eea1da to your computer and use it in GitHub Desktop.
module OtherMSBRadix {
use Shell;
use Time;
/* Sorting criterion has a method:
(key:uint,done:bool) = criterion.keyBits(record, startbit)
(cmp=-1,0,1,nsamebits) = criterion.prefixCompare(recordA, recordB)
cmp=-1,0,1 = criterion.compare(recordA, recordB)
returns 64-bits of key starting at startbit
and done is set if startbit is beyond the number
of bits in this record.
*/
param RADIX_BITS = 8;
param DISTRIBUTE_BUFFER = 5;
config const sortSwitch = 256;
config const sortTask = 256;
config param CHECK_SORTS = false;
config param progress = false;
config param alwaysSerial = false;
config const maxTasks = here.numPUs(logical=true);
extern proc __builtin_prefetch(ref v);
inline
proc prefetch(ref x)
{
if here == x.locale then __builtin_prefetch(x);
}
inline
proc binForRecord(a, criterion, startbit:int, radixbits:int):int
{
var (bits,done) = criterion.keyBits(a,startbit);
var bin = if done then 0 else 1 + (bits >> (64 - radixbits));
return bin:int;
}
proc msbRadixSort(start_n:int, end_n:int, A:[], criterion, startbit = 0)
{
if( end_n - start_n < sortSwitch ) {
shellSort(start_n, end_n, A, criterion);
if CHECK_SORTS then checkSorted(start_n, end_n, A, criterion);
return;
}
if progress then writeln("radix sort start=", start_n, " end=", end_n, " startbit=", startbit);
const radixbits = RADIX_BITS;
const radix = 1 << radixbits;
// 0th bin is for records where we've consumed all the key.
var offsets:[0..radix] int;
var end_offsets:[0..radix] int;
// Step 1: count.
// Could be parallel... but that's hard to write unless we
// can do a custom reduction? Or have thread-private variables?
for i in start_n..end_n {
var bin = binForRecord(A[i], criterion, startbit, radixbits);
offsets[bin] += 1;
}
if progress then writeln("accumulate");
//writeln("COUNTS: ", offsets);
// Step 2: accumulate
end_offsets = + scan offsets;
for (off,end) in zip(offsets,end_offsets) {
off = start_n + end - off;
end = start_n + end;
}
//writeln("OFFSETS: ", offsets);
var curbin = 0;
if progress then writeln("shuffle");
// Step 3: shuffle
while true {
// Find the next bin that isn't totally in place.
while curbin <= radix && offsets[curbin] == end_offsets[curbin] {
curbin += 1;
}
if curbin > radix {
break;
}
param max_buf = DISTRIBUTE_BUFFER;
var buf: max_buf*A.eltType;
var used_buf = 0;
var end = end_offsets[curbin];
var endfast = max(offsets[curbin], end_offsets[curbin]-2*max_buf);
var bufstart = max(offsets[curbin], end_offsets[curbin]-max_buf);
var i = bufstart;
// Fill buf with up to max_buf records from the end of this bin.
while i < end {
buf[used_buf+1] = A[i];
used_buf += 1;
i += 1;
}
while offsets[curbin] < endfast {
// Now go through the records in buf
// putting them in their right home.
for param j in 1..max_buf {
var bin = binForRecord(buf[j], criterion, startbit, radixbits);
prefetch(A[offsets[bin]]);
// Swap buf[j] into its appropriate bin.
// Leave buf[j] with the next unsorted item.
A[offsets[bin]] <=> buf[j];
offsets[bin] += 1;
}
}
// Now, handle elements in bufstart...end_offsets[cur_bin]
while offsets[curbin] < end {
// Put buf[j] into its right home
var j = 1;
while used_buf > 0 && j <= used_buf {
var bin = binForRecord(buf[j], criterion, startbit, radixbits);
// Swap buf[j] into its appropriate bin.
// Leave buf[j] with the next unsorted item.
// But offsets[bin] might in the region we already read.
if bin == curbin && offsets[curbin] >= bufstart {
A[offsets[bin]] = buf[j];
buf[j] = buf[used_buf];
used_buf -= 1;
} else {
A[offsets[bin]] <=> buf[j];
}
offsets[bin] += 1;
j += 1;
}
}
}
if progress then writeln("sort sub-problems");
// Step 4: sort sub-problems.
// Note that shuffle changed the offsets to be == end_offset..
// put offsets back.
offsets[0] = start_n;
for i in 1..radix {
offsets[i] = end_offsets[i-1];
}
// This is a parallel version
if alwaysSerial == false {
const subbits = startbit + radixbits;
var nbigsubs = 0;
var bigsubs:[0..radix] (int,int);
const runningNow = here.runningTasks();
for bin in 0..radix {
// Does the bin contain more than one record?
const bin_start = offsets[bin];
const bin_end = if bin+1<=radix then offsets[bin+1]-1 else end_n;
const num = 1 + bin_end - bin_start;
if num <= 1 {
// do nothing
} else if num < sortTask || runningNow >= maxTasks {
// sort it in this thread
msbRadixSort(bin_start, bin_end, A, criterion, subbits);
} else {
// Add it to the list of things to do in parallel
bigsubs[nbigsubs] = (bin_start, bin_end);
nbigsubs += 1;
}
}
forall (bin,(bin_start,bin_end)) in zip(0..#nbigsubs,bigsubs) {
msbRadixSort(bin_start, bin_end, A, criterion, subbits);
}
} else {
// The serial version
for bin in 0..radix {
// Does the bin contain more than one record?
const bin_start = offsets[bin];
const bin_end = if bin+1<=radix then offsets[bin+1]-1 else end_n;
const num = 1 + bin_end - bin_start;
if num > 1 {
// sort it in this thread
msbRadixSort(bin_start, bin_end, A, criterion, startbit + radixbits);
}
}
}
if CHECK_SORTS then checkSorted(start_n, end_n, A, criterion);
}
proc checkSorted(start_n:int, end_n:int, A:[], criterion, startbit = 0)
{
for i in start_n+1..end_n {
var cmp = criterion.compare(A[i-1], A[i]);
if cmp > 0 {
writeln("Error: not sorted properly at i=", i, " A[i-1]=", A[i-1], " A[i]=", A[i], " in start=", start_n, " end=", end_n);
writeln(A);
assert(false);
}
}
}
}
@mppf
Copy link
Author

mppf commented Aug 21, 2018

here is the tester for it

use BitOps;
 use Sort;
 use Random;
 use OtherMSBRadix;
 use Time;

 config const printStats = true;
 config const size = 10000;

 record intCriterion {
   inline
   proc keyBits(x:int, startbit:int):(uint,bool) {
     var key:uint = (x:uint) << startbit;
     var done:bool = startbit >= 64;
     return (key,done);
       //x:uint << startbit, startbit >= 64);
   }
   inline
   proc compare(a:int, b:int ) {
     var cmp:int;
     if a == b then cmp = 0;
     else if a < b then cmp = -1;
      else cmp = 1;
     return cmp;
   }
 }

 proc main() {
   
   var array:[1..size] int; 
   fillRandom(array);
   
   for i in array.domain {
     array[i] = abs(array[i]);
   }

   var t: Timer;
   t.start();

   msbRadixSort(1, size, array, new intCriterion());

   t.stop();
 
   if printStats {
     writeln("Time: ", t.elapsed());
   }

   t.clear();

   //writeln(array);
   writeln("sorted array: ",isSorted(array));

  }

@mppf
Copy link
Author

mppf commented Aug 23, 2018

Here is the Shell.chpl source:

proc shellSort(start_n:int, end_n:int, A:[], criterion)
{
  // Based on Sedgewick's Shell Sort -- see
  // Analysis of Shellsort and Related Algorithms 1996
  // and see Marcin Ciura - Best Increments for the Average Case of Shellsort
  // for the choice of these increments.

  var n = 1 + end_n - start_n;
  var js,h,hs:int;
  var v,tmp:A.eltType;
  const incs = (701, 301, 132, 57, 23, 10, 4, 1);
  const nincs = incs.size;
  for k in 1..nincs {
    h = incs[k];
    hs = h + start_n;
    for is in hs..end_n {
      v = A[is];
      js = is;
      while js >= hs && criterion.compare(v,A[js-h]) < 0 {
        A[js] = A[js - h];
        js -= h;
      }
      A[js] = v;
    }
  }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment