Skip to content

Instantly share code, notes, and snippets.

@edefazio
Created October 16, 2015 15:06
Show Gist options
  • Save edefazio/f4badf9aaaece94d0b58 to your computer and use it in GitHub Desktop.
Save edefazio/f4badf9aaaece94d0b58 to your computer and use it in GitHub Desktop.
Performance of SWAR query scan verses List Lambda (stream() and parallelStream())
package swar.subword;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
public class SWAR_v_ListLambda_Perf
{
public static class XY
{
public final int x;
public final int y;
public XY( int x, int y )
{
this.x = x;
this.y = y;
}
public boolean bothOdd()
{
return ( x & 1 ) == 1 &&
( y & 1 ) == 1;
}
}
private static final int ROW_COUNT = 100000;
private static final int SCAN_ITERATIONS = 1000;
public static void main (String[] args)
{
/** Generate Random Test Data */
long[] xyCompound = new long[ ROW_COUNT ];
List<XY> xyList = new ArrayList<XY>( ROW_COUNT );
Random r = new Random();
for ( int i = 0; i < ROW_COUNT; i++ )
{
int x = r.nextInt();
int y = r.nextInt();
//we need this & MASK to avoid Java sign extension
xyCompound[ i ] = ( x + 0L << 32 ) | ( y & ( -1L >>> 32 ) );
xyList.add( new XY( x, y ) );
}
/** lets try a GC BEFORE running the timings*/
System.gc();
timeLambdaScan( xyList, SCAN_ITERATIONS );
timeLambdaParallelScan( xyList, SCAN_ITERATIONS );
timeSWARMask ( xyCompound, SCAN_ITERATIONS );
}
public static void timeLambdaScan ( List<XY> xyList, int iterations )
{
int found = 0;
long start = System.currentTimeMillis();
found = listLambdaScan(xyList, iterations );
long end = System.currentTimeMillis();
System.out.println ("List Lambda "+(end-start)+"ms for ("+iterations+"x) scanned "+xyList.size()+" found "+found);
}
public static void timeLambdaParallelScan ( List<XY> xyList, int iterations )
{
int found = 0;
long start = System.currentTimeMillis();
found = listLambdaParallelScan(xyList, iterations );
long end = System.currentTimeMillis();
System.out.println ("List Lambda (Parallel) "+(end-start)+"ms for ("+iterations+"x) scanned "+xyList.size()+" found "+found);
}
public static int listLambdaScan ( List<XY> xyList, int iterations )
{
int found = 0;
for (int it = 0; it < iterations; it++ )
{
found+= xyList.stream()
.filter(s -> s.bothOdd())
.count();
}
return found;
}
public static int listLambdaParallelScan ( List<XY> xyList, int iterations )
{
int found = 0;
for (int it = 0; it < iterations; it++ )
{
found+= xyList.parallelStream()
.filter(s -> s.bothOdd())
.count();
}
return found;
}
private static void timeSWARMask ( long[] xyCompound, int iterations )
{
long start = System.currentTimeMillis();
int found = compoundMaskBothOdd( xyCompound, iterations );
long end = System.currentTimeMillis();
System.out.println ("Compound SWAR Bit Mask "+(end-start)+"ms for ("+iterations+"x) scanned "+xyCompound.length+" found "+found);
}
public static final long BOTH_ODD_MASK = ( 1L << 32 ) + 1;
public static int compoundMaskBothOdd(long[] xyCompound, int iterations )
{
int found =0;
for (int it=0; it<iterations; it++)
{
for (int i=0; i<xyCompound.length; i++)
{
if ( ( xyCompound[ i ] & BOTH_ODD_MASK ) == BOTH_ODD_MASK )
{
found++;
}
}
}
return found;
}
}
@edefazio
Copy link
Author

Compares performance of full row query scan (to check if x,y are BOTH ODD)

  • on a List using a Lambda stream
  • on a List using a Lambda parallelStream
  • on 64-bit (long[] xyCompound) using SWAR-based evaluation
  Compound SWAR Bit Mask                     > 170000000 FAILS OOM
  List Lambda            FAIL OOM

//170M
Compound SWAR Bit Mask 14730ms for (100x) scanned 170000000
List Lambda FAIL OOM

// 160M
Compound SWAR Bit Mask 14403ms for (100x) scanned 160000000
List Lambda FAIL OOM

// 70M
Compound SWAR Bit Mask 5478ms for (100x) scanned 70000000
List Lambda FAIL OOM

//60M
Compound SWAR Bit Mask 4602ms for (100x) scanned 60000000
List Lambda 139783ms for (100x) scanned 60000000
List Lambda (Parallel) 211698ms for (100x) scanned 60000000

//50M
Compound SWAR Bit Mask 3851ms for (100x) scanned 50000000
List Lambda 34156ms for (100x) scanned 50000000
List Lambda (Parallel) 12728ms for (100x) scanned 50000000

//50M
Compound SWAR Bit Mask 391ms for (10x) scanned 50000000
List Lambda 5033ms for (10x) scanned 50000000
List Lambda (Parallel) 2357ms for (10x) scanned 50000000

//40M
Compound SWAR Bit Mask 312ms for (10x) scanned 40000000
List Lambda 3748ms for (10x) scanned 40000000
List Lambda (Parallel) 2092ms for (10x) scanned 40000000

Compound SWAR Bit Mask 234ms for (10x) scanned 30000000
List Lambda 2702ms for (10x) scanned 30000000
List Lambda (Parallel) 977ms for (10x) scanned 30000000

Compound SWAR Bit Mask 89ms for (10x) scanned 10000000
List Lambda 910ms for (10x) scanned 10000000
List Lambda (Parallel) 323ms for (10x) scanned 10000000

Compound SWAR Bit Mask 16ms for (10x) scanned 1000000
List Lambda 134ms for (10x) scanned 1000000
List Lambda (Parallel) 58ms for (10x) scanned 1000000

Compound SWAR Bit Mask 80ms for (100x) scanned 1000000
List Lambda 806ms for (100x) scanned 1000000
List Lambda (Parallel) 296ms for (100x) scanned 1000000

Compound SWAR Bit Mask 73ms for (1000x) scanned 100000
List Lambda 678ms for (1000x) scanned 100000
List Lambda (Parallel) 266ms for (1000x) scanned 100000

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment