
boolean updateValue(Event event) {
    long userInitiatedTimestamp = event.getUserInitiatedTimestamp();
    String userId = event.getUserId();
    User user = database.query(userId);
    long lastUserInitiatedTimestamp = user.getLastUserInitiatedTimestamp();
    // event arrives in correct order
    if (userInitiatedTimestamp > lastUserInitiatedTimestamp) {
        // register with latest timestamp
        user.setLastUserInitiatedTimestamp(userInitiatedTimestamp);
        return true;
    }
    // stale or out-of-order event: keep the existing value
    return false;
}
mingwei-li / more.scala
Created August 4, 2020 18:36
hyperspace - more
// rebuild "index1" so it reflects data added to or removed from the underlying source
hs.refreshIndex("index1")
// soft-delete "index1": it is excluded from query optimization but its files are kept
hs.deleteIndex("index1")
// bring the soft-deleted "index1" back into use
hs.restoreIndex("index1")
// soft-delete "index2", then permanently remove its files
hs.deleteIndex("index2")
hs.vacuumIndex("index2")
mingwei-li / explain2.log
Created August 4, 2020 18:35
hyperspace - explain2
== Physical Plan ==
*(1) Project [name#11]
+- *(1) Filter (isnotnull(id#10) && (id#10 = 1))
+- *(1) FileScan parquet [id#10,name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/Users/mingwli/Dev/lib/hadoop-2.9.2/bin/spark-warehouse/indexes/index/v__=0], PartitionFilters: [], PushedFilters: [IsNotNull(id), EqualTo(id,1)], ReadSchema: struct<id:int,name:string>
mingwei-li / enable.scala
Created August 4, 2020 18:35
hyperspace - enable
// turn on Hyperspace's optimizer rules so eligible queries are rewritten to use indexes
spark.enableHyperspace
// run the query; with Hyperspace enabled it can now be served from the index
query.show()
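query is not defined in this gist. Judging from the Filter and Project in the physical plan above, it presumably looks something like the following sketch (the column names are taken from that plan, and df from the load-data gist further down):
// hypothetical reconstruction of query, inferred from the explain output above
val query = df.filter(df("id") === 1).select("name")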
mingwei-li / explain.log
Created August 4, 2020 18:34
hyperspace - explain
=============================================================
Plan with indexes:
=============================================================
Project [name#11]
+- Filter (isnotnull(id#10) && (id#10 = 1))
<----+- FileScan parquet [id#10,name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/Users/mingwli/Dev/lib/hadoop-2.9.2/bin/spark-warehouse/indexes/index/v__=0], PartitionFilters: [], PushedFilters: [IsNotNull(id), EqualTo(id,1)], ReadSchema: struct<id:int,name:string>---->
=============================================================
Plan without indexes:
=============================================================
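The side-by-side "Plan with indexes" / "Plan without indexes" comparison above is produced by Hyperspace's explain API. Assuming the hs and query values from the other gists, a minimal sketch of the call:
// ask Hyperspace to compare the query plan with and without its indexes
hs.explain(query, verbose = true)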
mingwei-li / show.log
Created August 4, 2020 18:32
hyperspace - show
+-----+--------------+---------------+----------+--------------------+--------------------+--------------------+------+
| name|indexedColumns|includedColumns|numBuckets| schema| indexLocation| queryPlan| state|
+-----+--------------+---------------+----------+--------------------+--------------------+--------------------+------+
|index| [id]| [name]| 200|{"type":"struct",...|file:/Users/mingw...|Relation[id#10,na...|ACTIVE|
+-----+--------------+---------------+----------+--------------------+--------------------+--------------------+------+
mingwei-li / create.scala
Created August 4, 2020 18:32
hyperspace - create
// entry point to the Hyperspace index management APIs
val hs = new Hyperspace(spark)
// build a covering index: bucketed on the indexed column "id", with "name" stored as an included column
hs.createIndex(df, IndexConfig("index", indexedColumns = Seq("id"), includedColumns = Seq("name")))
// list all Hyperspace indexes known to this session
hs.indexes.show()
mingwei-li / import.scala
Created August 4, 2020 18:31
hyperspace - import
import com.microsoft.hyperspace._
import com.microsoft.hyperspace.index._
mingwei-li / load-data.scala
Created August 4, 2020 18:30
hyperspace - load-data
val df = spark.read
  .format("csv")
  .option("header", "true")
  .option("inferSchema", "true")
  .load("hdfs://localhost:9000/hyperspace_test/customers.csv")
df.show()
mingwei-li / file.csv
Last active August 4, 2020 18:24
hyperspace - csv
id,name,zip
1,john smith,78750
2,john doe,78758
3,mike tyson,91731
4,mingwei li,78750