- Download the dataset here
- Follow the well-commented code in kmeans.scala
/*
This is a collection of examples about Apache Spark's RDD API. These examples aim to help me test the RDD functionality.
References:
http://spark.apache.org/docs/latest/programming-guide.html
http://homepage.cs.latrobe.edu.au/zhe/ZhenHeSparkRDDAPIExamples.html
*/
object RddAPI { |
import java.util.*; | |
import java.util.Map.Entry; | |
import java.util.stream.Collectors; | |
/** | |
* Created by Umberto on 16/05/2017. | |
*/ | |
public class HashMapUtils { |
import org.apache.commons.lang.SystemUtils | |
import org.apache.spark.mllib.random.RandomRDDs._ | |
import org.apache.spark.sql.SQLContext | |
import org.apache.spark.{SparkConf, SparkContext} | |
import scala.math.sqrt | |
/** | |
* Created by Umberto on 08/02/2017. | |
*/ |
package knowledgebase.java.stream; | |
import java.time.Duration; | |
import java.util.*; | |
import static java.util.stream.Collectors.*; | |
/** | |
* Created by Umberto on 15/02/2017. | |
* https://dzone.com/articles/a-java-8-streams-cookbook |
package test.com.idlike.junit.df | |
import breeze.numerics.abs | |
import org.apache.spark.rdd.RDD | |
import org.apache.spark.sql.functions.col | |
import org.apache.spark.sql.{Column, DataFrame, Row} | |
/** | |
* Created by Umberto on 06/02/2017. | |
*/ |
public class Method1 { | |
/* | |
Adding synchronized to this method will makes it thread-safe. | |
When synchronized is added to a static method, the Class object is the object which is locked. | |
*/ | |
public static void main(String[] args) throws InterruptedException { | |
ProcessingThreadS pt = new ProcessingThreadS(); | |
Thread t1 = new Thread(pt, "t1"); |
/**
 * Generate a unique ID from a UUID in the positive space.
 * Reference: http://www.gregbugaj.com/?p=587
 * @return long value representing the UUID
 */
private Long generateUniqueId() | |
{ | |
long val = -1; | |
do | |
{ |
/**
 * Transposes a distributed `RowMatrix`.
 *
 * Strategy: tag each row with its index, explode every row into
 * (newRowIndex, (newColIndex, value)) triplets via `rowToTransposedTriplet`,
 * group the triplets by their new row index, sort so the transposed rows
 * come out in row-index order, and rebuild each row with `buildRow`.
 *
 * NOTE(review): relies on the sibling helpers `rowToTransposedTriplet` and
 * `buildRow` defined elsewhere in this file — confirm they restore the
 * column ordering inside each transposed row.
 *
 * @param m the matrix to transpose
 * @return a new RowMatrix holding the transpose of `m`
 */
def transposeRowMatrix(m: RowMatrix): RowMatrix = {
  val transposedRowsRDD = m.rows.zipWithIndex
    .map { case (row, rowIndex) => rowToTransposedTriplet(row, rowIndex) }
    .flatMap(x => x)    // triplets: (newRowIndex, (newColIndex, value))
    .groupByKey         // gather every entry belonging to one transposed row
    .sortByKey()        // keep the transposed rows in row-index order
    .map(_._2)          // drop the row indexes, keeping only the entries
    .map(buildRow)      // restore element order in each row, drop column indexes
  new RowMatrix(transposedRowsRDD)
}
def rowToTransposedTriplet(row: Vector, rowIndex: Long): Array[(Long, (Long, Double))] = { |
import java.util.Queue; | |
import java.util.concurrent.ConcurrentLinkedQueue; | |
import java.util.concurrent.Executors; | |
import java.util.concurrent.ScheduledExecutorService; | |
import java.util.concurrent.TimeUnit; | |
import java.util.concurrent.atomic.AtomicInteger; | |
/** | |
* @param <T> | |
*/ | |
public abstract class ObjectPool<T> { |
* Follow the well-commented code in kmeans.scala