View KMeansSparkMLToMLLib.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.mllib.clustering.BisectingKMeans | |
import org.apache.spark.mllib.linalg.Vectors | |
import org.apache.spark.mllib.linalg.Vector | |
//std_features col is of type vector | |
scaledFeatures.select($"std_features").printSchema() | |
val tempFeatureRdd = scaledFeatures.select($"std_features").rdd | |
import scala.reflect.runtime.universe._ |
View exercise1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession | |
from pyspark.sql.functions import * | |
from pyspark.sql import Row | |
from pyspark.sql.types import IntegerType | |
# Create the Spark session | |
spark = SparkSession.builder \ | |
.master("local") \ | |
.config("spark.sql.autoBroadcastJoinThreshold", -1) \ | |
.config("spark.executor.memory", "500mb") \ |
View spark_ml_custom_transformer.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.ml.Pipeline | |
import org.apache.spark.ml.PipelineStage | |
import org.apache.spark.ml.Transformer | |
import org.apache.spark.ml.classification.LogisticRegression | |
import org.apache.spark.ml.feature.LabeledPoint | |
import org.apache.spark.ml.linalg.DenseVector | |
import org.apache.spark.ml.linalg.Vectors | |
import org.apache.spark.ml.param.ParamMap | |
import org.apache.spark.sql.Dataset | |
import org.apache.spark.sql.Row |
View WikiPageClustering.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.diorsding.spark.ml; | |
import java.util.Arrays; | |
import java.util.List; | |
import org.apache.spark.SparkConf; | |
import org.apache.spark.SparkContext; | |
import org.apache.spark.ml.Pipeline; | |
import org.apache.spark.ml.PipelineModel; | |
import org.apache.spark.ml.PipelineStage; |
View TextClassification.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Arrays; | |
import java.util.List; | |
import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec.P; | |
import org.apache.spark.SparkConf; | |
import org.apache.spark.api.java.JavaSparkContext; | |
import org.apache.spark.api.java.function.MapFunction; | |
import org.apache.spark.ml.Pipeline; | |
import org.apache.spark.ml.PipelineModel; | |
import org.apache.spark.ml.PipelineStage; |
View DataFrameWithFileName.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.functions._ | |
import org.apache.spark.sql.SparkSession | |
object DataFrameWithFileNameApp extends App { | |
val spark: SparkSession = | |
SparkSession | |
.builder() | |
.appName("DataFrameApp") | |
.config("spark.master", "local[*]") |
View log4j for Spark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Root logger option | |
log4j.rootLogger=INFO, stdout | |
# Redirect log messages to console | |
log4j.appender.stdout=org.apache.log4j.ConsoleAppender | |
log4j.appender.stdout.Target=System.out | |
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout | |
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %t %c:%L - %m%n | |
log4j.logger.com.ncr.eda=INFO |
View spark-shell-init-load-file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
:paste | |
import org.apache.spark.sql.types._ | |
import com.databricks.spark.xml._ | |
import org.apache.spark.sql.functions._ | |
// For implicit conversions like converting RDDs to DataFrames | |
import spark.implicits._ |
View better_history.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Custom history configuration | |
# Run script using: | |
# chmod u+x better_history.sh | |
# sudo su | |
# ./better_history.sh | |
echo ">>> Starting" | |
echo ">>> Loading configuration into /etc/bash.bashrc" | |
echo "HISTTIMEFORMAT='%F %T '" >> /etc/bash.bashrc | |
echo 'HISTFILESIZE=-1' >> /etc/bash.bashrc |
View .gitconfig
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[alias] | |
co = checkout | |
cob = checkout -b | |
coo = !git fetch && git checkout | |
br = branch | |
brd = branch -d | |
brD = branch -D | |
merged = branch --merged | |
dmerged = "!git branch --merged | grep -v '\\*' | xargs -n 1 git branch -d" | |
st = status |
NewerOlder