
@sadikovi
sadikovi / rustfmt.toml
Created April 4, 2018 09:33
rustfmt configuration for parquet-rs
max_width = 90
hard_tabs = false
tab_spaces = 2
newline_style = "Unix"
indent_style = "Block"
use_small_heuristics = false
format_strings = false
wrap_comments = true
comment_width = 90
normalize_comments = true
sadikovi / spellchecker.scala
Last active April 25, 2018 03:50
Simple spell checker based on dynamic programming
abstract class Spelling
case class CorrectSpelling(word: String) extends Spelling
case class IncorrectSpelling(word: String, suggestions: List[String]) extends Spelling
case class Spellchecker(dictionary: String) {
  private val numSuggestions = 10
  private val maxDistance = 5
  // set of valid words (replace with trie for space efficiency)
  private val set = readDict(dictionary)
  private val heap = new java.util.PriorityQueue[(Int, String)](
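The preview cuts off at the priority-queue declaration. As a rough illustration of the dynamic-programming core (in Python rather than the gist's Scala; the names `edit_distance` and `suggest` are mine, not from the gist), a suggestion routine built on Levenshtein distance with a heap of candidates might look like this:

```python
import heapq

def edit_distance(a, b):
    # Classic DP table: dp[i][j] = edits to turn a[:i] into b[:j].
    dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i in range(len(a) + 1):
        dp[i][0] = i
    for j in range(len(b) + 1):
        dp[0][j] = j
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,        # delete
                           dp[i][j - 1] + 1,        # insert
                           dp[i - 1][j - 1] + cost) # substitute
    return dp[len(a)][len(b)]

def suggest(word, dictionary, num_suggestions=10, max_distance=5):
    # Keep the closest words; the heap mirrors the gist's PriorityQueue.
    heap = []
    for cand in dictionary:
        d = edit_distance(word, cand)
        if d <= max_distance:
            heapq.heappush(heap, (d, cand))
    return [w for _, w in heapq.nsmallest(num_suggestions, heap)]
```

A trie over the dictionary, as the gist's comment suggests, would cut both memory and the number of candidates scanned.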
sadikovi / minimum-ascii-delete-sum-for-two-strings.md
Last active November 6, 2017 07:06
712. Minimum ASCII Delete Sum for Two Strings

Given two strings s1 and s2, find the lowest ASCII sum of deleted characters that makes the two strings equal.

Example 1:

  • Input: s1 = "sea", s2 = "eat"
  • Output: 231
  • Explanation: Deleting "s" from "sea" adds the ASCII value of "s" (115) to the sum. Deleting "t" from "eat" adds 116 to the sum. At the end, both strings are equal, and 115 + 116 = 231 is the minimum sum possible to achieve this.

Example 2:

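The gist's solution is not shown in the preview; a standard dynamic-programming approach in Python (function name `minimum_delete_sum` is mine) would be:

```python
def minimum_delete_sum(s1, s2):
    # dp[i][j] = minimal ASCII sum of deletions making s1[:i] and s2[:j] equal.
    m, n = len(s1), len(s2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    # Against an empty string, everything must be deleted.
    for i in range(1, m + 1):
        dp[i][0] = dp[i - 1][0] + ord(s1[i - 1])
    for j in range(1, n + 1):
        dp[0][j] = dp[0][j - 1] + ord(s2[j - 1])
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if s1[i - 1] == s2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]  # matching chars cost nothing
            else:
                dp[i][j] = min(dp[i - 1][j] + ord(s1[i - 1]),
                               dp[i][j - 1] + ord(s2[j - 1]))
    return dp[m][n]
```

For Example 1 this returns 231, matching the deletion of "s" (115) and "t" (116).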
sadikovi / spark-parquet-writer-settings.scala
Last active March 13, 2023 07:35
Spark Parquet writer v1/v2 settings
sc.hadoopConfiguration.set("parquet.writer.version", "v1") // either "v1" or "v2"
// disable vectorized reading; the vectorized reader does not support delta encoding
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "false")
sadikovi / vector_join.scala
Created August 24, 2017 01:06
Join two vector Spark DataFrames without an explicit ID
val vec1 = Seq(0.1, 0.2, 0.3, 0.4, 0.5, 0.6).toDF("a")
val vec2 = Seq(1.1, 1.2, 1.3, 1.4, 1.5, 1.6).toDF("a")
// Approach 1
// index both vectors and join on that index
def join1(vec1: org.apache.spark.sql.DataFrame, vec2: org.apache.spark.sql.DataFrame): org.apache.spark.sql.DataFrame = {
  val v1 = vec1.withColumn("id", monotonically_increasing_id()).
    withColumn("id", dense_rank().over(org.apache.spark.sql.expressions.Window.orderBy("id")))
  val v2 = vec2.withColumn("id", monotonically_increasing_id()).
    withColumn("id", dense_rank().over(org.apache.spark.sql.expressions.Window.orderBy("id")))
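The double pass exists because Spark's `monotonically_increasing_id` is unique but not contiguous across partitions, so `dense_rank` over it produces a gap-free positional id to join on. The same idea in plain Python (a single-process analogue, not Spark code; `join_by_index` is my name) reduces to pairing rows by generated index:

```python
def join_by_index(vec1, vec2):
    # Assign a positional id to each row of both "DataFrames",
    # then join on that id, keeping only ids present in both.
    indexed1 = dict(enumerate(vec1))
    indexed2 = dict(enumerate(vec2))
    return [(indexed1[i], indexed2[i]) for i in sorted(indexed1) if i in indexed2]
```

In distributed Spark the contiguity step matters; locally, `enumerate` already gives dense ids.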
sadikovi / script.scala
Created August 18, 2017 22:57
Print binary tree in multiline format
// Print binary tree on multiple lines maintaining dependency of parent-children
// 0| 1
// 1| 7 8
// 2| 4 5 * 9
// 3| * * * * 5 *
// 4| * *
abstract class TreeNode
case class NIL() extends TreeNode
case class BinaryTreeNode(
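The preview ends at the node declaration. A compact Python sketch of the same level-by-level layout (class and function names are mine; the gist's Scala version is not fully shown) replaces a missing child with `*`, exactly as in the comment above:

```python
class Node:
    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right

def print_levels(root):
    # Breadth-first walk; '*' marks a missing child whose parent exists.
    # A '*' slot contributes no children, so holes do not widen lower levels.
    lines = []
    level, depth = [root], 0
    while level:
        parts, nxt = [], []
        for n in level:
            if n is None:
                parts.append("*")
            else:
                parts.append(str(n.value))
                nxt.extend([n.left, n.right])
        lines.append("%d| %s" % (depth, " ".join(parts)))
        level, depth = nxt, depth + 1
    return lines
```

Run on the tree from the comment, it reproduces the layout shown, including the trailing `4| * *` row for the leaf's absent children.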
sadikovi / code.scala
Created July 28, 2017 00:04
Spark SQL window functions + collect_list for custom processing
val df = Seq(
  (System.currentTimeMillis, "user1", 0.3, Seq(0.1, 0.2)),
  (System.currentTimeMillis + 1000000L, "user1", 0.5, Seq(0.1, 0.2)),
  (System.currentTimeMillis + 2000000L, "user1", 0.2, Seq(0.1, 0.2)),
  (System.currentTimeMillis + 3000000L, "user1", 0.1, Seq(0.1, 0.2)),
  (System.currentTimeMillis + 4000000L, "user1", 1.3, Seq(0.1, 0.2)),
  (System.currentTimeMillis + 5000000L, "user1", 2.3, Seq(0.1, 0.2)),
  (System.currentTimeMillis + 6000000L, "user2", 2.3, Seq(0.1, 0.2))
).toDF("t", "u", "s", "l")
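The window-plus-`collect_list` pattern partitions by user, orders by timestamp, and accumulates values seen so far for custom processing. A plain-Python analogue of that running collection (single-process sketch, not Spark; `collect_by_user` is my name) over `(t, u, s)` tuples:

```python
from collections import defaultdict

def collect_by_user(rows):
    # rows: (timestamp, user, score) tuples.
    # Emulates a window partitioned by user and ordered by timestamp,
    # emitting the list of scores collected up to each row.
    by_user = defaultdict(list)
    out = []
    for t, u, s in sorted(rows, key=lambda r: (r[1], r[0])):
        by_user[u].append(s)
        out.append((t, u, list(by_user[u])))
    return out
```

In Spark the same shape comes from `collect_list($"s").over(Window.partitionBy($"u").orderBy($"t"))`.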
sadikovi / udf.scala
Created July 28, 2017 00:03
Spark SQL UDF for StructType
import org.apache.spark.sql._
import org.apache.spark.sql.types._
import org.apache.spark.sql.expressions._
val df = Seq(
  ("str", 1, 0.2)
).toDF("a", "b", "c").
  withColumn("struct", struct($"a", $"b", $"c"))
// UDF for struct
sadikovi / queries.scala
Created July 25, 2017 22:36
SQL Queries
// == Traffic case ==
val df = Seq(
  ("1", "2", 3),
  ("3", "2", 2),
  ("1", "3", 3),
  ("2", "1", 2),
  ("2", "3", 5)
).toDF("a", "b", "cnt")
df.createOrReplaceTempView("test")
sadikovi / output.py
Created June 10, 2017 04:55
Display table nicely in Python with margin and projection (table is a list of dictionary rows)
################################################################
# Example
################################################################
print("\n# Empty rows list")
show([])
print("\n# Rows list with inconsistent set of columns and margin of 4 spaces")
rows = [
  {"col1": 1, "col2": 2.1, "col3": "long text", "col4": "", "col5": True},
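The gist's `show` implementation is not included in the preview. A minimal sketch of the idea (my own names and behavior for the empty case; column widths padded to the longest value, an optional left margin, and projection via an explicit column list):

```python
def format_table(rows, columns=None, margin=0):
    # Build aligned lines for a table given as a list of dict rows.
    # Columns default to the sorted union of keys across all rows, so
    # rows with inconsistent column sets still render.
    if not rows:
        return [" " * margin + "<empty>"]
    if columns is None:
        columns = sorted({k for row in rows for k in row})
    widths = {c: max(len(c), max(len(str(r.get(c, ""))) for r in rows))
              for c in columns}
    pad = " " * margin
    lines = [pad + "  ".join(c.ljust(widths[c]) for c in columns)]
    for r in rows:
        lines.append(pad + "  ".join(str(r.get(c, "")).ljust(widths[c])
                                     for c in columns))
    return lines

def show(rows, columns=None, margin=0):
    # Print the table; projection = pass a subset of columns.
    for line in format_table(rows, columns, margin):
        print(line)
```

Missing keys render as empty cells, which covers the "inconsistent set of columns" case from the example above.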