Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

View mostafam's full-sized avatar

Mostafa Majidpour mostafam

View GitHub Profile
@mostafam
mostafam / scoreREPL.scala
Created August 13, 2020 08:58
Read MLeap and score the LeapFrame
scala> import ml.combust.bundle.BundleFile
scala> import ml.combust.mleap.runtime.MleapSupport._
scala> import resource._
scala> import ml.combust.mleap.core.types._
scala> import ml.combust.mleap.runtime.frame.{DefaultLeapFrame, Row}
// creating LeapFrame equivalent to the DataFrame above
scala> val schema = StructType(StructField("email", ScalarType.String),
| StructField("first_name", ScalarType.String)).get
@mostafam
mostafam / createSparkPL.scala
Created August 13, 2020 08:56
Create Spark PipelineModel and Export it
scala> import org.apache.spark.ml.mleap.feature.StringChecker
scala> import ml.combust.mleap.core.feature.StringCheckerModel
scala> import org.apache.spark.ml.bundle.SparkBundleContext
scala> import ml.combust.bundle.BundleFile
scala> import ml.combust.mleap.spark.SparkSupport._
scala> import org.apache.spark.ml.{Pipeline, PipelineModel}
scala> import org.apache.spark.ml.feature.{StringIndexer,VectorAssembler}
scala> import org.apache.spark.sql._
scala> import org.apache.spark.sql.functions._
scala> import resource._
@mostafam
mostafam / StringCheckerOp.scala
Created August 13, 2020 08:46
StringCheckerOp.scala (Spark side)
package org.apache.spark.ml.bundle.extension.ops.feature
import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.mleap.core.feature.StringCheckerModel
import org.apache.spark.ml.bundle.SparkBundleContext
import org.apache.spark.ml.mleap.feature.StringChecker
/**
@mostafam
mostafam / StringCheckerOp.scala
Last active August 13, 2020 08:41
StringCheckerOp.scala (MLeap side)
package ml.combust.mleap.bundle.ops.feature
import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.core.feature.StringCheckerModel
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.runtime.transformer.feature.StringChecker
@mostafam
mostafam / StringChecker.scala
Created August 13, 2020 08:34
StringChecker.scala (Spark side)
package org.apache.spark.ml.mleap.feature
import ml.combust.mleap.core.feature.StringCheckerModel
import org.apache.hadoop.fs.Path
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared.{HasInputCols, HasOutputCol}
import org.apache.spark.ml.util._
import org.apache.spark.sql.functions._
@mostafam
mostafam / StringChecker.scala
Created August 13, 2020 08:31
StringChecker.scala (MLeap side)
package ml.combust.mleap.runtime.transformer.feature
import ml.combust.mleap.core.feature.StringCheckerModel
import ml.combust.mleap.core.types._
import ml.combust.mleap.runtime.frame.{FrameBuilder, Row, Transformer}
import ml.combust.mleap.runtime.function.{StructSelector, UserDefinedFunction}
import scala.util.Try
package ml.combust.mleap.core.feature
import ml.combust.mleap.core.Model
import ml.combust.mleap.core.types.{ScalarType, StructField, StructType}
/**
* Created by mostafam on 6/30/20.
*/
case class StringCheckerModel(caseSensitive: Boolean) extends Model {
@mostafam
mostafam / stringCheckerInstance.scala
Last active August 13, 2020 08:08
stringChecker Instance
scala> val stringChecker = new StringChecker(uid = "string_checker", model = new StringCheckerModel(caseSensitive = false)).
setInputCols("text", "query").
setOutputCol("is_it_there?")
scala> val df = spark.createDataFrame(
Seq((0, "john.doe@gmail.com", "John"), (1, "JackieChan234@xyz.com","jack"), (2, "ping_pong@missed.org","Al"))
).toDF("id", "email", "first_name")
scala> df.show(false)
+---+---------------------+----------+
|id |email |first_name|
+---+---------------------+----------+
|0 |john.doe@gmail.com |John |
|1 |JackieChan234@xyz.com|jack |
@mostafam
mostafam / final_take.py
Created June 3, 2020 01:24
Final Take!
import pandas as pd
import numpy as np
from uszipcode import SearchEngine
import sqlite3
# Build the uszipcode search engine against the local database directory /tmp/db.
search = SearchEngine(db_file_dir="/tmp/db")
# Open a direct SQLite connection to simple_db.sqlite under that same directory.
conn = sqlite3.connect("/tmp/db/simple_db.sqlite")
# Load the zipcode, centre coordinates, radius and bounding-box columns of the
# simple_zipcode table into a DataFrame. The query is written as adjacent string
# literals (joined at compile time) because a plain double-quoted string cannot
# contain a raw line break in Python.
pdf = pd.read_sql_query(
    "select zipcode, lat, lng, radius_in_miles, "
    "bounds_west, bounds_east, bounds_north, bounds_south from "
    "simple_zipcode",
    conn,
)