Jason Crocker (crocker)

  • Clinetic
  • Raleigh, NC
@crocker
crocker / jsonview.scala
Last active May 18, 2016 03:51
@JsonView with Jackson's ScalaObjectMapper
// ScalaObjectMapper's package varies by jackson-module-scala version; in the
// releases current when this gist was written it lived under `experimental`
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper

// define your JSON views: empty marker classes referenced from @JsonView annotations
object MyViews {
  class ViewA {}
  class ViewB {}
  class ViewC {}
}

// configure your object mapper with Scala support
val mapper = new ObjectMapper with ScalaObjectMapper
mapper.registerModule(DefaultScalaModule)
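
The snippet ends after configuring the mapper; a minimal usage sketch, assuming a hypothetical Person class annotated against the views above (class and field names are illustrative, not from the gist):

import com.fasterxml.jackson.annotation.JsonView

// hypothetical model: name is visible under ViewA, email only under ViewB
case class Person(
  @JsonView(Array(classOf[MyViews.ViewA])) name: String,
  @JsonView(Array(classOf[MyViews.ViewB])) email: String
)

// serialize through a view; fields not covered by that view are left out
val json = mapper
  .writerWithView(classOf[MyViews.ViewA])
  .writeValueAsString(Person("Ada", "ada@example.com"))
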
@crocker
crocker / guicenamed.scala
Created May 18, 2016 03:55
Named Guice dependencies in Scala
import com.google.inject.Key
import com.google.inject.name.Names

// look up the binding annotated with @Named("elastic")
val elastic = injector.getInstance(
  Key.get(classOf[ElasticClient], Names.named("elastic"))
)
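
For context, the named binding being resolved here has to be registered somewhere; a minimal sketch of that side, assuming a hypothetical SearchModule and an already-constructed client instance:

import com.google.inject.{AbstractModule, Guice}
import com.google.inject.name.Names

// hypothetical module registering the @Named("elastic") binding looked up above
class SearchModule(client: ElasticClient) extends AbstractModule {
  override def configure(): Unit = {
    bind(classOf[ElasticClient])
      .annotatedWith(Names.named("elastic"))
      .toInstance(client)
  }
}

val injector = Guice.createInjector(new SearchModule(client)) // client built elsewhere
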
@crocker
crocker / forest.scala
Last active August 17, 2016 13:50
Scala multi-root tree or forest
package com.signalpath.model

import scala.collection.mutable

// Multi-root tree ("forest"): children keyed by their optional parent; the None key holds the roots
class Forest[A]()(ordering: Ordering[A]) {
  val tree = new mutable.LinkedHashMap[Option[A], mutable.ListBuffer[A]]()

  def addNode(parent: Option[A], node: A): Unit = {
    val children = tree.get(parent).map(_ += node).getOrElse(mutable.ListBuffer[A](node))
    tree.put(parent, children.sorted(ordering))
  }
}
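
A brief usage sketch with Int nodes and the standard ordering (not part of the original gist):

// two roots (1 and 2); node 1 gets two children, kept in sorted order on insert
val forest = new Forest[Int]()(Ordering.Int)
forest.addNode(None, 2)
forest.addNode(None, 1)
forest.addNode(Some(1), 30)
forest.addNode(Some(1), 10)
// forest.tree == LinkedHashMap(None -> ListBuffer(1, 2), Some(1) -> ListBuffer(10, 30))
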

// MongoDB shell: reset last_processed on every subscription document
db.subscription.updateMany({}, {$set: {last_processed: null}})

import org.joda.time.LocalDate

// (start, end) of an ISO week: day 1 = Monday, day 7 = Sunday
def getDateRange(year: Int, week: Int): (LocalDate, LocalDate) = {
  // withWeekyear (rather than withYear) so the week number is interpreted against
  // the ISO week-based year, which differs from the calendar year at the year boundary
  val date = new LocalDate().withWeekyear(year).withWeekOfWeekyear(week)
  val startDate = date.withDayOfWeek(1)
  val endDate = date.withDayOfWeek(7)
  (startDate, endDate)
}

val dateFormat = "yyyy-MM-dd E"
val year = 2016
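
A usage sketch that picks up the dateFormat and year values above; the printing loop itself is an assumption, not part of the original snippet:

// print the Monday..Sunday range for each ISO week of 2016 (52 weeks that year)
(1 to 52).foreach { week =>
  val (start, end) = getDateRange(year, week)
  println(s"week $week: ${start.toString(dateFormat)} -> ${end.toString(dateFormat)}")
}
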
// This prints nothing, no matter how many times you run it: map is lazy,
// so without an action the function never executes
dataRDD.map(c => println(c))

// collect() triggers the job; the println runs on the executors, so the output
// lands in the executors' stdout logs, not on the driver console
dataRDD.map(c => println(c)).collect()
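
If the goal is to see the values on the driver itself, bring them back first; a small sketch, reasonable only when the data is modest in size:

// runs the action, returns results to the driver, and prints there
dataRDD.take(20).foreach(println)   // bounded sample
dataRDD.collect().foreach(println)  // entire RDD; only for small data
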

// MongoDB shell: walk every patient document and compute lowercased copies of a few fields
db.getCollection('patient').find().snapshot().forEach(
  function (p) {
    var lower_middle_name = p.middle_name ? p.middle_name.toLowerCase() : null;
    var lower_mrn = p.mrn ? p.mrn.toLowerCase() : null;
    var lower_email_address = p.email_address ? p.email_address.toLowerCase() : null;
    // ... write-back of the lowercased values not shown
  }
)

// header of a custom Spark SQL data source for JIRA; only the package and imports are shown
package com.databricks.spark.jira

import scala.io.Source
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.sources.{TableScan, BaseRelation, RelationProvider}
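
A minimal sketch of the kind of TableScan relation those imports support, continuing the snippet above; the class names, option key, schema columns, and stubbed fetch are all assumptions, not the original source:

import org.apache.spark.sql.types.{StringType, StructField, StructType}

// resolved via the package-name / DefaultSource convention of the sources API
class DefaultSource extends RelationProvider {
  override def createRelation(sqlContext: SQLContext,
                              parameters: Map[String, String]): BaseRelation =
    new JiraRelation(parameters("url"))(sqlContext)
}

class JiraRelation(url: String)(@transient val sqlContext: SQLContext)
  extends BaseRelation with TableScan {

  // illustrative columns only
  override def schema: StructType = StructType(Seq(
    StructField("key", StringType),
    StructField("summary", StringType)
  ))

  override def buildScan(): RDD[Row] = {
    val body = Source.fromURL(url).mkString // raw JSON from the JIRA REST API
    // real code would parse `body` into rows; this stub returns an empty RDD
    sqlContext.sparkContext.parallelize(Seq.empty[Row])
  }
}
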
@crocker
crocker / spark-duplicates.scala
Last active July 2, 2020 12:15
Find duplicates in a Spark DataFrame
import spark.implicits._ // for the $"..." column syntax

val transactions = spark.read
  .option("header", "true")      // note: header/inferSchema are CSV options; the JSON reader ignores them
  .option("inferSchema", "true")
  .json("s3n://bucket-name/transaction.json")

// group on the candidate key; any count greater than 1 is a duplicate
transactions.groupBy("id", "organization").count.sort($"count".desc).show
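
To keep only the duplicated keys, rather than eyeballing the sorted counts, a follow-up along these lines works; alternatively dropDuplicates removes the duplicates outright (column names as above):

// just the keys that occur more than once
val duplicates = transactions
  .groupBy("id", "organization")
  .count()
  .filter($"count" > 1)

// or deduplicate directly, keeping one arbitrary row per key
val deduped = transactions.dropDuplicates("id", "organization")
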