Skip to content

Instantly share code, notes, and snippets.

View crocker's full-sized avatar

Jason Crocker crocker

  • Clinetic
  • Raleigh, NC
View GitHub Profile
@crocker
crocker / auth0-stripe.js
Created January 28, 2022 02:54
Auth0 Rule - Create/Update Stripe Customer on Auth0 Login
function (user, context, callback) {
var stripe = require('stripe')('sk_test....');
user.app_metadata = user.app_metadata || {};
var customer = {
email: user.email,
name: user.name
};
if ('stripe_customer_id' in user.app_metadata) {
@crocker
crocker / es-basic-histogram-aggregation.json
Created March 12, 2017 20:57
Basic Elasticsearch histogram
{
"aggs": {
"histogram": {
"date_histogram": {
"field": "date",
"interval": "month"
}
}
}
}
val today = LocalDate.now
val todayTransactions = spark.read
.option("header", "true")
.option("inferSchema", "true")
.json(s"s3n://bucket-name/${today}/transaction.json")
val yesterdayTransactions = spark.read
.option("header", "true")
.option("inferSchema", "true")
@crocker
crocker / spark-duplicates.scala
Last active July 2, 2020 12:15
Find duplicates in a Spark DataFrame
// Load the transaction records. The JSON source infers its schema by itself, so the
// CSV-oriented "header" / "inferSchema" options the snippet used to set are dropped.
val transactions = spark.read
  .json("s3n://bucket-name/transaction.json")

// Count rows per (id, organization) key and keep only the keys that occur more than
// once (the actual duplicates), with the most frequently repeated keys first.
transactions
  .groupBy("id", "organization")
  .count
  .filter($"count" > 1)
  .sort($"count".desc)
  .show
package com.databricks.spark.jira
import scala.io.Source
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.sources.{TableScan, BaseRelation, RelationProvider}
db.getCollection('patient').find().snapshot().forEach(
function (p) {
var lower_middle_name;
if(p.middle_name) lower_middle_name = p.middle_name.toLowerCase(); else lower_middle_name = null;
var lower_mrn;
if(p.mrn) lower_mrn = p.mrn.toLowerCase(); else lower_mrn = null;
var lower_email_address;
if(p.email_address) lower_email_address = p.email_address.toLowerCase(); else lower_email_address = null;
// `map` is a lazy transformation; `collect()` is an action that forces the job to run,
// so the println calls execute on the executors and show up in the executor logs.
dataRDD.map(c => println(c)).collect()
// No action is invoked here, so the `map` is never evaluated and nothing is printed
// (no matter how many times you run it).
dataRDD.map(c => println(c))
/**
 * Returns the first and last day of an ISO-8601 week.
 *
 * The result is independent of the current date: the lookup is anchored on a fixed
 * day of the requested year instead of on "today".
 *
 * @param year the ISO week-based year the week belongs to
 * @param week the week number within that week-year (1-based)
 * @return a pair `(Monday, Sunday)` delimiting the requested week
 */
def getDateRange(year: Int, week: Int): (LocalDate, LocalDate) = {
  // 4 January always falls in ISO week 1 of its own week-year, so starting there
  // keeps the calendar year and the week-year in agreement before the week is set.
  val anchor = new LocalDate(year, 1, 4).withWeekOfWeekyear(week)
  // Joda-Time numbers days of the week 1 (Monday) through 7 (Sunday).
  (anchor.withDayOfWeek(1), anchor.withDayOfWeek(7))
}
// Date pattern: year-month-day followed by the day-of-week text ("E").
// NOTE(review): the formatter that consumes this pattern is not visible here.
val dateFormat = "yyyy-MM-dd E"
// Year used by the rest of the snippet — presumably passed to getDateRange; confirm.
val year = 2016
// Sets last_processed to null on every document in the subscription collection
// (the empty filter {} matches all documents).
db.subscription.updateMany({}, {$set: {last_processed: null}})