Ugo Bechameil (bigorn0)

💭
Enthousiast
View GitHub Profile
#!/usr/bin/env node
// Quick PostgreSQL connectivity test. Replace the connection settings with your own, then:
//   npm install --save pg
//   chmod +x pg-test.js
//   ./pg-test.js
A minimal sketch of a YAML-to-JSON converter using ghodss/yaml's YAMLToJSON (the command-line handling is an assumption; the unused encoding/json and path/filepath imports are dropped, since Go rejects unused imports):

package main

import (
    "fmt"
    "io/ioutil"
    "os"
    "github.com/ghodss/yaml"
)

func main() { // print the YAML file named in argv[1] as JSON
    data, err := ioutil.ReadFile(os.Args[1])
    if err != nil { panic(err) }
    j, err := yaml.YAMLToJSON(data)
    if err != nil { panic(err) }
    fmt.Println(string(j))
}
You can use select with varargs, including *:

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum
import spark.implicits._

df.select($"*" +: Seq("A", "B", "C").map(c =>
  sum(c).over(Window.partitionBy("ID").orderBy("time")).alias(s"cum$c")
): _*)

This:
- maps column names to window expressions with Seq("A", ...).map(...),
- prepends all pre-existing columns with $"*" +: ...,
- unpacks the combined sequence into varargs with : _*.
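
A self-contained sketch of the same pattern; the SparkSession setup and the toy DataFrame are assumptions for illustration:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

object CumSumExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.master("local[*]").appName("cumsum").getOrCreate()
    import spark.implicits._

    val df = Seq(
      (1, 1L, 10.0, 1.0),
      (1, 2L, 20.0, 2.0),
      (2, 1L, 5.0, 3.0)
    ).toDF("ID", "time", "A", "B")

    // One running sum per listed column, computed within each ID ordered by time.
    val w = Window.partitionBy("ID").orderBy("time")
    df.select($"*" +: Seq("A", "B").map(c => sum(c).over(w).alias(s"cum$c")): _*).show()

    spark.stop()
  }
}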
// Restify Server CheatSheet.
// More about the API: http://mcavage.me/node-restify/#server-api
// Install restify with npm install restify

// 1.1. Creating a Server.
// http://mcavage.me/node-restify/#Creating-a-Server
var restify = require('restify');
var server = restify.createServer({ name: 'myapp' });
server.listen(8080, function () {
  console.log('%s listening at %s', server.name, server.url);
});
Write a DataFrame to a database over JDBC, one connection per partition with batched inserts (numWorkers, jdbcUrl, insertSql, and batchSize are placeholders for your own values; the original's mapPartitions(d => Iterator(d)).foreach trick is written here as the equivalent foreachPartition):

import java.sql.{Connection, DriverManager, PreparedStatement}

dataframe.coalesce(numWorkers).foreachPartition { rows =>
  val dbc: Connection = DriverManager.getConnection(jdbcUrl)
  val st: PreparedStatement = dbc.prepareStatement(insertSql)
  rows.grouped(batchSize).foreach { chunk =>
    chunk.foreach { x =>
      st.setDouble(1, x.getDouble(1))
      st.addBatch()
    }
    st.executeBatch() // flush one batch of rows to the database
  }
  dbc.close()
}
bigorn0 / dood_cleanup.sh
Created August 25, 2016 22:25
Cleanup Dood
#!/bin/sh
# Return 0 when $1 occurs in the remaining arguments, 1 otherwise
# (pure POSIX pattern stripping, no grep needed).
str_in_list() {
  str="$1"
  shift
  list="$@"
  if test "${list#*$str}" != "$list"
  then
    return 0 # $str is in $list
  else
    return 1 # $str is not in $list
  fi
}

# Example: str_in_list b "a b c" && echo found
bigorn0 / Run_spark_submit_from_scala.md
Last active August 24, 2016 21:08

Running Spark jobs from Scala code

http://henningpetersen.com/post/22/running-apache-spark-jobs-from-applications

An example (abridged) out of Spark's own test classes:

// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
private def runSparkSubmit(args: Seq[String]): Unit = {
  val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!"))
  val process = Utils.executeCommand(
    Seq("./bin/spark-submit") ++ args,
    new File(sparkHome))
  // ... (the suite then waits for the process and asserts on its exit code)
}

How to split/redirect part of the logs based on level

apache-spark-stderr-and-stdout

Try this in a log4j.properties passed to Spark (or modify the default configuration under Spark's conf/ directory):

# Log to stdout and stderr
log4j.rootLogger=INFO, stdout, stderr
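
A sketch of the two appender definitions the root logger references, using plain Threshold settings (ERROR messages then appear on both streams; capping stdout at WARN would need a level-range filter, which log4j 1.x properties generally cannot express):

# stdout: everything at INFO and above
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.Threshold=INFO
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p %c: %m%n

# stderr: only ERROR and above (these also appear on stdout)
log4j.appender.stderr=org.apache.log4j.ConsoleAppender
log4j.appender.stderr.Target=System.err
log4j.appender.stderr.Threshold=ERROR
log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
log4j.appender.stderr.layout.ConversionPattern=%d %p %c: %m%n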