Ugo Bechameil (bigorn0)

💭
Enthousiast
View GitHub Profile
#!/usr/bin/env node
// Quick PostgreSQL connectivity test. Replace the connection settings with your own, then:
//   npm install --save pg
//   chmod +x pg-test.js
//   ./pg-test.js
A minimal sketch of a YAML-to-JSON converter using ghodss/yaml's YAMLToJSON (the command-line handling is an assumption; the unused encoding/json and path/filepath imports are dropped, since Go rejects unused imports):

package main

import (
    "fmt"
    "io/ioutil"
    "os"
    "github.com/ghodss/yaml"
)

func main() { // print the YAML file named in argv[1] as JSON
    data, err := ioutil.ReadFile(os.Args[1])
    if err != nil { panic(err) }
    j, err := yaml.YAMLToJSON(data)
    if err != nil { panic(err) }
    fmt.Println(string(j))
}
You can use select with varargs, including *:

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum
import spark.implicits._

df.select($"*" +: Seq("A", "B", "C").map(c =>
  sum(c).over(Window.partitionBy("ID").orderBy("time")).alias(s"cum$c")
): _*)

This:
- maps column names to window expressions with Seq("A", ...).map(...),
- prepends all pre-existing columns with $"*" +: ...,
- unpacks the combined sequence into varargs with : _*.
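
A self-contained sketch of the same pattern; the SparkSession setup and the toy DataFrame are assumptions for illustration:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

object CumSumExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.master("local[*]").appName("cumsum").getOrCreate()
    import spark.implicits._

    val df = Seq(
      (1, 1L, 10.0, 1.0),
      (1, 2L, 20.0, 2.0),
      (2, 1L, 5.0, 3.0)
    ).toDF("ID", "time", "A", "B")

    // One running sum per listed column, computed within each ID ordered by time.
    val w = Window.partitionBy("ID").orderBy("time")
    df.select($"*" +: Seq("A", "B").map(c => sum(c).over(w).alias(s"cum$c")): _*).show()

    spark.stop()
  }
}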
// Restify Server CheatSheet.
// More about the API: http://mcavage.me/node-restify/#server-api
// Install restify with npm install restify

// 1.1. Creating a Server.
// http://mcavage.me/node-restify/#Creating-a-Server
var restify = require('restify');
var server = restify.createServer({ name: 'myapp' });
server.listen(8080, function () {
  console.log('%s listening at %s', server.name, server.url);
});
Write a DataFrame to a database over JDBC, one connection per partition with batched inserts (numWorkers, jdbcUrl, insertSql, and batchSize are placeholders for your own values; the original's mapPartitions(d => Iterator(d)).foreach trick is written here as the equivalent foreachPartition):

import java.sql.{Connection, DriverManager, PreparedStatement}

dataframe.coalesce(numWorkers).foreachPartition { rows =>
  val dbc: Connection = DriverManager.getConnection(jdbcUrl)
  val st: PreparedStatement = dbc.prepareStatement(insertSql)
  rows.grouped(batchSize).foreach { chunk =>
    chunk.foreach { x =>
      st.setDouble(1, x.getDouble(1))
      st.addBatch()
    }
    st.executeBatch() // flush one batch of rows to the database
  }
  dbc.close()
}
bigorn0 / dood_cleanup.sh
Created August 25, 2016 22:25
Cleanup Dood
#!/bin/sh
# Return 0 when $1 occurs in the remaining arguments, 1 otherwise
# (pure POSIX pattern stripping, no grep needed).
str_in_list() {
  str="$1"
  shift
  list="$@"
  if test "${list#*$str}" != "$list"
  then
    return 0 # $str is in $list
  else
    return 1 # $str is not in $list
  fi
}

# Example: str_in_list b "a b c" && echo found
bigorn0 / Run_spark_submit_from_scala.md
Last active August 24, 2016 21:08

Running Spark jobs from Scala code

http://henningpetersen.com/post/22/running-apache-spark-jobs-from-applications

An example (abridged) out of Spark's own test classes:

// NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
private def runSparkSubmit(args: Seq[String]): Unit = {
  val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!"))
  val process = Utils.executeCommand(
    Seq("./bin/spark-submit") ++ args,
    new File(sparkHome))
  // ... (the suite then waits for the process and asserts on its exit code)
}

How to split/redirect part of the logs based on level

apache-spark-stderr-and-stdout

Try this in a log4j.properties passed to Spark (or modify the default configuration under Spark's conf/ directory):

# Log to stdout and stderr
log4j.rootLogger=INFO, stdout, stderr
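
A sketch of the two appender definitions the root logger references, using plain Threshold settings (ERROR messages then appear on both streams; capping stdout at WARN would need a level-range filter, which log4j 1.x properties generally cannot express):

# stdout: everything at INFO and above
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.Threshold=INFO
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p %c: %m%n

# stderr: only ERROR and above (these also appear on stdout)
log4j.appender.stderr=org.apache.log4j.ConsoleAppender
log4j.appender.stderr.Target=System.err
log4j.appender.stderr.Threshold=ERROR
log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
log4j.appender.stderr.layout.ConversionPattern=%d %p %c: %m%n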