Skip to content

Instantly share code, notes, and snippets.

View oluies's full-sized avatar

Örjan Angré (Lundberg) oluies

  • Sweden
  • 02:38 (UTC +02:00)
  • X @oluies
View GitHub Profile
@oluies
oluies / StatsCount.scala
Last active December 18, 2017 09:26
HBase: extract kv._2 into a Map(column family name -> Map(column qualifier name -> value))
import java.time.Instant
import java.{lang, util}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{Cell, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.util.Bytes.toBytes
@oluies
oluies / spark-read-from-csv-string.scala
Created December 1, 2017 08:32
spark-read-from-string
val csv:String = """
|rowKeyLineage |eventTimestamp|dataEventId|dataDictId |sourceKey |transport|eventType |recordId|processingTime|application|entity |product|lifecycle |lineDate |dayOfMonth|dayOfYear|
|1488327264000:2001200:MUREX:DK_LEI_6000:BOND:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6000|BOND |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6100:REPO:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6100|REPO |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6200:BOND:CONTRACT |1488327264000 |2001200 |MURE
public static void main(String[] args) throws IOException {
PipelineOptionsFactory.register(PipelineCmdLineOptions.class);
PipelineCmdLineOptions options =
PipelineOptionsFactory.fromArgs(args).as(PipelineCmdLineOptions.class);
//Config config = ConfigFactory.parseFile(new File(args[0]));
//LOG.info(config.root().render(ConfigRenderOptions.concise().setFormatted(true)));
@oluies
oluies / brews.txt
Created September 14, 2017 19:34 — forked from anonymous/brews.txt
ack
adns
ansible
ant
apache-spark
argtable
arpack
augeas
autoconf
autojump
test("interpolateColor. Above 60") {
assert(Visualization.interpolateColor(Color.threasholds, 60.001) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 60.01) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 60.1) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 61) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 62) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 63) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 100) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 1000) === Color(255, 255, 255))
/** A weather station record joined from the stations file.
  *
  * Marked `final`: case classes should not be extended.
  *
  * @param STN         station (STN/WMO) identifier; `None` when absent in the source data
  * @param WBAN        WBAN identifier; `None` when absent in the source data
  * @param GPSLocation geographic position of the station
  */
final case class Station(STN: Option[Int], WBAN: Option[Int], GPSLocation: Location)
/** One temperature observation for a station.
  *
  * Marked `final`: case classes should not be extended.
  *
  * @param stn         station (STN/WMO) identifier, kept as the raw string from the source file
  * @param wban        WBAN identifier, kept as the raw string from the source file
  * @param month       month of the observation — presumably 1-12; confirm against the source data
  * @param day         day of the month of the observation
  * @param temperature observed temperature (units not shown here — TODO confirm °C vs °F)
  */
final case class TemperatureReading(stn: String,
                                    wban: String,
                                    month: Int,
                                    day: Int,
                                    temperature: Double)
case class Location(lat: Double, lon: Double){
@oluies
oluies / after.csv
Created August 8, 2017 12:34 — forked from bfritz/after.csv
rapture-csv in Ammonite REPL
territory_id first_name last_name email employee_id
XMOWSM54 Peter Alexander palexander0@unesco.org E00QTOF
XMRNBM47 Samuel Lopez slopez1@163.com E00UBFA
XMOWMF87 Elizabeth Stone estone2@usatoday.com E00WDYK
XMZWPW22 William Carroll wcarroll3@odnoklassniki.ru E00VDYQ
XMOWRW46 Carolyn Little clittle4@ox.ac.uk E00HUPR
XMZNDX26 Marilyn Robinson mrobinson5@wired.com E00ZJGS
XMZNAI68 Christopher Rogers crogers6@posterous.com E00DCHF
XMONCD74 Anthony Allen aallen7@flickr.com E00ACEQ
XMRNMD81 Martin Baker mbaker8@hatena.ne.jp E00DKRZ
@oluies
oluies / sqlserver_schema_to_spark.sql
Created February 20, 2017 14:07
create Spark StructFields from a SQL Server schema
use [database_ONE_two]
select 'val ' + so.name + '_sch = StructType(Seq( ' + o.list + '))'
from sysobjects so
cross apply
(SELECT
' StructField("'+column_name+'", ' +
case data_type
when 'char' then 'StringType'
when 'varchar' then 'StringType'
when 'nvarchar' then 'StringType'
@oluies
oluies / exclude_targetdirs.sh
Created January 21, 2017 18:30 — forked from viktorklang/exclude_targetdirs.sh
Adds all your sbt target dirs as path excludes for Time Machine
#WARNING: Use at your own risk. No warranties expressed or implied. YMMV. Drive responsibly. Eat healthy.
#First, `cd` into the parent dir for all of your `sbt`/`maven` projects (I assume you have one of those)
# Resolve the parent directory to an absolute path ($(cd ..; pwd)), then find every
# directory named "target" beneath it and mark each one as a Time Machine backup
# exclusion via `tmutil addexclusion -p` (path-based exclusion; requires sudo).
# `-exec ... {} +` batches many paths into one tmutil invocation.
find "$(cd ..; pwd)" -type d -name "target" -exec sudo tmutil addexclusion -p {} +
# Translation table: Spark SQL data type names -> Hive DDL type names.
d = {
    'StringType': 'STRING',
    'DoubleType': 'DOUBLE',
    'IntegerType': 'INT',
    'DateType': 'DATE',
    'LongType': 'BIGINT',
}

# Render the Hive column list ("col1 TYPE1, col2 TYPE2, ...") from the
# DataFrame schema.  NOTE(review): assumes `df` is a Spark DataFrame defined
# earlier; an unmapped dtype raises KeyError (unchanged from original intent).
column_defs = []
for field in df.schema.fields:
    column_defs.append(field.name + ' ' + d[str(field.dataType)])
schemastring = ', '.join(column_defs)

hivetablename = 'mortgage_all'
output_path = 'path'
filename = 'filename'

# Build the external-table DDL.  NOTE(review): the location is plain string
# concatenation with no path separator inserted — confirm that is intended.
ddl = """CREATE EXTERNAL TABLE IF NOT EXISTS %s(%s) STORED AS ORC LOCATION '%s'""" % (hivetablename, schemastring, output_path + filename)