Skip to content

Instantly share code, notes, and snippets.

View oluies's full-sized avatar

Örjan Angré (Lundberg) oluies

  • Sweden
  • 02:38 (UTC +02:00)
  • X @oluies
View GitHub Profile
@oluies
oluies / StatsCount.scala
Last active December 18, 2017 09:26
HBase: extract kv._2 into a Map(column family name -> Map(column qualifier name -> value))
import java.time.Instant
import java.{lang, util}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{Cell, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.util.Bytes.toBytes
@oluies
oluies / spark-read-from-csv-string.scala
Created December 1, 2017 08:32
spark-read-from-string
val csv:String = """
|rowKeyLineage |eventTimestamp|dataEventId|dataDictId |sourceKey |transport|eventType |recordId|processingTime|application|entity |product|lifecycle |lineDate |dayOfMonth|dayOfYear|
|1488327264000:2001200:MUREX:DK_LEI_6000:BOND:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6000|BOND |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6100:REPO:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6100|REPO |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6200:BOND:CONTRACT |1488327264000 |2001200 |MURE
public static void main(String[] args) throws IOException {
PipelineOptionsFactory.register(PipelineCmdLineOptions.class);
PipelineCmdLineOptions options =
PipelineOptionsFactory.fromArgs(args).as(PipelineCmdLineOptions.class);
//Config config = ConfigFactory.parseFile(new File(args[0]));
//LOG.info(config.root().render(ConfigRenderOptions.concise().setFormatted(true)));
@oluies
oluies / brews.txt
Created September 14, 2017 19:34 — forked from anonymous/brews.txt
ack
adns
ansible
ant
apache-spark
argtable
arpack
augeas
autoconf
autojump
test("interpolateColor. Above 60") {
assert(Visualization.interpolateColor(Color.threasholds, 60.001) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 60.01) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 60.1) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 61) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 62) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 63) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 100) === Color(255, 255, 255))
assert(Visualization.interpolateColor(Color.threasholds, 1000) === Color(255, 255, 255))
/** A weather station record joined from the stations file.
  *
  * Marked `final`: case classes should not be extended.
  *
  * @param STN         station (STN/WMO) identifier; `None` when absent in the source data
  * @param WBAN        WBAN identifier; `None` when absent in the source data
  * @param GPSLocation geographic position of the station
  */
final case class Station(STN: Option[Int], WBAN: Option[Int], GPSLocation: Location)
/** One temperature observation for a station.
  *
  * Marked `final`: case classes should not be extended.
  *
  * @param stn         station (STN/WMO) identifier, kept as the raw string from the source file
  * @param wban        WBAN identifier, kept as the raw string from the source file
  * @param month       month of the observation — presumably 1-12; confirm against the source data
  * @param day         day of the month of the observation
  * @param temperature observed temperature (units not shown here — TODO confirm °C vs °F)
  */
final case class TemperatureReading(stn: String,
                                    wban: String,
                                    month: Int,
                                    day: Int,
                                    temperature: Double)
case class Location(lat: Double, lon: Double){
@oluies
oluies / after.csv
Created August 8, 2017 12:34 — forked from bfritz/after.csv
rapture-csv in Ammonite REPL
territory_id first_name last_name email employee_id
XMOWSM54 Peter Alexander palexander0@unesco.org E00QTOF
XMRNBM47 Samuel Lopez slopez1@163.com E00UBFA
XMOWMF87 Elizabeth Stone estone2@usatoday.com E00WDYK
XMZWPW22 William Carroll wcarroll3@odnoklassniki.ru E00VDYQ
XMOWRW46 Carolyn Little clittle4@ox.ac.uk E00HUPR
XMZNDX26 Marilyn Robinson mrobinson5@wired.com E00ZJGS
XMZNAI68 Christopher Rogers crogers6@posterous.com E00DCHF
XMONCD74 Anthony Allen aallen7@flickr.com E00ACEQ
XMRNMD81 Martin Baker mbaker8@hatena.ne.jp E00DKRZ
@oluies
oluies / sqlserver_schema_to_spark.sql
Created February 20, 2017 14:07
create Spark StructFields from a SQL Server schema
use [database_ONE_two]
select 'val ' + so.name + '_sch = StructType(Seq( ' + o.list + '))'
from sysobjects so
cross apply
(SELECT
' StructField("'+column_name+'", ' +
case data_type
when 'char' then 'StringType'
when 'varchar' then 'StringType'
when 'nvarchar' then 'StringType'
@oluies
oluies / exclude_targetdirs.sh
Created January 21, 2017 18:30 — forked from viktorklang/exclude_targetdirs.sh
Adds all your sbt target dirs as path excludes for Time Machine
#WARNING: Use at your own risk. No warranties expressed or implied. YMMV. Drive responsibly. Eat healthy.
#First, `cd` into the parent dir for all of your `sbt`/`maven` projects (I assume you have one of those)
# Resolve the parent directory to an absolute path ($(cd ..; pwd)), then find every
# directory named "target" beneath it and mark each one as a Time Machine backup
# exclusion via `tmutil addexclusion -p` (path-based exclusion; requires sudo).
# `-exec ... {} +` batches many paths into one tmutil invocation.
find "$(cd ..; pwd)" -type d -name "target" -exec sudo tmutil addexclusion -p {} +
# Translation table: Spark SQL data type names -> Hive DDL type names.
d = {
    'StringType': 'STRING',
    'DoubleType': 'DOUBLE',
    'IntegerType': 'INT',
    'DateType': 'DATE',
    'LongType': 'BIGINT',
}

# Render the Hive column list ("col1 TYPE1, col2 TYPE2, ...") from the
# DataFrame schema.  NOTE(review): assumes `df` is a Spark DataFrame defined
# earlier; an unmapped dtype raises KeyError (unchanged from original intent).
column_defs = []
for field in df.schema.fields:
    column_defs.append(field.name + ' ' + d[str(field.dataType)])
schemastring = ', '.join(column_defs)

hivetablename = 'mortgage_all'
output_path = 'path'
filename = 'filename'

# Build the external-table DDL.  NOTE(review): the location is plain string
# concatenation with no path separator inserted — confirm that is intended.
ddl = """CREATE EXTERNAL TABLE IF NOT EXISTS %s(%s) STORED AS ORC LOCATION '%s'""" % (hivetablename, schemastring, output_path + filename)