- Install tools to build:
sudo apt-get update
sudo apt-get install kernel-package fakeroot wget bzip2
- Linux-2.6.39.1-linode34 is same as regular 2.6.39
/* | |
* MIT License | |
Copyright (c) 2020-2021 | |
Authors: Sacheendra Talluri, Giulia Frascaria, and Animesh Trivedi | |
This code is part of the Storage System Course at VU Amsterdam | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal |
" Don't try to be vi compatible | |
set nocompatible | |
" Helps force plugins to load correctly when it is turned back on below | |
filetype off | |
" TODO: Load plugins here (pathogen or vundle) | |
" Turn on syntax highlighting | |
syntax on |
This patch makes the following changes: | |
* moves two common functions, "getNullCount" and "splitAndTransferValidityBuffer", to the top-level BaseValueVector. This change requires moving "validityBuffer" to the BaseValueVector class (as recommended in this TODO: https://github.com/apache/arrow/blob/master/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java#L89) | |
* optimize the implementation of loadValidityBuffer (in the BaseValueVector) to just pass the reference for the validity buffer read from the storage | |
* optimize for the common boundary condition when all variables are valid (as done in the C++ code: https://github.com/apache/arrow/blob/master/cpp/src/arrow/array.h#L290) | |
The optimization delivers a measurable performance improvement. | |
Tests: Read 50M integers from a single Int column (2GB). |
// Author: Animesh Trivedi | |
// atr@zurich.ibm.com | |
import org.apache.spark.sql.{SaveMode, SparkSession} | |
import scala.collection.mutable.ListBuffer | |
import scala.util.Random | |
private def generateTSRecord(key: Array[Byte], recBuf:Array[Byte], rand: Random): Unit = { | |
val fixed = 10 |
# Command to launch TPCDS: | |
# ./bin/spark-submit -v --master local[2] --class com.ibm.crail.spark.tools.ParquetGenerator ~/jars/parquet-generator-1.0.jar -c tpcds -o crail://localhost:9060/F1/tpcds/ -p 4 -t 4 -tsf 1 -tdsd /home/atr/zrl/external/github/databricks/tpcds-kit/tools/ -tdd 1 | |
# And you need to put core-site.xml from crail into the conf folder. | |
# Licensed to the Apache Software Foundation (ASF) under one or more | |
# contributor license agreements. See the NOTICE file distributed with | |
# this work for additional information regarding copyright ownership. | |
# The ASF licenses this file to You under the Apache License, Version 2.0 | |
# (the "License"); you may not use this file except in compliance with | |
# the License. You may obtain a copy of the License at |
crail.blocksize 4096 | |
crail.buffersize 4096 | |
#crail.buffersize 1048576 | |
#crail.buffersize 8192 | |
#crail.slicesize 8192 | |
crail.regionsize 1073741824 | |
crail.cachelimit 1073741824 |
/* This code snippet is a part of the blog at | |
https://github.com/animeshtrivedi/blog/blob/master/post/2017-12-26-arrow.md | |
*/ | |
import com.google.common.collect.ImmutableList; | |
import org.apache.arrow.memory.RootAllocator; | |
import org.apache.arrow.vector.*; | |
import org.apache.arrow.vector.dictionary.DictionaryProvider; | |
import org.apache.arrow.vector.types.FloatingPointPrecision; | |
import org.apache.arrow.vector.types.pojo.ArrowType; |
/* This code snippet is a part of the blog at | |
https://github.com/animeshtrivedi/blog/blob/master/post/2017-12-26-arrow.md | |
*/ | |
import org.apache.hadoop.fs.FSDataInputStream; | |
import java.io.IOException; | |
import java.nio.ByteBuffer; | |
import java.nio.channels.SeekableByteChannel; | |
/** |
/* This code snippet is a part of the blog at | |
https://github.com/animeshtrivedi/blog/blob/master/post/2017-12-26-arrow.md | |
*/ | |
import org.apache.hadoop.fs.FSDataOutputStream; | |
import java.io.IOException; | |
import java.nio.ByteBuffer; | |
import java.nio.channels.WritableByteChannel; |