rjurney/gist:966acf3071224d4cf768

## gistfile1.txt
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

import org.apache.avro.generic.GenericRecord
import org.apache.avro.mapred.AvroKey
import org.apache.avro.mapreduce.AvroKeyInputFormat
import org.apache.hadoop.io.NullWritable
import org.apache.commons.lang.StringEscapeUtils.escapeCsv

import org.apache.avro.file.DataFileStream

// Read the beaconing-activity dump as lines. `sc` is not defined in this script;
// presumably it is the SparkContext that spark-shell provides.
val file = sc.textFile(
  "hdfs://hivecluster2/securityx/beaconing_activity.txt/2014/05/12/14/hour")

// Break every line into its tab-separated fields.
// NOTE(review): String.split discards trailing empty strings, so a row ending in
// empty columns yields a shorter array -- confirm downstream code tolerates that.
val arys = file.map(_.split("\t"))

// Load every file matching the 2014/05/27/19 web-proxy glob through the new-API
// Avro input format, producing (AvroKey[GenericRecord], NullWritable) pairs.
// The key, value and input-format classes are supplied as type arguments here
// rather than as explicit classOf[...] arguments.
// NOTE(review): Spark's API docs warn that Hadoop record readers reuse the same
// object for each record, so copy records (e.g. in a map) before caching them.
val avroRdd =
  sc.newAPIHadoopFile[AvroKey[GenericRecord], NullWritable, AvroKeyInputFormat[GenericRecord]](
    "hdfs://hivecluster2/securityx/web_proxy_mef/2014/05/27/19/*")


import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.conf.Configuration
import java.net.URI
import java.io.BufferedInputStream

// Open one specific Avro part file directly through the Hadoop FileSystem API.
val input = "hdfs://hivecluster2/securityx/web_proxy_mef/2014/05/27/19/part-m-00019.avro"
val inPath = new Path(input)

// The FileSystem is resolved from the URI using a freshly constructed
// Configuration (not one taken from `sc`).
val fs = {
  val hadoopConf = new Configuration()
  FileSystem.get(URI.create(input), hadoopConf)
}

// Buffered byte stream over the part file.
// NOTE(review): nothing in the visible script closes this stream.
val inStream = {
  val rawStream = fs.open(inPath)
  new BufferedInputStream(rawStream)
}
	import org.apache.spark.SparkContext
	import org.apache.spark.SparkContext._

	import org.apache.avro.generic.GenericRecord
	import org.apache.avro.mapred.AvroKey
	import org.apache.avro.mapreduce.AvroKeyInputFormat
	import org.apache.hadoop.io.NullWritable
	import org.apache.commons.lang.StringEscapeUtils.escapeCsv

	import org.apache.avro.file.DataFileStream

	// Same statements as the unindented copy earlier in this file.
	// Read the beaconing-activity dump as lines. `sc` is not defined in this script;
	// presumably it is the SparkContext that spark-shell provides.
	val file = sc.textFile(
		"hdfs://hivecluster2/securityx/beaconing_activity.txt/2014/05/12/14/hour")

	// Break every line into its tab-separated fields.
	// NOTE(review): String.split discards trailing empty strings, so a row ending in
	// empty columns yields a shorter array -- confirm downstream code tolerates that.
	val arys = file.map(_.split("\t"))

	// Same statement as the unindented copy earlier in this file.
	// Load every file matching the 2014/05/27/19 web-proxy glob through the new-API
	// Avro input format, producing (AvroKey[GenericRecord], NullWritable) pairs.
	// The key, value and input-format classes are supplied as type arguments here
	// rather than as explicit classOf[...] arguments.
	// NOTE(review): Spark's API docs warn that Hadoop record readers reuse the same
	// object for each record, so copy records (e.g. in a map) before caching them.
	val avroRdd =
		sc.newAPIHadoopFile[AvroKey[GenericRecord], NullWritable, AvroKeyInputFormat[GenericRecord]](
			"hdfs://hivecluster2/securityx/web_proxy_mef/2014/05/27/19/*")


	import org.apache.hadoop.fs.Path
	import org.apache.hadoop.fs.FileSystem
	import org.apache.hadoop.conf.Configuration
	import java.net.URI
	import java.io.BufferedInputStream

	// Same statements as the unindented copy earlier in this file.
	// Open one specific Avro part file directly through the Hadoop FileSystem API.
	val input = "hdfs://hivecluster2/securityx/web_proxy_mef/2014/05/27/19/part-m-00019.avro"
	val inPath = new Path(input)

	// The FileSystem is resolved from the URI using a freshly constructed
	// Configuration (not one taken from `sc`).
	val fs = {
		val hadoopConf = new Configuration()
		FileSystem.get(URI.create(input), hadoopConf)
	}

	// Buffered byte stream over the part file.
	// NOTE(review): this opens another stream on the same file, and nothing in the
	// visible script closes either this one or the earlier `inStream`.
	val inStream = {
		val rawStream = fs.open(inPath)
		new BufferedInputStream(rawStream)
	}