Anant Asthana (AtlasPilotPuppy)

  • Salt Lake City
AtlasPilotPuppy / question1_19.py
Last active January 2, 2016 21:19
Python matplotlib example code.
from pylab import *
# create the function q(t) based on the question
def q(t):
    if t < 0:
        return 0
    if t <= 10:
        return 5 * t
    if t <= 60:
        return 60 - t
    return 0  # q(60) = 0, so assume it stays at zero afterwards
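The preview ends before anything is drawn. A minimal sketch of how q(t) might be sampled and plotted (the sampling range and labels are assumptions, not from the gist):

import numpy as np
import matplotlib.pyplot as plt

t = np.linspace(-10, 80, 500)   # sample just beyond the pieces' boundaries
y = [q(ti) for ti in t]         # q is scalar-valued, so evaluate pointwise
plt.plot(t, y)
plt.xlabel('t')
plt.ylabel('q(t)')
plt.title('Piecewise function q(t)')
plt.show()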
AtlasPilotPuppy / hbase_rdd.scala
Last active August 3, 2016 14:21
Accessing HBase from Apache Spark
import org.apache.spark.SparkContext
import org.apache.spark.rdd.NewHadoopRDD
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
val sc = new SparkContext("local", "Simple App")
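The preview stops right after the SparkContext; the gist presumably goes on to wrap an HBase table scan in an RDD. A rough PySpark equivalent of that step, assuming the converter classes shipped with Spark's bundled examples and a hypothetical table name and ZooKeeper quorum:

from pyspark import SparkContext

sc = SparkContext('local', 'Simple App')
conf = {'hbase.zookeeper.quorum': 'localhost',     # assumed quorum
        'hbase.mapreduce.inputtable': 'my_table'}  # hypothetical table name
# requires the spark-examples jar (for the converters) and the HBase jars on the classpath
hbase_rdd = sc.newAPIHadoopRDD(
    'org.apache.hadoop.hbase.mapreduce.TableInputFormat',
    'org.apache.hadoop.hbase.io.ImmutableBytesWritable',
    'org.apache.hadoop.hbase.client.Result',
    keyConverter='org.apache.spark.examples.pythonconverters.ImmutableBytesWritableToStringConverter',
    valueConverter='org.apache.spark.examples.pythonconverters.HBaseResultToStringConverter',
    conf=conf)
print(hbase_rdd.count())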
AtlasPilotPuppy / SparkHbaseALS.scala
Last active August 29, 2015 14:03
Uses values in HBase tables to train and test an ALS model in MLlib.
import org.apache.spark.rdd.NewHadoopRDD
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating
import scala.collection.mutable.ArrayBuffer
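Only the imports survive in the preview. A minimal sketch of the train-and-test cycle the description refers to, using the classic MLlib API; the ratings below are hardcoded stand-ins for the values read from HBase, and the hyperparameters are assumptions:

from pyspark.mllib.recommendation import ALS, Rating

# assumes an existing SparkContext sc; each Rating is (user, product, rating)
ratings = sc.parallelize([Rating(1, 10, 4.0), Rating(1, 20, 2.5), Rating(2, 10, 5.0)])
model = ALS.train(ratings, rank=10, iterations=10)

# score the model on the same (user, product) pairs and compute mean squared error
pairs = ratings.map(lambda r: (r.user, r.product))
predictions = model.predictAll(pairs).map(lambda r: ((r.user, r.product), r.rating))
truth = ratings.map(lambda r: ((r.user, r.product), r.rating))
mse = truth.join(predictions).map(lambda kv: (kv[1][0] - kv[1][1]) ** 2).mean()
print(mse)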
AtlasPilotPuppy / whiteman_vehicledata.py
Created September 16, 2014 20:48
Whiteman data joined with vehicle data (2008-2013)
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
whiteman_pems = pd.read_csv('whiteman_pems.csv')
whiteman_cleaned = whiteman_pems.fillna(0)
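The preview ends after the cleaning step. A sketch of the join and regression that the description implies; the second CSV, the join key, and the formula columns are all hypothetical:

vehicles = pd.read_csv('vehicle_data.csv')                # hypothetical vehicle dataset
merged = pd.merge(whiteman_cleaned, vehicles, on='date')  # assumed join key

# ordinary least squares via the statsmodels formula API imported above
model = sm.ols(formula='traffic_volume ~ vehicle_count', data=merged).fit()
print(model.summary())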
AtlasPilotPuppy / visualizing_crime.py
Last active October 28, 2017 02:53
Visualizing crime data from the SFPD using Matplotlib and pandas
# data can be found at https://data.sfgov.org/api/views/tmnf-yvry/rows.csv?accessType=DOWNLOAD
# or https://data.sfgov.org/Public-Safety/SFPD-Incidents-Previous-Three-Months/tmnf-yvry
import time
import matplotlib.colors as colors
import matplotlib.cm as cmx
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
import numpy as np
import pandas
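The preview stops at the imports. One plot the gist might build from the linked CSV, counting incidents per category and coloring the bars from a colormap (the Category column matches the published dataset; the colormap choice is an assumption):

crime = pandas.read_csv('sfpd_incidents.csv')  # the CSV linked above, saved locally
counts = crime['Category'].value_counts()

cmap = cmx.get_cmap('jet')
norm = colors.Normalize(vmin=0, vmax=len(counts) - 1)
bar_colors = [cmap(norm(i)) for i in range(len(counts))]

plt.bar(range(len(counts)), counts.values, color=bar_colors)
plt.xticks(range(len(counts)), counts.index, rotation=90)
plt.tight_layout()
plt.show()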
AtlasPilotPuppy / SparkSqlIntro.scala
Last active August 29, 2015 14:07
Quick Intro to Spark SQL
// data files can be downloaded at https://s3.amazonaws.com/hw-sandbox/tutorial1/infochimps_dataset_4778_download_16677-csv.zip
import java.io.Serializable
import java.util
import org.apache.spark.sql._
val sc = new SparkContext("spark://master:7077", "Spark SQL Intro")
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
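The createSchemaRDD import pulls in the implicit that turns an RDD of case classes into a queryable SchemaRDD. For comparison, the same idea in the PySpark of that era went through Row objects and inferSchema (Spark 1.x names; later releases use createDataFrame):

from pyspark.sql import Row

people = sc.parallelize([Row(name='alice', age=30), Row(name='bob', age=19)])
people_table = sqlContext.inferSchema(people)  # Python analogue of the implicit conversion
people_table.registerTempTable('people')
print(sqlContext.sql('SELECT name FROM people WHERE age >= 21').collect())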
AtlasPilotPuppy / SparkSqlIntro.py
Last active July 28, 2018 10:14
Introduction to Spark SQL using Python
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType
sc = SparkContext('spark://master:7077', 'Spark SQL Intro')
sqlContext = SQLContext(sc)
dividends = sc.textFile("hdfs://master:9000/user/hdfs/NYSE_dividends_A.csv")
dividends_parsed = dividends.filter(lambda r: not r.startswith('exchange')).map(lambda r: r.split(',')).map(
lambda row: {'exchange': row[0], 'stock_symbol': row[1], 'date': row[2], 'dividends': float(row[3])})
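The parsed records presumably get registered as a table next. A sketch of that step plus a sample aggregate, using the Spark 1.x method names (dict-based rows were the documented pattern then; later releases want Row objects and createDataFrame):

dividends_table = sqlContext.inferSchema(dividends_parsed)
dividends_table.registerTempTable('dividends')
top = sqlContext.sql('SELECT stock_symbol, SUM(dividends) AS total '
                     'FROM dividends GROUP BY stock_symbol '
                     'ORDER BY total DESC LIMIT 10')
for row in top.collect():
    print(row)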
AtlasPilotPuppy / log_analysis.py
Last active August 29, 2015 14:08
Log analysis using Spark
# Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
# log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import shlex
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType
sc = SparkContext('spark://master:7077', 'Log Analysis')
sqlContext = SQLContext(sc)
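The preview stops before any parsing. shlex is presumably there to split log lines while keeping the quoted request field intact; a sketch against the EPA-HTTP line layout (host, bracketed timestamp, quoted request, status, bytes):

line = '141.243.1.172 [29:23:53:25] "GET /Software.html HTTP/1.0" 200 1497'  # sample line
host, timestamp, request, status, size = shlex.split(line)
method, resource, protocol = request.split()
# some entries log '-' for size; real code would guard the int() conversions
row = Row(host=host, timestamp=timestamp.strip('[]'), method=method,
          resource=resource, status=int(status), size=int(size))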
AtlasPilotPuppy / parallelize.scala
Created October 23, 2014 19:01
Parallelize collection in Spark
// sc is the spark context
val data = Array(1, 2, 3, 4, 5)
val distData = sc.parallelize(data)
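For comparison, the same two steps in PySpark, plus one action to exercise the distributed collection:

data = [1, 2, 3, 4, 5]
dist_data = sc.parallelize(data)
print(dist_data.reduce(lambda a, b: a + b))  # 15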
AtlasPilotPuppy / LogAnalysis.scala
Last active August 29, 2015 14:08
Log analysis in Scala
// Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
// log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import org.apache.spark.SparkContext
import org.apache.spark.sql._
import java.util.regex.Pattern
val sc = new SparkContext("spark://master:7077", "Log Analysis")
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
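The preview ends at the setup. Once the parsed log lines are registered as a table (as in the Python gist above), a typical query might rank response codes; a hedged PySpark sketch assuming a hypothetical logs table with a status column:

status_counts = sqlContext.sql('SELECT status, COUNT(*) AS hits '
                               'FROM logs GROUP BY status ORDER BY hits DESC')
for row in status_counts.collect():
    print(row.status, row.hits)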