Anant Asthana (AtlasPilotPuppy)

  • Salt Lake City
AtlasPilotPuppy / question1_19.py
Last active January 2, 2016 21:19
Python matplotlib example code.
from pylab import *
# create the function q(t) based on the question
def q(t):
    if t < 0:
        return 0
    if t <= 10:
        return 5 * t
    if t <= 60:
        return 60 - t
    return 0  # q(60) = 0, so assume it stays at zero afterwards
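The preview ends before anything is drawn. A minimal sketch of how q(t) might be sampled and plotted (the sampling range and labels are assumptions, not from the gist):

import numpy as np
import matplotlib.pyplot as plt

t = np.linspace(-10, 80, 500)   # sample just beyond the pieces' boundaries
y = [q(ti) for ti in t]         # q is scalar-valued, so evaluate pointwise
plt.plot(t, y)
plt.xlabel('t')
plt.ylabel('q(t)')
plt.title('Piecewise function q(t)')
plt.show()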
AtlasPilotPuppy / hbase_rdd.scala
Last active August 3, 2016 14:21
Accessing HBase from Apache Spark
import org.apache.spark.SparkContext
import org.apache.spark.rdd.NewHadoopRDD
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
val sc = new SparkContext("local", "Simple App")
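The preview stops right after the SparkContext; the gist presumably goes on to wrap an HBase table scan in an RDD. A rough PySpark equivalent of that step, assuming the converter classes shipped with Spark's bundled examples and a hypothetical table name and ZooKeeper quorum:

from pyspark import SparkContext

sc = SparkContext('local', 'Simple App')
conf = {'hbase.zookeeper.quorum': 'localhost',     # assumed quorum
        'hbase.mapreduce.inputtable': 'my_table'}  # hypothetical table name
# requires the spark-examples jar (for the converters) and the HBase jars on the classpath
hbase_rdd = sc.newAPIHadoopRDD(
    'org.apache.hadoop.hbase.mapreduce.TableInputFormat',
    'org.apache.hadoop.hbase.io.ImmutableBytesWritable',
    'org.apache.hadoop.hbase.client.Result',
    keyConverter='org.apache.spark.examples.pythonconverters.ImmutableBytesWritableToStringConverter',
    valueConverter='org.apache.spark.examples.pythonconverters.HBaseResultToStringConverter',
    conf=conf)
print(hbase_rdd.count())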
AtlasPilotPuppy / SparkHbaseALS.scala
Last active August 29, 2015 14:03
Uses values in HBase tables to train and test an ALS model in MLlib.
import org.apache.spark.rdd.NewHadoopRDD
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating
import scala.collection.mutable.ArrayBuffer
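Only the imports survive in the preview. A minimal sketch of the train-and-test cycle the description refers to, using the classic MLlib API; the ratings below are hardcoded stand-ins for the values read from HBase, and the hyperparameters are assumptions:

from pyspark.mllib.recommendation import ALS, Rating

# assumes an existing SparkContext sc; each Rating is (user, product, rating)
ratings = sc.parallelize([Rating(1, 10, 4.0), Rating(1, 20, 2.5), Rating(2, 10, 5.0)])
model = ALS.train(ratings, rank=10, iterations=10)

# score the model on the same (user, product) pairs and compute mean squared error
pairs = ratings.map(lambda r: (r.user, r.product))
predictions = model.predictAll(pairs).map(lambda r: ((r.user, r.product), r.rating))
truth = ratings.map(lambda r: ((r.user, r.product), r.rating))
mse = truth.join(predictions).map(lambda kv: (kv[1][0] - kv[1][1]) ** 2).mean()
print(mse)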
AtlasPilotPuppy / whiteman_vehicledata.py
Created September 16, 2014 20:48
Whiteman data joined with vehicle data (2008-2013)
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
whiteman_pems = pd.read_csv('whiteman_pems.csv')
whiteman_cleaned = whiteman_pems.fillna(0)
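The preview ends after the cleaning step. A sketch of the join and regression that the description implies; the second CSV, the join key, and the formula columns are all hypothetical:

vehicles = pd.read_csv('vehicle_data.csv')                # hypothetical vehicle dataset
merged = pd.merge(whiteman_cleaned, vehicles, on='date')  # assumed join key

# ordinary least squares via the statsmodels formula API imported above
model = sm.ols(formula='traffic_volume ~ vehicle_count', data=merged).fit()
print(model.summary())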
AtlasPilotPuppy / visualizing_crime.py
Last active October 28, 2017 02:53
Visualizing crime data from the SFPD using Matplotlib and pandas
# data can be found at https://data.sfgov.org/api/views/tmnf-yvry/rows.csv?accessType=DOWNLOAD
# or https://data.sfgov.org/Public-Safety/SFPD-Incidents-Previous-Three-Months/tmnf-yvry
import time
import matplotlib.colors as colors
import matplotlib.cm as cmx
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
import numpy as np
import pandas
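The preview stops at the imports. One plot the gist might build from the linked CSV, counting incidents per category and coloring the bars from a colormap (the Category column matches the published dataset; the colormap choice is an assumption):

crime = pandas.read_csv('sfpd_incidents.csv')  # the CSV linked above, saved locally
counts = crime['Category'].value_counts()

cmap = cmx.get_cmap('jet')
norm = colors.Normalize(vmin=0, vmax=len(counts) - 1)
bar_colors = [cmap(norm(i)) for i in range(len(counts))]

plt.bar(range(len(counts)), counts.values, color=bar_colors)
plt.xticks(range(len(counts)), counts.index, rotation=90)
plt.tight_layout()
plt.show()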
AtlasPilotPuppy / SparkSqlIntro.scala
Last active August 29, 2015 14:07
Quick Intro to Spark SQL
// data files can be downloaded at https://s3.amazonaws.com/hw-sandbox/tutorial1/infochimps_dataset_4778_download_16677-csv.zip
import java.io.Serializable
import java.util
import org.apache.spark.sql._
val sc = new SparkContext("spark://master:7077", "Spark SQL Intro")
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
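The createSchemaRDD import pulls in the implicit that turns an RDD of case classes into a queryable SchemaRDD. For comparison, the same idea in the PySpark of that era went through Row objects and inferSchema (Spark 1.x names; later releases use createDataFrame):

from pyspark.sql import Row

people = sc.parallelize([Row(name='alice', age=30), Row(name='bob', age=19)])
people_table = sqlContext.inferSchema(people)  # Python analogue of the implicit conversion
people_table.registerTempTable('people')
print(sqlContext.sql('SELECT name FROM people WHERE age >= 21').collect())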
AtlasPilotPuppy / SparkSqlIntro.py
Last active July 28, 2018 10:14
Introduction to Spark SQL using Python
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType
sc = SparkContext('spark://master:7077', 'Spark SQL Intro')
sqlContext = SQLContext(sc)
dividends = sc.textFile("hdfs://master:9000/user/hdfs/NYSE_dividends_A.csv")
dividends_parsed = dividends.filter(lambda r: not r.startswith('exchange')).map(lambda r: r.split(',')).map(
lambda row: {'exchange': row[0], 'stock_symbol': row[1], 'date': row[2], 'dividends': float(row[3])})
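The parsed records presumably get registered as a table next. A sketch of that step plus a sample aggregate, using the Spark 1.x method names (dict-based rows were the documented pattern then; later releases want Row objects and createDataFrame):

dividends_table = sqlContext.inferSchema(dividends_parsed)
dividends_table.registerTempTable('dividends')
top = sqlContext.sql('SELECT stock_symbol, SUM(dividends) AS total '
                     'FROM dividends GROUP BY stock_symbol '
                     'ORDER BY total DESC LIMIT 10')
for row in top.collect():
    print(row)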
AtlasPilotPuppy / log_analysis.py
Last active August 29, 2015 14:08
Log analysis using Spark
# Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
# log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import shlex
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType
sc = SparkContext('spark://master:7077', 'Log Analysis')
sqlContext = SQLContext(sc)
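The preview stops before any parsing. shlex is presumably there to split log lines while keeping the quoted request field intact; a sketch against the EPA-HTTP line layout (host, bracketed timestamp, quoted request, status, bytes):

line = '141.243.1.172 [29:23:53:25] "GET /Software.html HTTP/1.0" 200 1497'  # sample line
host, timestamp, request, status, size = shlex.split(line)
method, resource, protocol = request.split()
# some entries log '-' for size; real code would guard the int() conversions
row = Row(host=host, timestamp=timestamp.strip('[]'), method=method,
          resource=resource, status=int(status), size=int(size))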
AtlasPilotPuppy / parallelize.scala
Created October 23, 2014 19:01
Parallelize collection in Spark
// sc is the spark context
val data = Array(1, 2, 3, 4, 5)
val distData = sc.parallelize(data)
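For comparison, the same two steps in PySpark, plus one action to exercise the distributed collection:

data = [1, 2, 3, 4, 5]
dist_data = sc.parallelize(data)
print(dist_data.reduce(lambda a, b: a + b))  # 15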
AtlasPilotPuppy / LogAnalysis.scala
Last active August 29, 2015 14:08
Log analysis in Scala
// Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
// log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import org.apache.spark.SparkContext
import org.apache.spark.sql._
import java.util.regex.Pattern
val sc = new SparkContext("spark://master:7077", "Log Analysis")
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
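The preview ends at the setup. Once the parsed log lines are registered as a table (as in the Python gist above), a typical query might rank response codes; a hedged PySpark sketch assuming a hypothetical logs table with a status column:

status_counts = sqlContext.sql('SELECT status, COUNT(*) AS hits '
                               'FROM logs GROUP BY status ORDER BY hits DESC')
for row in status_counts.collect():
    print(row.status, row.hits)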