Anant Asthana (AtlasPilotPuppy) · Salt Lake City
@AtlasPilotPuppy
AtlasPilotPuppy / colorization.ipynb
Last active April 13, 2016 05:22
Neural network to colorize black and white images
@AtlasPilotPuppy
AtlasPilotPuppy / audio_analysis.ipynb
Last active April 8, 2016 17:46
Analysis of an audio track
@AtlasPilotPuppy
AtlasPilotPuppy / accumulator.py
Created October 24, 2014 17:43
Accumulator example
from pyspark import SparkContext
sc = SparkContext('spark://master:7077', 'accumulator example')
# Accumulators are initialized with an initial value.
# They expose an add() method for adding values from workers,
# and a value property that is visible only to the driver.
accum = sc.accumulator(0)
data = sc.parallelize(range(1,1000))
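The snippet above stops before the accumulator is actually used. The idea can be sketched in pure Python without a cluster: workers only ever call `add()`, and the driver alone reads `value`. The class below is an illustrative stand-in, not Spark's implementation.

```python
# A minimal pure-Python sketch of the accumulator idea. In Spark, worker
# tasks may only add to the accumulator; reading .value is reserved for
# the driver program.
class SketchAccumulator:
    def __init__(self, initial):
        self._value = initial

    def add(self, term):
        # Workers call add(); additions are commutative and associative,
        # so the order of worker updates does not matter.
        self._value += term

    @property
    def value(self):
        # Only the driver should read this.
        return self._value

accum = SketchAccumulator(0)
for x in range(1, 1000):  # stands in for rdd.foreach(lambda x: accum.add(x))
    accum.add(x)
```

In real Spark code, the loop would be `data.foreach(lambda x: accum.add(x))`, after which `accum.value` on the driver holds the total.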
@AtlasPilotPuppy
AtlasPilotPuppy / LogAnalysis.scala
Last active August 29, 2015 14:08
Log analysis in Scala
// Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
// log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import org.apache.spark.SparkContext
import org.apache.spark.sql._
import java.util.regex.Pattern
val sc = new SparkContext("spark://master:7077", "Log Analysis")
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
@AtlasPilotPuppy
AtlasPilotPuppy / parallelize.scala
Created October 23, 2014 19:01
Parallelize collection in Spark
// sc is the spark context
val data = Array(1, 2, 3, 4, 5)
val distData = sc.parallelize(data)
@AtlasPilotPuppy
AtlasPilotPuppy / log_analysis.py
Last active August 29, 2015 14:08
Log analysis using Spark
# Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
# log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import shlex
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType
sc = SparkContext('spark://master:7077', 'Spark SQL Intro')
sqlContext = SQLContext(sc)
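The snippet ends before the parsing step, which is presumably why `shlex` is imported: it keeps the quoted request string in each log line together as one field. A runnable sketch of that per-line parse, using an illustrative line in the EPA-HTTP trace format (host, bracketed timestamp, quoted request, status, bytes) and assumed field names:

```python
import shlex

# One sample line in the EPA-HTTP trace format (illustrative, not from
# the actual file).
line = '141.243.1.172 [29:23:53:25] "GET /Software.html HTTP/1.0" 200 1497'

def parse_line(line):
    # shlex.split keeps the quoted request ("GET ... HTTP/1.0") as a
    # single token, so each line yields exactly five fields.
    host, ts, request, status, size = shlex.split(line)
    method, resource, protocol = request.split()
    return {'host': host,
            'timestamp': ts.strip('[]'),
            'method': method,
            'resource': resource,
            'status': int(status),
            # some entries use '-' for size; treat those as 0
            'size': int(size) if size.isdigit() else 0}

record = parse_line(line)
```

In the Spark version, this function would be applied with `logs.map(parse_line)` before building Rows for the SQLContext.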
@AtlasPilotPuppy
AtlasPilotPuppy / SparkSqlIntro.py
Last active July 28, 2018 10:14
Introduction to Spark SQL using Python
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType
sc = SparkContext('spark://master:7077', 'Spark SQL Intro')
sqlContext = SQLContext(sc)
dividends = sc.textFile("hdfs://master:9000/user/hdfs/NYSE_dividends_A.csv")
dividends_parsed = dividends.filter(lambda r: not r.startswith('exchange')).map(lambda r: r.split(',')).map(
lambda row: {'exchange': row[0], 'stock_symbol': row[1], 'date': row[2], 'dividends': float(row[3])})
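The filter-the-header-then-parse transform applied to the dividends RDD above can be checked on a plain list without a cluster. The sample rows below are illustrative, not real NYSE data:

```python
# The same transform as the Spark snippet, on a plain Python list:
# drop the header row, split on commas, build one dict per record.
rows = [
    'exchange,stock_symbol,date,dividends',
    'NYSE,AEA,2010-02-08,0.056',
    'NYSE,AIT,2010-02-27,0.15',
]

parsed = [
    {'exchange': f[0], 'stock_symbol': f[1], 'date': f[2], 'dividends': float(f[3])}
    for f in (r.split(',') for r in rows)
    if f[0] != 'exchange'  # skip the header line
]
```

With an RDD of such dicts, the gist presumably goes on to call `sqlContext.inferSchema(...)` and register a temp table for SQL queries.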
@AtlasPilotPuppy
AtlasPilotPuppy / SparkSqlIntro.scala
Last active August 29, 2015 14:07
Quick Intro to Spark SQL
// data files can be downloaded at https://s3.amazonaws.com/hw-sandbox/tutorial1/infochimps_dataset_4778_download_16677-csv.zip
import java.io.Serializable
import java.util
import org.apache.spark.sql._
val sc = new SparkContext("spark://master:7077", "Spark SQL Intro")
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
@AtlasPilotPuppy
AtlasPilotPuppy / visualizing_crime.py
Last active October 28, 2017 02:53
Visualizing crime data from SFPD using Matplotlib and Pandas
# data can be found at https://data.sfgov.org/api/views/tmnf-yvry/rows.csv?accessType=DOWNLOAD
# or https://data.sfgov.org/Public-Safety/SFPD-Incidents-Previous-Three-Months/tmnf-yvry
import time
import matplotlib.colors as colors
import matplotlib.cm as cmx
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
import numpy as np
import pandas
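The imports above only set the stage; the visualizations are built on per-category counts of the incident data. A small sketch with a synthetic stand-in frame (the column names mirror the SFPD CSV but the rows are invented):

```python
import pandas as pd

# Synthetic stand-in for the SFPD incidents CSV; 'Category' and
# 'DayOfWeek' are real columns in that dataset, the rows are made up.
incidents = pd.DataFrame({
    'Category': ['LARCENY/THEFT', 'ASSAULT', 'LARCENY/THEFT', 'VANDALISM'],
    'DayOfWeek': ['Monday', 'Monday', 'Tuesday', 'Monday'],
})

# Count incidents per category, most frequent first. A Series like this
# can be handed straight to plt.bar / ax.barh for a bar chart.
counts = incidents.groupby('Category').size().sort_values(ascending=False)
```

The color-mapping imports (`matplotlib.colors`, `matplotlib.cm`) suggest the full script assigns a distinct color per category when plotting.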
@AtlasPilotPuppy
AtlasPilotPuppy / whiteman_vehicledata.py
Created September 16, 2014 20:48
Whiteman data joined with vehicle data (2008-2013)
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
whiteman_pems = pd.read_csv('whiteman_pems.csv')
whiteman_cleaned = whiteman_pems.fillna(0)
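The snippet cuts off after the NaN fill, before the join the title describes. The shape of that step can be sketched with small illustrative frames; the column names here are assumptions, not the real PeMS or vehicle-data schema:

```python
import pandas as pd
import numpy as np

# Illustrative stand-ins for the PeMS traffic data and the vehicle data;
# 'year', 'flow', and 'registrations' are assumed column names.
pems = pd.DataFrame({'year': [2008, 2009, 2010],
                     'flow': [100.0, np.nan, 120.0]})
vehicles = pd.DataFrame({'year': [2008, 2009, 2010],
                         'registrations': [50, 55, 60]})

cleaned = pems.fillna(0)                     # same NaN handling as the gist
joined = cleaned.merge(vehicles, on='year')  # join on the shared year key
```

With the joined frame in hand, the `statsmodels.formula.api` import suggests a regression such as `sm.ols('flow ~ registrations', data=joined).fit()` follows.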