Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Spark accumulator example: counts on workers, readable only on the driver.
from pyspark import SparkContext

sc = SparkContext('spark://master:7077', 'accumulator example')

# Accumulators are initialized with an initial value.
# They have an add method to add values to the accumulator,
# and a value property that is visible only to the master (driver).
accum = sc.accumulator(0)
data = sc.parallelize(range(1, 1000))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
// Log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import org.apache.spark.SparkContext
import org.apache.spark.sql._
import java.util.regex.Pattern

// SparkContext has no apply() factory — it must be instantiated with `new`
// (the sibling snippet below at "Spark SQL Intro" already does this correctly).
val sc = new SparkContext("spark://master:7077", "Log Analysis")
// Was misspelled `sqlContest`, which broke the import on the next line.
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// sc is the Spark context, assumed to be in scope (e.g. provided by the
// spark-shell or created earlier in the session).
val data = Array(1, 2, 3, 4, 5)
// Distribute the local array across the cluster as an RDD.
val distData = sc.parallelize(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Log file contains the first 200 lines from http://ita.ee.lbl.gov/html/contrib/EPA-HTTP.html
# Log file can be found at ftp://ita.ee.lbl.gov/traces/epa-http.txt.Z
import shlex
from pyspark import SparkContext
from pyspark.sql import SQLContext
# NOTE(review): importing Row/StructField/StructType/... straight from
# pyspark.sql is Spark 1.x style; in modern PySpark these live in
# pyspark.sql.types — confirm the target Spark version before upgrading.
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType

sc = SparkContext('spark://master:7077', 'Spark SQL Intro')
sqlContext = SQLContext(sc)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Row, StructField, StructType, StringType, IntegerType

sc = SparkContext('spark://master:7077', 'Spark SQL Intro')
sqlContext = SQLContext(sc)

# NYSE dividends CSV; columns: exchange,stock_symbol,date,dividends
dividends = sc.textFile("hdfs://master:9000/user/hdfs/NYSE_dividends_A.csv")


def _parse_dividend_row(row):
    """Turn one already-comma-split CSV line into a record dict."""
    return {'exchange': row[0], 'stock_symbol': row[1],
            'date': row[2], 'dividends': float(row[3])}


# Drop the header line (it starts with the literal word 'exchange'), split each
# line on commas, and build one dict per record.
# NOTE(review): a plain split(',') does not handle quoted fields containing
# commas — fine for this dataset, but confirm before reusing on other CSVs.
dividends_parsed = (dividends
                    .filter(lambda r: not r.startswith('exchange'))
                    .map(lambda r: r.split(','))
                    .map(_parse_dividend_row))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Data files can be downloaded at https://s3.amazonaws.com/hw-sandbox/tutorial1/infochimps_dataset_4778_download_16677-csv.zip
import java.io.Serializable
import java.util
// Needed for `new SparkContext` below — the original snippet omitted it
// (its sibling "Log Analysis" snippet imports it).
import org.apache.spark.SparkContext
import org.apache.spark.sql._

val sc = new SparkContext("spark://master:7077", "Spark SQL Intro")
val sqlContext = new SQLContext(sc)
import sqlContext.createSchemaRDD
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# data can be found at https://data.sfgov.org/api/views/tmnf-yvry/rows.csv?accessType=DOWNLOAD | |
# or https://data.sfgov.org/Public-Safety/SFPD-Incidents-Previous-Three-Months/tmnf-yvry | |
import time | |
import matplotlib.colors as colors | |
import matplotlib.cm as cmx | |
from matplotlib import pyplot as plt | |
from matplotlib.patches import Patch | |
import numpy as np | |
import pandas |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Load the PeMS traffic data and replace missing values with zero.
# NOTE(review): fillna(0) zero-fills NaNs in every column, including any
# non-numeric ones — confirm that is intended for this dataset.
whiteman_pems = pd.read_csv('whiteman_pems.csv')
whiteman_cleaned = whiteman_pems.fillna(0)
NewerOlder