Created
July 21, 2015 08:11
-
-
Save Emaasit/a25c41abe15a75c76e42 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SparkR quick-start script: starts a local Spark context, builds SparkR
# DataFrames from an R data.frame and from a JSON file, runs a SQL query
# against a temporary table, and shuts Spark down again.

# Set the system environment variables ----
# SPARK_HOME points at the local Spark installation; SparkR's R package is
# loaded from "<SPARK_HOME>/R/lib", so that directory is put first on the
# library search path.
Sys.setenv(SPARK_HOME = "C:/Apache/spark-1.4.1")
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))

# Load the SparkR library
library(SparkR)

# Create a Spark context and a SQL context ----
sc <- sparkR.init(master = "local")
sqlContext <- sparkRSQL.init(sc)

# Create a SparkR DataFrame from the `faithful` data set and show its
# first rows
DF <- createDataFrame(sqlContext, faithful)
head(DF)

# Create a simple local data.frame
localDF <- data.frame(name = c("John", "Smith", "Sarah"), age = c(19, 23, 18))

# Convert the local data frame to a SparkR DataFrame
df <- createDataFrame(sqlContext, localDF)

# Print its schema
printSchema(df)
# root
#  |-- name: string (nullable = true)
#  |-- age: double (nullable = true)

# Create a DataFrame from a JSON file ----
# The file is resolved relative to SPARK_HOME.
path <- file.path(
  Sys.getenv("SPARK_HOME"),
  "examples/src/main/resources/people.json"
)
peopleDF <- jsonFile(sqlContext, path)
printSchema(peopleDF)

# Register this DataFrame as a table so it can be referenced from SQL
registerTempTable(peopleDF, "people")

# SQL statements can be run by using the sql methods provided by sqlContext
teenagers <- sql(
  sqlContext,
  "SELECT name FROM people WHERE age >= 13 AND age <= 19"
)

# Call collect to get a local data.frame
teenagersLocalDF <- collect(teenagers)

# Print the teenagers in our dataset
print(teenagersLocalDF)

# Stop the SparkContext now
sparkR.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment