Skip to content

Instantly share code, notes, and snippets.

@robintux
Created April 30, 2023 14:41
Show Gist options
  • Save robintux/105171ddb877528a2479679644b3fad8 to your computer and use it in GitHub Desktop.
Save robintux/105171ddb877528a2479679644b3fad8 to your computer and use it in GitHub Desktop.
# Instalamos el jdk (java)
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
# Descargamos spark
!wget https://dlcdn.apache.org/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3.tgz
# Descomprimimos el binario de spark
!tar xvzf spark-3.4.0-bin-hadoop3.tgz
# Cargamos el modulo os
import os
# Agreguemos un par de valores a os.environ
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"]= "/content/spark-3.4.0-bin-hadoop3"
# Instalamos findspark
!pip install -q findspark
# Enseñemosle al interprete ipython de nuestro notebook donde se encuentra spark
import findspark
findspark.init()
findspark.find()
# Spark entry points used below
from pyspark import SparkContext
from pyspark.sql import SparkSession

# Create a Spark session (or reuse the existing one) with master "local[*]"
spark1 = (
    SparkSession.builder
    .master("local[*]")
    .getOrCreate()
)
# Show the information for this Spark session
spark1

# Obtain the Spark context.
# Explicit construction, left disabled:
# sc1 = SparkContext(master = "local[*]", appName = "UltimaClase")
sc = SparkContext.getOrCreate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment