import pyspark

with pyspark.SparkContext("local", "PySparkWordCount") as sc:
    file_path = "example.csv"
    # Load the file as an RDD of lines and cache it for reuse
    data = sc.textFile(file_path).cache()
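    # A minimal sketch of the word count the app name suggests, assuming
    # example.csv holds plain whitespace-separated text
    counts = (data.flatMap(lambda line: line.split())
                  .map(lambda word: (word, 1))
                  .reduceByKey(lambda a, b: a + b))
    print(counts.take(10))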
# http://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-crawler-pyspark-extensions-python-intro.html
# Install or upgrade the AWS CLI for the current user
pip install awscli --upgrade --user
# Reload the shell profile so the user-level install is on PATH
source ~/.bash_profile
# Verify the installation
aws --version
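# One common follow-up, assuming you have an IAM access key pair: aws configure
# prompts interactively for the access key, secret key, default region, and
# output format
aws configure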
# Documentation
# https://conda.io/docs/user-guide/tasks/manage-environments.html
# List the environments
conda info --envs
conda env list
# Create an environment with a specific Python version
conda create -n myenv python=3.4
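# A new environment is inactive until you switch to it; a quick sketch,
# assuming conda >= 4.4 (older versions use "source activate" instead)
conda activate myenv
python --version
# Switch back out when done
conda deactivate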
# Inclusive range
x = 1..10
# Exclusive range
x = 1...10
# Get the first element
x.begin
x.first
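# Get the last element (a sketch mirroring the "first" example above; note
# that .end and argument-less .last return the boundary even for an
# exclusive range, while .last(n) honors exclusivity)
x.end
x.last
x.last(1)   # => [9] for 1...10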