This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source: http://stackoverflow.com/questions/30214474/how-to-run-multiple-jobs-in-one-sparkcontext-from-separate-threads-in-pyspark | |
# Prereqs: | |
# set | |
# spark.dynamicAllocation.enabled true | |
# spark.shuffle.service.enabled true | |
# in spark-defaults.conf | |
import threading | |
from pyspark import SparkContext, SparkConf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Write DataFrame to Disk | |
spark_df.coalesce(1).write.csv( '<saved_output/YOUR_FOLDER_NAME>', header=True, mode='overwrite' ) | |
# Read from Disk to DataFrame | |
new_spark_df = sqlContext.read.csv(s3_path, header=True, inferSchema=False) # For S3 | |
new_spark_df = sqlContext.read.csv('<LOCATION>', header=True, inferSchema=False) # mode='FAILFAST' | |
# SORTING | |
from pyspark.sql.functions import col | |
col_name = 'restaurant_id' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############# | |
# VARIABLES # | |
############# | |
access_key_id = '<ACCESS KEY ID>' | |
secret_access_key = '<SOME SECRET SHIT>' | |
bucket_name = 'my-awesome-bucket' | |
folder_name = 'upload_folder' | |
file_name = 'uploaded_doge_shit.jpg' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
find . -name ".ipynb_checkpoints" | grep ipynb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import seaborn as sns | |
sns.set_style("whitegrid") | |
sns.set(font_scale=1.5) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.functions import col, concat, lit | |
custom_concat = [col('appName'), lit('|'), col('platform'), lit('|'), | |
col('carrier'), lit('|'), col('connectionType'), lit('|'), | |
col('country'), lit('|'), col('city'), lit('|'), | |
col('userAgent')] | |
# Add a new column entitled "custom_col" | |
union_df = union_df.withColumn('custom_col', concat(*custom_concat)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a Vertex DataFrame with unique ID column "id" | |
v = sqlContext.createDataFrame([ | |
("a", "Alice", 34), | |
("b", "Bob", 36), | |
("c", "Charlie", 30), | |
], ["id", "name", "age"]) | |
# Create an Edge DataFrame with "src" and "dst" columns | |
e = sqlContext.createDataFrame([ | |
("a", "b", "friend"), | |
("b", "c", "follow"), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Check out: | |
# https://gist.github.com/adrianorsouza/df4759b0583dcd112da4 | |
# http://olivierlacan.com/posts/launch-sublime-text-3-from-the-command-line/ | |
# To usr/bin | |
sudo ln -s /Applications/Sublime\ Text.app/Contents/SharedSupport/bin/subl /usr/bin/subl | |
# To usr/***LOCAL***/bin | |
ln -s "/Applications/Sublime Text.app/Contents/SharedSupport/bin/subl" /usr/local/bin/subl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Widen width of notebook | |
from IPython.core.display import display, HTML | |
display(HTML("<style>.container { width:98% !important; }</style>")) | |
# Set pandas display options | |
import pandas as pd | |
pd.set_option('display.max_columns', 50) | |
pd.set_option('display.max_colwidth', 200) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
n02119789 1 kit_fox | |
n02100735 2 English_setter | |
n02110185 3 Siberian_husky | |
n02096294 4 Australian_terrier | |
n02102040 5 English_springer | |
n02066245 6 grey_whale | |
n02509815 7 lesser_panda | |
n02124075 8 Egyptian_cat | |
n02417914 9 ibex | |
n02123394 10 Persian_cat |