Skip to content

Instantly share code, notes, and snippets.

@oneryalcin
Created September 23, 2019 20:42
Show Gist options
  • Save oneryalcin/d7d2a1227b0d936ca1aa66ce0625bcb0 to your computer and use it in GitHub Desktop.
Save oneryalcin/d7d2a1227b0d936ca1aa66ce0625bcb0 to your computer and use it in GitHub Desktop.
1 Sparkify Import libs
# import libraries
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import count, when, isnan, isnull, desc_nulls_first, desc, \
from_unixtime, col, dayofweek, dayofyear, hour, to_date, month
import pyspark.sql.functions as F
from pyspark.ml.feature import OneHotEncoderEstimator, StringIndexer, VectorAssembler, StandardScaler, MinMaxScaler
from pyspark.ml.classification import DecisionTreeClassifier, RandomForestClassifier
# sc = SparkContext(appName="Project_workspace")
# Alternative: build the context from an explicit SparkConf (when running a local cluster, call setMaster("local"))
# configure = SparkConf().setAppName("name").setMaster("IP Address")
# sc = SparkContext(conf=configure)
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment