Skip to content

Instantly share code, notes, and snippets.

View RyMey's full-sized avatar

Rya Meyvriska RyMey

  • Jakarta, Indonesia
View GitHub Profile
@RyMey
RyMey / JoinDataFramePython
Created May 5, 2019 05:41
Melakukan join antar dataframe
# Build a small gender lookup DataFrame with an explicit schema.
genderFields = [
    StructField("id", IntegerType(), True),
    StructField("name", StringType(), True),
]
schemaGender = StructType(genderFields)
dataGender = sparkSession.createDataFrame([(0, "Female"), (1, "Male")], schemaGender)
dataGender.show()

# Join against the previously loaded table — inspect its columns first.
print(dataFrameFile.columns)
@RyMey
RyMey / JoinDatasetJava
Created May 5, 2019 05:35
Membuat dataframe agar bisa di join
// Build a small gender lookup dataset so it can be joined later.
// Schema: (id: int, name: string), both non-nullable.
StructField[] genderFields = new StructField[]{
        DataTypes.createStructField("id", DataTypes.IntegerType, false),
        DataTypes.createStructField("name", DataTypes.StringType, false)
};
StructType schema = DataTypes.createStructType(genderFields);
Dataset<Row> dataGender = sparkSession.createDataFrame(
        Arrays.asList(RowFactory.create(0, "Female"), RowFactory.create(1, "Male")),
        schema);
@RyMey
RyMey / OperateDataPython
Created May 5, 2019 05:33
Operasi-operasi data pada Python
# Add a "company" column derived from the email address.
# Fix: the scraped snippet had the function body at column 0, which is an
# IndentationError — the body must be indented under the def.
@udf()
def mapping_function_company(email):
    """Extract the company part of an email domain.

    E.g. "user@acme.com" -> "acme".

    NOTE(review): assumes a well-formed address containing '@' followed by
    a '.'; for malformed input find() returns -1 and the slices produce
    garbage rather than raising — confirm upstream validation.
    """
    sub_by_at = email[email.find("@"):]       # "@acme.com"
    return sub_by_at[1:sub_by_at.find('.')]   # "acme"

dataFrameFile = dataFrameFile.withColumn("company", mapping_function_company("email"))
@RyMey
RyMey / OperateDataJava
Created May 5, 2019 05:31
Operasi-operasi data pada Java
// Add a "company" column derived from the email address.
// The UDF takes everything after '@' and before the next '.',
// e.g. "user@acme.com" -> "acme".
// NOTE(review): throws StringIndexOutOfBoundsException when the email has no
// '@' (indexOf returns -1) or no '.' after it — assumes well-formed
// addresses; confirm upstream validation.
UserDefinedFunction mappingFunctionCompany = udf(
(String email) -> {
String subByAt = email.substring(email.indexOf('@'));
return subByAt.substring(1, subByAt.indexOf('.'));
},
DataTypes.StringType
);
// NOTE(review): this statement is truncated in the captured snippet — the
// apply(...) argument list and closing parentheses/semicolon are missing.
datasetFile = datasetFile.withColumn("company", mappingFunctionCompany.apply(
@RyMey
RyMey / ReadFileJava
Created May 5, 2019 05:12
Membaca file pada Java
// Load the raw file into an RDD (roughly a distributed list of lines)
// and print the first three of them.
// Idiom fix: use the method reference System.out::println instead of the
// equivalent verbose lambda.
JavaRDD<String> rddFile = sparkContext.textFile("/Users/rya.meyvriska/Downloads/mock_data.csv");
rddFile.take(3).forEach(System.out::println);

// Read the same file into a Dataset, treating the first row as the header.
Dataset<Row> datasetFile = sparkSession.read()
        .option("header", "true")
        .csv("/Users/rya.meyvriska/Downloads/mock_data.csv");
datasetFile.show();
@RyMey
RyMey / ReadFilePython
Last active May 5, 2019 05:02
Membaca file pada pyspark
# Load the raw file into an RDD (roughly a distributed list of lines).
rddFile = sparkContext.textFile("/Users/rya.meyvriska/Downloads/mock_data.csv")
# Fix: the scraped snippet had print(v) at column 0, which is an
# IndentationError — the loop body must be indented.
for v in rddFile.take(3):
    print(v)

# Read the same file into a DataFrame, treating the first row as the header.
dataFrameFile = sparkSession.read.option("header", "true").csv("/Users/rya.meyvriska/Downloads/mock_data.csv")
dataFrameFile.show()
dataFrameFile.describe().show()
@RyMey
RyMey / GetWordPython
Created May 5, 2019 04:20
Mengambil Jumlah Kata yang awalannya D
# Build the String that will later be stored in an RDD.
# Fix: Python does not allow a statement to continue past a trailing '+'
# at end of line (that is a SyntaxError, unlike Java) — the concatenation
# must be wrapped in parentheses. String contents are preserved verbatim
# (including the missing spaces at each join point).
story = ("In the song, Maui told Moana about his amazing deeds. Why - he pulled up the islands from the sea," +
         "he lifted the sky, he even found fire and gave it to humans! As a demi-god, Maui was born with" +
         "special powers. Demi-god means one parent is a god and the other is human. Maui’s father was the god" +
         "and his mother was human.")
# Store the story in an RDD; parallelize() takes a collection, so the
# single string is wrapped in a one-element list.
rddStory = sparkContext.parallelize([story])
@RyMey
RyMey / GetWordJava
Last active May 5, 2019 04:18
Mengambil jumlah kata
// membuat RDD dari String
// membuat string yang nantinya ingin disimpan pada RDD
String story = "In the song, Maui told Moana about his amazing deeds. Why - he pulled up the islands from the sea," +
"he lifted the sky, he even found fire and gave it to humans! As a demi-god, Maui was born with" +
"special powers. Demi-god means one parent is a god and the other is human. Maui’s father was the god" +
"and his mother was human."
// Store the story in a Spark RDD; there is only one element, so it is
// wrapped with Collections.singletonList.
JavaRDD<String> rddStory = sparkContext.parallelize(Collections.singletonList(story));
# Spark configuration: run locally under the given application name.
sparkConf = SparkConf().setMaster("local").setAppName("Python Spark Playground")

# Create the Spark context.
# Fix: SparkContext's first positional parameter is `master`, not `conf`;
# passing the SparkConf positionally hands it to `master`. Use the keyword.
sparkContext = SparkContext(conf=sparkConf)

# Create the Spark session on top of the existing context.
sparkSession = SparkSession(sparkContext)
@RyMey
RyMey / ConfigurationSparkJava
Created May 5, 2019 03:25
Membuat Konfigurasi Spark Context dan Spark Session pada Java
// Spark context (i.e. Spark application) configuration:
// run locally under the name "learn-spark".
// SparkConf setters return `this`, so the calls can be chained.
SparkConf sparkConf = new SparkConf()
        .setMaster("local")
        .setAppName("learn-spark");

// Create the Spark context and silence log output below ERROR.
JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
sparkContext.setLogLevel("ERROR");

// specifically for datasets