Created
March 16, 2019 19:09
-
-
Save corneliouzbett/bb0bb9c04bb8737520c21e6f102be74a to your computer and use it in GitHub Desktop.
creating datasets in Java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Arrays;
import java.util.Collections;
import java.io.Serializable;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
public static class Person implements Serializable { | |
private String name; | |
private int age; | |
public String getName() { | |
return name; | |
} | |
public void setName(String name) { | |
this.name = name; | |
} | |
public int getAge() { | |
return age; | |
} | |
public void setAge(int age) { | |
this.age = age; | |
} | |
} | |
// Example: three ways of building a typed Dataset.
// NOTE(review): `spark` is not declared in this snippet; it is presumably an existing
// SparkSession provided by the surrounding code -- confirm before running.

// 1) From a Java bean: create an instance of the bean class and populate it.
Person person = new Person();
person.setName("Andy");
person.setAge(32);

// Encoders are created for Java beans
Encoder<Person> personEncoder = Encoders.bean(Person.class);
Dataset<Person> javaBeanDS = spark.createDataset(
    Collections.singletonList(person),
    personEncoder
);
javaBeanDS.show();

// 2) From common value types: encoders for most common types are provided in class Encoders
Encoder<Integer> integerEncoder = Encoders.INT();
Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder);
// The explicit MapFunction cast selects the Java-friendly overload of map().
Dataset<Integer> transformedDS = primitiveDS.map(
    (MapFunction<Integer, Integer>) value -> value + 1,
    integerEncoder);
transformedDS.collect(); // Returns [2, 3, 4]

// 3) From a DataFrame: DataFrames can be converted to a Dataset by providing a class.
// Mapping is based on name.
// NOTE(review): Spark's JSON schema inference reads integers as long; confirm that mapping
// onto the int `age` property of Person works on the Spark version in use.
String path = "examples/src/main/resources/people.json";
Dataset<Person> peopleDS = spark.read().json(path).as(personEncoder);
peopleDS.show();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment