Skip to content

Instantly share code, notes, and snippets.

@davidwhitney
Last active March 26, 2019 11:27
Show Gist options
  • Save davidwhitney/139e0764136486565c36a039f14fe99c to your computer and use it in GitHub Desktop.
Save davidwhitney/139e0764136486565c36a039f14fe99c to your computer and use it in GitHub Desktop.
Create Spark Rows with a schema in Java, e.g. for tests
package com.gfk.igl.insights.processor.spark.builders;
import lombok.var;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
/**
 * Small fluent builder that collects Spark {@link Row}s which all share one
 * schema, intended for hand-building test data.
 *
 * <p>The schema is fixed at construction time from the supplied fields; each
 * call to {@link #withRow(Object...)} appends one row bound to that schema.
 */
public class RowWithSchema {
    /** Schema attached to every row created by this builder. */
    private final StructType schema;

    /** Rows accumulated so far, in insertion order. */
    private final ArrayList<Row> rows;

    /**
     * Creates an empty builder whose schema consists of the given fields.
     *
     * @param fields the struct fields, in column order, that make up the schema
     */
    public RowWithSchema(StructField... fields) {
        this.schema = DataTypes.createStructType(fields);
        this.rows = new ArrayList<>();
    }

    /**
     * Appends a row holding the given values, bound to this builder's schema.
     *
     * <p>The values array is handed to the row as-is: this method does not copy
     * it and does not check its length or element types against the schema.
     *
     * <p>The row is created through the public
     * {@code GenericRowWithSchema(Object[], StructType)} constructor. Earlier
     * revisions located that constructor reflectively, but the selection loop
     * never updated its running maximum and therefore did not reliably choose
     * the intended overload.
     *
     * @param values the column values for the new row, in schema order
     * @return this builder, to allow call chaining
     * @throws NoSuchFieldException      never thrown by this implementation;
     *                                   kept so existing callers still compile
     * @throws IllegalAccessException    never thrown by this implementation;
     *                                   kept so existing callers still compile
     * @throws InvocationTargetException never thrown by this implementation;
     *                                   kept so existing callers still compile
     * @throws InstantiationException    never thrown by this implementation;
     *                                   kept so existing callers still compile
     */
    public RowWithSchema withRow(Object... values) throws NoSuchFieldException, IllegalAccessException, InvocationTargetException, InstantiationException {
        rows.add(new GenericRowWithSchema(values, schema));
        return this;
    }

    /**
     * Returns the rows added so far.
     *
     * <p>This is the builder's own backing list, not a copy, so later calls to
     * {@link #withRow(Object...)} are visible through the returned reference.
     *
     * @return the accumulated rows, in insertion order
     */
    public ArrayList<Row> toArrayList() {
        return rows;
    }
}
/**
 * Example showing how {@link RowWithSchema} is meant to be used: declare the
 * schema fields, add one or more rows, then collect them as a list.
 */
class Usage {
    /**
     * Builds a single four-column row (int, double, int, int) with a matching schema.
     *
     * @throws IllegalStateException if the builder reports a reflective failure
     *                               while creating the row
     */
    void something() {
        try {
            ArrayList<Row> rows = new RowWithSchema(
                    DataTypes.createStructField("item_id", DataTypes.IntegerType, true),
                    DataTypes.createStructField("someDouble", DataTypes.DoubleType, true),
                    DataTypes.createStructField("some_other_number", DataTypes.IntegerType, true),
                    DataTypes.createStructField("something_else", DataTypes.IntegerType, true)
            )
                    .withRow(1, 2.0d, 3, 4)
                    .toArrayList();
        } catch (ReflectiveOperationException e) {
            // withRow declares checked reflective exceptions; they must be handled
            // here for the example to compile. Rethrow unchecked, keeping the cause.
            throw new IllegalStateException("Failed to build example row", e);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment