This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class DedupFn<K, V> extends DoFn<KV<K, V>, KV<K, V>> { | |
private static final String STATE_ID = "messageDedup"; | |
private final Duration dedupWindow; | |
@StateId(STATE_ID) | |
private final StateSpec<ValueState<Long>> lastAccessedTime = StateSpecs.value(VarLongCoder.of()); | |
public DedupFn(Duration dedupWindow) { | |
this.dedupWindow = dedupWindow; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
LogicalProject(intColGroup=[$1], stringColGroup=[$0], doubleMax=[$2], longSum=[$3]) | |
LogicalAggregate(group=[{0, 1}], agg#0=[MAX($2)], agg#1=[SUM($3)]) | |
LogicalProject(stringCol=[$1], intCol=[$0], doubleCol=[$2], longCol=[$5]) | |
LogicalJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner]) | |
LogicalFilter(condition=[AND(>($2, 1.0E0), =($1, 'hello world'))]) | |
LogicalProject(intCol=[$7], stringCol=[$8], doubleCol=[$5]) | |
LogicalTableScan(table=[[dummy]]) | |
LogicalFilter(condition=[ENDSWITH(PIG_TUPLE($1, 'hello world'))]) | |
LogicalProject(intCol=[$7], stringCol=[$8], longCol=[$6]) | |
LogicalTableScan(table=[[dummy]]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Builds two filtered projections of the same input and joins them on
-- (intCol, stringCol).
-- NOTE(review): the trailing '| |' on each line looks like a table-extraction
-- artifact rather than Pig Latin syntax -- confirm and strip before running.

-- Load the 'dummy' relation through AvroStorage.
inputData = load 'dummy' using AvroStorage(); | |
-- First branch: keep intCol, stringCol and doubleCol ...
project1 = foreach inputData generate intCol, stringCol, doubleCol; | |
-- ... and retain rows with doubleCol > 1.0 whose stringCol equals 'hello world'.
filter1 = filter project1 by doubleCol > 1.0 and stringCol == 'hello world'; | |
-- Second branch: keep intCol, stringCol and longCol from the same input ...
project2 = foreach inputData generate intCol, stringCol, longCol; | |
-- ... and retain rows for which ENDSWITH(stringCol, 'hello world') holds.
filter2 = filter project2 by ENDSWITH(stringCol, 'hello world'); | |
-- Join the two branches on the composite key (intCol, stringCol), then
-- project the disambiguated (relation::field) columns back to plain names:
-- stringCol, intCol and doubleCol come from filter1; longCol from filter2.
joined = foreach(JOIN filter1 by (intCol, stringCol), filter2 by (intCol, stringCol)) generate | |
filter1::stringCol as stringCol, | |
filter1::intCol as intCol, | |
filter1::doubleCol as doubleCol, | |
filter2::longCol as longCol; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.linkedin.beam.io.LiKafkaIO; | |
import com.linkedin.beam.io.LiKafkaIOConfig; | |
import com.linkedin.beam.pipeline.SamzaPipelineOptionsFactory; | |
import com.linkedin.beam.transforms.Joins; | |
import com.linkedin.ump.samza.excution.SamzaExecUtil; | |
import java.util.Iterator; | |
import java.util.Objects; | |
import org.apache.avro.Schema; | |
import org.apache.avro.generic.GenericData; | |
import org.apache.avro.generic.GenericRecord; |