helena/TopAByBJob.scala

## part-0000
The job generates a part-file of tab-separated data, one `(count, b, a)` row per line, in the following format:

2034	cid1	a
1025	cid1	g
2034	cid3	g
1025	cid3	a
2034	cid6	f
1025	cid6	b

## TopAByBJob.scala
/**
 * Scalding job that emits, for every value of `b`, its most frequent `a` values
 * together with their occurrence counts.
 *
 * Output rows are `(count, b, a)` triples written as tab-separated values.
 *
 * NOTE(review): `rootpath`, `structure`, `directories`, `keep` and `outputdir` are not
 * defined in this class; presumably they are inherited from `DailyJobWithKeep` -- confirm.
 * `calculateA` is likewise defined elsewhere (presumably in `TypeAFilters`) -- confirm.
 */
class TopAByBJob(args: Args)
  extends DailyJobWithKeep(args, classOf[ProtobufTypeForS3PathPartition])
  with TypeAFilters {

  PailSource.source[FooProtobuf](rootpath, structure, directories).read
    // Project every record onto the pair (b, a); all other fields are dropped.
    .mapTo('pailItem -> ('b, 'a)) { record: FooProtobuf => (record.b, calculateA(record)) }
    // Discard rows whose computed `a` is the empty string.
    .filter('a) { value: String => !value.isEmpty }
    // Count how many times each distinct (b, a) pair occurs.
    .groupBy(('b, 'a)) { pairs => pairs.size('count) }
    // Per `b`, keep at most `keep` triples, largest (count, b, a) first, as one list field.
    .groupBy('b) { perB =>
      perB.sortedReverseTake[(Long, String, String)](('count, 'b, 'a) -> 'tcount, keep)
    }
    // Expand each list back into individual (count, b, a) rows.
    .flatMapTo('tcount -> ('count, 'b, 'a)) { top: List[(Long, String, String)] => top }
    .write(Tsv(outputdir))
}
	The job generates a part-file of tab-separated data, one `(count, b, a)` row per line, in the following format:

	2034 cid1 a
	1025 cid1 g
	2034 cid3 g
	1025 cid3 a
	2034 cid6 f
	1025 cid6 b
	/**
	 * Scalding job that emits, for every value of `b`, its most frequent `a` values
	 * together with their occurrence counts.
	 *
	 * Output rows are `(count, b, a)` triples written as tab-separated values.
	 *
	 * NOTE(review): `rootpath`, `structure`, `directories`, `keep` and `outputdir` are not
	 * defined in this class; presumably they are inherited from `DailyJobWithKeep` -- confirm.
	 * `calculateA` is likewise defined elsewhere (presumably in `TypeAFilters`) -- confirm.
	 */
	class TopAByBJob(args: Args)
	  extends DailyJobWithKeep(args, classOf[ProtobufTypeForS3PathPartition])
	  with TypeAFilters {

	  PailSource.source[FooProtobuf](rootpath, structure, directories).read
	    // Project every record onto the pair (b, a); all other fields are dropped.
	    .mapTo('pailItem -> ('b, 'a)) { record: FooProtobuf => (record.b, calculateA(record)) }
	    // Discard rows whose computed `a` is the empty string.
	    .filter('a) { value: String => !value.isEmpty }
	    // Count how many times each distinct (b, a) pair occurs.
	    .groupBy(('b, 'a)) { pairs => pairs.size('count) }
	    // Per `b`, keep at most `keep` triples, largest (count, b, a) first, as one list field.
	    .groupBy('b) { perB =>
	      perB.sortedReverseTake[(Long, String, String)](('count, 'b, 'a) -> 'tcount, keep)
	    }
	    // Expand each list back into individual (count, b, a) rows.
	    .flatMapTo('tcount -> ('count, 'b, 'a)) { top: List[(Long, String, String)] => top }
	    .write(Tsv(outputdir))
	}