Skip to content

Instantly share code, notes, and snippets.

@qrtt1
Created November 13, 2014 14:27
Show Gist options
  • Save qrtt1/5f99111a42fc361d0e6d to your computer and use it in GitHub Desktop.
Save qrtt1/5f99111a42fc361d0e6d to your computer and use it in GitHub Desktop.
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
/**
 * Spark driver that reads tab-separated log lines, keeps the entries whose
 * file name looks like a 720p MP4, and counts them per bitrate marker
 * ("2m", "3m", "4m", or "unknown" when none of the markers is present).
 *
 * <p>The resulting (marker, count) pairs are collected to the driver and
 * printed to standard output.
 */
public class Spark720pStats {

    /**
     * Runs the job against {@code s3n://mycdn.logs/*} with a local master.
     *
     * @param args command-line arguments (unused)
     */
    @SuppressWarnings("serial")
    public static void main(String[] args) {
        String appName = "720pStats";
        String master = "local[10]";
        SparkConf conf = new SparkConf().setAppName(appName).setMaster(master);
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Project-local setup hook; presumably configures S3 access on the
            // context -- confirm against the Configuration class.
            Configuration.configure(sc);

            JavaPairRDD<String, Integer> distFile = sc.textFile("s3n://mycdn.logs/*")
                    .map(new Function<String, String>() {
                        @Override
                        public String call(String v1) throws Exception {
                            String[] v = v1.split("\t");
                            // Lines without exactly 19 fields are mapped to "" so
                            // that the filter stage below discards them.
                            if (v.length != 19) {
                                return "";
                            }
                            /* map full log to filename */
                            return v[7];
                        }
                    }).filter(new Function<String, Boolean>() {
                        @Override
                        public Boolean call(String v1) throws Exception {
                            /* filter 720p mp4 */
                            return StringUtils.endsWithIgnoreCase(v1, ".mp4")
                                    && StringUtils.contains(v1, "720p");
                        }
                    }).mapToPair(new PairFunction<String, String, Integer>() {
                        @Override
                        public Tuple2<String, Integer> call(String t) throws Exception {
                            /* count different bitrates */
                            // First marker found as a substring wins; the order of
                            // the array therefore decides ties.
                            for (String key : new String[] { "2m", "3m", "4m" }) {
                                if (StringUtils.contains(t, key)) {
                                    return new Tuple2<String, Integer>(key, 1);
                                }
                            }
                            return new Tuple2<String, Integer>("unknown", 1);
                        }
                    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                        @Override
                        public Integer call(Integer v1, Integer v2) throws Exception {
                            return v1 + v2;
                        }
                    });

            System.out.println(distFile.collect());
        } finally {
            // Always shut the context down, even when the job fails, so the
            // driver does not leak the context and its worker threads.
            sc.stop();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment