Skip to content

Instantly share code, notes, and snippets.

@stdatalabs
Last active October 24, 2016 08:00
Show Gist options
  • Save stdatalabs/47e83a6cdae88cf94454db2c5c60be84 to your computer and use it in GitHub Desktop.
Save stdatalabs/47e83a6cdae88cf94454db2c5c60be84 to your computer and use it in GitHub Desktop.
A Storm topology that counts the top words used in tweets about a topic. More at stdatalabs.blogspot.com
import java.util.*;
import com.stdatalabs.Storm.IgnoreWordsBolt;
import com.stdatalabs.Storm.TwitterSampleSpout;
import com.stdatalabs.Storm.WordCounterBolt;
import com.stdatalabs.Storm.JsonWordSplitterBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
/**
 * Builds the Twitter word-count topology and submits it to an in-process
 * {@link LocalCluster}.
 *
 * <p>Arguments:
 * {@code <consumerKey> <consumerSecret> <accessToken> <accessTokenSecret> [keyword ...]}
 * <ul>
 * <li>{@code consumerKey} - Twitter consumer key</li>
 * <li>{@code consumerSecret} - Twitter consumer secret</li>
 * <li>{@code accessToken} - Twitter access token</li>
 * <li>{@code accessTokenSecret} - Twitter access token secret</li>
 * <li>{@code keyword ...} - every remaining argument is handed to the spout as a
 * keyword to filter tweets</li>
 * </ul>
 *
 * More discussion at stdatalabs.blogspot.com
 *
 * @author Sachin Thirumala
 */
public class TwitterWordCountTopology {

    /** Number of leading arguments that carry the Twitter credentials. */
    private static final int CREDENTIAL_ARG_COUNT = 4;

    /**
     * Wires spout and bolts together and submits the topology to a local cluster.
     *
     * @param args the four Twitter credentials followed by zero or more keywords
     * @throws IllegalArgumentException if fewer than four arguments are supplied
     * @throws Exception if building or submitting the topology fails
     */
    public static void main(String[] args) throws Exception {
        // Fail early with a usage hint instead of an opaque ArrayIndexOutOfBoundsException.
        if (args == null || args.length < CREDENTIAL_ARG_COUNT) {
            int supplied = (args == null) ? 0 : args.length;
            throw new IllegalArgumentException(
                    "Usage: TwitterWordCountTopology <consumerKey> <consumerSecret> "
                            + "<accessToken> <accessTokenSecret> [keyword ...] (got "
                            + supplied + " argument(s))");
        }

        String consumerKey = args[0];
        String consumerSecret = args[1];
        String accessToken = args[2];
        String accessTokenSecret = args[3];

        // Everything after the credentials is a keyword; copyOfRange already returns
        // a fresh array, so no defensive clone of args is needed.
        String[] keyWords = Arrays.copyOfRange(args, CREDENTIAL_ARG_COUNT, args.length);

        Config config = new Config();
        config.setDebug(false);

        // Linear pipeline: spout -> splitter -> ignore-list filter -> counter.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("twitter-spout",
                new TwitterSampleSpout(consumerKey, consumerSecret, accessToken, accessTokenSecret, keyWords));
        // NOTE(review): StringWordSplitterBolt is not among the imports visible in this
        // file (only JsonWordSplitterBolt is) - confirm which splitter is intended.
        builder.setBolt("WordSplitterBolt", new StringWordSplitterBolt(5)).shuffleGrouping("twitter-spout");
        builder.setBolt("IgnoreWordsBolt", new IgnoreWordsBolt()).shuffleGrouping("WordSplitterBolt");
        builder.setBolt("WordCounterBolt", new WordCounterBolt(5, 5 * 60, 50)).shuffleGrouping("IgnoreWordsBolt");

        // The cluster is deliberately never shut down here; presumably the topology
        // keeps running until the process is terminated - TODO confirm.
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("TwitterWordCountStorm", config, builder.createTopology());
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment