Skip to content

Instantly share code, notes, and snippets.

@l2fprod
Last active August 29, 2015 14:25
Show Gist options
  • Save l2fprod/5bf5c9b6a73c0b8ab016 to your computer and use it in GitHub Desktop.
Save l2fprod/5bf5c9b6a73c0b8ab016 to your computer and use it in GitHub Desktop.
SOmusic and IBM Insights for Twitter
{
"search": {
"results": 19,
"current": 19
},
"tweets": [
{
...
"message": {
"gnip": {
"profileLocations": [
{
"geo": {
"type": "point",
"coordinates": [
4.88969,
52.37403
]
},
"address": {
"region": "North Holland",
"countryCode": "NL",
"locality": "Amsterdam",
"country": "Netherlands"
},
"displayName": "Amsterdam, North Holland, Netherlands",
"objectType": "place"
}
],
"urls": [
{
"expanded_url": "http://twitter.com/Bluemix_Plant/status/597277951177003009/photo/1",
"expanded_status": 200,
"url": "http://t.co/ymXVSxDqLT"
},
{
"expanded_url": "https://developer.ibm.com/bluemix/2015/05/08/somusic-and-bluemix/",
"expanded_status": 403,
"url": "http://t.co/0auJetjZZx"
}
],
"language": {
"value": "en"
}
},
"body": "SOmusic - music from social networks #music #bluemix #cloudant http://t.co/0auJetjZZx http://t.co/ymXVSxDqLT",
"favoritesCount": 0,
"link": "http://twitter.com/Bluemix_Plant/statuses/597277951177003009",
"retweetCount": 0,
"twitter_lang": "en",
"postedTime": "2015-05-10T05:52:15.000Z",
"provider": {
"link": "http://www.twitter.com",
"displayName": "Twitter",
"objectType": "service"
},
"actor": {
"twitterTimeZone": "Amsterdam",
"summary": "green and colorful. Powered by #Bluemix My water provider is @hansb001 IBM Innovation Space",
"friendsCount": 40,
"favoritesCount": 10,
"location": {
"displayName": "Amsterdam",
"objectType": "place"
},
"link": "http://www.twitter.com/Bluemix_Plant",
"postedTime": "2015-03-19T14:13:39.000Z",
"image": "https://pbs.twimg.com/profile_images/578561976961470464/cB7TzdGx_normal.jpeg",
"links": [
{
"rel": "me",
"href": "http://www.bluemix.net"
}
],
"listedCount": 7,
"id": "id:twitter.com:3096826149",
"languages": [
"en"
],
"verified": false,
"followersCount": 31,
"utcOffset": "7200",
"statusesCount": 1112,
"displayName": "Bluemix_Plant",
"preferredUsername": "Bluemix_Plant",
"objectType": "person"
},
"object": {
"id": "object:search.twitter.com,2005:597277951177003009",
"summary": "SOmusic - music from social networks #music #bluemix #cloudant http://t.co/0auJetjZZx http://t.co/ymXVSxDqLT",
"link": "http://twitter.com/Bluemix_Plant/statuses/597277951177003009",
"postedTime": "2015-05-10T05:52:15.000Z",
"objectType": "note"
},
"twitter_entities": {
"trends": [],
"symbols": [],
"urls": [
{
"expanded_url": "http://bit.ly/1AOirb6",
"indices": [
63,
85
],
"display_url": "bit.ly/1AOirb6",
"url": "http://t.co/0auJetjZZx"
}
],
"hashtags": [
{
"text": "music",
"indices": [
37,
43
]
},
{
"text": "bluemix",
"indices": [
44,
52
]
},
{
"text": "cloudant",
"indices": [
53,
62
]
}
],
"media": [
{
"id": 597277951000907800,
"sizes": {
"small": {
"w": 340,
"h": 176,
"resize": "fit"
},
"thumb": {
"w": 150,
"h": 150,
"resize": "crop"
},
"medium": {
"w": 600,
"h": 311,
"resize": "fit"
},
"large": {
"w": 640,
"h": 332,
"resize": "fit"
}
},
"media_url_https": "https://pbs.twimg.com/media/CEn1IcWVEAITbdU.png",
"media_url": "http://pbs.twimg.com/media/CEn1IcWVEAITbdU.png",
"expanded_url": "http://twitter.com/Bluemix_Plant/status/597277951177003009/photo/1",
"indices": [
86,
108
],
"id_str": "597277951000907778",
"display_url": "pic.twitter.com/ymXVSxDqLT",
"type": "photo",
"url": "http://t.co/ymXVSxDqLT"
}
],
"user_mentions": []
},
"twitter_filter_level": "low",
"id": "tag:search.twitter.com,2005:597277951177003009",
"twitter_extended_entities": {
"media": [
{
"id": 597277951000907800,
"sizes": {
"small": {
"w": 340,
"h": 176,
"resize": "fit"
},
"thumb": {
"w": 150,
"h": 150,
"resize": "crop"
},
"medium": {
"w": 600,
"h": 311,
"resize": "fit"
},
"large": {
"w": 640,
"h": 332,
"resize": "fit"
}
},
"media_url_https": "https://pbs.twimg.com/media/CEn1IcWVEAITbdU.png",
"media_url": "http://pbs.twimg.com/media/CEn1IcWVEAITbdU.png",
"expanded_url": "http://twitter.com/Bluemix_Plant/status/597277951177003009/photo/1",
"indices": [
86,
108
],
"id_str": "597277951000907778",
"display_url": "pic.twitter.com/ymXVSxDqLT",
"type": "photo",
"url": "http://t.co/ymXVSxDqLT"
}
]
},
"verb": "post",
"generator": {
"link": "http://login.voicestorm.com",
"displayName": "VoiceStorm"
},
"objectType": "activity"
},
"cde": {
"content": {
"sentiment": {
"polarity": "NEUTRAL",
"evidence": []
}
},
"author": {
"location": {
"state": "North Holland",
"country": "Netherlands",
"city": "Amsterdam"
},
"gender": "unknown"
}
}
},
{
"message": {
"gnip": {
"urls": [
{
"expanded_url": "http://twitter.com/flocalvez/status/607879175547060226/photo/1",
"expanded_status": 200,
"url": "http://t.co/PzrnAjOtaW"
}
],
"language": {
"value": "en"
}
},
"body": "awajeet: PCFLB: RT flocalvez: You can review the cool Somusic demo at #IBM booth dotScale & win #Bluemix 30d free … http://t.co/PzrnAjOtaW",
"favoritesCount": 0,
"link": "http://twitter.com/awajeet/statuses/607900213559361536",
"retweetCount": 0,
"twitter_lang": "en",
"postedTime": "2015-06-08T13:21:19.000Z",
"provider": {
"link": "http://www.twitter.com",
"displayName": "Twitter",
"objectType": "service"
},
"actor": {
"twitterTimeZone": null,
"summary": null,
"friendsCount": 249,
"favoritesCount": 2,
"link": "http://www.twitter.com/awajeet",
"postedTime": "2011-11-12T08:57:57.000Z",
"image": "https://pbs.twimg.com/profile_images/558517934985732096/RmcL6MTX_normal.jpeg",
"links": [
{
"rel": "me",
"href": null
}
],
"listedCount": 272,
"id": "id:twitter.com:410586917",
"languages": [
"en"
],
"verified": false,
"followersCount": 254,
"utcOffset": null,
"statusesCount": 85033,
"displayName": "Awajeet Arya",
"preferredUsername": "awajeet",
"objectType": "person"
},
"object": {
"id": "object:search.twitter.com,2005:607900213559361536",
"summary": "awajeet: PCFLB: RT flocalvez: You can review the cool Somusic demo at #IBM booth dotScale & win #Bluemix 30d free … http://t.co/PzrnAjOtaW",
"link": "http://twitter.com/awajeet/statuses/607900213559361536",
"postedTime": "2015-06-08T13:21:19.000Z",
"objectType": "note"
},
"twitter_entities": {
"trends": [],
"symbols": [],
"urls": [],
"hashtags": [
{
"text": "IBM",
"indices": [
70,
74
]
},
{
"text": "Bluemix",
"indices": [
100,
108
]
}
],
"media": [
{
"id": 607879166390763500,
"sizes": {
"small": {
"w": 340,
"h": 255,
"resize": "fit"
},
"thumb": {
"w": 150,
"h": 150,
"resize": "crop"
},
"medium": {
"w": 600,
"h": 450,
"resize": "fit"
},
"large": {
"w": 1024,
"h": 768,
"resize": "fit"
}
},
"media_url_https": "https://pbs.twimg.com/media/CG-e4ZMVAAA4-70.jpg",
"media_url": "http://pbs.twimg.com/media/CG-e4ZMVAAA4-70.jpg",
"expanded_url": "http://twitter.com/flocalvez/status/607879175547060226/photo/1",
"indices": [
120,
142
],
"source_status_id_str": "607879175547060226",
"source_status_id": 607879175547060200,
"id_str": "607879166390763520",
"display_url": "pic.twitter.com/PzrnAjOtaW",
"type": "photo",
"url": "http://t.co/PzrnAjOtaW"
}
],
"user_mentions": []
},
"twitter_filter_level": "low",
"id": "tag:search.twitter.com,2005:607900213559361536",
"twitter_extended_entities": {
"media": [
{
"id": 607879166390763500,
"sizes": {
"small": {
"w": 340,
"h": 255,
"resize": "fit"
},
"thumb": {
"w": 150,
"h": 150,
"resize": "crop"
},
"medium": {
"w": 600,
"h": 450,
"resize": "fit"
},
"large": {
"w": 1024,
"h": 768,
"resize": "fit"
}
},
"media_url_https": "https://pbs.twimg.com/media/CG-e4ZMVAAA4-70.jpg",
"media_url": "http://pbs.twimg.com/media/CG-e4ZMVAAA4-70.jpg",
"expanded_url": "http://twitter.com/flocalvez/status/607879175547060226/photo/1",
"indices": [
120,
142
],
"source_status_id_str": "607879175547060226",
"source_status_id": 607879175547060200,
"id_str": "607879166390763520",
"display_url": "pic.twitter.com/PzrnAjOtaW",
"type": "photo",
"url": "http://t.co/PzrnAjOtaW"
},
{
"id": 607879173323956200,
"sizes": {
"small": {
"w": 340,
"h": 255,
"resize": "fit"
},
"thumb": {
"w": 150,
"h": 150,
"resize": "crop"
},
"medium": {
"w": 600,
"h": 450,
"resize": "fit"
},
"large": {
"w": 1024,
"h": 768,
"resize": "fit"
}
},
"media_url_https": "https://pbs.twimg.com/media/CG-e4zBVIAADwfM.jpg",
"media_url": "http://pbs.twimg.com/media/CG-e4zBVIAADwfM.jpg",
"expanded_url": "http://twitter.com/flocalvez/status/607879175547060226/photo/1",
"indices": [
120,
142
],
"source_status_id_str": "607879175547060226",
"source_status_id": 607879175547060200,
"id_str": "607879173323956224",
"display_url": "pic.twitter.com/PzrnAjOtaW",
"type": "photo",
"url": "http://t.co/PzrnAjOtaW"
}
]
},
"verb": "post",
"generator": {
"link": "http://ifttt.com",
"displayName": "IFTTT"
},
"objectType": "activity"
},
"cde": {
"content": {
"sentiment": {
"polarity": "POSITIVE",
"evidence": [
{
"sentimentTerm": "cool",
"polarity": "POSITIVE"
},
{
"sentimentTerm": "win",
"polarity": "POSITIVE"
},
{
"sentimentTerm": "free",
"polarity": "POSITIVE"
}
]
}
},
"author": {
"location": {},
"gender": "male"
}
}
},
...
],
"related": {
"next": {
"href": "https://cdeservice.mybluemix.net/api/v1/messages/search?q=somusic+bluemix&from=19&size=20"
}
}
}
/**
 * Queries the Insights for Twitter service. Expects a "twitterinsights" service
 * bound to the app.
 *
 * <p>
 * The collector searches for tweets matching {@link #getQuery()} posted between
 * {@link #getStart()} and {@link #getEnd()}, follows the "next" link of each
 * result page, and turns every resolved URL for which a {@link Provider} exists
 * into a {@link Song}.
 *
 * <p>
 * Note: the shared {@link SimpleDateFormat} instances are not synchronized;
 * callers using this class from several threads must serialize access
 * externally.
 */
public class TwitterCollector extends Collector {

  /** Maximum number of characters of a reply echoed to the log. */
  private static final int MAX_LOGGED_REPLY = 1024;

  private GetWithThrottle get = new GetWithThrottle();

  /** Timestamp format used by the "posted:" operator of the search query. */
  static final SimpleDateFormat postedFormat = new SimpleDateFormat(
      "yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ENGLISH);

  // Must be a static initializer: an instance initializer would leave the
  // shared format in the JVM default time zone until an instance is created.
  static {
    postedFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
  }

  /**
   * Tells whether a tweet was posted strictly inside the collection window.
   *
   * @param tweet
   *          the tweet to check; its message must not be null
   * @return true if the tweet has a parseable creation date that lies after
   *         the start and before the end of the window
   */
  private boolean accept(Tweet tweet) {
    Date createdAt = tweet.message.getCreatedAt();
    // getCreatedAt() returns null when the timestamp is missing or malformed;
    // such a tweet is rejected instead of aborting the whole collection
    return createdAt != null && createdAt.after(getStart())
        && createdAt.before(getEnd());
  }

  /**
   * Runs the search and converts matching tweets into songs.
   *
   * @return the songs found so far; never null. If an error occurs it is
   *         logged and the songs collected before the error are returned.
   */
  public List<Song> collect() {
    List<Song> results = new ArrayList<Song>();
    try {
      Log.info("twitter",
          "Collecting tweets between " + postedFormat.format(getStart())
              + " and " + postedFormat.format(getEnd()));

      // lookup the twitter credentials
      VcapNode services = VcapNode.root();
      String username = services.byName("twitterinsights").n("credentials")
          .s("username");
      String password = services.byName("twitterinsights").n("credentials")
          .s("password");
      String host = services.byName("twitterinsights").n("credentials")
          .s("host");
      get.setBasicAuthorization(username, password);

      String twitterInsights = "https://" + host;

      // build the first URL
      String nextPage = twitterInsights + "/api/v1/messages/search?size=200&q="
          + getQuery() + "+posted:" + postedFormat.format(getStart()) + ","
          + postedFormat.format(getEnd());

      Gson gson = new Gson();

      // loop as long as twitter has results for the query
      while (nextPage != null) {
        Log.info("twitter", "Fetching " + nextPage);
        String jsonResults = get.getBody(nextPage);
        if (jsonResults != null && jsonResults.length() > 0) {
          // log only the beginning of large replies
          Log.info("twitter", "Got reply: " + jsonResults.substring(0,
              Math.min(jsonResults.length(), MAX_LOGGED_REPLY)));
        }

        // convert the JSON in a Java object we can use
        Response response = gson.fromJson(jsonResults, Response.class);
        if (response == null || response.tweets == null
            || response.tweets.isEmpty()) {
          Log.info("twitter", "No more results");
          break;
        }

        // process tweets, keep only the one within our timeframe
        for (Tweet tweet : response.tweets) {
          collectSongs(tweet, results);
        }

        // stop cleanly when the reply carries no link to a following page
        nextPage = nextPageOf(response);
      }
    } catch (Exception e) {
      e.printStackTrace();
      Log.severe("twitter", e);
    }
    Log.info("twitter", "Found " + results.size() + " tweets");
    return results;
  }

  /** Adds one song per resolved link of the tweet that a provider can manage. */
  private void collectSongs(Tweet tweet, List<Song> results) {
    if (tweet == null || tweet.message == null || tweet.message.gnip == null
        || tweet.message.gnip.urls == null || !accept(tweet)) {
      return;
    }
    for (Url url : tweet.message.gnip.urls) {
      if (url == null || url.expanded_url == null) {
        continue;
      }
      // look at the resolved links in the tweet
      Provider provider = Provider.getProvider(url.expanded_url);
      // and if we have a provider able to manage this url, persist it
      if (provider != null) {
        Log.info("twitter", "Adding " + url.expanded_url);
        Song song = new Song();
        song.setId(UUID.randomUUID().toString());
        song.setSourceId(tweet.message.id);
        song.setLink(url.expanded_url);
        song.setCreatedAt(tweet.message.getCreatedAt());
        results.add(song);
        Log.info("twitter", " Added " + song);
      }
    }
  }

  /** Returns the URL of the following result page, or null if there is none. */
  private static String nextPageOf(Response response) {
    if (response.related == null || response.related.next == null) {
      return null;
    }
    return response.related.next.href;
  }

  /**
   * Java representation of the Insights JSON to make it easier to process.
   */
  public static class Response {
    Related related;
    List<Tweet> tweets;
  }

  /** The "related" section of a reply, holding the pagination link. */
  public static class Related {
    Next next;
  }

  /** Link to the following page of results. */
  public static class Next {
    String href;
  }

  /** One entry of the "tweets" array. */
  public static class Tweet {
    Message message;
  }

  /** The "message" object of a tweet. */
  public static class Message {
    static final String TWITTER = "yyyy-MM-dd'T'HH:mm:ss.000'Z'";
    static final SimpleDateFormat sf = new SimpleDateFormat(TWITTER,
        Locale.ENGLISH);

    // static so the shared format is in UTC before any instance exists
    static {
      sf.setTimeZone(TimeZone.getTimeZone("UTC"));
    }

    Gnip gnip;
    String id;
    String postedTime;

    /**
     * Parses {@link #postedTime}.
     *
     * @return the creation date, or null if the timestamp is missing or does
     *         not match {@link #TWITTER}
     */
    public Date getCreatedAt() {
      if (postedTime == null) {
        return null;
      }
      try {
        return sf.parse(postedTime);
      } catch (ParseException e) {
        e.printStackTrace();
        return null;
      }
    }
  }

  /** The "gnip" enrichment of a message; only the resolved URLs are read. */
  public static class Gnip {
    Url[] urls;
  }

  /** A shortened URL found in a tweet together with its resolved target. */
  public static class Url {
    String expanded_url;
    String url;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment