Created
June 17, 2010 17:50
-
-
Save wadey/442463 to your computer and use it in GitHub Desktop.
JavaScript parser for Tweet Entities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* twitter-entities.js | |
* This function converts a tweet with "entity" metadata | |
* from plain text to linkified HTML. | |
* | |
* See the documentation here: http://dev.twitter.com/pages/tweet_entities | |
* Basically, add ?include_entities=true to your timeline call | |
* | |
* Copyright 2010, Wade Simmons | |
* Licensed under the MIT license | |
* http://wades.im/mons | |
* | |
* Requires jQuery | |
*/ | |
/**
 * Escape a value so it can be embedded safely in HTML, either as text
 * content or inside a single- or double-quoted attribute value.
 *
 * Quotes are escaped in addition to &, < and > because callers in this file
 * place the result inside quoted attributes (href='...', title='...').
 * This is a pure string operation; it needs neither jQuery nor a DOM.
 *
 * @param {*} text - Value to escape; non-strings are converted with String().
 *   null and undefined yield an empty string.
 * @returns {string} The escaped text.
 */
function escapeHTML(text) {
    var replacements = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\"": "&quot;",
        "'": "&#39;"
    };
    if (text == null) {
        return "";
    }
    return String(text).replace(/[&<>"']/g, function (ch) {
        return replacements[ch];
    });
}
/**
 * Convert a tweet carrying "entities" metadata from plain text to
 * linkified HTML.
 *
 * URLs, hashtags and user mentions listed in tweet.entities are wrapped in
 * <a> elements; every other part of the text is passed through escapeHTML.
 * When tweet.entities is absent the whole text is simply escaped.
 *
 * @param {Object} tweet - Object with a `text` string and, optionally, an
 *   `entities` object holding `urls`, `hashtags` and `user_mentions` arrays.
 *   Every entry carries `indices` ([start, end]) plus `url`, `text`, or
 *   `name`/`screen_name` respectively. Missing arrays are treated as empty.
 * @returns {string} HTML markup for the tweet.
 */
function linkify_entities(tweet) {
    var text = tweet.text;
    var entities = tweet.entities;
    if (!entities) {
        return escapeHTML(text);
    }

    // Escape a value for use inside a quoted attribute. The quotes are
    // replaced here as well so the attribute cannot be broken out of even
    // if escapeHTML leaves quote characters untouched.
    function escapeAttr(value) {
        return escapeHTML(value).replace(/"/g, "&quot;").replace(/'/g, "&#39;");
    }

    // Entity indices count Unicode code points, whereas JavaScript strings
    // are indexed by UTF-16 code units. offsets[k] is the code-unit offset
    // of code point k; the final element is text.length.
    var offsets = [];
    var unit;
    var next;
    var u;
    for (u = 0; u < text.length; ++u) {
        offsets.push(u);
        unit = text.charCodeAt(u);
        if (unit >= 0xD800 && unit <= 0xDBFF) {
            // charCodeAt past the end yields NaN, which fails the range test.
            next = text.charCodeAt(u + 1);
            if (next >= 0xDC00 && next <= 0xDFFF) {
                ++u; // the low surrogate belongs to the same code point
            }
        }
    }
    offsets.push(text.length);
    var count = offsets.length - 1; // number of code points in text

    // Maps an entity's start index to its end index and renderer. A later
    // registration at the same start index replaces an earlier one.
    var index_map = {};
    function register(list, render) {
        var entries = list || [];
        var k;
        for (k = 0; k < entries.length; ++k) {
            index_map[entries[k].indices[0]] = {
                end: entries[k].indices[1],
                entry: entries[k],
                render: render
            };
        }
    }

    register(entities.urls, function (entry, label) {
        return "<a href='" + escapeAttr(entry.url) + "'>" + escapeHTML(label) + "</a>";
    });
    register(entities.hashtags, function (entry, label) {
        // encodeURIComponent yields valid percent-encoding for non-ASCII
        // hashtags (the deprecated escape() emits %uXXXX sequences).
        return "<a href='http://twitter.com/search?q=" + escapeAttr(encodeURIComponent("#" + entry.text)) + "'>" + escapeHTML(label) + "</a>";
    });
    register(entities.user_mentions, function (entry, label) {
        return "<a title='" + escapeAttr(entry.name) + "' href='http://twitter.com/" + escapeAttr(entry.screen_name) + "'>" + escapeHTML(label) + "</a>";
    });

    var result = "";
    var last = 0; // code-point index up to which text has been emitted
    var i = 0;
    var hit;
    var end;
    // Walk the text by code point, looking for entity starts in index_map.
    while (i < count) {
        hit = index_map[i];
        end = hit ? Math.min(hit.end, count) : i;
        // Only entities that span at least one code point are rendered;
        // an empty or reversed range would otherwise never advance i.
        if (end > i) {
            if (i > last) {
                result += escapeHTML(text.substring(offsets[last], offsets[i]));
            }
            result += hit.render(hit.entry, text.substring(offsets[i], offsets[end]));
            i = end;
            last = end;
        } else {
            ++i;
        }
    }
    if (count > last) {
        result += escapeHTML(text.substring(offsets[last]));
    }
    return result;
}
It seems that Unicode emoji disturb the way it parses the string.
There is an offset of one character per emoji placed before any given entity.
Edit: Never mind, got it to work. Fully functional with emojis and HTML special chars here: https://gist.github.com/LenaicTerrier/112880ee39723d182f71
Not pretty, but does the job. (I also used @thilo and @dcpesses contribs)
Would you be ok to publish this as a Bower repository?
Does the job, but it's better/safer to use Twitter's own text processing library:
https://github.com/twitter/twitter-text
For anybody using Ember.js: ember-cli-twitter-entities
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks, saved me some work. I added the ability to show inline media in my fork here: https://gist.github.com/thilo/8651530.