Skip to content

Instantly share code, notes, and snippets.

@iwek
Created July 9, 2012 14:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iwek/3076954 to your computer and use it in GitHub Desktop.
Save iwek/3076954 to your computer and use it in GitHub Desktop.
PHP Twitter Results without Retweets and Duplicates
<?php
/**
* Twitter Search Results PHP script that removes retweets and similar tweets.
*
* @author http://techslides.com
* @link http://techslides.com/grab-twitter-results-with-php-and-remove-retweets-and-duplicates
*/
/**
 * Fetch a URL with cURL and decode the response body as JSON.
 *
 * @param string $url Address to request.
 * @return mixed Decoded JSON (objects become associative arrays), or null when
 *               the transfer fails or the body is not valid JSON.
 */
function curl($url){
    $ch = curl_init();
    if ($ch === false) {
        return null;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HEADER, false);
    // Without explicit limits a stalled remote host would block the page indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $body = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on a transport error; report that as "no data"
    // rather than handing a boolean to json_decode().
    if ($body === false) {
        return null;
    }

    return json_decode($body, true);
}
/**
 * Turn bare http/https/ftp URLs inside a string into HTML anchor tags.
 *
 * A URL that directly follows a quote, "=" or ">" is left alone, so markup
 * that is already linked is not wrapped a second time. A trailing "." or ")"
 * is not treated as part of the URL.
 *
 * @param string $str        Text (or HTML fragment) to scan for URLs.
 * @param array  $attributes Extra attributes for every generated <a> tag, as name => value.
 * @return string The input with each matched URL wrapped in an anchor; the
 *                input unchanged if the regex engine reports an error.
 */
function autolink($str, $attributes=array()) {
    $attrs = '';
    foreach ($attributes as $attribute => $value) {
        // Escape the value so a quote inside it cannot end the attribute early.
        $safeValue = htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
        $attrs .= " {$attribute}=\"{$safeValue}\"";
    }

    // The pattern requires one character in front of every URL. Pad the string
    // so a URL at the very start can match; the padding is stripped again below.
    $padded = ' ' . $str;

    $linked = preg_replace_callback(
        '`([^"=\'>])((http|https|ftp)://[^\s<]+[^\s<\.)])`i',
        function ($match) use ($attrs) {
            // The URL may contain quotes; escape it before placing it inside
            // href="..." so it cannot break out of the attribute. Existing
            // entities (e.g. &amp;) are left as they are (double_encode = false).
            $url = htmlspecialchars($match[2], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            return $match[1] . '<a href="' . $url . '"' . $attrs . '>' . $url . '</a>';
        },
        $padded
    );

    // preg_replace_callback() returns null on a PCRE error.
    if ($linked === null) {
        return $str;
    }

    return substr($linked, 1);
}
/**
 * Extract the tweet texts from a decoded search response.
 *
 * Each text is reduced to bytes 0x20-0x7F plus newline (so every multibyte
 * character is dropped) and then passed through autolink().
 *
 * @param mixed $json Decoded response; expected to be an array with a "results" list
 *                    whose entries carry a "text" key.
 * @return array List of cleaned, auto-linked tweet strings; empty when the
 *               response is missing or has no usable "results" list.
 */
function getResults($json){
    $results = array();

    // A failed request yields null and an error payload may lack "results";
    // treat both as "no tweets" instead of triggering warnings.
    if (!is_array($json) || !isset($json["results"]) || !is_array($json["results"])) {
        return $results;
    }

    foreach ($json["results"] as $e) {
        if (!is_array($e) || !isset($e['text'])) {
            continue;
        }
        // Clean up weird characters: strip everything outside 0x20-0x7F and newline.
        $clean = preg_replace('/[^\x20-\x7F\x0A]+/', '', $e['text']);
        $results[] = autolink($clean);
    }

    return $results;
}
/**
 * Drop tweets that duplicate, or closely resemble, an earlier tweet.
 *
 * The list is walked in order and each tweet is compared with the ones already
 * kept, using PHP's similar_text() percentage. The first tweet of every group
 * of look-alikes survives; later exact or near duplicates are discarded.
 *
 * @param array     $results   Tweet strings.
 * @param int|float $threshold Similarity percentage above which two tweets
 *                             count as the same (default 70).
 * @return array The retained tweets, in their original order.
 */
function removeSimilar($results, $threshold = 70){
    $kept = array();

    foreach ($results as $candidate) {
        $isDuplicate = false;

        foreach ($kept as $existing) {
            // Identical strings are duplicates by definition; skip the costly comparison.
            if ($candidate === $existing) {
                $isDuplicate = true;
                break;
            }

            similar_text($candidate, $existing, $percent);
            if ($percent > $threshold) {
                $isDuplicate = true;
                break;
            }
        }

        if (!$isDuplicate) {
            $kept[] = $candidate;
        }
    }

    return $kept;
}
// Read the search term from the query string. A missing, non-string or blank
// value means there is nothing to search for, so stop without output.
$search = (isset($_GET["term"]) && is_string($_GET["term"])) ? trim($_GET["term"]) : '';
if ($search === '') {
    //term param not passed in url
    exit;
}

// Append the operator that excludes retweets, then URL-encode the whole query
// so spaces become "+" and characters such as "&" or "#" cannot corrupt the URL.
$term = urlencode($search . " -filter:retweets");

//grab 100 recent results and include entities
// NOTE(review): this unauthenticated search.json endpoint is believed to be
// retired — confirm and migrate to a currently supported API.
$url = 'http://search.twitter.com/search.json?q='.$term.'&rpp=100&include_entities=true&result_type=recent';

//make a curl request and return json
$json = curl($url);

//pass the json to a function that returns an array of tweets
// Only hand over a well-formed response; a failed request or an error payload
// is treated as an empty result set.
$results = (is_array($json) && isset($json["results"]) && is_array($json["results"]))
    ? getResults($json)
    : array();

//pass the tweets to a function that removes similar tweets
$clean = removeSimilar($results);

//print the results using an unordered list
// Items are emitted as-is because autolink() has already turned them into HTML.
echo "<ul>";
foreach ($clean as $a) {
    echo "<li>".$a."</li>";
}
echo "</ul>";
?>
@iwek
Copy link
Author

iwek commented Jul 11, 2012

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment