Created
July 9, 2012 14:47
-
-
Save iwek/3076954 to your computer and use it in GitHub Desktop.
PHP Twitter Results without Retweets and Duplicates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Twitter Search Results PHP script that removes retweets and similar tweets.
 *
 * @author http://techslides.com
 * @link http://techslides.com/grab-twitter-results-with-php-and-remove-retweets-and-duplicates
 */
/**
 * Fetch a URL with cURL and decode the response body as JSON.
 *
 * @param string $url Absolute URL to request.
 * @return mixed Decoded JSON (objects become associative arrays), or null when
 *               the transfer fails or the body is not valid JSON.
 */
function curl($url){
    $ch = curl_init();
    if ($ch === false) {
        // cURL could not be initialised; report it the same way as a failed request.
        return null;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HEADER, false);
    // Bound the request so an unreachable or slow endpoint cannot stall the page indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 15);

    $result = curl_exec($ch);
    curl_close($ch);

    // With CURLOPT_RETURNTRANSFER set, curl_exec() yields the body string on success
    // and false on failure; never hand a non-string to json_decode().
    if (!is_string($result)) {
        return null;
    }

    return json_decode($result, true);
}
/**
 * Turn bare http/https/ftp URLs inside a string into HTML anchor tags.
 *
 * A URL is only linked when it is at the start of the string or preceded by a
 * character other than a quote, "=" or ">", so URLs that already sit inside an
 * attribute or directly after a tag are left alone.
 *
 * @param string $str        Text (or HTML fragment) to scan for URLs.
 * @param array  $attributes Extra attributes for each generated <a> tag, as
 *                           name => value pairs. Values are HTML-escaped
 *                           (existing entities are kept as they are).
 * @return string The input with every matched URL wrapped in <a href="...">.
 */
function autolink($str, $attributes=array()) {
    $attrs = '';
    foreach ($attributes as $attribute => $value) {
        // Escape the value so a quote inside it cannot terminate the attribute early.
        $attrs .= ' ' . $attribute . '="' . htmlspecialchars($value, ENT_QUOTES, 'UTF-8', false) . '"';
    }

    // "(^|...)" lets a URL at the very start of the string match without the
    // pad-with-a-space / substr() dance, which returned false for '' on PHP < 8.
    $pattern = '`(^|[^"=\'>])((http|https|ftp)://[^\s<]+[^\s<\.)])`i';

    // A callback (instead of a replacement string) keeps "$1"/"\1" sequences in
    // attribute values from being interpreted as backreferences.
    $linked = preg_replace_callback(
        $pattern,
        function ($m) use ($attrs) {
            // The URL pattern permits quotes, so escape it before using it as an attribute value.
            $href = htmlspecialchars($m[2], ENT_QUOTES, 'UTF-8', false);
            return $m[1] . '<a href="' . $href . '"' . $attrs . '>' . $m[2] . '</a>';
        },
        $str
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the unlinked text.
    return $linked === null ? $str : $linked;
}
/**
 * Extract the tweet texts from a decoded search response and prepare them for
 * HTML output.
 *
 * Each entry of $json['results'] contributes its 'text' value with every byte
 * outside printable ASCII (0x20-0x7F) and line feed removed, HTML special
 * characters escaped, and URLs turned into links via autolink().
 *
 * @param mixed $json Decoded response; expected to be an array with a 'results'
 *                    list. Anything else (e.g. null after a failed request)
 *                    yields an empty result.
 * @return array List of HTML-ready tweet strings, in response order.
 */
function getResults($json){
    $results = array();

    // A failed or malformed response has no usable 'results' list.
    if (!is_array($json) || !isset($json['results']) || !is_array($json['results'])) {
        return $results;
    }

    foreach ($json['results'] as $e) {
        if (!is_array($e) || !isset($e['text']) || !is_string($e['text'])) {
            continue;
        }

        // Drop everything that is not printable ASCII or a line feed.
        $clean = preg_replace('/[^\x20-\x7F\x0A]+/', '', $e['text']);

        // Tweet text is untrusted and ends up in the page, so escape <, > and &.
        // NOTE(review): whether the API already entity-encodes the text is not
        // visible here; double_encode=false leaves existing entities untouched either way.
        $clean = htmlspecialchars($clean, ENT_NOQUOTES, 'UTF-8', false);

        $results[] = autolink($clean);
    }

    return $results;
}
/**
 * Collapse identical and near-identical strings, keeping the first of each group.
 *
 * Entries are visited in order; an entry is kept only if it is neither identical
 * to, nor more than $threshold percent similar (per similar_text()) to, an entry
 * that has already been kept. Unlike a "drop anything that resembles something
 * else" pass, this leaves one representative of every group and also removes
 * exact duplicates.
 *
 * @param array     $results   Strings to de-duplicate.
 * @param int|float $threshold Similarity percentage above which two strings are
 *                             treated as duplicates (default 70).
 * @return array Re-indexed list of the retained strings, in original order.
 */
function removeSimilar($results, $threshold = 70){
    $kept = array();

    foreach ($results as $candidate) {
        $isDuplicate = false;

        foreach ($kept as $existing) {
            // Identical strings are checked explicitly: similar_text() reports 0%
            // for two empty strings, and the strict comparison avoids numeric-string juggling.
            if ($candidate === $existing) {
                $isDuplicate = true;
                break;
            }

            similar_text($candidate, $existing, $percent);
            if ($percent > $threshold) {
                $isDuplicate = true;
                break;
            }
        }

        if (!$isDuplicate) {
            $kept[] = $candidate;
        }
    }

    return $kept;
}
// Read the search term from the query string; a missing or non-string value
// (e.g. term[]=x) is treated the same as an empty one.
$search = (isset($_GET['term']) && is_string($_GET['term'])) ? trim($_GET['term']) : '';
if ($search === '') {
    // term param not passed in url
    exit;
}

// URL-encode the term (spaces become "+") so characters such as "&" or "#"
// cannot corrupt or inject query parameters, then exclude retweets.
$term = urlencode($search) . '+-filter:retweets';

// grab 100 recent results and include entities
$url = 'http://search.twitter.com/search.json?q=' . $term . '&rpp=100&include_entities=true&result_type=recent';

// make a curl request and return json
$json = curl($url);

// pass the json to a function that returns an array of tweets
$results = getResults($json);

// pass the tweets to a function that removes similar tweets
$clean = removeSimilar($results);

// print the results using an unordered list
echo "<ul>";
foreach ($clean as $a) {
    echo "<li>" . $a . "</li>";
}
echo "</ul>";
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Twitter Script Demo