Skip to content

Instantly share code, notes, and snippets.

@kiang
Created March 25, 2014 14:33
Show Gist options
  • Save kiang/9763078 to your computer and use it in GitHub Desktop.
Save kiang/9763078 to your computer and use it in GitHub Desktop.
A small script to collect links and extract titles
<?php
/**
 * Link collector: stores a list of URLs and extracts the <title> of each page.
 *
 * Storage (relative to this script):
 *  - tmp/list        one URL per line
 *  - tmp/<md5(url)>  cached body of each fetched page
 *
 * Request handling:
 *  - ?op=all       lists every stored URL with its title and rewrites tmp/list
 *                  de-duplicated and naturally sorted.
 *  - anything else appends the not-yet-known URLs posted in $_POST['urls'] to
 *                  tmp/list and lists only those new URLs with their titles.
 *
 * The result ("url<TAB>title" lines) is left in $urlText for the template
 * that follows this block.
 */

/**
 * Tells whether $url is an absolute http:// or https:// URL.
 *
 * Anything else must never reach file_get_contents(): user input such as
 * "/etc/passwd" or "php://..." would otherwise be read from the server itself.
 * NOTE(review): this does not stop requests to internal hosts (SSRF).
 *
 * @param string $url
 * @return bool
 */
function linkIsWebUrl($url)
{
    return 1 === preg_match('#^https?://#i', $url);
}

/**
 * Returns the body of $url, downloading it into the cache on first use.
 *
 * A cache file shorter than 10 bytes is treated as a failed download and is
 * fetched again.
 *
 * @param string $cacheDir directory holding the cache files
 * @param string $url      page to load
 * @return string page body, or '' when the URL is not fetchable
 */
function linkFetchCached($cacheDir, $url)
{
    if (!linkIsWebUrl($url)) {
        return '';
    }
    $cache = $cacheDir . '/' . md5($url);
    if (file_exists($cache) && filesize($cache) >= 10) {
        $body = file_get_contents($cache);
        return false === $body ? '' : $body;
    }
    $body = file_get_contents($url);
    if (false === $body) {
        return '';
    }
    file_put_contents($cache, $body);
    return $body;
}

/**
 * Extracts the raw text between <title ...> and </title>.
 *
 * @param string $html
 * @return string single-line, trimmed title; '' when the page has no complete
 *                title element
 */
function linkExtractTitle($html)
{
    $open = stripos($html, '<title');
    if (false === $open) {
        return '';
    }
    // The opening tag may carry attributes, so look for its closing '>'.
    $start = strpos($html, '>', $open);
    if (false === $start) {
        return '';
    }
    $start++;
    $end = stripos($html, '</title>', $start);
    if (false === $end) {
        return '';
    }
    // Line breaks and tabs would corrupt the "url<TAB>title" line format.
    $title = str_replace(array("\r", "\n", "\t"), ' ', substr($html, $start, $end - $start));
    return trim($title);
}

/**
 * Finds the first charset declaration in the page (meta tag, either form).
 *
 * @param string $html
 * @return string|null declared charset name, or null when none is declared
 */
function linkDetectCharset($html)
{
    if (preg_match('/charset\s*=\s*["\']?\s*([A-Za-z0-9._:-]+)/i', $html, $match)) {
        return $match[1];
    }
    return null;
}

/**
 * Tells whether mbstring knows the encoding $name (canonical name or alias).
 *
 * Checked up front because mb_convert_encoding() warns (PHP < 8) or throws
 * (PHP 8) on an unknown encoding, and the name comes from a remote page.
 *
 * @param string $name
 * @return bool
 */
function linkIsKnownEncoding($name)
{
    static $known = null;
    if (null === $known) {
        $known = array();
        foreach (mb_list_encodings() as $encoding) {
            $known[strtolower($encoding)] = true;
            $aliases = mb_encoding_aliases($encoding);
            if (is_array($aliases)) {
                foreach ($aliases as $alias) {
                    $known[strtolower($alias)] = true;
                }
            }
        }
    }
    return isset($known[strtolower($name)]);
}

/**
 * Returns the page title converted to UTF-8.
 *
 * The title is returned unconverted when the page declares no charset or one
 * that mbstring does not support.
 *
 * @param string $html
 * @return string
 */
function linkTitleAsUtf8($html)
{
    $title = linkExtractTitle($html);
    if ('' === $title) {
        return '';
    }
    $charset = linkDetectCharset($html);
    if (null === $charset || !linkIsKnownEncoding($charset)) {
        return $title;
    }
    $converted = mb_convert_encoding($title, 'UTF-8', $charset);
    return false === $converted ? $title : $converted;
}

$path = dirname(__FILE__);
$cacheDir = $path . '/tmp';
$listFile = $cacheDir . '/list';
$urlText = '';

// Load the stored list; a missing file simply means "no URLs yet".
$rawList = is_file($listFile) ? file_get_contents($listFile) : '';
if (false === $rawList) {
    $rawList = '';
}
// Blank lines (e.g. from the trailing newline) are not URLs.
$urls = array_filter(array_map('trim', explode("\n", $rawList)), 'strlen');
$urls = array_unique($urls);
natcasesort($urls);

if (!isset($_GET['op'])) {
    $_GET['op'] = 'default';
}

if ('all' === $_GET['op']) {
    foreach ($urls as $url) {
        $news = linkFetchCached($cacheDir, $url);
        if ('' === $news) {
            // Nothing could be loaded for this URL; leave it out of the listing.
            continue;
        }
        $urlText .= $url . "\t" . linkTitleAsUtf8($news) . "\n";
    }
    // Keep a trailing newline so that later appends start on their own line.
    file_put_contents($listFile, $urls ? implode("\n", $urls) . "\n" : '');
} elseif (!empty($_POST['urls']) && is_string($_POST['urls'])) {
    $known = array_fill_keys($urls, true);
    $fresh = array();
    foreach (explode("\n", $_POST['urls']) as $line) {
        // Accept re-submitted "url<TAB>title" lines by keeping only the URL.
        $fields = explode("\t", trim($line), 2);
        $newUrl = trim($fields[0]);
        if (!linkIsWebUrl($newUrl) || isset($known[$newUrl])) {
            continue;
        }
        $known[$newUrl] = true;
        $fresh[] = $newUrl;
    }
    if ($fresh) {
        // A list written without a final newline must not swallow the first new URL.
        $glue = ('' !== $rawList && "\n" !== substr($rawList, -1)) ? "\n" : '';
        file_put_contents($listFile, $glue . implode("\n", $fresh) . "\n", FILE_APPEND | LOCK_EX);
    }
    foreach ($fresh as $newUrl) {
        $urlText .= $newUrl . "\t" . linkTitleAsUtf8(linkFetchCached($cacheDir, $newUrl)) . "\n";
    }
}
?>
<?php
// Form for submitting new URLs; the textarea shows the "url<TAB>title" lines
// collected in $urlText. Titles come from remote pages, so the text is escaped
// before output: a title containing "</textarea>" must not break out of the
// field. ENT_SUBSTITUTE keeps titles that are not valid UTF-8 visible instead
// of blanking the whole output.
?>
<form method="post" action="index.php?op=default">
<textarea name="urls" cols="120" rows="30"><?php echo htmlspecialchars($urlText, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?></textarea>
<br /><br /><input type="submit" />
<br /><br /><a href="index.php?op=all">show all</a>
</form>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment