Skip to content

Instantly share code, notes, and snippets.

@tobiasfabian
Last active March 17, 2024 18:45
Show Gist options
  • Save tobiasfabian/51ee860b58066fcf9fb55f88d89be5a6 to your computer and use it in GitHub Desktop.
Save tobiasfabian/51ee860b58066fcf9fb55f88d89be5a6 to your computer and use it in GitHub Desktop.
Kirby bookmark
<?php
use Kirby\Http\Remote;
use Kirby\Cms\File;
use Kirby\Cms\Page;
use Kirby\Filesystem\F;
use Kirby\Filesystem\Mime;
use Kirby\Toolkit\Dom;
use Kirby\Toolkit\Str;
use Kirby\Toolkit\V;
/**
 * Page model for a bookmark.
 *
 * After creation the model fetches the bookmarked URL, extracts meta data
 * (canonical URL, title, description, language, Open Graph tags), stores a
 * reduced HTML version of the main content and downloads the Open Graph image.
 *
 * @method \Kirby\Cms\Field note()
 * @method \Kirby\Cms\Field description()
 * @method \Kirby\Cms\Field dateCreated()
 * @method \Kirby\Cms\Field dateFetched()
 * @method \Kirby\Cms\Field lang()
 * @method \Kirby\Cms\Field meta()
 * @method \Kirby\Cms\Field text()
 * @method \Kirby\Cms\Field html()
 */
class BookmarkPage extends Page
{
    /** Flags used when the collected meta tags are encoded as JSON. */
    public const jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    /** Tags that survive `strip_tags()` in getText(). */
    public const allowedTags = ['button', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'br', 'hr', 'ul', 'ol', 'li', 'p', 'a', 'pre', 'code', 'blockquote', 'video', 'img', 'picture', 'source'];

    /** Attributes that survive the DOM sanitizing in getText(). */
    public const allowedAttrs = ['src', 'href', 'alt', 'srcset', 'id'];

    /**
     * Fetches the bookmarked URL and stores everything that can be
     * extracted from the response in the page content.
     *
     * On any failure the page is deleted and the error is rethrown.
     *
     * @throws Throwable whatever went wrong while fetching or storing
     */
    public function storeRemoteData(): void
    {
        // update(), changeStatusToListed() and changeSlug() each return a page
        // object; keep track of the latest one so that the cleanup in the
        // catch block deletes the page at its current location.
        $page = $this;

        try {
            if ($this->content()->url()->isEmpty()) {
                throw new Exception('No url given');
            }

            $url = $this->content()->url()->toString();

            // Remote::get() performs the request; no additional fetch() call
            // is needed (it would request the URL a second time).
            $remote = Remote::get($url, [
                'agent' => option('remote.userAgent', 'Personal Bookmark Tool'),
            ]);
            $html = $remote->content();

            if ($html === null || trim($html) === '') {
                throw new Exception('No content received from ' . $url);
            }

            $dom = $this->getDom($html);

            // A relative or empty canonical href must not replace the
            // (valid) URL the user entered.
            $canonicalUrl = $this->getCanonicalUrl($dom);
            if ($canonicalUrl !== null && V::url($canonicalUrl) !== true) {
                $canonicalUrl = null;
            }

            $metaTags = $this->getMetaTags($dom);

            if ($metaTags['og:image'] ?? false) {
                $this->saveOgImage($metaTags['og:image'], $metaTags['og:image:alt'] ?? null);
            }

            $newContent = [
                'url' => $canonicalUrl ?? $url,
                'title' => $metaTags['og:title'] ?? $this->getTitle($dom),
                'description' => $metaTags['og:description'] ?? $this->getDescription($dom),
                'lang' => $this->getLang($dom),
                'meta' => json_encode($metaTags, self::jsonFlags),
                'html' => $html,
                // getText() sanitizes $dom in place, so it has to run after
                // all other extractions above.
                'text' => $this->getText($dom),
                'dateFetched' => date('c'),
            ];

            $page = $this->update($newContent);
            $page = $page->changeStatusToListed();

            if ($canonicalUrl !== null && $canonicalUrl !== $url) {
                $page = $page->changeSlug(Str::slug($canonicalUrl));
            }
        } catch (Throwable $exception) {
            // Delete page if anything went wrong
            $page->delete();
            throw $exception;
        }
    }

    /**
     * Wraps the given HTML markup in a Dom object.
     */
    public function getDom(string $html): Dom
    {
        return new Dom($html);
    }

    /**
     * Returns the href of the first `<link rel="canonical">` element
     * or null if the document has none.
     */
    public function getCanonicalUrl(Dom $dom): ?string
    {
        /** @var ?DOMElement $element */
        $element = $dom->query('//link[@rel="canonical" and @href]')->item(0);

        return $element?->getAttribute('href');
    }

    /**
     * Collects all `<meta property="…" content="…">` tags.
     *
     * @return array<string, string> content values keyed by property name;
     *                               later tags overwrite earlier ones
     */
    public function getMetaTags(Dom $dom): array
    {
        $tagsArray = [];

        foreach ($dom->query('//meta[@property and @content]') as $tag) {
            /** @var DOMElement $tag */
            $tagsArray[$tag->getAttribute('property')] = $tag->getAttribute('content');
        }

        return $tagsArray;
    }

    /**
     * Returns the `lang` attribute of the `<html>` element, if present.
     */
    public function getLang(Dom $dom): ?string
    {
        /** @var ?DOMElement $element */
        $element = $dom->query('//html[@lang]')->item(0);

        return $element?->getAttribute('lang');
    }

    /**
     * Returns the text of the first `<title>` element, if present.
     */
    public function getTitle(Dom $dom): ?string
    {
        /** @var ?DOMElement $element */
        $element = $dom->query('//title')->item(0);

        return $element?->textContent;
    }

    /**
     * Returns the content of the description meta tag, if present.
     *
     * The standard form is `<meta name="description">`; the `property`
     * variant is still accepted for backward compatibility.
     */
    public function getDescription(Dom $dom): ?string
    {
        /** @var ?DOMElement $element */
        $element = $dom
            ->query('//meta[(@name="description" or @property="description") and @content]')
            ->item(0);

        // <meta> is a void element, the text lives in the content attribute
        return $element?->getAttribute('content');
    }

    /**
     * Returns text of article, main or body
     * element with very few semantic elements
     * like, h1, h2, a, img, etc.
     *
     * Note: the passed Dom object is sanitized in place.
     */
    public function getText(Dom $dom): ?string
    {
        $dom->sanitize([
            'allowedAttrs' => self::allowedAttrs,
        ]);

        $candidates = $dom->query('//*[self::article or self::main or self::body]');
        $count = $candidates->count();

        if ($count === 0) {
            return null;
        }

        // use the last match in document order
        $main = $candidates->item($count - 1);
        $markup = $main?->C14N();

        if (is_string($markup) === false) {
            return null;
        }

        $htmlString = strip_tags($markup, self::allowedTags);
        // trim every line and collapse consecutive line breaks
        $htmlString = preg_replace('/^\s+|\s+$/m', '', $htmlString);
        $htmlString = preg_replace('/\n{2,}/', "\n", $htmlString);

        return $htmlString;
    }

    /**
     * Downloads the Open Graph image and attaches it to the page
     * as file with the `og-image` template.
     *
     * @return File|null the created file or null if the URL is invalid,
     *                   the response is empty or it is not an image
     */
    public function saveOgImage(string $url, ?string $alt = null): ?File
    {
        if (V::url($url) !== true) {
            return null;
        }

        $remote = Remote::get($url, [
            'agent' => option('remote.userAgent', 'Personal Bookmark Tool'),
        ]);
        $imageData = $remote->content();

        if ($imageData === null || $imageData === '') {
            return null;
        }

        $mimeType = $this->getMimeTypeFromHeaders($remote->headers());
        $extension = Mime::toExtension($mimeType);

        // toExtension() returns false for unknown mime types
        if (is_string($extension) === false || F::extensionToType($extension) !== 'image') {
            return null;
        }

        return $this->addFile($imageData, 'og-image.' . $extension, 'og-image', [
            'url' => $url,
            'alt' => $alt,
        ]);
    }

    /**
     * Extracts the bare mime type (without parameters like `charset`)
     * from the `Content-Type` response header.
     *
     * @param array $headers response headers keyed by header name
     * @return string lowercase mime type, empty string if the header is missing
     */
    public function getMimeTypeFromHeaders($headers): string
    {
        // header names are case-insensitive
        $headers = array_change_key_case((array)$headers, CASE_LOWER);
        $contentType = $headers['content-type'] ?? null;

        if (is_string($contentType) === false) {
            return '';
        }

        // "image/svg+xml; charset=utf-8" → "image/svg+xml"
        return strtolower(trim(explode(';', $contentType, 2)[0]));
    }

    /**
     * Creates a file of this page from raw content.
     *
     * The content is written to a temporary file first, which is
     * removed again once the page file has been created (or failed).
     *
     * @param mixed $remoteContent raw file content
     * @param string $filename filename of the new page file
     * @param string $template file template
     * @param array $content additional content fields of the file
     */
    public function addFile(
        mixed $remoteContent,
        string $filename,
        string $template,
        array $content = [],
    ): File
    {
        // unique prefix avoids collisions between parallel requests,
        // the original filename is kept so the extension stays intact
        $tempFilePath = sys_get_temp_dir() . '/' . uniqid('bookmark-', true) . '-' . basename($filename);
        $tempFile = new Kirby\Filesystem\File([
            'root' => $tempFilePath,
        ]);

        try {
            $tempFile->write($remoteContent);

            return $this->createFile([
                'parent' => $this,
                'filename' => $filename,
                'source' => $tempFilePath,
                'template' => $template,
                'content' => array_merge([
                    'dateFetched' => date('c'),
                ], $content),
            ]);
        } finally {
            F::remove($tempFilePath);
        }
    }

    /**
     * Returns the first file with the `og-image` template, if any.
     */
    public function ogImage(): ?File
    {
        return $this->files()->template('og-image')->first();
    }

    /**
     * Returns the comma-separated `tags` field as
     * naturally, case-insensitively sorted list.
     */
    public function tags(): array
    {
        $tags = $this->content()->tags()->split(',');
        sort($tags, SORT_NATURAL | SORT_FLAG_CASE);

        return $tags;
    }
}
<?php
return [
    // Runs after a page has been created: bookmark pages get a creation
    // timestamp and are then filled with the data of the bookmarked URL.
    'page.create:after' => function (Kirby\Cms\Page $page) {
        $templateName = $page->intendedTemplate()->name();

        // every other template is none of our business
        if ($templateName !== 'bookmark') {
            return;
        }

        /** @var BookmarkPage $page */
        $bookmark = $page->update(['dateCreated' => date('c')]);
        $bookmark->storeRemoteData();
    },
];
@tobiasfabian
Copy link
Author

kirby-bookmarks.mp4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment