Skip to content

Instantly share code, notes, and snippets.

@Sogl
Last active June 26, 2023 18:30
Show Gist options
  • Save Sogl/78dce78b254b39a6678f759b3ff981b7 to your computer and use it in GitHub Desktop.
Save Sogl/78dce78b254b39a6678f759b3ff981b7 to your computer and use it in GitHub Desktop.
Algolia Pro Flex for Grav CMS
<?php declare(strict_types=1);
namespace Grav\Plugin\AlgoliaPro;
use Grav\Common\Grav;
use Grav\Common\Page\Interfaces\PageInterface;
use Grav\Common\Page\Pages;
use Grav\Common\Utils;
/**
 * Crawl-based Algolia indexer that also handles routes under /therapies/.
 *
 * For every crawled URL whose path starts with /therapies/, a clone of the
 * /therapies/therapy page is filled with the matching "therapies" Flex object
 * and registered in Pages under that path, so the page lookup done while
 * building records can resolve it.
 */
class CrawlFlexpageSearch extends CrawlPageSearch
{
    /**
     * URL paths whose crawl responses are dropped before indexing.
     *
     * @var string[]
     */
    protected static $notAllowedRoutes = [
        '/grid',
        '/forgot_password',
        '/login',
        '/profile',
        '/therapies',
        '/therapies/add',
        '/',
        '/zachem-etot-sait/o-lekciyakh',
        '/zachem-etot-sait/ob-avtorakh',
        '/zachem-etot-sait/oplata-i-dostup',
        '/zachem-etot-sait/updates',
    ];

    /**
     * Process and index pages based on crawl responses + Grav pages.
     *
     * @param string $lang      Language passed to getIndexer().
     * @param array  $responses Crawl responses; each must offer getInfo() and getHeaders().
     * @return array List of status entries (keys: status, msg, url).
     */
    protected function indexPageResponses(string $lang, array $responses): array
    {
        $grav = Grav::instance();
        /** @var Pages $pages */
        $pages = $grav['pages'];
        $pages->enablePages();

        $flex = $grav->get('flex');
        $templatePage = $pages->find('/therapies/therapy');

        // First pass: drop unwanted routes and register a page for every therapy route.
        foreach ($responses as $key => $response) {
            $fullUrl = (string) ($response->getInfo()['url'] ?? '');
            // parse_url() yields null/false when there is no usable path; normalise to ''.
            $route = (string) parse_url($fullUrl, PHP_URL_PATH);

            if (in_array($route, static::$notAllowedRoutes, true)) {
                unset($responses[$key]);
                continue;
            }

            if (Utils::startsWith($route, '/therapies/')) {
                $this->registerTherapyPage($pages, $flex, $templatePage, $route);
            }
        }

        $index = $this->getIndexer($lang);
        $status = [];
        $records = [];
        $callback = $this->getProgressCallback();

        if ($callback) {
            $callback(count($responses), 'Index Config: <yellow>' . $this->name . '</yellow> | Algolia Index: <yellow>' . $index->getIndexName() . '</yellow>');
        }

        // Second pass: build records for every response that maps to a known page.
        foreach ($responses as $response) {
            $headers = $response->getHeaders();
            $info = $response->getInfo();
            $url = $info['url'] ?? 'unknown';
            $route = $headers['grav-page-route'][0] ?? '';
            $base = $headers['grav-base'][0] ?? '';
            $page = $pages->find($route);

            if ($base) {
                $url = str_replace($base, '', $url);
            }

            if ($page instanceof PageInterface) {
                $this->addRecordFromResponse($page, $response, $url, $records, $status);
                continue;
            }

            $status[] = [
                'status' => 'error',
                'msg' => 'Page Not Found: ' . $route,
                'url' => $url
            ];
            if ($callback) {
                // NOTE(review): -1 presumably takes this response out of the expected step count — confirm.
                $callback(-1);
            }
        }

        if ($this->production_mode !== false && !empty($records)) {
            $index->partialUpdateObjects($records, [
                'createIfNotExists' => true
            ]);
        }

        return $status;
    }

    /**
     * Register a page for one therapy route, built from a clone of the template page.
     *
     * Does nothing when the Flex service, the template page or the Flex object is
     * missing; the route is then reported as "Page Not Found" by the caller if the
     * later page lookup fails.
     *
     * @param Pages              $pages
     * @param object|null        $flex     Flex service; must offer getObject($key, $directory).
     * @param PageInterface|null $template Page that is cloned for every therapy.
     * @param string             $route    URL path of the therapy, e.g. /therapies/<key>.
     * @return void
     */
    private function registerTherapyPage(Pages $pages, $flex, ?PageInterface $template, string $route): void
    {
        if ($flex === null || $template === null) {
            return;
        }

        $key = basename($route);
        $therapy = $flex->getObject($key, 'therapies');
        if ($therapy === null) {
            return;
        }

        // A fresh clone per route: reusing one page object made every entry carry the same data.
        $page = clone $template;
        $page->id($page->modified() . md5($route));
        $page->slug($key);
        $page->folder($key);
        $page->route($route);
        $page->rawRoute($route);
        $page->title($therapy->getProperty('title'));
        $page->content($therapy->getProperty('description'));
        $page->taxonomy(['tag' => $therapy->getProperty('tags')]);

        $pages->addPage($page, $route);
    }
}
<?php declare(strict_types=1);
namespace Grav\Plugin\AlgoliaPro;
use Grav\Common\Grav;
use Grav\Framework\Flex\Flex;
use Grav\Framework\Flex\FlexDirectory;
use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapiesCollection;
use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapyObject;
use Grav\Framework\Flex\Interfaces\FlexCollectionInterface;
use Grav\Framework\Flex\Interfaces\FlexObjectInterface;
use Grav\Common\Utils;
use Grav\Plugin\ShortcodeCore\ShortcodeManager;
use Grav\Common\Page\Page;
/**
 * Algolia indexer for the "therapies" Flex directory.
 *
 * Each published TherapyObject is turned into one record per content chunk.
 */
class FlexTherapiesSearch extends FlexSearch implements AlgoliaProClassInterface
{
    /**
     * Return the index configuration with the concrete index name filled in.
     *
     * @param array $options May contain 'lang' to select the language of the index.
     * @return array
     */
    public function indexConfiguration(array $options = []): array
    {
        $grav = Grav::instance();
        $conf = $this->index_configuration;
        $name = null;

        /** @var \Grav\Common\Language\Language $language */
        $language = $grav['language'];
        if ($language->enabled()) {
            // Explicit option first, then the active language, then the default one.
            $name = $options['lang'] ?? $language->getActive() ?? $language->getDefault();
        }

        $index = $this->getIndexer($name);
        $conf->set('name', $index->getIndexName());

        return $conf->toArray();
    }

    /**
     * Return collection of objects to be indexed. Make sure you filter away inaccessible objects.
     *
     * @return FlexCollectionInterface
     * @throws \RuntimeException When the "therapies" Flex directory is not available.
     */
    protected function getFilteredCollection(): FlexCollectionInterface
    {
        $grav = Grav::instance();

        /** @var Flex $flex */
        $flex = $grav['flex'];

        /** @var FlexDirectory|null $directory */
        $directory = $flex->getDirectory('therapies');
        if ($directory === null) {
            throw new \RuntimeException('Flex directory "therapies" is not available');
        }

        /** @var TherapiesCollection $collection */
        $collection = $directory->getCollection()->filterBy(['published' => true]);

        return $collection;
    }

    /**
     * Return true if object can be handled by this class.
     *
     * @param FlexObjectInterface $object
     * @return bool
     */
    protected function checkObject(FlexObjectInterface $object): bool
    {
        return $object instanceof TherapyObject;
    }

    /**
     * Each object can have multiple records, so return array of records.
     *
     * The object's description is passed through a Page so that content() returns
     * the processed content, then split into blocks and chunks. Every chunk becomes
     * one record that carries the shared object data plus the block-specific data.
     *
     * @param TherapyObject $object
     * @return array
     */
    protected function getRecord(FlexObjectInterface $object): array
    {
        // Let a Page process the description; splitHTMLContent() below works on the result.
        // Alternative that also works: Utils::processMarkdown() followed by
        // ShortcodeManager::processShortcodes().
        $page = new Page();
        $page->content($object->description);
        $content = $page->content();

        $object_url = $this->getUrl($object);

        // Data shared by every record of this object.
        $flex_data = [
            'url' => $object_url,
            'title' => $object->title,
            'summary' => $this->shortenText($content, 256),
            'access' => null,
        ];

        $tags = $object->tags;
        if (!empty($tags)) {
            $flex_data['taxonomy'] = ['tag' => $tags];
        }

        $flex_data['language'] = 'ru';
        $flex_data['breadcrumbs'] = [
            ['name' => 'Случаи терапии', 'url' => '/therapies'],
            ['name' => $object->title, 'url' => $object_url],
        ];

        // getUrl() may return null; trim() rejects null under strict_types.
        $base_url = trim((string) $object_url, '/');
        $base_id = md5($base_url);

        $flex_chunks = [];
        $counter = 1;

        foreach ($this->splitHTMLContent($content) as $block) {
            $block_data = [];
            $block_content = $block['content'] ?? '';

            if (isset($block['tag'], $block['header'])) {
                $block_data['objectType'] = 'header';
                $block_data['headers'][$block['tag']][] = $block['header'];
                $block_data['subtitle'] = $block['header'];
                $block_data['summary'] = $this->getFirstWords($block_content, 50);
                if (!empty($block['id'])) {
                    // Deep link to the heading anchor.
                    $block_data['url'] = $flex_data['url'] . '#' . $block['id'];
                }
            }

            foreach ($this->splitContentIntoChunks($block_content) as $chunk) {
                // The counter runs across all blocks, so IDs are unique per object.
                $block_data['objectID'] = $base_id . '_' . $counter++;
                $block_data['baseURL'] = $base_url;
                $block_data['content'] = $chunk;
                $flex_chunks[] = array_merge($flex_data, $block_data);
            }
        }

        return $flex_chunks;
    }

    /**
     * Get URL for the object.
     *
     * @param FlexObjectInterface $object
     * @return string|null
     */
    protected function getUrl(FlexObjectInterface $object): ?string
    {
        return $object->url();
    }

    /**
     * Strip tags from a text and shorten it to a maximum number of characters.
     *
     * Lengths are measured in characters (mb_* functions), not bytes, so multibyte
     * text such as Cyrillic is measured and cut correctly.
     *
     * @param string $text       Text to shorten; HTML tags are removed first.
     * @param int    $max_length Maximum number of characters kept (cut-off marker not counted).
     * @param string $cut_off    Marker appended when the text was shortened.
     * @param bool   $keep_word  Cut at the last space instead of in the middle of a word.
     * @return string
     */
    public function shortenText($text, $max_length = 140, $cut_off = '…', $keep_word = false)
    {
        // Remove all tags and surrounding whitespace.
        $text = trim(strip_tags((string) $text));
        $max_length = (int) $max_length;

        if (mb_strlen($text) <= $max_length) {
            return $text;
        }

        $short = mb_substr($text, 0, $max_length);

        if ($keep_word) {
            // Look one character past the limit so a word ending exactly at the limit stays whole.
            $last_space = mb_strrpos(mb_substr($text, 0, $max_length + 1), ' ');
            if ($last_space !== false && $last_space > 0) {
                $short = mb_substr($text, 0, $last_space);
            }
            // Without any space the hard cut at $max_length above is used.
        }

        return rtrim($short) . $cut_off;
    }
}
<?php declare(strict_types=1);
namespace Grav\Plugin\AlgoliaPro;
use Grav\Common\Grav;
use Grav\Common\Page\Pages;
use Grav\Common\Yaml;
use RocketTheme\Toolbox\Event\Event;
use Grav\Framework\Flex\Flex;
use Grav\Framework\Flex\FlexDirectory;
use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapiesCollection;
use Grav\Plugin\PageToc\MarkupFixer;
use Grav\Common\Page\Interfaces\PageInterface;
use Grav\Common\Plugin;
/**
 * Page-based Algolia indexer that also indexes "therapies" Flex objects.
 *
 * Before indexing, every published therapy is registered in Pages as a clone of
 * the /therapies/therapy page filled with the Flex object's data. Page content
 * is run through the page-toc MarkupFixer before records are built.
 */
class GravFlexpageSearch extends GravPageSearch
{
    /**
     * Routes whose pages are removed from the set of pages to index.
     *
     * @var string[]
     */
    protected static $notAllowedRoutes = [
        '/grid',
        '/forgot_password',
        '/login',
        '/profile',
        '/therapies',
        '/therapies/add',
        '/therapies/therapy',
        '/',
        '/zachem-etot-sait/o-lekciyakh',
        '/zachem-etot-sait/ob-avtorakh',
        '/zachem-etot-sait/oplata-i-dostup',
        '/zachem-etot-sait/updates',
    ];

    /**
     * Only pages using one of these templates are indexed.
     *
     * @var string[]
     */
    protected static $allowedTemplates = [
        'docs',
        'therapy'
    ];

    /**
     * Index pages (including the generated therapy pages) and return one status entry per event.
     *
     * @param array $options Supports 'lang' (passed to getIndexer()) and 'route' (index a single page).
     * @return array List of status entries (keys: status, msg, url).
     */
    protected function indexPages(array $options = []): array
    {
        $grav = Grav::instance();
        /** @var Pages $pages */
        $pages = $grav['pages'];
        $pages->enablePages();

        $records = [];
        $status = [];

        if (!$this->registerTherapyPages($pages)) {
            $status[] = [
                'status' => 'error',
                'msg' => 'Therapy pages were not registered: template page or Flex directory is missing',
                'url' => '/therapies/therapy'
            ];
        }

        // Get custom filters
        $filter = $this->index_configuration->get('filters');
        $lang = $options['lang'] ?? null;
        $route = $options['route'] ?? null;
        $index = $this->getIndexer($lang);

        $collection = $this->collectPages($pages, $route, $filter);
        $callback = $this->getProgressCallback();

        if ($callback) {
            $callback(count($collection), 'Index Config: <yellow>' . $this->name . '</yellow> | Algolia Index: <yellow>' . $index->getIndexName() . '</yellow>');
        }

        // Adds heading anchors so the table-of-contents anchors become searchable.
        $markup_fixer = new MarkupFixer();

        foreach ($collection as $page) {
            $url = $page->url();

            // update progress callback
            if ($callback) {
                $callback();
            }

            $skip_message = null;
            if (!$this->processPage($page)) {
                $skip_message = 'Page manually skipped';
            } elseif ($page->redirect()) {
                $skip_message = 'Page is a redirect';
            }

            if ($skip_message !== null) {
                $status[] = [
                    'status' => 'info',
                    'msg' => $skip_message,
                    'url' => $url
                ];
                if ($callback) {
                    // NOTE(review): -1 presumably takes this page out of the expected step count — confirm.
                    $callback(-1);
                }
                continue;
            }

            try {
                $skip_event = Grav::instance()->fireEvent(
                    'onAlgoliaProPageSkip',
                    new Event(['name' => $this->name, 'config' => $this->index_configuration, 'object' => $page])
                );
                if (isset($skip_event['status'])) {
                    $status[] = $skip_event['status'];
                    if ($callback) {
                        $callback(-1);
                    }
                    continue;
                }

                $content = trim($page->content());
                $content = $markup_fixer->fix($content, $this->getAnchorOptions($page));

                $skip_empty_content = $this->index_configuration->get('content.skip_empty', true);
                if ($skip_empty_content && empty($content)) {
                    $status[] = [
                        'status' => 'info',
                        'msg' => 'Page has no content, skipping',
                        'url' => $url
                    ];
                    if ($callback) {
                        $callback(-1);
                    }
                    continue;
                }

                $page_records = $this->getPageData($content, $page);
                $updatable_records = $this->recordsNeedUpdating($page_records);
                $records = array_merge($records, $updatable_records);

                // Judge by this page's own records; the accumulated $records would mark
                // every page after the first updated one as "indexed".
                if (count($updatable_records) > 0) {
                    $status[] = [
                        'status' => 'success',
                        'msg' => 'Page indexed',
                        'url' => $url
                    ];
                } else {
                    $status[] = [
                        'status' => 'info',
                        'msg' => 'Cache entry found, no records need updating',
                        'url' => $url
                    ];
                }
            } catch (\Exception $e) {
                $status[] = [
                    'status' => 'error',
                    'msg' => $e->getMessage(),
                    'url' => $url
                ];
                if ($callback) {
                    $callback(-1);
                }
            }
        }

        if ($this->production_mode && !empty($records)) {
            $index->partialUpdateObjects($records, [
                'createIfNotExists' => true
            ]);
        }

        return $status;
    }

    /**
     * Register one page per published therapy, each a clone of /therapies/therapy.
     *
     * @param Pages $pages
     * @return bool False when the template page or the "therapies" Flex directory is missing.
     */
    private function registerTherapyPages(Pages $pages): bool
    {
        $templatePage = $pages->find('/therapies/therapy');
        if (!$templatePage instanceof PageInterface) {
            return false;
        }

        /** @var Flex $flex */
        $flex = Grav::instance()->get('flex');

        /** @var FlexDirectory|null $directory */
        $directory = $flex->getDirectory('therapies');
        if ($directory === null) {
            return false;
        }

        /** @var TherapiesCollection $therapies */
        $therapies = $directory->getCollection()->filterBy(['published' => true]);

        foreach ($therapies as $therapy) {
            $slug = (string) $therapy->getProperty('slug');
            if ($slug === '') {
                // Without a slug there is no route to register the page under.
                continue;
            }
            $route = '/therapies/' . $slug;

            // A fresh clone per therapy: reusing one page object made every entry carry the same data.
            $page = clone $templatePage;
            $page->id($page->modified() . md5($route));
            $page->slug($slug);
            $page->folder($slug);
            $page->route($route);
            $page->rawRoute($route);
            $page->title($therapy->getProperty('title'));
            $page->content($therapy->getProperty('description'));
            $page->taxonomy(['tag' => $therapy->getProperty('tags')]);

            $pages->addPage($page, $route);
        }

        return true;
    }

    /**
     * Build the set of pages to index with excluded routes and foreign templates removed.
     *
     * With a single route the result is a plain array holding at most one page, filtered
     * by hand because an array has no collection methods. Otherwise the result is a page
     * collection.
     *
     * @param Pages       $pages
     * @param string|null $route  Single route to index, or null/empty for all pages.
     * @param mixed       $filter Value of the 'filters' index configuration.
     * @return array|\Countable Countable, iterable set of pages.
     */
    private function collectPages(Pages $pages, $route, $filter)
    {
        if ($route) {
            $page = $pages->find($route);
            if (!$page || !$page->exists() || !$page->routable() || !$page->published()) {
                return [];
            }

            foreach (static::$notAllowedRoutes as $naRoute) {
                $excluded = $pages->find($naRoute);
                if ($excluded && $excluded->path() === $page->path()) {
                    return [];
                }
            }

            return in_array($page->template(), static::$allowedTemplates, true) ? [$page] : [];
        }

        if (is_array($filter) && array_key_exists('items', $filter)) {
            if (is_string($filter['items'])) {
                $filter['items'] = Yaml::parse($filter['items']);
            }
            $collection = $pages->getCollection($filter)->published()->routable();
        } else {
            $collection = $pages->all()->published()->routable();
        }

        // Delete excluded routes; collection entries are removed by page path.
        foreach (static::$notAllowedRoutes as $naRoute) {
            $excluded = $pages->find($naRoute);
            if ($excluded) {
                $collection->remove($excluded->path());
            }
        }

        // And use only specific templates.
        return $collection->ofOneOfTheseTypes(static::$allowedTemplates);
    }

    /**
     * Collect the anchor options handed to MarkupFixer::fix() from the page-toc configuration.
     *
     * @param PageInterface|null $page  Page whose configuration is read; defaults to the current page.
     * @param int|null           $start Overrides 'anchors.start' when given.
     * @param int|null           $depth Overrides 'anchors.depth' when given.
     * @return array
     */
    protected function getAnchorOptions(?PageInterface $page = null, $start = null, $depth = null): array
    {
        $page = $page ?? Grav::instance()['page'];

        return [
            'start' => (int) ($start ?? static::configVar('anchors.start', $page, 1)),
            'depth' => (int) ($depth ?? static::configVar('anchors.depth', $page, 6)),
            'hclass' => static::configVar('hclass', $page, null),
            'link' => static::configVar('anchors.link', $page, true),
            'position' => static::configVar('anchors.position', $page, 'before'),
            'aria' => static::configVar('anchors.aria', $page, 'Anchor'),
            'icon' => static::configVar('anchors.icon', $page, '#'),
            'class' => static::configVar('anchors.class', $page, null),
            'maxlen' => (int) (static::configVar('anchors.slug_maxlen', $page, null)),
            'prefix' => static::configVar('anchors.slug_prefix', $page, null),
        ];
    }

    /**
     * Read a 'page-toc' plugin option via Plugin::inheritedConfigOption().
     *
     * @param string             $var     Option name, e.g. 'anchors.start'.
     * @param PageInterface|null $page    Page passed on to the option lookup.
     * @param mixed              $default Default passed on to the option lookup.
     * @return mixed
     */
    public static function configVar($var, $page = null, $default = null)
    {
        return Plugin::inheritedConfigOption('page-toc', $var, $page, $default);
    }

    /**
     * Skip per-object index updates: always reports success without touching the index.
     *
     * @param object $object
     * @param array  $options
     * @param bool   $update
     * @return array
     */
    public function modifyObject(object $object, array $options = [], bool $update = true): array
    {
        // We don't need to perform operations on each edit/delete.
        return ['status' => 'success', 'message' => 'test mode'];
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment