Skip to content

Instantly share code, notes, and snippets.

@ursbraem
Last active March 12, 2019 19:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ursbraem/098c7b66df4b7784b6662d65aeac112e to your computer and use it in GitHub Desktop.
Save ursbraem/098c7b66df4b7784b6662d65aeac112e to your computer and use it in GitHub Desktop.
Mask indexer for ke_search
<?php
namespace STUBR\StubrIndexer;
/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
use TYPO3\CMS\Core\Utility\GeneralUtility;
/**
* This class implements a hook into the TYPO3 ke_search extension.
*
* @author Urs Bräm <ub@sturmundbraem.ch>
*/
class RegisterMaskcontentIndexer {
    // TODO: extend from a shared abstract base class.

    /**
     * Name of this extension.
     * @var string
     */
    private $extension_name = 'stubr_indexer';

    /**
     * Name of the icon file used to display this indexer in the backend.
     * It is appended to the extension's relative path.
     * @var string
     */
    private $icon_file = 'Icons/mask.gif';

    /**
     * Description for the indexer that will appear in the backend.
     * @var string
     */
    private $indexer_description = 'Content (mask)';

    /**
     * The indexer type this hook registers and reacts to.
     * @var string
     */
    private $indexer_type = 'mask_content';

    /**
     * Extbase ObjectManager, created on demand in customIndexer().
     * Declared explicitly so it is not created as a dynamic property
     * (deprecated since PHP 8.2).
     * @var \TYPO3\CMS\Extbase\Object\ObjectManager|null
     */
    private $objectManager;

    /**
     * Hook method used by ke_search to register indexer configurations.
     * Appends one entry (description, type, icon path) to $params['items'].
     *
     * NOTE(review): ExtensionManagementUtility::extRelPath() appears to be
     * deprecated in newer TYPO3 versions - confirm against the targeted
     * TYPO3 release.
     *
     * @author Urs Bräm <ub@sturmundbraem.ch>
     * @param array $params Hook parameters; the new item is appended to $params['items'].
     * @param mixed $pObj The parent object passed by the hook (unused here).
     */
    public function registerIndexerConfiguration(&$params, $pObj) {
        /* Relative path to this extension, used as prefix for the icon */
        $path = ExtensionManagementUtility::extRelPath($this->extension_name);

        /* Add the new indexer item to the list */
        $params['items'][] = array(
            $this->indexer_description,
            $this->indexer_type,
            $path . $this->icon_file
        );
    }

    /**
     * Hook method used by ke_search to run a custom indexer.
     * Creates a TypesMaskcontent indexer and returns its status when the
     * given configuration belongs to this indexer type; otherwise returns
     * an empty string.
     *
     * @param array $indexerConfig Configuration from TYPO3 Backend
     * @param mixed $indexerObject The calling indexer, handed to the
     *                             TypesMaskcontent constructor.
     *
     * @author Urs Bräm <ub@sturmundbraem.ch>
     * @return string Status for indexer run in the backend.
     */
    public function customIndexer(&$indexerConfig, &$indexerObject) {
        /* Not meant for this indexer (or no type given): nothing to report */
        if (!isset($indexerConfig['type']) || $indexerConfig['type'] !== $this->indexer_type) {
            return '';
        }

        /* Get an instance of the TYPO3 ObjectManager */
        $this->objectManager = GeneralUtility::makeInstance(
            'TYPO3\CMS\Extbase\Object\ObjectManager'
        );

        /* Get a fresh indexer instance from the ObjectManager */
        $indexer = $this->objectManager->get(
            'STUBR\StubrIndexer\TypesMaskcontent',
            $indexerObject
        );

        /* Return the indexer status */
        return $indexer->startIndexing();
    }
}
<?php
namespace STUBR\StubrIndexer;
/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
use TYPO3\CMS\Core\Utility\GeneralUtility;
/**
* This class implements a hook into the TYPO3 ke_search extension.
*
* @author Urs Bräm <ub@sturmundbraem.ch>
*/
class RegisterMaskpageIndexer {
    // TODO: extend from a shared abstract base class.

    /**
     * Name of this extension.
     * @var string
     */
    private $extension_name = 'stubr_indexer';

    /**
     * Name of the icon file used to display this indexer in the backend.
     * It is appended to the extension's relative path.
     * @var string
     */
    private $icon_file = 'Icons/mask.gif';

    /**
     * Description for the indexer that will appear in the backend.
     * @var string
     */
    private $indexer_description = 'Pages (mask)';

    /**
     * The indexer type this hook registers and reacts to.
     * @var string
     */
    private $indexer_type = 'mask_page';

    /**
     * Extbase ObjectManager, created on demand in customIndexer().
     * Declared explicitly so it is not created as a dynamic property
     * (deprecated since PHP 8.2).
     * @var \TYPO3\CMS\Extbase\Object\ObjectManager|null
     */
    private $objectManager;

    /**
     * Hook method used by ke_search to register indexer configurations.
     * Appends one entry (description, type, icon path) to $params['items'].
     *
     * NOTE(review): ExtensionManagementUtility::extRelPath() appears to be
     * deprecated in newer TYPO3 versions - confirm against the targeted
     * TYPO3 release.
     *
     * @author Urs Bräm <ub@sturmundbraem.ch>
     * @param array $params Hook parameters; the new item is appended to $params['items'].
     * @param mixed $pObj The parent object passed by the hook (unused here).
     */
    public function registerIndexerConfiguration(&$params, $pObj) {
        /* Relative path to this extension, used as prefix for the icon */
        $path = ExtensionManagementUtility::extRelPath($this->extension_name);

        /* Add the new indexer item to the list */
        $params['items'][] = array(
            $this->indexer_description,
            $this->indexer_type,
            $path . $this->icon_file
        );
    }

    /**
     * Hook method used by ke_search to run a custom indexer.
     * Creates a TypesMaskpage indexer and returns its status when the
     * given configuration belongs to this indexer type; otherwise returns
     * an empty string.
     *
     * @param array $indexerConfig Configuration from TYPO3 Backend
     * @param mixed $indexerObject The calling indexer, handed to the
     *                             TypesMaskpage constructor.
     *
     * @author Urs Bräm <ub@sturmundbraem.ch>
     * @return string Status for indexer run in the backend.
     */
    public function customIndexer(&$indexerConfig, &$indexerObject) {
        /* Not meant for this indexer (or no type given): nothing to report */
        if (!isset($indexerConfig['type']) || $indexerConfig['type'] !== $this->indexer_type) {
            return '';
        }

        /* Get an instance of the TYPO3 ObjectManager */
        $this->objectManager = GeneralUtility::makeInstance(
            'TYPO3\CMS\Extbase\Object\ObjectManager'
        );

        /* Get a fresh indexer instance from the ObjectManager */
        $indexer = $this->objectManager->get(
            'STUBR\StubrIndexer\TypesMaskpage',
            $indexerObject
        );

        /* Return the indexer status */
        return $indexer->startIndexing();
    }
}
<?php
namespace STUBR\StubrIndexer;
/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
use STUBR\StubrIndexer\RegisterMaskcontentIndexer;
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
/* Oldschool require for non namespaced classes */
require_once(
ExtensionManagementUtility::extPath('ke_search')
. 'Classes/indexer/types/class.tx_kesearch_indexer_types_tt_content.php'
);
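/**
* ke_search indexer for Mask content elements.
*
* Extends the ke_search tt_content indexer, restricts it to records with
* CType "mask_content" and indexes the tag-stripped pi_flexform data.
*
* @author Urs Bräm <ub@sturmundbraem.ch>
*/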
class TypesMaskcontent extends \tx_kesearch_indexer_types_tt_content {
    /**
     * The indexer's type; also used as the CType to select in tt_content.
     * @var string
     */
    private $indexer_type = 'mask_content';

    /**
     * Overrides the parent implementation to set the where clause for the
     * database query, so that only records whose CType equals the indexer
     * type are selected. The actual work is then done by the parent.
     *
     * @author Urs Bräm <ub@sturmundbraem.ch>
     * @param int $uid uid of page
     */
    public function getPageContent($uid) {
        /* Overwrite the where clause to only look for mask elements */
        $this->whereClauseForCType = sprintf(
            'CType="%s"',
            $this->indexer_type
        );

        /* Now let the parent implementation work */
        parent::getPageContent($uid);
    }

    /**
     * Returns the indexable text of a mask_content element, taken from the
     * row's pi_flexform field. A missing or NULL field yields an empty
     * string.
     *
     * @author Urs Bräm <ub@sturmundbraem.ch>
     * @param array $ttContentRow A row from tt_content
     * @return string The flexform data with markup removed
     */
    public function getContentFromContentElement($ttContentRow) {
        $flexformData = isset($ttContentRow['pi_flexform'])
            ? $ttContentRow['pi_flexform']
            : '';

        /* Parse the flexform data and return it */
        return $this->parseFlexformData($flexformData);
    }

    /**
     * Helper function to reduce flexform data to plain text.
     *
     * @author Urs Bräm <ub@sturmundbraem.ch>
     * @param string $flexformData TYPO3 flexform data from tt_content
     * @return string
     */
    protected function parseFlexformData($flexformData) {
        /* Guard against NULL columns: strip_tags() expects a string */
        $flexformData = (string)$flexformData;

        /* 1. Remove the tags of the flexform XML itself */
        $withoutXml = strip_tags($flexformData);
        /* 2. Decode entities, which may turn escaped content into markup */
        $decoded = html_entity_decode($withoutXml);
        /* 3. Remove any markup that the decoding step revealed */
        $parsedData = strip_tags($decoded);

        /*
         * Collapse every run of two or more whitespace characters into a
         * single space. A lone whitespace character is left untouched.
         */
        return preg_replace('/\s\s+/', ' ', $parsedData);
    }
}
<?php
namespace STUBR\StubrIndexer;
/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/
use STUBR\StubrIndexer\RegisterMaskcontentIndexer;
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
/* Oldschool require for non namespaced classes */
require_once(
ExtensionManagementUtility::extPath('ke_search')
. '/Classes/lib/class.tx_kesearch_lib_helper.php'
);
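/**
* ke_search page indexer with Mask support.
*
* Extends the ke_search page indexer so that, for every content element of
* a page, the Mask field tx_mask_plaintext is indexed along with header,
* subheader and bodytext.
*
* @author Urs Bräm <ub@sturmundbraem.ch>
*/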
class TypesMaskpage extends \tx_kesearch_indexer_types_page {
    /**
     * Collects the content of all content elements of one page, grouped by
     * language, and stores one index entry per language.
     *
     * Files attached to or linked from the content elements are indexed on
     * the way. If the page has no matching content elements, only the
     * "without content" counter is increased.
     *
     * @param int $uid page-UID that has to be indexed
     * @return void
     */
    public function getPageContent($uid) {
        // columns to read from tt_content // edit ub: tx_mask_plaintext added
        $fields = 'uid, pid, header, bodytext, CType, sys_language_uid, header_layout, fe_group, subheader, tx_mask_plaintext';

        // hook to modify the page content fields
        if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPageContentFields'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPageContentFields'])
        ) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPageContentFields'] as $_classRef) {
                $_procObj = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($_classRef);
                $_procObj->modifyPageContentFields(
                    $fields, $this
                );
            }
        }

        $table = 'tt_content';
        $where = 'pid = ' . intval($uid);
        $where .= ' AND (' . $this->whereClauseForCType . ')';

        // add condition for not indexing gridelement columns with colPos = -2 (= invalid)
        if (\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded('gridelements')) {
            $where .= ' AND colPos <> -2 ';
        }
        $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields($table);
        $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($table);

        // Get access restrictions for this page, this access restrictions apply to all
        // content elements of this pages. Individual access restrictions
        // set for the content elements will be ignored. Use the content
        // element indexer if you need that feature!
        $pageAccessRestrictions = $this->getInheritedAccessRestrictions($uid);

        // add ke_search tags current page
        $tags = $this->pageRecords[intval($uid)]['tags'];

        // add system categories as tags
        \tx_kesearch_helper::makeSystemCategoryTags($tags, $uid, $table);

        // Compile content for this page from individual content elements with
        // respect to the language.
        // While doing so, fetch also content from attached files and write
        // their content directly to the index.
        $ttContentRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows($fields, $table, $where);

        // empty() also covers a non-array result, which count() would choke on
        if (empty($ttContentRows)) {
            $this->counterWithoutContent++;
            return;
        }

        $pageContent = array();
        foreach ($ttContentRows as $ttContentRow) {
            $content = ' ';

            // index header
            // add header only if not set to "hidden"
            // NOTE(review): getContentFromContentElement() below also includes
            // the header, so it ends up in the index regardless of this check
            // (and twice when visible) - confirm whether that is intended.
            if ($ttContentRow['header_layout'] != 100) {
                $content .= strip_tags($ttContentRow['header']) . "\n";
            }

            // index content of this content element and find attached or linked files.
            // Attached files are saved as file references, the RTE links directly to
            // a file, thus we get file objects.
            // Files go into the index no matter if "index_content_with_restrictions" is set
            // or not, that means even if protected content elements do not go into the index,
            // files do. Since each file gets its own index entry with correct access
            // restrictions, that's no problem from an access permission perspective (in fact, it's a feature).
            if (in_array($ttContentRow['CType'], $this->fileCTypes)) {
                $fileObjects = $this->findAttachedFiles($ttContentRow);
            } else {
                $fileObjects = $this->findLinkedFilesInRte($ttContentRow);
                $content .= $this->getContentFromContentElement($ttContentRow) . "\n";
            }

            // index the files found
            if (!$pageAccessRestrictions['hidden']) {
                $this->indexFiles($fileObjects, $ttContentRow, $pageAccessRestrictions['fe_group'], $tags);
            }

            // add content from this content element to page content
            // ONLY if this content element is not access protected
            // or protected content elements should go into the index
            // by configuration.
            if ($this->indexerConfig['index_content_with_restrictions'] == 'yes' || $ttContentRow['fe_group'] == '' || $ttContentRow['fe_group'] == '0') {
                $languageUid = $ttContentRow['sys_language_uid'];
                // start each language bucket explicitly instead of appending to an undefined index
                if (!isset($pageContent[$languageUid])) {
                    $pageContent[$languageUid] = '';
                }
                $pageContent[$languageUid] .= $content;
            }
        }

        // make it possible to modify the indexerConfig via hook
        $indexerConfig = $this->indexerConfig;

        // make it possible to modify the default values via hook
        $indexEntryDefaultValues = array(
            'type' => 'page',
            'uid' => $uid,
            'params' => '',
            'feGroupsPages' => $pageAccessRestrictions['fe_group'],
            'debugOnly' => FALSE
        );

        // additional fields can be filled by hooks; start with a defined, empty value
        $additionalFields = array();

        // hook for custom modifications of the indexed data, e. g. the tags
        if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPagesIndexEntry'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPagesIndexEntry'])
        ) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPagesIndexEntry'] as $_classRef) {
                $_procObj = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($_classRef);
                $_procObj->modifyPagesIndexEntry(
                    $uid, $pageContent, $tags, $this->cachedPageRecords, $additionalFields, $indexerConfig, $indexEntryDefaultValues, $this
                );
            }
        }

        // store one record per language in the index table
        foreach ($pageContent as $language_uid => $content) {
            if ($pageAccessRestrictions['hidden'] || !$this->checkIfpageShouldBeIndexed($uid, $language_uid)) {
                continue;
            }

            // overwrite access restrictions with language overlay values
            // NOTE(review): the copy is taken before fe_group is refreshed from
            // the (hook-modifiable) default values, so a changed feGroupsPages
            // only reaches the overlay from the second language on - confirm
            // whether these two statements should be swapped.
            $accessRestrictionsLanguageOverlay = $pageAccessRestrictions;
            $pageAccessRestrictions['fe_group'] = $indexEntryDefaultValues['feGroupsPages'];
            if ($language_uid > 0) {
                if ($this->cachedPageRecords[$language_uid][$uid]['fe_group']) {
                    $accessRestrictionsLanguageOverlay['fe_group'] = $this->cachedPageRecords[$language_uid][$uid]['fe_group'];
                }
                if ($this->cachedPageRecords[$language_uid][$uid]['starttime']) {
                    $accessRestrictionsLanguageOverlay['starttime'] = $this->cachedPageRecords[$language_uid][$uid]['starttime'];
                }
                if ($this->cachedPageRecords[$language_uid][$uid]['endtime']) {
                    $accessRestrictionsLanguageOverlay['endtime'] = $this->cachedPageRecords[$language_uid][$uid]['endtime'];
                }
            }

            $title = $this->cachedPageRecords[$language_uid][$uid]['title'];

            $this->pObj->storeInIndex(
                $indexerConfig['storagepid'], // storage PID
                $title, // page title
                $indexEntryDefaultValues['type'], // content type
                $indexEntryDefaultValues['uid'], // target PID: where is the single view?
                $content, // indexed content, includes the title (linebreak after title)
                $tags, // tags
                $indexEntryDefaultValues['params'], // typolink params for singleview
                $this->createMaskpageAbstract($content), // abstract // edit ub
                $language_uid, // language uid
                $accessRestrictionsLanguageOverlay['starttime'], // starttime
                $accessRestrictionsLanguageOverlay['endtime'], // endtime
                $accessRestrictionsLanguageOverlay['fe_group'], // fe_group
                $indexEntryDefaultValues['debugOnly'], // debug only?
                $additionalFields // additional fields added by hooks
            );
            $this->counter++;
        }
    }

    /**
     * Builds the abstract for an index entry: the content up to the first
     * space or line break found at or after byte offset $minLength,
     * followed by ' …'.
     *
     * Content that is not longer than $minLength bytes, or that has no
     * space or line break after that offset, is returned unchanged. This
     * avoids calling strpos() with an offset beyond the end of the string.
     *
     * @param string $content The content the abstract is built from
     * @param int $minLength Byte offset from which a cut position is searched
     * @return string
     */
    protected function createMaskpageAbstract($content, $minLength = 200) {
        // Every CR/LF becomes exactly one space, so byte offsets found in the
        // flattened string are valid for $content as well.
        $flattened = preg_replace("/\r|\n/", ' ', $content);

        if (strlen($flattened) <= $minLength) {
            return $content;
        }

        $cutPosition = strpos($flattened, ' ', $minLength);
        if ($cutPosition === false) {
            return $content;
        }

        return substr($content, 0, $cutPosition) . ' …';
    }

    /**
     *
     * Extracts content from content element and returns it as plain text
     * for writing it directly to the index.
     *
     * The text is built from header, subheader, bodytext and
     * tx_mask_plaintext of the given row.
     *
     * @author Christian Bülter <buelter@kennziffer.com>
     * @since 24.09.13
     * @param array $ttContentRow content element
     * @return string
     */
    public function getContentFromContentElement($ttContentRow) {
        // bodytext // edit ub
        $bodytext = $ttContentRow['header'].' '.$ttContentRow['subheader'].' '.$ttContentRow['bodytext'].' '.$ttContentRow['tx_mask_plaintext'];

        // following lines prevents having words one after the other like: HelloAllTogether
        $bodytext = str_replace('<td', ' <td', $bodytext);
        $bodytext = str_replace('<br', ' <br', $bodytext);
        $bodytext = str_replace('<p', ' <p', $bodytext);
        $bodytext = str_replace('<li', ' <li', $bodytext);

        if ($ttContentRow['CType'] == 'table') {
            // replace table dividers with whitespace
            $bodytext = str_replace('|', ' ', $bodytext);
        }
        $bodytext = strip_tags($bodytext);

        // hook for modifying a content element's content
        if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyContentFromContentElement'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyContentFromContentElement'])
        ) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyContentFromContentElement'] as $_classRef) {
                $_procObj = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($_classRef);
                $_procObj->modifyContentFromContentElement(
                    $bodytext, $ttContentRow, $this
                );
            }
        }

        // edit ub
        /*
         * Collapse every run of two or more whitespace characters into a
         * single space. A lone whitespace character is left untouched.
         */
        $bodytext = preg_replace('/\s\s+/', ' ', $bodytext);
        return $bodytext;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment