Skip to content

Instantly share code, notes, and snippets.

@dmitryd
Last active October 9, 2015 12:20
Show Gist options
  • Save dmitryd/5fd4100e5542ada38e1e to your computer and use it in GitHub Desktop.
Save dmitryd/5fd4100e5542ada38e1e to your computer and use it in GitHub Desktop.
Indexing file content in TYPO3 6.2 with Solr
<?php
namespace My\solrfileindexing\Xclass;
use TYPO3\CMS\Core\Utility\GeneralUtility;
/**
 * XCLASS replacement for the solrfal DocumentFactory that adds the extracted
 * text of an indexed file to the Solr document as its "content" field.
 *
 * Requires cc_text* extensions!
 * (Presumably those extensions register the "textExtract" services used in
 * extractContent() - confirm against the installed extensions.)
 */
class DocumentFactory extends \TYPO3\Solr\Solrfal\Indexing\DocumentFactory {

    /**
     * Sets the "content" field of the document to the indexable text of the
     * file, then lets the parent implementation add its own file information.
     *
     * @param \Apache_Solr_Document $document
     * @param \TYPO3\CMS\Core\Resource\File $file
     * @return void
     */
    protected function addFileInformation(\Apache_Solr_Document $document, \TYPO3\CMS\Core\Resource\File $file) {
        $document->setField('content', $this->extractContent($file));
        parent::addFileInformation($document, $file);
    }

    /**
     * Extracts the indexable text of a file.
     *
     * Files with MIME type "text/plain" are read directly. For every other
     * file the registered "textExtract" services for the file extension are
     * tried one after another until one of them yields non-empty output. The
     * raw text is finally passed through Tx_Solr_HtmlContentExtractor.
     *
     * @param \TYPO3\CMS\Core\Resource\File $file
     * @return string
     */
    protected function extractContent(\TYPO3\CMS\Core\Resource\File $file) {
        $fileContent = '';

        if ($file->getMimeType() === 'text/plain') {
            // we can read text files directly
            $fileContent = $file->getContents();
        } else {
            // other subtypes should be handled by the text service
            $fileExtension = $file->getExtension();
            $serviceConfiguration = array('wantedCharset' => 'utf-8');
            $serviceChain = '';
            $filePath = NULL;

            // Compare against '' instead of using empty(): an extracted "0"
            // is valid content and must not trigger another service run.
            while ($fileContent === '') {
                $service = GeneralUtility::makeInstanceService('textExtract', $fileExtension, $serviceChain);
                // makeInstanceService() returns FALSE when no service is left,
                // but may also return an error array when a service failed to
                // initialize. Only a real service object may be used.
                if (!is_object($service)) {
                    break;
                }

                // Exclude this service from subsequent lookups.
                $serviceChain .= ',' . $service->getServiceKey();

                // Resolve the local path once, and only if a service exists.
                if ($filePath === NULL) {
                    $filePath = $file->getForLocalProcessing(FALSE);
                }

                $service->setInputFile($filePath, $fileExtension);
                $service->process('', '', $serviceConfiguration);
                $fileContent = trim((string)$service->getOutput());
            }
        }

        /** @var \Tx_Solr_HtmlContentExtractor $contentExtractor */
        $contentExtractor = GeneralUtility::makeInstance('Tx_Solr_HtmlContentExtractor', $fileContent);
        return $contentExtractor->getIndexableContent();
    }
}
<?php
// Registers the XCLASS: maps the solrfal DocumentFactory class name to the
// My\solrfileindexing\Xclass\DocumentFactory replacement class.
// NOTE(review): presumably this belongs in the extension's ext_localconf.php - confirm.
$GLOBALS['TYPO3_CONF_VARS']['SYS']['Objects']['TYPO3\\Solr\\Solrfal\\Indexing\\DocumentFactory'] = array(
'className' => 'My\\solrfileindexing\\Xclass\\DocumentFactory',
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment