Skip to content

Instantly share code, notes, and snippets.

@davidhoang101
Forked from WengerK/README.md
Created October 26, 2018 04:44
Show Gist options
  • Save davidhoang101/5f626027a12666aae14bc6c23fd34312 to your computer and use it in GitHub Desktop.
Save davidhoang101/5f626027a12666aae14bc6c23fd34312 to your computer and use it in GitHub Desktop.
Programmatically use Solr on Drupal 8

🔍 Solr (6.1.0+) search Engine & Tika (1.13+) Extractor

We use Solr as the search index backend.

Solr needs to be configured for Drupal. Follow the INSTALL.txt found in the search_api_solr module.

As a prerequisite for running your own Solr server, you'll need Java 8 or higher (required by Solr 6.x).

Installation

Install all prerequisites and configuration from web/modules/contrib/search_api_solr/INSTALL.txt then

Start Solr with: $ bin/solr start

Create the new instance index-my-project $ bin/solr create -c index-my-project

The creation will generate a folder under $SOLR_PATH/6.1.0/server/solr/index-my-project

Copy all the configs files from web/modules/contrib/search_api_solr/solr-conf/6.x/* into $SOLR_PATH/6.1.0/server/solr/index-my-project/conf

$ cp web/modules/contrib/search_api_solr/solr-conf/6.x/* $SOLR_PATH/6.1.0/server/solr/index-my-project/conf

Restart Solr $ bin/solr restart

Check your Solr status with $ bin/solr status and at http://localhost:8983/solr/#/

Usage

Start Solr with:

$ bin/solr start

Index all your content with:

$ drush cron

Clean the index:

$ drush sapi-c [index]

Index everything at once:

$ drush sapi-i [index]

Drupal Configuration

Check the Solr configuration on admin/config/search/search-api/server/solr/edit and the Tika under admin/config/search/search_api_attachments

Add partial search

By default Solr doesn't match partial search terms. To enable this, edit the schema.xml file and add the following filters to the fieldType name="text".

<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25" />

Improved natural language search

You should improve natural language search by editing the stopwords.txt file. Ready-made stopword lists are available from the stopwords-iso project.

Change the default SnowballPorterFilterFactory from language="English" to language="French".

You should also improve natural language search by editing the synonyms.txt file.

Highlights

http://www.codewrecks.com/blog/index.php/2013/05/27/hilight-matched-text-inside-documents-indexed-with-solr-plus-tika/

<?php
/**
* @file
* Contains \Drupal\custom_search\Controller\SearchController.
*/
namespace Drupal\custom_search\Controller;
use Drupal\Core\Controller\ControllerBase;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Drupal\search_api\ParseMode\ParseModePluginManager;
use Symfony\Component\HttpFoundation\RequestStack;
use Drupal\search_api\Entity\Index;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Url;
/**
 * Page controller for the site search.
 *
 * Runs a Search API query against the "full_website" index using the "q" and
 * "type" query-string parameters, collects the "type" facets and passes
 * everything to the "custom_search_search_page" theme hook.
 */
class SearchController extends ControllerBase {

  /**
   * Number of results per page.
   *
   * @var int
   */
  private $limit = 12;

  /**
   * The master request, as returned by the request stack at construction.
   *
   * @var \Symfony\Component\HttpFoundation\Request
   */
  private $request;

  /**
   * The node storage.
   *
   * @var \Drupal\node\NodeStorageInterface
   */
  protected $nodeStorage;

  /**
   * The parse mode manager.
   *
   * @var \Drupal\search_api\ParseMode\ParseModePluginManager
   */
  private $parseModeManager;

  /**
   * Class constructor.
   *
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity
   *   The entity type manager, used to obtain the node storage.
   * @param \Symfony\Component\HttpFoundation\RequestStack $request
   *   The request stack; only its master request is kept.
   * @param \Drupal\search_api\ParseMode\ParseModePluginManager $parse_mode_manager
   *   The Search API parse mode plugin manager.
   */
  public function __construct(EntityTypeManagerInterface $entity, RequestStack $request, ParseModePluginManager $parse_mode_manager) {
    $this->nodeStorage = $entity->getStorage('node');
    $this->request = $request->getMasterRequest();
    $this->parseModeManager = $parse_mode_manager;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container) {
    // Instantiates this controller with its services.
    return new static(
      $container->get('entity_type.manager'),
      $container->get('request_stack'),
      $container->get('plugin.manager.search_api.parse_mode')
    );
  }

  /**
   * Builds the search results page.
   *
   * Reads "q" (keywords), "type" (optional facet filter) and "page" (pager
   * index) from the request. When no keywords are given, or the index cannot
   * be loaded, an empty result page is rendered.
   *
   * @return array
   *   A render array for the "custom_search_search_page" theme hook.
   */
  public function search() {
    $variables = ['results' => [], 'search' => []];

    // Retrieve the current route and its query parameters.
    $route = $this->request->attributes->get('_route');
    $params = $this->request->query->all();
    $search = $this->request->query->get('q');
    $type = $this->request->query->get('type');

    /* @var $search_api_index \Drupal\search_api\IndexInterface|null */
    // Index::load() returns NULL for an unknown index; do not query in that
    // case instead of failing on a NULL method call.
    $search_api_index = !empty($search) ? Index::load('full_website') : NULL;

    if ($search_api_index !== NULL) {
      // "page" is user input: coerce it to a non-negative integer so that a
      // malformed or negative value cannot produce an invalid offset.
      $page = $this->request->get('page');
      $page = is_scalar($page) ? max(0, (int) $page) : 0;

      // Create the query.
      $query = $search_api_index->query([
        'limit' => $this->limit,
        'offset' => $page * $this->limit,
      ]);
      $query->setFulltextFields(['title', 'body', 'filename', 'saa_field_file_document', 'saa_field_file_news', 'saa_field_file_page']);
      $parse_mode = $this->parseModeManager->createInstance('terms');
      $query->setParseMode($parse_mode);
      $query->keys($search);
      $query->sort('search_api_relevance', 'DESC');

      // Facets.
      $server = $search_api_index->getServerInstance();
      if ($server->supportsFeature('search_api_facets')) {
        $query->setOption('search_api_facets', [
          'type' => [
            'field' => 'type',
            'limit' => 20,
            'operator' => 'AND',
            'min_count' => 1,
            'missing' => TRUE,
          ],
        ]);
      }

      // Run a copy of the query BEFORE the type condition is added, so the
      // facet list is computed over all types and not only the selected one.
      $query_facets = clone $query;
      $results_facets = $query_facets->execute();
      $facets = $results_facets->getExtraData('search_api_facets', []);

      if (!empty($type)) {
        $query = $query->addCondition('type', $type);
      }
      $results = $query->execute();

      if (!empty($facets['type'])) {
        foreach ($facets['type'] as $key => $facet) {
          // Facet filters come back wrapped in double quotes.
          $filter = trim($facet['filter'], '"');
          $facets['type'][$key]['filter'] = $filter;
          $facets['type'][$key]['filter_name'] = $this->typeMachineNameToHuman($filter);
          // Link to the same search restricted to this facet value.
          $params['type'] = $filter;
          $facets['type'][$key]['url'] = Url::fromRoute($route, $params);
        }
      }

      $variables['search'] = [
        'search' => $search,
        'count' => $results->getResultCount(),
        'facets' => $facets,
        'type' => $type,
        'type_name' => $this->typeMachineNameToHuman($type),
      ];

      foreach ($results as $key => $result) {
        // The excerpt computed by Search API is used as the body. As an
        // alternative, getHighlights() and getExcerpt() can build one from
        // the 'highlighting' part of the 'search_api_solr_response' extra
        // data of the result set.
        $variables['results'][$key] = [
          'nid' => $this->getFirstFieldValue($result, 'nid'),
          'title' => $this->getFirstFieldValue($result, 'title'),
          'body' => $result->getExcerpt(),
          'original_body' => $this->getFirstFieldValue($result, 'body'),
        ];

        // The "type" key is only present when the item has a type value.
        $result_type = $this->getFirstFieldValue($result, 'type');
        if ($result_type !== NULL) {
          $variables['results'][$key]['type'] = $this->typeMachineNameToHuman($result_type);
        }
      }

      // Pager.
      pager_default_initialize($results->getResultCount(), $this->limit);
      $variables['pager'] = [
        '#type' => 'pager',
        '#quantity' => '5',
      ];
    }

    return [
      '#theme' => 'custom_search_search_page',
      '#variables' => $variables,
      // Set cache for 0 seconds.
      '#cache' => ['max-age' => 0],
    ];
  }

  /**
   * Returns the first value of a field on a result item.
   *
   * @param \Drupal\search_api\Item\ItemInterface $item
   *   The result item.
   * @param string $field_id
   *   The field identifier.
   *
   * @return mixed|null
   *   The first value, or NULL when the field is not available on the item
   *   or has no value.
   */
  private function getFirstFieldValue($item, $field_id) {
    $field = $item->getField($field_id);
    if ($field === NULL) {
      return NULL;
    }
    $values = $field->getValues();
    return isset($values[0]) ? $values[0] : NULL;
  }

  /**
   * Converts a content type machine name into a human-readable label.
   *
   * @param string|null $machine_name
   *   The content type machine name.
   *
   * @return string|\Drupal\Core\StringTranslation\TranslatableMarkup
   *   The translated label for known types; otherwise the machine name with
   *   its first character upper-cased (an empty string for NULL).
   */
  private function typeMachineNameToHuman($machine_name) {
    // Cast so that a missing type does not pass NULL to ucfirst().
    $label = ucfirst((string) $machine_name);
    switch ($machine_name) {
      case 'news':
        $label = $this->t('Actualité');
        break;

      case 'advisor':
        $label = $this->t('Conseiller');
        break;

      case 'product':
        $label = $this->t('Produit');
        break;

      case 'faq':
        $label = $this->t('Question-réponse');
        break;

      case 'partnership':
        $label = $this->t('Partenaire');
        break;

      case 'offer':
        $label = $this->t('Coupons Bella Vita');
        break;

      case 'building':
        $label = $this->t('Construction');
        break;

      case 'managementcontracts':
      case 'management_contracts':
        $label = $this->t('Mandats de gestion');
        break;
    }
    return $label;
  }

  /**
   * Finds the highlighting entry whose key contains the given item ID.
   *
   * @param string $item_id
   *   The item ID to look for.
   * @param array|null $solr_highlighting
   *   Highlighting data keyed by document ID.
   *
   * @return array
   *   The highlights of the first entry whose key contains $item_id, or an
   *   empty array when there is no match.
   */
  private function getHighlights($item_id, $solr_highlighting) {
    $item_id = (string) $item_id;
    // An empty needle would warn on PHP 7 and match every key on PHP 8.
    if ($item_id === '' || !is_array($solr_highlighting)) {
      return [];
    }
    foreach ($solr_highlighting as $id => $highlights) {
      if (strpos((string) $id, $item_id) !== FALSE) {
        return (array) $highlights;
      }
    }
    // Always return an array so the result can be passed to getExcerpt().
    return [];
  }

  /**
   * Builds an HTML excerpt from highlighting snippets.
   *
   * @param array $highlighting
   *   Lists of snippets, one list per highlighted field.
   *
   * @return string
   *   All snippets joined with '... ', with the [HIGHLIGHT] markers replaced
   *   by <mark> tags.
   */
  private function getExcerpt(array $highlighting) {
    $highlights = [];
    foreach ($highlighting as $highlight) {
      foreach ((array) $highlight as $subitem) {
        $highlights[] = $subitem;
      }
    }
    $excerpt = implode('... ', $highlights);
    return str_replace(['[HIGHLIGHT]', '[/HIGHLIGHT]'], ['<mark>', '</mark>'], $excerpt);
  }

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment