Created
December 1, 2011 19:25
-
-
Save elliotttf/1419169 to your computer and use it in GitHub Desktop.
Example of adding files to the Solr index when they are only linked to in the node body (not attached as managed files).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Implements hook_apachesolr_document_handlers().
 *
 * Registers example_add_documents() as the document builder for nodes that
 * are indexed under the 'example' namespace. For any other type/namespace
 * combination this module contributes no handler (nothing is returned).
 *
 * @param $type
 *   The entity type being indexed; only 'node' is handled here.
 * @param $namespace
 *   The indexing namespace; only 'example' is handled here.
 *
 * @return array|null
 *   A list with the single callback name 'example_add_documents', or NULL
 *   when the request is not for this module.
 */
function example_apachesolr_document_handlers($type, $namespace) {
  // Bail out early for anything that is not a node in our own namespace.
  if ($type != 'node' || $namespace != 'example') {
    return;
  }
  return array('example_add_documents');
}
/**
 * Implements hook_apachesolr_update_index().
 *
 * Runs a batched indexing pass over the nodes queued for the 'example'
 * namespace, modelled on apachesolr_attachments_update_index(). The hook
 * arguments are accepted for signature compatibility but are not used.
 *
 * Nothing happens unless text extraction is configured: either extraction is
 * delegated to Solr, or a Tika path has been set.
 *
 * NOTE(review): the body looks like a cron-style hook_update_index()
 * implementation rather than a per-document hook; confirm that this hook name
 * is really the intended entry point.
 *
 * @param $document
 *   The Solr document supplied by the hook invoker (unused).
 * @param $node
 *   The node supplied by the hook invoker (unused).
 * @param $namespace
 *   The indexing namespace supplied by the hook invoker (unused).
 *
 * @see apachesolr_attachments_update_index()
 */
function example_apachesolr_update_index($document, $node, $namespace) {
  // example_add_documents() invokes every hook_apachesolr_update_index()
  // implementation, which by name includes this function. Without a guard
  // each document built during a pass would start another, nested pass.
  static $running = FALSE;
  if ($running) {
    return;
  }

  $extract_with_solr = variable_get('apachesolr_attachments_extract_using', 'tika') == 'solr';
  if (!$extract_with_solr && !variable_get('apachesolr_attachments_tika_path', '')) {
    // No extraction backend configured, so there is nothing to index.
    return;
  }

  // Use the wall clock rather than REQUEST_TIME: REQUEST_TIME is fixed for
  // the whole request, so measuring elapsed time with it always yields 0 and
  // the time limit would never take effect.
  $start = time();
  $cron_try = variable_get('apachesolr_attachments_cron_try', 20);
  $cron_limit = variable_get('apachesolr_attachments_cron_limit', 100);
  $cron_time_limit = variable_get('apachesolr_attachments_cron_time_limit', 15);
  $num_tried = 0;

  $running = TRUE;
  try {
    do {
      $rows = apachesolr_get_nodes_to_index('example', $cron_try);
      // Calls example_apachesolr_document_handlers() and ultimately
      // example_add_documents().
      $success = apachesolr_index_nodes($rows, 'example');
      $num_tried += $cron_try;
      // Keep going while batches succeed and both the item budget and the
      // time budget (in seconds) have room left.
    } while ($success && ($num_tried < $cron_limit) && (time() - $start < $cron_time_limit));
  }
  catch (Exception $e) {
    // Release the guard so a later call in this request can run again.
    $running = FALSE;
    throw $e;
  }
  $running = FALSE;
}
/**
 * Builds Solr documents for files that are linked from a node's body.
 *
 * This assumes the files you are indexing are not part of the files table
 * but rather just linked in the node body. Only double-quoted href values
 * ending in ".pdf" or ".txt" are picked up, and each distinct href is
 * processed once.
 *
 * @param $node
 *   The node whose body is scanned for file links.
 * @param $namespace
 *   The indexing namespace; passed through to the hooks invoked here.
 *
 * @return array
 *   One ApacheSolrDocument per linked file that yielded text. Empty when the
 *   body links to no matching files.
 */
function example_add_documents($node, $namespace) {
  $hash = apachesolr_site_hash();

  // Find all of the documents.
  $documents = $matches = array();
  $body = _example_node_body_text($node);
  if (!preg_match_all('/<a ([^>]+)?href="([^"]+\.(pdf|txt))"/', $body, $matches)) {
    return $documents;
  }

  // The indexable field list does not depend on the file, so look it up once
  // instead of once per link.
  $indexed_fields = apachesolr_entity_fields('node');
  $language = empty($node->language) ? LANGUAGE_NONE : $node->language;

  // The same file may be linked several times in one body; index it once.
  foreach (array_unique($matches[2]) as $href) {
    $file = pathinfo($href);
    // NOTE(review): this hands the pathinfo() array to the extractor; confirm
    // that apachesolr_attachments_get_attachment_text() accepts that shape.
    $text = apachesolr_attachments_get_attachment_text($file);
    if (!$text) {
      watchdog('example', "No text to index, bro!");
      continue;
    }

    // Create the solr document.
    $document = new ApacheSolrDocument();

    // A single file might be attached to multiple nodes.
    // You might want to use something other than the filename
    // (if it's not unique) to define the document_id, a separate
    // table to track these files could work, or something like UUID.
    $file_id = $file['basename'];
    $document->id = apachesolr_document_id($file_id . '-' . $node->nid, 'file');
    $document->site = url(NULL, array('absolute' => TRUE));
    $document->hash = $hash;
    $document->entity_type = 'file';
    $document->entity_id = $file_id;
    $document->bundle = $node->type;
    $document->bundle_name = node_type_get_name($node);
    $document->label = $file['filename'];
    $document->is_nid = $node->nid;
    $document->url = file_create_url($href);

    // Links in body text are often plain paths or external URLs, for which
    // no stream wrapper instance is returned. Fall back to the href itself
    // rather than calling a method on FALSE.
    $wrapper = file_stream_wrapper_get_instance_by_uri($href);
    $document->path = $wrapper
      ? $wrapper->getDirectoryPath() . '/' . file_uri_target($href)
      : $href;

    $document->content = $file['filename'] . ' ' . $text;
    $document->ss_name = $node->name;
    // We want the name to also be searchable for keywords.
    $document->tos_name = $node->name;

    // Everything else uses dynamic fields.
    $document->is_uid = $node->uid;
    $document->bs_status = $node->status;
    $document->bs_sticky = $node->sticky;
    $document->bs_promote = $node->promote;
    $document->is_tnid = $node->tnid;
    $document->bs_translate = $node->translate;
    // LANGUAGE_NONE ('und') is the language-neutral code in Drupal 7.
    $document->ss_language = $language;

    // filectime() only works for paths PHP can stat. When the href is not
    // such a path, use the node's creation time instead of FALSE.
    $created = file_exists($href) ? filectime($href) : FALSE;
    if ($created === FALSE) {
      $created = $node->created;
    }
    $document->ds_created = apachesolr_date_iso($created);
    $document->ds_changed = $document->ds_created;

    // apachesolr_attachments-specific fields.
    $document->ss_filemime = file_get_mimetype($href);
    $document->ss_file_node_title = apachesolr_clean_text($node->title);
    $document->ss_file_node_url = url('node/' . $node->nid, array('absolute' => TRUE));

    // Add taxonomy to document.
    _example_add_taxonomy_fields($document, $node, $indexed_fields);

    // Let modules add to the document.
    foreach (module_implements('apachesolr_update_index') as $module) {
      $function = $module . '_apachesolr_update_index';
      $function($document, $node, $namespace);
    }
    drupal_alter('apachesolr_attachments_index', $document, $node, $file, $namespace);

    $documents[] = $document;
  }

  return $documents;
}

/**
 * Returns the raw body text of a node as a single string.
 *
 * A plain string body is returned unchanged. Otherwise the body is treated as
 * a field and the 'value' of every item for the node's language is joined.
 */
function _example_node_body_text($node) {
  if (empty($node->body)) {
    return '';
  }
  if (is_string($node->body)) {
    return $node->body;
  }

  $text = '';
  $items = field_get_items('node', $node, 'body');
  if ($items) {
    foreach ($items as $item) {
      if (isset($item['value'])) {
        $text .= ' ' . $item['value'];
      }
    }
  }
  return $text;
}

/**
 * Copies the node's taxonomy term reference fields onto a Solr document.
 *
 * Only fields of type 'taxonomy_term_reference' that are present on the node
 * and have an existing indexing callback are copied.
 */
function _example_add_taxonomy_fields($document, $node, $indexed_fields) {
  foreach ($indexed_fields as $index_key => $field_info) {
    // Add only taxonomy.
    if ($field_info['field']['type'] != 'taxonomy_term_reference') {
      continue;
    }
    // See if the node has fields that can be indexed.
    $field_name = $field_info['field']['field_name'];
    if (!isset($node->{$field_name})) {
      continue;
    }
    $function = $field_info['indexing_callback'];
    if (!$function || !function_exists($function)) {
      continue;
    }
    // NOTE: This function should always return an array. One node field may
    // be indexed to multiple Solr fields.
    $fields = $function($node, $field_name, $index_key, $field_info);
    foreach ($fields as $field) {
      // It's fine to use this method also for single value fields.
      $document->setMultiValue($field['key'], $field['value']);
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment