Skip to content

Instantly share code, notes, and snippets.

@barryhunter
Created July 20, 2022 11:49
Show Gist options
  • Save barryhunter/fba3938f7d9764a35be14bc363e673db to your computer and use it in GitHub Desktop.
Save barryhunter/fba3938f7d9764a35be14bc363e673db to your computer and use it in GitHub Desktop.
A conventient system to automate automatic rebuilding of sphinx/manticore indexed. A distilled and simplified version of https://github.com/geograph-project/geograph-project/blob/british-isles/system/docker/manticore/usr/local/bin/indexer-wrapper.php
#!/usr/bin/php
<?php
$db = mysqli_connect($_SERVER['MYSQL_HOST'],$_SERVER['MYSQL_USER'],$_SERVER['CONF_DB_PWD'],$_SERVER['MYSQL_DATABASE']);
if (mysqli_connect_errno()) {
throw new RuntimeException('mysqli connection error: ' . mysqli_connect_error());
}
$server_id = db_Quote(trim(`hostname`));
#####################################################
//this is deliberately a GLOBAL lock, so that no two instances (even staging!) are indexing at the same time!
if (!db_getOne("SELECT GET_LOCK('indexer_active',60)"))
die("unable to get a lock;\n");
$hour = date('G');
$indexes = db_getAll("
SELECT sph_index.index_name, preindex, postindex, server_id, last_indexed
FROM sph_index LEFT JOIN sph_server_index ON (sph_index.index_name = sph_server_index.index_name AND server_id = $server_id)
WHERE DATE_ADD(coalesce(last_indexed,'2000-01-01 00:00:00'), interval `minutes` minute) < NOW() AND minhour <= $hour AND active = 1 ORDER BY type+0");
if (empty($indexes)) {
$done = array();
foreach ($indexes as $row) {
if (!empty($done[$row['index_name']])) //may of been done as pre/post on previous run!
continue;
############
$list = array();
if (!empty($row['preindex']))
$list = array($row['preindex']=>1)+$list;
$list[$row['index_name']]=1;
if (!empty($row['postindex']))
$list[$row['postindex']]=1;
############
$cmd = "indexer --config /etc/sphinxsearch/sphinx.conf ".implode(" ",array_keys($list))." --rotate"; //--sighup-each if large indexes?
passthru($cmd);
foreach ($list as $index => $dummy) {
$name = db_Quote(trim($index));
$sql = "REPLACE INTO sph_server_index SET index_name = $name, server_id = $server_id, last_indexed = NOW()";
db_Execute($sql);
$done[$index]=1;
}
############
}
}
db_Execute("DO RELEASE_LOCK('indexer_active')");
exit;
#####################################################
/*
Tables required:
CREATE TABLE `sph_index` (
`index_name` varchar(64) NOT NULL,
`active` tinyint(3) unsigned NOT NULL DEFAULT 1,
`minutes` mediumint(9) NOT NULL DEFAULT 15,
`minhour` tinyint(3) unsigned NOT NULL DEFAULT 0,
`preindex` varchar(64) NOT NULL,
`postindex` varchar(64) NOT NULL,
`type` enum('master','main','delta','single','static') NOT NULL DEFAULT 'single',
`created` datetime NOT NULL,
`updated` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
PRIMARY KEY (`index_name`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `sph_server_index` (
`index_name` varchar(64) NOT NULL,
`server_id` varchar(64) NOT NULL DEFAULT '?',
`last_indexed` datetime NOT NULL,
`updated` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
PRIMARY KEY (`index_name`,`server_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
Adding a new index is simple as:
INSERT INTO sph_index SET index_name = 'index123', minutes=300, type='single';
... creates an index that is indexed about every 300 minutes. It will automatically refresh on all search servers (that run the indexer-wrapper.php)
For a main+delta use:
INSERT INTO sph_index SET index_name = 'idx1_main', postindex = 'idx1_delta', minutes=86400, type='main';
INSERT INTO sph_index SET index_name = 'idx1_delta', minutes=300, type='delta';
... creates both indexes, that run on differnet schedules. whenever the main is recreated, the delta is always immidately processed too (using 'postindex')
Set 'minhour' on an index, so it wont be run in the early hours. Eg 'minhour=7' means that index wont be refreshed in the early hours (eg if nobody is using the site, nopoint updating an index)
*/
#####################################################
# real basic wrapper (somewhat like adodb)
function db_Quote($in) {
return "'".mysqli_real_escape_string($GLOBALS['db'], $in)."'";
}
function db_getOne($sql) {
$result = mysqli_query($GLOBALS['db'], $sql);
return mysqli_fetch_array($result)[0];
}
function db_getAll($sql) {
$data = array();
$result = mysqli_query($GLOBALS['db'], $sql);
while ($row = mysqli_fetch_assoc($result))
$data[] = $row;
return $data;
}
function db_Execute($sql) {
return mysqli_query($GLOBALS['db'], $sql);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment