Skip to content

Instantly share code, notes, and snippets.

@manticoresearch
manticoresearch / docker-compose.yml
Last active May 11, 2020 16:25
MariaDB + Manticore: run `docker-compose up` to start all services, then `./rebuild.sh` to rebuild the index from MariaDB and rotate it
# Compose stack: Manticore Search next to the MariaDB server its index is built from.
version: '3.1'
services:
  # Manticore Search; ./index.conf is mounted in as the daemon's manticore.conf
  manticore:
    image: manticoresearch/manticore
    volumes:
      - ./index.conf:/etc/manticoresearch/manticore.conf
  # MariaDB server
  db:
    image: mariadb
    environment:
      # Demo-only root password; replace it for anything beyond local experiments
      - MYSQL_ROOT_PASSWORD=pass
@manticoresearch
manticoresearch / index.php
Last active April 9, 2020 02:23
./www/index.php which gets a search request, finds the results in Manticore and renders them
<?php /* Search form. The previous query is echoed back HTML-escaped (prevents reflected XSS); a missing "search" parameter yields an empty field instead of an undefined-index notice. */ ?>
<form><h1>Manticore</h1><input name="search" type="text" style="width: 50%; border: 1px solid" value="<?=htmlspecialchars($_GET['search'] ?? '', ENT_QUOTES, 'UTF-8')?>"></form>
<hr>
<?php
if (isset($_GET['search'])) { # we have a search request, let's process it
$ch = curl_init(); # initializing curl
curl_setopt($ch, CURLOPT_URL,"http://manticore:9308/sql"); # we'll connect to Manticore's /sql endpoint via HTTP. There's also /json/search/ which gives much more granular control, but for the sake of simplicity we'll use the /sql endpoint
curl_setopt($ch, CURLOPT_POST, 1); # we'll send via POST
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); # we need the response back, don't output it
curl_setopt($ch, CURLOPT_POSTFIELDS, "mode=raw&query=SELECT url, highlight({}, 'title') title, highlight({}, 'body') body FROM rt WHERE MATCH('{$_GET['search']}') LIMIT 10"); /* here we are SELECTing :
- url
@manticoresearch
manticoresearch / Dockerfile
Last active April 9, 2020 02:24
php/Dockerfile which builds an image with php 7.4 + wget + mysqli extension
# Let's take php 7.4 as a base image
FROM php:7.4-cli
# We'll also install wget and the PHP mysqli extension.
# --no-install-recommends and removing the apt package lists keep the layer small;
# ca-certificates is requested explicitly so wget keeps working over HTTPS.
RUN apt-get update \
    && apt-get -y install --no-install-recommends ca-certificates wget \
    && docker-php-source extract \
    && docker-php-ext-install mysqli \
    && docker-php-source delete \
    && rm -rf /var/lib/apt/lists/*
# We'll use load.php, so we need to copy it to the image
COPY load.php /usr/src/myapp/
@manticoresearch
manticoresearch / docker-compose.yml
Last active April 9, 2020 02:24
docker-compose.yml to run a simple web crawler: manticore, php, php+apache
version: '2.2'
services:
  # Manticore Search is a small yet powerful database for search with awesome full-text search capabilities
  manticore:
    # we'll just use their official image, pinned to 3.4.0
    image: manticoresearch/manticore:3.4.0
    # mount ./data from the host at /var/lib/manticore for data persistence
    volumes:
      - ./data:/var/lib/manticore
@manticoresearch
manticoresearch / load.php
Last active April 9, 2020 02:25
php/load.php that can read from wget at STDIN and write to Manticore
<?php
# Setup for the loader: open STDIN, connect to Manticore and make sure the "rt" table exists.
$f = fopen('php://stdin', 'r'); # we'll be waiting for data at STDIN
$manticore = new mysqli('manticore', '', '', '', 9306); # let's connect to Manticore Search via MySQL protocol
if ($manticore->connect_errno) { # stop right away instead of running every later query against a dead connection
    fwrite(STDERR, "Can't connect to Manticore: {$manticore->connect_error}\n");
    exit(1);
}
/* creating a table "rt" if it doesn't exist with the following settings:
- html_strip='1': stripping HTML is on
- html_remove_elements='style,script,a': for HTML tags <style>/<script>/<a> we don't need their contents, so we are stripping them completely
- morphology='stem_en': we'll use English stemmer as a morphology processor
- index_sp='1': we'll also index sentences and paragraphs for more advanced full-text search capabilities and better relevance
*/
if (!$manticore->query("CREATE TABLE IF NOT EXISTS rt(title text, body text, url text stored) html_strip='1' html_remove_elements='style,script,a' morphology='stem_en' index_sp='1'")) {
    # without the table nothing read from STDIN could be stored, so report the error and stop
    fwrite(STDERR, "Can't create table rt: {$manticore->error}\n");
    exit(1);
}
while (!feof($f)) { # reading from STDIN while there's something