Skip to content

Instantly share code, notes, and snippets.

@yoshihara
Last active October 14, 2017 11:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoshihara/bed63fca0f852aed282f93f7c73b9e99 to your computer and use it in GitHub Desktop.
Save yoshihara/bed63fca0f852aed282f93f7c73b9e99 to your computer and use it in GitHub Desktop.
Test for embulk-output-groonga with BASIC authentication. BASIC auth credentials: groonga / test
groonga:$apr1$L.uiPlZS$nkSfHS1TwwtXhPuFvGFci/
# Test image: Groonga served by groonga-httpd, with the MeCab tokenizer, the
# mecab-ipadic-NEologd dictionary, a preloaded schema, and an htpasswd file
# used for HTTP Basic authentication.
# NOTE(review): "stable-slim" is a rolling tag, so rebuilds may pick up a
# different Debian release; consider pinning a release or digest.
FROM debian:stable-slim
LABEL maintainer="haruka yoshihara"

# Package install.
# DEBIAN_FRONTEND only matters while building, so it is declared as ARG
# instead of ENV to keep it out of the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
# The apt package lists are removed in the same layer so they do not end up
# in the image.
RUN apt-get update -y && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        curl \
        file \
        g++ \
        git \
        groonga \
        groonga-httpd \
        groonga-tokenizer-mecab \
        libmecab-dev \
        make \
        mecab \
        openssl \
        patch \
        sudo \
        xz-utils \
    && rm -rf /var/lib/apt/lists/*

# mecab-ipadic-neologd
# NOTE: the dictionary is installed into /usr/local/lib/mecab/dic/ipadic
# because that is the directory MeCab looks in when it searches for its
# dictionary.
RUN git clone --depth=1 https://github.com/neologd/mecab-ipadic-neologd.git /usr/src/mecab-ipadic-neologd && \
    /usr/src/mecab-ipadic-neologd/bin/install-mecab-ipadic-neologd -n -y -p /usr/local/lib/mecab/dic/ipadic && \
    rm -rf /usr/src/mecab-ipadic-neologd

# Basic authentication: password file for the web server.
# NOTE(review): this bakes a credential file into the image; acceptable for a
# test image only.
COPY .htpasswd /etc/nginx/.htpasswd

# groonga
WORKDIR /var/lib/groonga/db/
COPY schema.grn ./
# Feed the schema to the database file "db" in the current directory.
# A redirect is used instead of `cat ... |` so the step's exit status does not
# depend on a pipeline.
# NOTE(review): no `-n` is passed, so this presumably expects the database to
# exist already — confirm against the groonga package behaviour.
RUN groonga db < schema.grn
COPY groonga-httpd.conf /etc/groonga/httpd/groonga-httpd.conf
# NOTE(review): processes do not survive between build layers, so this stop is
# likely a no-op; kept to preserve the original build behaviour.
RUN service groonga-httpd stop

WORKDIR /var/lib/groonga/
# Pre-create the log and query-log files under /var/lib/groonga/log.
RUN mkdir -p log && touch log/log log/query-log

EXPOSE 10041
# ENTRYPOINT ["groonga"]
# Run groonga-httpd in the foreground so it stays the container's main process.
CMD ["groonga-httpd", "-g", "daemon off;"]
# Embulk configuration: load ./test.csv into the Groonga table "Memo".

# Input: CSV file(s) whose path starts with ./test.csv.
in:
  type: file
  path_prefix: ./test.csv
  parser:
    type: csv
    charset: UTF-8
    newline: CRLF
    delimiter: ","
    # No header row: the first line is already data.
    skip_header_lines: 0
    columns:
      - {name: _key, type: string}
      - {name: text, type: string}

# Alternative output for debugging: print records to stdout.
# out:
#   type: stdout

# Output: Groonga over HTTP on localhost:10041.
# NOTE(review): no credentials are configured here — confirm whether the
# target endpoint requires Basic authentication.
out:
  type: groonga
  table: Memo
  key_column: _key
  host: localhost
  port: 10041
  protocol: http
  columns:
    - {name: "_key", type: string}
    - {name: "text", type: string}
# groonga-httpd main (process-level) settings.

# A single worker process.
worker_processes  1;

# Location of the master process PID file.
pid  /var/run/groonga-httpd.pid;

# When groonga-httpd is started as root, this should match the owner of the
# Groonga database files.
user  root root;

# Groonga tuning variables listed with `env` so that they are kept in the
# server's environment.
env  GRN_IN_VALUES_TOO_MANY_INDEX_MATCH_RATIO;
env  GRN_BETWEEN_TOO_MANY_INDEX_MATCH_RATIO;
env  GRN_II_CURSOR_SET_MIN_ENABLE;
env  GRN_INDEX_CHUNK_SPLIT_ENABLE;
env  GRN_MECAB_CHUNKED_TOKENIZE_ENABLED;
env  GRN_MECAB_CHUNK_SIZE_THRESHOLD;

events {
    # Upper bound on simultaneous connections per worker.
    worker_connections  1024;
}
# HTTP server: Groonga API under /d/ and the admin HTML pages under /.
http {
    include            mime.types;
    default_type       application/octet-stream;

    sendfile           on;
    keepalive_timeout  65;

    # The default Groonga database path.
    groonga_database /var/lib/groonga/db/db;
    # groonga_database /usr/local/groonga/favorites.db;

    # Whether to create the Groonga database automatically when it does not
    # exist.
    #
    # Note that enabling this option is dangerous when worker_processes is
    # greater than 1, because several worker processes may try to create the
    # same Groonga database at the same time. If you can create the database
    # before running groonga-httpd, you should do so.
    groonga_database_auto_create off;

    # The default Groonga cache limit. The cache limit can be set
    # for each worker. It can't be set for each Groonga database.
    # groonga_cache_limit 100;

    server {
        listen       10041;
        server_name  localhost;

        # Groonga command API.
        # NOTE(review): auth_basic is configured only on "/" below; this
        # location is not password-protected — confirm that is intended.
        location /d/ {
            groonga on;

            # Groonga process log file.
            groonga_log_path /var/lib/groonga/log/log;

            # Groonga query log file.
            groonga_query_log_path /var/lib/groonga/log/query-log;

            # Database path for this location (same as the http-level default).
            groonga_database /var/lib/groonga/db/db;

            # If you send large data with one 'load' command, you may need to
            # increase the max size limitation.
            # client_max_body_size 50m;

            # CORS headers for a browser client served from localhost:8080.
            add_header 'Access-Control-Allow-Credentials' true;
            add_header 'Access-Control-Allow-Origin' 'http://localhost:8080';
            add_header 'Access-Control-Allow-Methods' 'POST, GET, OPTIONS';
            add_header 'Access-Control-Allow-Headers' 'Authorization,Content-Type,Accept,Origin,User-Agent,DNT,Cache-Control,X-Mx-ReqToken,Keep-Alive,X-Requested-With,If-Modified-Since';
            # The media type and its charset parameter must be separated by
            # ';' for the header value to be well-formed.
            add_header 'Content-Type' 'text/json; charset=UTF-8';
        }

        # Admin HTML pages, protected by HTTP Basic authentication.
        location / {
            auth_basic           "Restricted";
            auth_basic_user_file /etc/nginx/.htpasswd;
            root                 /usr/share/groonga/html/admin;
            index                index.html;
        }

        # Static error page for server-side errors.
        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
            root html;
        }
    }
}
# Drop any previous definitions so the schema can be loaded again.
table_remove --name Memo
table_remove --name Terms

# Memo: records keyed by a ShortText key, stored in a patricia-trie table.
table_create --name Memo --flags TABLE_PAT_KEY --key_type ShortText --default_tokenizer TokenMecab

# Terms: lexicon table; keys are tokenized with MeCab and normalized.
table_create --name Terms --flags TABLE_PAT_KEY --key_type ShortText --default_tokenizer TokenMecab --normalizer NormalizerAuto

# Memo.text: scalar text column holding the body of each record.
column_create --table Memo --name text --flags COLUMN_SCALAR --type ShortText

# Terms.text_index: index with positions over Memo.text.
column_create --table Terms --name text_index --flags COLUMN_INDEX|WITH_POSITION --type Memo --source text
1,hogefuga
2,bar
3,hoge
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment