Skip to content

Instantly share code, notes, and snippets.

@Deviad
Created November 21, 2020 19:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Deviad/486513dc0ab5f82f6ea1ba06d3dc9d6b to your computer and use it in GitHub Desktop.
Save Deviad/486513dc0ab5f82f6ea1ba06d3dc9d6b to your computer and use it in GitHub Desktop.
Fscrawler Connection refused
---
# FSCrawler job settings for the job named "idx".
# The nesting below (fs -> ocr, elasticsearch -> nodes) is required: with every
# key at column 0 the "fs", "ocr" and "elasticsearch" sections parse as null.
name: "idx"

# Filesystem crawl options.
fs:
  # Directory to crawl; matches the bind-mount target used for the fscrawler
  # container (/usr/app/data).
  url: "/usr/app/data"
  update_rate: "15m"
  # Quoted so the percentage is unambiguously a string.
  indexed_chars: "100%"
  json_support: false
  filename_as_id: false
  add_filesize: true
  remove_deleted: true
  add_as_inner_object: false
  store_source: false
  index_content: false
  attributes_support: false
  raw_metadata: false
  xml_support: false
  index_folders: true
  lang_detect: true
  continue_on_error: false
  ocr:
    language: "eng"
    enabled: true
    pdf_strategy: "ocr_and_text"
  follow_symlinks: false

# Elasticsearch connection options.
elasticsearch:
  nodes:
    # "elasticsearch" is the Compose service name, resolvable on the shared
    # "elk" network.
    - url: "http://elasticsearch:9200"
  # NOTE(review): credentials are committed in plain text; the password matches
  # ELASTIC_PASSWORD in the Compose file. Consider injecting them instead.
  username: "elastic"
  password: "changeme"
  bulk_size: 100
  flush_interval: "5s"
  byte_size: "10mb"
---
# Docker Compose stack: Elasticsearch, Logstash, Kibana and an FSCrawler job,
# all attached to the "elk" bridge network.
version: '3.2'

services:
  elasticsearch:
    container_name: elasticsearch
    build:
      context: elasticsearch/
      args:
        ELK_VERSION: 7.9.3
    volumes:
      - type: bind
        source: ${PWD}/elasticsearch/config/elasticsearch.yml
        target: /usr/share/elasticsearch/config/elasticsearch.yml
        read_only: true
      - type: volume
        source: elasticsearch
        target: /usr/share/elasticsearch/data
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      - "ES_JAVA_OPTS=-Xmx256m -Xms256m"
      - "ELASTIC_PASSWORD=changeme"
      - "node.name=elasticsearch"
      - "discovery.seed_hosts=elasticsearch"
      - "cluster.initial_master_nodes=elasticsearch"
      # Use single node discovery in order to disable production mode and avoid bootstrap checks.
      # see: https://www.elastic.co/guide/en/elasticsearch/reference/current/bootstrap-checks.html
      # NOTE(review): the variables above set seed hosts / initial master nodes
      # rather than discovery.type=single-node, so the comment above may no
      # longer describe this configuration. Confirm bootstrap checks are not
      # preventing Elasticsearch from starting.
    networks:
      - elk

  logstash:
    build:
      context: logstash/
      args:
        ELK_VERSION: 7.9.3
    volumes:
      - type: bind
        source: ${PWD}/logstash/config/logstash.yml
        target: /usr/share/logstash/config/logstash.yml
        read_only: true
      - type: bind
        source: ${PWD}/logstash/pipeline
        target: /usr/share/logstash/pipeline
        read_only: true
    ports:
      - "5044:5044"
      - "5001:5001/tcp"
      - "5001:5001/udp"
      - "9600:9600"
    environment:
      LS_JAVA_OPTS: "-Xmx256m -Xms256m"
    networks:
      - elk
    depends_on:
      - elasticsearch

  kibana:
    build:
      context: kibana/
      args:
        ELK_VERSION: 7.9.3
    volumes:
      - type: bind
        source: ${PWD}/kibana/config/kibana.yml
        target: /usr/share/kibana/config/kibana.yml
        read_only: true
    ports:
      - "5601:5601"
    networks:
      - elk
    depends_on:
      - elasticsearch

  fscrawler:
    environment:
      FSCRAWLER_FNAME: "fscrawler-es7-2.7-SNAPSHOT"
    build:
      context: fscrawler/
    restart: always
    volumes:
      # Documents to index (read-only inside the container).
      - type: bind
        source: ${HOME}/document-folder/
        target: /usr/app/data/
        read_only: true
      # FSCrawler job configuration directory.
      - type: bind
        source: ${PWD}/fscrawler/config/
        target: /usr/app/config/
    # depends_on only orders container start-up; it does not wait for
    # Elasticsearch to accept connections, hence the wait-for-it.sh call below.
    depends_on:
      - elasticsearch
      - kibana
    # "$$" stops Compose from substituting PWD / FSCRAWLER_FNAME with values
    # from the host environment; bash expands them inside the container.
    # NOTE(review): the fscrawler Dockerfile appears to declare a shell-form
    # ENTRYPOINT. Docker ignores CMD/command arguments for shell-form
    # entrypoints, so this command (including the wait) only takes effect if
    # the entrypoint is changed or overridden, and it also needs bash and
    # wait-for-it.sh to be present in the image. Confirm.
    command: >-
      bash -c "./wait-for-it.sh -h elasticsearch -t 60 -p 9200
      && $${PWD}/$${FSCRAWLER_FNAME}/bin/fscrawler --config_dir /usr/app/config idx --rest"
    networks:
      - elk

networks:
  elk:
    driver: bridge

volumes:
  # Named volume holding the Elasticsearch data directory (default driver).
  elasticsearch:
# ---- Build stage: package the FSCrawler distribution with Maven ----
FROM openjdk:8-alpine AS builder
ENV LANG=C.UTF-8
ENV MAVEN_OPTS="-Xmx256m -Xms256m"
RUN apk add --update --no-cache openssl wget maven
COPY . /usr/src/
WORKDIR /usr/src/
RUN mvn clean package

# ---- Runtime stage: JRE-only image holding the unpacked distribution ----
FROM openjdk:8-jre-alpine
ENV ES=es7
ENV VERSION=2.7-SNAPSHOT
# Expands to fscrawler-es7-2.7-SNAPSHOT (ES and VERSION are set by the
# preceding ENV instructions).
ENV FSCRAWLER_FNAME=fscrawler-$ES-$VERSION
# The former "apk add --update" installed no packages and only left an index
# cache in the layer, so it is dropped.
# NOTE(review): no USER instruction switches to the fscrawler account created
# here; confirm whether the process is meant to run as that user.
RUN addgroup -S fscrawler && adduser -S -G fscrawler fscrawler
WORKDIR /usr/app
COPY --from=builder /usr/src/distribution/es7/target/$FSCRAWLER_FNAME.zip .
RUN unzip $FSCRAWLER_FNAME.zip && rm $FSCRAWLER_FNAME.zip \
    && mkdir -p data/idx \
    && mkdir config \
    && chown -R fscrawler:fscrawler .
# Shell form is kept so $FSCRAWLER_FNAME is expanded at run time; "exec"
# replaces the wrapping /bin/sh so the launcher receives stop signals directly.
# NOTE(review): with a shell-form ENTRYPOINT, any CMD / Compose "command" is
# ignored, so a wait-for-Elasticsearch wrapper passed as the command will not
# run.
ENTRYPOINT exec $FSCRAWLER_FNAME/bin/fscrawler --config_dir /usr/app/config idx --rest
drwxr-xr-x@ 37 spotted staff 1184 Nov 21 18:31 .
drwxr-xr-x 9 spotted admin 288 Nov 21 18:16 ..
-rw-r--r--@ 1 spotted staff 14340 Nov 21 21:19 .DS_Store
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 .github
-rwxr-xr-x@ 1 spotted staff 59 Nov 20 07:11 .gitignore
-rwxr-xr-x@ 1 spotted staff 228 Nov 20 07:11 .mergify.yml
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 .mvn
-rwxr-xr-x@ 1 spotted staff 546 Nov 20 07:11 .travis.yml
-rwxr-xr-x@ 1 spotted staff 3225 Nov 20 07:11 CODE_OF_CONDUCT.md
-rwxr-xr-x@ 1 spotted staff 4688 Nov 20 07:11 CONTRIBUTING.md
-rw-r--r--@ 1 spotted staff 701 Nov 21 19:39 Dockerfile
-rwxr-xr-x@ 1 spotted staff 11358 Nov 20 07:11 LICENSE
-rwxr-xr-x@ 1 spotted staff 137 Nov 20 07:11 NOTICE
-rwxr-xr-x@ 1 spotted staff 5356 Nov 20 07:11 README.md
drwxr-xr-x@ 5 spotted staff 160 Nov 21 17:01 beans
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 cli
drwxr-xr-x@ 5 spotted staff 160 Nov 21 18:16 config
drwxr-xr-x@ 4 spotted staff 128 Nov 21 16:30 contrib
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 core
drwxr-xr-x@ 6 spotted staff 192 Nov 20 07:11 crawler
-rwxr-xr-x@ 1 spotted staff 235 Nov 20 07:11 deploy-settings.xml
drwxr-xr-x@ 7 spotted staff 224 Nov 21 18:13 distribution
drwxr-xr-x@ 7 spotted staff 224 Nov 20 07:11 docs
drwxr-xr-x@ 7 spotted staff 224 Nov 21 19:24 elasticsearch-client
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 framework
drwxr-xr-x@ 6 spotted staff 192 Nov 20 07:11 integration-tests
-rwxr-xr-x@ 1 spotted staff 79 Nov 20 07:11 lgtm.yml
-rwxr-xr-x@ 1 spotted staff 45158 Nov 20 07:11 pom.xml
-rwxr-xr-x@ 1 spotted staff 6969 Nov 20 07:11 release.sh
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 rest
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 settings
drwxr-xr-x@ 4 spotted staff 128 Nov 21 16:15 src
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 test-documents
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 test-framework
drwxr-xr-x@ 4 spotted staff 128 Nov 20 07:11 tika
-rwxr-xr-x@ 1 spotted staff 291 Nov 20 07:11 travis.sh
-rwxr-xr-x@ 1 spotted staff 5224 Nov 20 07:11 wait-for-it.sh
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment