Skip to content

Instantly share code, notes, and snippets.

View mosuka's full-sized avatar
🏠
Working from home

Minoru Osuka mosuka

🏠
Working from home
View GitHub Profile
@mosuka
mosuka / tokenizers.py
Created May 16, 2021 11:24
Whoosh Tokenizer for Janome
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Minoru Osuka
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
" - For Neovim: stdpath('data') . '/plugged'
" - Avoid using standard Vim directory names like 'plugin'
call plug#begin('~/.vim/plugged')
" Make sure you use single quotes
" Shorthand notation; fetches https://github.com/junegunn/vim-easy-align
Plug 'junegunn/vim-easy-align'
" Any valid git URL is allowed
@mosuka
mosuka / raspberry_pi_setup.txt
Last active August 11, 2020 11:57
Raspberry Pi setup
# step1 - initialize OS
$ export RASPBERRY_PI_HOSTNAME=raspberrypi001
$ touch /media/${USER}/boot/ssh
$ ls /media/${USER}/boot/ssh
$ sed -i -e "s/rootfstype=ext4/rootfstype=ext4 cgroup_enable=cpuset cgroup_enable=memory cgroup_memory=1/g" /media/${USER}/boot/cmdline.txt
$ cat /media/${USER}/boot/cmdline.txt
$ sudo sed -i -e "s/raspberrypi/${RASPBERRY_PI_HOSTNAME}/g" /media/${USER}/rootfs/etc/hostname
$ cat /media/${USER}/rootfs/etc/hostname
$ sudo sed -i -e "s/raspberrypi/${RASPBERRY_PI_HOSTNAME}/g" /media/${USER}/rootfs/etc/hosts
$ cat /media/${USER}/rootfs/etc/hosts
#!/usr/bin/env bash
rm -rf ca
mkdir -p ca/{client,server}
#openssl genrsa -aes256 -out ca/ca.key 4096 chmod 400 ca/ca.key
openssl genrsa -out ca/ca.key 4096
chmod 400 ca/ca.key
openssl req -new -x509 -sha256 -days 730 -key ca/ca.key -out ca/ca.crt -subj '/C=JP/ST=Tokyo/L=Tokyo/O=Example Ltd./OU=Web/CN=example.com' -nodes
chmod 444 ca/ca.crt
# download wikipedia dump
curl -o ~/tmp/enwiki-20190101-pages-articles.xml.bz2 https://dumps.wikimedia.org/enwiki/20190101/enwiki-20190101-pages-articles.xml.bz2
# clone wikiextractor
git clone git@github.com:attardi/wikiextractor.git
# parse wikipedia dump
$ cd wikiextractor
$ ./WikiExtractor.py -o ~/tmp/enwiki --json ~/tmp/enwiki-20190101-pages-articles.xml.bz2
#!/bin/bash
grafana-server --config=/usr/local/etc/grafana/grafana.ini --homepath /usr/local/share/grafana cfg:default.paths.logs=/usr/local/var/log/grafana cfg:default.paths.data=/usr/local/var/lib/grafana cfg:default.paths.plugins=/usr/local/var/lib/grafana/plugins
Ubuntu 16.04 LTS (Xenial Xerus)
$ sudo apt-get install libleveldb-dev libstemmer-dev libicu-dev build-essential
$ go get -u -v github.com/blevesearch/cld2
$ cd $GOPATH/src/github.com/blevesearch/cld2
$ git clone https://github.com/CLD2Owners/cld2.git
$ cd cld2/internal
$ ./compile_libs.sh
$ sudo cp *.so /usr/local/lib
$ go get -u -v -tags full github.com/blevesearch/bleve
$ curl -OL https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip
$ sudo unzip protoc-3.3.0-linux-x86_64.zip -d /opt/protoc-3.3.0
$ sudo ln -s /opt/protoc-3.3.0/bin/protoc /usr/bin/protoc
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<profiles version="12">
<profile kind="CodeFormatterProfile" name="Solr" version="12">
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
package main
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve"
_ "github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
_ "github.com/blevesearch/blevex/lang/ja"
"os"
)