This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.HashMap; | |
import java.util.Map; | |
public class NgramCreator { | |
/** | |
* 入力したtextからn-gramを生成. | |
* n-gramとその出現回数を格納したMapを返す. | |
* 生成時,半角スペースでsplitして1単語とみなす. | |
* | |
* @param text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# -*- coding: utf-8 -*- | |
############################################################################### | |
# LIBSVM(LIBLINEAR)の学習データのスケーリング(標準化)を行う. | |
# 各素性が平均0,分散1の正規分布に従うようにスケーリングする. | |
# | |
# 次のコマンドで実行できる. | |
# $ python libsvm_gaussian_scaler.py [options] | |
# [options] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# -*- coding: utf-8 -*- | |
############################################################ | |
# | |
# テキストファイルの全ての行に共通の置換処理を行うスクリプト. | |
# | |
# 使い方: | |
# ・19,22行目付近の「置換対象の文字列」「置換後の文字列」を設定. | |
# ・下記コマンドで実行. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedReader; | |
import java.io.FileReader; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.concurrent.LinkedBlockingQueue; | |
import backtype.storm.spout.SpoutOutputCollector; | |
import backtype.storm.task.TopologyContext; | |
import backtype.storm.topology.OutputFieldsDeclarer; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# -*- coding: utf-8 -*- | |
############################################################################### | |
# LIBSVM(LIBLINEAR)の学習データをコサイン正規化する. | |
# | |
# 次のコマンドで実行できる. | |
# $ python libsvm_cos-normalize.py [options] | |
# [options] | |
# -i file: 入力ファイル.省略すると35行目付近のINPUT_PATHで指定した値となる. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import opennlp.tools.postag.POSModel; | |
import opennlp.tools.postag.POSTaggerME; | |
public class OpennlpPOSTaggerTest { | |
public static void main(String[] args) throws Exception { | |
// モデルファイル |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 下記ページのコードを一部修正したものです. | |
# http://sugamasao.hatenablog.com/entry/2014/11/17/000355 | |
require 'csv' | |
original_data = { | |
'wikipedia' => 'jawiki-latest-all-titles-in-ns0', | |
'hatena' => 'keywordlist_furigana.csv' | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# -*- coding: utf-8 -*- | |
############################################################################### | |
# LIBLINEARのパラメータのグリッドサーチを行い,Accuracyが高かったパラメータTop10を表示. | |
# 内部でLIBSVMに付属されているgrid.pyを呼び出しているので,用意した上でディレクトリの | |
# パスを37行目のGRID_DIRPATHに記述しておくこと. | |
# (grid.pyの使い方は付属のREADMEを参照するか,ググってください) | |
# | |
# 次のコマンドで実行できる. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LIBLINEAR 2.01 の Makefile に cross-validation の評価尺度変更のコードを追記したもの. | |
# | |
# cross-validation の評価尺度変更 | |
# http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/eval/ | |
CXX ?= g++ | |
CC ?= gcc | |
CFLAGS = -Wall -Wconversion -O3 -fPIC | |
LIBS ?= blas/blas.a | |
SHVER = 3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# URLリストを読み込み,存在しないURLのみファイルに追記書き出し. | |
FILE_PATH=urls.txt | |
S3_PATH=$1 | |
DL_PATH=${S3_PATH} | |
cat ${FILE_PATH} | while read line | |
do |
OlderNewer