inouetakuya/extended_analyze.json

## extended_analyze.json
// $ curl -XPOST 'localhost:9200/_extended_analyze?analyzer=kuromoji&pretty' -d '絶対に手を出してはいけない相手を夜這いしちゃった俺'
{
  "custom_analyzer" : false,
  "analyzer" : {
    "kuromoji" : [ {
      "token" : "絶対",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "word",
      "position" : 1,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : null,
          "inflectionType" : null,
          "inflectionForm (en)" : null,
          "inflectionForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "noun-adverbial",
          "partOfSpeech" : "名詞-副詞可能"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "zettai",
          "reading" : "ゼッタイ",
          "pronunciation (en)" : "zettai",
          "pronunciation" : "ゼッタイ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e7 b5 b6 e5 af be]"
        }
      }
    }, {
      "token" : "手",
      "start_offset" : 3,
      "end_offset" : 4,
      "type" : "word",
      "position" : 3,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : null,
          "inflectionType" : null,
          "inflectionForm (en)" : null,
          "inflectionForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "noun-common",
          "partOfSpeech" : "名詞-一般"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "te",
          "reading" : "テ",
          "pronunciation (en)" : "te",
          "pronunciation" : "テ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e6 89 8b]"
        }
      }
    }, {
      "token" : "出す",
      "start_offset" : 5,
      "end_offset" : 7,
      "type" : "word",
      "position" : 5,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : "出す"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : "5-row-cons-s",
          "inflectionType" : "五段・サ行",
          "inflectionForm (en)" : "conjunctive",
          "inflectionForm" : "連用形"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "verb-main",
          "partOfSpeech" : "動詞-自立"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "dashi",
          "reading" : "ダシ",
          "pronunciation (en)" : "dashi",
          "pronunciation" : "ダシ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e5 87 ba e3 81 99]"
        }
      }
    }, {
      "token" : "いける",
      "start_offset" : 9,
      "end_offset" : 11,
      "type" : "word",
      "position" : 8,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : "いける"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : "1-row",
          "inflectionType" : "一段",
          "inflectionForm (en)" : "imperfective",
          "inflectionForm" : "未然形"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "verb-auxiliary",
          "partOfSpeech" : "動詞-非自立"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "ike",
          "reading" : "イケ",
          "pronunciation (en)" : "ike",
          "pronunciation" : "イケ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e3 81 84 e3 81 91 e3 82 8b]"
        }
      }
    }, {
      "token" : "相手",
      "start_offset" : 13,
      "end_offset" : 15,
      "type" : "word",
      "position" : 10,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : null,
          "inflectionType" : null,
          "inflectionForm (en)" : null,
          "inflectionForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "noun-common",
          "partOfSpeech" : "名詞-一般"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "aite",
          "reading" : "アイテ",
          "pronunciation (en)" : "aite",
          "pronunciation" : "アイテ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e7 9b b8 e6 89 8b]"
        }
      }
    }, {
      "token" : "夜這い",
      "start_offset" : 16,
      "end_offset" : 19,
      "type" : "word",
      "position" : 12,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : null,
          "inflectionType" : null,
          "inflectionForm (en)" : null,
          "inflectionForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "noun-common",
          "partOfSpeech" : "名詞-一般"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "yobai",
          "reading" : "ヨバイ",
          "pronunciation (en)" : "yobai",
          "pronunciation" : "ヨバイ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e5 a4 9c e9 80 99 e3 81 84]"
        }
      }
    }, {
      "token" : "ちゃう",
      "start_offset" : 20,
      "end_offset" : 23,
      "type" : "word",
      "position" : 14,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : "ちゃう"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : "5-row-cons-w-cons-onbin",
          "inflectionType" : "五段・ワ行促音便",
          "inflectionForm (en)" : "conjunctive-ta-connection",
          "inflectionForm" : "連用タ接続"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "verb-auxiliary",
          "partOfSpeech" : "動詞-非自立"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "cha",
          "reading" : "チャッ",
          "pronunciation (en)" : "cha",
          "pronunciation" : "チャッ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e3 81 a1 e3 82 83 e3 81 86]"
        }
      }
    }, {
      "token" : "俺",
      "start_offset" : 24,
      "end_offset" : 25,
      "type" : "word",
      "position" : 16,
      "extended_attributes" : {
        "org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
          "baseForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
          "inflectionType (en)" : null,
          "inflectionType" : null,
          "inflectionForm (en)" : null,
          "inflectionForm" : null
        },
        "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
          "partOfSpeech (en)" : "noun-pronoun-misc",
          "partOfSpeech" : "名詞-代名詞-一般"
        },
        "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
          "reading (en)" : "ore",
          "reading" : "オレ",
          "pronunciation (en)" : "ore",
          "pronunciation" : "オレ"
        },
        "org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
          "keyword" : false
        },
        "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
          "positionLength" : 1
        },
        "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
          "bytes" : "[e4 bf ba]"
        }
      }
    } ]
  }
}
	// $ curl -XPOST 'localhost:9200/_extended_analyze?analyzer=kuromoji&pretty' -d '絶対に手を出してはいけない相手を夜這いしちゃった俺'
	{
	"custom_analyzer" : false,
	"analyzer" : {
	"kuromoji" : [ {
	"token" : "絶対",
	"start_offset" : 0,
	"end_offset" : 2,
	"type" : "word",
	"position" : 1,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : null,
	"inflectionType" : null,
	"inflectionForm (en)" : null,
	"inflectionForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "noun-adverbial",
	"partOfSpeech" : "名詞-副詞可能"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "zettai",
	"reading" : "ゼッタイ",
	"pronunciation (en)" : "zettai",
	"pronunciation" : "ゼッタイ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e7 b5 b6 e5 af be]"
	}
	}
	}, {
	"token" : "手",
	"start_offset" : 3,
	"end_offset" : 4,
	"type" : "word",
	"position" : 3,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : null,
	"inflectionType" : null,
	"inflectionForm (en)" : null,
	"inflectionForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "noun-common",
	"partOfSpeech" : "名詞-一般"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "te",
	"reading" : "テ",
	"pronunciation (en)" : "te",
	"pronunciation" : "テ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e6 89 8b]"
	}
	}
	}, {
	"token" : "出す",
	"start_offset" : 5,
	"end_offset" : 7,
	"type" : "word",
	"position" : 5,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : "出す"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : "5-row-cons-s",
	"inflectionType" : "五段・サ行",
	"inflectionForm (en)" : "conjunctive",
	"inflectionForm" : "連用形"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "verb-main",
	"partOfSpeech" : "動詞-自立"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "dashi",
	"reading" : "ダシ",
	"pronunciation (en)" : "dashi",
	"pronunciation" : "ダシ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e5 87 ba e3 81 99]"
	}
	}
	}, {
	"token" : "いける",
	"start_offset" : 9,
	"end_offset" : 11,
	"type" : "word",
	"position" : 8,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : "いける"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : "1-row",
	"inflectionType" : "一段",
	"inflectionForm (en)" : "imperfective",
	"inflectionForm" : "未然形"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "verb-auxiliary",
	"partOfSpeech" : "動詞-非自立"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "ike",
	"reading" : "イケ",
	"pronunciation (en)" : "ike",
	"pronunciation" : "イケ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e3 81 84 e3 81 91 e3 82 8b]"
	}
	}
	}, {
	"token" : "相手",
	"start_offset" : 13,
	"end_offset" : 15,
	"type" : "word",
	"position" : 10,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : null,
	"inflectionType" : null,
	"inflectionForm (en)" : null,
	"inflectionForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "noun-common",
	"partOfSpeech" : "名詞-一般"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "aite",
	"reading" : "アイテ",
	"pronunciation (en)" : "aite",
	"pronunciation" : "アイテ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e7 9b b8 e6 89 8b]"
	}
	}
	}, {
	"token" : "夜這い",
	"start_offset" : 16,
	"end_offset" : 19,
	"type" : "word",
	"position" : 12,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : null,
	"inflectionType" : null,
	"inflectionForm (en)" : null,
	"inflectionForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "noun-common",
	"partOfSpeech" : "名詞-一般"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "yobai",
	"reading" : "ヨバイ",
	"pronunciation (en)" : "yobai",
	"pronunciation" : "ヨバイ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e5 a4 9c e9 80 99 e3 81 84]"
	}
	}
	}, {
	"token" : "ちゃう",
	"start_offset" : 20,
	"end_offset" : 23,
	"type" : "word",
	"position" : 14,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : "ちゃう"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : "5-row-cons-w-cons-onbin",
	"inflectionType" : "五段・ワ行促音便",
	"inflectionForm (en)" : "conjunctive-ta-connection",
	"inflectionForm" : "連用タ接続"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "verb-auxiliary",
	"partOfSpeech" : "動詞-非自立"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "cha",
	"reading" : "チャッ",
	"pronunciation (en)" : "cha",
	"pronunciation" : "チャッ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e3 81 a1 e3 82 83 e3 81 86]"
	}
	}
	}, {
	"token" : "俺",
	"start_offset" : 24,
	"end_offset" : 25,
	"type" : "word",
	"position" : 16,
	"extended_attributes" : {
	"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
	"baseForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
	"inflectionType (en)" : null,
	"inflectionType" : null,
	"inflectionForm (en)" : null,
	"inflectionForm" : null
	},
	"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
	"partOfSpeech (en)" : "noun-pronoun-misc",
	"partOfSpeech" : "名詞-代名詞-一般"
	},
	"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
	"reading (en)" : "ore",
	"reading" : "オレ",
	"pronunciation (en)" : "ore",
	"pronunciation" : "オレ"
	},
	"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
	"keyword" : false
	},
	"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
	"positionLength" : 1
	},
	"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
	"bytes" : "[e4 bf ba]"
	}
	}
	} ]
	}
	}